diff --git a/cxr_finetune_lora/checkpoints/0000500/README.md b/cxr_finetune_lora/checkpoints/0000500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0000500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0000500/adapter_config.json b/cxr_finetune_lora/checkpoints/0000500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0000500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0000500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0000500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5139fca1f6ce8a40851ac10ee5739721ebb0ac20 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0000500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8edc9ee69fb257dce6520effe2d9bfc618a1c905be9b97a3638fb34ae4ead532 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0001000/README.md b/cxr_finetune_lora/checkpoints/0001000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0001000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0001000/adapter_config.json b/cxr_finetune_lora/checkpoints/0001000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0001000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0001000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0001000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dff59a21fea91d555b39d1f684360cc1099a3e6b --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0001000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5cfef0d062f8996a7a278772d46fba0c2b15e83c4e08216a08015dc515d729 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0001500/README.md b/cxr_finetune_lora/checkpoints/0001500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0001500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0001500/adapter_config.json b/cxr_finetune_lora/checkpoints/0001500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0001500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0001500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0001500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee4dead4c5179ed372fa0e924f432610af2fd238 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0001500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11ea3b6b208e7e447ddf848987e0b7f277783b9898b344de9eb03329feeb9259 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0002000/README.md b/cxr_finetune_lora/checkpoints/0002000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0002000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0002000/adapter_config.json b/cxr_finetune_lora/checkpoints/0002000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0002000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0002000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0002000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..261baaeec878adf247d8ad9e2c37193d2fc1a5cf --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0002000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54397d218b902143da7f29b6526c7faea4c467e6721bed0384922eb3e1ede823 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0002500/README.md b/cxr_finetune_lora/checkpoints/0002500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0002500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0002500/adapter_config.json b/cxr_finetune_lora/checkpoints/0002500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0002500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0002500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0002500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1d2a82cca25d22b7d6064b1b84ba7d4bbfc0fbd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0002500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980b81d496c0ea5e3b2dcf455aebad7e250265f8ec8443fedf21c3651e399e13 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0003000/README.md b/cxr_finetune_lora/checkpoints/0003000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0003000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0003000/adapter_config.json b/cxr_finetune_lora/checkpoints/0003000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0003000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0003000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0003000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..426d5de569af8e47d72cbdb0e6bfe734d264fffb --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0003000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8884439d5d82ae6f247179db3b9ec51c9a1798008800c1f45a610460bedae6 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0003500/README.md b/cxr_finetune_lora/checkpoints/0003500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0003500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0003500/adapter_config.json b/cxr_finetune_lora/checkpoints/0003500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0003500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0003500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0003500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf43db8b7923be9af64d31792bb255d83ea42edc --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0003500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6651f04b9f153e2655855df9d0d162e855185df56192439cddf2c781f774286 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0004000/README.md b/cxr_finetune_lora/checkpoints/0004000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0004000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0004000/adapter_config.json b/cxr_finetune_lora/checkpoints/0004000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0004000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0004000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0004000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48f997af8c6526ba41ec72f7aeb23731b809bc4c --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0004000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325de077c1b659ce7433e5069fbbda753450dd0eb376dba046e1af77887d03f0 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0004500/README.md b/cxr_finetune_lora/checkpoints/0004500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0004500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0004500/adapter_config.json b/cxr_finetune_lora/checkpoints/0004500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0004500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0004500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0004500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e94d7bc7d1881cb6c21b3c57d6b928b3d67bdc58 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0004500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e7bea8382f3471f094e794eb79c35f60278f3640375e6c343db62e5c7cb61a +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0005000/README.md b/cxr_finetune_lora/checkpoints/0005000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0005000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0005000/adapter_config.json b/cxr_finetune_lora/checkpoints/0005000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0005000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0005000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0005000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ffebca975d4fef8dbc41f53517cf9a4b8c3d749 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0005000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc9410b45ce398ca25ba381f25afbfde68aeb98e74af39aba8110f1adae914e +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0005500/README.md b/cxr_finetune_lora/checkpoints/0005500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0005500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0005500/adapter_config.json b/cxr_finetune_lora/checkpoints/0005500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0005500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0005500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0005500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59e4097954d2a423f6822b774955fa282a2c8e32 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0005500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c22b88c56e1c771d57a17f842d261d33e412ad6f57cacb05bf7355a58c624b +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0006000/README.md b/cxr_finetune_lora/checkpoints/0006000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0006000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0006000/adapter_config.json b/cxr_finetune_lora/checkpoints/0006000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0006000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0006000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0006000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..653a0bdc91e707c9712657d495290812e0283101 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0006000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:182bdf4191fceb46d6883ad9e585b2f5c69b4ce49763ef34a84b91d648360b29 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0006500/README.md b/cxr_finetune_lora/checkpoints/0006500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0006500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0006500/adapter_config.json b/cxr_finetune_lora/checkpoints/0006500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0006500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0006500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0006500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd644fe877ae773db2ccbbf5089db48998c3cedd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0006500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591b8d8804db3f62eb6ce8ed3219e9678eed76bcec179f4a822c2fa4d6ed4454 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0007000/README.md b/cxr_finetune_lora/checkpoints/0007000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0007000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0007000/adapter_config.json b/cxr_finetune_lora/checkpoints/0007000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0007000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0007000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0007000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56a39c726e8efa3c33a285e9933c45c752705645 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0007000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ec37909cbe09577ff0438492875d898da01339ee0758266a18589d97432bb79 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0007500/README.md b/cxr_finetune_lora/checkpoints/0007500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0007500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0007500/adapter_config.json b/cxr_finetune_lora/checkpoints/0007500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0007500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0007500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0007500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a5979ddbd276485c754641c62aaf95b9d95c068 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0007500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff77c696505cd7c2bdbafb075441ca9221d80ec887ae247f6e4bb2b370e1339 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0008000/README.md b/cxr_finetune_lora/checkpoints/0008000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0008000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0008000/adapter_config.json b/cxr_finetune_lora/checkpoints/0008000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0008000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0008000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0008000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..318201db801b76cdefb6d40d7aca38c6cdfaa167 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0008000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:359a8ac3aae08dfd4b3c4bfa5da6974c3307f48a1faa3863c497f4db030ab355 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0008500/README.md b/cxr_finetune_lora/checkpoints/0008500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0008500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0008500/adapter_config.json b/cxr_finetune_lora/checkpoints/0008500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0008500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0008500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0008500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7101827f300922bc14f08c9a4d3590daf8421420 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0008500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83595120f3758d6d5d29d16f0a3d559b49808bdd5042ec43be88f512922a7850 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0009000/README.md b/cxr_finetune_lora/checkpoints/0009000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0009000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0009000/adapter_config.json b/cxr_finetune_lora/checkpoints/0009000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0009000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0009000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0009000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f2d0334aa7c3d44b3d40604e6b3a9fa55383d71 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0009000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc8115b8545691439d448e3cf84b5f6ddfab3a37a066a1e63bc5e43c1866598 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0009500/README.md b/cxr_finetune_lora/checkpoints/0009500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0009500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0009500/adapter_config.json b/cxr_finetune_lora/checkpoints/0009500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0009500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0009500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0009500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2c814ef0d0686c3c3b1261762973188cd0ecfe9 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0009500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4435b1f0f3ec9da2934304dc3466e8b696b2671494b86f41f6d0c81d3d262993 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0010000/README.md b/cxr_finetune_lora/checkpoints/0010000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0010000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0010000/adapter_config.json b/cxr_finetune_lora/checkpoints/0010000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0010000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0010000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0010000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..440c0f16b3aced15277a4c0a202df659c32ca336 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0010000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f82d239462c8b1d2cbeec7437825ac8cf6114a06bcdcd9453c456df98a2330f +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0010500/README.md b/cxr_finetune_lora/checkpoints/0010500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0010500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0010500/adapter_config.json b/cxr_finetune_lora/checkpoints/0010500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0010500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0010500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0010500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07de89a9b741e7aa90b8cb78d618ff4a333966f3 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0010500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b990511491a84e2a7eac8d9968091fdd352d64ed92d1ccf85ae75092bdd80c +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0011000/README.md b/cxr_finetune_lora/checkpoints/0011000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0011000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0011000/adapter_config.json b/cxr_finetune_lora/checkpoints/0011000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0011000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0011000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0011000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8c7976f99722ed7dbdefd7a495b68fda20ad691 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0011000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27f9eb2380ea3385f839bd1e5027807c5495eab4d43cc45a031b46e0fa9ddaa3 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0011500/README.md b/cxr_finetune_lora/checkpoints/0011500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0011500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0011500/adapter_config.json b/cxr_finetune_lora/checkpoints/0011500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0011500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0011500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0011500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03f0714979858a5d349b94697acd691a541dd5df --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0011500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edf29bdb01c05e61ed2cde859de8168af353319471b7bc55a8d5c6deb973ebe +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0012000/README.md b/cxr_finetune_lora/checkpoints/0012000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0012000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0012000/adapter_config.json b/cxr_finetune_lora/checkpoints/0012000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0012000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0012000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0012000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf51af464e26001f39e76f91ce6a164a243ee8e0 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0012000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c04ac83e8aadded9f29e2906c9698d3389036505f41331da17ad9faa57f12cb +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0012500/README.md b/cxr_finetune_lora/checkpoints/0012500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0012500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0012500/adapter_config.json b/cxr_finetune_lora/checkpoints/0012500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0012500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0012500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0012500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2f84afcb1a56a17750d05d14b6824f148c1bf2d --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0012500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d26e789164858fc795d32d61f35fcd1c13aed991dea413181253c415c76caede +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0013000/README.md b/cxr_finetune_lora/checkpoints/0013000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0013000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0013000/adapter_config.json b/cxr_finetune_lora/checkpoints/0013000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0013000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0013000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0013000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44a358a0c378d03607522cbbcca2e53b50ebf3ef --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0013000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d362f154a5e6fbb909da2b33883ef78c19ac53b5b736de8899457bbbb684a7ae +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0013500/README.md b/cxr_finetune_lora/checkpoints/0013500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0013500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0013500/adapter_config.json b/cxr_finetune_lora/checkpoints/0013500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0013500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0013500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0013500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fd5d59d370579edd2f5cc3482f66e762c5ce9d9 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0013500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7d92e9559f809ae2ce7e65945240a7b71ed18c9e02e69ea3f49eb160e5d28c +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0014000/README.md b/cxr_finetune_lora/checkpoints/0014000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0014000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0014000/adapter_config.json b/cxr_finetune_lora/checkpoints/0014000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0014000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0014000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0014000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..533540e8b35135dec93e06e732884a54910f6e89 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0014000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32aaed0fcf5978f4080eb8a86567c4147cf4767dc2e657b9ad08ae60af331df +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0014500/README.md b/cxr_finetune_lora/checkpoints/0014500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0014500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0014500/adapter_config.json b/cxr_finetune_lora/checkpoints/0014500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0014500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0014500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0014500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24d5eb3e158992da7cc78cefc3b88501f0de4526 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0014500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f731123edaf5c88209b6944b05e71b0e8906234c4b921546b046342f2c92a70 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0015000/README.md b/cxr_finetune_lora/checkpoints/0015000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0015000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0015000/adapter_config.json b/cxr_finetune_lora/checkpoints/0015000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0015000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0015000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0015000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9371bb8da70c65f652e6ad4f39f5addb13a7f735 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0015000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09656c80ecf02c3ee50554e840c480f9f5677fa11991cc8e920c8a417a31d207 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0015500/README.md b/cxr_finetune_lora/checkpoints/0015500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0015500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0015500/adapter_config.json b/cxr_finetune_lora/checkpoints/0015500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0015500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0015500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0015500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..537b73e2a3de7c1f4e2264c11d9b4ff46f724beb --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0015500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1b0b9846577063af3f19a66729dd21c5d75c092ce2e2e5c893646ca8d8eef0 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0016000/README.md b/cxr_finetune_lora/checkpoints/0016000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0016000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0016000/adapter_config.json b/cxr_finetune_lora/checkpoints/0016000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0016000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0016000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0016000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8876b25383f8c4f4a881356cf689d2eb377babb --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0016000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d99e93d090ca3238920df6991bbfa09e7e2d3b8d2489f5b7d432924f04816a2 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0016500/README.md b/cxr_finetune_lora/checkpoints/0016500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0016500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0016500/adapter_config.json b/cxr_finetune_lora/checkpoints/0016500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0016500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0016500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0016500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9c2343020bd621558e50fa32235058fb8f2d245 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0016500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dcf4df70cfe44e6074ebeb6641f84275dcd90c85ef62d937ff92788768a9414 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0017000/README.md b/cxr_finetune_lora/checkpoints/0017000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0017000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0017000/adapter_config.json b/cxr_finetune_lora/checkpoints/0017000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0017000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0017000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0017000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..643bf952dfb66a89aa58233fa6974933023f03ac --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0017000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac76bea609140ba044b30aeb109ed7863596e94cabb0ba4a3b77ea551d2bc9f +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0017500/README.md b/cxr_finetune_lora/checkpoints/0017500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0017500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0017500/adapter_config.json b/cxr_finetune_lora/checkpoints/0017500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0017500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0017500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0017500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45532dc9e415de89cb63041cf82056abe932b124 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0017500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67169ae2443004ec5f28a994b5c4e632ef865e5ac7876e56559b0dc20cad76b5 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0018000/README.md b/cxr_finetune_lora/checkpoints/0018000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0018000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0018000/adapter_config.json b/cxr_finetune_lora/checkpoints/0018000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0018000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0018000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0018000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4811a4809dd2df29895df8a03219db14edbcc97f --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0018000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55336b5dbfa85363516f72905e9fc4af52d380456d6cf28558d150da8b8ea5b2 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0018500/README.md b/cxr_finetune_lora/checkpoints/0018500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0018500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0018500/adapter_config.json b/cxr_finetune_lora/checkpoints/0018500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0018500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0018500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0018500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5a51367b1555e342ba3f029fe690b42e39e71e9 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0018500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:277a7748bcdbb7c3554b289c96e8a6984dbdd889116374ac68ffe6ce673b3187 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0019000/README.md b/cxr_finetune_lora/checkpoints/0019000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0019000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0019000/adapter_config.json b/cxr_finetune_lora/checkpoints/0019000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0019000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0019000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0019000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f12bc2a41351120338f2621c3828a6bf800fcf58 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0019000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b53bc3786d2bc6c0649264e6e7ba98c10bfa0362f2830587e6b674063cdccd81 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0019500/README.md b/cxr_finetune_lora/checkpoints/0019500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0019500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0019500/adapter_config.json b/cxr_finetune_lora/checkpoints/0019500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0019500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0019500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0019500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0e7604f7403294fe05a46cf3903a5593d5a7448 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0019500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f82a0247c4b273c5aea90fc0cec29d4d3e263b24f06173fa9b591067f71f73 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0020000/README.md b/cxr_finetune_lora/checkpoints/0020000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0020000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0020000/adapter_config.json b/cxr_finetune_lora/checkpoints/0020000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0020000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0020000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0020000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76063c728bd8fbad8609d392178d59cb3e8ab545 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0020000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f980095ebed912906438ca1016d45a80bc37483780060fccf09216e3d72cfb +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0020500/README.md b/cxr_finetune_lora/checkpoints/0020500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0020500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0020500/adapter_config.json b/cxr_finetune_lora/checkpoints/0020500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0020500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0020500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0020500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d7d2875a5e6eed8ccea53581f82c471a127a9a0 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0020500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a71d8f3ac4d05446e310d571000549e117cd28ca26d3b5b9b77c585f015cd6 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0021000/README.md b/cxr_finetune_lora/checkpoints/0021000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0021000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0021000/adapter_config.json b/cxr_finetune_lora/checkpoints/0021000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0021000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0021000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0021000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01f19711f9d74094d3e26e7f5c900b368129b16a --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0021000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aec40194c7c96fa5d08d44d1efb408c90676fc15473fb38efe736a90e3ddb14 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0021500/README.md b/cxr_finetune_lora/checkpoints/0021500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0021500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0021500/adapter_config.json b/cxr_finetune_lora/checkpoints/0021500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0021500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0021500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0021500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3db232be5014a2c9dc3081142053f8bddf2909ee --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0021500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807768f78fa4d3110d84dbddadf41379e58ea01c3555fcba88f9e2881b4aac61 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0022000/README.md b/cxr_finetune_lora/checkpoints/0022000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0022000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0022000/adapter_config.json b/cxr_finetune_lora/checkpoints/0022000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0022000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0022000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0022000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e186929e13f633123737846927670ab9e10384e --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0022000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0ae8b01a1c396e9abb31e3fb26e4f39db558a8c482e627cce2867fae9835656 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0022500/README.md b/cxr_finetune_lora/checkpoints/0022500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0022500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0022500/adapter_config.json b/cxr_finetune_lora/checkpoints/0022500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0022500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0022500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0022500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6f3d063b06a0cf52b35b9f86b3e09c58974d27c --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0022500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302c1b5b6d6b38b33db947acaf293cd2328d89b0c39320c6743b4cdf178f71bb +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0023000/README.md b/cxr_finetune_lora/checkpoints/0023000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0023000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0023000/adapter_config.json b/cxr_finetune_lora/checkpoints/0023000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0023000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0023000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0023000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..880a7558ac1f5c425bb5d6ab7f8d384bfe3010af --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0023000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f613ae11bcd7ec4b50b4db84a32937ded907e932448b476b1559d78dcfdff4 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0023500/README.md b/cxr_finetune_lora/checkpoints/0023500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0023500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0023500/adapter_config.json b/cxr_finetune_lora/checkpoints/0023500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0023500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0023500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0023500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4798d6f3afe25922dcfe662709a401a23ed7e57 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0023500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca0dd6963f1d3ec6dec1dcd16c8f8b635c50799ed1470beea97586051a768489 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0024000/README.md b/cxr_finetune_lora/checkpoints/0024000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0024000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0024000/adapter_config.json b/cxr_finetune_lora/checkpoints/0024000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0024000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0024000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0024000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22a99a84320d8876965675f2c6a7bb9b0774351f --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0024000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8158a3fc9e6ea4ff6f73606814c61a316a1aff5aabc022c18ce7cb4730cd809c +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0024500/README.md b/cxr_finetune_lora/checkpoints/0024500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0024500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0024500/adapter_config.json b/cxr_finetune_lora/checkpoints/0024500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0024500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0024500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0024500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f54861fa931a273b0a42f79588245cdf33c1e75e --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0024500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13223c9f43af0f56fd6faec1868d0aeae7cd3794c89e1377104285f5760bdfa2 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0025000/README.md b/cxr_finetune_lora/checkpoints/0025000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0025000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0025000/adapter_config.json b/cxr_finetune_lora/checkpoints/0025000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0025000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0025000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0025000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ad3da5bcd5c1d26563bd3d96a6b645887eca447 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0025000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a95936474ed2550450a8c3503b564065fc68e8dce37fcd4810c361370ec94d6e +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0025500/README.md b/cxr_finetune_lora/checkpoints/0025500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0025500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0025500/adapter_config.json b/cxr_finetune_lora/checkpoints/0025500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0025500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0025500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0025500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c529bb4f43a5f79642fcefd8d09d13208f5ee763 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0025500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52aebbc0315681b21e44e44f71bede9d9e3d8a0abfcade00e52ab1d3a2ae3922 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0026000/README.md b/cxr_finetune_lora/checkpoints/0026000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0026000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0026000/adapter_config.json b/cxr_finetune_lora/checkpoints/0026000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0026000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0026000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0026000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..496d32c3576e396ff3ff14e28e902e43a3b603a7 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0026000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fecfb7b978506f05164bff4bb6335781cec87bc880bda4f55193becbcec5f604 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0026500/README.md b/cxr_finetune_lora/checkpoints/0026500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0026500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0026500/adapter_config.json b/cxr_finetune_lora/checkpoints/0026500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0026500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0026500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0026500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0b66dd30e2e2bc90dc434a19b6cd02841f87209 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0026500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f58748f6f83c8e7be8c236ab0584df86d59f24b90e46273cb2e5aa14c9bf6e8 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0027000/README.md b/cxr_finetune_lora/checkpoints/0027000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0027000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0027000/adapter_config.json b/cxr_finetune_lora/checkpoints/0027000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0027000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0027000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0027000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e0ddb4fcd08f5b122b39b4aae391ec4ebbd1524 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0027000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c331f8f06a9fa97cac89b3bc048cc5593eaee28570561c0cec9e5def1663a99 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0027500/README.md b/cxr_finetune_lora/checkpoints/0027500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0027500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0027500/adapter_config.json b/cxr_finetune_lora/checkpoints/0027500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0027500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0027500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0027500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cb96c1509de405b41f59939ac2579096687b53a --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0027500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:891697f7ecc613e02ec07fea8cda88bc8922841e55e5a85df47203daa16d80ed +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0028000/README.md b/cxr_finetune_lora/checkpoints/0028000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0028000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0028000/adapter_config.json b/cxr_finetune_lora/checkpoints/0028000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0028000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0028000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0028000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de59992b0bdcd9f4108dbebe04d9fe66cad5c4b8 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0028000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe5382b664043f3988728239abfb27decc2ab842f0e23b572b06caf4b871d8fb +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0028500/README.md b/cxr_finetune_lora/checkpoints/0028500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0028500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0028500/adapter_config.json b/cxr_finetune_lora/checkpoints/0028500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0028500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0028500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0028500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6fd3354ea7e823c73a832787b07d29990a6b648 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0028500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0ee116b3888c7adc829bf9adfb9ad403db1ebd99632ea66d5394f7a38eab93f +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0029000/README.md b/cxr_finetune_lora/checkpoints/0029000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0029000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0029000/adapter_config.json b/cxr_finetune_lora/checkpoints/0029000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0029000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0029000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0029000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc946f4875bea11e51cc7430a546e784bbf40c23 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0029000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba7e654ece31d5cf4562e73d8f03b016a3dcfbc63be8638087bf3d7c97d32de7 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0029500/README.md b/cxr_finetune_lora/checkpoints/0029500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0029500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0029500/adapter_config.json b/cxr_finetune_lora/checkpoints/0029500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0029500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0029500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0029500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7b23ab7d41684c3f9036e374924e0081fccc12e --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0029500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a17283651bd3bbaa9e1973404a05f6417cee6defa372dec836fad975d86573a5 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0030000/README.md b/cxr_finetune_lora/checkpoints/0030000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0030000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0030000/adapter_config.json b/cxr_finetune_lora/checkpoints/0030000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0030000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0030000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0030000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85c2f83781b530de0211b728ed1a762ae72563d6 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0030000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f90f6657a5a399818da3912db21e586d2233df6ad7fc2e8097998753fb0256d +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0030500/README.md b/cxr_finetune_lora/checkpoints/0030500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0030500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0030500/adapter_config.json b/cxr_finetune_lora/checkpoints/0030500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0030500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0030500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0030500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d234099b5f9232752f6bd310d270e9338ebc5476 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0030500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c6698badb931bcbbfb7f04e88a3c219c5a0a6727acb82a90d2ba714275a5b2 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0031000/README.md b/cxr_finetune_lora/checkpoints/0031000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0031000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0031000/adapter_config.json b/cxr_finetune_lora/checkpoints/0031000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0031000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0031000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0031000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e5e2918dbd0140747ae64b1e3d84bb32f127a05 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0031000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c380644344bd0b2f3d48ae22a08c27991f68c81f80c715cdbf79dfa067a99289 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0031500/README.md b/cxr_finetune_lora/checkpoints/0031500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0031500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0031500/adapter_config.json b/cxr_finetune_lora/checkpoints/0031500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0031500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0031500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0031500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3c23fb7ed9f0e90a733b7dd676de1b51a323094 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0031500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e812b193e2e309c59a00f39212de5f725aa9c72c857f54c5bf0d1593091feebc +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0032000/README.md b/cxr_finetune_lora/checkpoints/0032000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0032000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0032000/adapter_config.json b/cxr_finetune_lora/checkpoints/0032000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0032000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0032000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0032000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7318a786392f08f24eecb11b4478ad256b97cec5 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0032000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b811d0236674e57d90aa8ee1958febd104a792cccf6c13cd22013a909e3a5d3d +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0032500/README.md b/cxr_finetune_lora/checkpoints/0032500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0032500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0032500/adapter_config.json b/cxr_finetune_lora/checkpoints/0032500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0032500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0032500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0032500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6c9d63ee564747a950fbbd79dcd680a963175fc --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0032500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd8de7527f06d79766cfb021487a7786a1a4f60695befff071ee8896e1b71be5 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0033000/README.md b/cxr_finetune_lora/checkpoints/0033000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0033000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0033000/adapter_config.json b/cxr_finetune_lora/checkpoints/0033000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0033000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0033000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0033000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26385a5305ac89a951bc2d6489476662e567afdd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0033000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f83e4a62a40c09890d9c09a880c5bf75af98e42ae0be8d2d2133c1f7740725d +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0033500/README.md b/cxr_finetune_lora/checkpoints/0033500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0033500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0033500/adapter_config.json b/cxr_finetune_lora/checkpoints/0033500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0033500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0033500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0033500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbc7e61bb9bedd6541b19b43cb34ba8e5a484bcd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0033500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b86ee5484c440e953097c4f038a96035a6ce7a234857d3575b64b68c7e56dcdb +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0034000/README.md b/cxr_finetune_lora/checkpoints/0034000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0034000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0034000/adapter_config.json b/cxr_finetune_lora/checkpoints/0034000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0034000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0034000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0034000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ff117c26d8840585247f3c24aa7e076b5f306ec --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0034000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bda70ce6206d84352d0c0a91b077cdafe22706577d8ce3c709533efef10db88 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0034500/README.md b/cxr_finetune_lora/checkpoints/0034500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0034500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0034500/adapter_config.json b/cxr_finetune_lora/checkpoints/0034500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0034500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0034500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0034500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ad493654b55eaaef83004cc437dff98d0fa9d64 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0034500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3dc9d6435230e0c366056c85e88248df0516a90ed1e8c4dbdd129bc7b74c33 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0035000/README.md b/cxr_finetune_lora/checkpoints/0035000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0035000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0035000/adapter_config.json b/cxr_finetune_lora/checkpoints/0035000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0035000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0035000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0035000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afc831067b5bfe52d3c35353dda3c14271b62c34 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0035000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73907e45f981dfc0c9c75627804539abeac8d43eb9111598476cdd896720beeb +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0035500/README.md b/cxr_finetune_lora/checkpoints/0035500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0035500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0035500/adapter_config.json b/cxr_finetune_lora/checkpoints/0035500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0035500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0035500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0035500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a6789c8c7afb0533d0d1cbcf9edcd20ac7b36bd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0035500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd4b34e8412d5e9fc62c5ce2201ad78aaf7a5f2f822c2ea7da4dda09da083c0 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0036000/README.md b/cxr_finetune_lora/checkpoints/0036000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0036000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0036000/adapter_config.json b/cxr_finetune_lora/checkpoints/0036000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0036000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0036000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0036000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1ce046d89a667e170816a8f5045df50012a82fd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0036000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4324aee8355d79145a476039fe5169ff890626c12af6776a3fc24a35b5775d54 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0036500/README.md b/cxr_finetune_lora/checkpoints/0036500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0036500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0036500/adapter_config.json b/cxr_finetune_lora/checkpoints/0036500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0036500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0036500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0036500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bc2c9382759a40b476d583f2077f2df4e3889da --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0036500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30156ab09cfa09fb238e731b7af2e0c0e9c7f2259be5166b9c1d37e162f3f23c +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0037000/README.md b/cxr_finetune_lora/checkpoints/0037000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0037000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0037000/adapter_config.json b/cxr_finetune_lora/checkpoints/0037000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0037000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0037000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0037000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4be0a4bf1123643ab1e3e31c7d0337fab80a9693 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0037000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c35cebc6f23f03dbf9cff85a6c60b7e73708b561cd457ba9c92fb36a2cf5ae +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0037500/README.md b/cxr_finetune_lora/checkpoints/0037500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0037500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0037500/adapter_config.json b/cxr_finetune_lora/checkpoints/0037500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0037500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0037500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0037500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80eb2aa0c44e5635ef813ea13aa65220debb598b --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0037500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1343d2fa493eecd73801b6c0ad50411415993e99247ea9b34b79ee325abbd4 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0038000/README.md b/cxr_finetune_lora/checkpoints/0038000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0038000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0038000/adapter_config.json b/cxr_finetune_lora/checkpoints/0038000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0038000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0038000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0038000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08927a6c7813a7e125df066caac672a0f754dda6 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0038000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38c2533ad45181f8a9332941851989832dfc8de0a94e33c9c1ca78e801e2d485 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0038500/README.md b/cxr_finetune_lora/checkpoints/0038500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0038500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0038500/adapter_config.json b/cxr_finetune_lora/checkpoints/0038500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0038500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0038500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0038500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e18e6ba7fb9fed02f7ba3bfb3c35fb2c60ea0b3 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0038500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eef45b371fadea6209dad1a0a84bfb37964b3f4d475f88dc6b33bb6a2167d64c +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0039000/README.md b/cxr_finetune_lora/checkpoints/0039000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0039000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0039000/adapter_config.json b/cxr_finetune_lora/checkpoints/0039000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0039000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0039000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0039000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df2d8b160f326039b3655a365a9c860a0349ba16 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0039000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1237e211590762770143dbb8cf835934e91f7cedaf4007d35e8dded3455c2bfa +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0039500/README.md b/cxr_finetune_lora/checkpoints/0039500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0039500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0039500/adapter_config.json b/cxr_finetune_lora/checkpoints/0039500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0039500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0039500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0039500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66c142ddc3e6b8004f7a1892a5dd20b5270772fd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0039500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e40180d1334ef691e93163fab4ca6729034770ecf690811205401655433a8256 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0040000/README.md b/cxr_finetune_lora/checkpoints/0040000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0040000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0040000/adapter_config.json b/cxr_finetune_lora/checkpoints/0040000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0040000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0040000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0040000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74eb3ae478db7802bf1cb95e2bd3880db62cfad7 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0040000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:464f8c31da032931ca056207f188c0aecf34f8b7e2ba7c6005ca322e4f95a56c +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0040500/README.md b/cxr_finetune_lora/checkpoints/0040500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0040500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0040500/adapter_config.json b/cxr_finetune_lora/checkpoints/0040500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0040500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0040500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0040500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee6d32d714907f99e4355eca30821985ae0c596c --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0040500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:246b0cc351f4a82afcbdd1594068100d01e3b4490511022bc5c937e6d1276495 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0041000/README.md b/cxr_finetune_lora/checkpoints/0041000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0041000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0041000/adapter_config.json b/cxr_finetune_lora/checkpoints/0041000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0041000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0041000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0041000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94ec0b5b5b3975d1b7f2e311601616100ad7b52e --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0041000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d03007bf6ae5fa36cece06be175098460174d79e6ab0404b2ab1e49ed834253 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0041500/README.md b/cxr_finetune_lora/checkpoints/0041500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0041500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0041500/adapter_config.json b/cxr_finetune_lora/checkpoints/0041500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0041500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0041500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0041500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..284e726b7706de729767ce2d9582433d420246ec --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0041500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b20b364b55548e82178e618e0b2404d5e871e1ef176d44f45abedae5c1600339 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0042000/README.md b/cxr_finetune_lora/checkpoints/0042000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0042000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0042000/adapter_config.json b/cxr_finetune_lora/checkpoints/0042000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0042000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0042000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0042000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ba8b2cb99f7ee9b51160819ad764123bf38f041 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0042000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e746576304d16f345c2ebafd4fbc9e4c88076a5a30806c8eb4f0b5786945a2 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0042500/README.md b/cxr_finetune_lora/checkpoints/0042500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0042500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0042500/adapter_config.json b/cxr_finetune_lora/checkpoints/0042500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0042500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0042500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0042500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3388c15492a933eb2984ee2503c7556762e6921 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0042500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb8b7320b42bea1fc708201aa7fbedb724835dc176d957b745c1b73b89031f7d +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0043000/README.md b/cxr_finetune_lora/checkpoints/0043000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0043000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0043000/adapter_config.json b/cxr_finetune_lora/checkpoints/0043000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0043000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0043000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0043000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5150db90a3d30efad5b03e5d789e6766efaa5182 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0043000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c90b2ff57575e2971684dbdc4b21113e580a94bea1964309c17cef40b71947a +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0043500/README.md b/cxr_finetune_lora/checkpoints/0043500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0043500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0043500/adapter_config.json b/cxr_finetune_lora/checkpoints/0043500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0043500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0043500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0043500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f7fda77f05df409beaf4af1819702375c8c04ea --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0043500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a66a1d59f35b1e1f416f2a485fa78bd08291e44a723071e7c4e73e311aa7dca +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0044000/README.md b/cxr_finetune_lora/checkpoints/0044000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0044000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0044000/adapter_config.json b/cxr_finetune_lora/checkpoints/0044000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0044000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0044000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0044000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c125ac7eddd89930edc5fb088c5a9623314043f7 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0044000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e22dc1b99770874217ced56427053f306811c5bfdb6719a6539c546408a0c1a9 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0044500/README.md b/cxr_finetune_lora/checkpoints/0044500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0044500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0044500/adapter_config.json b/cxr_finetune_lora/checkpoints/0044500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0044500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0044500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0044500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8abf98a66b9bdaf792e84b94a13e6585c17baba8 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0044500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbd1f0c9c74615f3474a2348c250793a6db336cbafbb820c022906143caedc2e +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0045000/README.md b/cxr_finetune_lora/checkpoints/0045000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0045000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0045000/adapter_config.json b/cxr_finetune_lora/checkpoints/0045000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0045000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0045000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0045000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bff31f60cde524a43f832f78a4bc058e5914cc3b --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0045000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bf4fa769e700bc688545ddd4f589b5399fca7f4910affac3054a48305bbb60c +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0045500/README.md b/cxr_finetune_lora/checkpoints/0045500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0045500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0045500/adapter_config.json b/cxr_finetune_lora/checkpoints/0045500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0045500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0045500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0045500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfccad61880b1aa817978e915391b956a5c48649 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0045500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f212024dcf2be4d6fb8d31fddad7af244e448dc9ea2fbbd95e52a4ab225dd803 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0046000/README.md b/cxr_finetune_lora/checkpoints/0046000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0046000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0046000/adapter_config.json b/cxr_finetune_lora/checkpoints/0046000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0046000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0046000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0046000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d7019cd9680cc280da2193c03ffacc4dd3b6b8a --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0046000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5920959ef2f496bcef615b610e746f435dd99c22a0464fca1b82069e7a02c9bd +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0046500/README.md b/cxr_finetune_lora/checkpoints/0046500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0046500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0046500/adapter_config.json b/cxr_finetune_lora/checkpoints/0046500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0046500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0046500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0046500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b5df3997c6da0e09dab8182cad1cd5b413c57b6 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0046500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10eca953454689ae124ad3cdd210429542c61b8600fd8388b0dde174910ecb7 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0047000/README.md b/cxr_finetune_lora/checkpoints/0047000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0047000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0047000/adapter_config.json b/cxr_finetune_lora/checkpoints/0047000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0047000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0047000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0047000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4ae3583b5854b32e267fb0994830f04585c53ae --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0047000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fdf752b0e40dae02e01e61fa3f0c50de68840731ce0e14a1389587d6164449f +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0047500/README.md b/cxr_finetune_lora/checkpoints/0047500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0047500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0047500/adapter_config.json b/cxr_finetune_lora/checkpoints/0047500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0047500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0047500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0047500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d54990ddabcd028af9e2c5e8de85bb791df3ef77 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0047500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494964ff3d04fafa88d927b19d2e1551cd8efb614c6dbf0bb45e5f8967cae1a5 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0048000/README.md b/cxr_finetune_lora/checkpoints/0048000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0048000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0048000/adapter_config.json b/cxr_finetune_lora/checkpoints/0048000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0048000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0048000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0048000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a694f6d9d9851c2c6e39e82c7b4a5d128dd4b15 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0048000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd935347c64656f4df55ce266e0b87fedd5f8c13af20f18983ca879365da7eb1 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0048500/README.md b/cxr_finetune_lora/checkpoints/0048500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0048500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0048500/adapter_config.json b/cxr_finetune_lora/checkpoints/0048500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0048500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0048500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0048500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d73c8b8d3381738802bbf421930d1fdcfa5c2fbb --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0048500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba86f1f87ceaa8a9c6e77fe1011014e71863ad6a38ebcf790ca14b64539a4b8a +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0049000/README.md b/cxr_finetune_lora/checkpoints/0049000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0049000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0049000/adapter_config.json b/cxr_finetune_lora/checkpoints/0049000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0049000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0049000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0049000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1aba2fe007f5af0dd69c823fa43528826d6b2b63 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0049000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee38e4c8eb5c1111c383fa15017d4bfe6107d5bffa7da8c8f4daa90a114616a +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0049500/README.md b/cxr_finetune_lora/checkpoints/0049500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0049500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0049500/adapter_config.json b/cxr_finetune_lora/checkpoints/0049500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0049500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0049500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0049500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..187101ceae5a930ab179284315ecf06d1fad487a --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0049500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64e8616c9b4215b6165f1a8fdc3b1237b093949f1a3d821956207f8a39675d95 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0050000/README.md b/cxr_finetune_lora/checkpoints/0050000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0050000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0050000/adapter_config.json b/cxr_finetune_lora/checkpoints/0050000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0050000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0050000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0050000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cea8635fe9d2a8f39ab039cdd4221d66b32d7ba --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0050000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994f1bd59ffe5818381d618fea77372d289aa72c3e29f5772b455aa6986bca65 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0050500/README.md b/cxr_finetune_lora/checkpoints/0050500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0050500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0050500/adapter_config.json b/cxr_finetune_lora/checkpoints/0050500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0050500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0050500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0050500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..135e45384a664ba02a32f1629d84691ecbfb02e5 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0050500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57fee5739ea3664f0cf85ba9a3c538795ad3d6837906e3cafc378f15b9baa2fb +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0051000/README.md b/cxr_finetune_lora/checkpoints/0051000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0051000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0051000/adapter_config.json b/cxr_finetune_lora/checkpoints/0051000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0051000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0051000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0051000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95453ece03586b0b992571746fadadd45e68c947 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0051000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d784e8eeefcbf139d3c76c13b476596511ffb568ce05e33bc8e12469dbd7af39 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0051500/README.md b/cxr_finetune_lora/checkpoints/0051500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0051500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0051500/adapter_config.json b/cxr_finetune_lora/checkpoints/0051500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0051500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0051500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0051500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b1abe2763210a20a40b6ac6cab7637af5d25cf1 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0051500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b7bf35530f14e1c07ae78e1a01f80fe0de581d8ea3b8c91385f7327d0843e3 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0052000/README.md b/cxr_finetune_lora/checkpoints/0052000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0052000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0052000/adapter_config.json b/cxr_finetune_lora/checkpoints/0052000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0052000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0052000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0052000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac29db087d5375d1e06d7cf99da70db0fa3603f4 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0052000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4376fc294f0f25d98ae1be5612fb5a30cf935716eea9c09c6a64684dac27658 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0052500/README.md b/cxr_finetune_lora/checkpoints/0052500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0052500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0052500/adapter_config.json b/cxr_finetune_lora/checkpoints/0052500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0052500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0052500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0052500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61d852b233f8a88dffa49bb8ba6d7cfa2ad03cb9 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0052500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf0190cca88694d0d66301c31080a66492d998f3ccc827d60657ba2829b824e +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0053000/README.md b/cxr_finetune_lora/checkpoints/0053000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0053000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0053000/adapter_config.json b/cxr_finetune_lora/checkpoints/0053000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0053000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0053000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0053000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..492b09e62964219dadb3892da224c67cda1bab42 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0053000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac38ff715b533b6e2b7873403006b7a799a9bea6dffecde50b6785b573043538 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0053500/README.md b/cxr_finetune_lora/checkpoints/0053500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0053500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0053500/adapter_config.json b/cxr_finetune_lora/checkpoints/0053500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0053500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0053500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0053500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba305984f48b3b26bd452adfc836345d268dd491 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0053500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:224eff3b632986fb9f405e32838f3ab9562d6057c127dfff06d8a882073294e1 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0054000/README.md b/cxr_finetune_lora/checkpoints/0054000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0054000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0054000/adapter_config.json b/cxr_finetune_lora/checkpoints/0054000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0054000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0054000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0054000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7789ebbb574e5a39fd69e64db91c73a7c13dad7 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0054000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0956e1d314e666b03dd097a631dc3b8d80c251b17a5afe0271ef960479cf0419 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0054500/README.md b/cxr_finetune_lora/checkpoints/0054500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0054500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0054500/adapter_config.json b/cxr_finetune_lora/checkpoints/0054500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0054500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0054500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0054500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca69f8463fc9461e12928f5f8fd76cefb935b300 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0054500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d6db4d8b934d896af09acaeee10e6ea8b81839cd333a68fc68833758356c68d +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0055000/README.md b/cxr_finetune_lora/checkpoints/0055000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0055000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0055000/adapter_config.json b/cxr_finetune_lora/checkpoints/0055000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0055000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0055000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0055000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..176460c4302630351255e6ef064b7326af5dbda4 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0055000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3c0509477a77c12511dc1ef3f2ba85bdec43cc2548ae3a3720d7aa4060234f4 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0055500/README.md b/cxr_finetune_lora/checkpoints/0055500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0055500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0055500/adapter_config.json b/cxr_finetune_lora/checkpoints/0055500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0055500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0055500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0055500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e7378c5f3812f88e488cfad472670433db0f666 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0055500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6bd7d830381fbffa2e3fd362dbb3e2dbeb02de01e250e5cf9bf9f62d6bcca9d +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0056000/README.md b/cxr_finetune_lora/checkpoints/0056000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0056000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0056000/adapter_config.json b/cxr_finetune_lora/checkpoints/0056000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0056000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0056000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0056000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27b9b5461e50b0d3cadb04077194fd623ffe9f08 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0056000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe62015c6120726ebf4dd691f03f6937e5d17a8ae863eecb2f14d56dfe59b17 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0056500/README.md b/cxr_finetune_lora/checkpoints/0056500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0056500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0056500/adapter_config.json b/cxr_finetune_lora/checkpoints/0056500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0056500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0056500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0056500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0b747b1ca1946296b7822dd530686f59649d58b --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0056500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27ac626a098aeab1ee1754a9fed5855492a173abf44efc6105bd5253332543f +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0057000/README.md b/cxr_finetune_lora/checkpoints/0057000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0057000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0057000/adapter_config.json b/cxr_finetune_lora/checkpoints/0057000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0057000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0057000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0057000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a54a19c91141d4ef1184d737919d052e745162d --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0057000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f23bb8af94495fe281f5dba667fe306d686446f495a7354f3e0d621b19e850d +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0057500/README.md b/cxr_finetune_lora/checkpoints/0057500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0057500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0057500/adapter_config.json b/cxr_finetune_lora/checkpoints/0057500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0057500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0057500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0057500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32d9bedadd6fb9e1996a39840f14182c15507155 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0057500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e9eeb5ab7c711bb99cbb5b45cb1a566c7efeb658ae912a3e92a65bd3383441 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0058000/README.md b/cxr_finetune_lora/checkpoints/0058000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0058000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0058000/adapter_config.json b/cxr_finetune_lora/checkpoints/0058000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0058000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0058000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0058000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a13e708a9616a5d5db6f65cc3a45a00204e64c77 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0058000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11ef6e157679d12f24669a3a226bb5f556f2a4085c83353688d8b51f1be29fa2 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0058500/README.md b/cxr_finetune_lora/checkpoints/0058500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0058500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0058500/adapter_config.json b/cxr_finetune_lora/checkpoints/0058500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0058500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0058500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0058500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81c63e7355891fff1d221e871d9248e7b8943b4c --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0058500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0da601958dc3debc72470332d519e1698af4148a5b94c5eeb07eba490b23fb +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0059000/README.md b/cxr_finetune_lora/checkpoints/0059000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0059000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0059000/adapter_config.json b/cxr_finetune_lora/checkpoints/0059000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0059000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0059000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0059000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fba176d2bd5ccd0ca297c9a38a85e044ecbfc40f --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0059000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a0b7c244910d81aa7c358d37f08035024fd7a6faadfacc87068bbfda2dc1217 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0059500/README.md b/cxr_finetune_lora/checkpoints/0059500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0059500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0059500/adapter_config.json b/cxr_finetune_lora/checkpoints/0059500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0059500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0059500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0059500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4293f35e5572561a629ed7e9a6fe86efb07621b --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0059500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9469676042a28df251a567867dba17060b0ba33e3d12ce0d4dd00ff2a2ebe43 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0060000/README.md b/cxr_finetune_lora/checkpoints/0060000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0060000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0060000/adapter_config.json b/cxr_finetune_lora/checkpoints/0060000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0060000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0060000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0060000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04da7bf2e7aed8e1ef0acfa008533f0354753c4e --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0060000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df0fd12f9bb52d5137716cf70cd14877ede87923ab5abb1aca25ca6d64e2239 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0060500/README.md b/cxr_finetune_lora/checkpoints/0060500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0060500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0060500/adapter_config.json b/cxr_finetune_lora/checkpoints/0060500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0060500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0060500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0060500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77e5da00dee8f64c60ab6c4ee1c47f62c52f4df8 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0060500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b299737b02cfce23964a4be91bcc89555f44dcd6263260e704cdd3dc9a538c3 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0061000/README.md b/cxr_finetune_lora/checkpoints/0061000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0061000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0061000/adapter_config.json b/cxr_finetune_lora/checkpoints/0061000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0061000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0061000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0061000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e47724bad255d7e3e4fb405c7efb3929c9d95b8 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0061000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bcbab697c55ab9ece9ad9ce123a6c812667769982677013e9410e6c1aa024d8 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0061500/README.md b/cxr_finetune_lora/checkpoints/0061500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0061500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0061500/adapter_config.json b/cxr_finetune_lora/checkpoints/0061500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0061500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0061500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0061500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2478d91d58a739a5894d936622cdda496e022d3e --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0061500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8f9ef19db824e3944bf955b6000eea8e98365b7147c3ed7cbeebcea7759263 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0062000/README.md b/cxr_finetune_lora/checkpoints/0062000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0062000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0062000/adapter_config.json b/cxr_finetune_lora/checkpoints/0062000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0062000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0062000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0062000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53861605eeb72654c675091b9927c757b85c8bfc --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0062000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd7b46322252e8495ce2698b4a592c407acff3c6580238114dbf1c858183d8f +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0062500/README.md b/cxr_finetune_lora/checkpoints/0062500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0062500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0062500/adapter_config.json b/cxr_finetune_lora/checkpoints/0062500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0062500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0062500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0062500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a334fd2f0f5a8d2a5d7077d5d4a712817790dd70 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0062500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c931ba09b0d2eecc082c38f3c4096e7a3a906a18588554ba9635a8fd1138a5 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0063000/README.md b/cxr_finetune_lora/checkpoints/0063000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0063000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0063000/adapter_config.json b/cxr_finetune_lora/checkpoints/0063000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0063000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0063000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0063000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2054d929f7bcb2dda2175eeb4f6c85d015f8846 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0063000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bda540c0a883887798feb0332ed1e94c5a396dd18e8053b566ead91ece6f3a8 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0063500/README.md b/cxr_finetune_lora/checkpoints/0063500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0063500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0063500/adapter_config.json b/cxr_finetune_lora/checkpoints/0063500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0063500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0063500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0063500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42ff4467ca98381f8b5994eae6d6f7323596953b --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0063500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315f1d1ec3188b81f0267bd356dbf0216a15f1dc68d817b97cc2df41e753291e +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0064000/README.md b/cxr_finetune_lora/checkpoints/0064000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0064000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0064000/adapter_config.json b/cxr_finetune_lora/checkpoints/0064000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0064000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0064000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0064000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6323020d2a478175e7563177bb2d7045ee97f076 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0064000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5facc15ecb472175483c9ffd366355bc9f32624bfb98949369dd5ce12ecc25b5 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0064500/README.md b/cxr_finetune_lora/checkpoints/0064500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0064500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0064500/adapter_config.json b/cxr_finetune_lora/checkpoints/0064500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0064500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0064500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0064500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c418521ab120d3b387bb1fb3cbe51f6e256c043 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0064500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:817787f7c2421a266b0edb3f5283f0257fb4daa061c94aa1397bfdd5c24f1c04 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0065000/README.md b/cxr_finetune_lora/checkpoints/0065000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0065000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0065000/adapter_config.json b/cxr_finetune_lora/checkpoints/0065000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0065000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0065000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0065000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdf91f913b0b1a2420bf7f4d517117b74f92e505 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0065000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596b4717ce2de9ce4aa92787abb90655e24f962517c9c364d3fae10acd6900b4 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0065500/README.md b/cxr_finetune_lora/checkpoints/0065500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0065500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0065500/adapter_config.json b/cxr_finetune_lora/checkpoints/0065500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0065500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0065500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0065500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe39b1aa76e8ab4c2897812b6c0ee0e281679aff --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0065500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5cf00b9b96474c84476a4a6c895d0556c80f9b296a7427a924dea0c0eae4edf +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0066000/README.md b/cxr_finetune_lora/checkpoints/0066000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0066000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0066000/adapter_config.json b/cxr_finetune_lora/checkpoints/0066000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0066000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0066000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0066000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c07416645000a1d04661e20158ce231469a51873 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0066000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b00eca7892e9b1a54930777309ad7a5c1f9209320134fac16ab8b0566267afab +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0066500/README.md b/cxr_finetune_lora/checkpoints/0066500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0066500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0066500/adapter_config.json b/cxr_finetune_lora/checkpoints/0066500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0066500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0066500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0066500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd9bf2fee4a4809d089892568a1f859cd9d90519 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0066500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:340277127d1cd3e204f17907c94b21221a374a5943b884e59abcc9fd0ece6e97 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0067000/README.md b/cxr_finetune_lora/checkpoints/0067000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0067000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0067000/adapter_config.json b/cxr_finetune_lora/checkpoints/0067000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0067000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0067000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0067000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e025c912b7e98bbcc660f3b77fb5fe986e75d4a1 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0067000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d36a32a51a41cbb422f5a706d836b6d079861ea49d325c8ede833f1eb33273 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0067500/README.md b/cxr_finetune_lora/checkpoints/0067500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0067500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0067500/adapter_config.json b/cxr_finetune_lora/checkpoints/0067500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0067500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0067500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0067500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bafce482643ddad1589310a024ad1f1725ca205d --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0067500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f65b9a546caf74a3e855ca8f38bf37b9c41fdffb52a0eedd76b2fac3156d230 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0068000/README.md b/cxr_finetune_lora/checkpoints/0068000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0068000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0068000/adapter_config.json b/cxr_finetune_lora/checkpoints/0068000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0068000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0068000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0068000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f04dc9c802cbcb22b1911906b0f03a798a80af1 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0068000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bffaab561f270b6501657305d9fff43de5313158316e052d72f45abc9fdb048 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0068500/README.md b/cxr_finetune_lora/checkpoints/0068500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0068500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0068500/adapter_config.json b/cxr_finetune_lora/checkpoints/0068500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0068500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0068500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0068500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48ab1ac3a0332d2fc31b058769d1fd81d08d475c --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0068500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5817932364231494dd559879fb01c36e69e01926f329ae2b5546af3f29891c1f +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0069000/README.md b/cxr_finetune_lora/checkpoints/0069000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0069000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0069000/adapter_config.json b/cxr_finetune_lora/checkpoints/0069000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0069000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0069000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0069000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66fe330e68ee2b3d502d22454cd9f62593dbc0b7 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0069000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7e51f1077a659547f8bc955a3dc63113d7293ecb0a05c6bb3989b1624a19a1 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0069500/README.md b/cxr_finetune_lora/checkpoints/0069500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0069500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0069500/adapter_config.json b/cxr_finetune_lora/checkpoints/0069500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0069500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0069500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0069500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f90d474d03c01ea3a280ca0da87cbbb7c53d3003 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0069500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f08a386a420f4cc9e582b8dc02646df629851591421e3f4558b32d46ce2e200 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0070000/README.md b/cxr_finetune_lora/checkpoints/0070000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0070000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0070000/adapter_config.json b/cxr_finetune_lora/checkpoints/0070000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0070000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0070000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0070000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..628862b86edf062a872adb01383a7b42eb9ebfdf --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0070000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91e5f7b5c7d552de6c19d232a918fef21178417a1195debeda5aa6443b3eef2 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0070500/README.md b/cxr_finetune_lora/checkpoints/0070500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0070500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0070500/adapter_config.json b/cxr_finetune_lora/checkpoints/0070500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0070500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0070500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0070500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5e80d2badc03ffddd6aaae15e66e89489b0ca35 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0070500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ef0a51c114920772aa35d4a59ed88d19324146772d96cfce948edf950665946 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0071000/README.md b/cxr_finetune_lora/checkpoints/0071000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0071000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0071000/adapter_config.json b/cxr_finetune_lora/checkpoints/0071000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0071000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0071000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0071000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..414adbb0ea2eb27cfa8662fd0bda8e08a1e490e7 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0071000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c31751ea7980e80a1cdf9e790348dbbec25b13e9b273d78b30e14a82927010a +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0071500/README.md b/cxr_finetune_lora/checkpoints/0071500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0071500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0071500/adapter_config.json b/cxr_finetune_lora/checkpoints/0071500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0071500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0071500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0071500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a54e5c21a6886a57ab7630190a08c6721f34209e --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0071500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12492189c948dc5f4594a4308cb3a02ae10c2182019e98ac30e6a6028076e615 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0072000/README.md b/cxr_finetune_lora/checkpoints/0072000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0072000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0072000/adapter_config.json b/cxr_finetune_lora/checkpoints/0072000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0072000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0072000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0072000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5341e43617bf2c6d9096abb21b1e38ca6f80717a --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0072000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5179e57438d45be406aa94cbabef15402ed23c13201ed8ed810c14440bbcf51a +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0072500/README.md b/cxr_finetune_lora/checkpoints/0072500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0072500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0072500/adapter_config.json b/cxr_finetune_lora/checkpoints/0072500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0072500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0072500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0072500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfa3b03305a00153ad3108080130210b0d6abe3d --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0072500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5d9bc3ada5a622a9458409e53fb006828d0d55f87ba77b979e0ff6687b2f7da +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0073000/README.md b/cxr_finetune_lora/checkpoints/0073000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0073000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0073000/adapter_config.json b/cxr_finetune_lora/checkpoints/0073000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0073000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0073000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0073000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..211e87704cf09bb8a5b5d3ef9bd0aa79376fa70f --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0073000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5d459a145af517ead22d781f1e2316230cc71e2e9711f38b179a4c1657d348 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0073500/README.md b/cxr_finetune_lora/checkpoints/0073500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0073500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0073500/adapter_config.json b/cxr_finetune_lora/checkpoints/0073500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0073500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0073500/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0073500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f0a77cde526889d9802fdc46e2712df6bfe73d2 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0073500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc1487ded281fbe5f53319dd1485df216e32719582a70db5b89af0bbd331477 +size 9454048 diff --git a/cxr_finetune_lora/checkpoints/0074000/README.md b/cxr_finetune_lora/checkpoints/0074000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0074000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0074000/adapter_config.json b/cxr_finetune_lora/checkpoints/0074000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee5b418c2618df40877ea6748c5282d72fb2f55 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0074000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "qkv_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora/checkpoints/0074000/adapter_model.safetensors b/cxr_finetune_lora/checkpoints/0074000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f562466bb6df9562f06490360f3a4cb733f5aa1 --- /dev/null +++ b/cxr_finetune_lora/checkpoints/0074000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12388feec351e24b13d3b11567fc291a450427caca92a70e0ea820fee8386fd1 +size 9454048 diff --git a/cxr_finetune_lora/log.txt b/cxr_finetune_lora/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7ac46556d9bbe047164642c3f7d6d2a1574c6b1f --- /dev/null +++ b/cxr_finetune_lora/log.txt @@ -0,0 +1,74391 @@ +[2026-02-27 05:06:00] Experiment directory created at ./results/cxr_finetune_lora +[2026-02-27 05:06:00] Downloaded model to /root/.cache/huggingface/hub/models--Shitao--OmniGen-v1/snapshots/58e249c7c7634423c0ba41c34a774af79aa87889 +[2026-02-27 05:06:00] Downloaded model to /root/.cache/huggingface/hub/models--Shitao--OmniGen-v1/snapshots/58e249c7c7634423c0ba41c34a774af79aa87889 +[2026-02-27 05:06:58] Dataset contains 1,308,627 +[2026-02-27 05:06:59] Training for 100 epochs... +[2026-02-27 05:06:59] Beginning epoch 0... +[2026-02-27 05:07:14] (step=0000001) Train Loss: 0.5334, Train Steps/Sec: 0.07, Epoch: 0.00019565642731363725, LR: 0.0003 +[2026-02-27 05:07:22] (step=0000002) Train Loss: 0.5303, Train Steps/Sec: 0.13, Epoch: 0.0003913128546272745, LR: 0.0003 +[2026-02-27 05:07:30] (step=0000003) Train Loss: 0.5237, Train Steps/Sec: 0.13, Epoch: 0.0005869692819409117, LR: 0.0003 +[2026-02-27 05:07:38] (step=0000004) Train Loss: 0.5282, Train Steps/Sec: 0.13, Epoch: 0.000782625709254549, LR: 0.0003 +[2026-02-27 05:07:46] (step=0000005) Train Loss: 0.5148, Train Steps/Sec: 0.13, Epoch: 0.0009782821365681863, LR: 0.0003 +[2026-02-27 05:07:54] (step=0000006) Train Loss: 0.5125, Train Steps/Sec: 0.13, Epoch: 0.0011739385638818234, LR: 0.0003 +[2026-02-27 05:08:01] (step=0000007) Train Loss: 0.5282, Train Steps/Sec: 0.13, Epoch: 0.0013695949911954608, LR: 0.0003 +[2026-02-27 05:08:09] (step=0000008) Train Loss: 0.5213, Train Steps/Sec: 0.13, Epoch: 0.001565251418509098, LR: 0.0003 +[2026-02-27 05:08:17] (step=0000009) Train Loss: 0.5158, Train Steps/Sec: 0.13, Epoch: 0.0017609078458227353, LR: 0.0003 +[2026-02-27 05:08:25] (step=0000010) Train Loss: 0.5169, Train Steps/Sec: 0.13, Epoch: 0.0019565642731363725, LR: 0.0003 +[2026-02-27 05:08:33] (step=0000011) Train Loss: 0.5213, Train Steps/Sec: 0.13, Epoch: 0.00215222070045001, LR: 0.0003 +[2026-02-27 05:08:41] (step=0000012) Train Loss: 0.5152, Train Steps/Sec: 0.13, Epoch: 0.002347877127763647, LR: 0.0003 +[2026-02-27 05:08:48] (step=0000013) Train Loss: 0.5200, Train Steps/Sec: 0.13, Epoch: 0.002543533555077284, LR: 0.0003 +[2026-02-27 05:08:56] (step=0000014) Train Loss: 0.5203, Train Steps/Sec: 0.13, Epoch: 0.0027391899823909216, LR: 0.0003 +[2026-02-27 05:09:04] (step=0000015) Train Loss: 0.5201, Train Steps/Sec: 0.13, Epoch: 0.002934846409704559, LR: 0.0003 +[2026-02-27 05:09:12] (step=0000016) Train Loss: 0.5234, Train Steps/Sec: 0.13, Epoch: 0.003130502837018196, LR: 0.0003 +[2026-02-27 05:09:20] (step=0000017) Train Loss: 0.5117, Train Steps/Sec: 0.13, Epoch: 0.0033261592643318333, LR: 0.0003 +[2026-02-27 05:09:28] (step=0000018) Train Loss: 0.5033, Train Steps/Sec: 0.13, Epoch: 0.0035218156916454707, LR: 0.0003 +[2026-02-27 05:09:35] (step=0000019) Train Loss: 0.5208, Train Steps/Sec: 0.13, Epoch: 0.0037174721189591076, LR: 0.0003 +[2026-02-27 05:09:43] (step=0000020) Train Loss: 0.5059, Train Steps/Sec: 0.13, Epoch: 0.003913128546272745, LR: 0.0003 +[2026-02-27 05:09:51] (step=0000021) Train Loss: 0.5269, Train Steps/Sec: 0.13, Epoch: 0.004108784973586382, LR: 0.0003 +[2026-02-27 05:09:59] (step=0000022) Train Loss: 0.5118, Train Steps/Sec: 0.13, Epoch: 0.00430444140090002, LR: 0.0003 +[2026-02-27 05:10:07] (step=0000023) Train Loss: 0.5089, Train Steps/Sec: 0.13, Epoch: 0.004500097828213657, LR: 0.0003 +[2026-02-27 05:10:15] (step=0000024) Train Loss: 0.5192, Train Steps/Sec: 0.13, Epoch: 0.004695754255527294, LR: 0.0003 +[2026-02-27 05:10:22] (step=0000025) Train Loss: 0.5141, Train Steps/Sec: 0.13, Epoch: 0.004891410682840931, LR: 0.0003 +[2026-02-27 05:10:30] (step=0000026) Train Loss: 0.5299, Train Steps/Sec: 0.13, Epoch: 0.005087067110154568, LR: 0.0003 +[2026-02-27 05:10:38] (step=0000027) Train Loss: 0.5151, Train Steps/Sec: 0.13, Epoch: 0.005282723537468206, LR: 0.0003 +[2026-02-27 05:10:46] (step=0000028) Train Loss: 0.4996, Train Steps/Sec: 0.13, Epoch: 0.005478379964781843, LR: 0.0003 +[2026-02-27 05:10:54] (step=0000029) Train Loss: 0.5208, Train Steps/Sec: 0.13, Epoch: 0.005674036392095481, LR: 0.0003 +[2026-02-27 05:11:02] (step=0000030) Train Loss: 0.5108, Train Steps/Sec: 0.13, Epoch: 0.005869692819409118, LR: 0.0003 +[2026-02-27 05:11:10] (step=0000031) Train Loss: 0.5344, Train Steps/Sec: 0.13, Epoch: 0.0060653492467227545, LR: 0.0003 +[2026-02-27 05:11:17] (step=0000032) Train Loss: 0.5134, Train Steps/Sec: 0.13, Epoch: 0.006261005674036392, LR: 0.0003 +[2026-02-27 05:11:25] (step=0000033) Train Loss: 0.5123, Train Steps/Sec: 0.13, Epoch: 0.006456662101350029, LR: 0.0003 +[2026-02-27 05:11:33] (step=0000034) Train Loss: 0.4974, Train Steps/Sec: 0.13, Epoch: 0.006652318528663667, LR: 0.0003 +[2026-02-27 05:11:41] (step=0000035) Train Loss: 0.5129, Train Steps/Sec: 0.13, Epoch: 0.006847974955977304, LR: 0.0003 +[2026-02-27 05:11:49] (step=0000036) Train Loss: 0.4947, Train Steps/Sec: 0.13, Epoch: 0.007043631383290941, LR: 0.0003 +[2026-02-27 05:11:57] (step=0000037) Train Loss: 0.5099, Train Steps/Sec: 0.13, Epoch: 0.007239287810604579, LR: 0.0003 +[2026-02-27 05:12:05] (step=0000038) Train Loss: 0.5091, Train Steps/Sec: 0.13, Epoch: 0.007434944237918215, LR: 0.0003 +[2026-02-27 05:12:12] (step=0000039) Train Loss: 0.4989, Train Steps/Sec: 0.13, Epoch: 0.007630600665231853, LR: 0.0003 +[2026-02-27 05:12:20] (step=0000040) Train Loss: 0.4874, Train Steps/Sec: 0.13, Epoch: 0.00782625709254549, LR: 0.0003 +[2026-02-27 05:12:28] (step=0000041) Train Loss: 0.5145, Train Steps/Sec: 0.13, Epoch: 0.008021913519859127, LR: 0.0003 +[2026-02-27 05:12:36] (step=0000042) Train Loss: 0.5154, Train Steps/Sec: 0.13, Epoch: 0.008217569947172765, LR: 0.0003 +[2026-02-27 05:12:44] (step=0000043) Train Loss: 0.5044, Train Steps/Sec: 0.13, Epoch: 0.008413226374486401, LR: 0.0003 +[2026-02-27 05:12:52] (step=0000044) Train Loss: 0.4994, Train Steps/Sec: 0.13, Epoch: 0.00860888280180004, LR: 0.0003 +[2026-02-27 05:13:00] (step=0000045) Train Loss: 0.5246, Train Steps/Sec: 0.13, Epoch: 0.008804539229113676, LR: 0.0003 +[2026-02-27 05:13:08] (step=0000046) Train Loss: 0.5087, Train Steps/Sec: 0.13, Epoch: 0.009000195656427314, LR: 0.0003 +[2026-02-27 05:13:15] (step=0000047) Train Loss: 0.4996, Train Steps/Sec: 0.13, Epoch: 0.00919585208374095, LR: 0.0003 +[2026-02-27 05:13:23] (step=0000048) Train Loss: 0.5217, Train Steps/Sec: 0.13, Epoch: 0.009391508511054587, LR: 0.0003 +[2026-02-27 05:13:31] (step=0000049) Train Loss: 0.5250, Train Steps/Sec: 0.13, Epoch: 0.009587164938368226, LR: 0.0003 +[2026-02-27 05:13:39] (step=0000050) Train Loss: 0.5037, Train Steps/Sec: 0.13, Epoch: 0.009782821365681862, LR: 0.0003 +[2026-02-27 05:13:47] (step=0000051) Train Loss: 0.5018, Train Steps/Sec: 0.13, Epoch: 0.0099784777929955, LR: 0.0003 +[2026-02-27 05:13:55] (step=0000052) Train Loss: 0.5098, Train Steps/Sec: 0.13, Epoch: 0.010174134220309137, LR: 0.0003 +[2026-02-27 05:14:03] (step=0000053) Train Loss: 0.4983, Train Steps/Sec: 0.13, Epoch: 0.010369790647622775, LR: 0.0003 +[2026-02-27 05:14:10] (step=0000054) Train Loss: 0.5009, Train Steps/Sec: 0.13, Epoch: 0.010565447074936412, LR: 0.0003 +[2026-02-27 05:14:18] (step=0000055) Train Loss: 0.4993, Train Steps/Sec: 0.13, Epoch: 0.010761103502250048, LR: 0.0003 +[2026-02-27 05:14:26] (step=0000056) Train Loss: 0.5072, Train Steps/Sec: 0.13, Epoch: 0.010956759929563686, LR: 0.0003 +[2026-02-27 05:14:34] (step=0000057) Train Loss: 0.5144, Train Steps/Sec: 0.13, Epoch: 0.011152416356877323, LR: 0.0003 +[2026-02-27 05:14:42] (step=0000058) Train Loss: 0.5031, Train Steps/Sec: 0.13, Epoch: 0.011348072784190961, LR: 0.0003 +[2026-02-27 05:14:50] (step=0000059) Train Loss: 0.5027, Train Steps/Sec: 0.13, Epoch: 0.011543729211504598, LR: 0.0003 +[2026-02-27 05:14:58] (step=0000060) Train Loss: 0.5065, Train Steps/Sec: 0.13, Epoch: 0.011739385638818236, LR: 0.0003 +[2026-02-27 05:15:05] (step=0000061) Train Loss: 0.4982, Train Steps/Sec: 0.13, Epoch: 0.011935042066131872, LR: 0.0003 +[2026-02-27 05:15:13] (step=0000062) Train Loss: 0.5165, Train Steps/Sec: 0.13, Epoch: 0.012130698493445509, LR: 0.0003 +[2026-02-27 05:15:21] (step=0000063) Train Loss: 0.5183, Train Steps/Sec: 0.13, Epoch: 0.012326354920759147, LR: 0.0003 +[2026-02-27 05:15:29] (step=0000064) Train Loss: 0.5050, Train Steps/Sec: 0.13, Epoch: 0.012522011348072784, LR: 0.0003 +[2026-02-27 05:15:37] (step=0000065) Train Loss: 0.5141, Train Steps/Sec: 0.13, Epoch: 0.012717667775386422, LR: 0.0003 +[2026-02-27 05:15:45] (step=0000066) Train Loss: 0.5038, Train Steps/Sec: 0.13, Epoch: 0.012913324202700058, LR: 0.0003 +[2026-02-27 05:15:52] (step=0000067) Train Loss: 0.5081, Train Steps/Sec: 0.13, Epoch: 0.013108980630013697, LR: 0.0003 +[2026-02-27 05:16:00] (step=0000068) Train Loss: 0.5048, Train Steps/Sec: 0.13, Epoch: 0.013304637057327333, LR: 0.0003 +[2026-02-27 05:16:08] (step=0000069) Train Loss: 0.5067, Train Steps/Sec: 0.13, Epoch: 0.01350029348464097, LR: 0.0003 +[2026-02-27 05:16:16] (step=0000070) Train Loss: 0.4975, Train Steps/Sec: 0.13, Epoch: 0.013695949911954608, LR: 0.0003 +[2026-02-27 05:16:24] (step=0000071) Train Loss: 0.5026, Train Steps/Sec: 0.13, Epoch: 0.013891606339268244, LR: 0.0003 +[2026-02-27 05:16:32] (step=0000072) Train Loss: 0.4949, Train Steps/Sec: 0.13, Epoch: 0.014087262766581883, LR: 0.0003 +[2026-02-27 05:16:40] (step=0000073) Train Loss: 0.5199, Train Steps/Sec: 0.13, Epoch: 0.01428291919389552, LR: 0.0003 +[2026-02-27 05:16:48] (step=0000074) Train Loss: 0.5019, Train Steps/Sec: 0.13, Epoch: 0.014478575621209158, LR: 0.0003 +[2026-02-27 05:16:55] (step=0000075) Train Loss: 0.5021, Train Steps/Sec: 0.13, Epoch: 0.014674232048522794, LR: 0.0003 +[2026-02-27 05:17:03] (step=0000076) Train Loss: 0.5220, Train Steps/Sec: 0.13, Epoch: 0.01486988847583643, LR: 0.0003 +[2026-02-27 05:17:11] (step=0000077) Train Loss: 0.4977, Train Steps/Sec: 0.13, Epoch: 0.015065544903150069, LR: 0.0003 +[2026-02-27 05:17:19] (step=0000078) Train Loss: 0.4990, Train Steps/Sec: 0.13, Epoch: 0.015261201330463705, LR: 0.0003 +[2026-02-27 05:17:27] (step=0000079) Train Loss: 0.5031, Train Steps/Sec: 0.13, Epoch: 0.015456857757777344, LR: 0.0003 +[2026-02-27 05:17:35] (step=0000080) Train Loss: 0.5197, Train Steps/Sec: 0.13, Epoch: 0.01565251418509098, LR: 0.0003 +[2026-02-27 05:17:42] (step=0000081) Train Loss: 0.4992, Train Steps/Sec: 0.13, Epoch: 0.01584817061240462, LR: 0.0003 +[2026-02-27 05:17:50] (step=0000082) Train Loss: 0.5031, Train Steps/Sec: 0.13, Epoch: 0.016043827039718253, LR: 0.0003 +[2026-02-27 05:17:58] (step=0000083) Train Loss: 0.5030, Train Steps/Sec: 0.13, Epoch: 0.01623948346703189, LR: 0.0003 +[2026-02-27 05:18:06] (step=0000084) Train Loss: 0.5030, Train Steps/Sec: 0.13, Epoch: 0.01643513989434553, LR: 0.0003 +[2026-02-27 05:18:14] (step=0000085) Train Loss: 0.4983, Train Steps/Sec: 0.13, Epoch: 0.016630796321659168, LR: 0.0003 +[2026-02-27 05:18:22] (step=0000086) Train Loss: 0.5170, Train Steps/Sec: 0.13, Epoch: 0.016826452748972803, LR: 0.0003 +[2026-02-27 05:18:30] (step=0000087) Train Loss: 0.5019, Train Steps/Sec: 0.13, Epoch: 0.01702210917628644, LR: 0.0003 +[2026-02-27 05:18:37] (step=0000088) Train Loss: 0.4960, Train Steps/Sec: 0.13, Epoch: 0.01721776560360008, LR: 0.0003 +[2026-02-27 05:18:45] (step=0000089) Train Loss: 0.5170, Train Steps/Sec: 0.13, Epoch: 0.017413422030913714, LR: 0.0003 +[2026-02-27 05:18:53] (step=0000090) Train Loss: 0.5137, Train Steps/Sec: 0.13, Epoch: 0.017609078458227352, LR: 0.0003 +[2026-02-27 05:19:01] (step=0000091) Train Loss: 0.5087, Train Steps/Sec: 0.13, Epoch: 0.01780473488554099, LR: 0.0003 +[2026-02-27 05:19:09] (step=0000092) Train Loss: 0.5001, Train Steps/Sec: 0.13, Epoch: 0.01800039131285463, LR: 0.0003 +[2026-02-27 05:19:17] (step=0000093) Train Loss: 0.5206, Train Steps/Sec: 0.13, Epoch: 0.018196047740168263, LR: 0.0003 +[2026-02-27 05:19:25] (step=0000094) Train Loss: 0.5040, Train Steps/Sec: 0.13, Epoch: 0.0183917041674819, LR: 0.0003 +[2026-02-27 05:19:33] (step=0000095) Train Loss: 0.5067, Train Steps/Sec: 0.13, Epoch: 0.01858736059479554, LR: 0.0003 +[2026-02-27 05:19:40] (step=0000096) Train Loss: 0.5129, Train Steps/Sec: 0.13, Epoch: 0.018783017022109175, LR: 0.0003 +[2026-02-27 05:19:48] (step=0000097) Train Loss: 0.4978, Train Steps/Sec: 0.13, Epoch: 0.018978673449422813, LR: 0.0003 +[2026-02-27 05:19:56] (step=0000098) Train Loss: 0.4922, Train Steps/Sec: 0.13, Epoch: 0.01917432987673645, LR: 0.0003 +[2026-02-27 05:20:04] (step=0000099) Train Loss: 0.4975, Train Steps/Sec: 0.13, Epoch: 0.01936998630405009, LR: 0.0003 +[2026-02-27 05:20:12] (step=0000100) Train Loss: 0.5069, Train Steps/Sec: 0.13, Epoch: 0.019565642731363724, LR: 0.0003 +[2026-02-27 05:20:20] (step=0000101) Train Loss: 0.4991, Train Steps/Sec: 0.13, Epoch: 0.019761299158677362, LR: 0.0003 +[2026-02-27 05:20:28] (step=0000102) Train Loss: 0.5106, Train Steps/Sec: 0.13, Epoch: 0.019956955585991, LR: 0.0003 +[2026-02-27 05:20:35] (step=0000103) Train Loss: 0.4983, Train Steps/Sec: 0.13, Epoch: 0.020152612013304635, LR: 0.0003 +[2026-02-27 05:20:43] (step=0000104) Train Loss: 0.4887, Train Steps/Sec: 0.13, Epoch: 0.020348268440618274, LR: 0.0003 +[2026-02-27 05:20:51] (step=0000105) Train Loss: 0.4893, Train Steps/Sec: 0.13, Epoch: 0.020543924867931912, LR: 0.0003 +[2026-02-27 05:20:59] (step=0000106) Train Loss: 0.4864, Train Steps/Sec: 0.13, Epoch: 0.02073958129524555, LR: 0.0003 +[2026-02-27 05:21:07] (step=0000107) Train Loss: 0.4985, Train Steps/Sec: 0.13, Epoch: 0.020935237722559185, LR: 0.0003 +[2026-02-27 05:21:15] (step=0000108) Train Loss: 0.4925, Train Steps/Sec: 0.13, Epoch: 0.021130894149872823, LR: 0.0003 +[2026-02-27 05:21:23] (step=0000109) Train Loss: 0.4970, Train Steps/Sec: 0.13, Epoch: 0.02132655057718646, LR: 0.0003 +[2026-02-27 05:21:30] (step=0000110) Train Loss: 0.5085, Train Steps/Sec: 0.13, Epoch: 0.021522207004500096, LR: 0.0003 +[2026-02-27 05:21:38] (step=0000111) Train Loss: 0.5088, Train Steps/Sec: 0.13, Epoch: 0.021717863431813735, LR: 0.0003 +[2026-02-27 05:21:46] (step=0000112) Train Loss: 0.4950, Train Steps/Sec: 0.13, Epoch: 0.021913519859127373, LR: 0.0003 +[2026-02-27 05:21:54] (step=0000113) Train Loss: 0.4885, Train Steps/Sec: 0.13, Epoch: 0.02210917628644101, LR: 0.0003 +[2026-02-27 05:22:02] (step=0000114) Train Loss: 0.5069, Train Steps/Sec: 0.13, Epoch: 0.022304832713754646, LR: 0.0003 +[2026-02-27 05:22:10] (step=0000115) Train Loss: 0.5103, Train Steps/Sec: 0.13, Epoch: 0.022500489141068284, LR: 0.0003 +[2026-02-27 05:22:18] (step=0000116) Train Loss: 0.4939, Train Steps/Sec: 0.13, Epoch: 0.022696145568381922, LR: 0.0003 +[2026-02-27 05:22:25] (step=0000117) Train Loss: 0.4913, Train Steps/Sec: 0.13, Epoch: 0.022891801995695557, LR: 0.0003 +[2026-02-27 05:22:33] (step=0000118) Train Loss: 0.5048, Train Steps/Sec: 0.13, Epoch: 0.023087458423009195, LR: 0.0003 +[2026-02-27 05:22:41] (step=0000119) Train Loss: 0.5019, Train Steps/Sec: 0.13, Epoch: 0.023283114850322834, LR: 0.0003 +[2026-02-27 05:22:49] (step=0000120) Train Loss: 0.5106, Train Steps/Sec: 0.13, Epoch: 0.023478771277636472, LR: 0.0003 +[2026-02-27 05:22:57] (step=0000121) Train Loss: 0.5022, Train Steps/Sec: 0.13, Epoch: 0.023674427704950107, LR: 0.0003 +[2026-02-27 05:23:05] (step=0000122) Train Loss: 0.4965, Train Steps/Sec: 0.13, Epoch: 0.023870084132263745, LR: 0.0003 +[2026-02-27 05:23:12] (step=0000123) Train Loss: 0.5024, Train Steps/Sec: 0.13, Epoch: 0.024065740559577383, LR: 0.0003 +[2026-02-27 05:23:20] (step=0000124) Train Loss: 0.4984, Train Steps/Sec: 0.13, Epoch: 0.024261396986891018, LR: 0.0003 +[2026-02-27 05:23:28] (step=0000125) Train Loss: 0.4904, Train Steps/Sec: 0.13, Epoch: 0.024457053414204656, LR: 0.0003 +[2026-02-27 05:23:36] (step=0000126) Train Loss: 0.5034, Train Steps/Sec: 0.13, Epoch: 0.024652709841518294, LR: 0.0003 +[2026-02-27 05:23:44] (step=0000127) Train Loss: 0.5151, Train Steps/Sec: 0.13, Epoch: 0.024848366268831933, LR: 0.0003 +[2026-02-27 05:23:52] (step=0000128) Train Loss: 0.5033, Train Steps/Sec: 0.13, Epoch: 0.025044022696145567, LR: 0.0003 +[2026-02-27 05:24:00] (step=0000129) Train Loss: 0.4871, Train Steps/Sec: 0.13, Epoch: 0.025239679123459206, LR: 0.0003 +[2026-02-27 05:24:07] (step=0000130) Train Loss: 0.4998, Train Steps/Sec: 0.13, Epoch: 0.025435335550772844, LR: 0.0003 +[2026-02-27 05:24:15] (step=0000131) Train Loss: 0.4935, Train Steps/Sec: 0.13, Epoch: 0.02563099197808648, LR: 0.0003 +[2026-02-27 05:24:23] (step=0000132) Train Loss: 0.4960, Train Steps/Sec: 0.13, Epoch: 0.025826648405400117, LR: 0.0003 +[2026-02-27 05:24:31] (step=0000133) Train Loss: 0.4957, Train Steps/Sec: 0.13, Epoch: 0.026022304832713755, LR: 0.0003 +[2026-02-27 05:24:39] (step=0000134) Train Loss: 0.4915, Train Steps/Sec: 0.13, Epoch: 0.026217961260027393, LR: 0.0003 +[2026-02-27 05:24:47] (step=0000135) Train Loss: 0.4986, Train Steps/Sec: 0.13, Epoch: 0.026413617687341028, LR: 0.0003 +[2026-02-27 05:24:55] (step=0000136) Train Loss: 0.5036, Train Steps/Sec: 0.13, Epoch: 0.026609274114654666, LR: 0.0003 +[2026-02-27 05:25:02] (step=0000137) Train Loss: 0.5020, Train Steps/Sec: 0.13, Epoch: 0.026804930541968305, LR: 0.0003 +[2026-02-27 05:25:10] (step=0000138) Train Loss: 0.4947, Train Steps/Sec: 0.13, Epoch: 0.02700058696928194, LR: 0.0003 +[2026-02-27 05:25:18] (step=0000139) Train Loss: 0.5035, Train Steps/Sec: 0.13, Epoch: 0.027196243396595578, LR: 0.0003 +[2026-02-27 05:25:26] (step=0000140) Train Loss: 0.4897, Train Steps/Sec: 0.13, Epoch: 0.027391899823909216, LR: 0.0003 +[2026-02-27 05:25:34] (step=0000141) Train Loss: 0.4991, Train Steps/Sec: 0.13, Epoch: 0.027587556251222854, LR: 0.0003 +[2026-02-27 05:25:42] (step=0000142) Train Loss: 0.4918, Train Steps/Sec: 0.13, Epoch: 0.02778321267853649, LR: 0.0003 +[2026-02-27 05:25:50] (step=0000143) Train Loss: 0.4872, Train Steps/Sec: 0.13, Epoch: 0.027978869105850127, LR: 0.0003 +[2026-02-27 05:25:57] (step=0000144) Train Loss: 0.4984, Train Steps/Sec: 0.13, Epoch: 0.028174525533163765, LR: 0.0003 +[2026-02-27 05:26:05] (step=0000145) Train Loss: 0.4953, Train Steps/Sec: 0.13, Epoch: 0.0283701819604774, LR: 0.0003 +[2026-02-27 05:26:13] (step=0000146) Train Loss: 0.4907, Train Steps/Sec: 0.13, Epoch: 0.02856583838779104, LR: 0.0003 +[2026-02-27 05:26:21] (step=0000147) Train Loss: 0.5097, Train Steps/Sec: 0.13, Epoch: 0.028761494815104677, LR: 0.0003 +[2026-02-27 05:26:29] (step=0000148) Train Loss: 0.4950, Train Steps/Sec: 0.13, Epoch: 0.028957151242418315, LR: 0.0003 +[2026-02-27 05:26:37] (step=0000149) Train Loss: 0.4924, Train Steps/Sec: 0.13, Epoch: 0.02915280766973195, LR: 0.0003 +[2026-02-27 05:26:45] (step=0000150) Train Loss: 0.4831, Train Steps/Sec: 0.13, Epoch: 0.029348464097045588, LR: 0.0003 +[2026-02-27 05:26:53] (step=0000151) Train Loss: 0.4992, Train Steps/Sec: 0.13, Epoch: 0.029544120524359226, LR: 0.0003 +[2026-02-27 05:27:01] (step=0000152) Train Loss: 0.4983, Train Steps/Sec: 0.13, Epoch: 0.02973977695167286, LR: 0.0003 +[2026-02-27 05:27:08] (step=0000153) Train Loss: 0.4945, Train Steps/Sec: 0.13, Epoch: 0.0299354333789865, LR: 0.0003 +[2026-02-27 05:27:16] (step=0000154) Train Loss: 0.4972, Train Steps/Sec: 0.13, Epoch: 0.030131089806300138, LR: 0.0003 +[2026-02-27 05:27:24] (step=0000155) Train Loss: 0.4961, Train Steps/Sec: 0.13, Epoch: 0.030326746233613776, LR: 0.0003 +[2026-02-27 05:27:32] (step=0000156) Train Loss: 0.4885, Train Steps/Sec: 0.13, Epoch: 0.03052240266092741, LR: 0.0003 +[2026-02-27 05:27:40] (step=0000157) Train Loss: 0.4993, Train Steps/Sec: 0.13, Epoch: 0.03071805908824105, LR: 0.0003 +[2026-02-27 05:27:48] (step=0000158) Train Loss: 0.4796, Train Steps/Sec: 0.13, Epoch: 0.030913715515554687, LR: 0.0003 +[2026-02-27 05:27:56] (step=0000159) Train Loss: 0.4996, Train Steps/Sec: 0.13, Epoch: 0.031109371942868322, LR: 0.0003 +[2026-02-27 05:28:03] (step=0000160) Train Loss: 0.5067, Train Steps/Sec: 0.13, Epoch: 0.03130502837018196, LR: 0.0003 +[2026-02-27 05:28:11] (step=0000161) Train Loss: 0.4935, Train Steps/Sec: 0.13, Epoch: 0.0315006847974956, LR: 0.0003 +[2026-02-27 05:28:19] (step=0000162) Train Loss: 0.4889, Train Steps/Sec: 0.13, Epoch: 0.03169634122480924, LR: 0.0003 +[2026-02-27 05:28:27] (step=0000163) Train Loss: 0.4875, Train Steps/Sec: 0.13, Epoch: 0.031891997652122875, LR: 0.0003 +[2026-02-27 05:28:35] (step=0000164) Train Loss: 0.5008, Train Steps/Sec: 0.13, Epoch: 0.032087654079436506, LR: 0.0003 +[2026-02-27 05:28:43] (step=0000165) Train Loss: 0.5072, Train Steps/Sec: 0.13, Epoch: 0.032283310506750144, LR: 0.0003 +[2026-02-27 05:28:50] (step=0000166) Train Loss: 0.4961, Train Steps/Sec: 0.13, Epoch: 0.03247896693406378, LR: 0.0003 +[2026-02-27 05:28:58] (step=0000167) Train Loss: 0.4972, Train Steps/Sec: 0.13, Epoch: 0.03267462336137742, LR: 0.0003 +[2026-02-27 05:29:06] (step=0000168) Train Loss: 0.4791, Train Steps/Sec: 0.13, Epoch: 0.03287027978869106, LR: 0.0003 +[2026-02-27 05:29:14] (step=0000169) Train Loss: 0.4915, Train Steps/Sec: 0.13, Epoch: 0.0330659362160047, LR: 0.0003 +[2026-02-27 05:29:22] (step=0000170) Train Loss: 0.5026, Train Steps/Sec: 0.13, Epoch: 0.033261592643318336, LR: 0.0003 +[2026-02-27 05:29:30] (step=0000171) Train Loss: 0.4897, Train Steps/Sec: 0.13, Epoch: 0.03345724907063197, LR: 0.0003 +[2026-02-27 05:29:38] (step=0000172) Train Loss: 0.4914, Train Steps/Sec: 0.13, Epoch: 0.033652905497945605, LR: 0.0003 +[2026-02-27 05:29:45] (step=0000173) Train Loss: 0.4913, Train Steps/Sec: 0.13, Epoch: 0.03384856192525924, LR: 0.0003 +[2026-02-27 05:29:53] (step=0000174) Train Loss: 0.4890, Train Steps/Sec: 0.13, Epoch: 0.03404421835257288, LR: 0.0003 +[2026-02-27 05:30:01] (step=0000175) Train Loss: 0.4929, Train Steps/Sec: 0.13, Epoch: 0.03423987477988652, LR: 0.0003 +[2026-02-27 05:30:09] (step=0000176) Train Loss: 0.4878, Train Steps/Sec: 0.13, Epoch: 0.03443553120720016, LR: 0.0003 +[2026-02-27 05:30:17] (step=0000177) Train Loss: 0.4938, Train Steps/Sec: 0.13, Epoch: 0.034631187634513796, LR: 0.0003 +[2026-02-27 05:30:25] (step=0000178) Train Loss: 0.4890, Train Steps/Sec: 0.13, Epoch: 0.03482684406182743, LR: 0.0003 +[2026-02-27 05:30:33] (step=0000179) Train Loss: 0.4867, Train Steps/Sec: 0.13, Epoch: 0.035022500489141066, LR: 0.0003 +[2026-02-27 05:30:40] (step=0000180) Train Loss: 0.4868, Train Steps/Sec: 0.13, Epoch: 0.035218156916454704, LR: 0.0003 +[2026-02-27 05:30:48] (step=0000181) Train Loss: 0.4866, Train Steps/Sec: 0.13, Epoch: 0.03541381334376834, LR: 0.0003 +[2026-02-27 05:30:56] (step=0000182) Train Loss: 0.4879, Train Steps/Sec: 0.13, Epoch: 0.03560946977108198, LR: 0.0003 +[2026-02-27 05:31:04] (step=0000183) Train Loss: 0.4889, Train Steps/Sec: 0.13, Epoch: 0.03580512619839562, LR: 0.0003 +[2026-02-27 05:31:12] (step=0000184) Train Loss: 0.4829, Train Steps/Sec: 0.13, Epoch: 0.03600078262570926, LR: 0.0003 +[2026-02-27 05:31:20] (step=0000185) Train Loss: 0.4991, Train Steps/Sec: 0.13, Epoch: 0.03619643905302289, LR: 0.0003 +[2026-02-27 05:31:27] (step=0000186) Train Loss: 0.4880, Train Steps/Sec: 0.13, Epoch: 0.03639209548033653, LR: 0.0003 +[2026-02-27 05:31:35] (step=0000187) Train Loss: 0.4810, Train Steps/Sec: 0.13, Epoch: 0.036587751907650165, LR: 0.0003 +[2026-02-27 05:31:43] (step=0000188) Train Loss: 0.4892, Train Steps/Sec: 0.13, Epoch: 0.0367834083349638, LR: 0.0003 +[2026-02-27 05:31:51] (step=0000189) Train Loss: 0.4781, Train Steps/Sec: 0.13, Epoch: 0.03697906476227744, LR: 0.0003 +[2026-02-27 05:31:59] (step=0000190) Train Loss: 0.4939, Train Steps/Sec: 0.12, Epoch: 0.03717472118959108, LR: 0.0003 +[2026-02-27 05:32:07] (step=0000191) Train Loss: 0.4912, Train Steps/Sec: 0.13, Epoch: 0.03737037761690472, LR: 0.0003 +[2026-02-27 05:32:15] (step=0000192) Train Loss: 0.4871, Train Steps/Sec: 0.13, Epoch: 0.03756603404421835, LR: 0.0003 +[2026-02-27 05:32:23] (step=0000193) Train Loss: 0.4998, Train Steps/Sec: 0.13, Epoch: 0.03776169047153199, LR: 0.0003 +[2026-02-27 05:32:30] (step=0000194) Train Loss: 0.4904, Train Steps/Sec: 0.13, Epoch: 0.037957346898845626, LR: 0.0003 +[2026-02-27 05:32:38] (step=0000195) Train Loss: 0.4969, Train Steps/Sec: 0.13, Epoch: 0.038153003326159264, LR: 0.0003 +[2026-02-27 05:32:46] (step=0000196) Train Loss: 0.4896, Train Steps/Sec: 0.13, Epoch: 0.0383486597534729, LR: 0.0003 +[2026-02-27 05:32:54] (step=0000197) Train Loss: 0.4875, Train Steps/Sec: 0.13, Epoch: 0.03854431618078654, LR: 0.0003 +[2026-02-27 05:33:02] (step=0000198) Train Loss: 0.4860, Train Steps/Sec: 0.13, Epoch: 0.03873997260810018, LR: 0.0003 +[2026-02-27 05:33:10] (step=0000199) Train Loss: 0.4947, Train Steps/Sec: 0.13, Epoch: 0.03893562903541381, LR: 0.0003 +[2026-02-27 05:33:18] (step=0000200) Train Loss: 0.5009, Train Steps/Sec: 0.13, Epoch: 0.03913128546272745, LR: 0.0003 +[2026-02-27 05:33:26] (step=0000201) Train Loss: 0.4952, Train Steps/Sec: 0.13, Epoch: 0.03932694189004109, LR: 0.0003 +[2026-02-27 05:33:33] (step=0000202) Train Loss: 0.5013, Train Steps/Sec: 0.13, Epoch: 0.039522598317354725, LR: 0.0003 +[2026-02-27 05:33:41] (step=0000203) Train Loss: 0.4941, Train Steps/Sec: 0.13, Epoch: 0.03971825474466836, LR: 0.0003 +[2026-02-27 05:33:49] (step=0000204) Train Loss: 0.5056, Train Steps/Sec: 0.13, Epoch: 0.039913911171982, LR: 0.0003 +[2026-02-27 05:33:57] (step=0000205) Train Loss: 0.4878, Train Steps/Sec: 0.13, Epoch: 0.04010956759929564, LR: 0.0003 +[2026-02-27 05:34:05] (step=0000206) Train Loss: 0.4866, Train Steps/Sec: 0.13, Epoch: 0.04030522402660927, LR: 0.0003 +[2026-02-27 05:34:13] (step=0000207) Train Loss: 0.4847, Train Steps/Sec: 0.13, Epoch: 0.04050088045392291, LR: 0.0003 +[2026-02-27 05:34:21] (step=0000208) Train Loss: 0.4922, Train Steps/Sec: 0.13, Epoch: 0.04069653688123655, LR: 0.0003 +[2026-02-27 05:34:28] (step=0000209) Train Loss: 0.4930, Train Steps/Sec: 0.13, Epoch: 0.040892193308550186, LR: 0.0003 +[2026-02-27 05:34:36] (step=0000210) Train Loss: 0.4916, Train Steps/Sec: 0.13, Epoch: 0.041087849735863824, LR: 0.0003 +[2026-02-27 05:34:44] (step=0000211) Train Loss: 0.4900, Train Steps/Sec: 0.13, Epoch: 0.04128350616317746, LR: 0.0003 +[2026-02-27 05:34:52] (step=0000212) Train Loss: 0.5044, Train Steps/Sec: 0.13, Epoch: 0.0414791625904911, LR: 0.0003 +[2026-02-27 05:35:00] (step=0000213) Train Loss: 0.4852, Train Steps/Sec: 0.13, Epoch: 0.04167481901780473, LR: 0.0003 +[2026-02-27 05:35:08] (step=0000214) Train Loss: 0.4988, Train Steps/Sec: 0.13, Epoch: 0.04187047544511837, LR: 0.0003 +[2026-02-27 05:35:15] (step=0000215) Train Loss: 0.5002, Train Steps/Sec: 0.13, Epoch: 0.04206613187243201, LR: 0.0003 +[2026-02-27 05:35:23] (step=0000216) Train Loss: 0.4997, Train Steps/Sec: 0.13, Epoch: 0.042261788299745646, LR: 0.0003 +[2026-02-27 05:35:31] (step=0000217) Train Loss: 0.4851, Train Steps/Sec: 0.13, Epoch: 0.042457444727059285, LR: 0.0003 +[2026-02-27 05:35:39] (step=0000218) Train Loss: 0.4882, Train Steps/Sec: 0.13, Epoch: 0.04265310115437292, LR: 0.0003 +[2026-02-27 05:35:47] (step=0000219) Train Loss: 0.4927, Train Steps/Sec: 0.13, Epoch: 0.04284875758168656, LR: 0.0003 +[2026-02-27 05:35:55] (step=0000220) Train Loss: 0.4832, Train Steps/Sec: 0.13, Epoch: 0.04304441400900019, LR: 0.0003 +[2026-02-27 05:36:03] (step=0000221) Train Loss: 0.4932, Train Steps/Sec: 0.13, Epoch: 0.04324007043631383, LR: 0.0003 +[2026-02-27 05:36:10] (step=0000222) Train Loss: 0.4913, Train Steps/Sec: 0.13, Epoch: 0.04343572686362747, LR: 0.0003 +[2026-02-27 05:36:18] (step=0000223) Train Loss: 0.4898, Train Steps/Sec: 0.13, Epoch: 0.04363138329094111, LR: 0.0003 +[2026-02-27 05:36:26] (step=0000224) Train Loss: 0.4861, Train Steps/Sec: 0.13, Epoch: 0.043827039718254746, LR: 0.0003 +[2026-02-27 05:36:34] (step=0000225) Train Loss: 0.4790, Train Steps/Sec: 0.13, Epoch: 0.044022696145568384, LR: 0.0003 +[2026-02-27 05:36:42] (step=0000226) Train Loss: 0.4860, Train Steps/Sec: 0.13, Epoch: 0.04421835257288202, LR: 0.0003 +[2026-02-27 05:36:50] (step=0000227) Train Loss: 0.4915, Train Steps/Sec: 0.13, Epoch: 0.04441400900019565, LR: 0.0003 +[2026-02-27 05:36:58] (step=0000228) Train Loss: 0.4773, Train Steps/Sec: 0.13, Epoch: 0.04460966542750929, LR: 0.0003 +[2026-02-27 05:37:05] (step=0000229) Train Loss: 0.4872, Train Steps/Sec: 0.13, Epoch: 0.04480532185482293, LR: 0.0003 +[2026-02-27 05:37:13] (step=0000230) Train Loss: 0.4919, Train Steps/Sec: 0.13, Epoch: 0.04500097828213657, LR: 0.0003 +[2026-02-27 05:37:21] (step=0000231) Train Loss: 0.4911, Train Steps/Sec: 0.13, Epoch: 0.045196634709450206, LR: 0.0003 +[2026-02-27 05:37:29] (step=0000232) Train Loss: 0.4886, Train Steps/Sec: 0.13, Epoch: 0.045392291136763845, LR: 0.0003 +[2026-02-27 05:37:37] (step=0000233) Train Loss: 0.4844, Train Steps/Sec: 0.13, Epoch: 0.04558794756407748, LR: 0.0003 +[2026-02-27 05:37:45] (step=0000234) Train Loss: 0.4862, Train Steps/Sec: 0.13, Epoch: 0.045783603991391114, LR: 0.0003 +[2026-02-27 05:37:53] (step=0000235) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.04597926041870475, LR: 0.0003 +[2026-02-27 05:38:01] (step=0000236) Train Loss: 0.4780, Train Steps/Sec: 0.13, Epoch: 0.04617491684601839, LR: 0.0003 +[2026-02-27 05:38:08] (step=0000237) Train Loss: 0.4806, Train Steps/Sec: 0.13, Epoch: 0.04637057327333203, LR: 0.0003 +[2026-02-27 05:38:16] (step=0000238) Train Loss: 0.4825, Train Steps/Sec: 0.13, Epoch: 0.04656622970064567, LR: 0.0003 +[2026-02-27 05:38:24] (step=0000239) Train Loss: 0.4946, Train Steps/Sec: 0.13, Epoch: 0.046761886127959305, LR: 0.0003 +[2026-02-27 05:38:32] (step=0000240) Train Loss: 0.4983, Train Steps/Sec: 0.13, Epoch: 0.046957542555272944, LR: 0.0003 +[2026-02-27 05:38:40] (step=0000241) Train Loss: 0.4910, Train Steps/Sec: 0.13, Epoch: 0.047153198982586575, LR: 0.0003 +[2026-02-27 05:38:48] (step=0000242) Train Loss: 0.4929, Train Steps/Sec: 0.13, Epoch: 0.04734885540990021, LR: 0.0003 +[2026-02-27 05:38:56] (step=0000243) Train Loss: 0.4860, Train Steps/Sec: 0.13, Epoch: 0.04754451183721385, LR: 0.0003 +[2026-02-27 05:39:03] (step=0000244) Train Loss: 0.4775, Train Steps/Sec: 0.13, Epoch: 0.04774016826452749, LR: 0.0003 +[2026-02-27 05:39:11] (step=0000245) Train Loss: 0.4671, Train Steps/Sec: 0.12, Epoch: 0.04793582469184113, LR: 0.0003 +[2026-02-27 05:39:19] (step=0000246) Train Loss: 0.4923, Train Steps/Sec: 0.13, Epoch: 0.048131481119154766, LR: 0.0003 +[2026-02-27 05:39:27] (step=0000247) Train Loss: 0.4981, Train Steps/Sec: 0.13, Epoch: 0.048327137546468404, LR: 0.0003 +[2026-02-27 05:39:35] (step=0000248) Train Loss: 0.4812, Train Steps/Sec: 0.13, Epoch: 0.048522793973782036, LR: 0.0003 +[2026-02-27 05:39:43] (step=0000249) Train Loss: 0.4883, Train Steps/Sec: 0.13, Epoch: 0.048718450401095674, LR: 0.0003 +[2026-02-27 05:39:51] (step=0000250) Train Loss: 0.4840, Train Steps/Sec: 0.13, Epoch: 0.04891410682840931, LR: 0.0003 +[2026-02-27 05:39:59] (step=0000251) Train Loss: 0.4855, Train Steps/Sec: 0.13, Epoch: 0.04910976325572295, LR: 0.0003 +[2026-02-27 05:40:06] (step=0000252) Train Loss: 0.4864, Train Steps/Sec: 0.13, Epoch: 0.04930541968303659, LR: 0.0003 +[2026-02-27 05:40:14] (step=0000253) Train Loss: 0.4958, Train Steps/Sec: 0.13, Epoch: 0.04950107611035023, LR: 0.0003 +[2026-02-27 05:40:22] (step=0000254) Train Loss: 0.4915, Train Steps/Sec: 0.13, Epoch: 0.049696732537663865, LR: 0.0003 +[2026-02-27 05:40:30] (step=0000255) Train Loss: 0.4947, Train Steps/Sec: 0.13, Epoch: 0.0498923889649775, LR: 0.0003 +[2026-02-27 05:40:38] (step=0000256) Train Loss: 0.4951, Train Steps/Sec: 0.13, Epoch: 0.050088045392291135, LR: 0.0003 +[2026-02-27 05:40:46] (step=0000257) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.05028370181960477, LR: 0.0003 +[2026-02-27 05:40:54] (step=0000258) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.05047935824691841, LR: 0.0003 +[2026-02-27 05:41:01] (step=0000259) Train Loss: 0.4947, Train Steps/Sec: 0.13, Epoch: 0.05067501467423205, LR: 0.0003 +[2026-02-27 05:41:09] (step=0000260) Train Loss: 0.4853, Train Steps/Sec: 0.13, Epoch: 0.05087067110154569, LR: 0.0003 +[2026-02-27 05:41:17] (step=0000261) Train Loss: 0.4936, Train Steps/Sec: 0.13, Epoch: 0.051066327528859326, LR: 0.0003 +[2026-02-27 05:41:25] (step=0000262) Train Loss: 0.4880, Train Steps/Sec: 0.13, Epoch: 0.05126198395617296, LR: 0.0003 +[2026-02-27 05:41:33] (step=0000263) Train Loss: 0.4802, Train Steps/Sec: 0.13, Epoch: 0.051457640383486596, LR: 0.0003 +[2026-02-27 05:41:41] (step=0000264) Train Loss: 0.4833, Train Steps/Sec: 0.13, Epoch: 0.051653296810800234, LR: 0.0003 +[2026-02-27 05:41:49] (step=0000265) Train Loss: 0.4927, Train Steps/Sec: 0.13, Epoch: 0.05184895323811387, LR: 0.0003 +[2026-02-27 05:41:56] (step=0000266) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.05204460966542751, LR: 0.0003 +[2026-02-27 05:42:04] (step=0000267) Train Loss: 0.4974, Train Steps/Sec: 0.13, Epoch: 0.05224026609274115, LR: 0.0003 +[2026-02-27 05:42:12] (step=0000268) Train Loss: 0.4956, Train Steps/Sec: 0.13, Epoch: 0.05243592252005479, LR: 0.0003 +[2026-02-27 05:42:20] (step=0000269) Train Loss: 0.4927, Train Steps/Sec: 0.13, Epoch: 0.05263157894736842, LR: 0.0003 +[2026-02-27 05:42:28] (step=0000270) Train Loss: 0.4832, Train Steps/Sec: 0.13, Epoch: 0.052827235374682056, LR: 0.0003 +[2026-02-27 05:42:36] (step=0000271) Train Loss: 0.4799, Train Steps/Sec: 0.13, Epoch: 0.053022891801995695, LR: 0.0003 +[2026-02-27 05:42:44] (step=0000272) Train Loss: 0.4759, Train Steps/Sec: 0.13, Epoch: 0.05321854822930933, LR: 0.0003 +[2026-02-27 05:42:51] (step=0000273) Train Loss: 0.4912, Train Steps/Sec: 0.13, Epoch: 0.05341420465662297, LR: 0.0003 +[2026-02-27 05:42:59] (step=0000274) Train Loss: 0.4914, Train Steps/Sec: 0.13, Epoch: 0.05360986108393661, LR: 0.0003 +[2026-02-27 05:43:07] (step=0000275) Train Loss: 0.4802, Train Steps/Sec: 0.13, Epoch: 0.05380551751125025, LR: 0.0003 +[2026-02-27 05:43:15] (step=0000276) Train Loss: 0.4849, Train Steps/Sec: 0.13, Epoch: 0.05400117393856388, LR: 0.0003 +[2026-02-27 05:43:23] (step=0000277) Train Loss: 0.4935, Train Steps/Sec: 0.13, Epoch: 0.05419683036587752, LR: 0.0003 +[2026-02-27 05:43:31] (step=0000278) Train Loss: 0.4974, Train Steps/Sec: 0.13, Epoch: 0.054392486793191155, LR: 0.0003 +[2026-02-27 05:43:39] (step=0000279) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 0.054588143220504794, LR: 0.0003 +[2026-02-27 05:43:46] (step=0000280) Train Loss: 0.4908, Train Steps/Sec: 0.13, Epoch: 0.05478379964781843, LR: 0.0003 +[2026-02-27 05:43:54] (step=0000281) Train Loss: 0.4850, Train Steps/Sec: 0.13, Epoch: 0.05497945607513207, LR: 0.0003 +[2026-02-27 05:44:02] (step=0000282) Train Loss: 0.4840, Train Steps/Sec: 0.13, Epoch: 0.05517511250244571, LR: 0.0003 +[2026-02-27 05:44:10] (step=0000283) Train Loss: 0.4932, Train Steps/Sec: 0.13, Epoch: 0.05537076892975934, LR: 0.0003 +[2026-02-27 05:44:18] (step=0000284) Train Loss: 0.4885, Train Steps/Sec: 0.13, Epoch: 0.05556642535707298, LR: 0.0003 +[2026-02-27 05:44:26] (step=0000285) Train Loss: 0.4889, Train Steps/Sec: 0.13, Epoch: 0.055762081784386616, LR: 0.0003 +[2026-02-27 05:44:34] (step=0000286) Train Loss: 0.4898, Train Steps/Sec: 0.13, Epoch: 0.055957738211700254, LR: 0.0003 +[2026-02-27 05:44:42] (step=0000287) Train Loss: 0.4852, Train Steps/Sec: 0.13, Epoch: 0.05615339463901389, LR: 0.0003 +[2026-02-27 05:44:49] (step=0000288) Train Loss: 0.4983, Train Steps/Sec: 0.13, Epoch: 0.05634905106632753, LR: 0.0003 +[2026-02-27 05:44:57] (step=0000289) Train Loss: 0.4784, Train Steps/Sec: 0.13, Epoch: 0.05654470749364117, LR: 0.0003 +[2026-02-27 05:45:05] (step=0000290) Train Loss: 0.4909, Train Steps/Sec: 0.13, Epoch: 0.0567403639209548, LR: 0.0003 +[2026-02-27 05:45:13] (step=0000291) Train Loss: 0.4883, Train Steps/Sec: 0.13, Epoch: 0.05693602034826844, LR: 0.0003 +[2026-02-27 05:45:21] (step=0000292) Train Loss: 0.4834, Train Steps/Sec: 0.13, Epoch: 0.05713167677558208, LR: 0.0003 +[2026-02-27 05:45:29] (step=0000293) Train Loss: 0.4899, Train Steps/Sec: 0.13, Epoch: 0.057327333202895715, LR: 0.0003 +[2026-02-27 05:45:37] (step=0000294) Train Loss: 0.4863, Train Steps/Sec: 0.13, Epoch: 0.057522989630209354, LR: 0.0003 +[2026-02-27 05:45:45] (step=0000295) Train Loss: 0.4849, Train Steps/Sec: 0.13, Epoch: 0.05771864605752299, LR: 0.0003 +[2026-02-27 05:45:52] (step=0000296) Train Loss: 0.4881, Train Steps/Sec: 0.13, Epoch: 0.05791430248483663, LR: 0.0003 +[2026-02-27 05:46:00] (step=0000297) Train Loss: 0.4805, Train Steps/Sec: 0.13, Epoch: 0.05810995891215026, LR: 0.0003 +[2026-02-27 05:46:08] (step=0000298) Train Loss: 0.4939, Train Steps/Sec: 0.13, Epoch: 0.0583056153394639, LR: 0.0003 +[2026-02-27 05:46:16] (step=0000299) Train Loss: 0.4806, Train Steps/Sec: 0.13, Epoch: 0.05850127176677754, LR: 0.0003 +[2026-02-27 05:46:24] (step=0000300) Train Loss: 0.4803, Train Steps/Sec: 0.13, Epoch: 0.058696928194091176, LR: 0.0003 +[2026-02-27 05:46:32] (step=0000301) Train Loss: 0.4943, Train Steps/Sec: 0.13, Epoch: 0.058892584621404814, LR: 0.0003 +[2026-02-27 05:46:40] (step=0000302) Train Loss: 0.4776, Train Steps/Sec: 0.13, Epoch: 0.05908824104871845, LR: 0.0003 +[2026-02-27 05:46:47] (step=0000303) Train Loss: 0.4942, Train Steps/Sec: 0.13, Epoch: 0.05928389747603209, LR: 0.0003 +[2026-02-27 05:46:55] (step=0000304) Train Loss: 0.4907, Train Steps/Sec: 0.13, Epoch: 0.05947955390334572, LR: 0.0003 +[2026-02-27 05:47:03] (step=0000305) Train Loss: 0.4860, Train Steps/Sec: 0.13, Epoch: 0.05967521033065936, LR: 0.0003 +[2026-02-27 05:47:11] (step=0000306) Train Loss: 0.4848, Train Steps/Sec: 0.13, Epoch: 0.059870866757973, LR: 0.0003 +[2026-02-27 05:47:19] (step=0000307) Train Loss: 0.4807, Train Steps/Sec: 0.13, Epoch: 0.06006652318528664, LR: 0.0003 +[2026-02-27 05:47:27] (step=0000308) Train Loss: 0.4850, Train Steps/Sec: 0.13, Epoch: 0.060262179612600275, LR: 0.0003 +[2026-02-27 05:47:35] (step=0000309) Train Loss: 0.4885, Train Steps/Sec: 0.13, Epoch: 0.06045783603991391, LR: 0.0003 +[2026-02-27 05:47:42] (step=0000310) Train Loss: 0.4829, Train Steps/Sec: 0.13, Epoch: 0.06065349246722755, LR: 0.0003 +[2026-02-27 05:47:50] (step=0000311) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.06084914889454118, LR: 0.0003 +[2026-02-27 05:47:58] (step=0000312) Train Loss: 0.4818, Train Steps/Sec: 0.13, Epoch: 0.06104480532185482, LR: 0.0003 +[2026-02-27 05:48:06] (step=0000313) Train Loss: 0.4844, Train Steps/Sec: 0.13, Epoch: 0.06124046174916846, LR: 0.0003 +[2026-02-27 05:48:14] (step=0000314) Train Loss: 0.4859, Train Steps/Sec: 0.13, Epoch: 0.0614361181764821, LR: 0.0003 +[2026-02-27 05:48:22] (step=0000315) Train Loss: 0.4821, Train Steps/Sec: 0.13, Epoch: 0.061631774603795736, LR: 0.0003 +[2026-02-27 05:48:30] (step=0000316) Train Loss: 0.4879, Train Steps/Sec: 0.13, Epoch: 0.061827431031109374, LR: 0.0003 +[2026-02-27 05:48:37] (step=0000317) Train Loss: 0.4779, Train Steps/Sec: 0.13, Epoch: 0.06202308745842301, LR: 0.0003 +[2026-02-27 05:48:45] (step=0000318) Train Loss: 0.4864, Train Steps/Sec: 0.13, Epoch: 0.062218743885736644, LR: 0.0003 +[2026-02-27 05:48:53] (step=0000319) Train Loss: 0.4965, Train Steps/Sec: 0.13, Epoch: 0.06241440031305028, LR: 0.0003 +[2026-02-27 05:49:01] (step=0000320) Train Loss: 0.4810, Train Steps/Sec: 0.13, Epoch: 0.06261005674036392, LR: 0.0003 +[2026-02-27 05:49:09] (step=0000321) Train Loss: 0.4890, Train Steps/Sec: 0.13, Epoch: 0.06280571316767755, LR: 0.0003 +[2026-02-27 05:49:17] (step=0000322) Train Loss: 0.4753, Train Steps/Sec: 0.13, Epoch: 0.0630013695949912, LR: 0.0003 +[2026-02-27 05:49:24] (step=0000323) Train Loss: 0.4845, Train Steps/Sec: 0.13, Epoch: 0.06319702602230483, LR: 0.0003 +[2026-02-27 05:49:32] (step=0000324) Train Loss: 0.4762, Train Steps/Sec: 0.13, Epoch: 0.06339268244961847, LR: 0.0003 +[2026-02-27 05:49:40] (step=0000325) Train Loss: 0.4854, Train Steps/Sec: 0.13, Epoch: 0.0635883388769321, LR: 0.0003 +[2026-02-27 05:49:48] (step=0000326) Train Loss: 0.4766, Train Steps/Sec: 0.13, Epoch: 0.06378399530424575, LR: 0.0003 +[2026-02-27 05:49:56] (step=0000327) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.06397965173155938, LR: 0.0003 +[2026-02-27 05:50:04] (step=0000328) Train Loss: 0.4770, Train Steps/Sec: 0.13, Epoch: 0.06417530815887301, LR: 0.0003 +[2026-02-27 05:50:12] (step=0000329) Train Loss: 0.4791, Train Steps/Sec: 0.13, Epoch: 0.06437096458618666, LR: 0.0003 +[2026-02-27 05:50:19] (step=0000330) Train Loss: 0.4818, Train Steps/Sec: 0.13, Epoch: 0.06456662101350029, LR: 0.0003 +[2026-02-27 05:50:27] (step=0000331) Train Loss: 0.4827, Train Steps/Sec: 0.13, Epoch: 0.06476227744081393, LR: 0.0003 +[2026-02-27 05:50:35] (step=0000332) Train Loss: 0.4916, Train Steps/Sec: 0.13, Epoch: 0.06495793386812757, LR: 0.0003 +[2026-02-27 05:50:43] (step=0000333) Train Loss: 0.4867, Train Steps/Sec: 0.13, Epoch: 0.06515359029544121, LR: 0.0003 +[2026-02-27 05:50:51] (step=0000334) Train Loss: 0.4997, Train Steps/Sec: 0.12, Epoch: 0.06534924672275484, LR: 0.0003 +[2026-02-27 05:50:59] (step=0000335) Train Loss: 0.4833, Train Steps/Sec: 0.13, Epoch: 0.06554490315006847, LR: 0.0003 +[2026-02-27 05:51:07] (step=0000336) Train Loss: 0.4825, Train Steps/Sec: 0.13, Epoch: 0.06574055957738212, LR: 0.0003 +[2026-02-27 05:51:15] (step=0000337) Train Loss: 0.4758, Train Steps/Sec: 0.13, Epoch: 0.06593621600469575, LR: 0.0003 +[2026-02-27 05:51:23] (step=0000338) Train Loss: 0.4807, Train Steps/Sec: 0.13, Epoch: 0.0661318724320094, LR: 0.0003 +[2026-02-27 05:51:30] (step=0000339) Train Loss: 0.4800, Train Steps/Sec: 0.13, Epoch: 0.06632752885932303, LR: 0.0003 +[2026-02-27 05:51:38] (step=0000340) Train Loss: 0.4996, Train Steps/Sec: 0.13, Epoch: 0.06652318528663667, LR: 0.0003 +[2026-02-27 05:51:46] (step=0000341) Train Loss: 0.4904, Train Steps/Sec: 0.13, Epoch: 0.0667188417139503, LR: 0.0003 +[2026-02-27 05:51:54] (step=0000342) Train Loss: 0.4922, Train Steps/Sec: 0.13, Epoch: 0.06691449814126393, LR: 0.0003 +[2026-02-27 05:52:02] (step=0000343) Train Loss: 0.4890, Train Steps/Sec: 0.13, Epoch: 0.06711015456857758, LR: 0.0003 +[2026-02-27 05:52:10] (step=0000344) Train Loss: 0.4988, Train Steps/Sec: 0.13, Epoch: 0.06730581099589121, LR: 0.0003 +[2026-02-27 05:52:18] (step=0000345) Train Loss: 0.4773, Train Steps/Sec: 0.13, Epoch: 0.06750146742320486, LR: 0.0003 +[2026-02-27 05:52:26] (step=0000346) Train Loss: 0.4865, Train Steps/Sec: 0.12, Epoch: 0.06769712385051849, LR: 0.0003 +[2026-02-27 05:52:33] (step=0000347) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.06789278027783213, LR: 0.0003 +[2026-02-27 05:52:41] (step=0000348) Train Loss: 0.4891, Train Steps/Sec: 0.13, Epoch: 0.06808843670514576, LR: 0.0003 +[2026-02-27 05:52:49] (step=0000349) Train Loss: 0.4899, Train Steps/Sec: 0.13, Epoch: 0.0682840931324594, LR: 0.0003 +[2026-02-27 05:52:57] (step=0000350) Train Loss: 0.4909, Train Steps/Sec: 0.13, Epoch: 0.06847974955977304, LR: 0.0003 +[2026-02-27 05:53:05] (step=0000351) Train Loss: 0.4931, Train Steps/Sec: 0.13, Epoch: 0.06867540598708667, LR: 0.0003 +[2026-02-27 05:53:13] (step=0000352) Train Loss: 0.4803, Train Steps/Sec: 0.13, Epoch: 0.06887106241440032, LR: 0.0003 +[2026-02-27 05:53:20] (step=0000353) Train Loss: 0.4911, Train Steps/Sec: 0.13, Epoch: 0.06906671884171395, LR: 0.0003 +[2026-02-27 05:53:28] (step=0000354) Train Loss: 0.4816, Train Steps/Sec: 0.13, Epoch: 0.06926237526902759, LR: 0.0003 +[2026-02-27 05:53:36] (step=0000355) Train Loss: 0.4841, Train Steps/Sec: 0.13, Epoch: 0.06945803169634122, LR: 0.0003 +[2026-02-27 05:53:44] (step=0000356) Train Loss: 0.4871, Train Steps/Sec: 0.13, Epoch: 0.06965368812365486, LR: 0.0003 +[2026-02-27 05:53:52] (step=0000357) Train Loss: 0.4765, Train Steps/Sec: 0.13, Epoch: 0.0698493445509685, LR: 0.0003 +[2026-02-27 05:54:00] (step=0000358) Train Loss: 0.4854, Train Steps/Sec: 0.13, Epoch: 0.07004500097828213, LR: 0.0003 +[2026-02-27 05:54:08] (step=0000359) Train Loss: 0.4749, Train Steps/Sec: 0.13, Epoch: 0.07024065740559578, LR: 0.0003 +[2026-02-27 05:54:15] (step=0000360) Train Loss: 0.4812, Train Steps/Sec: 0.13, Epoch: 0.07043631383290941, LR: 0.0003 +[2026-02-27 05:54:23] (step=0000361) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.07063197026022305, LR: 0.0003 +[2026-02-27 05:54:31] (step=0000362) Train Loss: 0.4809, Train Steps/Sec: 0.13, Epoch: 0.07082762668753669, LR: 0.0003 +[2026-02-27 05:54:39] (step=0000363) Train Loss: 0.4851, Train Steps/Sec: 0.13, Epoch: 0.07102328311485032, LR: 0.0003 +[2026-02-27 05:54:47] (step=0000364) Train Loss: 0.4842, Train Steps/Sec: 0.13, Epoch: 0.07121893954216396, LR: 0.0003 +[2026-02-27 05:54:55] (step=0000365) Train Loss: 0.4825, Train Steps/Sec: 0.13, Epoch: 0.07141459596947759, LR: 0.0003 +[2026-02-27 05:55:03] (step=0000366) Train Loss: 0.4923, Train Steps/Sec: 0.13, Epoch: 0.07161025239679124, LR: 0.0003 +[2026-02-27 05:55:10] (step=0000367) Train Loss: 0.4786, Train Steps/Sec: 0.13, Epoch: 0.07180590882410487, LR: 0.0003 +[2026-02-27 05:55:18] (step=0000368) Train Loss: 0.4802, Train Steps/Sec: 0.13, Epoch: 0.07200156525141851, LR: 0.0003 +[2026-02-27 05:55:26] (step=0000369) Train Loss: 0.4909, Train Steps/Sec: 0.13, Epoch: 0.07219722167873215, LR: 0.0003 +[2026-02-27 05:55:34] (step=0000370) Train Loss: 0.4846, Train Steps/Sec: 0.13, Epoch: 0.07239287810604578, LR: 0.0003 +[2026-02-27 05:55:42] (step=0000371) Train Loss: 0.4868, Train Steps/Sec: 0.13, Epoch: 0.07258853453335942, LR: 0.0003 +[2026-02-27 05:55:50] (step=0000372) Train Loss: 0.4847, Train Steps/Sec: 0.13, Epoch: 0.07278419096067305, LR: 0.0003 +[2026-02-27 05:55:58] (step=0000373) Train Loss: 0.4832, Train Steps/Sec: 0.13, Epoch: 0.0729798473879867, LR: 0.0003 +[2026-02-27 05:56:05] (step=0000374) Train Loss: 0.4790, Train Steps/Sec: 0.13, Epoch: 0.07317550381530033, LR: 0.0003 +[2026-02-27 05:56:13] (step=0000375) Train Loss: 0.4807, Train Steps/Sec: 0.13, Epoch: 0.07337116024261398, LR: 0.0003 +[2026-02-27 05:56:21] (step=0000376) Train Loss: 0.4884, Train Steps/Sec: 0.13, Epoch: 0.0735668166699276, LR: 0.0003 +[2026-02-27 05:56:29] (step=0000377) Train Loss: 0.4867, Train Steps/Sec: 0.13, Epoch: 0.07376247309724124, LR: 0.0003 +[2026-02-27 05:56:37] (step=0000378) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.07395812952455488, LR: 0.0003 +[2026-02-27 05:56:45] (step=0000379) Train Loss: 0.4749, Train Steps/Sec: 0.13, Epoch: 0.07415378595186851, LR: 0.0003 +[2026-02-27 05:56:52] (step=0000380) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.07434944237918216, LR: 0.0003 +[2026-02-27 05:57:00] (step=0000381) Train Loss: 0.4815, Train Steps/Sec: 0.13, Epoch: 0.07454509880649579, LR: 0.0003 +[2026-02-27 05:57:08] (step=0000382) Train Loss: 0.4802, Train Steps/Sec: 0.12, Epoch: 0.07474075523380944, LR: 0.0003 +[2026-02-27 05:57:16] (step=0000383) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.07493641166112307, LR: 0.0003 +[2026-02-27 05:57:24] (step=0000384) Train Loss: 0.4771, Train Steps/Sec: 0.13, Epoch: 0.0751320680884367, LR: 0.0003 +[2026-02-27 05:57:32] (step=0000385) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.07532772451575034, LR: 0.0003 +[2026-02-27 05:57:40] (step=0000386) Train Loss: 0.4883, Train Steps/Sec: 0.13, Epoch: 0.07552338094306398, LR: 0.0003 +[2026-02-27 05:57:48] (step=0000387) Train Loss: 0.4859, Train Steps/Sec: 0.13, Epoch: 0.07571903737037762, LR: 0.0003 +[2026-02-27 05:57:55] (step=0000388) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.07591469379769125, LR: 0.0003 +[2026-02-27 05:58:03] (step=0000389) Train Loss: 0.4889, Train Steps/Sec: 0.13, Epoch: 0.0761103502250049, LR: 0.0003 +[2026-02-27 05:58:11] (step=0000390) Train Loss: 0.4809, Train Steps/Sec: 0.13, Epoch: 0.07630600665231853, LR: 0.0003 +[2026-02-27 05:58:19] (step=0000391) Train Loss: 0.4913, Train Steps/Sec: 0.13, Epoch: 0.07650166307963216, LR: 0.0003 +[2026-02-27 05:58:27] (step=0000392) Train Loss: 0.4779, Train Steps/Sec: 0.13, Epoch: 0.0766973195069458, LR: 0.0003 +[2026-02-27 05:58:35] (step=0000393) Train Loss: 0.4817, Train Steps/Sec: 0.13, Epoch: 0.07689297593425944, LR: 0.0003 +[2026-02-27 05:58:43] (step=0000394) Train Loss: 0.4808, Train Steps/Sec: 0.13, Epoch: 0.07708863236157308, LR: 0.0003 +[2026-02-27 05:58:50] (step=0000395) Train Loss: 0.4772, Train Steps/Sec: 0.13, Epoch: 0.07728428878888671, LR: 0.0003 +[2026-02-27 05:58:58] (step=0000396) Train Loss: 0.4838, Train Steps/Sec: 0.13, Epoch: 0.07747994521620036, LR: 0.0003 +[2026-02-27 05:59:06] (step=0000397) Train Loss: 0.4688, Train Steps/Sec: 0.12, Epoch: 0.07767560164351399, LR: 0.0003 +[2026-02-27 05:59:14] (step=0000398) Train Loss: 0.4853, Train Steps/Sec: 0.13, Epoch: 0.07787125807082762, LR: 0.0003 +[2026-02-27 05:59:22] (step=0000399) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.07806691449814127, LR: 0.0003 +[2026-02-27 05:59:30] (step=0000400) Train Loss: 0.4862, Train Steps/Sec: 0.13, Epoch: 0.0782625709254549, LR: 0.0003 +[2026-02-27 05:59:38] (step=0000401) Train Loss: 0.4766, Train Steps/Sec: 0.13, Epoch: 0.07845822735276854, LR: 0.0003 +[2026-02-27 05:59:46] (step=0000402) Train Loss: 0.4793, Train Steps/Sec: 0.13, Epoch: 0.07865388378008217, LR: 0.0003 +[2026-02-27 05:59:53] (step=0000403) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.07884954020739582, LR: 0.0003 +[2026-02-27 06:00:01] (step=0000404) Train Loss: 0.4788, Train Steps/Sec: 0.13, Epoch: 0.07904519663470945, LR: 0.0003 +[2026-02-27 06:00:09] (step=0000405) Train Loss: 0.4864, Train Steps/Sec: 0.13, Epoch: 0.07924085306202308, LR: 0.0003 +[2026-02-27 06:00:17] (step=0000406) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.07943650948933673, LR: 0.0003 +[2026-02-27 06:00:25] (step=0000407) Train Loss: 0.4875, Train Steps/Sec: 0.13, Epoch: 0.07963216591665036, LR: 0.0003 +[2026-02-27 06:00:33] (step=0000408) Train Loss: 0.4937, Train Steps/Sec: 0.13, Epoch: 0.079827822343964, LR: 0.0003 +[2026-02-27 06:00:41] (step=0000409) Train Loss: 0.4821, Train Steps/Sec: 0.13, Epoch: 0.08002347877127763, LR: 0.0003 +[2026-02-27 06:00:48] (step=0000410) Train Loss: 0.4755, Train Steps/Sec: 0.13, Epoch: 0.08021913519859128, LR: 0.0003 +[2026-02-27 06:00:56] (step=0000411) Train Loss: 0.4829, Train Steps/Sec: 0.13, Epoch: 0.08041479162590491, LR: 0.0003 +[2026-02-27 06:01:04] (step=0000412) Train Loss: 0.4800, Train Steps/Sec: 0.13, Epoch: 0.08061044805321854, LR: 0.0003 +[2026-02-27 06:01:12] (step=0000413) Train Loss: 0.4784, Train Steps/Sec: 0.13, Epoch: 0.08080610448053219, LR: 0.0003 +[2026-02-27 06:01:20] (step=0000414) Train Loss: 0.4874, Train Steps/Sec: 0.13, Epoch: 0.08100176090784582, LR: 0.0003 +[2026-02-27 06:01:28] (step=0000415) Train Loss: 0.4858, Train Steps/Sec: 0.13, Epoch: 0.08119741733515946, LR: 0.0003 +[2026-02-27 06:01:36] (step=0000416) Train Loss: 0.4780, Train Steps/Sec: 0.13, Epoch: 0.0813930737624731, LR: 0.0003 +[2026-02-27 06:01:43] (step=0000417) Train Loss: 0.4907, Train Steps/Sec: 0.13, Epoch: 0.08158873018978674, LR: 0.0003 +[2026-02-27 06:01:51] (step=0000418) Train Loss: 0.4790, Train Steps/Sec: 0.13, Epoch: 0.08178438661710037, LR: 0.0003 +[2026-02-27 06:01:59] (step=0000419) Train Loss: 0.4821, Train Steps/Sec: 0.13, Epoch: 0.081980043044414, LR: 0.0003 +[2026-02-27 06:02:07] (step=0000420) Train Loss: 0.4766, Train Steps/Sec: 0.13, Epoch: 0.08217569947172765, LR: 0.0003 +[2026-02-27 06:02:15] (step=0000421) Train Loss: 0.4823, Train Steps/Sec: 0.13, Epoch: 0.08237135589904128, LR: 0.0003 +[2026-02-27 06:02:23] (step=0000422) Train Loss: 0.4788, Train Steps/Sec: 0.13, Epoch: 0.08256701232635492, LR: 0.0003 +[2026-02-27 06:02:31] (step=0000423) Train Loss: 0.4797, Train Steps/Sec: 0.13, Epoch: 0.08276266875366856, LR: 0.0003 +[2026-02-27 06:02:38] (step=0000424) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.0829583251809822, LR: 0.0003 +[2026-02-27 06:02:46] (step=0000425) Train Loss: 0.4859, Train Steps/Sec: 0.13, Epoch: 0.08315398160829583, LR: 0.0003 +[2026-02-27 06:02:54] (step=0000426) Train Loss: 0.4807, Train Steps/Sec: 0.13, Epoch: 0.08334963803560946, LR: 0.0003 +[2026-02-27 06:03:02] (step=0000427) Train Loss: 0.4797, Train Steps/Sec: 0.13, Epoch: 0.08354529446292311, LR: 0.0003 +[2026-02-27 06:03:10] (step=0000428) Train Loss: 0.4809, Train Steps/Sec: 0.13, Epoch: 0.08374095089023674, LR: 0.0003 +[2026-02-27 06:03:18] (step=0000429) Train Loss: 0.4775, Train Steps/Sec: 0.13, Epoch: 0.08393660731755039, LR: 0.0003 +[2026-02-27 06:03:26] (step=0000430) Train Loss: 0.5070, Train Steps/Sec: 0.13, Epoch: 0.08413226374486402, LR: 0.0003 +[2026-02-27 06:03:33] (step=0000431) Train Loss: 0.4858, Train Steps/Sec: 0.13, Epoch: 0.08432792017217766, LR: 0.0003 +[2026-02-27 06:03:41] (step=0000432) Train Loss: 0.4813, Train Steps/Sec: 0.13, Epoch: 0.08452357659949129, LR: 0.0003 +[2026-02-27 06:03:49] (step=0000433) Train Loss: 0.4898, Train Steps/Sec: 0.13, Epoch: 0.08471923302680492, LR: 0.0003 +[2026-02-27 06:03:57] (step=0000434) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.08491488945411857, LR: 0.0003 +[2026-02-27 06:04:05] (step=0000435) Train Loss: 0.4795, Train Steps/Sec: 0.13, Epoch: 0.0851105458814322, LR: 0.0003 +[2026-02-27 06:04:13] (step=0000436) Train Loss: 0.4838, Train Steps/Sec: 0.13, Epoch: 0.08530620230874585, LR: 0.0003 +[2026-02-27 06:04:21] (step=0000437) Train Loss: 0.4850, Train Steps/Sec: 0.13, Epoch: 0.08550185873605948, LR: 0.0003 +[2026-02-27 06:04:29] (step=0000438) Train Loss: 0.4751, Train Steps/Sec: 0.13, Epoch: 0.08569751516337312, LR: 0.0003 +[2026-02-27 06:04:36] (step=0000439) Train Loss: 0.4758, Train Steps/Sec: 0.13, Epoch: 0.08589317159068675, LR: 0.0003 +[2026-02-27 06:04:44] (step=0000440) Train Loss: 0.4779, Train Steps/Sec: 0.13, Epoch: 0.08608882801800039, LR: 0.0003 +[2026-02-27 06:04:52] (step=0000441) Train Loss: 0.4773, Train Steps/Sec: 0.13, Epoch: 0.08628448444531403, LR: 0.0003 +[2026-02-27 06:05:00] (step=0000442) Train Loss: 0.4770, Train Steps/Sec: 0.13, Epoch: 0.08648014087262766, LR: 0.0003 +[2026-02-27 06:05:08] (step=0000443) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.0866757972999413, LR: 0.0003 +[2026-02-27 06:05:16] (step=0000444) Train Loss: 0.4815, Train Steps/Sec: 0.13, Epoch: 0.08687145372725494, LR: 0.0003 +[2026-02-27 06:05:24] (step=0000445) Train Loss: 0.4805, Train Steps/Sec: 0.13, Epoch: 0.08706711015456858, LR: 0.0003 +[2026-02-27 06:05:32] (step=0000446) Train Loss: 0.4776, Train Steps/Sec: 0.12, Epoch: 0.08726276658188221, LR: 0.0003 +[2026-02-27 06:05:39] (step=0000447) Train Loss: 0.4839, Train Steps/Sec: 0.13, Epoch: 0.08745842300919585, LR: 0.0003 +[2026-02-27 06:05:47] (step=0000448) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.08765407943650949, LR: 0.0003 +[2026-02-27 06:05:55] (step=0000449) Train Loss: 0.4753, Train Steps/Sec: 0.13, Epoch: 0.08784973586382312, LR: 0.0003 +[2026-02-27 06:06:03] (step=0000450) Train Loss: 0.4801, Train Steps/Sec: 0.13, Epoch: 0.08804539229113677, LR: 0.0003 +[2026-02-27 06:06:11] (step=0000451) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.0882410487184504, LR: 0.0003 +[2026-02-27 06:06:19] (step=0000452) Train Loss: 0.4835, Train Steps/Sec: 0.13, Epoch: 0.08843670514576404, LR: 0.0003 +[2026-02-27 06:06:27] (step=0000453) Train Loss: 0.4847, Train Steps/Sec: 0.13, Epoch: 0.08863236157307768, LR: 0.0003 +[2026-02-27 06:06:34] (step=0000454) Train Loss: 0.4879, Train Steps/Sec: 0.13, Epoch: 0.0888280180003913, LR: 0.0003 +[2026-02-27 06:06:42] (step=0000455) Train Loss: 0.4842, Train Steps/Sec: 0.13, Epoch: 0.08902367442770495, LR: 0.0003 +[2026-02-27 06:06:50] (step=0000456) Train Loss: 0.4896, Train Steps/Sec: 0.13, Epoch: 0.08921933085501858, LR: 0.0003 +[2026-02-27 06:06:58] (step=0000457) Train Loss: 0.4762, Train Steps/Sec: 0.13, Epoch: 0.08941498728233223, LR: 0.0003 +[2026-02-27 06:07:06] (step=0000458) Train Loss: 0.4829, Train Steps/Sec: 0.13, Epoch: 0.08961064370964586, LR: 0.0003 +[2026-02-27 06:07:14] (step=0000459) Train Loss: 0.4841, Train Steps/Sec: 0.13, Epoch: 0.0898063001369595, LR: 0.0003 +[2026-02-27 06:07:21] (step=0000460) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 0.09000195656427314, LR: 0.0003 +[2026-02-27 06:07:29] (step=0000461) Train Loss: 0.4768, Train Steps/Sec: 0.13, Epoch: 0.09019761299158677, LR: 0.0003 +[2026-02-27 06:07:37] (step=0000462) Train Loss: 0.4924, Train Steps/Sec: 0.13, Epoch: 0.09039326941890041, LR: 0.0003 +[2026-02-27 06:07:45] (step=0000463) Train Loss: 0.4781, Train Steps/Sec: 0.13, Epoch: 0.09058892584621404, LR: 0.0003 +[2026-02-27 06:07:53] (step=0000464) Train Loss: 0.4850, Train Steps/Sec: 0.13, Epoch: 0.09078458227352769, LR: 0.0003 +[2026-02-27 06:08:01] (step=0000465) Train Loss: 0.4791, Train Steps/Sec: 0.13, Epoch: 0.09098023870084132, LR: 0.0003 +[2026-02-27 06:08:09] (step=0000466) Train Loss: 0.4906, Train Steps/Sec: 0.13, Epoch: 0.09117589512815497, LR: 0.0003 +[2026-02-27 06:08:16] (step=0000467) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.0913715515554686, LR: 0.0003 +[2026-02-27 06:08:24] (step=0000468) Train Loss: 0.4788, Train Steps/Sec: 0.13, Epoch: 0.09156720798278223, LR: 0.0003 +[2026-02-27 06:08:32] (step=0000469) Train Loss: 0.4811, Train Steps/Sec: 0.13, Epoch: 0.09176286441009587, LR: 0.0003 +[2026-02-27 06:08:40] (step=0000470) Train Loss: 0.4789, Train Steps/Sec: 0.13, Epoch: 0.0919585208374095, LR: 0.0003 +[2026-02-27 06:08:48] (step=0000471) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.09215417726472315, LR: 0.0003 +[2026-02-27 06:08:56] (step=0000472) Train Loss: 0.4813, Train Steps/Sec: 0.13, Epoch: 0.09234983369203678, LR: 0.0003 +[2026-02-27 06:09:04] (step=0000473) Train Loss: 0.4793, Train Steps/Sec: 0.13, Epoch: 0.09254549011935043, LR: 0.0003 +[2026-02-27 06:09:11] (step=0000474) Train Loss: 0.4850, Train Steps/Sec: 0.13, Epoch: 0.09274114654666406, LR: 0.0003 +[2026-02-27 06:09:19] (step=0000475) Train Loss: 0.4867, Train Steps/Sec: 0.13, Epoch: 0.09293680297397769, LR: 0.0003 +[2026-02-27 06:09:27] (step=0000476) Train Loss: 0.4879, Train Steps/Sec: 0.13, Epoch: 0.09313245940129133, LR: 0.0003 +[2026-02-27 06:09:35] (step=0000477) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.09332811582860497, LR: 0.0003 +[2026-02-27 06:09:43] (step=0000478) Train Loss: 0.4822, Train Steps/Sec: 0.13, Epoch: 0.09352377225591861, LR: 0.0003 +[2026-02-27 06:09:51] (step=0000479) Train Loss: 0.4774, Train Steps/Sec: 0.13, Epoch: 0.09371942868323224, LR: 0.0003 +[2026-02-27 06:09:59] (step=0000480) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.09391508511054589, LR: 0.0003 +[2026-02-27 06:10:06] (step=0000481) Train Loss: 0.4859, Train Steps/Sec: 0.13, Epoch: 0.09411074153785952, LR: 0.0003 +[2026-02-27 06:10:14] (step=0000482) Train Loss: 0.4839, Train Steps/Sec: 0.13, Epoch: 0.09430639796517315, LR: 0.0003 +[2026-02-27 06:10:22] (step=0000483) Train Loss: 0.4808, Train Steps/Sec: 0.13, Epoch: 0.0945020543924868, LR: 0.0003 +[2026-02-27 06:10:30] (step=0000484) Train Loss: 0.4781, Train Steps/Sec: 0.13, Epoch: 0.09469771081980043, LR: 0.0003 +[2026-02-27 06:10:38] (step=0000485) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.09489336724711407, LR: 0.0003 +[2026-02-27 06:10:46] (step=0000486) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 0.0950890236744277, LR: 0.0003 +[2026-02-27 06:10:54] (step=0000487) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.09528468010174135, LR: 0.0003 +[2026-02-27 06:11:02] (step=0000488) Train Loss: 0.4831, Train Steps/Sec: 0.13, Epoch: 0.09548033652905498, LR: 0.0003 +[2026-02-27 06:11:09] (step=0000489) Train Loss: 0.4802, Train Steps/Sec: 0.13, Epoch: 0.09567599295636861, LR: 0.0003 +[2026-02-27 06:11:17] (step=0000490) Train Loss: 0.4804, Train Steps/Sec: 0.13, Epoch: 0.09587164938368226, LR: 0.0003 +[2026-02-27 06:11:25] (step=0000491) Train Loss: 0.4851, Train Steps/Sec: 0.13, Epoch: 0.09606730581099589, LR: 0.0003 +[2026-02-27 06:11:33] (step=0000492) Train Loss: 0.4974, Train Steps/Sec: 0.13, Epoch: 0.09626296223830953, LR: 0.0003 +[2026-02-27 06:11:41] (step=0000493) Train Loss: 0.4808, Train Steps/Sec: 0.13, Epoch: 0.09645861866562316, LR: 0.0003 +[2026-02-27 06:11:49] (step=0000494) Train Loss: 0.4919, Train Steps/Sec: 0.13, Epoch: 0.09665427509293681, LR: 0.0003 +[2026-02-27 06:11:57] (step=0000495) Train Loss: 0.4704, Train Steps/Sec: 0.12, Epoch: 0.09684993152025044, LR: 0.0003 +[2026-02-27 06:12:05] (step=0000496) Train Loss: 0.4918, Train Steps/Sec: 0.13, Epoch: 0.09704558794756407, LR: 0.0003 +[2026-02-27 06:12:12] (step=0000497) Train Loss: 0.4770, Train Steps/Sec: 0.13, Epoch: 0.09724124437487772, LR: 0.0003 +[2026-02-27 06:12:20] (step=0000498) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.09743690080219135, LR: 0.0003 +[2026-02-27 06:12:28] (step=0000499) Train Loss: 0.4827, Train Steps/Sec: 0.13, Epoch: 0.097632557229505, LR: 0.0003 +[2026-02-27 06:12:36] (step=0000500) Train Loss: 0.4786, Train Steps/Sec: 0.13, Epoch: 0.09782821365681862, LR: 0.0003 +[2026-02-27 06:12:36] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0000500/ +[2026-02-27 06:12:44] (step=0000501) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.09802387008413227, LR: 0.0003 +[2026-02-27 06:12:52] (step=0000502) Train Loss: 0.4790, Train Steps/Sec: 0.13, Epoch: 0.0982195265114459, LR: 0.0003 +[2026-02-27 06:13:00] (step=0000503) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.09841518293875953, LR: 0.0003 +[2026-02-27 06:13:07] (step=0000504) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.09861083936607318, LR: 0.0003 +[2026-02-27 06:13:15] (step=0000505) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 0.09880649579338681, LR: 0.0003 +[2026-02-27 06:13:23] (step=0000506) Train Loss: 0.4840, Train Steps/Sec: 0.13, Epoch: 0.09900215222070045, LR: 0.0003 +[2026-02-27 06:13:31] (step=0000507) Train Loss: 0.4819, Train Steps/Sec: 0.13, Epoch: 0.09919780864801409, LR: 0.0003 +[2026-02-27 06:13:39] (step=0000508) Train Loss: 0.4819, Train Steps/Sec: 0.13, Epoch: 0.09939346507532773, LR: 0.0003 +[2026-02-27 06:13:47] (step=0000509) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.09958912150264136, LR: 0.0003 +[2026-02-27 06:13:55] (step=0000510) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.099784777929955, LR: 0.0003 +[2026-02-27 06:14:02] (step=0000511) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.09998043435726864, LR: 0.0003 +[2026-02-27 06:14:10] (step=0000512) Train Loss: 0.4757, Train Steps/Sec: 0.13, Epoch: 0.10017609078458227, LR: 0.0003 +[2026-02-27 06:14:18] (step=0000513) Train Loss: 0.4795, Train Steps/Sec: 0.13, Epoch: 0.10037174721189591, LR: 0.0003 +[2026-02-27 06:14:26] (step=0000514) Train Loss: 0.4817, Train Steps/Sec: 0.13, Epoch: 0.10056740363920955, LR: 0.0003 +[2026-02-27 06:14:34] (step=0000515) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.10076306006652319, LR: 0.0003 +[2026-02-27 06:14:42] (step=0000516) Train Loss: 0.4906, Train Steps/Sec: 0.13, Epoch: 0.10095871649383682, LR: 0.0003 +[2026-02-27 06:14:50] (step=0000517) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.10115437292115045, LR: 0.0003 +[2026-02-27 06:14:57] (step=0000518) Train Loss: 0.4779, Train Steps/Sec: 0.13, Epoch: 0.1013500293484641, LR: 0.0003 +[2026-02-27 06:15:05] (step=0000519) Train Loss: 0.4832, Train Steps/Sec: 0.13, Epoch: 0.10154568577577773, LR: 0.0003 +[2026-02-27 06:15:13] (step=0000520) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.10174134220309138, LR: 0.0003 +[2026-02-27 06:15:21] (step=0000521) Train Loss: 0.4841, Train Steps/Sec: 0.13, Epoch: 0.101936998630405, LR: 0.0003 +[2026-02-27 06:15:29] (step=0000522) Train Loss: 0.4820, Train Steps/Sec: 0.13, Epoch: 0.10213265505771865, LR: 0.0003 +[2026-02-27 06:15:37] (step=0000523) Train Loss: 0.4882, Train Steps/Sec: 0.13, Epoch: 0.10232831148503228, LR: 0.0003 +[2026-02-27 06:15:44] (step=0000524) Train Loss: 0.4885, Train Steps/Sec: 0.13, Epoch: 0.10252396791234591, LR: 0.0003 +[2026-02-27 06:15:52] (step=0000525) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.10271962433965956, LR: 0.0003 +[2026-02-27 06:16:00] (step=0000526) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.10291528076697319, LR: 0.0003 +[2026-02-27 06:16:08] (step=0000527) Train Loss: 0.4765, Train Steps/Sec: 0.13, Epoch: 0.10311093719428684, LR: 0.0003 +[2026-02-27 06:16:16] (step=0000528) Train Loss: 0.4825, Train Steps/Sec: 0.13, Epoch: 0.10330659362160047, LR: 0.0003 +[2026-02-27 06:16:24] (step=0000529) Train Loss: 0.4768, Train Steps/Sec: 0.12, Epoch: 0.10350225004891411, LR: 0.0003 +[2026-02-27 06:16:32] (step=0000530) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.10369790647622774, LR: 0.0003 +[2026-02-27 06:16:40] (step=0000531) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.10389356290354138, LR: 0.0003 +[2026-02-27 06:16:47] (step=0000532) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.10408921933085502, LR: 0.0003 +[2026-02-27 06:16:55] (step=0000533) Train Loss: 0.4776, Train Steps/Sec: 0.13, Epoch: 0.10428487575816865, LR: 0.0003 +[2026-02-27 06:17:03] (step=0000534) Train Loss: 0.4786, Train Steps/Sec: 0.13, Epoch: 0.1044805321854823, LR: 0.0003 +[2026-02-27 06:17:11] (step=0000535) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.10467618861279593, LR: 0.0003 +[2026-02-27 06:17:19] (step=0000536) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.10487184504010957, LR: 0.0003 +[2026-02-27 06:17:27] (step=0000537) Train Loss: 0.4777, Train Steps/Sec: 0.13, Epoch: 0.1050675014674232, LR: 0.0003 +[2026-02-27 06:17:35] (step=0000538) Train Loss: 0.4859, Train Steps/Sec: 0.13, Epoch: 0.10526315789473684, LR: 0.0003 +[2026-02-27 06:17:42] (step=0000539) Train Loss: 0.4836, Train Steps/Sec: 0.13, Epoch: 0.10545881432205048, LR: 0.0003 +[2026-02-27 06:17:50] (step=0000540) Train Loss: 0.4815, Train Steps/Sec: 0.13, Epoch: 0.10565447074936411, LR: 0.0003 +[2026-02-27 06:17:58] (step=0000541) Train Loss: 0.4925, Train Steps/Sec: 0.12, Epoch: 0.10585012717667776, LR: 0.0003 +[2026-02-27 06:18:06] (step=0000542) Train Loss: 0.4780, Train Steps/Sec: 0.13, Epoch: 0.10604578360399139, LR: 0.0003 +[2026-02-27 06:18:14] (step=0000543) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.10624144003130503, LR: 0.0003 +[2026-02-27 06:18:22] (step=0000544) Train Loss: 0.4807, Train Steps/Sec: 0.13, Epoch: 0.10643709645861867, LR: 0.0003 +[2026-02-27 06:18:30] (step=0000545) Train Loss: 0.4815, Train Steps/Sec: 0.13, Epoch: 0.1066327528859323, LR: 0.0003 +[2026-02-27 06:18:38] (step=0000546) Train Loss: 0.4829, Train Steps/Sec: 0.13, Epoch: 0.10682840931324594, LR: 0.0003 +[2026-02-27 06:18:45] (step=0000547) Train Loss: 0.4799, Train Steps/Sec: 0.13, Epoch: 0.10702406574055957, LR: 0.0003 +[2026-02-27 06:18:53] (step=0000548) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.10721972216787322, LR: 0.0003 +[2026-02-27 06:19:01] (step=0000549) Train Loss: 0.4809, Train Steps/Sec: 0.13, Epoch: 0.10741537859518685, LR: 0.0003 +[2026-02-27 06:19:09] (step=0000550) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.1076110350225005, LR: 0.0003 +[2026-02-27 06:19:17] (step=0000551) Train Loss: 0.4794, Train Steps/Sec: 0.13, Epoch: 0.10780669144981413, LR: 0.0003 +[2026-02-27 06:19:25] (step=0000552) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.10800234787712776, LR: 0.0003 +[2026-02-27 06:19:33] (step=0000553) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.1081980043044414, LR: 0.0003 +[2026-02-27 06:19:40] (step=0000554) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.10839366073175503, LR: 0.0003 +[2026-02-27 06:19:48] (step=0000555) Train Loss: 0.4752, Train Steps/Sec: 0.13, Epoch: 0.10858931715906868, LR: 0.0003 +[2026-02-27 06:19:56] (step=0000556) Train Loss: 0.4813, Train Steps/Sec: 0.13, Epoch: 0.10878497358638231, LR: 0.0003 +[2026-02-27 06:20:04] (step=0000557) Train Loss: 0.4824, Train Steps/Sec: 0.13, Epoch: 0.10898063001369596, LR: 0.0003 +[2026-02-27 06:20:12] (step=0000558) Train Loss: 0.4842, Train Steps/Sec: 0.13, Epoch: 0.10917628644100959, LR: 0.0003 +[2026-02-27 06:20:20] (step=0000559) Train Loss: 0.4798, Train Steps/Sec: 0.13, Epoch: 0.10937194286832322, LR: 0.0003 +[2026-02-27 06:20:28] (step=0000560) Train Loss: 0.4812, Train Steps/Sec: 0.13, Epoch: 0.10956759929563686, LR: 0.0003 +[2026-02-27 06:20:35] (step=0000561) Train Loss: 0.4764, Train Steps/Sec: 0.13, Epoch: 0.1097632557229505, LR: 0.0003 +[2026-02-27 06:20:43] (step=0000562) Train Loss: 0.4835, Train Steps/Sec: 0.13, Epoch: 0.10995891215026414, LR: 0.0003 +[2026-02-27 06:20:51] (step=0000563) Train Loss: 0.4802, Train Steps/Sec: 0.13, Epoch: 0.11015456857757777, LR: 0.0003 +[2026-02-27 06:20:59] (step=0000564) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.11035022500489142, LR: 0.0003 +[2026-02-27 06:21:07] (step=0000565) Train Loss: 0.4859, Train Steps/Sec: 0.13, Epoch: 0.11054588143220505, LR: 0.0003 +[2026-02-27 06:21:15] (step=0000566) Train Loss: 0.4756, Train Steps/Sec: 0.13, Epoch: 0.11074153785951868, LR: 0.0003 +[2026-02-27 06:21:22] (step=0000567) Train Loss: 0.4854, Train Steps/Sec: 0.13, Epoch: 0.11093719428683232, LR: 0.0003 +[2026-02-27 06:21:30] (step=0000568) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.11113285071414596, LR: 0.0003 +[2026-02-27 06:21:38] (step=0000569) Train Loss: 0.4786, Train Steps/Sec: 0.13, Epoch: 0.1113285071414596, LR: 0.0003 +[2026-02-27 06:21:46] (step=0000570) Train Loss: 0.4853, Train Steps/Sec: 0.13, Epoch: 0.11152416356877323, LR: 0.0003 +[2026-02-27 06:21:54] (step=0000571) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.11171981999608688, LR: 0.0003 +[2026-02-27 06:22:02] (step=0000572) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.11191547642340051, LR: 0.0003 +[2026-02-27 06:22:09] (step=0000573) Train Loss: 0.4875, Train Steps/Sec: 0.13, Epoch: 0.11211113285071414, LR: 0.0003 +[2026-02-27 06:22:17] (step=0000574) Train Loss: 0.4782, Train Steps/Sec: 0.13, Epoch: 0.11230678927802779, LR: 0.0003 +[2026-02-27 06:22:25] (step=0000575) Train Loss: 0.4785, Train Steps/Sec: 0.13, Epoch: 0.11250244570534142, LR: 0.0003 +[2026-02-27 06:22:33] (step=0000576) Train Loss: 0.4887, Train Steps/Sec: 0.13, Epoch: 0.11269810213265506, LR: 0.0003 +[2026-02-27 06:22:41] (step=0000577) Train Loss: 0.4787, Train Steps/Sec: 0.13, Epoch: 0.1128937585599687, LR: 0.0003 +[2026-02-27 06:22:49] (step=0000578) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.11308941498728234, LR: 0.0003 +[2026-02-27 06:22:57] (step=0000579) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.11328507141459597, LR: 0.0003 +[2026-02-27 06:23:05] (step=0000580) Train Loss: 0.4836, Train Steps/Sec: 0.12, Epoch: 0.1134807278419096, LR: 0.0003 +[2026-02-27 06:23:12] (step=0000581) Train Loss: 0.4821, Train Steps/Sec: 0.13, Epoch: 0.11367638426922325, LR: 0.0003 +[2026-02-27 06:23:20] (step=0000582) Train Loss: 0.4812, Train Steps/Sec: 0.13, Epoch: 0.11387204069653688, LR: 0.0003 +[2026-02-27 06:23:28] (step=0000583) Train Loss: 0.4696, Train Steps/Sec: 0.13, Epoch: 0.11406769712385052, LR: 0.0003 +[2026-02-27 06:23:36] (step=0000584) Train Loss: 0.4854, Train Steps/Sec: 0.13, Epoch: 0.11426335355116415, LR: 0.0003 +[2026-02-27 06:23:44] (step=0000585) Train Loss: 0.4846, Train Steps/Sec: 0.13, Epoch: 0.1144590099784778, LR: 0.0003 +[2026-02-27 06:23:52] (step=0000586) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.11465466640579143, LR: 0.0003 +[2026-02-27 06:24:00] (step=0000587) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.11485032283310506, LR: 0.0003 +[2026-02-27 06:24:07] (step=0000588) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.11504597926041871, LR: 0.0003 +[2026-02-27 06:24:15] (step=0000589) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.11524163568773234, LR: 0.0003 +[2026-02-27 06:24:23] (step=0000590) Train Loss: 0.4854, Train Steps/Sec: 0.13, Epoch: 0.11543729211504598, LR: 0.0003 +[2026-02-27 06:24:31] (step=0000591) Train Loss: 0.4865, Train Steps/Sec: 0.13, Epoch: 0.11563294854235961, LR: 0.0003 +[2026-02-27 06:24:39] (step=0000592) Train Loss: 0.4752, Train Steps/Sec: 0.13, Epoch: 0.11582860496967326, LR: 0.0003 +[2026-02-27 06:24:47] (step=0000593) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.11602426139698689, LR: 0.0003 +[2026-02-27 06:24:55] (step=0000594) Train Loss: 0.4837, Train Steps/Sec: 0.13, Epoch: 0.11621991782430052, LR: 0.0003 +[2026-02-27 06:25:03] (step=0000595) Train Loss: 0.4759, Train Steps/Sec: 0.13, Epoch: 0.11641557425161417, LR: 0.0003 +[2026-02-27 06:25:10] (step=0000596) Train Loss: 0.4770, Train Steps/Sec: 0.13, Epoch: 0.1166112306789278, LR: 0.0003 +[2026-02-27 06:25:18] (step=0000597) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.11680688710624144, LR: 0.0003 +[2026-02-27 06:25:26] (step=0000598) Train Loss: 0.4830, Train Steps/Sec: 0.13, Epoch: 0.11700254353355508, LR: 0.0003 +[2026-02-27 06:25:34] (step=0000599) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.11719819996086872, LR: 0.0003 +[2026-02-27 06:25:42] (step=0000600) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.11739385638818235, LR: 0.0003 +[2026-02-27 06:25:50] (step=0000601) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.11758951281549598, LR: 0.0003 +[2026-02-27 06:25:57] (step=0000602) Train Loss: 0.4872, Train Steps/Sec: 0.13, Epoch: 0.11778516924280963, LR: 0.0003 +[2026-02-27 06:26:05] (step=0000603) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.11798082567012326, LR: 0.0003 +[2026-02-27 06:26:13] (step=0000604) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.1181764820974369, LR: 0.0003 +[2026-02-27 06:26:21] (step=0000605) Train Loss: 0.4829, Train Steps/Sec: 0.13, Epoch: 0.11837213852475054, LR: 0.0003 +[2026-02-27 06:26:29] (step=0000606) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.11856779495206418, LR: 0.0003 +[2026-02-27 06:26:37] (step=0000607) Train Loss: 0.4816, Train Steps/Sec: 0.13, Epoch: 0.11876345137937781, LR: 0.0003 +[2026-02-27 06:26:45] (step=0000608) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.11895910780669144, LR: 0.0003 +[2026-02-27 06:26:52] (step=0000609) Train Loss: 0.4865, Train Steps/Sec: 0.13, Epoch: 0.11915476423400509, LR: 0.0003 +[2026-02-27 06:27:00] (step=0000610) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.11935042066131872, LR: 0.0003 +[2026-02-27 06:27:08] (step=0000611) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.11954607708863237, LR: 0.0003 +[2026-02-27 06:27:16] (step=0000612) Train Loss: 0.4794, Train Steps/Sec: 0.13, Epoch: 0.119741733515946, LR: 0.0003 +[2026-02-27 06:27:24] (step=0000613) Train Loss: 0.4783, Train Steps/Sec: 0.13, Epoch: 0.11993738994325964, LR: 0.0003 +[2026-02-27 06:27:32] (step=0000614) Train Loss: 0.4800, Train Steps/Sec: 0.13, Epoch: 0.12013304637057327, LR: 0.0003 +[2026-02-27 06:27:40] (step=0000615) Train Loss: 0.4743, Train Steps/Sec: 0.13, Epoch: 0.1203287027978869, LR: 0.0003 +[2026-02-27 06:27:47] (step=0000616) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.12052435922520055, LR: 0.0003 +[2026-02-27 06:27:55] (step=0000617) Train Loss: 0.4799, Train Steps/Sec: 0.13, Epoch: 0.12072001565251418, LR: 0.0003 +[2026-02-27 06:28:03] (step=0000618) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.12091567207982783, LR: 0.0003 +[2026-02-27 06:28:11] (step=0000619) Train Loss: 0.4766, Train Steps/Sec: 0.13, Epoch: 0.12111132850714146, LR: 0.0003 +[2026-02-27 06:28:19] (step=0000620) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.1213069849344551, LR: 0.0003 +[2026-02-27 06:28:27] (step=0000621) Train Loss: 0.4770, Train Steps/Sec: 0.13, Epoch: 0.12150264136176873, LR: 0.0003 +[2026-02-27 06:28:34] (step=0000622) Train Loss: 0.4736, Train Steps/Sec: 0.13, Epoch: 0.12169829778908237, LR: 0.0003 +[2026-02-27 06:28:42] (step=0000623) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.12189395421639601, LR: 0.0003 +[2026-02-27 06:28:50] (step=0000624) Train Loss: 0.4796, Train Steps/Sec: 0.13, Epoch: 0.12208961064370964, LR: 0.0003 +[2026-02-27 06:28:58] (step=0000625) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.12228526707102329, LR: 0.0003 +[2026-02-27 06:29:06] (step=0000626) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.12248092349833692, LR: 0.0003 +[2026-02-27 06:29:14] (step=0000627) Train Loss: 0.4791, Train Steps/Sec: 0.13, Epoch: 0.12267657992565056, LR: 0.0003 +[2026-02-27 06:29:22] (step=0000628) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.1228722363529642, LR: 0.0003 +[2026-02-27 06:29:30] (step=0000629) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.12306789278027783, LR: 0.0003 +[2026-02-27 06:29:37] (step=0000630) Train Loss: 0.4790, Train Steps/Sec: 0.13, Epoch: 0.12326354920759147, LR: 0.0003 +[2026-02-27 06:29:45] (step=0000631) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.1234592056349051, LR: 0.0003 +[2026-02-27 06:29:53] (step=0000632) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.12365486206221875, LR: 0.0003 +[2026-02-27 06:30:01] (step=0000633) Train Loss: 0.4849, Train Steps/Sec: 0.13, Epoch: 0.12385051848953238, LR: 0.0003 +[2026-02-27 06:30:09] (step=0000634) Train Loss: 0.4828, Train Steps/Sec: 0.13, Epoch: 0.12404617491684602, LR: 0.0003 +[2026-02-27 06:30:17] (step=0000635) Train Loss: 0.4765, Train Steps/Sec: 0.13, Epoch: 0.12424183134415966, LR: 0.0003 +[2026-02-27 06:30:25] (step=0000636) Train Loss: 0.4822, Train Steps/Sec: 0.13, Epoch: 0.12443748777147329, LR: 0.0003 +[2026-02-27 06:30:32] (step=0000637) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.12463314419878693, LR: 0.0003 +[2026-02-27 06:30:40] (step=0000638) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.12482880062610056, LR: 0.0003 +[2026-02-27 06:30:48] (step=0000639) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.1250244570534142, LR: 0.0003 +[2026-02-27 06:30:56] (step=0000640) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.12522011348072784, LR: 0.0003 +[2026-02-27 06:31:04] (step=0000641) Train Loss: 0.4831, Train Steps/Sec: 0.13, Epoch: 0.12541576990804149, LR: 0.0003 +[2026-02-27 06:31:12] (step=0000642) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.1256114263353551, LR: 0.0003 +[2026-02-27 06:31:20] (step=0000643) Train Loss: 0.4750, Train Steps/Sec: 0.13, Epoch: 0.12580708276266875, LR: 0.0003 +[2026-02-27 06:31:27] (step=0000644) Train Loss: 0.4758, Train Steps/Sec: 0.13, Epoch: 0.1260027391899824, LR: 0.0003 +[2026-02-27 06:31:35] (step=0000645) Train Loss: 0.4782, Train Steps/Sec: 0.13, Epoch: 0.12619839561729604, LR: 0.0003 +[2026-02-27 06:31:43] (step=0000646) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.12639405204460966, LR: 0.0003 +[2026-02-27 06:31:51] (step=0000647) Train Loss: 0.4790, Train Steps/Sec: 0.13, Epoch: 0.1265897084719233, LR: 0.0003 +[2026-02-27 06:31:59] (step=0000648) Train Loss: 0.4795, Train Steps/Sec: 0.13, Epoch: 0.12678536489923695, LR: 0.0003 +[2026-02-27 06:32:07] (step=0000649) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.12698102132655056, LR: 0.0003 +[2026-02-27 06:32:15] (step=0000650) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.1271766777538642, LR: 0.0003 +[2026-02-27 06:32:22] (step=0000651) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.12737233418117785, LR: 0.0003 +[2026-02-27 06:32:30] (step=0000652) Train Loss: 0.4932, Train Steps/Sec: 0.13, Epoch: 0.1275679906084915, LR: 0.0003 +[2026-02-27 06:32:38] (step=0000653) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.12776364703580512, LR: 0.0003 +[2026-02-27 06:32:46] (step=0000654) Train Loss: 0.4837, Train Steps/Sec: 0.13, Epoch: 0.12795930346311876, LR: 0.0003 +[2026-02-27 06:32:54] (step=0000655) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.1281549598904324, LR: 0.0003 +[2026-02-27 06:33:02] (step=0000656) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.12835061631774602, LR: 0.0003 +[2026-02-27 06:33:10] (step=0000657) Train Loss: 0.4792, Train Steps/Sec: 0.13, Epoch: 0.12854627274505967, LR: 0.0003 +[2026-02-27 06:33:17] (step=0000658) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.12874192917237332, LR: 0.0003 +[2026-02-27 06:33:25] (step=0000659) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.12893758559968696, LR: 0.0003 +[2026-02-27 06:33:33] (step=0000660) Train Loss: 0.4820, Train Steps/Sec: 0.13, Epoch: 0.12913324202700058, LR: 0.0003 +[2026-02-27 06:33:41] (step=0000661) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.12932889845431422, LR: 0.0003 +[2026-02-27 06:33:49] (step=0000662) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.12952455488162787, LR: 0.0003 +[2026-02-27 06:33:57] (step=0000663) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.12972021130894149, LR: 0.0003 +[2026-02-27 06:34:05] (step=0000664) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 0.12991586773625513, LR: 0.0003 +[2026-02-27 06:34:12] (step=0000665) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 0.13011152416356878, LR: 0.0003 +[2026-02-27 06:34:20] (step=0000666) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.13030718059088242, LR: 0.0003 +[2026-02-27 06:34:28] (step=0000667) Train Loss: 0.4753, Train Steps/Sec: 0.13, Epoch: 0.13050283701819604, LR: 0.0003 +[2026-02-27 06:34:36] (step=0000668) Train Loss: 0.4810, Train Steps/Sec: 0.13, Epoch: 0.13069849344550968, LR: 0.0003 +[2026-02-27 06:34:44] (step=0000669) Train Loss: 0.4720, Train Steps/Sec: 0.13, Epoch: 0.13089414987282333, LR: 0.0003 +[2026-02-27 06:34:52] (step=0000670) Train Loss: 0.4884, Train Steps/Sec: 0.13, Epoch: 0.13108980630013695, LR: 0.0003 +[2026-02-27 06:34:59] (step=0000671) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.1312854627274506, LR: 0.0003 +[2026-02-27 06:35:07] (step=0000672) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.13148111915476424, LR: 0.0003 +[2026-02-27 06:35:15] (step=0000673) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.13167677558207788, LR: 0.0003 +[2026-02-27 06:35:23] (step=0000674) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.1318724320093915, LR: 0.0003 +[2026-02-27 06:35:31] (step=0000675) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.13206808843670514, LR: 0.0003 +[2026-02-27 06:35:39] (step=0000676) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.1322637448640188, LR: 0.0003 +[2026-02-27 06:35:47] (step=0000677) Train Loss: 0.4848, Train Steps/Sec: 0.13, Epoch: 0.1324594012913324, LR: 0.0003 +[2026-02-27 06:35:54] (step=0000678) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.13265505771864605, LR: 0.0003 +[2026-02-27 06:36:02] (step=0000679) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.1328507141459597, LR: 0.0003 +[2026-02-27 06:36:10] (step=0000680) Train Loss: 0.4819, Train Steps/Sec: 0.12, Epoch: 0.13304637057327334, LR: 0.0003 +[2026-02-27 06:36:18] (step=0000681) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.13324202700058696, LR: 0.0003 +[2026-02-27 06:36:26] (step=0000682) Train Loss: 0.4805, Train Steps/Sec: 0.13, Epoch: 0.1334376834279006, LR: 0.0003 +[2026-02-27 06:36:34] (step=0000683) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.13363333985521425, LR: 0.0003 +[2026-02-27 06:36:42] (step=0000684) Train Loss: 0.4864, Train Steps/Sec: 0.13, Epoch: 0.13382899628252787, LR: 0.0003 +[2026-02-27 06:36:50] (step=0000685) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.1340246527098415, LR: 0.0003 +[2026-02-27 06:36:58] (step=0000686) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.13422030913715516, LR: 0.0003 +[2026-02-27 06:37:05] (step=0000687) Train Loss: 0.4764, Train Steps/Sec: 0.13, Epoch: 0.1344159655644688, LR: 0.0003 +[2026-02-27 06:37:13] (step=0000688) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.13461162199178242, LR: 0.0003 +[2026-02-27 06:37:21] (step=0000689) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.13480727841909607, LR: 0.0003 +[2026-02-27 06:37:29] (step=0000690) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.1350029348464097, LR: 0.0003 +[2026-02-27 06:37:37] (step=0000691) Train Loss: 0.4766, Train Steps/Sec: 0.13, Epoch: 0.13519859127372333, LR: 0.0003 +[2026-02-27 06:37:45] (step=0000692) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.13539424770103697, LR: 0.0003 +[2026-02-27 06:37:53] (step=0000693) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.13558990412835062, LR: 0.0003 +[2026-02-27 06:38:00] (step=0000694) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.13578556055566426, LR: 0.0003 +[2026-02-27 06:38:08] (step=0000695) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.13598121698297788, LR: 0.0003 +[2026-02-27 06:38:16] (step=0000696) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.13617687341029153, LR: 0.0003 +[2026-02-27 06:38:24] (step=0000697) Train Loss: 0.5017, Train Steps/Sec: 0.13, Epoch: 0.13637252983760517, LR: 0.0003 +[2026-02-27 06:38:32] (step=0000698) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 0.1365681862649188, LR: 0.0003 +[2026-02-27 06:38:40] (step=0000699) Train Loss: 0.4807, Train Steps/Sec: 0.13, Epoch: 0.13676384269223243, LR: 0.0003 +[2026-02-27 06:38:47] (step=0000700) Train Loss: 0.4814, Train Steps/Sec: 0.13, Epoch: 0.13695949911954608, LR: 0.0003 +[2026-02-27 06:38:55] (step=0000701) Train Loss: 0.4772, Train Steps/Sec: 0.13, Epoch: 0.13715515554685972, LR: 0.0003 +[2026-02-27 06:39:03] (step=0000702) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.13735081197417334, LR: 0.0003 +[2026-02-27 06:39:11] (step=0000703) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.137546468401487, LR: 0.0003 +[2026-02-27 06:39:19] (step=0000704) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.13774212482880063, LR: 0.0003 +[2026-02-27 06:39:27] (step=0000705) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.13793778125611425, LR: 0.0003 +[2026-02-27 06:39:35] (step=0000706) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 0.1381334376834279, LR: 0.0003 +[2026-02-27 06:39:42] (step=0000707) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.13832909411074154, LR: 0.0003 +[2026-02-27 06:39:50] (step=0000708) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.13852475053805519, LR: 0.0003 +[2026-02-27 06:39:58] (step=0000709) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.1387204069653688, LR: 0.0003 +[2026-02-27 06:40:06] (step=0000710) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.13891606339268245, LR: 0.0003 +[2026-02-27 06:40:14] (step=0000711) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.1391117198199961, LR: 0.0003 +[2026-02-27 06:40:22] (step=0000712) Train Loss: 0.4780, Train Steps/Sec: 0.13, Epoch: 0.1393073762473097, LR: 0.0003 +[2026-02-27 06:40:30] (step=0000713) Train Loss: 0.4751, Train Steps/Sec: 0.13, Epoch: 0.13950303267462336, LR: 0.0003 +[2026-02-27 06:40:37] (step=0000714) Train Loss: 0.4750, Train Steps/Sec: 0.13, Epoch: 0.139698689101937, LR: 0.0003 +[2026-02-27 06:40:45] (step=0000715) Train Loss: 0.4753, Train Steps/Sec: 0.13, Epoch: 0.13989434552925065, LR: 0.0003 +[2026-02-27 06:40:53] (step=0000716) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.14009000195656426, LR: 0.0003 +[2026-02-27 06:41:01] (step=0000717) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.1402856583838779, LR: 0.0003 +[2026-02-27 06:41:09] (step=0000718) Train Loss: 0.4916, Train Steps/Sec: 0.13, Epoch: 0.14048131481119155, LR: 0.0003 +[2026-02-27 06:41:17] (step=0000719) Train Loss: 0.4814, Train Steps/Sec: 0.13, Epoch: 0.14067697123850517, LR: 0.0003 +[2026-02-27 06:41:25] (step=0000720) Train Loss: 0.4849, Train Steps/Sec: 0.13, Epoch: 0.14087262766581882, LR: 0.0003 +[2026-02-27 06:41:32] (step=0000721) Train Loss: 0.4853, Train Steps/Sec: 0.13, Epoch: 0.14106828409313246, LR: 0.0003 +[2026-02-27 06:41:40] (step=0000722) Train Loss: 0.4845, Train Steps/Sec: 0.13, Epoch: 0.1412639405204461, LR: 0.0003 +[2026-02-27 06:41:48] (step=0000723) Train Loss: 0.4794, Train Steps/Sec: 0.13, Epoch: 0.14145959694775972, LR: 0.0003 +[2026-02-27 06:41:56] (step=0000724) Train Loss: 0.4878, Train Steps/Sec: 0.13, Epoch: 0.14165525337507337, LR: 0.0003 +[2026-02-27 06:42:04] (step=0000725) Train Loss: 0.4757, Train Steps/Sec: 0.13, Epoch: 0.14185090980238702, LR: 0.0003 +[2026-02-27 06:42:12] (step=0000726) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.14204656622970063, LR: 0.0003 +[2026-02-27 06:42:19] (step=0000727) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.14224222265701428, LR: 0.0003 +[2026-02-27 06:42:28] (step=0000728) Train Loss: 0.4746, Train Steps/Sec: 0.12, Epoch: 0.14243787908432792, LR: 0.0003 +[2026-02-27 06:42:35] (step=0000729) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.14263353551164157, LR: 0.0003 +[2026-02-27 06:42:43] (step=0000730) Train Loss: 0.4800, Train Steps/Sec: 0.13, Epoch: 0.14282919193895519, LR: 0.0003 +[2026-02-27 06:42:51] (step=0000731) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.14302484836626883, LR: 0.0003 +[2026-02-27 06:42:59] (step=0000732) Train Loss: 0.4788, Train Steps/Sec: 0.13, Epoch: 0.14322050479358248, LR: 0.0003 +[2026-02-27 06:43:07] (step=0000733) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.1434161612208961, LR: 0.0003 +[2026-02-27 06:43:15] (step=0000734) Train Loss: 0.4769, Train Steps/Sec: 0.13, Epoch: 0.14361181764820974, LR: 0.0003 +[2026-02-27 06:43:23] (step=0000735) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.14380747407552338, LR: 0.0003 +[2026-02-27 06:43:30] (step=0000736) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.14400313050283703, LR: 0.0003 +[2026-02-27 06:43:38] (step=0000737) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.14419878693015065, LR: 0.0003 +[2026-02-27 06:43:46] (step=0000738) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.1443944433574643, LR: 0.0003 +[2026-02-27 06:43:54] (step=0000739) Train Loss: 0.4782, Train Steps/Sec: 0.13, Epoch: 0.14459009978477794, LR: 0.0003 +[2026-02-27 06:44:02] (step=0000740) Train Loss: 0.4750, Train Steps/Sec: 0.13, Epoch: 0.14478575621209155, LR: 0.0003 +[2026-02-27 06:44:10] (step=0000741) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.1449814126394052, LR: 0.0003 +[2026-02-27 06:44:18] (step=0000742) Train Loss: 0.4773, Train Steps/Sec: 0.13, Epoch: 0.14517706906671884, LR: 0.0003 +[2026-02-27 06:44:25] (step=0000743) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.1453727254940325, LR: 0.0003 +[2026-02-27 06:44:33] (step=0000744) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.1455683819213461, LR: 0.0003 +[2026-02-27 06:44:41] (step=0000745) Train Loss: 0.4808, Train Steps/Sec: 0.13, Epoch: 0.14576403834865975, LR: 0.0003 +[2026-02-27 06:44:49] (step=0000746) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.1459596947759734, LR: 0.0003 +[2026-02-27 06:44:57] (step=0000747) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.14615535120328702, LR: 0.0003 +[2026-02-27 06:45:05] (step=0000748) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 0.14635100763060066, LR: 0.0003 +[2026-02-27 06:45:13] (step=0000749) Train Loss: 0.4841, Train Steps/Sec: 0.13, Epoch: 0.1465466640579143, LR: 0.0003 +[2026-02-27 06:45:20] (step=0000750) Train Loss: 0.4855, Train Steps/Sec: 0.13, Epoch: 0.14674232048522795, LR: 0.0003 +[2026-02-27 06:45:28] (step=0000751) Train Loss: 0.4783, Train Steps/Sec: 0.13, Epoch: 0.14693797691254157, LR: 0.0003 +[2026-02-27 06:45:36] (step=0000752) Train Loss: 0.4853, Train Steps/Sec: 0.13, Epoch: 0.1471336333398552, LR: 0.0003 +[2026-02-27 06:45:44] (step=0000753) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.14732928976716886, LR: 0.0003 +[2026-02-27 06:45:52] (step=0000754) Train Loss: 0.4875, Train Steps/Sec: 0.13, Epoch: 0.14752494619448248, LR: 0.0003 +[2026-02-27 06:46:00] (step=0000755) Train Loss: 0.4809, Train Steps/Sec: 0.13, Epoch: 0.14772060262179612, LR: 0.0003 +[2026-02-27 06:46:08] (step=0000756) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.14791625904910977, LR: 0.0003 +[2026-02-27 06:46:15] (step=0000757) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.1481119154764234, LR: 0.0003 +[2026-02-27 06:46:23] (step=0000758) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.14830757190373703, LR: 0.0003 +[2026-02-27 06:46:31] (step=0000759) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.14850322833105067, LR: 0.0003 +[2026-02-27 06:46:39] (step=0000760) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.14869888475836432, LR: 0.0003 +[2026-02-27 06:46:47] (step=0000761) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.14889454118567794, LR: 0.0003 +[2026-02-27 06:46:55] (step=0000762) Train Loss: 0.4796, Train Steps/Sec: 0.13, Epoch: 0.14909019761299158, LR: 0.0003 +[2026-02-27 06:47:03] (step=0000763) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.14928585404030523, LR: 0.0003 +[2026-02-27 06:47:10] (step=0000764) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.14948151046761887, LR: 0.0003 +[2026-02-27 06:47:18] (step=0000765) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.1496771668949325, LR: 0.0003 +[2026-02-27 06:47:26] (step=0000766) Train Loss: 0.4772, Train Steps/Sec: 0.13, Epoch: 0.14987282332224613, LR: 0.0003 +[2026-02-27 06:47:34] (step=0000767) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.15006847974955978, LR: 0.0003 +[2026-02-27 06:47:42] (step=0000768) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.1502641361768734, LR: 0.0003 +[2026-02-27 06:47:50] (step=0000769) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.15045979260418704, LR: 0.0003 +[2026-02-27 06:47:58] (step=0000770) Train Loss: 0.4874, Train Steps/Sec: 0.13, Epoch: 0.1506554490315007, LR: 0.0003 +[2026-02-27 06:48:05] (step=0000771) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.15085110545881433, LR: 0.0003 +[2026-02-27 06:48:13] (step=0000772) Train Loss: 0.4858, Train Steps/Sec: 0.13, Epoch: 0.15104676188612795, LR: 0.0003 +[2026-02-27 06:48:21] (step=0000773) Train Loss: 0.4713, Train Steps/Sec: 0.12, Epoch: 0.1512424183134416, LR: 0.0003 +[2026-02-27 06:48:29] (step=0000774) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.15143807474075524, LR: 0.0003 +[2026-02-27 06:48:37] (step=0000775) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.15163373116806886, LR: 0.0003 +[2026-02-27 06:48:45] (step=0000776) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.1518293875953825, LR: 0.0003 +[2026-02-27 06:48:53] (step=0000777) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.15202504402269615, LR: 0.0003 +[2026-02-27 06:49:00] (step=0000778) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.1522207004500098, LR: 0.0003 +[2026-02-27 06:49:08] (step=0000779) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.1524163568773234, LR: 0.0003 +[2026-02-27 06:49:16] (step=0000780) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.15261201330463706, LR: 0.0003 +[2026-02-27 06:49:24] (step=0000781) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.1528076697319507, LR: 0.0003 +[2026-02-27 06:49:32] (step=0000782) Train Loss: 0.4768, Train Steps/Sec: 0.13, Epoch: 0.15300332615926432, LR: 0.0003 +[2026-02-27 06:49:40] (step=0000783) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.15319898258657796, LR: 0.0003 +[2026-02-27 06:49:48] (step=0000784) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.1533946390138916, LR: 0.0003 +[2026-02-27 06:49:55] (step=0000785) Train Loss: 0.4767, Train Steps/Sec: 0.13, Epoch: 0.15359029544120525, LR: 0.0003 +[2026-02-27 06:50:03] (step=0000786) Train Loss: 0.4725, Train Steps/Sec: 0.12, Epoch: 0.15378595186851887, LR: 0.0003 +[2026-02-27 06:50:11] (step=0000787) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.15398160829583252, LR: 0.0003 +[2026-02-27 06:50:19] (step=0000788) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.15417726472314616, LR: 0.0003 +[2026-02-27 06:50:27] (step=0000789) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.15437292115045978, LR: 0.0003 +[2026-02-27 06:50:35] (step=0000790) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.15456857757777342, LR: 0.0003 +[2026-02-27 06:50:43] (step=0000791) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.15476423400508707, LR: 0.0003 +[2026-02-27 06:50:51] (step=0000792) Train Loss: 0.4863, Train Steps/Sec: 0.13, Epoch: 0.15495989043240072, LR: 0.0003 +[2026-02-27 06:50:58] (step=0000793) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.15515554685971433, LR: 0.0003 +[2026-02-27 06:51:06] (step=0000794) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.15535120328702798, LR: 0.0003 +[2026-02-27 06:51:14] (step=0000795) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.15554685971434162, LR: 0.0003 +[2026-02-27 06:51:22] (step=0000796) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.15574251614165524, LR: 0.0003 +[2026-02-27 06:51:30] (step=0000797) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.15593817256896889, LR: 0.0003 +[2026-02-27 06:51:38] (step=0000798) Train Loss: 0.4804, Train Steps/Sec: 0.13, Epoch: 0.15613382899628253, LR: 0.0003 +[2026-02-27 06:51:46] (step=0000799) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.15632948542359618, LR: 0.0003 +[2026-02-27 06:51:53] (step=0000800) Train Loss: 0.4784, Train Steps/Sec: 0.13, Epoch: 0.1565251418509098, LR: 0.0003 +[2026-02-27 06:52:01] (step=0000801) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.15672079827822344, LR: 0.0003 +[2026-02-27 06:52:09] (step=0000802) Train Loss: 0.4736, Train Steps/Sec: 0.13, Epoch: 0.15691645470553708, LR: 0.0003 +[2026-02-27 06:52:17] (step=0000803) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.1571121111328507, LR: 0.0003 +[2026-02-27 06:52:25] (step=0000804) Train Loss: 0.4743, Train Steps/Sec: 0.13, Epoch: 0.15730776756016435, LR: 0.0003 +[2026-02-27 06:52:33] (step=0000805) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.157503423987478, LR: 0.0003 +[2026-02-27 06:52:41] (step=0000806) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.15769908041479164, LR: 0.0003 +[2026-02-27 06:52:49] (step=0000807) Train Loss: 0.4782, Train Steps/Sec: 0.13, Epoch: 0.15789473684210525, LR: 0.0003 +[2026-02-27 06:52:56] (step=0000808) Train Loss: 0.4807, Train Steps/Sec: 0.13, Epoch: 0.1580903932694189, LR: 0.0003 +[2026-02-27 06:53:04] (step=0000809) Train Loss: 0.4782, Train Steps/Sec: 0.13, Epoch: 0.15828604969673254, LR: 0.0003 +[2026-02-27 06:53:12] (step=0000810) Train Loss: 0.4875, Train Steps/Sec: 0.13, Epoch: 0.15848170612404616, LR: 0.0003 +[2026-02-27 06:53:20] (step=0000811) Train Loss: 0.4811, Train Steps/Sec: 0.13, Epoch: 0.1586773625513598, LR: 0.0003 +[2026-02-27 06:53:28] (step=0000812) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.15887301897867345, LR: 0.0003 +[2026-02-27 06:53:36] (step=0000813) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.1590686754059871, LR: 0.0003 +[2026-02-27 06:53:44] (step=0000814) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.15926433183330072, LR: 0.0003 +[2026-02-27 06:53:51] (step=0000815) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.15945998826061436, LR: 0.0003 +[2026-02-27 06:53:59] (step=0000816) Train Loss: 0.4757, Train Steps/Sec: 0.13, Epoch: 0.159655644687928, LR: 0.0003 +[2026-02-27 06:54:07] (step=0000817) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.15985130111524162, LR: 0.0003 +[2026-02-27 06:54:15] (step=0000818) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.16004695754255527, LR: 0.0003 +[2026-02-27 06:54:23] (step=0000819) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.1602426139698689, LR: 0.0003 +[2026-02-27 06:54:31] (step=0000820) Train Loss: 0.4798, Train Steps/Sec: 0.13, Epoch: 0.16043827039718256, LR: 0.0003 +[2026-02-27 06:54:39] (step=0000821) Train Loss: 0.4787, Train Steps/Sec: 0.13, Epoch: 0.16063392682449618, LR: 0.0003 +[2026-02-27 06:54:46] (step=0000822) Train Loss: 0.4805, Train Steps/Sec: 0.13, Epoch: 0.16082958325180982, LR: 0.0003 +[2026-02-27 06:54:54] (step=0000823) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.16102523967912347, LR: 0.0003 +[2026-02-27 06:55:02] (step=0000824) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.16122089610643708, LR: 0.0003 +[2026-02-27 06:55:10] (step=0000825) Train Loss: 0.4830, Train Steps/Sec: 0.13, Epoch: 0.16141655253375073, LR: 0.0003 +[2026-02-27 06:55:18] (step=0000826) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.16161220896106437, LR: 0.0003 +[2026-02-27 06:55:26] (step=0000827) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.16180786538837802, LR: 0.0003 +[2026-02-27 06:55:34] (step=0000828) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.16200352181569164, LR: 0.0003 +[2026-02-27 06:55:41] (step=0000829) Train Loss: 0.4843, Train Steps/Sec: 0.13, Epoch: 0.16219917824300528, LR: 0.0003 +[2026-02-27 06:55:49] (step=0000830) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.16239483467031893, LR: 0.0003 +[2026-02-27 06:55:57] (step=0000831) Train Loss: 0.4812, Train Steps/Sec: 0.13, Epoch: 0.16259049109763254, LR: 0.0003 +[2026-02-27 06:56:05] (step=0000832) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.1627861475249462, LR: 0.0003 +[2026-02-27 06:56:13] (step=0000833) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.16298180395225983, LR: 0.0003 +[2026-02-27 06:56:21] (step=0000834) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 0.16317746037957348, LR: 0.0003 +[2026-02-27 06:56:29] (step=0000835) Train Loss: 0.4746, Train Steps/Sec: 0.13, Epoch: 0.1633731168068871, LR: 0.0003 +[2026-02-27 06:56:37] (step=0000836) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.16356877323420074, LR: 0.0003 +[2026-02-27 06:56:44] (step=0000837) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.1637644296615144, LR: 0.0003 +[2026-02-27 06:56:52] (step=0000838) Train Loss: 0.4750, Train Steps/Sec: 0.13, Epoch: 0.163960086088828, LR: 0.0003 +[2026-02-27 06:57:00] (step=0000839) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.16415574251614165, LR: 0.0003 +[2026-02-27 06:57:08] (step=0000840) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.1643513989434553, LR: 0.0003 +[2026-02-27 06:57:16] (step=0000841) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.16454705537076894, LR: 0.0003 +[2026-02-27 06:57:24] (step=0000842) Train Loss: 0.4903, Train Steps/Sec: 0.13, Epoch: 0.16474271179808256, LR: 0.0003 +[2026-02-27 06:57:31] (step=0000843) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.1649383682253962, LR: 0.0003 +[2026-02-27 06:57:39] (step=0000844) Train Loss: 0.4785, Train Steps/Sec: 0.13, Epoch: 0.16513402465270985, LR: 0.0003 +[2026-02-27 06:57:47] (step=0000845) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.16532968108002347, LR: 0.0003 +[2026-02-27 06:57:55] (step=0000846) Train Loss: 0.4836, Train Steps/Sec: 0.13, Epoch: 0.1655253375073371, LR: 0.0003 +[2026-02-27 06:58:03] (step=0000847) Train Loss: 0.4794, Train Steps/Sec: 0.13, Epoch: 0.16572099393465076, LR: 0.0003 +[2026-02-27 06:58:11] (step=0000848) Train Loss: 0.4698, Train Steps/Sec: 0.13, Epoch: 0.1659166503619644, LR: 0.0003 +[2026-02-27 06:58:19] (step=0000849) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.16611230678927802, LR: 0.0003 +[2026-02-27 06:58:26] (step=0000850) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.16630796321659166, LR: 0.0003 +[2026-02-27 06:58:34] (step=0000851) Train Loss: 0.4802, Train Steps/Sec: 0.13, Epoch: 0.1665036196439053, LR: 0.0003 +[2026-02-27 06:58:42] (step=0000852) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.16669927607121893, LR: 0.0003 +[2026-02-27 06:58:50] (step=0000853) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.16689493249853257, LR: 0.0003 +[2026-02-27 06:58:58] (step=0000854) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.16709058892584622, LR: 0.0003 +[2026-02-27 06:59:06] (step=0000855) Train Loss: 0.4698, Train Steps/Sec: 0.13, Epoch: 0.16728624535315986, LR: 0.0003 +[2026-02-27 06:59:14] (step=0000856) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.16748190178047348, LR: 0.0003 +[2026-02-27 06:59:21] (step=0000857) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.16767755820778713, LR: 0.0003 +[2026-02-27 06:59:29] (step=0000858) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.16787321463510077, LR: 0.0003 +[2026-02-27 06:59:37] (step=0000859) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.1680688710624144, LR: 0.0003 +[2026-02-27 06:59:45] (step=0000860) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.16826452748972803, LR: 0.0003 +[2026-02-27 06:59:53] (step=0000861) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 0.16846018391704168, LR: 0.0003 +[2026-02-27 07:00:01] (step=0000862) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.16865584034435532, LR: 0.0003 +[2026-02-27 07:00:09] (step=0000863) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.16885149677166894, LR: 0.0003 +[2026-02-27 07:00:16] (step=0000864) Train Loss: 0.5019, Train Steps/Sec: 0.13, Epoch: 0.16904715319898259, LR: 0.0003 +[2026-02-27 07:00:24] (step=0000865) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.16924280962629623, LR: 0.0003 +[2026-02-27 07:00:32] (step=0000866) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.16943846605360985, LR: 0.0003 +[2026-02-27 07:00:40] (step=0000867) Train Loss: 0.4794, Train Steps/Sec: 0.13, Epoch: 0.1696341224809235, LR: 0.0003 +[2026-02-27 07:00:48] (step=0000868) Train Loss: 0.4852, Train Steps/Sec: 0.13, Epoch: 0.16982977890823714, LR: 0.0003 +[2026-02-27 07:00:56] (step=0000869) Train Loss: 0.4760, Train Steps/Sec: 0.12, Epoch: 0.17002543533555078, LR: 0.0003 +[2026-02-27 07:01:04] (step=0000870) Train Loss: 0.4806, Train Steps/Sec: 0.13, Epoch: 0.1702210917628644, LR: 0.0003 +[2026-02-27 07:01:12] (step=0000871) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.17041674819017805, LR: 0.0003 +[2026-02-27 07:01:19] (step=0000872) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.1706124046174917, LR: 0.0003 +[2026-02-27 07:01:27] (step=0000873) Train Loss: 0.4775, Train Steps/Sec: 0.13, Epoch: 0.1708080610448053, LR: 0.0003 +[2026-02-27 07:01:35] (step=0000874) Train Loss: 0.4771, Train Steps/Sec: 0.13, Epoch: 0.17100371747211895, LR: 0.0003 +[2026-02-27 07:01:43] (step=0000875) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.1711993738994326, LR: 0.0003 +[2026-02-27 07:01:51] (step=0000876) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.17139503032674624, LR: 0.0003 +[2026-02-27 07:01:59] (step=0000877) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.17159068675405986, LR: 0.0003 +[2026-02-27 07:02:07] (step=0000878) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.1717863431813735, LR: 0.0003 +[2026-02-27 07:02:14] (step=0000879) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.17198199960868715, LR: 0.0003 +[2026-02-27 07:02:22] (step=0000880) Train Loss: 0.4863, Train Steps/Sec: 0.13, Epoch: 0.17217765603600077, LR: 0.0003 +[2026-02-27 07:02:30] (step=0000881) Train Loss: 0.4852, Train Steps/Sec: 0.13, Epoch: 0.17237331246331442, LR: 0.0003 +[2026-02-27 07:02:38] (step=0000882) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.17256896889062806, LR: 0.0003 +[2026-02-27 07:02:46] (step=0000883) Train Loss: 0.4665, Train Steps/Sec: 0.12, Epoch: 0.1727646253179417, LR: 0.0003 +[2026-02-27 07:02:54] (step=0000884) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.17296028174525532, LR: 0.0003 +[2026-02-27 07:03:02] (step=0000885) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.17315593817256897, LR: 0.0003 +[2026-02-27 07:03:10] (step=0000886) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.1733515945998826, LR: 0.0003 +[2026-02-27 07:03:17] (step=0000887) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.17354725102719623, LR: 0.0003 +[2026-02-27 07:03:25] (step=0000888) Train Loss: 0.4836, Train Steps/Sec: 0.13, Epoch: 0.17374290745450988, LR: 0.0003 +[2026-02-27 07:03:33] (step=0000889) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.17393856388182352, LR: 0.0003 +[2026-02-27 07:03:41] (step=0000890) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.17413422030913717, LR: 0.0003 +[2026-02-27 07:03:49] (step=0000891) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.17432987673645078, LR: 0.0003 +[2026-02-27 07:03:57] (step=0000892) Train Loss: 0.4816, Train Steps/Sec: 0.13, Epoch: 0.17452553316376443, LR: 0.0003 +[2026-02-27 07:04:05] (step=0000893) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.17472118959107807, LR: 0.0003 +[2026-02-27 07:04:12] (step=0000894) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.1749168460183917, LR: 0.0003 +[2026-02-27 07:04:20] (step=0000895) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.17511250244570534, LR: 0.0003 +[2026-02-27 07:04:28] (step=0000896) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.17530815887301898, LR: 0.0003 +[2026-02-27 07:04:36] (step=0000897) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.17550381530033263, LR: 0.0003 +[2026-02-27 07:04:44] (step=0000898) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.17569947172764624, LR: 0.0003 +[2026-02-27 07:04:52] (step=0000899) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.1758951281549599, LR: 0.0003 +[2026-02-27 07:05:00] (step=0000900) Train Loss: 0.4857, Train Steps/Sec: 0.13, Epoch: 0.17609078458227354, LR: 0.0003 +[2026-02-27 07:05:07] (step=0000901) Train Loss: 0.4909, Train Steps/Sec: 0.13, Epoch: 0.17628644100958715, LR: 0.0003 +[2026-02-27 07:05:15] (step=0000902) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.1764820974369008, LR: 0.0003 +[2026-02-27 07:05:23] (step=0000903) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.17667775386421444, LR: 0.0003 +[2026-02-27 07:05:31] (step=0000904) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.1768734102915281, LR: 0.0003 +[2026-02-27 07:05:39] (step=0000905) Train Loss: 0.4789, Train Steps/Sec: 0.13, Epoch: 0.1770690667188417, LR: 0.0003 +[2026-02-27 07:05:47] (step=0000906) Train Loss: 0.4813, Train Steps/Sec: 0.13, Epoch: 0.17726472314615535, LR: 0.0003 +[2026-02-27 07:05:54] (step=0000907) Train Loss: 0.4824, Train Steps/Sec: 0.13, Epoch: 0.177460379573469, LR: 0.0003 +[2026-02-27 07:06:02] (step=0000908) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.1776560360007826, LR: 0.0003 +[2026-02-27 07:06:10] (step=0000909) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.17785169242809626, LR: 0.0003 +[2026-02-27 07:06:18] (step=0000910) Train Loss: 0.4804, Train Steps/Sec: 0.13, Epoch: 0.1780473488554099, LR: 0.0003 +[2026-02-27 07:06:26] (step=0000911) Train Loss: 0.4813, Train Steps/Sec: 0.13, Epoch: 0.17824300528272355, LR: 0.0003 +[2026-02-27 07:06:34] (step=0000912) Train Loss: 0.4876, Train Steps/Sec: 0.13, Epoch: 0.17843866171003717, LR: 0.0003 +[2026-02-27 07:06:42] (step=0000913) Train Loss: 0.4771, Train Steps/Sec: 0.13, Epoch: 0.1786343181373508, LR: 0.0003 +[2026-02-27 07:06:50] (step=0000914) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 0.17882997456466446, LR: 0.0003 +[2026-02-27 07:06:57] (step=0000915) Train Loss: 0.4814, Train Steps/Sec: 0.13, Epoch: 0.17902563099197807, LR: 0.0003 +[2026-02-27 07:07:05] (step=0000916) Train Loss: 0.4799, Train Steps/Sec: 0.13, Epoch: 0.17922128741929172, LR: 0.0003 +[2026-02-27 07:07:13] (step=0000917) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.17941694384660536, LR: 0.0003 +[2026-02-27 07:07:21] (step=0000918) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.179612600273919, LR: 0.0003 +[2026-02-27 07:07:29] (step=0000919) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.17980825670123263, LR: 0.0003 +[2026-02-27 07:07:37] (step=0000920) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 0.18000391312854627, LR: 0.0003 +[2026-02-27 07:07:45] (step=0000921) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.18019956955585992, LR: 0.0003 +[2026-02-27 07:07:52] (step=0000922) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.18039522598317353, LR: 0.0003 +[2026-02-27 07:08:00] (step=0000923) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.18059088241048718, LR: 0.0003 +[2026-02-27 07:08:08] (step=0000924) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.18078653883780083, LR: 0.0003 +[2026-02-27 07:08:16] (step=0000925) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.18098219526511447, LR: 0.0003 +[2026-02-27 07:08:24] (step=0000926) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.1811778516924281, LR: 0.0003 +[2026-02-27 07:08:32] (step=0000927) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.18137350811974173, LR: 0.0003 +[2026-02-27 07:08:40] (step=0000928) Train Loss: 0.4756, Train Steps/Sec: 0.13, Epoch: 0.18156916454705538, LR: 0.0003 +[2026-02-27 07:08:47] (step=0000929) Train Loss: 0.4837, Train Steps/Sec: 0.13, Epoch: 0.181764820974369, LR: 0.0003 +[2026-02-27 07:08:55] (step=0000930) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.18196047740168264, LR: 0.0003 +[2026-02-27 07:09:03] (step=0000931) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.1821561338289963, LR: 0.0003 +[2026-02-27 07:09:11] (step=0000932) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.18235179025630993, LR: 0.0003 +[2026-02-27 07:09:19] (step=0000933) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.18254744668362355, LR: 0.0003 +[2026-02-27 07:09:27] (step=0000934) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.1827431031109372, LR: 0.0003 +[2026-02-27 07:09:35] (step=0000935) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.18293875953825084, LR: 0.0003 +[2026-02-27 07:09:42] (step=0000936) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.18313441596556446, LR: 0.0003 +[2026-02-27 07:09:50] (step=0000937) Train Loss: 0.4720, Train Steps/Sec: 0.13, Epoch: 0.1833300723928781, LR: 0.0003 +[2026-02-27 07:09:58] (step=0000938) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.18352572882019175, LR: 0.0003 +[2026-02-27 07:10:06] (step=0000939) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.1837213852475054, LR: 0.0003 +[2026-02-27 07:10:14] (step=0000940) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.183917041674819, LR: 0.0003 +[2026-02-27 07:10:22] (step=0000941) Train Loss: 0.4776, Train Steps/Sec: 0.13, Epoch: 0.18411269810213265, LR: 0.0003 +[2026-02-27 07:10:30] (step=0000942) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.1843083545294463, LR: 0.0003 +[2026-02-27 07:10:37] (step=0000943) Train Loss: 0.4935, Train Steps/Sec: 0.13, Epoch: 0.18450401095675992, LR: 0.0003 +[2026-02-27 07:10:45] (step=0000944) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.18469966738407356, LR: 0.0003 +[2026-02-27 07:10:53] (step=0000945) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.1848953238113872, LR: 0.0003 +[2026-02-27 07:11:01] (step=0000946) Train Loss: 0.4755, Train Steps/Sec: 0.13, Epoch: 0.18509098023870085, LR: 0.0003 +[2026-02-27 07:11:09] (step=0000947) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.18528663666601447, LR: 0.0003 +[2026-02-27 07:11:17] (step=0000948) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.18548229309332812, LR: 0.0003 +[2026-02-27 07:11:25] (step=0000949) Train Loss: 0.4822, Train Steps/Sec: 0.13, Epoch: 0.18567794952064176, LR: 0.0003 +[2026-02-27 07:11:32] (step=0000950) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.18587360594795538, LR: 0.0003 +[2026-02-27 07:11:40] (step=0000951) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.18606926237526902, LR: 0.0003 +[2026-02-27 07:11:48] (step=0000952) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.18626491880258267, LR: 0.0003 +[2026-02-27 07:11:56] (step=0000953) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 0.1864605752298963, LR: 0.0003 +[2026-02-27 07:12:04] (step=0000954) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.18665623165720993, LR: 0.0003 +[2026-02-27 07:12:12] (step=0000955) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.18685188808452358, LR: 0.0003 +[2026-02-27 07:12:20] (step=0000956) Train Loss: 0.4777, Train Steps/Sec: 0.13, Epoch: 0.18704754451183722, LR: 0.0003 +[2026-02-27 07:12:27] (step=0000957) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.18724320093915084, LR: 0.0003 +[2026-02-27 07:12:35] (step=0000958) Train Loss: 0.4830, Train Steps/Sec: 0.13, Epoch: 0.18743885736646448, LR: 0.0003 +[2026-02-27 07:12:43] (step=0000959) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.18763451379377813, LR: 0.0003 +[2026-02-27 07:12:51] (step=0000960) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.18783017022109177, LR: 0.0003 +[2026-02-27 07:12:59] (step=0000961) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.1880258266484054, LR: 0.0003 +[2026-02-27 07:13:07] (step=0000962) Train Loss: 0.4689, Train Steps/Sec: 0.12, Epoch: 0.18822148307571904, LR: 0.0003 +[2026-02-27 07:13:15] (step=0000963) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.18841713950303268, LR: 0.0003 +[2026-02-27 07:13:22] (step=0000964) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.1886127959303463, LR: 0.0003 +[2026-02-27 07:13:30] (step=0000965) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.18880845235765994, LR: 0.0003 +[2026-02-27 07:13:38] (step=0000966) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.1890041087849736, LR: 0.0003 +[2026-02-27 07:13:46] (step=0000967) Train Loss: 0.4775, Train Steps/Sec: 0.13, Epoch: 0.18919976521228724, LR: 0.0003 +[2026-02-27 07:13:54] (step=0000968) Train Loss: 0.4854, Train Steps/Sec: 0.13, Epoch: 0.18939542163960085, LR: 0.0003 +[2026-02-27 07:14:02] (step=0000969) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.1895910780669145, LR: 0.0003 +[2026-02-27 07:14:10] (step=0000970) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.18978673449422814, LR: 0.0003 +[2026-02-27 07:14:18] (step=0000971) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.18998239092154176, LR: 0.0003 +[2026-02-27 07:14:25] (step=0000972) Train Loss: 0.4804, Train Steps/Sec: 0.13, Epoch: 0.1901780473488554, LR: 0.0003 +[2026-02-27 07:14:33] (step=0000973) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.19037370377616905, LR: 0.0003 +[2026-02-27 07:14:41] (step=0000974) Train Loss: 0.4798, Train Steps/Sec: 0.13, Epoch: 0.1905693602034827, LR: 0.0003 +[2026-02-27 07:14:49] (step=0000975) Train Loss: 0.4820, Train Steps/Sec: 0.13, Epoch: 0.1907650166307963, LR: 0.0003 +[2026-02-27 07:14:57] (step=0000976) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.19096067305810996, LR: 0.0003 +[2026-02-27 07:15:05] (step=0000977) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.1911563294854236, LR: 0.0003 +[2026-02-27 07:15:12] (step=0000978) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.19135198591273722, LR: 0.0003 +[2026-02-27 07:15:20] (step=0000979) Train Loss: 0.4766, Train Steps/Sec: 0.13, Epoch: 0.19154764234005087, LR: 0.0003 +[2026-02-27 07:15:28] (step=0000980) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.1917432987673645, LR: 0.0003 +[2026-02-27 07:15:36] (step=0000981) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.19193895519467816, LR: 0.0003 +[2026-02-27 07:15:44] (step=0000982) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.19213461162199177, LR: 0.0003 +[2026-02-27 07:15:52] (step=0000983) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.19233026804930542, LR: 0.0003 +[2026-02-27 07:16:00] (step=0000984) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.19252592447661906, LR: 0.0003 +[2026-02-27 07:16:08] (step=0000985) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.19272158090393268, LR: 0.0003 +[2026-02-27 07:16:15] (step=0000986) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.19291723733124633, LR: 0.0003 +[2026-02-27 07:16:23] (step=0000987) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.19311289375855997, LR: 0.0003 +[2026-02-27 07:16:31] (step=0000988) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.19330855018587362, LR: 0.0003 +[2026-02-27 07:16:39] (step=0000989) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 0.19350420661318724, LR: 0.0003 +[2026-02-27 07:16:47] (step=0000990) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 0.19369986304050088, LR: 0.0003 +[2026-02-27 07:16:55] (step=0000991) Train Loss: 0.4861, Train Steps/Sec: 0.13, Epoch: 0.19389551946781453, LR: 0.0003 +[2026-02-27 07:17:03] (step=0000992) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 0.19409117589512814, LR: 0.0003 +[2026-02-27 07:17:10] (step=0000993) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.1942868323224418, LR: 0.0003 +[2026-02-27 07:17:18] (step=0000994) Train Loss: 0.4796, Train Steps/Sec: 0.13, Epoch: 0.19448248874975543, LR: 0.0003 +[2026-02-27 07:17:26] (step=0000995) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.19467814517706908, LR: 0.0003 +[2026-02-27 07:17:34] (step=0000996) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.1948738016043827, LR: 0.0003 +[2026-02-27 07:17:42] (step=0000997) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.19506945803169634, LR: 0.0003 +[2026-02-27 07:17:50] (step=0000998) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.19526511445901, LR: 0.0003 +[2026-02-27 07:17:57] (step=0000999) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.1954607708863236, LR: 0.0003 +[2026-02-27 07:18:05] (step=0001000) Train Loss: 0.4777, Train Steps/Sec: 0.13, Epoch: 0.19565642731363725, LR: 0.0003 +[2026-02-27 07:18:05] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0001000/ +[2026-02-27 07:18:13] (step=0001001) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.1958520837409509, LR: 0.0003 +[2026-02-27 07:18:21] (step=0001002) Train Loss: 0.4832, Train Steps/Sec: 0.13, Epoch: 0.19604774016826454, LR: 0.0003 +[2026-02-27 07:18:29] (step=0001003) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.19624339659557816, LR: 0.0003 +[2026-02-27 07:18:37] (step=0001004) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.1964390530228918, LR: 0.0003 +[2026-02-27 07:18:45] (step=0001005) Train Loss: 0.4788, Train Steps/Sec: 0.13, Epoch: 0.19663470945020545, LR: 0.0003 +[2026-02-27 07:18:52] (step=0001006) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.19683036587751906, LR: 0.0003 +[2026-02-27 07:19:00] (step=0001007) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 0.1970260223048327, LR: 0.0003 +[2026-02-27 07:19:08] (step=0001008) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.19722167873214635, LR: 0.0003 +[2026-02-27 07:19:16] (step=0001009) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.19741733515946, LR: 0.0003 +[2026-02-27 07:19:24] (step=0001010) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.19761299158677362, LR: 0.0003 +[2026-02-27 07:19:32] (step=0001011) Train Loss: 0.4828, Train Steps/Sec: 0.13, Epoch: 0.19780864801408726, LR: 0.0003 +[2026-02-27 07:19:40] (step=0001012) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.1980043044414009, LR: 0.0003 +[2026-02-27 07:19:48] (step=0001013) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.19819996086871453, LR: 0.0003 +[2026-02-27 07:19:55] (step=0001014) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.19839561729602817, LR: 0.0003 +[2026-02-27 07:20:03] (step=0001015) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.19859127372334182, LR: 0.0003 +[2026-02-27 07:20:11] (step=0001016) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.19878693015065546, LR: 0.0003 +[2026-02-27 07:20:19] (step=0001017) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.19898258657796908, LR: 0.0003 +[2026-02-27 07:20:27] (step=0001018) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.19917824300528272, LR: 0.0003 +[2026-02-27 07:20:35] (step=0001019) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.19937389943259637, LR: 0.0003 +[2026-02-27 07:20:43] (step=0001020) Train Loss: 0.4789, Train Steps/Sec: 0.13, Epoch: 0.19956955585991, LR: 0.0003 +[2026-02-27 07:20:50] (step=0001021) Train Loss: 0.4777, Train Steps/Sec: 0.13, Epoch: 0.19976521228722363, LR: 0.0003 +[2026-02-27 07:20:58] (step=0001022) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 0.19996086871453728, LR: 0.0003 +[2026-02-27 07:21:06] (step=0001023) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.20015652514185092, LR: 0.0003 +[2026-02-27 07:21:14] (step=0001024) Train Loss: 0.4755, Train Steps/Sec: 0.13, Epoch: 0.20035218156916454, LR: 0.0003 +[2026-02-27 07:21:22] (step=0001025) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.20054783799647818, LR: 0.0003 +[2026-02-27 07:21:30] (step=0001026) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.20074349442379183, LR: 0.0003 +[2026-02-27 07:21:38] (step=0001027) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 0.20093915085110545, LR: 0.0003 +[2026-02-27 07:21:45] (step=0001028) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.2011348072784191, LR: 0.0003 +[2026-02-27 07:21:53] (step=0001029) Train Loss: 0.4774, Train Steps/Sec: 0.13, Epoch: 0.20133046370573274, LR: 0.0003 +[2026-02-27 07:22:01] (step=0001030) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.20152612013304638, LR: 0.0003 +[2026-02-27 07:22:09] (step=0001031) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.20172177656036, LR: 0.0003 +[2026-02-27 07:22:17] (step=0001032) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.20191743298767365, LR: 0.0003 +[2026-02-27 07:22:25] (step=0001033) Train Loss: 0.4877, Train Steps/Sec: 0.12, Epoch: 0.2021130894149873, LR: 0.0003 +[2026-02-27 07:22:33] (step=0001034) Train Loss: 0.4801, Train Steps/Sec: 0.13, Epoch: 0.2023087458423009, LR: 0.0003 +[2026-02-27 07:22:41] (step=0001035) Train Loss: 0.4777, Train Steps/Sec: 0.13, Epoch: 0.20250440226961455, LR: 0.0003 +[2026-02-27 07:22:48] (step=0001036) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.2027000586969282, LR: 0.0003 +[2026-02-27 07:22:56] (step=0001037) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.20289571512424184, LR: 0.0003 +[2026-02-27 07:23:04] (step=0001038) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.20309137155155546, LR: 0.0003 +[2026-02-27 07:23:12] (step=0001039) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.2032870279788691, LR: 0.0003 +[2026-02-27 07:23:20] (step=0001040) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.20348268440618275, LR: 0.0003 +[2026-02-27 07:23:28] (step=0001041) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.20367834083349637, LR: 0.0003 +[2026-02-27 07:23:36] (step=0001042) Train Loss: 0.4799, Train Steps/Sec: 0.13, Epoch: 0.20387399726081, LR: 0.0003 +[2026-02-27 07:23:43] (step=0001043) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.20406965368812366, LR: 0.0003 +[2026-02-27 07:23:51] (step=0001044) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.2042653101154373, LR: 0.0003 +[2026-02-27 07:23:59] (step=0001045) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.20446096654275092, LR: 0.0003 +[2026-02-27 07:24:07] (step=0001046) Train Loss: 0.4840, Train Steps/Sec: 0.13, Epoch: 0.20465662297006457, LR: 0.0003 +[2026-02-27 07:24:15] (step=0001047) Train Loss: 0.4736, Train Steps/Sec: 0.13, Epoch: 0.2048522793973782, LR: 0.0003 +[2026-02-27 07:24:23] (step=0001048) Train Loss: 0.4773, Train Steps/Sec: 0.13, Epoch: 0.20504793582469183, LR: 0.0003 +[2026-02-27 07:24:31] (step=0001049) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.20524359225200547, LR: 0.0003 +[2026-02-27 07:24:38] (step=0001050) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.20543924867931912, LR: 0.0003 +[2026-02-27 07:24:46] (step=0001051) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.20563490510663276, LR: 0.0003 +[2026-02-27 07:24:54] (step=0001052) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.20583056153394638, LR: 0.0003 +[2026-02-27 07:25:02] (step=0001053) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.20602621796126003, LR: 0.0003 +[2026-02-27 07:25:10] (step=0001054) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.20622187438857367, LR: 0.0003 +[2026-02-27 07:25:18] (step=0001055) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.2064175308158873, LR: 0.0003 +[2026-02-27 07:25:25] (step=0001056) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.20661318724320094, LR: 0.0003 +[2026-02-27 07:25:33] (step=0001057) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.20680884367051458, LR: 0.0003 +[2026-02-27 07:25:41] (step=0001058) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.20700450009782823, LR: 0.0003 +[2026-02-27 07:25:49] (step=0001059) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.20720015652514184, LR: 0.0003 +[2026-02-27 07:25:57] (step=0001060) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.2073958129524555, LR: 0.0003 +[2026-02-27 07:26:05] (step=0001061) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.20759146937976913, LR: 0.0003 +[2026-02-27 07:26:13] (step=0001062) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.20778712580708275, LR: 0.0003 +[2026-02-27 07:26:20] (step=0001063) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.2079827822343964, LR: 0.0003 +[2026-02-27 07:26:28] (step=0001064) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.20817843866171004, LR: 0.0003 +[2026-02-27 07:26:36] (step=0001065) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.2083740950890237, LR: 0.0003 +[2026-02-27 07:26:44] (step=0001066) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.2085697515163373, LR: 0.0003 +[2026-02-27 07:26:52] (step=0001067) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.20876540794365095, LR: 0.0003 +[2026-02-27 07:27:00] (step=0001068) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.2089610643709646, LR: 0.0003 +[2026-02-27 07:27:08] (step=0001069) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.2091567207982782, LR: 0.0003 +[2026-02-27 07:27:16] (step=0001070) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.20935237722559186, LR: 0.0003 +[2026-02-27 07:27:23] (step=0001071) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.2095480336529055, LR: 0.0003 +[2026-02-27 07:27:31] (step=0001072) Train Loss: 0.4764, Train Steps/Sec: 0.13, Epoch: 0.20974369008021915, LR: 0.0003 +[2026-02-27 07:27:39] (step=0001073) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.20993934650753276, LR: 0.0003 +[2026-02-27 07:27:47] (step=0001074) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.2101350029348464, LR: 0.0003 +[2026-02-27 07:27:55] (step=0001075) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.21033065936216006, LR: 0.0003 +[2026-02-27 07:28:03] (step=0001076) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.21052631578947367, LR: 0.0003 +[2026-02-27 07:28:10] (step=0001077) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.21072197221678732, LR: 0.0003 +[2026-02-27 07:28:18] (step=0001078) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.21091762864410096, LR: 0.0003 +[2026-02-27 07:28:26] (step=0001079) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.2111132850714146, LR: 0.0003 +[2026-02-27 07:28:34] (step=0001080) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.21130894149872823, LR: 0.0003 +[2026-02-27 07:28:42] (step=0001081) Train Loss: 0.4767, Train Steps/Sec: 0.13, Epoch: 0.21150459792604187, LR: 0.0003 +[2026-02-27 07:28:50] (step=0001082) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.21170025435335552, LR: 0.0003 +[2026-02-27 07:28:58] (step=0001083) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.21189591078066913, LR: 0.0003 +[2026-02-27 07:29:06] (step=0001084) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.21209156720798278, LR: 0.0003 +[2026-02-27 07:29:13] (step=0001085) Train Loss: 0.4765, Train Steps/Sec: 0.13, Epoch: 0.21228722363529642, LR: 0.0003 +[2026-02-27 07:29:21] (step=0001086) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.21248288006261007, LR: 0.0003 +[2026-02-27 07:29:29] (step=0001087) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.2126785364899237, LR: 0.0003 +[2026-02-27 07:29:37] (step=0001088) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.21287419291723733, LR: 0.0003 +[2026-02-27 07:29:45] (step=0001089) Train Loss: 0.4762, Train Steps/Sec: 0.13, Epoch: 0.21306984934455098, LR: 0.0003 +[2026-02-27 07:29:53] (step=0001090) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.2132655057718646, LR: 0.0003 +[2026-02-27 07:30:00] (step=0001091) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.21346116219917824, LR: 0.0003 +[2026-02-27 07:30:08] (step=0001092) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 0.21365681862649188, LR: 0.0003 +[2026-02-27 07:30:16] (step=0001093) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.21385247505380553, LR: 0.0003 +[2026-02-27 07:30:24] (step=0001094) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.21404813148111915, LR: 0.0003 +[2026-02-27 07:30:32] (step=0001095) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.2142437879084328, LR: 0.0003 +[2026-02-27 07:30:40] (step=0001096) Train Loss: 0.4764, Train Steps/Sec: 0.13, Epoch: 0.21443944433574644, LR: 0.0003 +[2026-02-27 07:30:48] (step=0001097) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.21463510076306005, LR: 0.0003 +[2026-02-27 07:30:56] (step=0001098) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.2148307571903737, LR: 0.0003 +[2026-02-27 07:31:03] (step=0001099) Train Loss: 0.4779, Train Steps/Sec: 0.13, Epoch: 0.21502641361768735, LR: 0.0003 +[2026-02-27 07:31:11] (step=0001100) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.215222070045001, LR: 0.0003 +[2026-02-27 07:31:19] (step=0001101) Train Loss: 0.4800, Train Steps/Sec: 0.13, Epoch: 0.2154177264723146, LR: 0.0003 +[2026-02-27 07:31:27] (step=0001102) Train Loss: 0.4787, Train Steps/Sec: 0.13, Epoch: 0.21561338289962825, LR: 0.0003 +[2026-02-27 07:31:35] (step=0001103) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.2158090393269419, LR: 0.0003 +[2026-02-27 07:31:43] (step=0001104) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.21600469575425552, LR: 0.0003 +[2026-02-27 07:31:50] (step=0001105) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.21620035218156916, LR: 0.0003 +[2026-02-27 07:31:58] (step=0001106) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.2163960086088828, LR: 0.0003 +[2026-02-27 07:32:06] (step=0001107) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.21659166503619645, LR: 0.0003 +[2026-02-27 07:32:14] (step=0001108) Train Loss: 0.4809, Train Steps/Sec: 0.13, Epoch: 0.21678732146351007, LR: 0.0003 +[2026-02-27 07:32:22] (step=0001109) Train Loss: 0.4842, Train Steps/Sec: 0.13, Epoch: 0.21698297789082371, LR: 0.0003 +[2026-02-27 07:32:30] (step=0001110) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.21717863431813736, LR: 0.0003 +[2026-02-27 07:32:38] (step=0001111) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.21737429074545098, LR: 0.0003 +[2026-02-27 07:32:46] (step=0001112) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.21756994717276462, LR: 0.0003 +[2026-02-27 07:32:53] (step=0001113) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 0.21776560360007827, LR: 0.0003 +[2026-02-27 07:33:01] (step=0001114) Train Loss: 0.4774, Train Steps/Sec: 0.13, Epoch: 0.2179612600273919, LR: 0.0003 +[2026-02-27 07:33:09] (step=0001115) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.21815691645470553, LR: 0.0003 +[2026-02-27 07:33:17] (step=0001116) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.21835257288201917, LR: 0.0003 +[2026-02-27 07:33:25] (step=0001117) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.21854822930933282, LR: 0.0003 +[2026-02-27 07:33:33] (step=0001118) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.21874388573664644, LR: 0.0003 +[2026-02-27 07:33:41] (step=0001119) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.21893954216396008, LR: 0.0003 +[2026-02-27 07:33:48] (step=0001120) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.21913519859127373, LR: 0.0003 +[2026-02-27 07:33:56] (step=0001121) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 0.21933085501858737, LR: 0.0003 +[2026-02-27 07:34:04] (step=0001122) Train Loss: 0.4801, Train Steps/Sec: 0.13, Epoch: 0.219526511445901, LR: 0.0003 +[2026-02-27 07:34:12] (step=0001123) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.21972216787321464, LR: 0.0003 +[2026-02-27 07:34:20] (step=0001124) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.21991782430052828, LR: 0.0003 +[2026-02-27 07:34:28] (step=0001125) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.2201134807278419, LR: 0.0003 +[2026-02-27 07:34:36] (step=0001126) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.22030913715515554, LR: 0.0003 +[2026-02-27 07:34:44] (step=0001127) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.2205047935824692, LR: 0.0003 +[2026-02-27 07:34:51] (step=0001128) Train Loss: 0.4816, Train Steps/Sec: 0.13, Epoch: 0.22070045000978283, LR: 0.0003 +[2026-02-27 07:34:59] (step=0001129) Train Loss: 0.4756, Train Steps/Sec: 0.13, Epoch: 0.22089610643709645, LR: 0.0003 +[2026-02-27 07:35:07] (step=0001130) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.2210917628644101, LR: 0.0003 +[2026-02-27 07:35:15] (step=0001131) Train Loss: 0.4768, Train Steps/Sec: 0.13, Epoch: 0.22128741929172374, LR: 0.0003 +[2026-02-27 07:35:23] (step=0001132) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.22148307571903736, LR: 0.0003 +[2026-02-27 07:35:31] (step=0001133) Train Loss: 0.4832, Train Steps/Sec: 0.13, Epoch: 0.221678732146351, LR: 0.0003 +[2026-02-27 07:35:39] (step=0001134) Train Loss: 0.4755, Train Steps/Sec: 0.13, Epoch: 0.22187438857366465, LR: 0.0003 +[2026-02-27 07:35:46] (step=0001135) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.2220700450009783, LR: 0.0003 +[2026-02-27 07:35:54] (step=0001136) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.2222657014282919, LR: 0.0003 +[2026-02-27 07:36:02] (step=0001137) Train Loss: 0.4781, Train Steps/Sec: 0.13, Epoch: 0.22246135785560556, LR: 0.0003 +[2026-02-27 07:36:10] (step=0001138) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.2226570142829192, LR: 0.0003 +[2026-02-27 07:36:18] (step=0001139) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.22285267071023282, LR: 0.0003 +[2026-02-27 07:36:26] (step=0001140) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.22304832713754646, LR: 0.0003 +[2026-02-27 07:36:33] (step=0001141) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.2232439835648601, LR: 0.0003 +[2026-02-27 07:36:41] (step=0001142) Train Loss: 0.4792, Train Steps/Sec: 0.13, Epoch: 0.22343963999217376, LR: 0.0003 +[2026-02-27 07:36:49] (step=0001143) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.22363529641948737, LR: 0.0003 +[2026-02-27 07:36:57] (step=0001144) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.22383095284680102, LR: 0.0003 +[2026-02-27 07:37:05] (step=0001145) Train Loss: 0.4769, Train Steps/Sec: 0.13, Epoch: 0.22402660927411466, LR: 0.0003 +[2026-02-27 07:37:13] (step=0001146) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.22422226570142828, LR: 0.0003 +[2026-02-27 07:37:21] (step=0001147) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.22441792212874193, LR: 0.0003 +[2026-02-27 07:37:28] (step=0001148) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.22461357855605557, LR: 0.0003 +[2026-02-27 07:37:36] (step=0001149) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.22480923498336922, LR: 0.0003 +[2026-02-27 07:37:44] (step=0001150) Train Loss: 0.4777, Train Steps/Sec: 0.13, Epoch: 0.22500489141068283, LR: 0.0003 +[2026-02-27 07:37:52] (step=0001151) Train Loss: 0.4736, Train Steps/Sec: 0.13, Epoch: 0.22520054783799648, LR: 0.0003 +[2026-02-27 07:38:00] (step=0001152) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.22539620426531012, LR: 0.0003 +[2026-02-27 07:38:08] (step=0001153) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.22559186069262374, LR: 0.0003 +[2026-02-27 07:38:15] (step=0001154) Train Loss: 0.4768, Train Steps/Sec: 0.13, Epoch: 0.2257875171199374, LR: 0.0003 +[2026-02-27 07:38:23] (step=0001155) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.22598317354725103, LR: 0.0003 +[2026-02-27 07:38:31] (step=0001156) Train Loss: 0.4784, Train Steps/Sec: 0.13, Epoch: 0.22617882997456468, LR: 0.0003 +[2026-02-27 07:38:39] (step=0001157) Train Loss: 0.4743, Train Steps/Sec: 0.13, Epoch: 0.2263744864018783, LR: 0.0003 +[2026-02-27 07:38:47] (step=0001158) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.22657014282919194, LR: 0.0003 +[2026-02-27 07:38:55] (step=0001159) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.22676579925650558, LR: 0.0003 +[2026-02-27 07:39:03] (step=0001160) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.2269614556838192, LR: 0.0003 +[2026-02-27 07:39:11] (step=0001161) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.22715711211113285, LR: 0.0003 +[2026-02-27 07:39:18] (step=0001162) Train Loss: 0.4752, Train Steps/Sec: 0.13, Epoch: 0.2273527685384465, LR: 0.0003 +[2026-02-27 07:39:26] (step=0001163) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.22754842496576014, LR: 0.0003 +[2026-02-27 07:39:34] (step=0001164) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.22774408139307376, LR: 0.0003 +[2026-02-27 07:39:42] (step=0001165) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.2279397378203874, LR: 0.0003 +[2026-02-27 07:39:50] (step=0001166) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.22813539424770105, LR: 0.0003 +[2026-02-27 07:39:58] (step=0001167) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.22833105067501466, LR: 0.0003 +[2026-02-27 07:40:06] (step=0001168) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.2285267071023283, LR: 0.0003 +[2026-02-27 07:40:13] (step=0001169) Train Loss: 0.4785, Train Steps/Sec: 0.13, Epoch: 0.22872236352964195, LR: 0.0003 +[2026-02-27 07:40:21] (step=0001170) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.2289180199569556, LR: 0.0003 +[2026-02-27 07:40:29] (step=0001171) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.22911367638426922, LR: 0.0003 +[2026-02-27 07:40:37] (step=0001172) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.22930933281158286, LR: 0.0003 +[2026-02-27 07:40:45] (step=0001173) Train Loss: 0.4758, Train Steps/Sec: 0.13, Epoch: 0.2295049892388965, LR: 0.0003 +[2026-02-27 07:40:53] (step=0001174) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.22970064566621012, LR: 0.0003 +[2026-02-27 07:41:01] (step=0001175) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.22989630209352377, LR: 0.0003 +[2026-02-27 07:41:08] (step=0001176) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.23009195852083741, LR: 0.0003 +[2026-02-27 07:41:16] (step=0001177) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.23028761494815106, LR: 0.0003 +[2026-02-27 07:41:24] (step=0001178) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.23048327137546468, LR: 0.0003 +[2026-02-27 07:41:32] (step=0001179) Train Loss: 0.4758, Train Steps/Sec: 0.13, Epoch: 0.23067892780277832, LR: 0.0003 +[2026-02-27 07:41:40] (step=0001180) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.23087458423009197, LR: 0.0003 +[2026-02-27 07:41:48] (step=0001181) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.23107024065740558, LR: 0.0003 +[2026-02-27 07:41:56] (step=0001182) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.23126589708471923, LR: 0.0003 +[2026-02-27 07:42:03] (step=0001183) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.23146155351203287, LR: 0.0003 +[2026-02-27 07:42:11] (step=0001184) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.23165720993934652, LR: 0.0003 +[2026-02-27 07:42:19] (step=0001185) Train Loss: 0.4774, Train Steps/Sec: 0.13, Epoch: 0.23185286636666014, LR: 0.0003 +[2026-02-27 07:42:27] (step=0001186) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.23204852279397378, LR: 0.0003 +[2026-02-27 07:42:35] (step=0001187) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.23224417922128743, LR: 0.0003 +[2026-02-27 07:42:43] (step=0001188) Train Loss: 0.4720, Train Steps/Sec: 0.13, Epoch: 0.23243983564860105, LR: 0.0003 +[2026-02-27 07:42:51] (step=0001189) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.2326354920759147, LR: 0.0003 +[2026-02-27 07:42:58] (step=0001190) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.23283114850322834, LR: 0.0003 +[2026-02-27 07:43:06] (step=0001191) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.23302680493054198, LR: 0.0003 +[2026-02-27 07:43:14] (step=0001192) Train Loss: 0.4755, Train Steps/Sec: 0.13, Epoch: 0.2332224613578556, LR: 0.0003 +[2026-02-27 07:43:22] (step=0001193) Train Loss: 0.4780, Train Steps/Sec: 0.13, Epoch: 0.23341811778516924, LR: 0.0003 +[2026-02-27 07:43:30] (step=0001194) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.2336137742124829, LR: 0.0003 +[2026-02-27 07:43:38] (step=0001195) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.2338094306397965, LR: 0.0003 +[2026-02-27 07:43:46] (step=0001196) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.23400508706711015, LR: 0.0003 +[2026-02-27 07:43:53] (step=0001197) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.2342007434944238, LR: 0.0003 +[2026-02-27 07:44:01] (step=0001198) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.23439639992173744, LR: 0.0003 +[2026-02-27 07:44:09] (step=0001199) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.23459205634905106, LR: 0.0003 +[2026-02-27 07:44:17] (step=0001200) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.2347877127763647, LR: 0.0003 +[2026-02-27 07:44:25] (step=0001201) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.23498336920367835, LR: 0.0003 +[2026-02-27 07:44:33] (step=0001202) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.23517902563099197, LR: 0.0003 +[2026-02-27 07:44:40] (step=0001203) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.2353746820583056, LR: 0.0003 +[2026-02-27 07:44:48] (step=0001204) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.23557033848561926, LR: 0.0003 +[2026-02-27 07:44:56] (step=0001205) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 0.2357659949129329, LR: 0.0003 +[2026-02-27 07:45:04] (step=0001206) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.23596165134024652, LR: 0.0003 +[2026-02-27 07:45:12] (step=0001207) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.23615730776756017, LR: 0.0003 +[2026-02-27 07:45:20] (step=0001208) Train Loss: 0.4621, Train Steps/Sec: 0.12, Epoch: 0.2363529641948738, LR: 0.0003 +[2026-02-27 07:45:28] (step=0001209) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.23654862062218743, LR: 0.0003 +[2026-02-27 07:45:36] (step=0001210) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.23674427704950107, LR: 0.0003 +[2026-02-27 07:45:43] (step=0001211) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.23693993347681472, LR: 0.0003 +[2026-02-27 07:45:51] (step=0001212) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.23713558990412836, LR: 0.0003 +[2026-02-27 07:45:59] (step=0001213) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.23733124633144198, LR: 0.0003 +[2026-02-27 07:46:07] (step=0001214) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.23752690275875563, LR: 0.0003 +[2026-02-27 07:46:15] (step=0001215) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.23772255918606927, LR: 0.0003 +[2026-02-27 07:46:23] (step=0001216) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.2379182156133829, LR: 0.0003 +[2026-02-27 07:46:31] (step=0001217) Train Loss: 0.4779, Train Steps/Sec: 0.12, Epoch: 0.23811387204069653, LR: 0.0003 +[2026-02-27 07:46:39] (step=0001218) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.23830952846801018, LR: 0.0003 +[2026-02-27 07:46:46] (step=0001219) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.23850518489532382, LR: 0.0003 +[2026-02-27 07:46:54] (step=0001220) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.23870084132263744, LR: 0.0003 +[2026-02-27 07:47:02] (step=0001221) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.2388964977499511, LR: 0.0003 +[2026-02-27 07:47:10] (step=0001222) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.23909215417726473, LR: 0.0003 +[2026-02-27 07:47:18] (step=0001223) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.23928781060457835, LR: 0.0003 +[2026-02-27 07:47:26] (step=0001224) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.239483467031892, LR: 0.0003 +[2026-02-27 07:47:34] (step=0001225) Train Loss: 0.4787, Train Steps/Sec: 0.13, Epoch: 0.23967912345920564, LR: 0.0003 +[2026-02-27 07:47:41] (step=0001226) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.23987477988651928, LR: 0.0003 +[2026-02-27 07:47:49] (step=0001227) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.2400704363138329, LR: 0.0003 +[2026-02-27 07:47:57] (step=0001228) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.24026609274114655, LR: 0.0003 +[2026-02-27 07:48:05] (step=0001229) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.2404617491684602, LR: 0.0003 +[2026-02-27 07:48:13] (step=0001230) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.2406574055957738, LR: 0.0003 +[2026-02-27 07:48:21] (step=0001231) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.24085306202308746, LR: 0.0003 +[2026-02-27 07:48:29] (step=0001232) Train Loss: 0.4828, Train Steps/Sec: 0.13, Epoch: 0.2410487184504011, LR: 0.0003 +[2026-02-27 07:48:36] (step=0001233) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.24124437487771475, LR: 0.0003 +[2026-02-27 07:48:44] (step=0001234) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.24144003130502836, LR: 0.0003 +[2026-02-27 07:48:52] (step=0001235) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.241635687732342, LR: 0.0003 +[2026-02-27 07:49:00] (step=0001236) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.24183134415965565, LR: 0.0003 +[2026-02-27 07:49:08] (step=0001237) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.24202700058696927, LR: 0.0003 +[2026-02-27 07:49:16] (step=0001238) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.24222265701428292, LR: 0.0003 +[2026-02-27 07:49:24] (step=0001239) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.24241831344159656, LR: 0.0003 +[2026-02-27 07:49:31] (step=0001240) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.2426139698689102, LR: 0.0003 +[2026-02-27 07:49:39] (step=0001241) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.24280962629622382, LR: 0.0003 +[2026-02-27 07:49:47] (step=0001242) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.24300528272353747, LR: 0.0003 +[2026-02-27 07:49:55] (step=0001243) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.24320093915085111, LR: 0.0003 +[2026-02-27 07:50:03] (step=0001244) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.24339659557816473, LR: 0.0003 +[2026-02-27 07:50:11] (step=0001245) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.24359225200547838, LR: 0.0003 +[2026-02-27 07:50:19] (step=0001246) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.24378790843279202, LR: 0.0003 +[2026-02-27 07:50:26] (step=0001247) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.24398356486010567, LR: 0.0003 +[2026-02-27 07:50:34] (step=0001248) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.24417922128741928, LR: 0.0003 +[2026-02-27 07:50:42] (step=0001249) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.24437487771473293, LR: 0.0003 +[2026-02-27 07:50:50] (step=0001250) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.24457053414204658, LR: 0.0003 +[2026-02-27 07:50:58] (step=0001251) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.2447661905693602, LR: 0.0003 +[2026-02-27 07:51:06] (step=0001252) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.24496184699667384, LR: 0.0003 +[2026-02-27 07:51:13] (step=0001253) Train Loss: 0.4781, Train Steps/Sec: 0.13, Epoch: 0.24515750342398748, LR: 0.0003 +[2026-02-27 07:51:21] (step=0001254) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.24535315985130113, LR: 0.0003 +[2026-02-27 07:51:29] (step=0001255) Train Loss: 0.4782, Train Steps/Sec: 0.13, Epoch: 0.24554881627861475, LR: 0.0003 +[2026-02-27 07:51:37] (step=0001256) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.2457444727059284, LR: 0.0003 +[2026-02-27 07:51:45] (step=0001257) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.24594012913324204, LR: 0.0003 +[2026-02-27 07:51:53] (step=0001258) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.24613578556055565, LR: 0.0003 +[2026-02-27 07:52:01] (step=0001259) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.2463314419878693, LR: 0.0003 +[2026-02-27 07:52:09] (step=0001260) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.24652709841518294, LR: 0.0003 +[2026-02-27 07:52:16] (step=0001261) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.2467227548424966, LR: 0.0003 +[2026-02-27 07:52:24] (step=0001262) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.2469184112698102, LR: 0.0003 +[2026-02-27 07:52:32] (step=0001263) Train Loss: 0.4786, Train Steps/Sec: 0.13, Epoch: 0.24711406769712385, LR: 0.0003 +[2026-02-27 07:52:40] (step=0001264) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.2473097241244375, LR: 0.0003 +[2026-02-27 07:52:48] (step=0001265) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.24750538055175111, LR: 0.0003 +[2026-02-27 07:52:56] (step=0001266) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.24770103697906476, LR: 0.0003 +[2026-02-27 07:53:04] (step=0001267) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.2478966934063784, LR: 0.0003 +[2026-02-27 07:53:12] (step=0001268) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.24809234983369205, LR: 0.0003 +[2026-02-27 07:53:19] (step=0001269) Train Loss: 0.4789, Train Steps/Sec: 0.13, Epoch: 0.24828800626100567, LR: 0.0003 +[2026-02-27 07:53:27] (step=0001270) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.2484836626883193, LR: 0.0003 +[2026-02-27 07:53:35] (step=0001271) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.24867931911563296, LR: 0.0003 +[2026-02-27 07:53:43] (step=0001272) Train Loss: 0.4720, Train Steps/Sec: 0.13, Epoch: 0.24887497554294657, LR: 0.0003 +[2026-02-27 07:53:51] (step=0001273) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.24907063197026022, LR: 0.0003 +[2026-02-27 07:53:59] (step=0001274) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.24926628839757387, LR: 0.0003 +[2026-02-27 07:54:07] (step=0001275) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.2494619448248875, LR: 0.0003 +[2026-02-27 07:54:14] (step=0001276) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.24965760125220113, LR: 0.0003 +[2026-02-27 07:54:22] (step=0001277) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.24985325767951477, LR: 0.0003 +[2026-02-27 07:54:30] (step=0001278) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.2500489141068284, LR: 0.0003 +[2026-02-27 07:54:38] (step=0001279) Train Loss: 0.4797, Train Steps/Sec: 0.13, Epoch: 0.25024457053414206, LR: 0.0003 +[2026-02-27 07:54:46] (step=0001280) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.2504402269614557, LR: 0.0003 +[2026-02-27 07:54:54] (step=0001281) Train Loss: 0.4775, Train Steps/Sec: 0.13, Epoch: 0.2506358833887693, LR: 0.0003 +[2026-02-27 07:55:02] (step=0001282) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.25083153981608297, LR: 0.0003 +[2026-02-27 07:55:09] (step=0001283) Train Loss: 0.4791, Train Steps/Sec: 0.13, Epoch: 0.2510271962433966, LR: 0.0003 +[2026-02-27 07:55:17] (step=0001284) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.2512228526707102, LR: 0.0003 +[2026-02-27 07:55:25] (step=0001285) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.2514185090980239, LR: 0.0003 +[2026-02-27 07:55:33] (step=0001286) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.2516141655253375, LR: 0.0003 +[2026-02-27 07:55:41] (step=0001287) Train Loss: 0.4817, Train Steps/Sec: 0.13, Epoch: 0.25180982195265117, LR: 0.0003 +[2026-02-27 07:55:49] (step=0001288) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.2520054783799648, LR: 0.0003 +[2026-02-27 07:55:57] (step=0001289) Train Loss: 0.4774, Train Steps/Sec: 0.13, Epoch: 0.2522011348072784, LR: 0.0003 +[2026-02-27 07:56:04] (step=0001290) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.2523967912345921, LR: 0.0003 +[2026-02-27 07:56:12] (step=0001291) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.2525924476619057, LR: 0.0003 +[2026-02-27 07:56:20] (step=0001292) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.2527881040892193, LR: 0.0003 +[2026-02-27 07:56:28] (step=0001293) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.252983760516533, LR: 0.0003 +[2026-02-27 07:56:36] (step=0001294) Train Loss: 0.4817, Train Steps/Sec: 0.13, Epoch: 0.2531794169438466, LR: 0.0003 +[2026-02-27 07:56:44] (step=0001295) Train Loss: 0.4852, Train Steps/Sec: 0.13, Epoch: 0.2533750733711602, LR: 0.0003 +[2026-02-27 07:56:52] (step=0001296) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.2535707297984739, LR: 0.0003 +[2026-02-27 07:56:59] (step=0001297) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.2537663862257875, LR: 0.0003 +[2026-02-27 07:57:07] (step=0001298) Train Loss: 0.4788, Train Steps/Sec: 0.13, Epoch: 0.2539620426531011, LR: 0.0003 +[2026-02-27 07:57:15] (step=0001299) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.2541576990804148, LR: 0.0003 +[2026-02-27 07:57:23] (step=0001300) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.2543533555077284, LR: 0.0003 +[2026-02-27 07:57:31] (step=0001301) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.2545490119350421, LR: 0.0003 +[2026-02-27 07:57:39] (step=0001302) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.2547446683623557, LR: 0.0003 +[2026-02-27 07:57:47] (step=0001303) Train Loss: 0.4831, Train Steps/Sec: 0.12, Epoch: 0.2549403247896693, LR: 0.0003 +[2026-02-27 07:57:55] (step=0001304) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.255135981216983, LR: 0.0003 +[2026-02-27 07:58:02] (step=0001305) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.2553316376442966, LR: 0.0003 +[2026-02-27 07:58:10] (step=0001306) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.25552729407161023, LR: 0.0003 +[2026-02-27 07:58:18] (step=0001307) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.2557229504989239, LR: 0.0003 +[2026-02-27 07:58:26] (step=0001308) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.2559186069262375, LR: 0.0003 +[2026-02-27 07:58:34] (step=0001309) Train Loss: 0.4720, Train Steps/Sec: 0.13, Epoch: 0.25611426335355114, LR: 0.0003 +[2026-02-27 07:58:42] (step=0001310) Train Loss: 0.4661, Train Steps/Sec: 0.12, Epoch: 0.2563099197808648, LR: 0.0003 +[2026-02-27 07:58:50] (step=0001311) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.25650557620817843, LR: 0.0003 +[2026-02-27 07:58:58] (step=0001312) Train Loss: 0.4737, Train Steps/Sec: 0.13, Epoch: 0.25670123263549205, LR: 0.0003 +[2026-02-27 07:59:05] (step=0001313) Train Loss: 0.4736, Train Steps/Sec: 0.13, Epoch: 0.2568968890628057, LR: 0.0003 +[2026-02-27 07:59:13] (step=0001314) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.25709254549011934, LR: 0.0003 +[2026-02-27 07:59:21] (step=0001315) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.257288201917433, LR: 0.0003 +[2026-02-27 07:59:29] (step=0001316) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.25748385834474663, LR: 0.0003 +[2026-02-27 07:59:37] (step=0001317) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.25767951477206025, LR: 0.0003 +[2026-02-27 07:59:45] (step=0001318) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.2578751711993739, LR: 0.0003 +[2026-02-27 07:59:53] (step=0001319) Train Loss: 0.4797, Train Steps/Sec: 0.13, Epoch: 0.25807082762668754, LR: 0.0003 +[2026-02-27 08:00:00] (step=0001320) Train Loss: 0.4746, Train Steps/Sec: 0.13, Epoch: 0.25826648405400116, LR: 0.0003 +[2026-02-27 08:00:08] (step=0001321) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.25846214048131483, LR: 0.0003 +[2026-02-27 08:00:16] (step=0001322) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.25865779690862845, LR: 0.0003 +[2026-02-27 08:00:24] (step=0001323) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.25885345333594206, LR: 0.0003 +[2026-02-27 08:00:32] (step=0001324) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.25904910976325574, LR: 0.0003 +[2026-02-27 08:00:40] (step=0001325) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.25924476619056935, LR: 0.0003 +[2026-02-27 08:00:48] (step=0001326) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.25944042261788297, LR: 0.0003 +[2026-02-27 08:00:55] (step=0001327) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.25963607904519664, LR: 0.0003 +[2026-02-27 08:01:03] (step=0001328) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.25983173547251026, LR: 0.0003 +[2026-02-27 08:01:11] (step=0001329) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.26002739189982393, LR: 0.0003 +[2026-02-27 08:01:19] (step=0001330) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.26022304832713755, LR: 0.0003 +[2026-02-27 08:01:27] (step=0001331) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.26041870475445117, LR: 0.0003 +[2026-02-27 08:01:35] (step=0001332) Train Loss: 0.4752, Train Steps/Sec: 0.13, Epoch: 0.26061436118176484, LR: 0.0003 +[2026-02-27 08:01:43] (step=0001333) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.26081001760907846, LR: 0.0003 +[2026-02-27 08:01:50] (step=0001334) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.2610056740363921, LR: 0.0003 +[2026-02-27 08:01:58] (step=0001335) Train Loss: 0.4762, Train Steps/Sec: 0.13, Epoch: 0.26120133046370575, LR: 0.0003 +[2026-02-27 08:02:06] (step=0001336) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 0.26139698689101937, LR: 0.0003 +[2026-02-27 08:02:14] (step=0001337) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.261592643318333, LR: 0.0003 +[2026-02-27 08:02:22] (step=0001338) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.26178829974564666, LR: 0.0003 +[2026-02-27 08:02:30] (step=0001339) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.2619839561729603, LR: 0.0003 +[2026-02-27 08:02:38] (step=0001340) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.2621796126002739, LR: 0.0003 +[2026-02-27 08:02:45] (step=0001341) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.26237526902758757, LR: 0.0003 +[2026-02-27 08:02:53] (step=0001342) Train Loss: 0.4793, Train Steps/Sec: 0.13, Epoch: 0.2625709254549012, LR: 0.0003 +[2026-02-27 08:03:01] (step=0001343) Train Loss: 0.4743, Train Steps/Sec: 0.13, Epoch: 0.26276658188221486, LR: 0.0003 +[2026-02-27 08:03:09] (step=0001344) Train Loss: 0.4816, Train Steps/Sec: 0.13, Epoch: 0.2629622383095285, LR: 0.0003 +[2026-02-27 08:03:17] (step=0001345) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.2631578947368421, LR: 0.0003 +[2026-02-27 08:03:25] (step=0001346) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.26335355116415576, LR: 0.0003 +[2026-02-27 08:03:33] (step=0001347) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.2635492075914694, LR: 0.0003 +[2026-02-27 08:03:40] (step=0001348) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.263744864018783, LR: 0.0003 +[2026-02-27 08:03:48] (step=0001349) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.26394052044609667, LR: 0.0003 +[2026-02-27 08:03:56] (step=0001350) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.2641361768734103, LR: 0.0003 +[2026-02-27 08:04:04] (step=0001351) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.2643318333007239, LR: 0.0003 +[2026-02-27 08:04:12] (step=0001352) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.2645274897280376, LR: 0.0003 +[2026-02-27 08:04:20] (step=0001353) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.2647231461553512, LR: 0.0003 +[2026-02-27 08:04:28] (step=0001354) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.2649188025826648, LR: 0.0003 +[2026-02-27 08:04:35] (step=0001355) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.2651144590099785, LR: 0.0003 +[2026-02-27 08:04:43] (step=0001356) Train Loss: 0.4772, Train Steps/Sec: 0.13, Epoch: 0.2653101154372921, LR: 0.0003 +[2026-02-27 08:04:51] (step=0001357) Train Loss: 0.4774, Train Steps/Sec: 0.13, Epoch: 0.2655057718646058, LR: 0.0003 +[2026-02-27 08:04:59] (step=0001358) Train Loss: 0.4815, Train Steps/Sec: 0.13, Epoch: 0.2657014282919194, LR: 0.0003 +[2026-02-27 08:05:07] (step=0001359) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.265897084719233, LR: 0.0003 +[2026-02-27 08:05:15] (step=0001360) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.2660927411465467, LR: 0.0003 +[2026-02-27 08:05:23] (step=0001361) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.2662883975738603, LR: 0.0003 +[2026-02-27 08:05:31] (step=0001362) Train Loss: 0.4767, Train Steps/Sec: 0.13, Epoch: 0.2664840540011739, LR: 0.0003 +[2026-02-27 08:05:38] (step=0001363) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.2666797104284876, LR: 0.0003 +[2026-02-27 08:05:46] (step=0001364) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.2668753668558012, LR: 0.0003 +[2026-02-27 08:05:54] (step=0001365) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.26707102328311483, LR: 0.0003 +[2026-02-27 08:06:02] (step=0001366) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.2672666797104285, LR: 0.0003 +[2026-02-27 08:06:10] (step=0001367) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 0.2674623361377421, LR: 0.0003 +[2026-02-27 08:06:18] (step=0001368) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.26765799256505574, LR: 0.0003 +[2026-02-27 08:06:26] (step=0001369) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.2678536489923694, LR: 0.0003 +[2026-02-27 08:06:33] (step=0001370) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.268049305419683, LR: 0.0003 +[2026-02-27 08:06:41] (step=0001371) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.2682449618469967, LR: 0.0003 +[2026-02-27 08:06:49] (step=0001372) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.2684406182743103, LR: 0.0003 +[2026-02-27 08:06:57] (step=0001373) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.26863627470162393, LR: 0.0003 +[2026-02-27 08:07:05] (step=0001374) Train Loss: 0.4698, Train Steps/Sec: 0.13, Epoch: 0.2688319311289376, LR: 0.0003 +[2026-02-27 08:07:13] (step=0001375) Train Loss: 0.4743, Train Steps/Sec: 0.13, Epoch: 0.2690275875562512, LR: 0.0003 +[2026-02-27 08:07:21] (step=0001376) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.26922324398356484, LR: 0.0003 +[2026-02-27 08:07:28] (step=0001377) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.2694189004108785, LR: 0.0003 +[2026-02-27 08:07:36] (step=0001378) Train Loss: 0.4771, Train Steps/Sec: 0.13, Epoch: 0.26961455683819213, LR: 0.0003 +[2026-02-27 08:07:44] (step=0001379) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.26981021326550575, LR: 0.0003 +[2026-02-27 08:07:52] (step=0001380) Train Loss: 0.4846, Train Steps/Sec: 0.13, Epoch: 0.2700058696928194, LR: 0.0003 +[2026-02-27 08:08:00] (step=0001381) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.27020152612013304, LR: 0.0003 +[2026-02-27 08:08:08] (step=0001382) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.27039718254744666, LR: 0.0003 +[2026-02-27 08:08:16] (step=0001383) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.27059283897476033, LR: 0.0003 +[2026-02-27 08:08:23] (step=0001384) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.27078849540207395, LR: 0.0003 +[2026-02-27 08:08:31] (step=0001385) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.2709841518293876, LR: 0.0003 +[2026-02-27 08:08:39] (step=0001386) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.27117980825670124, LR: 0.0003 +[2026-02-27 08:08:47] (step=0001387) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.27137546468401486, LR: 0.0003 +[2026-02-27 08:08:55] (step=0001388) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.27157112111132853, LR: 0.0003 +[2026-02-27 08:09:03] (step=0001389) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.27176677753864215, LR: 0.0003 +[2026-02-27 08:09:10] (step=0001390) Train Loss: 0.4767, Train Steps/Sec: 0.13, Epoch: 0.27196243396595576, LR: 0.0003 +[2026-02-27 08:09:18] (step=0001391) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.27215809039326944, LR: 0.0003 +[2026-02-27 08:09:26] (step=0001392) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.27235374682058305, LR: 0.0003 +[2026-02-27 08:09:34] (step=0001393) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.27254940324789667, LR: 0.0003 +[2026-02-27 08:09:42] (step=0001394) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.27274505967521034, LR: 0.0003 +[2026-02-27 08:09:50] (step=0001395) Train Loss: 0.4844, Train Steps/Sec: 0.13, Epoch: 0.27294071610252396, LR: 0.0003 +[2026-02-27 08:09:58] (step=0001396) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.2731363725298376, LR: 0.0003 +[2026-02-27 08:10:05] (step=0001397) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.27333202895715125, LR: 0.0003 +[2026-02-27 08:10:13] (step=0001398) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.27352768538446487, LR: 0.0003 +[2026-02-27 08:10:21] (step=0001399) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.27372334181177854, LR: 0.0003 +[2026-02-27 08:10:29] (step=0001400) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 0.27391899823909216, LR: 0.0003 +[2026-02-27 08:10:37] (step=0001401) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.2741146546664058, LR: 0.0003 +[2026-02-27 08:10:45] (step=0001402) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.27431031109371945, LR: 0.0003 +[2026-02-27 08:10:53] (step=0001403) Train Loss: 0.4782, Train Steps/Sec: 0.13, Epoch: 0.27450596752103307, LR: 0.0003 +[2026-02-27 08:11:01] (step=0001404) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.2747016239483467, LR: 0.0003 +[2026-02-27 08:11:08] (step=0001405) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.27489728037566036, LR: 0.0003 +[2026-02-27 08:11:16] (step=0001406) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.275092936802974, LR: 0.0003 +[2026-02-27 08:11:24] (step=0001407) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.2752885932302876, LR: 0.0003 +[2026-02-27 08:11:32] (step=0001408) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.27548424965760127, LR: 0.0003 +[2026-02-27 08:11:40] (step=0001409) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.2756799060849149, LR: 0.0003 +[2026-02-27 08:11:48] (step=0001410) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.2758755625122285, LR: 0.0003 +[2026-02-27 08:11:56] (step=0001411) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.2760712189395422, LR: 0.0003 +[2026-02-27 08:12:03] (step=0001412) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.2762668753668558, LR: 0.0003 +[2026-02-27 08:12:11] (step=0001413) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.27646253179416946, LR: 0.0003 +[2026-02-27 08:12:19] (step=0001414) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.2766581882214831, LR: 0.0003 +[2026-02-27 08:12:27] (step=0001415) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.2768538446487967, LR: 0.0003 +[2026-02-27 08:12:35] (step=0001416) Train Loss: 0.4746, Train Steps/Sec: 0.13, Epoch: 0.27704950107611037, LR: 0.0003 +[2026-02-27 08:12:43] (step=0001417) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.277245157503424, LR: 0.0003 +[2026-02-27 08:12:51] (step=0001418) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.2774408139307376, LR: 0.0003 +[2026-02-27 08:12:58] (step=0001419) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.2776364703580513, LR: 0.0003 +[2026-02-27 08:13:06] (step=0001420) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.2778321267853649, LR: 0.0003 +[2026-02-27 08:13:14] (step=0001421) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.2780277832126785, LR: 0.0003 +[2026-02-27 08:13:22] (step=0001422) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.2782234396399922, LR: 0.0003 +[2026-02-27 08:13:30] (step=0001423) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.2784190960673058, LR: 0.0003 +[2026-02-27 08:13:38] (step=0001424) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.2786147524946194, LR: 0.0003 +[2026-02-27 08:13:46] (step=0001425) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.2788104089219331, LR: 0.0003 +[2026-02-27 08:13:53] (step=0001426) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.2790060653492467, LR: 0.0003 +[2026-02-27 08:14:01] (step=0001427) Train Loss: 0.4774, Train Steps/Sec: 0.13, Epoch: 0.2792017217765604, LR: 0.0003 +[2026-02-27 08:14:09] (step=0001428) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.279397378203874, LR: 0.0003 +[2026-02-27 08:14:17] (step=0001429) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.2795930346311876, LR: 0.0003 +[2026-02-27 08:14:25] (step=0001430) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.2797886910585013, LR: 0.0003 +[2026-02-27 08:14:33] (step=0001431) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.2799843474858149, LR: 0.0003 +[2026-02-27 08:14:41] (step=0001432) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.28018000391312853, LR: 0.0003 +[2026-02-27 08:14:48] (step=0001433) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.2803756603404422, LR: 0.0003 +[2026-02-27 08:14:56] (step=0001434) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.2805713167677558, LR: 0.0003 +[2026-02-27 08:15:04] (step=0001435) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.28076697319506944, LR: 0.0003 +[2026-02-27 08:15:12] (step=0001436) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.2809626296223831, LR: 0.0003 +[2026-02-27 08:15:20] (step=0001437) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.2811582860496967, LR: 0.0003 +[2026-02-27 08:15:28] (step=0001438) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.28135394247701034, LR: 0.0003 +[2026-02-27 08:15:36] (step=0001439) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.281549598904324, LR: 0.0003 +[2026-02-27 08:15:43] (step=0001440) Train Loss: 0.4783, Train Steps/Sec: 0.13, Epoch: 0.28174525533163763, LR: 0.0003 +[2026-02-27 08:15:51] (step=0001441) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 0.2819409117589513, LR: 0.0003 +[2026-02-27 08:15:59] (step=0001442) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.2821365681862649, LR: 0.0003 +[2026-02-27 08:16:07] (step=0001443) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.28233222461357854, LR: 0.0003 +[2026-02-27 08:16:15] (step=0001444) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.2825278810408922, LR: 0.0003 +[2026-02-27 08:16:23] (step=0001445) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.28272353746820583, LR: 0.0003 +[2026-02-27 08:16:31] (step=0001446) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.28291919389551945, LR: 0.0003 +[2026-02-27 08:16:38] (step=0001447) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.2831148503228331, LR: 0.0003 +[2026-02-27 08:16:46] (step=0001448) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.28331050675014674, LR: 0.0003 +[2026-02-27 08:16:54] (step=0001449) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.28350616317746036, LR: 0.0003 +[2026-02-27 08:17:02] (step=0001450) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.28370181960477403, LR: 0.0003 +[2026-02-27 08:17:10] (step=0001451) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.28389747603208765, LR: 0.0003 +[2026-02-27 08:17:18] (step=0001452) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.28409313245940127, LR: 0.0003 +[2026-02-27 08:17:26] (step=0001453) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.28428878888671494, LR: 0.0003 +[2026-02-27 08:17:34] (step=0001454) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.28448444531402856, LR: 0.0003 +[2026-02-27 08:17:41] (step=0001455) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.28468010174134223, LR: 0.0003 +[2026-02-27 08:17:49] (step=0001456) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.28487575816865585, LR: 0.0003 +[2026-02-27 08:17:57] (step=0001457) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.28507141459596946, LR: 0.0003 +[2026-02-27 08:18:05] (step=0001458) Train Loss: 0.4614, Train Steps/Sec: 0.12, Epoch: 0.28526707102328314, LR: 0.0003 +[2026-02-27 08:18:13] (step=0001459) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.28546272745059675, LR: 0.0003 +[2026-02-27 08:18:21] (step=0001460) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.28565838387791037, LR: 0.0003 +[2026-02-27 08:18:29] (step=0001461) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.28585404030522404, LR: 0.0003 +[2026-02-27 08:18:37] (step=0001462) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.28604969673253766, LR: 0.0003 +[2026-02-27 08:18:44] (step=0001463) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.2862453531598513, LR: 0.0003 +[2026-02-27 08:18:52] (step=0001464) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.28644100958716495, LR: 0.0003 +[2026-02-27 08:19:00] (step=0001465) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.28663666601447857, LR: 0.0003 +[2026-02-27 08:19:08] (step=0001466) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.2868323224417922, LR: 0.0003 +[2026-02-27 08:19:16] (step=0001467) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 0.28702797886910586, LR: 0.0003 +[2026-02-27 08:19:24] (step=0001468) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.2872236352964195, LR: 0.0003 +[2026-02-27 08:19:31] (step=0001469) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 0.28741929172373315, LR: 0.0003 +[2026-02-27 08:19:39] (step=0001470) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.28761494815104677, LR: 0.0003 +[2026-02-27 08:19:47] (step=0001471) Train Loss: 0.4811, Train Steps/Sec: 0.13, Epoch: 0.2878106045783604, LR: 0.0003 +[2026-02-27 08:19:55] (step=0001472) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.28800626100567406, LR: 0.0003 +[2026-02-27 08:20:03] (step=0001473) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.2882019174329877, LR: 0.0003 +[2026-02-27 08:20:11] (step=0001474) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.2883975738603013, LR: 0.0003 +[2026-02-27 08:20:19] (step=0001475) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.28859323028761497, LR: 0.0003 +[2026-02-27 08:20:26] (step=0001476) Train Loss: 0.4836, Train Steps/Sec: 0.13, Epoch: 0.2887888867149286, LR: 0.0003 +[2026-02-27 08:20:34] (step=0001477) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.2889845431422422, LR: 0.0003 +[2026-02-27 08:20:42] (step=0001478) Train Loss: 0.4803, Train Steps/Sec: 0.13, Epoch: 0.2891801995695559, LR: 0.0003 +[2026-02-27 08:20:50] (step=0001479) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.2893758559968695, LR: 0.0003 +[2026-02-27 08:20:58] (step=0001480) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.2895715124241831, LR: 0.0003 +[2026-02-27 08:21:06] (step=0001481) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.2897671688514968, LR: 0.0003 +[2026-02-27 08:21:14] (step=0001482) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.2899628252788104, LR: 0.0003 +[2026-02-27 08:21:21] (step=0001483) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.29015848170612407, LR: 0.0003 +[2026-02-27 08:21:29] (step=0001484) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.2903541381334377, LR: 0.0003 +[2026-02-27 08:21:37] (step=0001485) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.2905497945607513, LR: 0.0003 +[2026-02-27 08:21:45] (step=0001486) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.290745450988065, LR: 0.0003 +[2026-02-27 08:21:53] (step=0001487) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.2909411074153786, LR: 0.0003 +[2026-02-27 08:22:01] (step=0001488) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.2911367638426922, LR: 0.0003 +[2026-02-27 08:22:08] (step=0001489) Train Loss: 0.4758, Train Steps/Sec: 0.13, Epoch: 0.2913324202700059, LR: 0.0003 +[2026-02-27 08:22:16] (step=0001490) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.2915280766973195, LR: 0.0003 +[2026-02-27 08:22:24] (step=0001491) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.2917237331246331, LR: 0.0003 +[2026-02-27 08:22:32] (step=0001492) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.2919193895519468, LR: 0.0003 +[2026-02-27 08:22:40] (step=0001493) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.2921150459792604, LR: 0.0003 +[2026-02-27 08:22:48] (step=0001494) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.29231070240657403, LR: 0.0003 +[2026-02-27 08:22:56] (step=0001495) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.2925063588338877, LR: 0.0003 +[2026-02-27 08:23:03] (step=0001496) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.2927020152612013, LR: 0.0003 +[2026-02-27 08:23:11] (step=0001497) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.292897671688515, LR: 0.0003 +[2026-02-27 08:23:19] (step=0001498) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.2930933281158286, LR: 0.0003 +[2026-02-27 08:23:27] (step=0001499) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.29328898454314223, LR: 0.0003 +[2026-02-27 08:23:35] (step=0001500) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.2934846409704559, LR: 0.0003 +[2026-02-27 08:23:35] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0001500/ +[2026-02-27 08:23:43] (step=0001501) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.2936802973977695, LR: 0.0003 +[2026-02-27 08:23:51] (step=0001502) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.29387595382508314, LR: 0.0003 +[2026-02-27 08:23:59] (step=0001503) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.2940716102523968, LR: 0.0003 +[2026-02-27 08:24:07] (step=0001504) Train Loss: 0.4651, Train Steps/Sec: 0.12, Epoch: 0.2942672666797104, LR: 0.0003 +[2026-02-27 08:24:14] (step=0001505) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.29446292310702404, LR: 0.0003 +[2026-02-27 08:24:22] (step=0001506) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.2946585795343377, LR: 0.0003 +[2026-02-27 08:24:30] (step=0001507) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.29485423596165133, LR: 0.0003 +[2026-02-27 08:24:38] (step=0001508) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.29504989238896495, LR: 0.0003 +[2026-02-27 08:24:46] (step=0001509) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.2952455488162786, LR: 0.0003 +[2026-02-27 08:24:54] (step=0001510) Train Loss: 0.4771, Train Steps/Sec: 0.13, Epoch: 0.29544120524359224, LR: 0.0003 +[2026-02-27 08:25:02] (step=0001511) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.2956368616709059, LR: 0.0003 +[2026-02-27 08:25:09] (step=0001512) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.29583251809821953, LR: 0.0003 +[2026-02-27 08:25:17] (step=0001513) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.29602817452553315, LR: 0.0003 +[2026-02-27 08:25:25] (step=0001514) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.2962238309528468, LR: 0.0003 +[2026-02-27 08:25:33] (step=0001515) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.29641948738016044, LR: 0.0003 +[2026-02-27 08:25:41] (step=0001516) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.29661514380747406, LR: 0.0003 +[2026-02-27 08:25:49] (step=0001517) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.29681080023478773, LR: 0.0003 +[2026-02-27 08:25:57] (step=0001518) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.29700645666210135, LR: 0.0003 +[2026-02-27 08:26:04] (step=0001519) Train Loss: 0.4818, Train Steps/Sec: 0.13, Epoch: 0.29720211308941497, LR: 0.0003 +[2026-02-27 08:26:12] (step=0001520) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.29739776951672864, LR: 0.0003 +[2026-02-27 08:26:20] (step=0001521) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.29759342594404226, LR: 0.0003 +[2026-02-27 08:26:28] (step=0001522) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.2977890823713559, LR: 0.0003 +[2026-02-27 08:26:36] (step=0001523) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.29798473879866955, LR: 0.0003 +[2026-02-27 08:26:44] (step=0001524) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.29818039522598316, LR: 0.0003 +[2026-02-27 08:26:52] (step=0001525) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.29837605165329684, LR: 0.0003 +[2026-02-27 08:26:59] (step=0001526) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.29857170808061045, LR: 0.0003 +[2026-02-27 08:27:07] (step=0001527) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.29876736450792407, LR: 0.0003 +[2026-02-27 08:27:15] (step=0001528) Train Loss: 0.4736, Train Steps/Sec: 0.13, Epoch: 0.29896302093523774, LR: 0.0003 +[2026-02-27 08:27:23] (step=0001529) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.29915867736255136, LR: 0.0003 +[2026-02-27 08:27:31] (step=0001530) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.299354333789865, LR: 0.0003 +[2026-02-27 08:27:39] (step=0001531) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.29954999021717865, LR: 0.0003 +[2026-02-27 08:27:47] (step=0001532) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.29974564664449227, LR: 0.0003 +[2026-02-27 08:27:54] (step=0001533) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.2999413030718059, LR: 0.0003 +[2026-02-27 08:28:02] (step=0001534) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.30013695949911956, LR: 0.0003 +[2026-02-27 08:28:10] (step=0001535) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.3003326159264332, LR: 0.0003 +[2026-02-27 08:28:18] (step=0001536) Train Loss: 0.4876, Train Steps/Sec: 0.13, Epoch: 0.3005282723537468, LR: 0.0003 +[2026-02-27 08:28:26] (step=0001537) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.30072392878106047, LR: 0.0003 +[2026-02-27 08:28:34] (step=0001538) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.3009195852083741, LR: 0.0003 +[2026-02-27 08:28:41] (step=0001539) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.30111524163568776, LR: 0.0003 +[2026-02-27 08:28:49] (step=0001540) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.3013108980630014, LR: 0.0003 +[2026-02-27 08:28:57] (step=0001541) Train Loss: 0.4811, Train Steps/Sec: 0.13, Epoch: 0.301506554490315, LR: 0.0003 +[2026-02-27 08:29:05] (step=0001542) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.30170221091762867, LR: 0.0003 +[2026-02-27 08:29:13] (step=0001543) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.3018978673449423, LR: 0.0003 +[2026-02-27 08:29:21] (step=0001544) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.3020935237722559, LR: 0.0003 +[2026-02-27 08:29:29] (step=0001545) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.3022891801995696, LR: 0.0003 +[2026-02-27 08:29:36] (step=0001546) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.3024848366268832, LR: 0.0003 +[2026-02-27 08:29:44] (step=0001547) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.3026804930541968, LR: 0.0003 +[2026-02-27 08:29:52] (step=0001548) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.3028761494815105, LR: 0.0003 +[2026-02-27 08:30:00] (step=0001549) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.3030718059088241, LR: 0.0003 +[2026-02-27 08:30:08] (step=0001550) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.3032674623361377, LR: 0.0003 +[2026-02-27 08:30:16] (step=0001551) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.3034631187634514, LR: 0.0003 +[2026-02-27 08:30:24] (step=0001552) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.303658775190765, LR: 0.0003 +[2026-02-27 08:30:32] (step=0001553) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.3038544316180787, LR: 0.0003 +[2026-02-27 08:30:39] (step=0001554) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.3040500880453923, LR: 0.0003 +[2026-02-27 08:30:47] (step=0001555) Train Loss: 0.4670, Train Steps/Sec: 0.12, Epoch: 0.3042457444727059, LR: 0.0003 +[2026-02-27 08:30:55] (step=0001556) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.3044414009000196, LR: 0.0003 +[2026-02-27 08:31:03] (step=0001557) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.3046370573273332, LR: 0.0003 +[2026-02-27 08:31:11] (step=0001558) Train Loss: 0.4776, Train Steps/Sec: 0.13, Epoch: 0.3048327137546468, LR: 0.0003 +[2026-02-27 08:31:19] (step=0001559) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.3050283701819605, LR: 0.0003 +[2026-02-27 08:31:27] (step=0001560) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.3052240266092741, LR: 0.0003 +[2026-02-27 08:31:35] (step=0001561) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.30541968303658773, LR: 0.0003 +[2026-02-27 08:31:42] (step=0001562) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.3056153394639014, LR: 0.0003 +[2026-02-27 08:31:50] (step=0001563) Train Loss: 0.4766, Train Steps/Sec: 0.13, Epoch: 0.305810995891215, LR: 0.0003 +[2026-02-27 08:31:58] (step=0001564) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.30600665231852864, LR: 0.0003 +[2026-02-27 08:32:06] (step=0001565) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.3062023087458423, LR: 0.0003 +[2026-02-27 08:32:14] (step=0001566) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.30639796517315593, LR: 0.0003 +[2026-02-27 08:32:22] (step=0001567) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.3065936216004696, LR: 0.0003 +[2026-02-27 08:32:30] (step=0001568) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 0.3067892780277832, LR: 0.0003 +[2026-02-27 08:32:37] (step=0001569) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.30698493445509684, LR: 0.0003 +[2026-02-27 08:32:45] (step=0001570) Train Loss: 0.4758, Train Steps/Sec: 0.13, Epoch: 0.3071805908824105, LR: 0.0003 +[2026-02-27 08:32:53] (step=0001571) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 0.3073762473097241, LR: 0.0003 +[2026-02-27 08:33:01] (step=0001572) Train Loss: 0.4755, Train Steps/Sec: 0.13, Epoch: 0.30757190373703774, LR: 0.0003 +[2026-02-27 08:33:09] (step=0001573) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.3077675601643514, LR: 0.0003 +[2026-02-27 08:33:17] (step=0001574) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.30796321659166503, LR: 0.0003 +[2026-02-27 08:33:25] (step=0001575) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.30815887301897865, LR: 0.0003 +[2026-02-27 08:33:32] (step=0001576) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.3083545294462923, LR: 0.0003 +[2026-02-27 08:33:40] (step=0001577) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.30855018587360594, LR: 0.0003 +[2026-02-27 08:33:48] (step=0001578) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.30874584230091956, LR: 0.0003 +[2026-02-27 08:33:56] (step=0001579) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.30894149872823323, LR: 0.0003 +[2026-02-27 08:34:04] (step=0001580) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.30913715515554685, LR: 0.0003 +[2026-02-27 08:34:12] (step=0001581) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.3093328115828605, LR: 0.0003 +[2026-02-27 08:34:19] (step=0001582) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.30952846801017414, LR: 0.0003 +[2026-02-27 08:34:27] (step=0001583) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.30972412443748776, LR: 0.0003 +[2026-02-27 08:34:35] (step=0001584) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.30991978086480143, LR: 0.0003 +[2026-02-27 08:34:43] (step=0001585) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.31011543729211505, LR: 0.0003 +[2026-02-27 08:34:51] (step=0001586) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.31031109371942867, LR: 0.0003 +[2026-02-27 08:34:59] (step=0001587) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.31050675014674234, LR: 0.0003 +[2026-02-27 08:35:07] (step=0001588) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.31070240657405596, LR: 0.0003 +[2026-02-27 08:35:15] (step=0001589) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.3108980630013696, LR: 0.0003 +[2026-02-27 08:35:22] (step=0001590) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.31109371942868325, LR: 0.0003 +[2026-02-27 08:35:30] (step=0001591) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.31128937585599686, LR: 0.0003 +[2026-02-27 08:35:38] (step=0001592) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.3114850322833105, LR: 0.0003 +[2026-02-27 08:35:46] (step=0001593) Train Loss: 0.4802, Train Steps/Sec: 0.13, Epoch: 0.31168068871062415, LR: 0.0003 +[2026-02-27 08:35:54] (step=0001594) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.31187634513793777, LR: 0.0003 +[2026-02-27 08:36:02] (step=0001595) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.31207200156525144, LR: 0.0003 +[2026-02-27 08:36:10] (step=0001596) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.31226765799256506, LR: 0.0003 +[2026-02-27 08:36:17] (step=0001597) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.3124633144198787, LR: 0.0003 +[2026-02-27 08:36:25] (step=0001598) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.31265897084719235, LR: 0.0003 +[2026-02-27 08:36:33] (step=0001599) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.31285462727450597, LR: 0.0003 +[2026-02-27 08:36:41] (step=0001600) Train Loss: 0.4854, Train Steps/Sec: 0.13, Epoch: 0.3130502837018196, LR: 0.0003 +[2026-02-27 08:36:49] (step=0001601) Train Loss: 0.4786, Train Steps/Sec: 0.13, Epoch: 0.31324594012913326, LR: 0.0003 +[2026-02-27 08:36:57] (step=0001602) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.3134415965564469, LR: 0.0003 +[2026-02-27 08:37:05] (step=0001603) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.3136372529837605, LR: 0.0003 +[2026-02-27 08:37:13] (step=0001604) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.31383290941107417, LR: 0.0003 +[2026-02-27 08:37:21] (step=0001605) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.3140285658383878, LR: 0.0003 +[2026-02-27 08:37:28] (step=0001606) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.3142242222657014, LR: 0.0003 +[2026-02-27 08:37:36] (step=0001607) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.3144198786930151, LR: 0.0003 +[2026-02-27 08:37:44] (step=0001608) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.3146155351203287, LR: 0.0003 +[2026-02-27 08:37:52] (step=0001609) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.31481119154764237, LR: 0.0003 +[2026-02-27 08:38:00] (step=0001610) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.315006847974956, LR: 0.0003 +[2026-02-27 08:38:08] (step=0001611) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.3152025044022696, LR: 0.0003 +[2026-02-27 08:38:16] (step=0001612) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.3153981608295833, LR: 0.0003 +[2026-02-27 08:38:23] (step=0001613) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.3155938172568969, LR: 0.0003 +[2026-02-27 08:38:31] (step=0001614) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.3157894736842105, LR: 0.0003 +[2026-02-27 08:38:39] (step=0001615) Train Loss: 0.4737, Train Steps/Sec: 0.13, Epoch: 0.3159851301115242, LR: 0.0003 +[2026-02-27 08:38:47] (step=0001616) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.3161807865388378, LR: 0.0003 +[2026-02-27 08:38:55] (step=0001617) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.3163764429661514, LR: 0.0003 +[2026-02-27 08:39:03] (step=0001618) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.3165720993934651, LR: 0.0003 +[2026-02-27 08:39:11] (step=0001619) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.3167677558207787, LR: 0.0003 +[2026-02-27 08:39:18] (step=0001620) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.3169634122480923, LR: 0.0003 +[2026-02-27 08:39:26] (step=0001621) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.317159068675406, LR: 0.0003 +[2026-02-27 08:39:34] (step=0001622) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.3173547251027196, LR: 0.0003 +[2026-02-27 08:39:42] (step=0001623) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.3175503815300333, LR: 0.0003 +[2026-02-27 08:39:50] (step=0001624) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.3177460379573469, LR: 0.0003 +[2026-02-27 08:39:58] (step=0001625) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.3179416943846605, LR: 0.0003 +[2026-02-27 08:40:06] (step=0001626) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.3181373508119742, LR: 0.0003 +[2026-02-27 08:40:13] (step=0001627) Train Loss: 0.4751, Train Steps/Sec: 0.13, Epoch: 0.3183330072392878, LR: 0.0003 +[2026-02-27 08:40:21] (step=0001628) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.31852866366660143, LR: 0.0003 +[2026-02-27 08:40:29] (step=0001629) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.3187243200939151, LR: 0.0003 +[2026-02-27 08:40:37] (step=0001630) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.3189199765212287, LR: 0.0003 +[2026-02-27 08:40:45] (step=0001631) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.31911563294854234, LR: 0.0003 +[2026-02-27 08:40:53] (step=0001632) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.319311289375856, LR: 0.0003 +[2026-02-27 08:41:01] (step=0001633) Train Loss: 0.4751, Train Steps/Sec: 0.13, Epoch: 0.31950694580316963, LR: 0.0003 +[2026-02-27 08:41:08] (step=0001634) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.31970260223048325, LR: 0.0003 +[2026-02-27 08:41:16] (step=0001635) Train Loss: 0.4755, Train Steps/Sec: 0.13, Epoch: 0.3198982586577969, LR: 0.0003 +[2026-02-27 08:41:24] (step=0001636) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.32009391508511054, LR: 0.0003 +[2026-02-27 08:41:32] (step=0001637) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.3202895715124242, LR: 0.0003 +[2026-02-27 08:41:40] (step=0001638) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.3204852279397378, LR: 0.0003 +[2026-02-27 08:41:48] (step=0001639) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.32068088436705144, LR: 0.0003 +[2026-02-27 08:41:56] (step=0001640) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.3208765407943651, LR: 0.0003 +[2026-02-27 08:42:03] (step=0001641) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.32107219722167873, LR: 0.0003 +[2026-02-27 08:42:11] (step=0001642) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.32126785364899235, LR: 0.0003 +[2026-02-27 08:42:19] (step=0001643) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.321463510076306, LR: 0.0003 +[2026-02-27 08:42:27] (step=0001644) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.32165916650361964, LR: 0.0003 +[2026-02-27 08:42:35] (step=0001645) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.32185482293093326, LR: 0.0003 +[2026-02-27 08:42:43] (step=0001646) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.32205047935824693, LR: 0.0003 +[2026-02-27 08:42:51] (step=0001647) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.32224613578556055, LR: 0.0003 +[2026-02-27 08:42:58] (step=0001648) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.32244179221287417, LR: 0.0003 +[2026-02-27 08:43:06] (step=0001649) Train Loss: 0.4698, Train Steps/Sec: 0.13, Epoch: 0.32263744864018784, LR: 0.0003 +[2026-02-27 08:43:14] (step=0001650) Train Loss: 0.4715, Train Steps/Sec: 0.12, Epoch: 0.32283310506750146, LR: 0.0003 +[2026-02-27 08:43:22] (step=0001651) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.32302876149481513, LR: 0.0003 +[2026-02-27 08:43:30] (step=0001652) Train Loss: 0.4629, Train Steps/Sec: 0.12, Epoch: 0.32322441792212875, LR: 0.0003 +[2026-02-27 08:43:38] (step=0001653) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.32342007434944237, LR: 0.0003 +[2026-02-27 08:43:46] (step=0001654) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.32361573077675604, LR: 0.0003 +[2026-02-27 08:43:54] (step=0001655) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.32381138720406966, LR: 0.0003 +[2026-02-27 08:44:02] (step=0001656) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.3240070436313833, LR: 0.0003 +[2026-02-27 08:44:09] (step=0001657) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.32420270005869695, LR: 0.0003 +[2026-02-27 08:44:17] (step=0001658) Train Loss: 0.4792, Train Steps/Sec: 0.13, Epoch: 0.32439835648601056, LR: 0.0003 +[2026-02-27 08:44:25] (step=0001659) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.3245940129133242, LR: 0.0003 +[2026-02-27 08:44:33] (step=0001660) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.32478966934063785, LR: 0.0003 +[2026-02-27 08:44:41] (step=0001661) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.32498532576795147, LR: 0.0003 +[2026-02-27 08:44:49] (step=0001662) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.3251809821952651, LR: 0.0003 +[2026-02-27 08:44:57] (step=0001663) Train Loss: 0.4749, Train Steps/Sec: 0.13, Epoch: 0.32537663862257876, LR: 0.0003 +[2026-02-27 08:45:04] (step=0001664) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.3255722950498924, LR: 0.0003 +[2026-02-27 08:45:12] (step=0001665) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.32576795147720605, LR: 0.0003 +[2026-02-27 08:45:20] (step=0001666) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.32596360790451967, LR: 0.0003 +[2026-02-27 08:45:28] (step=0001667) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 0.3261592643318333, LR: 0.0003 +[2026-02-27 08:45:36] (step=0001668) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.32635492075914696, LR: 0.0003 +[2026-02-27 08:45:44] (step=0001669) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.3265505771864606, LR: 0.0003 +[2026-02-27 08:45:52] (step=0001670) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.3267462336137742, LR: 0.0003 +[2026-02-27 08:45:59] (step=0001671) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.32694189004108787, LR: 0.0003 +[2026-02-27 08:46:07] (step=0001672) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.3271375464684015, LR: 0.0003 +[2026-02-27 08:46:15] (step=0001673) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.3273332028957151, LR: 0.0003 +[2026-02-27 08:46:23] (step=0001674) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.3275288593230288, LR: 0.0003 +[2026-02-27 08:46:31] (step=0001675) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.3277245157503424, LR: 0.0003 +[2026-02-27 08:46:39] (step=0001676) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.327920172177656, LR: 0.0003 +[2026-02-27 08:46:47] (step=0001677) Train Loss: 0.4849, Train Steps/Sec: 0.13, Epoch: 0.3281158286049697, LR: 0.0003 +[2026-02-27 08:46:54] (step=0001678) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.3283114850322833, LR: 0.0003 +[2026-02-27 08:47:02] (step=0001679) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.328507141459597, LR: 0.0003 +[2026-02-27 08:47:10] (step=0001680) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.3287027978869106, LR: 0.0003 +[2026-02-27 08:47:18] (step=0001681) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.3288984543142242, LR: 0.0003 +[2026-02-27 08:47:26] (step=0001682) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 0.3290941107415379, LR: 0.0003 +[2026-02-27 08:47:34] (step=0001683) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.3292897671688515, LR: 0.0003 +[2026-02-27 08:47:42] (step=0001684) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.3294854235961651, LR: 0.0003 +[2026-02-27 08:47:49] (step=0001685) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.3296810800234788, LR: 0.0003 +[2026-02-27 08:47:57] (step=0001686) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.3298767364507924, LR: 0.0003 +[2026-02-27 08:48:05] (step=0001687) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.330072392878106, LR: 0.0003 +[2026-02-27 08:48:13] (step=0001688) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.3302680493054197, LR: 0.0003 +[2026-02-27 08:48:21] (step=0001689) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.3304637057327333, LR: 0.0003 +[2026-02-27 08:48:29] (step=0001690) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.33065936216004693, LR: 0.0003 +[2026-02-27 08:48:37] (step=0001691) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.3308550185873606, LR: 0.0003 +[2026-02-27 08:48:44] (step=0001692) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 0.3310506750146742, LR: 0.0003 +[2026-02-27 08:48:52] (step=0001693) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.3312463314419879, LR: 0.0003 +[2026-02-27 08:49:00] (step=0001694) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.3314419878693015, LR: 0.0003 +[2026-02-27 08:49:08] (step=0001695) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.33163764429661513, LR: 0.0003 +[2026-02-27 08:49:16] (step=0001696) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.3318333007239288, LR: 0.0003 +[2026-02-27 08:49:24] (step=0001697) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.3320289571512424, LR: 0.0003 +[2026-02-27 08:49:32] (step=0001698) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.33222461357855604, LR: 0.0003 +[2026-02-27 08:49:40] (step=0001699) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.3324202700058697, LR: 0.0003 +[2026-02-27 08:49:48] (step=0001700) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.33261592643318333, LR: 0.0003 +[2026-02-27 08:49:55] (step=0001701) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.33281158286049695, LR: 0.0003 +[2026-02-27 08:50:03] (step=0001702) Train Loss: 0.4780, Train Steps/Sec: 0.13, Epoch: 0.3330072392878106, LR: 0.0003 +[2026-02-27 08:50:11] (step=0001703) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.33320289571512424, LR: 0.0003 +[2026-02-27 08:50:19] (step=0001704) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.33339855214243785, LR: 0.0003 +[2026-02-27 08:50:27] (step=0001705) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.3335942085697515, LR: 0.0003 +[2026-02-27 08:50:35] (step=0001706) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.33378986499706514, LR: 0.0003 +[2026-02-27 08:50:43] (step=0001707) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.3339855214243788, LR: 0.0003 +[2026-02-27 08:50:50] (step=0001708) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.33418117785169243, LR: 0.0003 +[2026-02-27 08:50:58] (step=0001709) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 0.33437683427900605, LR: 0.0003 +[2026-02-27 08:51:06] (step=0001710) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.3345724907063197, LR: 0.0003 +[2026-02-27 08:51:14] (step=0001711) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.33476814713363334, LR: 0.0003 +[2026-02-27 08:51:22] (step=0001712) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.33496380356094696, LR: 0.0003 +[2026-02-27 08:51:30] (step=0001713) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.33515945998826063, LR: 0.0003 +[2026-02-27 08:51:38] (step=0001714) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.33535511641557425, LR: 0.0003 +[2026-02-27 08:51:45] (step=0001715) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.33555077284288787, LR: 0.0003 +[2026-02-27 08:51:53] (step=0001716) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.33574642927020154, LR: 0.0003 +[2026-02-27 08:52:01] (step=0001717) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.33594208569751516, LR: 0.0003 +[2026-02-27 08:52:09] (step=0001718) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.3361377421248288, LR: 0.0003 +[2026-02-27 08:52:17] (step=0001719) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 0.33633339855214245, LR: 0.0003 +[2026-02-27 08:52:25] (step=0001720) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.33652905497945607, LR: 0.0003 +[2026-02-27 08:52:33] (step=0001721) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.33672471140676974, LR: 0.0003 +[2026-02-27 08:52:40] (step=0001722) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.33692036783408336, LR: 0.0003 +[2026-02-27 08:52:48] (step=0001723) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.337116024261397, LR: 0.0003 +[2026-02-27 08:52:56] (step=0001724) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.33731168068871065, LR: 0.0003 +[2026-02-27 08:53:04] (step=0001725) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.33750733711602426, LR: 0.0003 +[2026-02-27 08:53:12] (step=0001726) Train Loss: 0.4750, Train Steps/Sec: 0.13, Epoch: 0.3377029935433379, LR: 0.0003 +[2026-02-27 08:53:20] (step=0001727) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.33789864997065155, LR: 0.0003 +[2026-02-27 08:53:28] (step=0001728) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.33809430639796517, LR: 0.0003 +[2026-02-27 08:53:35] (step=0001729) Train Loss: 0.4778, Train Steps/Sec: 0.13, Epoch: 0.3382899628252788, LR: 0.0003 +[2026-02-27 08:53:43] (step=0001730) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.33848561925259246, LR: 0.0003 +[2026-02-27 08:53:51] (step=0001731) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.3386812756799061, LR: 0.0003 +[2026-02-27 08:53:59] (step=0001732) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.3388769321072197, LR: 0.0003 +[2026-02-27 08:54:07] (step=0001733) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.33907258853453337, LR: 0.0003 +[2026-02-27 08:54:15] (step=0001734) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 0.339268244961847, LR: 0.0003 +[2026-02-27 08:54:23] (step=0001735) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.33946390138916066, LR: 0.0003 +[2026-02-27 08:54:30] (step=0001736) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.3396595578164743, LR: 0.0003 +[2026-02-27 08:54:38] (step=0001737) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.3398552142437879, LR: 0.0003 +[2026-02-27 08:54:46] (step=0001738) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 0.34005087067110157, LR: 0.0003 +[2026-02-27 08:54:54] (step=0001739) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.3402465270984152, LR: 0.0003 +[2026-02-27 08:55:02] (step=0001740) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.3404421835257288, LR: 0.0003 +[2026-02-27 08:55:10] (step=0001741) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.3406378399530425, LR: 0.0003 +[2026-02-27 08:55:18] (step=0001742) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.3408334963803561, LR: 0.0003 +[2026-02-27 08:55:25] (step=0001743) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.3410291528076697, LR: 0.0003 +[2026-02-27 08:55:33] (step=0001744) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.3412248092349834, LR: 0.0003 +[2026-02-27 08:55:42] (step=0001745) Train Loss: 0.4661, Train Steps/Sec: 0.12, Epoch: 0.341420465662297, LR: 0.0003 +[2026-02-27 08:55:49] (step=0001746) Train Loss: 0.4752, Train Steps/Sec: 0.13, Epoch: 0.3416161220896106, LR: 0.0003 +[2026-02-27 08:55:57] (step=0001747) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.3418117785169243, LR: 0.0003 +[2026-02-27 08:56:05] (step=0001748) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.3420074349442379, LR: 0.0003 +[2026-02-27 08:56:13] (step=0001749) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.3422030913715516, LR: 0.0003 +[2026-02-27 08:56:21] (step=0001750) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.3423987477988652, LR: 0.0003 +[2026-02-27 08:56:29] (step=0001751) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.3425944042261788, LR: 0.0003 +[2026-02-27 08:56:36] (step=0001752) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.3427900606534925, LR: 0.0003 +[2026-02-27 08:56:44] (step=0001753) Train Loss: 0.4749, Train Steps/Sec: 0.13, Epoch: 0.3429857170808061, LR: 0.0003 +[2026-02-27 08:56:52] (step=0001754) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.3431813735081197, LR: 0.0003 +[2026-02-27 08:57:00] (step=0001755) Train Loss: 0.4764, Train Steps/Sec: 0.13, Epoch: 0.3433770299354334, LR: 0.0003 +[2026-02-27 08:57:08] (step=0001756) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.343572686362747, LR: 0.0003 +[2026-02-27 08:57:16] (step=0001757) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.34376834279006063, LR: 0.0003 +[2026-02-27 08:57:24] (step=0001758) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.3439639992173743, LR: 0.0003 +[2026-02-27 08:57:32] (step=0001759) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.3441596556446879, LR: 0.0003 +[2026-02-27 08:57:39] (step=0001760) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.34435531207200154, LR: 0.0003 +[2026-02-27 08:57:47] (step=0001761) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.3445509684993152, LR: 0.0003 +[2026-02-27 08:57:55] (step=0001762) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.34474662492662883, LR: 0.0003 +[2026-02-27 08:58:03] (step=0001763) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.3449422813539425, LR: 0.0003 +[2026-02-27 08:58:11] (step=0001764) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.3451379377812561, LR: 0.0003 +[2026-02-27 08:58:19] (step=0001765) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.34533359420856974, LR: 0.0003 +[2026-02-27 08:58:27] (step=0001766) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.3455292506358834, LR: 0.0003 +[2026-02-27 08:58:34] (step=0001767) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.34572490706319703, LR: 0.0003 +[2026-02-27 08:58:42] (step=0001768) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.34592056349051065, LR: 0.0003 +[2026-02-27 08:58:50] (step=0001769) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.3461162199178243, LR: 0.0003 +[2026-02-27 08:58:58] (step=0001770) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.34631187634513794, LR: 0.0003 +[2026-02-27 08:59:06] (step=0001771) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.34650753277245155, LR: 0.0003 +[2026-02-27 08:59:14] (step=0001772) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.3467031891997652, LR: 0.0003 +[2026-02-27 08:59:22] (step=0001773) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.34689884562707884, LR: 0.0003 +[2026-02-27 08:59:29] (step=0001774) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.34709450205439246, LR: 0.0003 +[2026-02-27 08:59:37] (step=0001775) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.34729015848170613, LR: 0.0003 +[2026-02-27 08:59:45] (step=0001776) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.34748581490901975, LR: 0.0003 +[2026-02-27 08:59:53] (step=0001777) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.3476814713363334, LR: 0.0003 +[2026-02-27 09:00:01] (step=0001778) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.34787712776364704, LR: 0.0003 +[2026-02-27 09:00:09] (step=0001779) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.34807278419096066, LR: 0.0003 +[2026-02-27 09:00:17] (step=0001780) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.34826844061827433, LR: 0.0003 +[2026-02-27 09:00:24] (step=0001781) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.34846409704558795, LR: 0.0003 +[2026-02-27 09:00:32] (step=0001782) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.34865975347290157, LR: 0.0003 +[2026-02-27 09:00:40] (step=0001783) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.34885540990021524, LR: 0.0003 +[2026-02-27 09:00:48] (step=0001784) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.34905106632752886, LR: 0.0003 +[2026-02-27 09:00:56] (step=0001785) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.3492467227548425, LR: 0.0003 +[2026-02-27 09:01:04] (step=0001786) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.34944237918215615, LR: 0.0003 +[2026-02-27 09:01:12] (step=0001787) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 0.34963803560946977, LR: 0.0003 +[2026-02-27 09:01:19] (step=0001788) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.3498336920367834, LR: 0.0003 +[2026-02-27 09:01:27] (step=0001789) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.35002934846409706, LR: 0.0003 +[2026-02-27 09:01:35] (step=0001790) Train Loss: 0.4620, Train Steps/Sec: 0.12, Epoch: 0.3502250048914107, LR: 0.0003 +[2026-02-27 09:01:43] (step=0001791) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.35042066131872435, LR: 0.0003 +[2026-02-27 09:01:51] (step=0001792) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.35061631774603796, LR: 0.0003 +[2026-02-27 09:01:59] (step=0001793) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.3508119741733516, LR: 0.0003 +[2026-02-27 09:02:07] (step=0001794) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.35100763060066525, LR: 0.0003 +[2026-02-27 09:02:15] (step=0001795) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.35120328702797887, LR: 0.0003 +[2026-02-27 09:02:23] (step=0001796) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.3513989434552925, LR: 0.0003 +[2026-02-27 09:02:30] (step=0001797) Train Loss: 0.4803, Train Steps/Sec: 0.13, Epoch: 0.35159459988260616, LR: 0.0003 +[2026-02-27 09:02:38] (step=0001798) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.3517902563099198, LR: 0.0003 +[2026-02-27 09:02:46] (step=0001799) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.3519859127372334, LR: 0.0003 +[2026-02-27 09:02:54] (step=0001800) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.35218156916454707, LR: 0.0003 +[2026-02-27 09:03:02] (step=0001801) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.3523772255918607, LR: 0.0003 +[2026-02-27 09:03:10] (step=0001802) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.3525728820191743, LR: 0.0003 +[2026-02-27 09:03:18] (step=0001803) Train Loss: 0.4811, Train Steps/Sec: 0.13, Epoch: 0.352768538446488, LR: 0.0003 +[2026-02-27 09:03:25] (step=0001804) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.3529641948738016, LR: 0.0003 +[2026-02-27 09:03:33] (step=0001805) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.35315985130111527, LR: 0.0003 +[2026-02-27 09:03:41] (step=0001806) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.3533555077284289, LR: 0.0003 +[2026-02-27 09:03:49] (step=0001807) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.3535511641557425, LR: 0.0003 +[2026-02-27 09:03:57] (step=0001808) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.3537468205830562, LR: 0.0003 +[2026-02-27 09:04:05] (step=0001809) Train Loss: 0.4818, Train Steps/Sec: 0.13, Epoch: 0.3539424770103698, LR: 0.0003 +[2026-02-27 09:04:13] (step=0001810) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.3541381334376834, LR: 0.0003 +[2026-02-27 09:04:20] (step=0001811) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.3543337898649971, LR: 0.0003 +[2026-02-27 09:04:28] (step=0001812) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.3545294462923107, LR: 0.0003 +[2026-02-27 09:04:36] (step=0001813) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.3547251027196243, LR: 0.0003 +[2026-02-27 09:04:44] (step=0001814) Train Loss: 0.4746, Train Steps/Sec: 0.13, Epoch: 0.354920759146938, LR: 0.0003 +[2026-02-27 09:04:52] (step=0001815) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.3551164155742516, LR: 0.0003 +[2026-02-27 09:05:00] (step=0001816) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.3553120720015652, LR: 0.0003 +[2026-02-27 09:05:08] (step=0001817) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.3555077284288789, LR: 0.0003 +[2026-02-27 09:05:15] (step=0001818) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.3557033848561925, LR: 0.0003 +[2026-02-27 09:05:23] (step=0001819) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.3558990412835062, LR: 0.0003 +[2026-02-27 09:05:31] (step=0001820) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.3560946977108198, LR: 0.0003 +[2026-02-27 09:05:39] (step=0001821) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.3562903541381334, LR: 0.0003 +[2026-02-27 09:05:47] (step=0001822) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.3564860105654471, LR: 0.0003 +[2026-02-27 09:05:55] (step=0001823) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.3566816669927607, LR: 0.0003 +[2026-02-27 09:06:03] (step=0001824) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.35687732342007433, LR: 0.0003 +[2026-02-27 09:06:11] (step=0001825) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.357072979847388, LR: 0.0003 +[2026-02-27 09:06:18] (step=0001826) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.3572686362747016, LR: 0.0003 +[2026-02-27 09:06:26] (step=0001827) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.35746429270201524, LR: 0.0003 +[2026-02-27 09:06:34] (step=0001828) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.3576599491293289, LR: 0.0003 +[2026-02-27 09:06:42] (step=0001829) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.35785560555664253, LR: 0.0003 +[2026-02-27 09:06:50] (step=0001830) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.35805126198395615, LR: 0.0003 +[2026-02-27 09:06:58] (step=0001831) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.3582469184112698, LR: 0.0003 +[2026-02-27 09:07:06] (step=0001832) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.35844257483858344, LR: 0.0003 +[2026-02-27 09:07:13] (step=0001833) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.3586382312658971, LR: 0.0003 +[2026-02-27 09:07:21] (step=0001834) Train Loss: 0.4768, Train Steps/Sec: 0.13, Epoch: 0.35883388769321073, LR: 0.0003 +[2026-02-27 09:07:29] (step=0001835) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.35902954412052435, LR: 0.0003 +[2026-02-27 09:07:37] (step=0001836) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.359225200547838, LR: 0.0003 +[2026-02-27 09:07:45] (step=0001837) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.35942085697515164, LR: 0.0003 +[2026-02-27 09:07:53] (step=0001838) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.35961651340246525, LR: 0.0003 +[2026-02-27 09:08:01] (step=0001839) Train Loss: 0.4775, Train Steps/Sec: 0.12, Epoch: 0.3598121698297789, LR: 0.0003 +[2026-02-27 09:08:09] (step=0001840) Train Loss: 0.4722, Train Steps/Sec: 0.12, Epoch: 0.36000782625709254, LR: 0.0003 +[2026-02-27 09:08:17] (step=0001841) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.36020348268440616, LR: 0.0003 +[2026-02-27 09:08:25] (step=0001842) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.36039913911171984, LR: 0.0003 +[2026-02-27 09:08:32] (step=0001843) Train Loss: 0.4771, Train Steps/Sec: 0.13, Epoch: 0.36059479553903345, LR: 0.0003 +[2026-02-27 09:08:40] (step=0001844) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.36079045196634707, LR: 0.0003 +[2026-02-27 09:08:48] (step=0001845) Train Loss: 0.4833, Train Steps/Sec: 0.13, Epoch: 0.36098610839366074, LR: 0.0003 +[2026-02-27 09:08:56] (step=0001846) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.36118176482097436, LR: 0.0003 +[2026-02-27 09:09:04] (step=0001847) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.36137742124828803, LR: 0.0003 +[2026-02-27 09:09:12] (step=0001848) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.36157307767560165, LR: 0.0003 +[2026-02-27 09:09:20] (step=0001849) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.36176873410291527, LR: 0.0003 +[2026-02-27 09:09:27] (step=0001850) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.36196439053022894, LR: 0.0003 +[2026-02-27 09:09:35] (step=0001851) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.36216004695754256, LR: 0.0003 +[2026-02-27 09:09:43] (step=0001852) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.3623557033848562, LR: 0.0003 +[2026-02-27 09:09:51] (step=0001853) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.36255135981216985, LR: 0.0003 +[2026-02-27 09:09:59] (step=0001854) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.36274701623948347, LR: 0.0003 +[2026-02-27 09:10:07] (step=0001855) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.3629426726667971, LR: 0.0003 +[2026-02-27 09:10:15] (step=0001856) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.36313832909411076, LR: 0.0003 +[2026-02-27 09:10:23] (step=0001857) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.3633339855214244, LR: 0.0003 +[2026-02-27 09:10:30] (step=0001858) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.363529641948738, LR: 0.0003 +[2026-02-27 09:10:38] (step=0001859) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.36372529837605166, LR: 0.0003 +[2026-02-27 09:10:46] (step=0001860) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.3639209548033653, LR: 0.0003 +[2026-02-27 09:10:54] (step=0001861) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.36411661123067895, LR: 0.0003 +[2026-02-27 09:11:02] (step=0001862) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.3643122676579926, LR: 0.0003 +[2026-02-27 09:11:10] (step=0001863) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.3645079240853062, LR: 0.0003 +[2026-02-27 09:11:18] (step=0001864) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.36470358051261986, LR: 0.0003 +[2026-02-27 09:11:25] (step=0001865) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.3648992369399335, LR: 0.0003 +[2026-02-27 09:11:33] (step=0001866) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.3650948933672471, LR: 0.0003 +[2026-02-27 09:11:41] (step=0001867) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.36529054979456077, LR: 0.0003 +[2026-02-27 09:11:49] (step=0001868) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.3654862062218744, LR: 0.0003 +[2026-02-27 09:11:57] (step=0001869) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.365681862649188, LR: 0.0003 +[2026-02-27 09:12:05] (step=0001870) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.3658775190765017, LR: 0.0003 +[2026-02-27 09:12:13] (step=0001871) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.3660731755038153, LR: 0.0003 +[2026-02-27 09:12:20] (step=0001872) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.3662688319311289, LR: 0.0003 +[2026-02-27 09:12:28] (step=0001873) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.3664644883584426, LR: 0.0003 +[2026-02-27 09:12:36] (step=0001874) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.3666601447857562, LR: 0.0003 +[2026-02-27 09:12:44] (step=0001875) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.3668558012130699, LR: 0.0003 +[2026-02-27 09:12:52] (step=0001876) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 0.3670514576403835, LR: 0.0003 +[2026-02-27 09:13:00] (step=0001877) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.3672471140676971, LR: 0.0003 +[2026-02-27 09:13:08] (step=0001878) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.3674427704950108, LR: 0.0003 +[2026-02-27 09:13:15] (step=0001879) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.3676384269223244, LR: 0.0003 +[2026-02-27 09:13:23] (step=0001880) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.367834083349638, LR: 0.0003 +[2026-02-27 09:13:31] (step=0001881) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.3680297397769517, LR: 0.0003 +[2026-02-27 09:13:39] (step=0001882) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.3682253962042653, LR: 0.0003 +[2026-02-27 09:13:47] (step=0001883) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.3684210526315789, LR: 0.0003 +[2026-02-27 09:13:55] (step=0001884) Train Loss: 0.4757, Train Steps/Sec: 0.13, Epoch: 0.3686167090588926, LR: 0.0003 +[2026-02-27 09:14:03] (step=0001885) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.3688123654862062, LR: 0.0003 +[2026-02-27 09:14:10] (step=0001886) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 0.36900802191351983, LR: 0.0003 +[2026-02-27 09:14:18] (step=0001887) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.3692036783408335, LR: 0.0003 +[2026-02-27 09:14:26] (step=0001888) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.3693993347681471, LR: 0.0003 +[2026-02-27 09:14:34] (step=0001889) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.3695949911954608, LR: 0.0003 +[2026-02-27 09:14:42] (step=0001890) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.3697906476227744, LR: 0.0003 +[2026-02-27 09:14:50] (step=0001891) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.36998630405008803, LR: 0.0003 +[2026-02-27 09:14:58] (step=0001892) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.3701819604774017, LR: 0.0003 +[2026-02-27 09:15:06] (step=0001893) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.3703776169047153, LR: 0.0003 +[2026-02-27 09:15:14] (step=0001894) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.37057327333202894, LR: 0.0003 +[2026-02-27 09:15:21] (step=0001895) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.3707689297593426, LR: 0.0003 +[2026-02-27 09:15:29] (step=0001896) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.37096458618665623, LR: 0.0003 +[2026-02-27 09:15:37] (step=0001897) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.37116024261396985, LR: 0.0003 +[2026-02-27 09:15:45] (step=0001898) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.3713558990412835, LR: 0.0003 +[2026-02-27 09:15:53] (step=0001899) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.37155155546859714, LR: 0.0003 +[2026-02-27 09:16:01] (step=0001900) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.37174721189591076, LR: 0.0003 +[2026-02-27 09:16:09] (step=0001901) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.37194286832322443, LR: 0.0003 +[2026-02-27 09:16:16] (step=0001902) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.37213852475053805, LR: 0.0003 +[2026-02-27 09:16:24] (step=0001903) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.3723341811778517, LR: 0.0003 +[2026-02-27 09:16:32] (step=0001904) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.37252983760516534, LR: 0.0003 +[2026-02-27 09:16:40] (step=0001905) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.37272549403247895, LR: 0.0003 +[2026-02-27 09:16:48] (step=0001906) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.3729211504597926, LR: 0.0003 +[2026-02-27 09:16:56] (step=0001907) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.37311680688710624, LR: 0.0003 +[2026-02-27 09:17:04] (step=0001908) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.37331246331441986, LR: 0.0003 +[2026-02-27 09:17:11] (step=0001909) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.37350811974173354, LR: 0.0003 +[2026-02-27 09:17:19] (step=0001910) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.37370377616904715, LR: 0.0003 +[2026-02-27 09:17:27] (step=0001911) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.37389943259636077, LR: 0.0003 +[2026-02-27 09:17:35] (step=0001912) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.37409508902367444, LR: 0.0003 +[2026-02-27 09:17:43] (step=0001913) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.37429074545098806, LR: 0.0003 +[2026-02-27 09:17:51] (step=0001914) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.3744864018783017, LR: 0.0003 +[2026-02-27 09:17:59] (step=0001915) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.37468205830561535, LR: 0.0003 +[2026-02-27 09:18:06] (step=0001916) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.37487771473292897, LR: 0.0003 +[2026-02-27 09:18:14] (step=0001917) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.37507337116024264, LR: 0.0003 +[2026-02-27 09:18:22] (step=0001918) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.37526902758755626, LR: 0.0003 +[2026-02-27 09:18:30] (step=0001919) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.3754646840148699, LR: 0.0003 +[2026-02-27 09:18:38] (step=0001920) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.37566034044218355, LR: 0.0003 +[2026-02-27 09:18:46] (step=0001921) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.37585599686949717, LR: 0.0003 +[2026-02-27 09:18:54] (step=0001922) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.3760516532968108, LR: 0.0003 +[2026-02-27 09:19:01] (step=0001923) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.37624730972412446, LR: 0.0003 +[2026-02-27 09:19:09] (step=0001924) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.3764429661514381, LR: 0.0003 +[2026-02-27 09:19:17] (step=0001925) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.3766386225787517, LR: 0.0003 +[2026-02-27 09:19:25] (step=0001926) Train Loss: 0.4774, Train Steps/Sec: 0.13, Epoch: 0.37683427900606536, LR: 0.0003 +[2026-02-27 09:19:33] (step=0001927) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.377029935433379, LR: 0.0003 +[2026-02-27 09:19:41] (step=0001928) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.3772255918606926, LR: 0.0003 +[2026-02-27 09:19:49] (step=0001929) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.3774212482880063, LR: 0.0003 +[2026-02-27 09:19:56] (step=0001930) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.3776169047153199, LR: 0.0003 +[2026-02-27 09:20:04] (step=0001931) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.37781256114263356, LR: 0.0003 +[2026-02-27 09:20:12] (step=0001932) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.3780082175699472, LR: 0.0003 +[2026-02-27 09:20:20] (step=0001933) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.3782038739972608, LR: 0.0003 +[2026-02-27 09:20:28] (step=0001934) Train Loss: 0.4605, Train Steps/Sec: 0.12, Epoch: 0.37839953042457447, LR: 0.0003 +[2026-02-27 09:20:36] (step=0001935) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.3785951868518881, LR: 0.0003 +[2026-02-27 09:20:44] (step=0001936) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.3787908432792017, LR: 0.0003 +[2026-02-27 09:20:52] (step=0001937) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.3789864997065154, LR: 0.0003 +[2026-02-27 09:21:00] (step=0001938) Train Loss: 0.4626, Train Steps/Sec: 0.12, Epoch: 0.379182156133829, LR: 0.0003 +[2026-02-27 09:21:08] (step=0001939) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.3793778125611426, LR: 0.0003 +[2026-02-27 09:21:15] (step=0001940) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.3795734689884563, LR: 0.0003 +[2026-02-27 09:21:23] (step=0001941) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.3797691254157699, LR: 0.0003 +[2026-02-27 09:21:31] (step=0001942) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.3799647818430835, LR: 0.0003 +[2026-02-27 09:21:39] (step=0001943) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.3801604382703972, LR: 0.0003 +[2026-02-27 09:21:47] (step=0001944) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.3803560946977108, LR: 0.0003 +[2026-02-27 09:21:55] (step=0001945) Train Loss: 0.4756, Train Steps/Sec: 0.13, Epoch: 0.3805517511250245, LR: 0.0003 +[2026-02-27 09:22:03] (step=0001946) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.3807474075523381, LR: 0.0003 +[2026-02-27 09:22:10] (step=0001947) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.3809430639796517, LR: 0.0003 +[2026-02-27 09:22:18] (step=0001948) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 0.3811387204069654, LR: 0.0003 +[2026-02-27 09:22:26] (step=0001949) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.381334376834279, LR: 0.0003 +[2026-02-27 09:22:34] (step=0001950) Train Loss: 0.4801, Train Steps/Sec: 0.13, Epoch: 0.3815300332615926, LR: 0.0003 +[2026-02-27 09:22:42] (step=0001951) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.3817256896889063, LR: 0.0003 +[2026-02-27 09:22:50] (step=0001952) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.3819213461162199, LR: 0.0003 +[2026-02-27 09:22:58] (step=0001953) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.38211700254353353, LR: 0.0003 +[2026-02-27 09:23:05] (step=0001954) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 0.3823126589708472, LR: 0.0003 +[2026-02-27 09:23:13] (step=0001955) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.3825083153981608, LR: 0.0003 +[2026-02-27 09:23:21] (step=0001956) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.38270397182547444, LR: 0.0003 +[2026-02-27 09:23:29] (step=0001957) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.3828996282527881, LR: 0.0003 +[2026-02-27 09:23:37] (step=0001958) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 0.38309528468010173, LR: 0.0003 +[2026-02-27 09:23:45] (step=0001959) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.3832909411074154, LR: 0.0003 +[2026-02-27 09:23:53] (step=0001960) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.383486597534729, LR: 0.0003 +[2026-02-27 09:24:00] (step=0001961) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.38368225396204264, LR: 0.0003 +[2026-02-27 09:24:08] (step=0001962) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.3838779103893563, LR: 0.0003 +[2026-02-27 09:24:16] (step=0001963) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.38407356681666993, LR: 0.0003 +[2026-02-27 09:24:24] (step=0001964) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.38426922324398355, LR: 0.0003 +[2026-02-27 09:24:32] (step=0001965) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.3844648796712972, LR: 0.0003 +[2026-02-27 09:24:40] (step=0001966) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 0.38466053609861084, LR: 0.0003 +[2026-02-27 09:24:48] (step=0001967) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.38485619252592446, LR: 0.0003 +[2026-02-27 09:24:56] (step=0001968) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.38505184895323813, LR: 0.0003 +[2026-02-27 09:25:03] (step=0001969) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.38524750538055175, LR: 0.0003 +[2026-02-27 09:25:11] (step=0001970) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.38544316180786536, LR: 0.0003 +[2026-02-27 09:25:19] (step=0001971) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.38563881823517904, LR: 0.0003 +[2026-02-27 09:25:27] (step=0001972) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.38583447466249265, LR: 0.0003 +[2026-02-27 09:25:35] (step=0001973) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.3860301310898063, LR: 0.0003 +[2026-02-27 09:25:43] (step=0001974) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.38622578751711995, LR: 0.0003 +[2026-02-27 09:25:51] (step=0001975) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 0.38642144394443356, LR: 0.0003 +[2026-02-27 09:25:58] (step=0001976) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.38661710037174724, LR: 0.0003 +[2026-02-27 09:26:06] (step=0001977) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.38681275679906085, LR: 0.0003 +[2026-02-27 09:26:14] (step=0001978) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.38700841322637447, LR: 0.0003 +[2026-02-27 09:26:22] (step=0001979) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.38720406965368814, LR: 0.0003 +[2026-02-27 09:26:30] (step=0001980) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.38739972608100176, LR: 0.0003 +[2026-02-27 09:26:38] (step=0001981) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.3875953825083154, LR: 0.0003 +[2026-02-27 09:26:46] (step=0001982) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.38779103893562905, LR: 0.0003 +[2026-02-27 09:26:54] (step=0001983) Train Loss: 0.4696, Train Steps/Sec: 0.12, Epoch: 0.38798669536294267, LR: 0.0003 +[2026-02-27 09:27:01] (step=0001984) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.3881823517902563, LR: 0.0003 +[2026-02-27 09:27:09] (step=0001985) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.38837800821756996, LR: 0.0003 +[2026-02-27 09:27:17] (step=0001986) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.3885736646448836, LR: 0.0003 +[2026-02-27 09:27:25] (step=0001987) Train Loss: 0.4758, Train Steps/Sec: 0.13, Epoch: 0.38876932107219725, LR: 0.0003 +[2026-02-27 09:27:33] (step=0001988) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.38896497749951087, LR: 0.0003 +[2026-02-27 09:27:41] (step=0001989) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.3891606339268245, LR: 0.0003 +[2026-02-27 09:27:49] (step=0001990) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 0.38935629035413816, LR: 0.0003 +[2026-02-27 09:27:57] (step=0001991) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.3895519467814518, LR: 0.0003 +[2026-02-27 09:28:04] (step=0001992) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.3897476032087654, LR: 0.0003 +[2026-02-27 09:28:12] (step=0001993) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.38994325963607906, LR: 0.0003 +[2026-02-27 09:28:20] (step=0001994) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.3901389160633927, LR: 0.0003 +[2026-02-27 09:28:28] (step=0001995) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.3903345724907063, LR: 0.0003 +[2026-02-27 09:28:36] (step=0001996) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.39053022891802, LR: 0.0003 +[2026-02-27 09:28:44] (step=0001997) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.3907258853453336, LR: 0.0003 +[2026-02-27 09:28:52] (step=0001998) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.3909215417726472, LR: 0.0003 +[2026-02-27 09:28:59] (step=0001999) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.3911171981999609, LR: 0.0003 +[2026-02-27 09:29:07] (step=0002000) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.3913128546272745, LR: 0.0003 +[2026-02-27 09:29:07] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0002000/ +[2026-02-27 09:29:15] (step=0002001) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.39150851105458817, LR: 0.0003 +[2026-02-27 09:29:23] (step=0002002) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.3917041674819018, LR: 0.0003 +[2026-02-27 09:29:31] (step=0002003) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.3918998239092154, LR: 0.0003 +[2026-02-27 09:29:39] (step=0002004) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.3920954803365291, LR: 0.0003 +[2026-02-27 09:29:47] (step=0002005) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.3922911367638427, LR: 0.0003 +[2026-02-27 09:29:55] (step=0002006) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.3924867931911563, LR: 0.0003 +[2026-02-27 09:30:02] (step=0002007) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.39268244961847, LR: 0.0003 +[2026-02-27 09:30:10] (step=0002008) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.3928781060457836, LR: 0.0003 +[2026-02-27 09:30:18] (step=0002009) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.3930737624730972, LR: 0.0003 +[2026-02-27 09:30:26] (step=0002010) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.3932694189004109, LR: 0.0003 +[2026-02-27 09:30:34] (step=0002011) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.3934650753277245, LR: 0.0003 +[2026-02-27 09:30:42] (step=0002012) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.39366073175503813, LR: 0.0003 +[2026-02-27 09:30:50] (step=0002013) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.3938563881823518, LR: 0.0003 +[2026-02-27 09:30:57] (step=0002014) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.3940520446096654, LR: 0.0003 +[2026-02-27 09:31:05] (step=0002015) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.3942477010369791, LR: 0.0003 +[2026-02-27 09:31:13] (step=0002016) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.3944433574642927, LR: 0.0003 +[2026-02-27 09:31:21] (step=0002017) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.3946390138916063, LR: 0.0003 +[2026-02-27 09:31:29] (step=0002018) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.39483467031892, LR: 0.0003 +[2026-02-27 09:31:37] (step=0002019) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 0.3950303267462336, LR: 0.0003 +[2026-02-27 09:31:45] (step=0002020) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.39522598317354724, LR: 0.0003 +[2026-02-27 09:31:52] (step=0002021) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.3954216396008609, LR: 0.0003 +[2026-02-27 09:32:00] (step=0002022) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.3956172960281745, LR: 0.0003 +[2026-02-27 09:32:08] (step=0002023) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.39581295245548814, LR: 0.0003 +[2026-02-27 09:32:16] (step=0002024) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.3960086088828018, LR: 0.0003 +[2026-02-27 09:32:24] (step=0002025) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.39620426531011543, LR: 0.0003 +[2026-02-27 09:32:32] (step=0002026) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.39639992173742905, LR: 0.0003 +[2026-02-27 09:32:40] (step=0002027) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.3965955781647427, LR: 0.0003 +[2026-02-27 09:32:48] (step=0002028) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.39679123459205634, LR: 0.0003 +[2026-02-27 09:32:55] (step=0002029) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.39698689101937, LR: 0.0003 +[2026-02-27 09:33:03] (step=0002030) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.39718254744668363, LR: 0.0003 +[2026-02-27 09:33:11] (step=0002031) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.39737820387399725, LR: 0.0003 +[2026-02-27 09:33:19] (step=0002032) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.3975738603013109, LR: 0.0003 +[2026-02-27 09:33:27] (step=0002033) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.39776951672862454, LR: 0.0003 +[2026-02-27 09:33:35] (step=0002034) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.39796517315593816, LR: 0.0003 +[2026-02-27 09:33:43] (step=0002035) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.39816082958325183, LR: 0.0003 +[2026-02-27 09:33:51] (step=0002036) Train Loss: 0.4737, Train Steps/Sec: 0.13, Epoch: 0.39835648601056545, LR: 0.0003 +[2026-02-27 09:33:58] (step=0002037) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.39855214243787906, LR: 0.0003 +[2026-02-27 09:34:06] (step=0002038) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.39874779886519274, LR: 0.0003 +[2026-02-27 09:34:14] (step=0002039) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.39894345529250635, LR: 0.0003 +[2026-02-27 09:34:22] (step=0002040) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.39913911171982, LR: 0.0003 +[2026-02-27 09:34:30] (step=0002041) Train Loss: 0.4743, Train Steps/Sec: 0.13, Epoch: 0.39933476814713365, LR: 0.0003 +[2026-02-27 09:34:38] (step=0002042) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.39953042457444726, LR: 0.0003 +[2026-02-27 09:34:46] (step=0002043) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.39972608100176094, LR: 0.0003 +[2026-02-27 09:34:53] (step=0002044) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.39992173742907455, LR: 0.0003 +[2026-02-27 09:35:01] (step=0002045) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.40011739385638817, LR: 0.0003 +[2026-02-27 09:35:09] (step=0002046) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.40031305028370184, LR: 0.0003 +[2026-02-27 09:35:17] (step=0002047) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.40050870671101546, LR: 0.0003 +[2026-02-27 09:35:25] (step=0002048) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.4007043631383291, LR: 0.0003 +[2026-02-27 09:35:33] (step=0002049) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.40090001956564275, LR: 0.0003 +[2026-02-27 09:35:41] (step=0002050) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.40109567599295637, LR: 0.0003 +[2026-02-27 09:35:49] (step=0002051) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.40129133242027, LR: 0.0003 +[2026-02-27 09:35:56] (step=0002052) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.40148698884758366, LR: 0.0003 +[2026-02-27 09:36:04] (step=0002053) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.4016826452748973, LR: 0.0003 +[2026-02-27 09:36:12] (step=0002054) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.4018783017022109, LR: 0.0003 +[2026-02-27 09:36:20] (step=0002055) Train Loss: 0.4790, Train Steps/Sec: 0.13, Epoch: 0.40207395812952457, LR: 0.0003 +[2026-02-27 09:36:28] (step=0002056) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.4022696145568382, LR: 0.0003 +[2026-02-27 09:36:36] (step=0002057) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 0.4024652709841518, LR: 0.0003 +[2026-02-27 09:36:44] (step=0002058) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.4026609274114655, LR: 0.0003 +[2026-02-27 09:36:51] (step=0002059) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.4028565838387791, LR: 0.0003 +[2026-02-27 09:36:59] (step=0002060) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.40305224026609276, LR: 0.0003 +[2026-02-27 09:37:07] (step=0002061) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.4032478966934064, LR: 0.0003 +[2026-02-27 09:37:15] (step=0002062) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.40344355312072, LR: 0.0003 +[2026-02-27 09:37:23] (step=0002063) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.4036392095480337, LR: 0.0003 +[2026-02-27 09:37:31] (step=0002064) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.4038348659753473, LR: 0.0003 +[2026-02-27 09:37:39] (step=0002065) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.4040305224026609, LR: 0.0003 +[2026-02-27 09:37:46] (step=0002066) Train Loss: 0.4698, Train Steps/Sec: 0.13, Epoch: 0.4042261788299746, LR: 0.0003 +[2026-02-27 09:37:54] (step=0002067) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.4044218352572882, LR: 0.0003 +[2026-02-27 09:38:02] (step=0002068) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.4046174916846018, LR: 0.0003 +[2026-02-27 09:38:10] (step=0002069) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.4048131481119155, LR: 0.0003 +[2026-02-27 09:38:18] (step=0002070) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.4050088045392291, LR: 0.0003 +[2026-02-27 09:38:26] (step=0002071) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.4052044609665427, LR: 0.0003 +[2026-02-27 09:38:34] (step=0002072) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.4054001173938564, LR: 0.0003 +[2026-02-27 09:38:42] (step=0002073) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.40559577382117, LR: 0.0003 +[2026-02-27 09:38:49] (step=0002074) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.4057914302484837, LR: 0.0003 +[2026-02-27 09:38:57] (step=0002075) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.4059870866757973, LR: 0.0003 +[2026-02-27 09:39:05] (step=0002076) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.4061827431031109, LR: 0.0003 +[2026-02-27 09:39:13] (step=0002077) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.4063783995304246, LR: 0.0003 +[2026-02-27 09:39:21] (step=0002078) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 0.4065740559577382, LR: 0.0003 +[2026-02-27 09:39:29] (step=0002079) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.40676971238505183, LR: 0.0003 +[2026-02-27 09:39:37] (step=0002080) Train Loss: 0.4864, Train Steps/Sec: 0.13, Epoch: 0.4069653688123655, LR: 0.0003 +[2026-02-27 09:39:45] (step=0002081) Train Loss: 0.4814, Train Steps/Sec: 0.13, Epoch: 0.4071610252396791, LR: 0.0003 +[2026-02-27 09:39:52] (step=0002082) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.40735668166699274, LR: 0.0003 +[2026-02-27 09:40:00] (step=0002083) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.4075523380943064, LR: 0.0003 +[2026-02-27 09:40:08] (step=0002084) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.40774799452162, LR: 0.0003 +[2026-02-27 09:40:16] (step=0002085) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.40794365094893364, LR: 0.0003 +[2026-02-27 09:40:24] (step=0002086) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.4081393073762473, LR: 0.0003 +[2026-02-27 09:40:32] (step=0002087) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.40833496380356094, LR: 0.0003 +[2026-02-27 09:40:40] (step=0002088) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.4085306202308746, LR: 0.0003 +[2026-02-27 09:40:47] (step=0002089) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.4087262766581882, LR: 0.0003 +[2026-02-27 09:40:55] (step=0002090) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.40892193308550184, LR: 0.0003 +[2026-02-27 09:41:03] (step=0002091) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.4091175895128155, LR: 0.0003 +[2026-02-27 09:41:11] (step=0002092) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.40931324594012913, LR: 0.0003 +[2026-02-27 09:41:19] (step=0002093) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 0.40950890236744275, LR: 0.0003 +[2026-02-27 09:41:27] (step=0002094) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.4097045587947564, LR: 0.0003 +[2026-02-27 09:41:35] (step=0002095) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.40990021522207004, LR: 0.0003 +[2026-02-27 09:41:42] (step=0002096) Train Loss: 0.4751, Train Steps/Sec: 0.13, Epoch: 0.41009587164938366, LR: 0.0003 +[2026-02-27 09:41:50] (step=0002097) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.41029152807669733, LR: 0.0003 +[2026-02-27 09:41:58] (step=0002098) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.41048718450401095, LR: 0.0003 +[2026-02-27 09:42:06] (step=0002099) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.41068284093132457, LR: 0.0003 +[2026-02-27 09:42:14] (step=0002100) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.41087849735863824, LR: 0.0003 +[2026-02-27 09:42:22] (step=0002101) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.41107415378595186, LR: 0.0003 +[2026-02-27 09:42:30] (step=0002102) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.41126981021326553, LR: 0.0003 +[2026-02-27 09:42:37] (step=0002103) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.41146546664057915, LR: 0.0003 +[2026-02-27 09:42:45] (step=0002104) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.41166112306789276, LR: 0.0003 +[2026-02-27 09:42:53] (step=0002105) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.41185677949520644, LR: 0.0003 +[2026-02-27 09:43:01] (step=0002106) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.41205243592252006, LR: 0.0003 +[2026-02-27 09:43:09] (step=0002107) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.4122480923498337, LR: 0.0003 +[2026-02-27 09:43:17] (step=0002108) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.41244374877714735, LR: 0.0003 +[2026-02-27 09:43:25] (step=0002109) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.41263940520446096, LR: 0.0003 +[2026-02-27 09:43:32] (step=0002110) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 0.4128350616317746, LR: 0.0003 +[2026-02-27 09:43:40] (step=0002111) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.41303071805908825, LR: 0.0003 +[2026-02-27 09:43:48] (step=0002112) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.41322637448640187, LR: 0.0003 +[2026-02-27 09:43:56] (step=0002113) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.4134220309137155, LR: 0.0003 +[2026-02-27 09:44:04] (step=0002114) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.41361768734102916, LR: 0.0003 +[2026-02-27 09:44:12] (step=0002115) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.4138133437683428, LR: 0.0003 +[2026-02-27 09:44:20] (step=0002116) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.41400900019565645, LR: 0.0003 +[2026-02-27 09:44:27] (step=0002117) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 0.41420465662297007, LR: 0.0003 +[2026-02-27 09:44:35] (step=0002118) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.4144003130502837, LR: 0.0003 +[2026-02-27 09:44:43] (step=0002119) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.41459596947759736, LR: 0.0003 +[2026-02-27 09:44:51] (step=0002120) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.414791625904911, LR: 0.0003 +[2026-02-27 09:44:59] (step=0002121) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.4149872823322246, LR: 0.0003 +[2026-02-27 09:45:07] (step=0002122) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.41518293875953827, LR: 0.0003 +[2026-02-27 09:45:15] (step=0002123) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.4153785951868519, LR: 0.0003 +[2026-02-27 09:45:22] (step=0002124) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.4155742516141655, LR: 0.0003 +[2026-02-27 09:45:30] (step=0002125) Train Loss: 0.4752, Train Steps/Sec: 0.13, Epoch: 0.4157699080414792, LR: 0.0003 +[2026-02-27 09:45:38] (step=0002126) Train Loss: 0.4669, Train Steps/Sec: 0.12, Epoch: 0.4159655644687928, LR: 0.0003 +[2026-02-27 09:45:46] (step=0002127) Train Loss: 0.4757, Train Steps/Sec: 0.13, Epoch: 0.4161612208961064, LR: 0.0003 +[2026-02-27 09:45:54] (step=0002128) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.4163568773234201, LR: 0.0003 +[2026-02-27 09:46:02] (step=0002129) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.4165525337507337, LR: 0.0003 +[2026-02-27 09:46:10] (step=0002130) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.4167481901780474, LR: 0.0003 +[2026-02-27 09:46:18] (step=0002131) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.416943846605361, LR: 0.0003 +[2026-02-27 09:46:26] (step=0002132) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.4171395030326746, LR: 0.0003 +[2026-02-27 09:46:33] (step=0002133) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.4173351594599883, LR: 0.0003 +[2026-02-27 09:46:41] (step=0002134) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.4175308158873019, LR: 0.0003 +[2026-02-27 09:46:49] (step=0002135) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.4177264723146155, LR: 0.0003 +[2026-02-27 09:46:57] (step=0002136) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.4179221287419292, LR: 0.0003 +[2026-02-27 09:47:05] (step=0002137) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.4181177851692428, LR: 0.0003 +[2026-02-27 09:47:13] (step=0002138) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.4183134415965564, LR: 0.0003 +[2026-02-27 09:47:21] (step=0002139) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.4185090980238701, LR: 0.0003 +[2026-02-27 09:47:28] (step=0002140) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.4187047544511837, LR: 0.0003 +[2026-02-27 09:47:36] (step=0002141) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.41890041087849733, LR: 0.0003 +[2026-02-27 09:47:44] (step=0002142) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.419096067305811, LR: 0.0003 +[2026-02-27 09:47:52] (step=0002143) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.4192917237331246, LR: 0.0003 +[2026-02-27 09:48:00] (step=0002144) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.4194873801604383, LR: 0.0003 +[2026-02-27 09:48:08] (step=0002145) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.4196830365877519, LR: 0.0003 +[2026-02-27 09:48:16] (step=0002146) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.41987869301506553, LR: 0.0003 +[2026-02-27 09:48:23] (step=0002147) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.4200743494423792, LR: 0.0003 +[2026-02-27 09:48:31] (step=0002148) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.4202700058696928, LR: 0.0003 +[2026-02-27 09:48:39] (step=0002149) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.42046566229700644, LR: 0.0003 +[2026-02-27 09:48:47] (step=0002150) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.4206613187243201, LR: 0.0003 +[2026-02-27 09:48:55] (step=0002151) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.4208569751516337, LR: 0.0003 +[2026-02-27 09:49:03] (step=0002152) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.42105263157894735, LR: 0.0003 +[2026-02-27 09:49:11] (step=0002153) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.421248288006261, LR: 0.0003 +[2026-02-27 09:49:18] (step=0002154) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.42144394443357464, LR: 0.0003 +[2026-02-27 09:49:26] (step=0002155) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.42163960086088825, LR: 0.0003 +[2026-02-27 09:49:34] (step=0002156) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.4218352572882019, LR: 0.0003 +[2026-02-27 09:49:42] (step=0002157) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.42203091371551554, LR: 0.0003 +[2026-02-27 09:49:50] (step=0002158) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.4222265701428292, LR: 0.0003 +[2026-02-27 09:49:58] (step=0002159) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.42242222657014283, LR: 0.0003 +[2026-02-27 09:50:06] (step=0002160) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.42261788299745645, LR: 0.0003 +[2026-02-27 09:50:13] (step=0002161) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.4228135394247701, LR: 0.0003 +[2026-02-27 09:50:21] (step=0002162) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.42300919585208374, LR: 0.0003 +[2026-02-27 09:50:29] (step=0002163) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.42320485227939736, LR: 0.0003 +[2026-02-27 09:50:37] (step=0002164) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.42340050870671103, LR: 0.0003 +[2026-02-27 09:50:45] (step=0002165) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.42359616513402465, LR: 0.0003 +[2026-02-27 09:50:53] (step=0002166) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.42379182156133827, LR: 0.0003 +[2026-02-27 09:51:01] (step=0002167) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.42398747798865194, LR: 0.0003 +[2026-02-27 09:51:08] (step=0002168) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.42418313441596556, LR: 0.0003 +[2026-02-27 09:51:16] (step=0002169) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.4243787908432792, LR: 0.0003 +[2026-02-27 09:51:24] (step=0002170) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.42457444727059285, LR: 0.0003 +[2026-02-27 09:51:32] (step=0002171) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.42477010369790646, LR: 0.0003 +[2026-02-27 09:51:40] (step=0002172) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.42496576012522014, LR: 0.0003 +[2026-02-27 09:51:48] (step=0002173) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.42516141655253376, LR: 0.0003 +[2026-02-27 09:51:56] (step=0002174) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.4253570729798474, LR: 0.0003 +[2026-02-27 09:52:03] (step=0002175) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.42555272940716105, LR: 0.0003 +[2026-02-27 09:52:11] (step=0002176) Train Loss: 0.4561, Train Steps/Sec: 0.12, Epoch: 0.42574838583447466, LR: 0.0003 +[2026-02-27 09:52:19] (step=0002177) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 0.4259440422617883, LR: 0.0003 +[2026-02-27 09:52:27] (step=0002178) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.42613969868910195, LR: 0.0003 +[2026-02-27 09:52:35] (step=0002179) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.42633535511641557, LR: 0.0003 +[2026-02-27 09:52:43] (step=0002180) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.4265310115437292, LR: 0.0003 +[2026-02-27 09:52:51] (step=0002181) Train Loss: 0.4552, Train Steps/Sec: 0.12, Epoch: 0.42672666797104286, LR: 0.0003 +[2026-02-27 09:52:59] (step=0002182) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.4269223243983565, LR: 0.0003 +[2026-02-27 09:53:07] (step=0002183) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.4271179808256701, LR: 0.0003 +[2026-02-27 09:53:14] (step=0002184) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.42731363725298377, LR: 0.0003 +[2026-02-27 09:53:22] (step=0002185) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.4275092936802974, LR: 0.0003 +[2026-02-27 09:53:30] (step=0002186) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.42770495010761106, LR: 0.0003 +[2026-02-27 09:53:38] (step=0002187) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.4279006065349247, LR: 0.0003 +[2026-02-27 09:53:46] (step=0002188) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 0.4280962629622383, LR: 0.0003 +[2026-02-27 09:53:54] (step=0002189) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.42829191938955197, LR: 0.0003 +[2026-02-27 09:54:02] (step=0002190) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.4284875758168656, LR: 0.0003 +[2026-02-27 09:54:10] (step=0002191) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.4286832322441792, LR: 0.0003 +[2026-02-27 09:54:17] (step=0002192) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.4288788886714929, LR: 0.0003 +[2026-02-27 09:54:25] (step=0002193) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.4290745450988065, LR: 0.0003 +[2026-02-27 09:54:33] (step=0002194) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.4292702015261201, LR: 0.0003 +[2026-02-27 09:54:41] (step=0002195) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.4294658579534338, LR: 0.0003 +[2026-02-27 09:54:49] (step=0002196) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.4296615143807474, LR: 0.0003 +[2026-02-27 09:54:57] (step=0002197) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.429857170808061, LR: 0.0003 +[2026-02-27 09:55:05] (step=0002198) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.4300528272353747, LR: 0.0003 +[2026-02-27 09:55:12] (step=0002199) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.4302484836626883, LR: 0.0003 +[2026-02-27 09:55:20] (step=0002200) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.430444140090002, LR: 0.0003 +[2026-02-27 09:55:28] (step=0002201) Train Loss: 0.4769, Train Steps/Sec: 0.13, Epoch: 0.4306397965173156, LR: 0.0003 +[2026-02-27 09:55:36] (step=0002202) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.4308354529446292, LR: 0.0003 +[2026-02-27 09:55:44] (step=0002203) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.4310311093719429, LR: 0.0003 +[2026-02-27 09:55:52] (step=0002204) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.4312267657992565, LR: 0.0003 +[2026-02-27 09:56:00] (step=0002205) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 0.4314224222265701, LR: 0.0003 +[2026-02-27 09:56:07] (step=0002206) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.4316180786538838, LR: 0.0003 +[2026-02-27 09:56:15] (step=0002207) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.4318137350811974, LR: 0.0003 +[2026-02-27 09:56:23] (step=0002208) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.43200939150851103, LR: 0.0003 +[2026-02-27 09:56:31] (step=0002209) Train Loss: 0.4775, Train Steps/Sec: 0.13, Epoch: 0.4322050479358247, LR: 0.0003 +[2026-02-27 09:56:39] (step=0002210) Train Loss: 0.4756, Train Steps/Sec: 0.13, Epoch: 0.4324007043631383, LR: 0.0003 +[2026-02-27 09:56:47] (step=0002211) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.43259636079045194, LR: 0.0003 +[2026-02-27 09:56:55] (step=0002212) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.4327920172177656, LR: 0.0003 +[2026-02-27 09:57:02] (step=0002213) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.43298767364507923, LR: 0.0003 +[2026-02-27 09:57:10] (step=0002214) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.4331833300723929, LR: 0.0003 +[2026-02-27 09:57:18] (step=0002215) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.4333789864997065, LR: 0.0003 +[2026-02-27 09:57:26] (step=0002216) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.43357464292702014, LR: 0.0003 +[2026-02-27 09:57:34] (step=0002217) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.4337702993543338, LR: 0.0003 +[2026-02-27 09:57:42] (step=0002218) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.43396595578164743, LR: 0.0003 +[2026-02-27 09:57:50] (step=0002219) Train Loss: 0.4769, Train Steps/Sec: 0.13, Epoch: 0.43416161220896105, LR: 0.0003 +[2026-02-27 09:57:58] (step=0002220) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 0.4343572686362747, LR: 0.0003 +[2026-02-27 09:58:05] (step=0002221) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.43455292506358834, LR: 0.0003 +[2026-02-27 09:58:13] (step=0002222) Train Loss: 0.4743, Train Steps/Sec: 0.13, Epoch: 0.43474858149090195, LR: 0.0003 +[2026-02-27 09:58:21] (step=0002223) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.4349442379182156, LR: 0.0003 +[2026-02-27 09:58:29] (step=0002224) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.43513989434552924, LR: 0.0003 +[2026-02-27 09:58:37] (step=0002225) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.43533555077284286, LR: 0.0003 +[2026-02-27 09:58:45] (step=0002226) Train Loss: 0.4637, Train Steps/Sec: 0.12, Epoch: 0.43553120720015653, LR: 0.0003 +[2026-02-27 09:58:53] (step=0002227) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.43572686362747015, LR: 0.0003 +[2026-02-27 09:59:01] (step=0002228) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.4359225200547838, LR: 0.0003 +[2026-02-27 09:59:09] (step=0002229) Train Loss: 0.4552, Train Steps/Sec: 0.12, Epoch: 0.43611817648209744, LR: 0.0003 +[2026-02-27 09:59:16] (step=0002230) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.43631383290941106, LR: 0.0003 +[2026-02-27 09:59:24] (step=0002231) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.43650948933672473, LR: 0.0003 +[2026-02-27 09:59:32] (step=0002232) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.43670514576403835, LR: 0.0003 +[2026-02-27 09:59:40] (step=0002233) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.43690080219135197, LR: 0.0003 +[2026-02-27 09:59:48] (step=0002234) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.43709645861866564, LR: 0.0003 +[2026-02-27 09:59:56] (step=0002235) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.43729211504597926, LR: 0.0003 +[2026-02-27 10:00:04] (step=0002236) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.4374877714732929, LR: 0.0003 +[2026-02-27 10:00:11] (step=0002237) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.43768342790060655, LR: 0.0003 +[2026-02-27 10:00:19] (step=0002238) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.43787908432792016, LR: 0.0003 +[2026-02-27 10:00:27] (step=0002239) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.4380747407552338, LR: 0.0003 +[2026-02-27 10:00:35] (step=0002240) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.43827039718254746, LR: 0.0003 +[2026-02-27 10:00:43] (step=0002241) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.4384660536098611, LR: 0.0003 +[2026-02-27 10:00:51] (step=0002242) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.43866171003717475, LR: 0.0003 +[2026-02-27 10:00:59] (step=0002243) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.43885736646448836, LR: 0.0003 +[2026-02-27 10:01:07] (step=0002244) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.439053022891802, LR: 0.0003 +[2026-02-27 10:01:14] (step=0002245) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.43924867931911565, LR: 0.0003 +[2026-02-27 10:01:22] (step=0002246) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.43944433574642927, LR: 0.0003 +[2026-02-27 10:01:30] (step=0002247) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 0.4396399921737429, LR: 0.0003 +[2026-02-27 10:01:38] (step=0002248) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.43983564860105656, LR: 0.0003 +[2026-02-27 10:01:46] (step=0002249) Train Loss: 0.4746, Train Steps/Sec: 0.13, Epoch: 0.4400313050283702, LR: 0.0003 +[2026-02-27 10:01:54] (step=0002250) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.4402269614556838, LR: 0.0003 +[2026-02-27 10:02:02] (step=0002251) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.44042261788299747, LR: 0.0003 +[2026-02-27 10:02:09] (step=0002252) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.4406182743103111, LR: 0.0003 +[2026-02-27 10:02:17] (step=0002253) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.4408139307376247, LR: 0.0003 +[2026-02-27 10:02:25] (step=0002254) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.4410095871649384, LR: 0.0003 +[2026-02-27 10:02:33] (step=0002255) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.441205243592252, LR: 0.0003 +[2026-02-27 10:02:41] (step=0002256) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.44140090001956567, LR: 0.0003 +[2026-02-27 10:02:49] (step=0002257) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.4415965564468793, LR: 0.0003 +[2026-02-27 10:02:57] (step=0002258) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.4417922128741929, LR: 0.0003 +[2026-02-27 10:03:04] (step=0002259) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.4419878693015066, LR: 0.0003 +[2026-02-27 10:03:12] (step=0002260) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.4421835257288202, LR: 0.0003 +[2026-02-27 10:03:20] (step=0002261) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.4423791821561338, LR: 0.0003 +[2026-02-27 10:03:28] (step=0002262) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.4425748385834475, LR: 0.0003 +[2026-02-27 10:03:36] (step=0002263) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.4427704950107611, LR: 0.0003 +[2026-02-27 10:03:44] (step=0002264) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.4429661514380747, LR: 0.0003 +[2026-02-27 10:03:52] (step=0002265) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.4431618078653884, LR: 0.0003 +[2026-02-27 10:04:00] (step=0002266) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.443357464292702, LR: 0.0003 +[2026-02-27 10:04:07] (step=0002267) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.4435531207200156, LR: 0.0003 +[2026-02-27 10:04:15] (step=0002268) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.4437487771473293, LR: 0.0003 +[2026-02-27 10:04:23] (step=0002269) Train Loss: 0.4769, Train Steps/Sec: 0.13, Epoch: 0.4439444335746429, LR: 0.0003 +[2026-02-27 10:04:31] (step=0002270) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.4441400900019566, LR: 0.0003 +[2026-02-27 10:04:39] (step=0002271) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.4443357464292702, LR: 0.0003 +[2026-02-27 10:04:47] (step=0002272) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.4445314028565838, LR: 0.0003 +[2026-02-27 10:04:54] (step=0002273) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.4447270592838975, LR: 0.0003 +[2026-02-27 10:05:02] (step=0002274) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.4449227157112111, LR: 0.0003 +[2026-02-27 10:05:10] (step=0002275) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.44511837213852473, LR: 0.0003 +[2026-02-27 10:05:18] (step=0002276) Train Loss: 0.4790, Train Steps/Sec: 0.12, Epoch: 0.4453140285658384, LR: 0.0003 +[2026-02-27 10:05:26] (step=0002277) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.445509684993152, LR: 0.0003 +[2026-02-27 10:05:34] (step=0002278) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.44570534142046564, LR: 0.0003 +[2026-02-27 10:05:42] (step=0002279) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.4459009978477793, LR: 0.0003 +[2026-02-27 10:05:50] (step=0002280) Train Loss: 0.4661, Train Steps/Sec: 0.12, Epoch: 0.44609665427509293, LR: 0.0003 +[2026-02-27 10:05:58] (step=0002281) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.44629231070240655, LR: 0.0003 +[2026-02-27 10:06:06] (step=0002282) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.4464879671297202, LR: 0.0003 +[2026-02-27 10:06:13] (step=0002283) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.44668362355703384, LR: 0.0003 +[2026-02-27 10:06:21] (step=0002284) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.4468792799843475, LR: 0.0003 +[2026-02-27 10:06:29] (step=0002285) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.44707493641166113, LR: 0.0003 +[2026-02-27 10:06:37] (step=0002286) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.44727059283897475, LR: 0.0003 +[2026-02-27 10:06:45] (step=0002287) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.4474662492662884, LR: 0.0003 +[2026-02-27 10:06:53] (step=0002288) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.44766190569360204, LR: 0.0003 +[2026-02-27 10:07:01] (step=0002289) Train Loss: 0.4767, Train Steps/Sec: 0.13, Epoch: 0.44785756212091565, LR: 0.0003 +[2026-02-27 10:07:08] (step=0002290) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.4480532185482293, LR: 0.0003 +[2026-02-27 10:07:16] (step=0002291) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.44824887497554294, LR: 0.0003 +[2026-02-27 10:07:24] (step=0002292) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.44844453140285656, LR: 0.0003 +[2026-02-27 10:07:32] (step=0002293) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 0.44864018783017023, LR: 0.0003 +[2026-02-27 10:07:40] (step=0002294) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.44883584425748385, LR: 0.0003 +[2026-02-27 10:07:48] (step=0002295) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.44903150068479747, LR: 0.0003 +[2026-02-27 10:07:56] (step=0002296) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.44922715711211114, LR: 0.0003 +[2026-02-27 10:08:03] (step=0002297) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.44942281353942476, LR: 0.0003 +[2026-02-27 10:08:11] (step=0002298) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.44961846996673843, LR: 0.0003 +[2026-02-27 10:08:19] (step=0002299) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.44981412639405205, LR: 0.0003 +[2026-02-27 10:08:27] (step=0002300) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.45000978282136567, LR: 0.0003 +[2026-02-27 10:08:40] (step=0002301) Train Loss: 0.4627, Train Steps/Sec: 0.08, Epoch: 0.45020543924867934, LR: 0.0003 +[2026-02-27 10:08:54] (step=0002302) Train Loss: 0.4740, Train Steps/Sec: 0.07, Epoch: 0.45040109567599296, LR: 0.0003 +[2026-02-27 10:09:05] (step=0002303) Train Loss: 0.4682, Train Steps/Sec: 0.09, Epoch: 0.4505967521033066, LR: 0.0003 +[2026-02-27 10:09:13] (step=0002304) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.45079240853062025, LR: 0.0003 +[2026-02-27 10:09:21] (step=0002305) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.45098806495793387, LR: 0.0003 +[2026-02-27 10:09:28] (step=0002306) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.4511837213852475, LR: 0.0003 +[2026-02-27 10:09:36] (step=0002307) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.45137937781256116, LR: 0.0003 +[2026-02-27 10:09:44] (step=0002308) Train Loss: 0.4785, Train Steps/Sec: 0.13, Epoch: 0.4515750342398748, LR: 0.0003 +[2026-02-27 10:09:52] (step=0002309) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.4517706906671884, LR: 0.0003 +[2026-02-27 10:10:00] (step=0002310) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.45196634709450206, LR: 0.0003 +[2026-02-27 10:10:08] (step=0002311) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.4521620035218157, LR: 0.0003 +[2026-02-27 10:10:16] (step=0002312) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.45235765994912935, LR: 0.0003 +[2026-02-27 10:10:23] (step=0002313) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.45255331637644297, LR: 0.0003 +[2026-02-27 10:10:31] (step=0002314) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.4527489728037566, LR: 0.0003 +[2026-02-27 10:10:39] (step=0002315) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.45294462923107026, LR: 0.0003 +[2026-02-27 10:10:47] (step=0002316) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.4531402856583839, LR: 0.0003 +[2026-02-27 10:10:55] (step=0002317) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.4533359420856975, LR: 0.0003 +[2026-02-27 10:11:03] (step=0002318) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.45353159851301117, LR: 0.0003 +[2026-02-27 10:11:11] (step=0002319) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.4537272549403248, LR: 0.0003 +[2026-02-27 10:11:18] (step=0002320) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.4539229113676384, LR: 0.0003 +[2026-02-27 10:11:26] (step=0002321) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.4541185677949521, LR: 0.0003 +[2026-02-27 10:11:34] (step=0002322) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.4543142242222657, LR: 0.0003 +[2026-02-27 10:11:42] (step=0002323) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.4545098806495793, LR: 0.0003 +[2026-02-27 10:11:50] (step=0002324) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.454705537076893, LR: 0.0003 +[2026-02-27 10:11:58] (step=0002325) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.4549011935042066, LR: 0.0003 +[2026-02-27 10:12:06] (step=0002326) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.4550968499315203, LR: 0.0003 +[2026-02-27 10:12:14] (step=0002327) Train Loss: 0.4659, Train Steps/Sec: 0.12, Epoch: 0.4552925063588339, LR: 0.0003 +[2026-02-27 10:12:22] (step=0002328) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.4554881627861475, LR: 0.0003 +[2026-02-27 10:12:29] (step=0002329) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 0.4556838192134612, LR: 0.0003 +[2026-02-27 10:12:37] (step=0002330) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.4558794756407748, LR: 0.0003 +[2026-02-27 10:12:45] (step=0002331) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.4560751320680884, LR: 0.0003 +[2026-02-27 10:12:53] (step=0002332) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.4562707884954021, LR: 0.0003 +[2026-02-27 10:13:01] (step=0002333) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.4564664449227157, LR: 0.0003 +[2026-02-27 10:13:09] (step=0002334) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.4566621013500293, LR: 0.0003 +[2026-02-27 10:13:17] (step=0002335) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.456857757777343, LR: 0.0003 +[2026-02-27 10:13:24] (step=0002336) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.4570534142046566, LR: 0.0003 +[2026-02-27 10:13:32] (step=0002337) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.45724907063197023, LR: 0.0003 +[2026-02-27 10:13:40] (step=0002338) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.4574447270592839, LR: 0.0003 +[2026-02-27 10:13:48] (step=0002339) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.4576403834865975, LR: 0.0003 +[2026-02-27 10:13:56] (step=0002340) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.4578360399139112, LR: 0.0003 +[2026-02-27 10:14:04] (step=0002341) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.4580316963412248, LR: 0.0003 +[2026-02-27 10:14:12] (step=0002342) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.45822735276853843, LR: 0.0003 +[2026-02-27 10:14:19] (step=0002343) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.4584230091958521, LR: 0.0003 +[2026-02-27 10:14:27] (step=0002344) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.4586186656231657, LR: 0.0003 +[2026-02-27 10:14:35] (step=0002345) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.45881432205047934, LR: 0.0003 +[2026-02-27 10:14:43] (step=0002346) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.459009978477793, LR: 0.0003 +[2026-02-27 10:14:51] (step=0002347) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.45920563490510663, LR: 0.0003 +[2026-02-27 10:14:59] (step=0002348) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.45940129133242025, LR: 0.0003 +[2026-02-27 10:15:07] (step=0002349) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.4595969477597339, LR: 0.0003 +[2026-02-27 10:15:14] (step=0002350) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.45979260418704754, LR: 0.0003 +[2026-02-27 10:15:22] (step=0002351) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.45998826061436116, LR: 0.0003 +[2026-02-27 10:15:30] (step=0002352) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.46018391704167483, LR: 0.0003 +[2026-02-27 10:15:38] (step=0002353) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.46037957346898845, LR: 0.0003 +[2026-02-27 10:15:46] (step=0002354) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.4605752298963021, LR: 0.0003 +[2026-02-27 10:15:54] (step=0002355) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.46077088632361574, LR: 0.0003 +[2026-02-27 10:16:02] (step=0002356) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.46096654275092935, LR: 0.0003 +[2026-02-27 10:16:09] (step=0002357) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 0.461162199178243, LR: 0.0003 +[2026-02-27 10:16:17] (step=0002358) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.46135785560555664, LR: 0.0003 +[2026-02-27 10:16:25] (step=0002359) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.46155351203287026, LR: 0.0003 +[2026-02-27 10:16:33] (step=0002360) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.46174916846018393, LR: 0.0003 +[2026-02-27 10:16:41] (step=0002361) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.46194482488749755, LR: 0.0003 +[2026-02-27 10:16:49] (step=0002362) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.46214048131481117, LR: 0.0003 +[2026-02-27 10:16:57] (step=0002363) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.46233613774212484, LR: 0.0003 +[2026-02-27 10:17:04] (step=0002364) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.46253179416943846, LR: 0.0003 +[2026-02-27 10:17:12] (step=0002365) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.4627274505967521, LR: 0.0003 +[2026-02-27 10:17:20] (step=0002366) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.46292310702406575, LR: 0.0003 +[2026-02-27 10:17:28] (step=0002367) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.46311876345137937, LR: 0.0003 +[2026-02-27 10:17:36] (step=0002368) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.46331441987869304, LR: 0.0003 +[2026-02-27 10:17:44] (step=0002369) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.46351007630600666, LR: 0.0003 +[2026-02-27 10:17:52] (step=0002370) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.4637057327333203, LR: 0.0003 +[2026-02-27 10:17:59] (step=0002371) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 0.46390138916063395, LR: 0.0003 +[2026-02-27 10:18:07] (step=0002372) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.46409704558794757, LR: 0.0003 +[2026-02-27 10:18:15] (step=0002373) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.4642927020152612, LR: 0.0003 +[2026-02-27 10:18:23] (step=0002374) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.46448835844257486, LR: 0.0003 +[2026-02-27 10:18:31] (step=0002375) Train Loss: 0.4522, Train Steps/Sec: 0.12, Epoch: 0.4646840148698885, LR: 0.0003 +[2026-02-27 10:18:39] (step=0002376) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 0.4648796712972021, LR: 0.0003 +[2026-02-27 10:18:47] (step=0002377) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.46507532772451576, LR: 0.0003 +[2026-02-27 10:18:55] (step=0002378) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.4652709841518294, LR: 0.0003 +[2026-02-27 10:19:03] (step=0002379) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.465466640579143, LR: 0.0003 +[2026-02-27 10:19:10] (step=0002380) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.46566229700645667, LR: 0.0003 +[2026-02-27 10:19:18] (step=0002381) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.4658579534337703, LR: 0.0003 +[2026-02-27 10:19:26] (step=0002382) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.46605360986108396, LR: 0.0003 +[2026-02-27 10:19:34] (step=0002383) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.4662492662883976, LR: 0.0003 +[2026-02-27 10:19:42] (step=0002384) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.4664449227157112, LR: 0.0003 +[2026-02-27 10:19:50] (step=0002385) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.46664057914302487, LR: 0.0003 +[2026-02-27 10:19:58] (step=0002386) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.4668362355703385, LR: 0.0003 +[2026-02-27 10:20:05] (step=0002387) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.4670318919976521, LR: 0.0003 +[2026-02-27 10:20:13] (step=0002388) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.4672275484249658, LR: 0.0003 +[2026-02-27 10:20:21] (step=0002389) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.4674232048522794, LR: 0.0003 +[2026-02-27 10:20:29] (step=0002390) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.467618861279593, LR: 0.0003 +[2026-02-27 10:20:37] (step=0002391) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.4678145177069067, LR: 0.0003 +[2026-02-27 10:20:45] (step=0002392) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 0.4680101741342203, LR: 0.0003 +[2026-02-27 10:20:53] (step=0002393) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.4682058305615339, LR: 0.0003 +[2026-02-27 10:21:00] (step=0002394) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.4684014869888476, LR: 0.0003 +[2026-02-27 10:21:08] (step=0002395) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.4685971434161612, LR: 0.0003 +[2026-02-27 10:21:16] (step=0002396) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.4687927998434749, LR: 0.0003 +[2026-02-27 10:21:24] (step=0002397) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.4689884562707885, LR: 0.0003 +[2026-02-27 10:21:32] (step=0002398) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.4691841126981021, LR: 0.0003 +[2026-02-27 10:21:40] (step=0002399) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.4693797691254158, LR: 0.0003 +[2026-02-27 10:21:48] (step=0002400) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.4695754255527294, LR: 0.0003 +[2026-02-27 10:21:55] (step=0002401) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.469771081980043, LR: 0.0003 +[2026-02-27 10:22:03] (step=0002402) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.4699667384073567, LR: 0.0003 +[2026-02-27 10:22:11] (step=0002403) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.4701623948346703, LR: 0.0003 +[2026-02-27 10:22:19] (step=0002404) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 0.47035805126198393, LR: 0.0003 +[2026-02-27 10:22:27] (step=0002405) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.4705537076892976, LR: 0.0003 +[2026-02-27 10:22:35] (step=0002406) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.4707493641166112, LR: 0.0003 +[2026-02-27 10:22:43] (step=0002407) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.47094502054392484, LR: 0.0003 +[2026-02-27 10:22:50] (step=0002408) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.4711406769712385, LR: 0.0003 +[2026-02-27 10:22:58] (step=0002409) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.47133633339855213, LR: 0.0003 +[2026-02-27 10:23:06] (step=0002410) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.4715319898258658, LR: 0.0003 +[2026-02-27 10:23:14] (step=0002411) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.4717276462531794, LR: 0.0003 +[2026-02-27 10:23:22] (step=0002412) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.47192330268049304, LR: 0.0003 +[2026-02-27 10:23:30] (step=0002413) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 0.4721189591078067, LR: 0.0003 +[2026-02-27 10:23:37] (step=0002414) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.47231461553512033, LR: 0.0003 +[2026-02-27 10:23:45] (step=0002415) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.47251027196243395, LR: 0.0003 +[2026-02-27 10:23:53] (step=0002416) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.4727059283897476, LR: 0.0003 +[2026-02-27 10:24:01] (step=0002417) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.47290158481706124, LR: 0.0003 +[2026-02-27 10:24:09] (step=0002418) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.47309724124437486, LR: 0.0003 +[2026-02-27 10:24:17] (step=0002419) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.47329289767168853, LR: 0.0003 +[2026-02-27 10:24:25] (step=0002420) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.47348855409900215, LR: 0.0003 +[2026-02-27 10:24:32] (step=0002421) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.47368421052631576, LR: 0.0003 +[2026-02-27 10:24:40] (step=0002422) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.47387986695362944, LR: 0.0003 +[2026-02-27 10:24:48] (step=0002423) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.47407552338094305, LR: 0.0003 +[2026-02-27 10:24:56] (step=0002424) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.4742711798082567, LR: 0.0003 +[2026-02-27 10:25:04] (step=0002425) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.47446683623557034, LR: 0.0003 +[2026-02-27 10:25:12] (step=0002426) Train Loss: 0.4709, Train Steps/Sec: 0.12, Epoch: 0.47466249266288396, LR: 0.0003 +[2026-02-27 10:25:20] (step=0002427) Train Loss: 0.4616, Train Steps/Sec: 0.12, Epoch: 0.47485814909019763, LR: 0.0003 +[2026-02-27 10:25:28] (step=0002428) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.47505380551751125, LR: 0.0003 +[2026-02-27 10:25:36] (step=0002429) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 0.47524946194482487, LR: 0.0003 +[2026-02-27 10:25:44] (step=0002430) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.47544511837213854, LR: 0.0003 +[2026-02-27 10:25:51] (step=0002431) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.47564077479945216, LR: 0.0003 +[2026-02-27 10:25:59] (step=0002432) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.4758364312267658, LR: 0.0003 +[2026-02-27 10:26:07] (step=0002433) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.47603208765407945, LR: 0.0003 +[2026-02-27 10:26:15] (step=0002434) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.47622774408139307, LR: 0.0003 +[2026-02-27 10:26:23] (step=0002435) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.4764234005087067, LR: 0.0003 +[2026-02-27 10:26:31] (step=0002436) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.47661905693602036, LR: 0.0003 +[2026-02-27 10:26:39] (step=0002437) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.476814713363334, LR: 0.0003 +[2026-02-27 10:26:46] (step=0002438) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.47701036979064765, LR: 0.0003 +[2026-02-27 10:26:54] (step=0002439) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.47720602621796127, LR: 0.0003 +[2026-02-27 10:27:02] (step=0002440) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.4774016826452749, LR: 0.0003 +[2026-02-27 10:27:10] (step=0002441) Train Loss: 0.4785, Train Steps/Sec: 0.13, Epoch: 0.47759733907258856, LR: 0.0003 +[2026-02-27 10:27:18] (step=0002442) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.4777929954999022, LR: 0.0003 +[2026-02-27 10:27:26] (step=0002443) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.4779886519272158, LR: 0.0003 +[2026-02-27 10:27:34] (step=0002444) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.47818430835452946, LR: 0.0003 +[2026-02-27 10:27:41] (step=0002445) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.4783799647818431, LR: 0.0003 +[2026-02-27 10:27:49] (step=0002446) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.4785756212091567, LR: 0.0003 +[2026-02-27 10:27:57] (step=0002447) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.47877127763647037, LR: 0.0003 +[2026-02-27 10:28:05] (step=0002448) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.478966934063784, LR: 0.0003 +[2026-02-27 10:28:13] (step=0002449) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.4791625904910976, LR: 0.0003 +[2026-02-27 10:28:21] (step=0002450) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.4793582469184113, LR: 0.0003 +[2026-02-27 10:28:29] (step=0002451) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.4795539033457249, LR: 0.0003 +[2026-02-27 10:28:36] (step=0002452) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.47974955977303857, LR: 0.0003 +[2026-02-27 10:28:44] (step=0002453) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.4799452162003522, LR: 0.0003 +[2026-02-27 10:28:52] (step=0002454) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.4801408726276658, LR: 0.0003 +[2026-02-27 10:29:00] (step=0002455) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.4803365290549795, LR: 0.0003 +[2026-02-27 10:29:08] (step=0002456) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.4805321854822931, LR: 0.0003 +[2026-02-27 10:29:16] (step=0002457) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.4807278419096067, LR: 0.0003 +[2026-02-27 10:29:24] (step=0002458) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.4809234983369204, LR: 0.0003 +[2026-02-27 10:29:31] (step=0002459) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.481119154764234, LR: 0.0003 +[2026-02-27 10:29:39] (step=0002460) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.4813148111915476, LR: 0.0003 +[2026-02-27 10:29:47] (step=0002461) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.4815104676188613, LR: 0.0003 +[2026-02-27 10:29:55] (step=0002462) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.4817061240461749, LR: 0.0003 +[2026-02-27 10:30:03] (step=0002463) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.48190178047348853, LR: 0.0003 +[2026-02-27 10:30:11] (step=0002464) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.4820974369008022, LR: 0.0003 +[2026-02-27 10:30:19] (step=0002465) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.4822930933281158, LR: 0.0003 +[2026-02-27 10:30:26] (step=0002466) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.4824887497554295, LR: 0.0003 +[2026-02-27 10:30:34] (step=0002467) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.4826844061827431, LR: 0.0003 +[2026-02-27 10:30:42] (step=0002468) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.4828800626100567, LR: 0.0003 +[2026-02-27 10:30:50] (step=0002469) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.4830757190373704, LR: 0.0003 +[2026-02-27 10:30:58] (step=0002470) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.483271375464684, LR: 0.0003 +[2026-02-27 10:31:06] (step=0002471) Train Loss: 0.4604, Train Steps/Sec: 0.12, Epoch: 0.48346703189199763, LR: 0.0003 +[2026-02-27 10:31:14] (step=0002472) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.4836626883193113, LR: 0.0003 +[2026-02-27 10:31:22] (step=0002473) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.4838583447466249, LR: 0.0003 +[2026-02-27 10:31:30] (step=0002474) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.48405400117393854, LR: 0.0003 +[2026-02-27 10:31:37] (step=0002475) Train Loss: 0.4806, Train Steps/Sec: 0.13, Epoch: 0.4842496576012522, LR: 0.0003 +[2026-02-27 10:31:45] (step=0002476) Train Loss: 0.4687, Train Steps/Sec: 0.12, Epoch: 0.48444531402856583, LR: 0.0003 +[2026-02-27 10:31:53] (step=0002477) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.48464097045587945, LR: 0.0003 +[2026-02-27 10:32:01] (step=0002478) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.4848366268831931, LR: 0.0003 +[2026-02-27 10:32:09] (step=0002479) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.48503228331050674, LR: 0.0003 +[2026-02-27 10:32:17] (step=0002480) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.4852279397378204, LR: 0.0003 +[2026-02-27 10:32:25] (step=0002481) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.48542359616513403, LR: 0.0003 +[2026-02-27 10:32:33] (step=0002482) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.48561925259244765, LR: 0.0003 +[2026-02-27 10:32:40] (step=0002483) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.4858149090197613, LR: 0.0003 +[2026-02-27 10:32:48] (step=0002484) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.48601056544707494, LR: 0.0003 +[2026-02-27 10:32:56] (step=0002485) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.48620622187438856, LR: 0.0003 +[2026-02-27 10:33:04] (step=0002486) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.48640187830170223, LR: 0.0003 +[2026-02-27 10:33:12] (step=0002487) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.48659753472901585, LR: 0.0003 +[2026-02-27 10:33:20] (step=0002488) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.48679319115632946, LR: 0.0003 +[2026-02-27 10:33:28] (step=0002489) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.48698884758364314, LR: 0.0003 +[2026-02-27 10:33:36] (step=0002490) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.48718450401095675, LR: 0.0003 +[2026-02-27 10:33:43] (step=0002491) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.48738016043827037, LR: 0.0003 +[2026-02-27 10:33:51] (step=0002492) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.48757581686558404, LR: 0.0003 +[2026-02-27 10:33:59] (step=0002493) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 0.48777147329289766, LR: 0.0003 +[2026-02-27 10:34:07] (step=0002494) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.48796712972021133, LR: 0.0003 +[2026-02-27 10:34:15] (step=0002495) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.48816278614752495, LR: 0.0003 +[2026-02-27 10:34:23] (step=0002496) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.48835844257483857, LR: 0.0003 +[2026-02-27 10:34:31] (step=0002497) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.48855409900215224, LR: 0.0003 +[2026-02-27 10:34:39] (step=0002498) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.48874975542946586, LR: 0.0003 +[2026-02-27 10:34:46] (step=0002499) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.4889454118567795, LR: 0.0003 +[2026-02-27 10:34:54] (step=0002500) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.48914106828409315, LR: 0.0003 +[2026-02-27 10:34:54] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0002500/ +[2026-02-27 10:35:02] (step=0002501) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.48933672471140677, LR: 0.0003 +[2026-02-27 10:35:10] (step=0002502) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.4895323811387204, LR: 0.0003 +[2026-02-27 10:35:18] (step=0002503) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.48972803756603406, LR: 0.0003 +[2026-02-27 10:35:26] (step=0002504) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.4899236939933477, LR: 0.0003 +[2026-02-27 10:35:34] (step=0002505) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.4901193504206613, LR: 0.0003 +[2026-02-27 10:35:41] (step=0002506) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.49031500684797497, LR: 0.0003 +[2026-02-27 10:35:49] (step=0002507) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.4905106632752886, LR: 0.0003 +[2026-02-27 10:35:57] (step=0002508) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.49070631970260226, LR: 0.0003 +[2026-02-27 10:36:05] (step=0002509) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.4909019761299159, LR: 0.0003 +[2026-02-27 10:36:13] (step=0002510) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.4910976325572295, LR: 0.0003 +[2026-02-27 10:36:21] (step=0002511) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.49129328898454316, LR: 0.0003 +[2026-02-27 10:36:29] (step=0002512) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.4914889454118568, LR: 0.0003 +[2026-02-27 10:36:36] (step=0002513) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.4916846018391704, LR: 0.0003 +[2026-02-27 10:36:44] (step=0002514) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.49188025826648407, LR: 0.0003 +[2026-02-27 10:36:52] (step=0002515) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.4920759146937977, LR: 0.0003 +[2026-02-27 10:37:00] (step=0002516) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.4922715711211113, LR: 0.0003 +[2026-02-27 10:37:08] (step=0002517) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.492467227548425, LR: 0.0003 +[2026-02-27 10:37:16] (step=0002518) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.4926628839757386, LR: 0.0003 +[2026-02-27 10:37:24] (step=0002519) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.4928585404030522, LR: 0.0003 +[2026-02-27 10:37:32] (step=0002520) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.4930541968303659, LR: 0.0003 +[2026-02-27 10:37:39] (step=0002521) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.4932498532576795, LR: 0.0003 +[2026-02-27 10:37:47] (step=0002522) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.4934455096849932, LR: 0.0003 +[2026-02-27 10:37:55] (step=0002523) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.4936411661123068, LR: 0.0003 +[2026-02-27 10:38:03] (step=0002524) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.4938368225396204, LR: 0.0003 +[2026-02-27 10:38:11] (step=0002525) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.4940324789669341, LR: 0.0003 +[2026-02-27 10:38:19] (step=0002526) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 0.4942281353942477, LR: 0.0003 +[2026-02-27 10:38:27] (step=0002527) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.4944237918215613, LR: 0.0003 +[2026-02-27 10:38:35] (step=0002528) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.494619448248875, LR: 0.0003 +[2026-02-27 10:38:42] (step=0002529) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.4948151046761886, LR: 0.0003 +[2026-02-27 10:38:50] (step=0002530) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.49501076110350223, LR: 0.0003 +[2026-02-27 10:38:58] (step=0002531) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.4952064175308159, LR: 0.0003 +[2026-02-27 10:39:06] (step=0002532) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.4954020739581295, LR: 0.0003 +[2026-02-27 10:39:14] (step=0002533) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.49559773038544314, LR: 0.0003 +[2026-02-27 10:39:22] (step=0002534) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.4957933868127568, LR: 0.0003 +[2026-02-27 10:39:30] (step=0002535) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.4959890432400704, LR: 0.0003 +[2026-02-27 10:39:38] (step=0002536) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.4961846996673841, LR: 0.0003 +[2026-02-27 10:39:45] (step=0002537) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.4963803560946977, LR: 0.0003 +[2026-02-27 10:39:53] (step=0002538) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.49657601252201133, LR: 0.0003 +[2026-02-27 10:40:01] (step=0002539) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.496771668949325, LR: 0.0003 +[2026-02-27 10:40:09] (step=0002540) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.4969673253766386, LR: 0.0003 +[2026-02-27 10:40:17] (step=0002541) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.49716298180395224, LR: 0.0003 +[2026-02-27 10:40:25] (step=0002542) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.4973586382312659, LR: 0.0003 +[2026-02-27 10:40:33] (step=0002543) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 0.49755429465857953, LR: 0.0003 +[2026-02-27 10:40:40] (step=0002544) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.49774995108589315, LR: 0.0003 +[2026-02-27 10:40:48] (step=0002545) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.4979456075132068, LR: 0.0003 +[2026-02-27 10:40:56] (step=0002546) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.49814126394052044, LR: 0.0003 +[2026-02-27 10:41:04] (step=0002547) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 0.49833692036783406, LR: 0.0003 +[2026-02-27 10:41:12] (step=0002548) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.49853257679514773, LR: 0.0003 +[2026-02-27 10:41:20] (step=0002549) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.49872823322246135, LR: 0.0003 +[2026-02-27 10:41:28] (step=0002550) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.498923889649775, LR: 0.0003 +[2026-02-27 10:41:35] (step=0002551) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.49911954607708864, LR: 0.0003 +[2026-02-27 10:41:43] (step=0002552) Train Loss: 0.4767, Train Steps/Sec: 0.13, Epoch: 0.49931520250440226, LR: 0.0003 +[2026-02-27 10:41:51] (step=0002553) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.49951085893171593, LR: 0.0003 +[2026-02-27 10:41:59] (step=0002554) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.49970651535902955, LR: 0.0003 +[2026-02-27 10:42:07] (step=0002555) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.49990217178634316, LR: 0.0003 +[2026-02-27 10:42:15] (step=0002556) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.5000978282136568, LR: 0.0003 +[2026-02-27 10:42:23] (step=0002557) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.5002934846409705, LR: 0.0003 +[2026-02-27 10:42:30] (step=0002558) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.5004891410682841, LR: 0.0003 +[2026-02-27 10:42:38] (step=0002559) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.5006847974955977, LR: 0.0003 +[2026-02-27 10:42:46] (step=0002560) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.5008804539229114, LR: 0.0003 +[2026-02-27 10:42:54] (step=0002561) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 0.501076110350225, LR: 0.0003 +[2026-02-27 10:43:02] (step=0002562) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.5012717667775386, LR: 0.0003 +[2026-02-27 10:43:10] (step=0002563) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.5014674232048523, LR: 0.0003 +[2026-02-27 10:43:18] (step=0002564) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.5016630796321659, LR: 0.0003 +[2026-02-27 10:43:25] (step=0002565) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.5018587360594795, LR: 0.0003 +[2026-02-27 10:43:33] (step=0002566) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.5020543924867932, LR: 0.0003 +[2026-02-27 10:43:41] (step=0002567) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.5022500489141069, LR: 0.0003 +[2026-02-27 10:43:49] (step=0002568) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.5024457053414204, LR: 0.0003 +[2026-02-27 10:43:57] (step=0002569) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 0.5026413617687341, LR: 0.0003 +[2026-02-27 10:44:05] (step=0002570) Train Loss: 0.4805, Train Steps/Sec: 0.13, Epoch: 0.5028370181960478, LR: 0.0003 +[2026-02-27 10:44:13] (step=0002571) Train Loss: 0.4604, Train Steps/Sec: 0.12, Epoch: 0.5030326746233614, LR: 0.0003 +[2026-02-27 10:44:21] (step=0002572) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.503228331050675, LR: 0.0003 +[2026-02-27 10:44:29] (step=0002573) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.5034239874779887, LR: 0.0003 +[2026-02-27 10:44:36] (step=0002574) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.5036196439053023, LR: 0.0003 +[2026-02-27 10:44:44] (step=0002575) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.5038153003326159, LR: 0.0003 +[2026-02-27 10:44:52] (step=0002576) Train Loss: 0.4759, Train Steps/Sec: 0.13, Epoch: 0.5040109567599296, LR: 0.0003 +[2026-02-27 10:45:00] (step=0002577) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.5042066131872432, LR: 0.0003 +[2026-02-27 10:45:08] (step=0002578) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 0.5044022696145568, LR: 0.0003 +[2026-02-27 10:45:16] (step=0002579) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.5045979260418705, LR: 0.0003 +[2026-02-27 10:45:24] (step=0002580) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.5047935824691842, LR: 0.0003 +[2026-02-27 10:45:31] (step=0002581) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 0.5049892388964977, LR: 0.0003 +[2026-02-27 10:45:39] (step=0002582) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.5051848953238114, LR: 0.0003 +[2026-02-27 10:45:47] (step=0002583) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.5053805517511251, LR: 0.0003 +[2026-02-27 10:45:55] (step=0002584) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.5055762081784386, LR: 0.0003 +[2026-02-27 10:46:03] (step=0002585) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.5057718646057523, LR: 0.0003 +[2026-02-27 10:46:11] (step=0002586) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 0.505967521033066, LR: 0.0003 +[2026-02-27 10:46:19] (step=0002587) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 0.5061631774603795, LR: 0.0003 +[2026-02-27 10:46:26] (step=0002588) Train Loss: 0.4746, Train Steps/Sec: 0.13, Epoch: 0.5063588338876932, LR: 0.0003 +[2026-02-27 10:46:34] (step=0002589) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.5065544903150069, LR: 0.0003 +[2026-02-27 10:46:42] (step=0002590) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.5067501467423204, LR: 0.0003 +[2026-02-27 10:46:50] (step=0002591) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.5069458031696341, LR: 0.0003 +[2026-02-27 10:46:58] (step=0002592) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.5071414595969478, LR: 0.0003 +[2026-02-27 10:47:06] (step=0002593) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.5073371160242613, LR: 0.0003 +[2026-02-27 10:47:14] (step=0002594) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.507532772451575, LR: 0.0003 +[2026-02-27 10:47:21] (step=0002595) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.5077284288788887, LR: 0.0003 +[2026-02-27 10:47:29] (step=0002596) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.5079240853062023, LR: 0.0003 +[2026-02-27 10:47:37] (step=0002597) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.5081197417335159, LR: 0.0003 +[2026-02-27 10:47:45] (step=0002598) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.5083153981608296, LR: 0.0003 +[2026-02-27 10:47:53] (step=0002599) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.5085110545881433, LR: 0.0003 +[2026-02-27 10:48:01] (step=0002600) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.5087067110154568, LR: 0.0003 +[2026-02-27 10:48:09] (step=0002601) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.5089023674427705, LR: 0.0003 +[2026-02-27 10:48:16] (step=0002602) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.5090980238700842, LR: 0.0003 +[2026-02-27 10:48:24] (step=0002603) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.5092936802973977, LR: 0.0003 +[2026-02-27 10:48:32] (step=0002604) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.5094893367247114, LR: 0.0003 +[2026-02-27 10:48:40] (step=0002605) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.5096849931520251, LR: 0.0003 +[2026-02-27 10:48:48] (step=0002606) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.5098806495793387, LR: 0.0003 +[2026-02-27 10:48:56] (step=0002607) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.5100763060066523, LR: 0.0003 +[2026-02-27 10:49:04] (step=0002608) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.510271962433966, LR: 0.0003 +[2026-02-27 10:49:11] (step=0002609) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.5104676188612796, LR: 0.0003 +[2026-02-27 10:49:19] (step=0002610) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.5106632752885932, LR: 0.0003 +[2026-02-27 10:49:27] (step=0002611) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.5108589317159069, LR: 0.0003 +[2026-02-27 10:49:35] (step=0002612) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.5110545881432205, LR: 0.0003 +[2026-02-27 10:49:43] (step=0002613) Train Loss: 0.4784, Train Steps/Sec: 0.13, Epoch: 0.5112502445705341, LR: 0.0003 +[2026-02-27 10:49:51] (step=0002614) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.5114459009978478, LR: 0.0003 +[2026-02-27 10:49:59] (step=0002615) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.5116415574251614, LR: 0.0003 +[2026-02-27 10:50:06] (step=0002616) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 0.511837213852475, LR: 0.0003 +[2026-02-27 10:50:14] (step=0002617) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.5120328702797887, LR: 0.0003 +[2026-02-27 10:50:22] (step=0002618) Train Loss: 0.4571, Train Steps/Sec: 0.12, Epoch: 0.5122285267071023, LR: 0.0003 +[2026-02-27 10:50:30] (step=0002619) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.512424183134416, LR: 0.0003 +[2026-02-27 10:50:38] (step=0002620) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.5126198395617296, LR: 0.0003 +[2026-02-27 10:50:46] (step=0002621) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 0.5128154959890432, LR: 0.0003 +[2026-02-27 10:50:54] (step=0002622) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.5130111524163569, LR: 0.0003 +[2026-02-27 10:51:02] (step=0002623) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.5132068088436705, LR: 0.0003 +[2026-02-27 10:51:10] (step=0002624) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.5134024652709841, LR: 0.0003 +[2026-02-27 10:51:18] (step=0002625) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 0.5135981216982978, LR: 0.0003 +[2026-02-27 10:51:25] (step=0002626) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.5137937781256114, LR: 0.0003 +[2026-02-27 10:51:33] (step=0002627) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.5139894345529251, LR: 0.0003 +[2026-02-27 10:51:41] (step=0002628) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.5141850909802387, LR: 0.0003 +[2026-02-27 10:51:49] (step=0002629) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.5143807474075524, LR: 0.0003 +[2026-02-27 10:51:57] (step=0002630) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 0.514576403834866, LR: 0.0003 +[2026-02-27 10:52:05] (step=0002631) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.5147720602621796, LR: 0.0003 +[2026-02-27 10:52:13] (step=0002632) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.5149677166894933, LR: 0.0003 +[2026-02-27 10:52:20] (step=0002633) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.5151633731168069, LR: 0.0003 +[2026-02-27 10:52:28] (step=0002634) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.5153590295441205, LR: 0.0003 +[2026-02-27 10:52:36] (step=0002635) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.5155546859714342, LR: 0.0003 +[2026-02-27 10:52:44] (step=0002636) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.5157503423987478, LR: 0.0003 +[2026-02-27 10:52:52] (step=0002637) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 0.5159459988260614, LR: 0.0003 +[2026-02-27 10:53:00] (step=0002638) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.5161416552533751, LR: 0.0003 +[2026-02-27 10:53:08] (step=0002639) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.5163373116806887, LR: 0.0003 +[2026-02-27 10:53:15] (step=0002640) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.5165329681080023, LR: 0.0003 +[2026-02-27 10:53:23] (step=0002641) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.516728624535316, LR: 0.0003 +[2026-02-27 10:53:31] (step=0002642) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.5169242809626297, LR: 0.0003 +[2026-02-27 10:53:39] (step=0002643) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.5171199373899432, LR: 0.0003 +[2026-02-27 10:53:47] (step=0002644) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.5173155938172569, LR: 0.0003 +[2026-02-27 10:53:55] (step=0002645) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.5175112502445706, LR: 0.0003 +[2026-02-27 10:54:03] (step=0002646) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.5177069066718841, LR: 0.0003 +[2026-02-27 10:54:10] (step=0002647) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.5179025630991978, LR: 0.0003 +[2026-02-27 10:54:18] (step=0002648) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.5180982195265115, LR: 0.0003 +[2026-02-27 10:54:26] (step=0002649) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.518293875953825, LR: 0.0003 +[2026-02-27 10:54:34] (step=0002650) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.5184895323811387, LR: 0.0003 +[2026-02-27 10:54:42] (step=0002651) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.5186851888084524, LR: 0.0003 +[2026-02-27 10:54:50] (step=0002652) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.5188808452357659, LR: 0.0003 +[2026-02-27 10:54:58] (step=0002653) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.5190765016630796, LR: 0.0003 +[2026-02-27 10:55:05] (step=0002654) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.5192721580903933, LR: 0.0003 +[2026-02-27 10:55:13] (step=0002655) Train Loss: 0.4771, Train Steps/Sec: 0.13, Epoch: 0.519467814517707, LR: 0.0003 +[2026-02-27 10:55:21] (step=0002656) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.5196634709450205, LR: 0.0003 +[2026-02-27 10:55:29] (step=0002657) Train Loss: 0.4743, Train Steps/Sec: 0.13, Epoch: 0.5198591273723342, LR: 0.0003 +[2026-02-27 10:55:37] (step=0002658) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.5200547837996479, LR: 0.0003 +[2026-02-27 10:55:45] (step=0002659) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 0.5202504402269614, LR: 0.0003 +[2026-02-27 10:55:53] (step=0002660) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.5204460966542751, LR: 0.0003 +[2026-02-27 10:56:01] (step=0002661) Train Loss: 0.4765, Train Steps/Sec: 0.13, Epoch: 0.5206417530815888, LR: 0.0003 +[2026-02-27 10:56:08] (step=0002662) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.5208374095089023, LR: 0.0003 +[2026-02-27 10:56:16] (step=0002663) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.521033065936216, LR: 0.0003 +[2026-02-27 10:56:24] (step=0002664) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.5212287223635297, LR: 0.0003 +[2026-02-27 10:56:32] (step=0002665) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.5214243787908432, LR: 0.0003 +[2026-02-27 10:56:40] (step=0002666) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.5216200352181569, LR: 0.0003 +[2026-02-27 10:56:48] (step=0002667) Train Loss: 0.4752, Train Steps/Sec: 0.12, Epoch: 0.5218156916454706, LR: 0.0003 +[2026-02-27 10:56:56] (step=0002668) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.5220113480727842, LR: 0.0003 +[2026-02-27 10:57:04] (step=0002669) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.5222070045000978, LR: 0.0003 +[2026-02-27 10:57:11] (step=0002670) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 0.5224026609274115, LR: 0.0003 +[2026-02-27 10:57:19] (step=0002671) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.5225983173547251, LR: 0.0003 +[2026-02-27 10:57:27] (step=0002672) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.5227939737820387, LR: 0.0003 +[2026-02-27 10:57:35] (step=0002673) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.5229896302093524, LR: 0.0003 +[2026-02-27 10:57:43] (step=0002674) Train Loss: 0.4698, Train Steps/Sec: 0.13, Epoch: 0.523185286636666, LR: 0.0003 +[2026-02-27 10:57:51] (step=0002675) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.5233809430639796, LR: 0.0003 +[2026-02-27 10:57:59] (step=0002676) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.5235765994912933, LR: 0.0003 +[2026-02-27 10:58:07] (step=0002677) Train Loss: 0.4762, Train Steps/Sec: 0.13, Epoch: 0.5237722559186069, LR: 0.0003 +[2026-02-27 10:58:14] (step=0002678) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.5239679123459206, LR: 0.0003 +[2026-02-27 10:58:22] (step=0002679) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.5241635687732342, LR: 0.0003 +[2026-02-27 10:58:30] (step=0002680) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.5243592252005478, LR: 0.0003 +[2026-02-27 10:58:38] (step=0002681) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 0.5245548816278615, LR: 0.0003 +[2026-02-27 10:58:46] (step=0002682) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.5247505380551751, LR: 0.0003 +[2026-02-27 10:58:54] (step=0002683) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.5249461944824888, LR: 0.0003 +[2026-02-27 10:59:01] (step=0002684) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 0.5251418509098024, LR: 0.0003 +[2026-02-27 10:59:09] (step=0002685) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.525337507337116, LR: 0.0003 +[2026-02-27 10:59:17] (step=0002686) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.5255331637644297, LR: 0.0003 +[2026-02-27 10:59:25] (step=0002687) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.5257288201917433, LR: 0.0003 +[2026-02-27 10:59:33] (step=0002688) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.525924476619057, LR: 0.0003 +[2026-02-27 10:59:41] (step=0002689) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.5261201330463706, LR: 0.0003 +[2026-02-27 10:59:49] (step=0002690) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.5263157894736842, LR: 0.0003 +[2026-02-27 10:59:56] (step=0002691) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.5265114459009979, LR: 0.0003 +[2026-02-27 11:00:04] (step=0002692) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.5267071023283115, LR: 0.0003 +[2026-02-27 11:00:12] (step=0002693) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.5269027587556251, LR: 0.0003 +[2026-02-27 11:00:20] (step=0002694) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.5270984151829388, LR: 0.0003 +[2026-02-27 11:00:28] (step=0002695) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.5272940716102524, LR: 0.0003 +[2026-02-27 11:00:36] (step=0002696) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.527489728037566, LR: 0.0003 +[2026-02-27 11:00:44] (step=0002697) Train Loss: 0.4801, Train Steps/Sec: 0.13, Epoch: 0.5276853844648797, LR: 0.0003 +[2026-02-27 11:00:51] (step=0002698) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.5278810408921933, LR: 0.0003 +[2026-02-27 11:00:59] (step=0002699) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.5280766973195069, LR: 0.0003 +[2026-02-27 11:01:07] (step=0002700) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.5282723537468206, LR: 0.0003 +[2026-02-27 11:01:15] (step=0002701) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.5284680101741343, LR: 0.0003 +[2026-02-27 11:01:23] (step=0002702) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.5286636666014478, LR: 0.0003 +[2026-02-27 11:01:31] (step=0002703) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.5288593230287615, LR: 0.0003 +[2026-02-27 11:01:39] (step=0002704) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.5290549794560752, LR: 0.0003 +[2026-02-27 11:01:46] (step=0002705) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.5292506358833887, LR: 0.0003 +[2026-02-27 11:01:54] (step=0002706) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.5294462923107024, LR: 0.0003 +[2026-02-27 11:02:02] (step=0002707) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.5296419487380161, LR: 0.0003 +[2026-02-27 11:02:10] (step=0002708) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.5298376051653296, LR: 0.0003 +[2026-02-27 11:02:18] (step=0002709) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 0.5300332615926433, LR: 0.0003 +[2026-02-27 11:02:26] (step=0002710) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.530228918019957, LR: 0.0003 +[2026-02-27 11:02:34] (step=0002711) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 0.5304245744472706, LR: 0.0003 +[2026-02-27 11:02:41] (step=0002712) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.5306202308745842, LR: 0.0003 +[2026-02-27 11:02:49] (step=0002713) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 0.5308158873018979, LR: 0.0003 +[2026-02-27 11:02:57] (step=0002714) Train Loss: 0.4783, Train Steps/Sec: 0.13, Epoch: 0.5310115437292116, LR: 0.0003 +[2026-02-27 11:03:05] (step=0002715) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.5312072001565251, LR: 0.0003 +[2026-02-27 11:03:13] (step=0002716) Train Loss: 0.4638, Train Steps/Sec: 0.12, Epoch: 0.5314028565838388, LR: 0.0003 +[2026-02-27 11:03:21] (step=0002717) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.5315985130111525, LR: 0.0003 +[2026-02-27 11:03:29] (step=0002718) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.531794169438466, LR: 0.0003 +[2026-02-27 11:03:36] (step=0002719) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.5319898258657797, LR: 0.0003 +[2026-02-27 11:03:44] (step=0002720) Train Loss: 0.4794, Train Steps/Sec: 0.13, Epoch: 0.5321854822930934, LR: 0.0003 +[2026-02-27 11:03:52] (step=0002721) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.5323811387204069, LR: 0.0003 +[2026-02-27 11:04:00] (step=0002722) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.5325767951477206, LR: 0.0003 +[2026-02-27 11:04:08] (step=0002723) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.5327724515750343, LR: 0.0003 +[2026-02-27 11:04:16] (step=0002724) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.5329681080023478, LR: 0.0003 +[2026-02-27 11:04:24] (step=0002725) Train Loss: 0.4626, Train Steps/Sec: 0.12, Epoch: 0.5331637644296615, LR: 0.0003 +[2026-02-27 11:04:32] (step=0002726) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.5333594208569752, LR: 0.0003 +[2026-02-27 11:04:40] (step=0002727) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.5335550772842887, LR: 0.0003 +[2026-02-27 11:04:47] (step=0002728) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 0.5337507337116024, LR: 0.0003 +[2026-02-27 11:04:55] (step=0002729) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.5339463901389161, LR: 0.0003 +[2026-02-27 11:05:03] (step=0002730) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.5341420465662297, LR: 0.0003 +[2026-02-27 11:05:11] (step=0002731) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 0.5343377029935433, LR: 0.0003 +[2026-02-27 11:05:19] (step=0002732) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.534533359420857, LR: 0.0003 +[2026-02-27 11:05:27] (step=0002733) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.5347290158481706, LR: 0.0003 +[2026-02-27 11:05:35] (step=0002734) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.5349246722754842, LR: 0.0003 +[2026-02-27 11:05:42] (step=0002735) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.5351203287027979, LR: 0.0003 +[2026-02-27 11:05:50] (step=0002736) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.5353159851301115, LR: 0.0003 +[2026-02-27 11:05:58] (step=0002737) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.5355116415574251, LR: 0.0003 +[2026-02-27 11:06:06] (step=0002738) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.5357072979847388, LR: 0.0003 +[2026-02-27 11:06:14] (step=0002739) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.5359029544120525, LR: 0.0003 +[2026-02-27 11:06:22] (step=0002740) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.536098610839366, LR: 0.0003 +[2026-02-27 11:06:29] (step=0002741) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.5362942672666797, LR: 0.0003 +[2026-02-27 11:06:37] (step=0002742) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.5364899236939934, LR: 0.0003 +[2026-02-27 11:06:45] (step=0002743) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.536685580121307, LR: 0.0003 +[2026-02-27 11:06:53] (step=0002744) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.5368812365486206, LR: 0.0003 +[2026-02-27 11:07:01] (step=0002745) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.5370768929759343, LR: 0.0003 +[2026-02-27 11:07:09] (step=0002746) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.5372725494032479, LR: 0.0003 +[2026-02-27 11:07:16] (step=0002747) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.5374682058305615, LR: 0.0003 +[2026-02-27 11:07:24] (step=0002748) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.5376638622578752, LR: 0.0003 +[2026-02-27 11:07:32] (step=0002749) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.5378595186851888, LR: 0.0003 +[2026-02-27 11:07:40] (step=0002750) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.5380551751125024, LR: 0.0003 +[2026-02-27 11:07:48] (step=0002751) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 0.5382508315398161, LR: 0.0003 +[2026-02-27 11:07:56] (step=0002752) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.5384464879671297, LR: 0.0003 +[2026-02-27 11:08:03] (step=0002753) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.5386421443944434, LR: 0.0003 +[2026-02-27 11:08:11] (step=0002754) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.538837800821757, LR: 0.0003 +[2026-02-27 11:08:19] (step=0002755) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.5390334572490706, LR: 0.0003 +[2026-02-27 11:08:27] (step=0002756) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.5392291136763843, LR: 0.0003 +[2026-02-27 11:08:35] (step=0002757) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 0.5394247701036979, LR: 0.0003 +[2026-02-27 11:08:43] (step=0002758) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 0.5396204265310115, LR: 0.0003 +[2026-02-27 11:08:51] (step=0002759) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.5398160829583252, LR: 0.0003 +[2026-02-27 11:08:58] (step=0002760) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.5400117393856388, LR: 0.0003 +[2026-02-27 11:09:06] (step=0002761) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.5402073958129524, LR: 0.0003 +[2026-02-27 11:09:14] (step=0002762) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.5404030522402661, LR: 0.0003 +[2026-02-27 11:09:22] (step=0002763) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.5405987086675798, LR: 0.0003 +[2026-02-27 11:09:30] (step=0002764) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.5407943650948933, LR: 0.0003 +[2026-02-27 11:09:38] (step=0002765) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.540990021522207, LR: 0.0003 +[2026-02-27 11:09:46] (step=0002766) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.5411856779495207, LR: 0.0003 +[2026-02-27 11:09:53] (step=0002767) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.5413813343768343, LR: 0.0003 +[2026-02-27 11:10:01] (step=0002768) Train Loss: 0.4754, Train Steps/Sec: 0.13, Epoch: 0.5415769908041479, LR: 0.0003 +[2026-02-27 11:10:09] (step=0002769) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.5417726472314616, LR: 0.0003 +[2026-02-27 11:10:17] (step=0002770) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.5419683036587752, LR: 0.0003 +[2026-02-27 11:10:25] (step=0002771) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.5421639600860888, LR: 0.0003 +[2026-02-27 11:10:33] (step=0002772) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.5423596165134025, LR: 0.0003 +[2026-02-27 11:10:40] (step=0002773) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.5425552729407161, LR: 0.0003 +[2026-02-27 11:10:48] (step=0002774) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.5427509293680297, LR: 0.0003 +[2026-02-27 11:10:56] (step=0002775) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.5429465857953434, LR: 0.0003 +[2026-02-27 11:11:04] (step=0002776) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.5431422422226571, LR: 0.0003 +[2026-02-27 11:11:12] (step=0002777) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.5433378986499706, LR: 0.0003 +[2026-02-27 11:11:20] (step=0002778) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.5435335550772843, LR: 0.0003 +[2026-02-27 11:11:27] (step=0002779) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.543729211504598, LR: 0.0003 +[2026-02-27 11:11:35] (step=0002780) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.5439248679319115, LR: 0.0003 +[2026-02-27 11:11:43] (step=0002781) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.5441205243592252, LR: 0.0003 +[2026-02-27 11:11:51] (step=0002782) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.5443161807865389, LR: 0.0003 +[2026-02-27 11:11:59] (step=0002783) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.5445118372138524, LR: 0.0003 +[2026-02-27 11:12:07] (step=0002784) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.5447074936411661, LR: 0.0003 +[2026-02-27 11:12:14] (step=0002785) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.5449031500684798, LR: 0.0003 +[2026-02-27 11:12:22] (step=0002786) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 0.5450988064957933, LR: 0.0003 +[2026-02-27 11:12:30] (step=0002787) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.545294462923107, LR: 0.0003 +[2026-02-27 11:12:38] (step=0002788) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 0.5454901193504207, LR: 0.0003 +[2026-02-27 11:12:46] (step=0002789) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.5456857757777342, LR: 0.0003 +[2026-02-27 11:12:54] (step=0002790) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 0.5458814322050479, LR: 0.0003 +[2026-02-27 11:13:01] (step=0002791) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.5460770886323616, LR: 0.0003 +[2026-02-27 11:13:09] (step=0002792) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.5462727450596752, LR: 0.0003 +[2026-02-27 11:13:17] (step=0002793) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.5464684014869888, LR: 0.0003 +[2026-02-27 11:13:25] (step=0002794) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.5466640579143025, LR: 0.0003 +[2026-02-27 11:13:33] (step=0002795) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.5468597143416162, LR: 0.0003 +[2026-02-27 11:13:41] (step=0002796) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.5470553707689297, LR: 0.0003 +[2026-02-27 11:13:48] (step=0002797) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.5472510271962434, LR: 0.0003 +[2026-02-27 11:13:56] (step=0002798) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 0.5474466836235571, LR: 0.0003 +[2026-02-27 11:14:04] (step=0002799) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.5476423400508706, LR: 0.0003 +[2026-02-27 11:14:12] (step=0002800) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.5478379964781843, LR: 0.0003 +[2026-02-27 11:14:20] (step=0002801) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 0.548033652905498, LR: 0.0003 +[2026-02-27 11:14:28] (step=0002802) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.5482293093328116, LR: 0.0003 +[2026-02-27 11:14:35] (step=0002803) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 0.5484249657601252, LR: 0.0003 +[2026-02-27 11:14:43] (step=0002804) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.5486206221874389, LR: 0.0003 +[2026-02-27 11:14:51] (step=0002805) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.5488162786147525, LR: 0.0003 +[2026-02-27 11:14:59] (step=0002806) Train Loss: 0.4771, Train Steps/Sec: 0.13, Epoch: 0.5490119350420661, LR: 0.0003 +[2026-02-27 11:15:07] (step=0002807) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 0.5492075914693798, LR: 0.0003 +[2026-02-27 11:15:15] (step=0002808) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.5494032478966934, LR: 0.0003 +[2026-02-27 11:15:22] (step=0002809) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.549598904324007, LR: 0.0003 +[2026-02-27 11:15:30] (step=0002810) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.5497945607513207, LR: 0.0003 +[2026-02-27 11:15:38] (step=0002811) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.5499902171786343, LR: 0.0003 +[2026-02-27 11:15:46] (step=0002812) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.550185873605948, LR: 0.0003 +[2026-02-27 11:15:54] (step=0002813) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.5503815300332616, LR: 0.0003 +[2026-02-27 11:16:02] (step=0002814) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.5505771864605752, LR: 0.0003 +[2026-02-27 11:16:09] (step=0002815) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.5507728428878889, LR: 0.0003 +[2026-02-27 11:16:17] (step=0002816) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.5509684993152025, LR: 0.0003 +[2026-02-27 11:16:25] (step=0002817) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.5511641557425161, LR: 0.0003 +[2026-02-27 11:16:33] (step=0002818) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.5513598121698298, LR: 0.0003 +[2026-02-27 11:16:41] (step=0002819) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.5515554685971434, LR: 0.0003 +[2026-02-27 11:16:49] (step=0002820) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.551751125024457, LR: 0.0003 +[2026-02-27 11:16:57] (step=0002821) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.5519467814517707, LR: 0.0003 +[2026-02-27 11:17:04] (step=0002822) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 0.5521424378790843, LR: 0.0003 +[2026-02-27 11:17:12] (step=0002823) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.552338094306398, LR: 0.0003 +[2026-02-27 11:17:20] (step=0002824) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.5525337507337116, LR: 0.0003 +[2026-02-27 11:17:28] (step=0002825) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.5527294071610253, LR: 0.0003 +[2026-02-27 11:17:36] (step=0002826) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.5529250635883389, LR: 0.0003 +[2026-02-27 11:17:44] (step=0002827) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.5531207200156525, LR: 0.0003 +[2026-02-27 11:17:52] (step=0002828) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.5533163764429662, LR: 0.0003 +[2026-02-27 11:17:59] (step=0002829) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.5535120328702798, LR: 0.0003 +[2026-02-27 11:18:07] (step=0002830) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.5537076892975934, LR: 0.0003 +[2026-02-27 11:18:15] (step=0002831) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.5539033457249071, LR: 0.0003 +[2026-02-27 11:18:23] (step=0002832) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.5540990021522207, LR: 0.0003 +[2026-02-27 11:18:31] (step=0002833) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.5542946585795343, LR: 0.0003 +[2026-02-27 11:18:39] (step=0002834) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.554490315006848, LR: 0.0003 +[2026-02-27 11:18:47] (step=0002835) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.5546859714341617, LR: 0.0003 +[2026-02-27 11:18:54] (step=0002836) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.5548816278614752, LR: 0.0003 +[2026-02-27 11:19:02] (step=0002837) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.5550772842887889, LR: 0.0003 +[2026-02-27 11:19:10] (step=0002838) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.5552729407161026, LR: 0.0003 +[2026-02-27 11:19:18] (step=0002839) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.5554685971434161, LR: 0.0003 +[2026-02-27 11:19:26] (step=0002840) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 0.5556642535707298, LR: 0.0003 +[2026-02-27 11:19:34] (step=0002841) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.5558599099980435, LR: 0.0003 +[2026-02-27 11:19:41] (step=0002842) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.556055566425357, LR: 0.0003 +[2026-02-27 11:19:49] (step=0002843) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.5562512228526707, LR: 0.0003 +[2026-02-27 11:19:57] (step=0002844) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.5564468792799844, LR: 0.0003 +[2026-02-27 11:20:05] (step=0002845) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.5566425357072979, LR: 0.0003 +[2026-02-27 11:20:13] (step=0002846) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.5568381921346116, LR: 0.0003 +[2026-02-27 11:20:21] (step=0002847) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.5570338485619253, LR: 0.0003 +[2026-02-27 11:20:28] (step=0002848) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 0.5572295049892388, LR: 0.0003 +[2026-02-27 11:20:36] (step=0002849) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.5574251614165525, LR: 0.0003 +[2026-02-27 11:20:44] (step=0002850) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.5576208178438662, LR: 0.0003 +[2026-02-27 11:20:52] (step=0002851) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.5578164742711799, LR: 0.0003 +[2026-02-27 11:21:00] (step=0002852) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.5580121306984934, LR: 0.0003 +[2026-02-27 11:21:08] (step=0002853) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.5582077871258071, LR: 0.0003 +[2026-02-27 11:21:15] (step=0002854) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.5584034435531208, LR: 0.0003 +[2026-02-27 11:21:23] (step=0002855) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.5585990999804343, LR: 0.0003 +[2026-02-27 11:21:31] (step=0002856) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.558794756407748, LR: 0.0003 +[2026-02-27 11:21:39] (step=0002857) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.5589904128350617, LR: 0.0003 +[2026-02-27 11:21:47] (step=0002858) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.5591860692623752, LR: 0.0003 +[2026-02-27 11:21:54] (step=0002859) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.5593817256896889, LR: 0.0003 +[2026-02-27 11:22:02] (step=0002860) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.5595773821170026, LR: 0.0003 +[2026-02-27 11:22:10] (step=0002861) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.5597730385443161, LR: 0.0003 +[2026-02-27 11:22:18] (step=0002862) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.5599686949716298, LR: 0.0003 +[2026-02-27 11:22:26] (step=0002863) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.5601643513989435, LR: 0.0003 +[2026-02-27 11:22:34] (step=0002864) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.5603600078262571, LR: 0.0003 +[2026-02-27 11:22:41] (step=0002865) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.5605556642535707, LR: 0.0003 +[2026-02-27 11:22:49] (step=0002866) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.5607513206808844, LR: 0.0003 +[2026-02-27 11:22:57] (step=0002867) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 0.560946977108198, LR: 0.0003 +[2026-02-27 11:23:05] (step=0002868) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.5611426335355116, LR: 0.0003 +[2026-02-27 11:23:13] (step=0002869) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.5613382899628253, LR: 0.0003 +[2026-02-27 11:23:21] (step=0002870) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.5615339463901389, LR: 0.0003 +[2026-02-27 11:23:28] (step=0002871) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 0.5617296028174525, LR: 0.0003 +[2026-02-27 11:23:36] (step=0002872) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.5619252592447662, LR: 0.0003 +[2026-02-27 11:23:44] (step=0002873) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.5621209156720798, LR: 0.0003 +[2026-02-27 11:23:52] (step=0002874) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.5623165720993935, LR: 0.0003 +[2026-02-27 11:24:00] (step=0002875) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.5625122285267071, LR: 0.0003 +[2026-02-27 11:24:08] (step=0002876) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.5627078849540207, LR: 0.0003 +[2026-02-27 11:24:15] (step=0002877) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.5629035413813344, LR: 0.0003 +[2026-02-27 11:24:23] (step=0002878) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.563099197808648, LR: 0.0003 +[2026-02-27 11:24:31] (step=0002879) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.5632948542359617, LR: 0.0003 +[2026-02-27 11:24:39] (step=0002880) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 0.5634905106632753, LR: 0.0003 +[2026-02-27 11:24:47] (step=0002881) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.5636861670905889, LR: 0.0003 +[2026-02-27 11:24:55] (step=0002882) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.5638818235179026, LR: 0.0003 +[2026-02-27 11:25:02] (step=0002883) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 0.5640774799452162, LR: 0.0003 +[2026-02-27 11:25:10] (step=0002884) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.5642731363725298, LR: 0.0003 +[2026-02-27 11:25:18] (step=0002885) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.5644687927998435, LR: 0.0003 +[2026-02-27 11:25:26] (step=0002886) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.5646644492271571, LR: 0.0003 +[2026-02-27 11:25:34] (step=0002887) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.5648601056544708, LR: 0.0003 +[2026-02-27 11:25:42] (step=0002888) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.5650557620817844, LR: 0.0003 +[2026-02-27 11:25:49] (step=0002889) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.565251418509098, LR: 0.0003 +[2026-02-27 11:25:57] (step=0002890) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.5654470749364117, LR: 0.0003 +[2026-02-27 11:26:05] (step=0002891) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.5656427313637253, LR: 0.0003 +[2026-02-27 11:26:13] (step=0002892) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.5658383877910389, LR: 0.0003 +[2026-02-27 11:26:21] (step=0002893) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.5660340442183526, LR: 0.0003 +[2026-02-27 11:26:29] (step=0002894) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.5662297006456662, LR: 0.0003 +[2026-02-27 11:26:36] (step=0002895) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.5664253570729798, LR: 0.0003 +[2026-02-27 11:26:44] (step=0002896) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.5666210135002935, LR: 0.0003 +[2026-02-27 11:26:52] (step=0002897) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.5668166699276072, LR: 0.0003 +[2026-02-27 11:27:00] (step=0002898) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.5670123263549207, LR: 0.0003 +[2026-02-27 11:27:08] (step=0002899) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.5672079827822344, LR: 0.0003 +[2026-02-27 11:27:16] (step=0002900) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.5674036392095481, LR: 0.0003 +[2026-02-27 11:27:23] (step=0002901) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.5675992956368616, LR: 0.0003 +[2026-02-27 11:27:31] (step=0002902) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.5677949520641753, LR: 0.0003 +[2026-02-27 11:27:39] (step=0002903) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.567990608491489, LR: 0.0003 +[2026-02-27 11:27:47] (step=0002904) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.5681862649188025, LR: 0.0003 +[2026-02-27 11:27:55] (step=0002905) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.5683819213461162, LR: 0.0003 +[2026-02-27 11:28:03] (step=0002906) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.5685775777734299, LR: 0.0003 +[2026-02-27 11:28:10] (step=0002907) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.5687732342007435, LR: 0.0003 +[2026-02-27 11:28:18] (step=0002908) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.5689688906280571, LR: 0.0003 +[2026-02-27 11:28:26] (step=0002909) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.5691645470553708, LR: 0.0003 +[2026-02-27 11:28:34] (step=0002910) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.5693602034826845, LR: 0.0003 +[2026-02-27 11:28:42] (step=0002911) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.569555859909998, LR: 0.0003 +[2026-02-27 11:28:50] (step=0002912) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.5697515163373117, LR: 0.0003 +[2026-02-27 11:28:58] (step=0002913) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.5699471727646254, LR: 0.0003 +[2026-02-27 11:29:05] (step=0002914) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.5701428291919389, LR: 0.0003 +[2026-02-27 11:29:13] (step=0002915) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.5703384856192526, LR: 0.0003 +[2026-02-27 11:29:21] (step=0002916) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.5705341420465663, LR: 0.0003 +[2026-02-27 11:29:29] (step=0002917) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.5707297984738798, LR: 0.0003 +[2026-02-27 11:29:37] (step=0002918) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.5709254549011935, LR: 0.0003 +[2026-02-27 11:29:45] (step=0002919) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.5711211113285072, LR: 0.0003 +[2026-02-27 11:29:52] (step=0002920) Train Loss: 0.4752, Train Steps/Sec: 0.13, Epoch: 0.5713167677558207, LR: 0.0003 +[2026-02-27 11:30:00] (step=0002921) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.5715124241831344, LR: 0.0003 +[2026-02-27 11:30:08] (step=0002922) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.5717080806104481, LR: 0.0003 +[2026-02-27 11:30:16] (step=0002923) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.5719037370377617, LR: 0.0003 +[2026-02-27 11:30:24] (step=0002924) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 0.5720993934650753, LR: 0.0003 +[2026-02-27 11:30:32] (step=0002925) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 0.572295049892389, LR: 0.0003 +[2026-02-27 11:30:40] (step=0002926) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.5724907063197026, LR: 0.0003 +[2026-02-27 11:30:47] (step=0002927) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.5726863627470162, LR: 0.0003 +[2026-02-27 11:30:55] (step=0002928) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.5728820191743299, LR: 0.0003 +[2026-02-27 11:31:03] (step=0002929) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.5730776756016435, LR: 0.0003 +[2026-02-27 11:31:11] (step=0002930) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.5732733320289571, LR: 0.0003 +[2026-02-27 11:31:19] (step=0002931) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.5734689884562708, LR: 0.0003 +[2026-02-27 11:31:27] (step=0002932) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.5736646448835844, LR: 0.0003 +[2026-02-27 11:31:34] (step=0002933) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.573860301310898, LR: 0.0003 +[2026-02-27 11:31:42] (step=0002934) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.5740559577382117, LR: 0.0003 +[2026-02-27 11:31:50] (step=0002935) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.5742516141655254, LR: 0.0003 +[2026-02-27 11:31:58] (step=0002936) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.574447270592839, LR: 0.0003 +[2026-02-27 11:32:06] (step=0002937) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.5746429270201526, LR: 0.0003 +[2026-02-27 11:32:14] (step=0002938) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.5748385834474663, LR: 0.0003 +[2026-02-27 11:32:22] (step=0002939) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.5750342398747799, LR: 0.0003 +[2026-02-27 11:32:30] (step=0002940) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.5752298963020935, LR: 0.0003 +[2026-02-27 11:32:37] (step=0002941) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.5754255527294072, LR: 0.0003 +[2026-02-27 11:32:45] (step=0002942) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.5756212091567208, LR: 0.0003 +[2026-02-27 11:32:53] (step=0002943) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.5758168655840344, LR: 0.0003 +[2026-02-27 11:33:01] (step=0002944) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.5760125220113481, LR: 0.0003 +[2026-02-27 11:33:09] (step=0002945) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.5762081784386617, LR: 0.0003 +[2026-02-27 11:33:17] (step=0002946) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.5764038348659754, LR: 0.0003 +[2026-02-27 11:33:24] (step=0002947) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.576599491293289, LR: 0.0003 +[2026-02-27 11:33:32] (step=0002948) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 0.5767951477206026, LR: 0.0003 +[2026-02-27 11:33:40] (step=0002949) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 0.5769908041479163, LR: 0.0003 +[2026-02-27 11:33:48] (step=0002950) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.5771864605752299, LR: 0.0003 +[2026-02-27 11:33:56] (step=0002951) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.5773821170025435, LR: 0.0003 +[2026-02-27 11:34:04] (step=0002952) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.5775777734298572, LR: 0.0003 +[2026-02-27 11:34:11] (step=0002953) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.5777734298571708, LR: 0.0003 +[2026-02-27 11:34:19] (step=0002954) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.5779690862844844, LR: 0.0003 +[2026-02-27 11:34:27] (step=0002955) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.5781647427117981, LR: 0.0003 +[2026-02-27 11:34:35] (step=0002956) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.5783603991391117, LR: 0.0003 +[2026-02-27 11:34:43] (step=0002957) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.5785560555664253, LR: 0.0003 +[2026-02-27 11:34:51] (step=0002958) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.578751711993739, LR: 0.0003 +[2026-02-27 11:34:59] (step=0002959) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.5789473684210527, LR: 0.0003 +[2026-02-27 11:35:06] (step=0002960) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.5791430248483662, LR: 0.0003 +[2026-02-27 11:35:14] (step=0002961) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.5793386812756799, LR: 0.0003 +[2026-02-27 11:35:22] (step=0002962) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.5795343377029936, LR: 0.0003 +[2026-02-27 11:35:30] (step=0002963) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.5797299941303072, LR: 0.0003 +[2026-02-27 11:35:38] (step=0002964) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.5799256505576208, LR: 0.0003 +[2026-02-27 11:35:46] (step=0002965) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.5801213069849345, LR: 0.0003 +[2026-02-27 11:35:53] (step=0002966) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.5803169634122481, LR: 0.0003 +[2026-02-27 11:36:01] (step=0002967) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.5805126198395617, LR: 0.0003 +[2026-02-27 11:36:09] (step=0002968) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.5807082762668754, LR: 0.0003 +[2026-02-27 11:36:17] (step=0002969) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.580903932694189, LR: 0.0003 +[2026-02-27 11:36:25] (step=0002970) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.5810995891215026, LR: 0.0003 +[2026-02-27 11:36:33] (step=0002971) Train Loss: 0.4736, Train Steps/Sec: 0.13, Epoch: 0.5812952455488163, LR: 0.0003 +[2026-02-27 11:36:41] (step=0002972) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.58149090197613, LR: 0.0003 +[2026-02-27 11:36:48] (step=0002973) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.5816865584034435, LR: 0.0003 +[2026-02-27 11:36:56] (step=0002974) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.5818822148307572, LR: 0.0003 +[2026-02-27 11:37:04] (step=0002975) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.5820778712580709, LR: 0.0003 +[2026-02-27 11:37:12] (step=0002976) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.5822735276853844, LR: 0.0003 +[2026-02-27 11:37:20] (step=0002977) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.5824691841126981, LR: 0.0003 +[2026-02-27 11:37:28] (step=0002978) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.5826648405400118, LR: 0.0003 +[2026-02-27 11:37:36] (step=0002979) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.5828604969673253, LR: 0.0003 +[2026-02-27 11:37:43] (step=0002980) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.583056153394639, LR: 0.0003 +[2026-02-27 11:37:51] (step=0002981) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.5832518098219527, LR: 0.0003 +[2026-02-27 11:37:59] (step=0002982) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 0.5834474662492662, LR: 0.0003 +[2026-02-27 11:38:07] (step=0002983) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.5836431226765799, LR: 0.0003 +[2026-02-27 11:38:15] (step=0002984) Train Loss: 0.4649, Train Steps/Sec: 0.12, Epoch: 0.5838387791038936, LR: 0.0003 +[2026-02-27 11:38:23] (step=0002985) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.5840344355312072, LR: 0.0003 +[2026-02-27 11:38:31] (step=0002986) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.5842300919585208, LR: 0.0003 +[2026-02-27 11:38:39] (step=0002987) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.5844257483858345, LR: 0.0003 +[2026-02-27 11:38:46] (step=0002988) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.5846214048131481, LR: 0.0003 +[2026-02-27 11:38:54] (step=0002989) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.5848170612404617, LR: 0.0003 +[2026-02-27 11:39:02] (step=0002990) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.5850127176677754, LR: 0.0003 +[2026-02-27 11:39:10] (step=0002991) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.5852083740950891, LR: 0.0003 +[2026-02-27 11:39:18] (step=0002992) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.5854040305224026, LR: 0.0003 +[2026-02-27 11:39:26] (step=0002993) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.5855996869497163, LR: 0.0003 +[2026-02-27 11:39:33] (step=0002994) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 0.58579534337703, LR: 0.0003 +[2026-02-27 11:39:41] (step=0002995) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.5859909998043435, LR: 0.0003 +[2026-02-27 11:39:49] (step=0002996) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.5861866562316572, LR: 0.0003 +[2026-02-27 11:39:57] (step=0002997) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.5863823126589709, LR: 0.0003 +[2026-02-27 11:40:05] (step=0002998) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.5865779690862845, LR: 0.0003 +[2026-02-27 11:40:13] (step=0002999) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.5867736255135981, LR: 0.0003 +[2026-02-27 11:40:20] (step=0003000) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.5869692819409118, LR: 0.0003 +[2026-02-27 11:40:20] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0003000/ +[2026-02-27 11:40:28] (step=0003001) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.5871649383682254, LR: 0.0003 +[2026-02-27 11:40:36] (step=0003002) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.587360594795539, LR: 0.0003 +[2026-02-27 11:40:44] (step=0003003) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.5875562512228527, LR: 0.0003 +[2026-02-27 11:40:52] (step=0003004) Train Loss: 0.4775, Train Steps/Sec: 0.13, Epoch: 0.5877519076501663, LR: 0.0003 +[2026-02-27 11:41:00] (step=0003005) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.58794756407748, LR: 0.0003 +[2026-02-27 11:41:08] (step=0003006) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.5881432205047936, LR: 0.0003 +[2026-02-27 11:41:15] (step=0003007) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.5883388769321072, LR: 0.0003 +[2026-02-27 11:41:23] (step=0003008) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.5885345333594209, LR: 0.0003 +[2026-02-27 11:41:31] (step=0003009) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.5887301897867345, LR: 0.0003 +[2026-02-27 11:41:39] (step=0003010) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.5889258462140481, LR: 0.0003 +[2026-02-27 11:41:47] (step=0003011) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.5891215026413618, LR: 0.0003 +[2026-02-27 11:41:55] (step=0003012) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.5893171590686754, LR: 0.0003 +[2026-02-27 11:42:02] (step=0003013) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.589512815495989, LR: 0.0003 +[2026-02-27 11:42:10] (step=0003014) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.5897084719233027, LR: 0.0003 +[2026-02-27 11:42:18] (step=0003015) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.5899041283506163, LR: 0.0003 +[2026-02-27 11:42:26] (step=0003016) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.5900997847779299, LR: 0.0003 +[2026-02-27 11:42:34] (step=0003017) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.5902954412052436, LR: 0.0003 +[2026-02-27 11:42:42] (step=0003018) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.5904910976325572, LR: 0.0003 +[2026-02-27 11:42:49] (step=0003019) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.5906867540598709, LR: 0.0003 +[2026-02-27 11:42:57] (step=0003020) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 0.5908824104871845, LR: 0.0003 +[2026-02-27 11:43:05] (step=0003021) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.5910780669144982, LR: 0.0003 +[2026-02-27 11:43:13] (step=0003022) Train Loss: 0.4637, Train Steps/Sec: 0.12, Epoch: 0.5912737233418118, LR: 0.0003 +[2026-02-27 11:43:21] (step=0003023) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.5914693797691254, LR: 0.0003 +[2026-02-27 11:43:29] (step=0003024) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.5916650361964391, LR: 0.0003 +[2026-02-27 11:43:37] (step=0003025) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.5918606926237527, LR: 0.0003 +[2026-02-27 11:43:45] (step=0003026) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.5920563490510663, LR: 0.0003 +[2026-02-27 11:43:52] (step=0003027) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 0.59225200547838, LR: 0.0003 +[2026-02-27 11:44:00] (step=0003028) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.5924476619056936, LR: 0.0003 +[2026-02-27 11:44:08] (step=0003029) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.5926433183330072, LR: 0.0003 +[2026-02-27 11:44:16] (step=0003030) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.5928389747603209, LR: 0.0003 +[2026-02-27 11:44:24] (step=0003031) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.5930346311876346, LR: 0.0003 +[2026-02-27 11:44:32] (step=0003032) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.5932302876149481, LR: 0.0003 +[2026-02-27 11:44:39] (step=0003033) Train Loss: 0.4745, Train Steps/Sec: 0.13, Epoch: 0.5934259440422618, LR: 0.0003 +[2026-02-27 11:44:47] (step=0003034) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.5936216004695755, LR: 0.0003 +[2026-02-27 11:44:55] (step=0003035) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.593817256896889, LR: 0.0003 +[2026-02-27 11:45:03] (step=0003036) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.5940129133242027, LR: 0.0003 +[2026-02-27 11:45:11] (step=0003037) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.5942085697515164, LR: 0.0003 +[2026-02-27 11:45:19] (step=0003038) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.5944042261788299, LR: 0.0003 +[2026-02-27 11:45:27] (step=0003039) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.5945998826061436, LR: 0.0003 +[2026-02-27 11:45:35] (step=0003040) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.5947955390334573, LR: 0.0003 +[2026-02-27 11:45:42] (step=0003041) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.5949911954607708, LR: 0.0003 +[2026-02-27 11:45:50] (step=0003042) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.5951868518880845, LR: 0.0003 +[2026-02-27 11:45:58] (step=0003043) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.5953825083153982, LR: 0.0003 +[2026-02-27 11:46:06] (step=0003044) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.5955781647427117, LR: 0.0003 +[2026-02-27 11:46:14] (step=0003045) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.5957738211700254, LR: 0.0003 +[2026-02-27 11:46:22] (step=0003046) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.5959694775973391, LR: 0.0003 +[2026-02-27 11:46:29] (step=0003047) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.5961651340246528, LR: 0.0003 +[2026-02-27 11:46:37] (step=0003048) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.5963607904519663, LR: 0.0003 +[2026-02-27 11:46:45] (step=0003049) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.59655644687928, LR: 0.0003 +[2026-02-27 11:46:53] (step=0003050) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.5967521033065937, LR: 0.0003 +[2026-02-27 11:47:01] (step=0003051) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.5969477597339072, LR: 0.0003 +[2026-02-27 11:47:09] (step=0003052) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.5971434161612209, LR: 0.0003 +[2026-02-27 11:47:16] (step=0003053) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.5973390725885346, LR: 0.0003 +[2026-02-27 11:47:24] (step=0003054) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.5975347290158481, LR: 0.0003 +[2026-02-27 11:47:32] (step=0003055) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.5977303854431618, LR: 0.0003 +[2026-02-27 11:47:40] (step=0003056) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.5979260418704755, LR: 0.0003 +[2026-02-27 11:47:48] (step=0003057) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.598121698297789, LR: 0.0003 +[2026-02-27 11:47:56] (step=0003058) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.5983173547251027, LR: 0.0003 +[2026-02-27 11:48:04] (step=0003059) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.5985130111524164, LR: 0.0003 +[2026-02-27 11:48:11] (step=0003060) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.59870866757973, LR: 0.0003 +[2026-02-27 11:48:19] (step=0003061) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.5989043240070436, LR: 0.0003 +[2026-02-27 11:48:27] (step=0003062) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.5990999804343573, LR: 0.0003 +[2026-02-27 11:48:35] (step=0003063) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.5992956368616709, LR: 0.0003 +[2026-02-27 11:48:43] (step=0003064) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.5994912932889845, LR: 0.0003 +[2026-02-27 11:48:51] (step=0003065) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.5996869497162982, LR: 0.0003 +[2026-02-27 11:48:58] (step=0003066) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.5998826061436118, LR: 0.0003 +[2026-02-27 11:49:06] (step=0003067) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.6000782625709254, LR: 0.0003 +[2026-02-27 11:49:14] (step=0003068) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.6002739189982391, LR: 0.0003 +[2026-02-27 11:49:22] (step=0003069) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.6004695754255527, LR: 0.0003 +[2026-02-27 11:49:30] (step=0003070) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.6006652318528664, LR: 0.0003 +[2026-02-27 11:49:38] (step=0003071) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.60086088828018, LR: 0.0003 +[2026-02-27 11:49:46] (step=0003072) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.6010565447074936, LR: 0.0003 +[2026-02-27 11:49:53] (step=0003073) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.6012522011348073, LR: 0.0003 +[2026-02-27 11:50:01] (step=0003074) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.6014478575621209, LR: 0.0003 +[2026-02-27 11:50:09] (step=0003075) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.6016435139894345, LR: 0.0003 +[2026-02-27 11:50:17] (step=0003076) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 0.6018391704167482, LR: 0.0003 +[2026-02-27 11:50:25] (step=0003077) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.6020348268440618, LR: 0.0003 +[2026-02-27 11:50:33] (step=0003078) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.6022304832713755, LR: 0.0003 +[2026-02-27 11:50:41] (step=0003079) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 0.6024261396986891, LR: 0.0003 +[2026-02-27 11:50:49] (step=0003080) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.6026217961260028, LR: 0.0003 +[2026-02-27 11:50:56] (step=0003081) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.6028174525533164, LR: 0.0003 +[2026-02-27 11:51:04] (step=0003082) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.60301310898063, LR: 0.0003 +[2026-02-27 11:51:12] (step=0003083) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.6032087654079437, LR: 0.0003 +[2026-02-27 11:51:20] (step=0003084) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.6034044218352573, LR: 0.0003 +[2026-02-27 11:51:28] (step=0003085) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.6036000782625709, LR: 0.0003 +[2026-02-27 11:51:36] (step=0003086) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 0.6037957346898846, LR: 0.0003 +[2026-02-27 11:51:44] (step=0003087) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.6039913911171982, LR: 0.0003 +[2026-02-27 11:51:51] (step=0003088) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.6041870475445118, LR: 0.0003 +[2026-02-27 11:51:59] (step=0003089) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.6043827039718255, LR: 0.0003 +[2026-02-27 11:52:07] (step=0003090) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.6045783603991391, LR: 0.0003 +[2026-02-27 11:52:15] (step=0003091) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.6047740168264527, LR: 0.0003 +[2026-02-27 11:52:23] (step=0003092) Train Loss: 0.4746, Train Steps/Sec: 0.13, Epoch: 0.6049696732537664, LR: 0.0003 +[2026-02-27 11:52:31] (step=0003093) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.60516532968108, LR: 0.0003 +[2026-02-27 11:52:38] (step=0003094) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.6053609861083936, LR: 0.0003 +[2026-02-27 11:52:46] (step=0003095) Train Loss: 0.4785, Train Steps/Sec: 0.13, Epoch: 0.6055566425357073, LR: 0.0003 +[2026-02-27 11:52:54] (step=0003096) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.605752298963021, LR: 0.0003 +[2026-02-27 11:53:02] (step=0003097) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.6059479553903345, LR: 0.0003 +[2026-02-27 11:53:10] (step=0003098) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.6061436118176482, LR: 0.0003 +[2026-02-27 11:53:18] (step=0003099) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.6063392682449619, LR: 0.0003 +[2026-02-27 11:53:26] (step=0003100) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.6065349246722754, LR: 0.0003 +[2026-02-27 11:53:33] (step=0003101) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.6067305810995891, LR: 0.0003 +[2026-02-27 11:53:41] (step=0003102) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.6069262375269028, LR: 0.0003 +[2026-02-27 11:53:49] (step=0003103) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.6071218939542163, LR: 0.0003 +[2026-02-27 11:53:57] (step=0003104) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.60731755038153, LR: 0.0003 +[2026-02-27 11:54:05] (step=0003105) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.6075132068088437, LR: 0.0003 +[2026-02-27 11:54:13] (step=0003106) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.6077088632361574, LR: 0.0003 +[2026-02-27 11:54:20] (step=0003107) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.6079045196634709, LR: 0.0003 +[2026-02-27 11:54:28] (step=0003108) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.6081001760907846, LR: 0.0003 +[2026-02-27 11:54:36] (step=0003109) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.6082958325180983, LR: 0.0003 +[2026-02-27 11:54:44] (step=0003110) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.6084914889454118, LR: 0.0003 +[2026-02-27 11:54:52] (step=0003111) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.6086871453727255, LR: 0.0003 +[2026-02-27 11:55:00] (step=0003112) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.6088828018000392, LR: 0.0003 +[2026-02-27 11:55:08] (step=0003113) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.6090784582273527, LR: 0.0003 +[2026-02-27 11:55:15] (step=0003114) Train Loss: 0.4784, Train Steps/Sec: 0.13, Epoch: 0.6092741146546664, LR: 0.0003 +[2026-02-27 11:55:23] (step=0003115) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.6094697710819801, LR: 0.0003 +[2026-02-27 11:55:31] (step=0003116) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 0.6096654275092936, LR: 0.0003 +[2026-02-27 11:55:39] (step=0003117) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.6098610839366073, LR: 0.0003 +[2026-02-27 11:55:47] (step=0003118) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.610056740363921, LR: 0.0003 +[2026-02-27 11:55:55] (step=0003119) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.6102523967912346, LR: 0.0003 +[2026-02-27 11:56:03] (step=0003120) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.6104480532185482, LR: 0.0003 +[2026-02-27 11:56:10] (step=0003121) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.6106437096458619, LR: 0.0003 +[2026-02-27 11:56:18] (step=0003122) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.6108393660731755, LR: 0.0003 +[2026-02-27 11:56:26] (step=0003123) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.6110350225004891, LR: 0.0003 +[2026-02-27 11:56:34] (step=0003124) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.6112306789278028, LR: 0.0003 +[2026-02-27 11:56:42] (step=0003125) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.6114263353551164, LR: 0.0003 +[2026-02-27 11:56:50] (step=0003126) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.61162199178243, LR: 0.0003 +[2026-02-27 11:56:57] (step=0003127) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.6118176482097437, LR: 0.0003 +[2026-02-27 11:57:05] (step=0003128) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.6120133046370573, LR: 0.0003 +[2026-02-27 11:57:13] (step=0003129) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.612208961064371, LR: 0.0003 +[2026-02-27 11:57:21] (step=0003130) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.6124046174916846, LR: 0.0003 +[2026-02-27 11:57:29] (step=0003131) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.6126002739189982, LR: 0.0003 +[2026-02-27 11:57:37] (step=0003132) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.6127959303463119, LR: 0.0003 +[2026-02-27 11:57:45] (step=0003133) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.6129915867736255, LR: 0.0003 +[2026-02-27 11:57:53] (step=0003134) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.6131872432009392, LR: 0.0003 +[2026-02-27 11:58:00] (step=0003135) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 0.6133828996282528, LR: 0.0003 +[2026-02-27 11:58:08] (step=0003136) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.6135785560555664, LR: 0.0003 +[2026-02-27 11:58:16] (step=0003137) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.6137742124828801, LR: 0.0003 +[2026-02-27 11:58:24] (step=0003138) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.6139698689101937, LR: 0.0003 +[2026-02-27 11:58:32] (step=0003139) Train Loss: 0.4751, Train Steps/Sec: 0.13, Epoch: 0.6141655253375073, LR: 0.0003 +[2026-02-27 11:58:40] (step=0003140) Train Loss: 0.4808, Train Steps/Sec: 0.13, Epoch: 0.614361181764821, LR: 0.0003 +[2026-02-27 11:58:47] (step=0003141) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.6145568381921346, LR: 0.0003 +[2026-02-27 11:58:55] (step=0003142) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.6147524946194483, LR: 0.0003 +[2026-02-27 11:59:03] (step=0003143) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.6149481510467619, LR: 0.0003 +[2026-02-27 11:59:11] (step=0003144) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.6151438074740755, LR: 0.0003 +[2026-02-27 11:59:19] (step=0003145) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 0.6153394639013892, LR: 0.0003 +[2026-02-27 11:59:27] (step=0003146) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.6155351203287028, LR: 0.0003 +[2026-02-27 11:59:34] (step=0003147) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 0.6157307767560164, LR: 0.0003 +[2026-02-27 11:59:42] (step=0003148) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.6159264331833301, LR: 0.0003 +[2026-02-27 11:59:50] (step=0003149) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.6161220896106437, LR: 0.0003 +[2026-02-27 11:59:58] (step=0003150) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 0.6163177460379573, LR: 0.0003 +[2026-02-27 12:00:06] (step=0003151) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.616513402465271, LR: 0.0003 +[2026-02-27 12:00:14] (step=0003152) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.6167090588925846, LR: 0.0003 +[2026-02-27 12:00:21] (step=0003153) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.6169047153198982, LR: 0.0003 +[2026-02-27 12:00:29] (step=0003154) Train Loss: 0.4749, Train Steps/Sec: 0.13, Epoch: 0.6171003717472119, LR: 0.0003 +[2026-02-27 12:00:37] (step=0003155) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.6172960281745256, LR: 0.0003 +[2026-02-27 12:00:45] (step=0003156) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 0.6174916846018391, LR: 0.0003 +[2026-02-27 12:00:53] (step=0003157) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 0.6176873410291528, LR: 0.0003 +[2026-02-27 12:01:00] (step=0003158) Train Loss: 0.4696, Train Steps/Sec: 0.13, Epoch: 0.6178829974564665, LR: 0.0003 +[2026-02-27 12:01:08] (step=0003159) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 0.61807865388378, LR: 0.0003 +[2026-02-27 12:01:16] (step=0003160) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.6182743103110937, LR: 0.0003 +[2026-02-27 12:01:24] (step=0003161) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.6184699667384074, LR: 0.0003 +[2026-02-27 12:01:32] (step=0003162) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.618665623165721, LR: 0.0003 +[2026-02-27 12:01:40] (step=0003163) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.6188612795930346, LR: 0.0003 +[2026-02-27 12:01:47] (step=0003164) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.6190569360203483, LR: 0.0003 +[2026-02-27 12:01:55] (step=0003165) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.619252592447662, LR: 0.0003 +[2026-02-27 12:02:03] (step=0003166) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.6194482488749755, LR: 0.0003 +[2026-02-27 12:02:11] (step=0003167) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.6196439053022892, LR: 0.0003 +[2026-02-27 12:02:19] (step=0003168) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.6198395617296029, LR: 0.0003 +[2026-02-27 12:02:27] (step=0003169) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.6200352181569164, LR: 0.0003 +[2026-02-27 12:02:34] (step=0003170) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.6202308745842301, LR: 0.0003 +[2026-02-27 12:02:42] (step=0003171) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 0.6204265310115438, LR: 0.0003 +[2026-02-27 12:02:50] (step=0003172) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.6206221874388573, LR: 0.0003 +[2026-02-27 12:02:58] (step=0003173) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.620817843866171, LR: 0.0003 +[2026-02-27 12:03:06] (step=0003174) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.6210135002934847, LR: 0.0003 +[2026-02-27 12:03:14] (step=0003175) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.6212091567207982, LR: 0.0003 +[2026-02-27 12:03:21] (step=0003176) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.6214048131481119, LR: 0.0003 +[2026-02-27 12:03:29] (step=0003177) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.6216004695754256, LR: 0.0003 +[2026-02-27 12:03:37] (step=0003178) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.6217961260027391, LR: 0.0003 +[2026-02-27 12:03:45] (step=0003179) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 0.6219917824300528, LR: 0.0003 +[2026-02-27 12:03:53] (step=0003180) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.6221874388573665, LR: 0.0003 +[2026-02-27 12:04:01] (step=0003181) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.62238309528468, LR: 0.0003 +[2026-02-27 12:04:09] (step=0003182) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.6225787517119937, LR: 0.0003 +[2026-02-27 12:04:17] (step=0003183) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.6227744081393074, LR: 0.0003 +[2026-02-27 12:04:24] (step=0003184) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.622970064566621, LR: 0.0003 +[2026-02-27 12:04:32] (step=0003185) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 0.6231657209939346, LR: 0.0003 +[2026-02-27 12:04:40] (step=0003186) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.6233613774212483, LR: 0.0003 +[2026-02-27 12:04:48] (step=0003187) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.6235570338485619, LR: 0.0003 +[2026-02-27 12:04:56] (step=0003188) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.6237526902758755, LR: 0.0003 +[2026-02-27 12:05:04] (step=0003189) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 0.6239483467031892, LR: 0.0003 +[2026-02-27 12:05:11] (step=0003190) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 0.6241440031305029, LR: 0.0003 +[2026-02-27 12:05:19] (step=0003191) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.6243396595578165, LR: 0.0003 +[2026-02-27 12:05:27] (step=0003192) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.6245353159851301, LR: 0.0003 +[2026-02-27 12:05:35] (step=0003193) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 0.6247309724124438, LR: 0.0003 +[2026-02-27 12:05:43] (step=0003194) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.6249266288397574, LR: 0.0003 +[2026-02-27 12:05:51] (step=0003195) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.625122285267071, LR: 0.0003 +[2026-02-27 12:05:58] (step=0003196) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.6253179416943847, LR: 0.0003 +[2026-02-27 12:06:06] (step=0003197) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.6255135981216983, LR: 0.0003 +[2026-02-27 12:06:14] (step=0003198) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.6257092545490119, LR: 0.0003 +[2026-02-27 12:06:22] (step=0003199) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.6259049109763256, LR: 0.0003 +[2026-02-27 12:06:30] (step=0003200) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.6261005674036392, LR: 0.0003 +[2026-02-27 12:06:38] (step=0003201) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.6262962238309528, LR: 0.0003 +[2026-02-27 12:06:46] (step=0003202) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.6264918802582665, LR: 0.0003 +[2026-02-27 12:06:53] (step=0003203) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.6266875366855801, LR: 0.0003 +[2026-02-27 12:07:01] (step=0003204) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.6268831931128938, LR: 0.0003 +[2026-02-27 12:07:09] (step=0003205) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.6270788495402074, LR: 0.0003 +[2026-02-27 12:07:17] (step=0003206) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.627274505967521, LR: 0.0003 +[2026-02-27 12:07:25] (step=0003207) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.6274701623948347, LR: 0.0003 +[2026-02-27 12:07:33] (step=0003208) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.6276658188221483, LR: 0.0003 +[2026-02-27 12:07:40] (step=0003209) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.6278614752494619, LR: 0.0003 +[2026-02-27 12:07:48] (step=0003210) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.6280571316767756, LR: 0.0003 +[2026-02-27 12:07:56] (step=0003211) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 0.6282527881040892, LR: 0.0003 +[2026-02-27 12:08:04] (step=0003212) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 0.6284484445314028, LR: 0.0003 +[2026-02-27 12:08:12] (step=0003213) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.6286441009587165, LR: 0.0003 +[2026-02-27 12:08:20] (step=0003214) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.6288397573860302, LR: 0.0003 +[2026-02-27 12:08:28] (step=0003215) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.6290354138133437, LR: 0.0003 +[2026-02-27 12:08:35] (step=0003216) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.6292310702406574, LR: 0.0003 +[2026-02-27 12:08:43] (step=0003217) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 0.6294267266679711, LR: 0.0003 +[2026-02-27 12:08:51] (step=0003218) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.6296223830952847, LR: 0.0003 +[2026-02-27 12:08:59] (step=0003219) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.6298180395225983, LR: 0.0003 +[2026-02-27 12:09:07] (step=0003220) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.630013695949912, LR: 0.0003 +[2026-02-27 12:09:15] (step=0003221) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.6302093523772256, LR: 0.0003 +[2026-02-27 12:09:22] (step=0003222) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.6304050088045392, LR: 0.0003 +[2026-02-27 12:09:30] (step=0003223) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.6306006652318529, LR: 0.0003 +[2026-02-27 12:09:38] (step=0003224) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.6307963216591665, LR: 0.0003 +[2026-02-27 12:09:46] (step=0003225) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.6309919780864801, LR: 0.0003 +[2026-02-27 12:09:54] (step=0003226) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.6311876345137938, LR: 0.0003 +[2026-02-27 12:10:02] (step=0003227) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.6313832909411075, LR: 0.0003 +[2026-02-27 12:10:10] (step=0003228) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.631578947368421, LR: 0.0003 +[2026-02-27 12:10:18] (step=0003229) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.6317746037957347, LR: 0.0003 +[2026-02-27 12:10:25] (step=0003230) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.6319702602230484, LR: 0.0003 +[2026-02-27 12:10:33] (step=0003231) Train Loss: 0.4655, Train Steps/Sec: 0.12, Epoch: 0.6321659166503619, LR: 0.0003 +[2026-02-27 12:10:41] (step=0003232) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.6323615730776756, LR: 0.0003 +[2026-02-27 12:10:49] (step=0003233) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.6325572295049893, LR: 0.0003 +[2026-02-27 12:10:57] (step=0003234) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.6327528859323028, LR: 0.0003 +[2026-02-27 12:11:05] (step=0003235) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.6329485423596165, LR: 0.0003 +[2026-02-27 12:11:13] (step=0003236) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.6331441987869302, LR: 0.0003 +[2026-02-27 12:11:20] (step=0003237) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.6333398552142437, LR: 0.0003 +[2026-02-27 12:11:28] (step=0003238) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.6335355116415574, LR: 0.0003 +[2026-02-27 12:11:36] (step=0003239) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.6337311680688711, LR: 0.0003 +[2026-02-27 12:11:44] (step=0003240) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.6339268244961846, LR: 0.0003 +[2026-02-27 12:11:52] (step=0003241) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 0.6341224809234983, LR: 0.0003 +[2026-02-27 12:12:00] (step=0003242) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 0.634318137350812, LR: 0.0003 +[2026-02-27 12:12:08] (step=0003243) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.6345137937781256, LR: 0.0003 +[2026-02-27 12:12:15] (step=0003244) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.6347094502054392, LR: 0.0003 +[2026-02-27 12:12:23] (step=0003245) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.6349051066327529, LR: 0.0003 +[2026-02-27 12:12:31] (step=0003246) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.6351007630600666, LR: 0.0003 +[2026-02-27 12:12:39] (step=0003247) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.6352964194873801, LR: 0.0003 +[2026-02-27 12:12:47] (step=0003248) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.6354920759146938, LR: 0.0003 +[2026-02-27 12:12:55] (step=0003249) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.6356877323420075, LR: 0.0003 +[2026-02-27 12:13:02] (step=0003250) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.635883388769321, LR: 0.0003 +[2026-02-27 12:13:10] (step=0003251) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.6360790451966347, LR: 0.0003 +[2026-02-27 12:13:18] (step=0003252) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.6362747016239484, LR: 0.0003 +[2026-02-27 12:13:26] (step=0003253) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.636470358051262, LR: 0.0003 +[2026-02-27 12:13:34] (step=0003254) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.6366660144785756, LR: 0.0003 +[2026-02-27 12:13:42] (step=0003255) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.6368616709058893, LR: 0.0003 +[2026-02-27 12:13:50] (step=0003256) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.6370573273332029, LR: 0.0003 +[2026-02-27 12:13:57] (step=0003257) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.6372529837605165, LR: 0.0003 +[2026-02-27 12:14:05] (step=0003258) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.6374486401878302, LR: 0.0003 +[2026-02-27 12:14:13] (step=0003259) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 0.6376442966151438, LR: 0.0003 +[2026-02-27 12:14:21] (step=0003260) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 0.6378399530424574, LR: 0.0003 +[2026-02-27 12:14:29] (step=0003261) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.6380356094697711, LR: 0.0003 +[2026-02-27 12:14:37] (step=0003262) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.6382312658970847, LR: 0.0003 +[2026-02-27 12:14:44] (step=0003263) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.6384269223243983, LR: 0.0003 +[2026-02-27 12:14:52] (step=0003264) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.638622578751712, LR: 0.0003 +[2026-02-27 12:15:00] (step=0003265) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.6388182351790256, LR: 0.0003 +[2026-02-27 12:15:08] (step=0003266) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.6390138916063393, LR: 0.0003 +[2026-02-27 12:15:16] (step=0003267) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.6392095480336529, LR: 0.0003 +[2026-02-27 12:15:24] (step=0003268) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.6394052044609665, LR: 0.0003 +[2026-02-27 12:15:32] (step=0003269) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.6396008608882802, LR: 0.0003 +[2026-02-27 12:15:39] (step=0003270) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 0.6397965173155938, LR: 0.0003 +[2026-02-27 12:15:47] (step=0003271) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.6399921737429074, LR: 0.0003 +[2026-02-27 12:15:55] (step=0003272) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.6401878301702211, LR: 0.0003 +[2026-02-27 12:16:03] (step=0003273) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.6403834865975347, LR: 0.0003 +[2026-02-27 12:16:11] (step=0003274) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.6405791430248484, LR: 0.0003 +[2026-02-27 12:16:19] (step=0003275) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.640774799452162, LR: 0.0003 +[2026-02-27 12:16:27] (step=0003276) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.6409704558794757, LR: 0.0003 +[2026-02-27 12:16:34] (step=0003277) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.6411661123067893, LR: 0.0003 +[2026-02-27 12:16:42] (step=0003278) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 0.6413617687341029, LR: 0.0003 +[2026-02-27 12:16:50] (step=0003279) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.6415574251614166, LR: 0.0003 +[2026-02-27 12:16:58] (step=0003280) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.6417530815887302, LR: 0.0003 +[2026-02-27 12:17:06] (step=0003281) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.6419487380160438, LR: 0.0003 +[2026-02-27 12:17:14] (step=0003282) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.6421443944433575, LR: 0.0003 +[2026-02-27 12:17:22] (step=0003283) Train Loss: 0.4751, Train Steps/Sec: 0.13, Epoch: 0.6423400508706711, LR: 0.0003 +[2026-02-27 12:17:29] (step=0003284) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.6425357072979847, LR: 0.0003 +[2026-02-27 12:17:37] (step=0003285) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.6427313637252984, LR: 0.0003 +[2026-02-27 12:17:45] (step=0003286) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.642927020152612, LR: 0.0003 +[2026-02-27 12:17:53] (step=0003287) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.6431226765799256, LR: 0.0003 +[2026-02-27 12:18:01] (step=0003288) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 0.6433183330072393, LR: 0.0003 +[2026-02-27 12:18:09] (step=0003289) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.643513989434553, LR: 0.0003 +[2026-02-27 12:18:17] (step=0003290) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 0.6437096458618665, LR: 0.0003 +[2026-02-27 12:18:24] (step=0003291) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.6439053022891802, LR: 0.0003 +[2026-02-27 12:18:32] (step=0003292) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.6441009587164939, LR: 0.0003 +[2026-02-27 12:18:40] (step=0003293) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.6442966151438074, LR: 0.0003 +[2026-02-27 12:18:48] (step=0003294) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.6444922715711211, LR: 0.0003 +[2026-02-27 12:18:56] (step=0003295) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.6446879279984348, LR: 0.0003 +[2026-02-27 12:19:04] (step=0003296) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.6448835844257483, LR: 0.0003 +[2026-02-27 12:19:11] (step=0003297) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.645079240853062, LR: 0.0003 +[2026-02-27 12:19:19] (step=0003298) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.6452748972803757, LR: 0.0003 +[2026-02-27 12:19:27] (step=0003299) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 0.6454705537076892, LR: 0.0003 +[2026-02-27 12:19:35] (step=0003300) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.6456662101350029, LR: 0.0003 +[2026-02-27 12:19:43] (step=0003301) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.6458618665623166, LR: 0.0003 +[2026-02-27 12:19:51] (step=0003302) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.6460575229896303, LR: 0.0003 +[2026-02-27 12:19:59] (step=0003303) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.6462531794169438, LR: 0.0003 +[2026-02-27 12:20:06] (step=0003304) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.6464488358442575, LR: 0.0003 +[2026-02-27 12:20:14] (step=0003305) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.6466444922715712, LR: 0.0003 +[2026-02-27 12:20:22] (step=0003306) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.6468401486988847, LR: 0.0003 +[2026-02-27 12:20:30] (step=0003307) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.6470358051261984, LR: 0.0003 +[2026-02-27 12:20:38] (step=0003308) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 0.6472314615535121, LR: 0.0003 +[2026-02-27 12:20:46] (step=0003309) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.6474271179808256, LR: 0.0003 +[2026-02-27 12:20:53] (step=0003310) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.6476227744081393, LR: 0.0003 +[2026-02-27 12:21:01] (step=0003311) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.647818430835453, LR: 0.0003 +[2026-02-27 12:21:09] (step=0003312) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.6480140872627665, LR: 0.0003 +[2026-02-27 12:21:17] (step=0003313) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.6482097436900802, LR: 0.0003 +[2026-02-27 12:21:25] (step=0003314) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 0.6484054001173939, LR: 0.0003 +[2026-02-27 12:21:33] (step=0003315) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.6486010565447075, LR: 0.0003 +[2026-02-27 12:21:41] (step=0003316) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.6487967129720211, LR: 0.0003 +[2026-02-27 12:21:49] (step=0003317) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.6489923693993348, LR: 0.0003 +[2026-02-27 12:21:56] (step=0003318) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.6491880258266484, LR: 0.0003 +[2026-02-27 12:22:04] (step=0003319) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.649383682253962, LR: 0.0003 +[2026-02-27 12:22:12] (step=0003320) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 0.6495793386812757, LR: 0.0003 +[2026-02-27 12:22:20] (step=0003321) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.6497749951085893, LR: 0.0003 +[2026-02-27 12:22:28] (step=0003322) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.6499706515359029, LR: 0.0003 +[2026-02-27 12:22:36] (step=0003323) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.6501663079632166, LR: 0.0003 +[2026-02-27 12:22:43] (step=0003324) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.6503619643905302, LR: 0.0003 +[2026-02-27 12:22:51] (step=0003325) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.6505576208178439, LR: 0.0003 +[2026-02-27 12:22:59] (step=0003326) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 0.6507532772451575, LR: 0.0003 +[2026-02-27 12:23:07] (step=0003327) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.6509489336724711, LR: 0.0003 +[2026-02-27 12:23:15] (step=0003328) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 0.6511445900997848, LR: 0.0003 +[2026-02-27 12:23:23] (step=0003329) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.6513402465270984, LR: 0.0003 +[2026-02-27 12:23:31] (step=0003330) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.6515359029544121, LR: 0.0003 +[2026-02-27 12:23:38] (step=0003331) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.6517315593817257, LR: 0.0003 +[2026-02-27 12:23:46] (step=0003332) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.6519272158090393, LR: 0.0003 +[2026-02-27 12:23:54] (step=0003333) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.652122872236353, LR: 0.0003 +[2026-02-27 12:24:02] (step=0003334) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.6523185286636666, LR: 0.0003 +[2026-02-27 12:24:10] (step=0003335) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.6525141850909802, LR: 0.0003 +[2026-02-27 12:24:18] (step=0003336) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.6527098415182939, LR: 0.0003 +[2026-02-27 12:24:26] (step=0003337) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.6529054979456075, LR: 0.0003 +[2026-02-27 12:24:33] (step=0003338) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.6531011543729212, LR: 0.0003 +[2026-02-27 12:24:41] (step=0003339) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.6532968108002348, LR: 0.0003 +[2026-02-27 12:24:49] (step=0003340) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.6534924672275484, LR: 0.0003 +[2026-02-27 12:24:57] (step=0003341) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.6536881236548621, LR: 0.0003 +[2026-02-27 12:25:05] (step=0003342) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.6538837800821757, LR: 0.0003 +[2026-02-27 12:25:13] (step=0003343) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.6540794365094893, LR: 0.0003 +[2026-02-27 12:25:20] (step=0003344) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.654275092936803, LR: 0.0003 +[2026-02-27 12:25:28] (step=0003345) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.6544707493641166, LR: 0.0003 +[2026-02-27 12:25:36] (step=0003346) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.6546664057914302, LR: 0.0003 +[2026-02-27 12:25:44] (step=0003347) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.6548620622187439, LR: 0.0003 +[2026-02-27 12:25:52] (step=0003348) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.6550577186460576, LR: 0.0003 +[2026-02-27 12:26:00] (step=0003349) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.6552533750733711, LR: 0.0003 +[2026-02-27 12:26:07] (step=0003350) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.6554490315006848, LR: 0.0003 +[2026-02-27 12:26:15] (step=0003351) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.6556446879279985, LR: 0.0003 +[2026-02-27 12:26:23] (step=0003352) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.655840344355312, LR: 0.0003 +[2026-02-27 12:26:31] (step=0003353) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.6560360007826257, LR: 0.0003 +[2026-02-27 12:26:39] (step=0003354) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.6562316572099394, LR: 0.0003 +[2026-02-27 12:26:47] (step=0003355) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.6564273136372529, LR: 0.0003 +[2026-02-27 12:26:55] (step=0003356) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.6566229700645666, LR: 0.0003 +[2026-02-27 12:27:02] (step=0003357) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.6568186264918803, LR: 0.0003 +[2026-02-27 12:27:10] (step=0003358) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.657014282919194, LR: 0.0003 +[2026-02-27 12:27:18] (step=0003359) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.6572099393465075, LR: 0.0003 +[2026-02-27 12:27:26] (step=0003360) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.6574055957738212, LR: 0.0003 +[2026-02-27 12:27:34] (step=0003361) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.6576012522011349, LR: 0.0003 +[2026-02-27 12:27:42] (step=0003362) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.6577969086284484, LR: 0.0003 +[2026-02-27 12:27:50] (step=0003363) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 0.6579925650557621, LR: 0.0003 +[2026-02-27 12:27:57] (step=0003364) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.6581882214830758, LR: 0.0003 +[2026-02-27 12:28:05] (step=0003365) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.6583838779103893, LR: 0.0003 +[2026-02-27 12:28:13] (step=0003366) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.658579534337703, LR: 0.0003 +[2026-02-27 12:28:21] (step=0003367) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 0.6587751907650167, LR: 0.0003 +[2026-02-27 12:28:29] (step=0003368) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.6589708471923302, LR: 0.0003 +[2026-02-27 12:28:37] (step=0003369) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 0.6591665036196439, LR: 0.0003 +[2026-02-27 12:28:45] (step=0003370) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.6593621600469576, LR: 0.0003 +[2026-02-27 12:28:52] (step=0003371) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.6595578164742711, LR: 0.0003 +[2026-02-27 12:29:00] (step=0003372) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.6597534729015848, LR: 0.0003 +[2026-02-27 12:29:08] (step=0003373) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.6599491293288985, LR: 0.0003 +[2026-02-27 12:29:16] (step=0003374) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.660144785756212, LR: 0.0003 +[2026-02-27 12:29:24] (step=0003375) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.6603404421835257, LR: 0.0003 +[2026-02-27 12:29:32] (step=0003376) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.6605360986108394, LR: 0.0003 +[2026-02-27 12:29:40] (step=0003377) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.660731755038153, LR: 0.0003 +[2026-02-27 12:29:48] (step=0003378) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.6609274114654666, LR: 0.0003 +[2026-02-27 12:29:55] (step=0003379) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.6611230678927803, LR: 0.0003 +[2026-02-27 12:30:03] (step=0003380) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.6613187243200939, LR: 0.0003 +[2026-02-27 12:30:11] (step=0003381) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.6615143807474075, LR: 0.0003 +[2026-02-27 12:30:19] (step=0003382) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.6617100371747212, LR: 0.0003 +[2026-02-27 12:30:27] (step=0003383) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.6619056936020348, LR: 0.0003 +[2026-02-27 12:30:35] (step=0003384) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.6621013500293484, LR: 0.0003 +[2026-02-27 12:30:42] (step=0003385) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.6622970064566621, LR: 0.0003 +[2026-02-27 12:30:50] (step=0003386) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.6624926628839758, LR: 0.0003 +[2026-02-27 12:30:58] (step=0003387) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.6626883193112894, LR: 0.0003 +[2026-02-27 12:31:06] (step=0003388) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.662883975738603, LR: 0.0003 +[2026-02-27 12:31:14] (step=0003389) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.6630796321659167, LR: 0.0003 +[2026-02-27 12:31:22] (step=0003390) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.6632752885932303, LR: 0.0003 +[2026-02-27 12:31:30] (step=0003391) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 0.6634709450205439, LR: 0.0003 +[2026-02-27 12:31:37] (step=0003392) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.6636666014478576, LR: 0.0003 +[2026-02-27 12:31:45] (step=0003393) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.6638622578751712, LR: 0.0003 +[2026-02-27 12:31:53] (step=0003394) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.6640579143024848, LR: 0.0003 +[2026-02-27 12:32:01] (step=0003395) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.6642535707297985, LR: 0.0003 +[2026-02-27 12:32:09] (step=0003396) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 0.6644492271571121, LR: 0.0003 +[2026-02-27 12:32:17] (step=0003397) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.6646448835844257, LR: 0.0003 +[2026-02-27 12:32:25] (step=0003398) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.6648405400117394, LR: 0.0003 +[2026-02-27 12:32:32] (step=0003399) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.665036196439053, LR: 0.0003 +[2026-02-27 12:32:40] (step=0003400) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.6652318528663667, LR: 0.0003 +[2026-02-27 12:32:48] (step=0003401) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.6654275092936803, LR: 0.0003 +[2026-02-27 12:32:56] (step=0003402) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.6656231657209939, LR: 0.0003 +[2026-02-27 12:33:04] (step=0003403) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.6658188221483076, LR: 0.0003 +[2026-02-27 12:33:12] (step=0003404) Train Loss: 0.4737, Train Steps/Sec: 0.13, Epoch: 0.6660144785756212, LR: 0.0003 +[2026-02-27 12:33:19] (step=0003405) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 0.6662101350029348, LR: 0.0003 +[2026-02-27 12:33:27] (step=0003406) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.6664057914302485, LR: 0.0003 +[2026-02-27 12:33:35] (step=0003407) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.6666014478575621, LR: 0.0003 +[2026-02-27 12:33:43] (step=0003408) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.6667971042848757, LR: 0.0003 +[2026-02-27 12:33:51] (step=0003409) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.6669927607121894, LR: 0.0003 +[2026-02-27 12:33:59] (step=0003410) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 0.667188417139503, LR: 0.0003 +[2026-02-27 12:34:06] (step=0003411) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.6673840735668166, LR: 0.0003 +[2026-02-27 12:34:14] (step=0003412) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.6675797299941303, LR: 0.0003 +[2026-02-27 12:34:22] (step=0003413) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.667775386421444, LR: 0.0003 +[2026-02-27 12:34:30] (step=0003414) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.6679710428487576, LR: 0.0003 +[2026-02-27 12:34:38] (step=0003415) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.6681666992760712, LR: 0.0003 +[2026-02-27 12:34:46] (step=0003416) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.6683623557033849, LR: 0.0003 +[2026-02-27 12:34:54] (step=0003417) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.6685580121306985, LR: 0.0003 +[2026-02-27 12:35:01] (step=0003418) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.6687536685580121, LR: 0.0003 +[2026-02-27 12:35:09] (step=0003419) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.6689493249853258, LR: 0.0003 +[2026-02-27 12:35:17] (step=0003420) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.6691449814126395, LR: 0.0003 +[2026-02-27 12:35:25] (step=0003421) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.669340637839953, LR: 0.0003 +[2026-02-27 12:35:33] (step=0003422) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.6695362942672667, LR: 0.0003 +[2026-02-27 12:35:41] (step=0003423) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.6697319506945804, LR: 0.0003 +[2026-02-27 12:35:49] (step=0003424) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.6699276071218939, LR: 0.0003 +[2026-02-27 12:35:56] (step=0003425) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.6701232635492076, LR: 0.0003 +[2026-02-27 12:36:04] (step=0003426) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.6703189199765213, LR: 0.0003 +[2026-02-27 12:36:12] (step=0003427) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.6705145764038348, LR: 0.0003 +[2026-02-27 12:36:20] (step=0003428) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 0.6707102328311485, LR: 0.0003 +[2026-02-27 12:36:28] (step=0003429) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.6709058892584622, LR: 0.0003 +[2026-02-27 12:36:36] (step=0003430) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.6711015456857757, LR: 0.0003 +[2026-02-27 12:36:44] (step=0003431) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 0.6712972021130894, LR: 0.0003 +[2026-02-27 12:36:52] (step=0003432) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.6714928585404031, LR: 0.0003 +[2026-02-27 12:36:59] (step=0003433) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.6716885149677166, LR: 0.0003 +[2026-02-27 12:37:07] (step=0003434) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.6718841713950303, LR: 0.0003 +[2026-02-27 12:37:15] (step=0003435) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.672079827822344, LR: 0.0003 +[2026-02-27 12:37:23] (step=0003436) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.6722754842496576, LR: 0.0003 +[2026-02-27 12:37:31] (step=0003437) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.6724711406769712, LR: 0.0003 +[2026-02-27 12:37:39] (step=0003438) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.6726667971042849, LR: 0.0003 +[2026-02-27 12:37:46] (step=0003439) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.6728624535315985, LR: 0.0003 +[2026-02-27 12:37:54] (step=0003440) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.6730581099589121, LR: 0.0003 +[2026-02-27 12:38:02] (step=0003441) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 0.6732537663862258, LR: 0.0003 +[2026-02-27 12:38:10] (step=0003442) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.6734494228135395, LR: 0.0003 +[2026-02-27 12:38:18] (step=0003443) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.673645079240853, LR: 0.0003 +[2026-02-27 12:38:26] (step=0003444) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 0.6738407356681667, LR: 0.0003 +[2026-02-27 12:38:33] (step=0003445) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.6740363920954804, LR: 0.0003 +[2026-02-27 12:38:41] (step=0003446) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 0.674232048522794, LR: 0.0003 +[2026-02-27 12:38:49] (step=0003447) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.6744277049501076, LR: 0.0003 +[2026-02-27 12:38:57] (step=0003448) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 0.6746233613774213, LR: 0.0003 +[2026-02-27 12:39:05] (step=0003449) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.6748190178047349, LR: 0.0003 +[2026-02-27 12:39:13] (step=0003450) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.6750146742320485, LR: 0.0003 +[2026-02-27 12:39:20] (step=0003451) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.6752103306593622, LR: 0.0003 +[2026-02-27 12:39:28] (step=0003452) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.6754059870866758, LR: 0.0003 +[2026-02-27 12:39:36] (step=0003453) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.6756016435139894, LR: 0.0003 +[2026-02-27 12:39:44] (step=0003454) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.6757972999413031, LR: 0.0003 +[2026-02-27 12:39:52] (step=0003455) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.6759929563686167, LR: 0.0003 +[2026-02-27 12:40:00] (step=0003456) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.6761886127959303, LR: 0.0003 +[2026-02-27 12:40:08] (step=0003457) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.676384269223244, LR: 0.0003 +[2026-02-27 12:40:15] (step=0003458) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.6765799256505576, LR: 0.0003 +[2026-02-27 12:40:23] (step=0003459) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.6767755820778713, LR: 0.0003 +[2026-02-27 12:40:31] (step=0003460) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.6769712385051849, LR: 0.0003 +[2026-02-27 12:40:39] (step=0003461) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.6771668949324985, LR: 0.0003 +[2026-02-27 12:40:47] (step=0003462) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.6773625513598122, LR: 0.0003 +[2026-02-27 12:40:55] (step=0003463) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.6775582077871258, LR: 0.0003 +[2026-02-27 12:41:02] (step=0003464) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.6777538642144394, LR: 0.0003 +[2026-02-27 12:41:10] (step=0003465) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.6779495206417531, LR: 0.0003 +[2026-02-27 12:41:18] (step=0003466) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 0.6781451770690667, LR: 0.0003 +[2026-02-27 12:41:26] (step=0003467) Train Loss: 0.4542, Train Steps/Sec: 0.12, Epoch: 0.6783408334963803, LR: 0.0003 +[2026-02-27 12:41:34] (step=0003468) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.678536489923694, LR: 0.0003 +[2026-02-27 12:41:42] (step=0003469) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 0.6787321463510076, LR: 0.0003 +[2026-02-27 12:41:50] (step=0003470) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.6789278027783213, LR: 0.0003 +[2026-02-27 12:41:58] (step=0003471) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.6791234592056349, LR: 0.0003 +[2026-02-27 12:42:05] (step=0003472) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.6793191156329486, LR: 0.0003 +[2026-02-27 12:42:13] (step=0003473) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.6795147720602622, LR: 0.0003 +[2026-02-27 12:42:21] (step=0003474) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.6797104284875758, LR: 0.0003 +[2026-02-27 12:42:29] (step=0003475) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.6799060849148895, LR: 0.0003 +[2026-02-27 12:42:37] (step=0003476) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.6801017413422031, LR: 0.0003 +[2026-02-27 12:42:45] (step=0003477) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 0.6802973977695167, LR: 0.0003 +[2026-02-27 12:42:53] (step=0003478) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.6804930541968304, LR: 0.0003 +[2026-02-27 12:43:01] (step=0003479) Train Loss: 0.4585, Train Steps/Sec: 0.12, Epoch: 0.680688710624144, LR: 0.0003 +[2026-02-27 12:43:08] (step=0003480) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.6808843670514576, LR: 0.0003 +[2026-02-27 12:43:16] (step=0003481) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.6810800234787713, LR: 0.0003 +[2026-02-27 12:43:24] (step=0003482) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.681275679906085, LR: 0.0003 +[2026-02-27 12:43:32] (step=0003483) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.6814713363333985, LR: 0.0003 +[2026-02-27 12:43:40] (step=0003484) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 0.6816669927607122, LR: 0.0003 +[2026-02-27 12:43:48] (step=0003485) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.6818626491880259, LR: 0.0003 +[2026-02-27 12:43:55] (step=0003486) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.6820583056153394, LR: 0.0003 +[2026-02-27 12:44:03] (step=0003487) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.6822539620426531, LR: 0.0003 +[2026-02-27 12:44:11] (step=0003488) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.6824496184699668, LR: 0.0003 +[2026-02-27 12:44:19] (step=0003489) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.6826452748972803, LR: 0.0003 +[2026-02-27 12:44:27] (step=0003490) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.682840931324594, LR: 0.0003 +[2026-02-27 12:44:35] (step=0003491) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 0.6830365877519077, LR: 0.0003 +[2026-02-27 12:44:43] (step=0003492) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.6832322441792212, LR: 0.0003 +[2026-02-27 12:44:50] (step=0003493) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.6834279006065349, LR: 0.0003 +[2026-02-27 12:44:58] (step=0003494) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.6836235570338486, LR: 0.0003 +[2026-02-27 12:45:06] (step=0003495) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.6838192134611621, LR: 0.0003 +[2026-02-27 12:45:14] (step=0003496) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.6840148698884758, LR: 0.0003 +[2026-02-27 12:45:22] (step=0003497) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.6842105263157895, LR: 0.0003 +[2026-02-27 12:45:30] (step=0003498) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.6844061827431032, LR: 0.0003 +[2026-02-27 12:45:38] (step=0003499) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.6846018391704167, LR: 0.0003 +[2026-02-27 12:45:45] (step=0003500) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 0.6847974955977304, LR: 0.0003 +[2026-02-27 12:45:45] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0003500/ +[2026-02-27 12:45:53] (step=0003501) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.6849931520250441, LR: 0.0003 +[2026-02-27 12:46:01] (step=0003502) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.6851888084523576, LR: 0.0003 +[2026-02-27 12:46:09] (step=0003503) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.6853844648796713, LR: 0.0003 +[2026-02-27 12:46:17] (step=0003504) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 0.685580121306985, LR: 0.0003 +[2026-02-27 12:46:25] (step=0003505) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 0.6857757777342985, LR: 0.0003 +[2026-02-27 12:46:32] (step=0003506) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.6859714341616122, LR: 0.0003 +[2026-02-27 12:46:40] (step=0003507) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.6861670905889259, LR: 0.0003 +[2026-02-27 12:46:48] (step=0003508) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.6863627470162394, LR: 0.0003 +[2026-02-27 12:46:56] (step=0003509) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.6865584034435531, LR: 0.0003 +[2026-02-27 12:47:04] (step=0003510) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.6867540598708668, LR: 0.0003 +[2026-02-27 12:47:12] (step=0003511) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.6869497162981804, LR: 0.0003 +[2026-02-27 12:47:20] (step=0003512) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.687145372725494, LR: 0.0003 +[2026-02-27 12:47:28] (step=0003513) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.6873410291528077, LR: 0.0003 +[2026-02-27 12:47:35] (step=0003514) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.6875366855801213, LR: 0.0003 +[2026-02-27 12:47:43] (step=0003515) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.6877323420074349, LR: 0.0003 +[2026-02-27 12:47:51] (step=0003516) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.6879279984347486, LR: 0.0003 +[2026-02-27 12:47:59] (step=0003517) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.6881236548620622, LR: 0.0003 +[2026-02-27 12:48:07] (step=0003518) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.6883193112893758, LR: 0.0003 +[2026-02-27 12:48:15] (step=0003519) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.6885149677166895, LR: 0.0003 +[2026-02-27 12:48:22] (step=0003520) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.6887106241440031, LR: 0.0003 +[2026-02-27 12:48:30] (step=0003521) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.6889062805713168, LR: 0.0003 +[2026-02-27 12:48:38] (step=0003522) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.6891019369986304, LR: 0.0003 +[2026-02-27 12:48:46] (step=0003523) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.689297593425944, LR: 0.0003 +[2026-02-27 12:48:54] (step=0003524) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.6894932498532577, LR: 0.0003 +[2026-02-27 12:49:02] (step=0003525) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.6896889062805713, LR: 0.0003 +[2026-02-27 12:49:10] (step=0003526) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 0.689884562707885, LR: 0.0003 +[2026-02-27 12:49:18] (step=0003527) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.6900802191351986, LR: 0.0003 +[2026-02-27 12:49:25] (step=0003528) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.6902758755625122, LR: 0.0003 +[2026-02-27 12:49:33] (step=0003529) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.6904715319898259, LR: 0.0003 +[2026-02-27 12:49:41] (step=0003530) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.6906671884171395, LR: 0.0003 +[2026-02-27 12:49:49] (step=0003531) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.6908628448444532, LR: 0.0003 +[2026-02-27 12:49:57] (step=0003532) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.6910585012717668, LR: 0.0003 +[2026-02-27 12:50:05] (step=0003533) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.6912541576990804, LR: 0.0003 +[2026-02-27 12:50:12] (step=0003534) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 0.6914498141263941, LR: 0.0003 +[2026-02-27 12:50:20] (step=0003535) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 0.6916454705537077, LR: 0.0003 +[2026-02-27 12:50:28] (step=0003536) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.6918411269810213, LR: 0.0003 +[2026-02-27 12:50:36] (step=0003537) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.692036783408335, LR: 0.0003 +[2026-02-27 12:50:44] (step=0003538) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.6922324398356486, LR: 0.0003 +[2026-02-27 12:50:52] (step=0003539) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.6924280962629622, LR: 0.0003 +[2026-02-27 12:51:00] (step=0003540) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.6926237526902759, LR: 0.0003 +[2026-02-27 12:51:07] (step=0003541) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.6928194091175895, LR: 0.0003 +[2026-02-27 12:51:15] (step=0003542) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 0.6930150655449031, LR: 0.0003 +[2026-02-27 12:51:23] (step=0003543) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.6932107219722168, LR: 0.0003 +[2026-02-27 12:51:31] (step=0003544) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.6934063783995305, LR: 0.0003 +[2026-02-27 12:51:39] (step=0003545) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 0.693602034826844, LR: 0.0003 +[2026-02-27 12:51:47] (step=0003546) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.6937976912541577, LR: 0.0003 +[2026-02-27 12:51:55] (step=0003547) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 0.6939933476814714, LR: 0.0003 +[2026-02-27 12:52:02] (step=0003548) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.6941890041087849, LR: 0.0003 +[2026-02-27 12:52:10] (step=0003549) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.6943846605360986, LR: 0.0003 +[2026-02-27 12:52:18] (step=0003550) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.6945803169634123, LR: 0.0003 +[2026-02-27 12:52:26] (step=0003551) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.6947759733907258, LR: 0.0003 +[2026-02-27 12:52:34] (step=0003552) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.6949716298180395, LR: 0.0003 +[2026-02-27 12:52:42] (step=0003553) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.6951672862453532, LR: 0.0003 +[2026-02-27 12:52:49] (step=0003554) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.6953629426726669, LR: 0.0003 +[2026-02-27 12:52:57] (step=0003555) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.6955585990999804, LR: 0.0003 +[2026-02-27 12:53:05] (step=0003556) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.6957542555272941, LR: 0.0003 +[2026-02-27 12:53:13] (step=0003557) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.6959499119546078, LR: 0.0003 +[2026-02-27 12:53:21] (step=0003558) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.6961455683819213, LR: 0.0003 +[2026-02-27 12:53:29] (step=0003559) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.696341224809235, LR: 0.0003 +[2026-02-27 12:53:37] (step=0003560) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.6965368812365487, LR: 0.0003 +[2026-02-27 12:53:45] (step=0003561) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.6967325376638622, LR: 0.0003 +[2026-02-27 12:53:52] (step=0003562) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.6969281940911759, LR: 0.0003 +[2026-02-27 12:54:00] (step=0003563) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.6971238505184896, LR: 0.0003 +[2026-02-27 12:54:08] (step=0003564) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.6973195069458031, LR: 0.0003 +[2026-02-27 12:54:16] (step=0003565) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.6975151633731168, LR: 0.0003 +[2026-02-27 12:54:24] (step=0003566) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.6977108198004305, LR: 0.0003 +[2026-02-27 12:54:32] (step=0003567) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.697906476227744, LR: 0.0003 +[2026-02-27 12:54:40] (step=0003568) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.6981021326550577, LR: 0.0003 +[2026-02-27 12:54:47] (step=0003569) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 0.6982977890823714, LR: 0.0003 +[2026-02-27 12:54:55] (step=0003570) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.698493445509685, LR: 0.0003 +[2026-02-27 12:55:03] (step=0003571) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.6986891019369986, LR: 0.0003 +[2026-02-27 12:55:11] (step=0003572) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.6988847583643123, LR: 0.0003 +[2026-02-27 12:55:19] (step=0003573) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.6990804147916259, LR: 0.0003 +[2026-02-27 12:55:27] (step=0003574) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 0.6992760712189395, LR: 0.0003 +[2026-02-27 12:55:35] (step=0003575) Train Loss: 0.4676, Train Steps/Sec: 0.12, Epoch: 0.6994717276462532, LR: 0.0003 +[2026-02-27 12:55:42] (step=0003576) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.6996673840735668, LR: 0.0003 +[2026-02-27 12:55:50] (step=0003577) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 0.6998630405008804, LR: 0.0003 +[2026-02-27 12:55:58] (step=0003578) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.7000586969281941, LR: 0.0003 +[2026-02-27 12:56:06] (step=0003579) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.7002543533555077, LR: 0.0003 +[2026-02-27 12:56:14] (step=0003580) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.7004500097828213, LR: 0.0003 +[2026-02-27 12:56:22] (step=0003581) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.700645666210135, LR: 0.0003 +[2026-02-27 12:56:30] (step=0003582) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.7008413226374487, LR: 0.0003 +[2026-02-27 12:56:37] (step=0003583) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.7010369790647623, LR: 0.0003 +[2026-02-27 12:56:45] (step=0003584) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.7012326354920759, LR: 0.0003 +[2026-02-27 12:56:53] (step=0003585) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 0.7014282919193896, LR: 0.0003 +[2026-02-27 12:57:01] (step=0003586) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.7016239483467032, LR: 0.0003 +[2026-02-27 12:57:09] (step=0003587) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.7018196047740168, LR: 0.0003 +[2026-02-27 12:57:17] (step=0003588) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.7020152612013305, LR: 0.0003 +[2026-02-27 12:57:25] (step=0003589) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.7022109176286441, LR: 0.0003 +[2026-02-27 12:57:32] (step=0003590) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.7024065740559577, LR: 0.0003 +[2026-02-27 12:57:40] (step=0003591) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.7026022304832714, LR: 0.0003 +[2026-02-27 12:57:48] (step=0003592) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.702797886910585, LR: 0.0003 +[2026-02-27 12:57:56] (step=0003593) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.7029935433378987, LR: 0.0003 +[2026-02-27 12:58:04] (step=0003594) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.7031891997652123, LR: 0.0003 +[2026-02-27 12:58:12] (step=0003595) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.7033848561925259, LR: 0.0003 +[2026-02-27 12:58:19] (step=0003596) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 0.7035805126198396, LR: 0.0003 +[2026-02-27 12:58:27] (step=0003597) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.7037761690471532, LR: 0.0003 +[2026-02-27 12:58:35] (step=0003598) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.7039718254744668, LR: 0.0003 +[2026-02-27 12:58:43] (step=0003599) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.7041674819017805, LR: 0.0003 +[2026-02-27 12:58:51] (step=0003600) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.7043631383290941, LR: 0.0003 +[2026-02-27 12:58:59] (step=0003601) Train Loss: 0.4696, Train Steps/Sec: 0.13, Epoch: 0.7045587947564077, LR: 0.0003 +[2026-02-27 12:59:07] (step=0003602) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.7047544511837214, LR: 0.0003 +[2026-02-27 12:59:14] (step=0003603) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.704950107611035, LR: 0.0003 +[2026-02-27 12:59:22] (step=0003604) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.7051457640383486, LR: 0.0003 +[2026-02-27 12:59:30] (step=0003605) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.7053414204656623, LR: 0.0003 +[2026-02-27 12:59:38] (step=0003606) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 0.705537076892976, LR: 0.0003 +[2026-02-27 12:59:46] (step=0003607) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 0.7057327333202895, LR: 0.0003 +[2026-02-27 12:59:54] (step=0003608) Train Loss: 0.4580, Train Steps/Sec: 0.12, Epoch: 0.7059283897476032, LR: 0.0003 +[2026-02-27 13:00:02] (step=0003609) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.7061240461749169, LR: 0.0003 +[2026-02-27 13:00:09] (step=0003610) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.7063197026022305, LR: 0.0003 +[2026-02-27 13:00:17] (step=0003611) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.7065153590295441, LR: 0.0003 +[2026-02-27 13:00:25] (step=0003612) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.7067110154568578, LR: 0.0003 +[2026-02-27 13:00:33] (step=0003613) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.7069066718841714, LR: 0.0003 +[2026-02-27 13:00:41] (step=0003614) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.707102328311485, LR: 0.0003 +[2026-02-27 13:00:49] (step=0003615) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.7072979847387987, LR: 0.0003 +[2026-02-27 13:00:57] (step=0003616) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.7074936411661124, LR: 0.0003 +[2026-02-27 13:01:04] (step=0003617) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.7076892975934259, LR: 0.0003 +[2026-02-27 13:01:12] (step=0003618) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.7078849540207396, LR: 0.0003 +[2026-02-27 13:01:20] (step=0003619) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.7080806104480533, LR: 0.0003 +[2026-02-27 13:01:28] (step=0003620) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.7082762668753668, LR: 0.0003 +[2026-02-27 13:01:36] (step=0003621) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.7084719233026805, LR: 0.0003 +[2026-02-27 13:01:44] (step=0003622) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.7086675797299942, LR: 0.0003 +[2026-02-27 13:01:51] (step=0003623) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.7088632361573077, LR: 0.0003 +[2026-02-27 13:01:59] (step=0003624) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.7090588925846214, LR: 0.0003 +[2026-02-27 13:02:07] (step=0003625) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.7092545490119351, LR: 0.0003 +[2026-02-27 13:02:15] (step=0003626) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 0.7094502054392486, LR: 0.0003 +[2026-02-27 13:02:23] (step=0003627) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 0.7096458618665623, LR: 0.0003 +[2026-02-27 13:02:31] (step=0003628) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 0.709841518293876, LR: 0.0003 +[2026-02-27 13:02:39] (step=0003629) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.7100371747211895, LR: 0.0003 +[2026-02-27 13:02:46] (step=0003630) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.7102328311485032, LR: 0.0003 +[2026-02-27 13:02:54] (step=0003631) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.7104284875758169, LR: 0.0003 +[2026-02-27 13:03:02] (step=0003632) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.7106241440031305, LR: 0.0003 +[2026-02-27 13:03:10] (step=0003633) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.7108198004304441, LR: 0.0003 +[2026-02-27 13:03:18] (step=0003634) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.7110154568577578, LR: 0.0003 +[2026-02-27 13:03:26] (step=0003635) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.7112111132850714, LR: 0.0003 +[2026-02-27 13:03:34] (step=0003636) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.711406769712385, LR: 0.0003 +[2026-02-27 13:03:41] (step=0003637) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.7116024261396987, LR: 0.0003 +[2026-02-27 13:03:49] (step=0003638) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.7117980825670124, LR: 0.0003 +[2026-02-27 13:03:57] (step=0003639) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.7119937389943259, LR: 0.0003 +[2026-02-27 13:04:05] (step=0003640) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.7121893954216396, LR: 0.0003 +[2026-02-27 13:04:13] (step=0003641) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.7123850518489533, LR: 0.0003 +[2026-02-27 13:04:21] (step=0003642) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.7125807082762668, LR: 0.0003 +[2026-02-27 13:04:28] (step=0003643) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 0.7127763647035805, LR: 0.0003 +[2026-02-27 13:04:36] (step=0003644) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.7129720211308942, LR: 0.0003 +[2026-02-27 13:04:44] (step=0003645) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.7131676775582078, LR: 0.0003 +[2026-02-27 13:04:52] (step=0003646) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 0.7133633339855214, LR: 0.0003 +[2026-02-27 13:05:00] (step=0003647) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.7135589904128351, LR: 0.0003 +[2026-02-27 13:05:08] (step=0003648) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.7137546468401487, LR: 0.0003 +[2026-02-27 13:05:16] (step=0003649) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.7139503032674623, LR: 0.0003 +[2026-02-27 13:05:23] (step=0003650) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.714145959694776, LR: 0.0003 +[2026-02-27 13:05:31] (step=0003651) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.7143416161220896, LR: 0.0003 +[2026-02-27 13:05:39] (step=0003652) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.7145372725494032, LR: 0.0003 +[2026-02-27 13:05:47] (step=0003653) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 0.7147329289767169, LR: 0.0003 +[2026-02-27 13:05:55] (step=0003654) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.7149285854040305, LR: 0.0003 +[2026-02-27 13:06:03] (step=0003655) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.7151242418313442, LR: 0.0003 +[2026-02-27 13:06:10] (step=0003656) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.7153198982586578, LR: 0.0003 +[2026-02-27 13:06:18] (step=0003657) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.7155155546859714, LR: 0.0003 +[2026-02-27 13:06:26] (step=0003658) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.7157112111132851, LR: 0.0003 +[2026-02-27 13:06:34] (step=0003659) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.7159068675405987, LR: 0.0003 +[2026-02-27 13:06:42] (step=0003660) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.7161025239679123, LR: 0.0003 +[2026-02-27 13:06:50] (step=0003661) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.716298180395226, LR: 0.0003 +[2026-02-27 13:06:58] (step=0003662) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.7164938368225396, LR: 0.0003 +[2026-02-27 13:07:06] (step=0003663) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.7166894932498532, LR: 0.0003 +[2026-02-27 13:07:13] (step=0003664) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.7168851496771669, LR: 0.0003 +[2026-02-27 13:07:21] (step=0003665) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.7170808061044806, LR: 0.0003 +[2026-02-27 13:07:29] (step=0003666) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.7172764625317942, LR: 0.0003 +[2026-02-27 13:07:37] (step=0003667) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.7174721189591078, LR: 0.0003 +[2026-02-27 13:07:45] (step=0003668) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.7176677753864215, LR: 0.0003 +[2026-02-27 13:07:53] (step=0003669) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.7178634318137351, LR: 0.0003 +[2026-02-27 13:08:00] (step=0003670) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.7180590882410487, LR: 0.0003 +[2026-02-27 13:08:08] (step=0003671) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.7182547446683624, LR: 0.0003 +[2026-02-27 13:08:16] (step=0003672) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.718450401095676, LR: 0.0003 +[2026-02-27 13:08:24] (step=0003673) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 0.7186460575229896, LR: 0.0003 +[2026-02-27 13:08:32] (step=0003674) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.7188417139503033, LR: 0.0003 +[2026-02-27 13:08:40] (step=0003675) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 0.719037370377617, LR: 0.0003 +[2026-02-27 13:08:48] (step=0003676) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.7192330268049305, LR: 0.0003 +[2026-02-27 13:08:56] (step=0003677) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.7194286832322442, LR: 0.0003 +[2026-02-27 13:09:03] (step=0003678) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.7196243396595579, LR: 0.0003 +[2026-02-27 13:09:11] (step=0003679) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.7198199960868714, LR: 0.0003 +[2026-02-27 13:09:19] (step=0003680) Train Loss: 0.4798, Train Steps/Sec: 0.13, Epoch: 0.7200156525141851, LR: 0.0003 +[2026-02-27 13:09:27] (step=0003681) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.7202113089414988, LR: 0.0003 +[2026-02-27 13:09:35] (step=0003682) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.7204069653688123, LR: 0.0003 +[2026-02-27 13:09:43] (step=0003683) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 0.720602621796126, LR: 0.0003 +[2026-02-27 13:09:50] (step=0003684) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 0.7207982782234397, LR: 0.0003 +[2026-02-27 13:09:58] (step=0003685) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.7209939346507532, LR: 0.0003 +[2026-02-27 13:10:06] (step=0003686) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 0.7211895910780669, LR: 0.0003 +[2026-02-27 13:10:14] (step=0003687) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.7213852475053806, LR: 0.0003 +[2026-02-27 13:10:22] (step=0003688) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.7215809039326941, LR: 0.0003 +[2026-02-27 13:10:30] (step=0003689) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 0.7217765603600078, LR: 0.0003 +[2026-02-27 13:10:38] (step=0003690) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.7219722167873215, LR: 0.0003 +[2026-02-27 13:10:45] (step=0003691) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.722167873214635, LR: 0.0003 +[2026-02-27 13:10:53] (step=0003692) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 0.7223635296419487, LR: 0.0003 +[2026-02-27 13:11:01] (step=0003693) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.7225591860692624, LR: 0.0003 +[2026-02-27 13:11:09] (step=0003694) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.7227548424965761, LR: 0.0003 +[2026-02-27 13:11:17] (step=0003695) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.7229504989238896, LR: 0.0003 +[2026-02-27 13:11:25] (step=0003696) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 0.7231461553512033, LR: 0.0003 +[2026-02-27 13:11:32] (step=0003697) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.723341811778517, LR: 0.0003 +[2026-02-27 13:11:40] (step=0003698) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.7235374682058305, LR: 0.0003 +[2026-02-27 13:11:48] (step=0003699) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.7237331246331442, LR: 0.0003 +[2026-02-27 13:11:56] (step=0003700) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.7239287810604579, LR: 0.0003 +[2026-02-27 13:12:04] (step=0003701) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.7241244374877714, LR: 0.0003 +[2026-02-27 13:12:12] (step=0003702) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.7243200939150851, LR: 0.0003 +[2026-02-27 13:12:20] (step=0003703) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.7245157503423988, LR: 0.0003 +[2026-02-27 13:12:27] (step=0003704) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.7247114067697124, LR: 0.0003 +[2026-02-27 13:12:35] (step=0003705) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.724907063197026, LR: 0.0003 +[2026-02-27 13:12:43] (step=0003706) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.7251027196243397, LR: 0.0003 +[2026-02-27 13:12:51] (step=0003707) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.7252983760516533, LR: 0.0003 +[2026-02-27 13:12:59] (step=0003708) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.7254940324789669, LR: 0.0003 +[2026-02-27 13:13:07] (step=0003709) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 0.7256896889062806, LR: 0.0003 +[2026-02-27 13:13:15] (step=0003710) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.7258853453335942, LR: 0.0003 +[2026-02-27 13:13:22] (step=0003711) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.7260810017609078, LR: 0.0003 +[2026-02-27 13:13:30] (step=0003712) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.7262766581882215, LR: 0.0003 +[2026-02-27 13:13:38] (step=0003713) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 0.7264723146155351, LR: 0.0003 +[2026-02-27 13:13:46] (step=0003714) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.7266679710428487, LR: 0.0003 +[2026-02-27 13:13:54] (step=0003715) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.7268636274701624, LR: 0.0003 +[2026-02-27 13:14:02] (step=0003716) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.727059283897476, LR: 0.0003 +[2026-02-27 13:14:10] (step=0003717) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 0.7272549403247897, LR: 0.0003 +[2026-02-27 13:14:17] (step=0003718) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.7274505967521033, LR: 0.0003 +[2026-02-27 13:14:25] (step=0003719) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 0.7276462531794169, LR: 0.0003 +[2026-02-27 13:14:33] (step=0003720) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.7278419096067306, LR: 0.0003 +[2026-02-27 13:14:41] (step=0003721) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.7280375660340442, LR: 0.0003 +[2026-02-27 13:14:49] (step=0003722) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.7282332224613579, LR: 0.0003 +[2026-02-27 13:14:57] (step=0003723) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.7284288788886715, LR: 0.0003 +[2026-02-27 13:15:04] (step=0003724) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.7286245353159851, LR: 0.0003 +[2026-02-27 13:15:12] (step=0003725) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.7288201917432988, LR: 0.0003 +[2026-02-27 13:15:20] (step=0003726) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.7290158481706124, LR: 0.0003 +[2026-02-27 13:15:28] (step=0003727) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.729211504597926, LR: 0.0003 +[2026-02-27 13:15:36] (step=0003728) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 0.7294071610252397, LR: 0.0003 +[2026-02-27 13:15:44] (step=0003729) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.7296028174525533, LR: 0.0003 +[2026-02-27 13:15:52] (step=0003730) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 0.729798473879867, LR: 0.0003 +[2026-02-27 13:16:00] (step=0003731) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 0.7299941303071806, LR: 0.0003 +[2026-02-27 13:16:07] (step=0003732) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.7301897867344942, LR: 0.0003 +[2026-02-27 13:16:15] (step=0003733) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.7303854431618079, LR: 0.0003 +[2026-02-27 13:16:23] (step=0003734) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.7305810995891215, LR: 0.0003 +[2026-02-27 13:16:31] (step=0003735) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.7307767560164351, LR: 0.0003 +[2026-02-27 13:16:39] (step=0003736) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.7309724124437488, LR: 0.0003 +[2026-02-27 13:16:47] (step=0003737) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.7311680688710624, LR: 0.0003 +[2026-02-27 13:16:54] (step=0003738) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.731363725298376, LR: 0.0003 +[2026-02-27 13:17:02] (step=0003739) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.7315593817256897, LR: 0.0003 +[2026-02-27 13:17:10] (step=0003740) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.7317550381530034, LR: 0.0003 +[2026-02-27 13:17:18] (step=0003741) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.7319506945803169, LR: 0.0003 +[2026-02-27 13:17:26] (step=0003742) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.7321463510076306, LR: 0.0003 +[2026-02-27 13:17:34] (step=0003743) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.7323420074349443, LR: 0.0003 +[2026-02-27 13:17:41] (step=0003744) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.7325376638622578, LR: 0.0003 +[2026-02-27 13:17:49] (step=0003745) Train Loss: 0.4789, Train Steps/Sec: 0.13, Epoch: 0.7327333202895715, LR: 0.0003 +[2026-02-27 13:17:57] (step=0003746) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.7329289767168852, LR: 0.0003 +[2026-02-27 13:18:05] (step=0003747) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 0.7331246331441987, LR: 0.0003 +[2026-02-27 13:18:13] (step=0003748) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.7333202895715124, LR: 0.0003 +[2026-02-27 13:18:21] (step=0003749) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.7335159459988261, LR: 0.0003 +[2026-02-27 13:18:28] (step=0003750) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.7337116024261398, LR: 0.0003 +[2026-02-27 13:18:36] (step=0003751) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 0.7339072588534533, LR: 0.0003 +[2026-02-27 13:18:44] (step=0003752) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.734102915280767, LR: 0.0003 +[2026-02-27 13:18:52] (step=0003753) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.7342985717080807, LR: 0.0003 +[2026-02-27 13:19:00] (step=0003754) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.7344942281353942, LR: 0.0003 +[2026-02-27 13:19:08] (step=0003755) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.7346898845627079, LR: 0.0003 +[2026-02-27 13:19:16] (step=0003756) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.7348855409900216, LR: 0.0003 +[2026-02-27 13:19:24] (step=0003757) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.7350811974173351, LR: 0.0003 +[2026-02-27 13:19:31] (step=0003758) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.7352768538446488, LR: 0.0003 +[2026-02-27 13:19:39] (step=0003759) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.7354725102719625, LR: 0.0003 +[2026-02-27 13:19:47] (step=0003760) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.735668166699276, LR: 0.0003 +[2026-02-27 13:19:55] (step=0003761) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.7358638231265897, LR: 0.0003 +[2026-02-27 13:20:03] (step=0003762) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 0.7360594795539034, LR: 0.0003 +[2026-02-27 13:20:11] (step=0003763) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.736255135981217, LR: 0.0003 +[2026-02-27 13:20:19] (step=0003764) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.7364507924085306, LR: 0.0003 +[2026-02-27 13:20:26] (step=0003765) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.7366464488358443, LR: 0.0003 +[2026-02-27 13:20:34] (step=0003766) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.7368421052631579, LR: 0.0003 +[2026-02-27 13:20:42] (step=0003767) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.7370377616904715, LR: 0.0003 +[2026-02-27 13:20:50] (step=0003768) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.7372334181177852, LR: 0.0003 +[2026-02-27 13:20:58] (step=0003769) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.7374290745450988, LR: 0.0003 +[2026-02-27 13:21:05] (step=0003770) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.7376247309724124, LR: 0.0003 +[2026-02-27 13:21:13] (step=0003771) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.7378203873997261, LR: 0.0003 +[2026-02-27 13:21:21] (step=0003772) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.7380160438270397, LR: 0.0003 +[2026-02-27 13:21:29] (step=0003773) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.7382117002543533, LR: 0.0003 +[2026-02-27 13:21:37] (step=0003774) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.738407356681667, LR: 0.0003 +[2026-02-27 13:21:45] (step=0003775) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.7386030131089806, LR: 0.0003 +[2026-02-27 13:21:53] (step=0003776) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.7387986695362943, LR: 0.0003 +[2026-02-27 13:22:01] (step=0003777) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.7389943259636079, LR: 0.0003 +[2026-02-27 13:22:08] (step=0003778) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.7391899823909216, LR: 0.0003 +[2026-02-27 13:22:16] (step=0003779) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.7393856388182352, LR: 0.0003 +[2026-02-27 13:22:24] (step=0003780) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.7395812952455488, LR: 0.0003 +[2026-02-27 13:22:32] (step=0003781) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.7397769516728625, LR: 0.0003 +[2026-02-27 13:22:40] (step=0003782) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.7399726081001761, LR: 0.0003 +[2026-02-27 13:22:48] (step=0003783) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.7401682645274897, LR: 0.0003 +[2026-02-27 13:22:55] (step=0003784) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 0.7403639209548034, LR: 0.0003 +[2026-02-27 13:23:03] (step=0003785) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.740559577382117, LR: 0.0003 +[2026-02-27 13:23:11] (step=0003786) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 0.7407552338094306, LR: 0.0003 +[2026-02-27 13:23:19] (step=0003787) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.7409508902367443, LR: 0.0003 +[2026-02-27 13:23:27] (step=0003788) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.7411465466640579, LR: 0.0003 +[2026-02-27 13:23:35] (step=0003789) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.7413422030913716, LR: 0.0003 +[2026-02-27 13:23:42] (step=0003790) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.7415378595186852, LR: 0.0003 +[2026-02-27 13:23:50] (step=0003791) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.7417335159459988, LR: 0.0003 +[2026-02-27 13:23:58] (step=0003792) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 0.7419291723733125, LR: 0.0003 +[2026-02-27 13:24:06] (step=0003793) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.7421248288006261, LR: 0.0003 +[2026-02-27 13:24:14] (step=0003794) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.7423204852279397, LR: 0.0003 +[2026-02-27 13:24:21] (step=0003795) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.7425161416552534, LR: 0.0003 +[2026-02-27 13:24:29] (step=0003796) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.742711798082567, LR: 0.0003 +[2026-02-27 13:24:37] (step=0003797) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.7429074545098806, LR: 0.0003 +[2026-02-27 13:24:45] (step=0003798) Train Loss: 0.4741, Train Steps/Sec: 0.13, Epoch: 0.7431031109371943, LR: 0.0003 +[2026-02-27 13:24:53] (step=0003799) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.743298767364508, LR: 0.0003 +[2026-02-27 13:25:01] (step=0003800) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.7434944237918215, LR: 0.0003 +[2026-02-27 13:25:09] (step=0003801) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.7436900802191352, LR: 0.0003 +[2026-02-27 13:25:16] (step=0003802) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.7438857366464489, LR: 0.0003 +[2026-02-27 13:25:24] (step=0003803) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.7440813930737624, LR: 0.0003 +[2026-02-27 13:25:32] (step=0003804) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.7442770495010761, LR: 0.0003 +[2026-02-27 13:25:40] (step=0003805) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.7444727059283898, LR: 0.0003 +[2026-02-27 13:25:48] (step=0003806) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.7446683623557034, LR: 0.0003 +[2026-02-27 13:25:56] (step=0003807) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.744864018783017, LR: 0.0003 +[2026-02-27 13:26:04] (step=0003808) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.7450596752103307, LR: 0.0003 +[2026-02-27 13:26:11] (step=0003809) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 0.7452553316376443, LR: 0.0003 +[2026-02-27 13:26:19] (step=0003810) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.7454509880649579, LR: 0.0003 +[2026-02-27 13:26:27] (step=0003811) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.7456466444922716, LR: 0.0003 +[2026-02-27 13:26:35] (step=0003812) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.7458423009195853, LR: 0.0003 +[2026-02-27 13:26:43] (step=0003813) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.7460379573468988, LR: 0.0003 +[2026-02-27 13:26:51] (step=0003814) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 0.7462336137742125, LR: 0.0003 +[2026-02-27 13:26:58] (step=0003815) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.7464292702015262, LR: 0.0003 +[2026-02-27 13:27:06] (step=0003816) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.7466249266288397, LR: 0.0003 +[2026-02-27 13:27:14] (step=0003817) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.7468205830561534, LR: 0.0003 +[2026-02-27 13:27:22] (step=0003818) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 0.7470162394834671, LR: 0.0003 +[2026-02-27 13:27:30] (step=0003819) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 0.7472118959107806, LR: 0.0003 +[2026-02-27 13:27:38] (step=0003820) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.7474075523380943, LR: 0.0003 +[2026-02-27 13:27:45] (step=0003821) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.747603208765408, LR: 0.0003 +[2026-02-27 13:27:53] (step=0003822) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.7477988651927215, LR: 0.0003 +[2026-02-27 13:28:01] (step=0003823) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.7479945216200352, LR: 0.0003 +[2026-02-27 13:28:09] (step=0003824) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 0.7481901780473489, LR: 0.0003 +[2026-02-27 13:28:17] (step=0003825) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.7483858344746624, LR: 0.0003 +[2026-02-27 13:28:25] (step=0003826) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.7485814909019761, LR: 0.0003 +[2026-02-27 13:28:32] (step=0003827) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.7487771473292898, LR: 0.0003 +[2026-02-27 13:28:40] (step=0003828) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.7489728037566034, LR: 0.0003 +[2026-02-27 13:28:48] (step=0003829) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.749168460183917, LR: 0.0003 +[2026-02-27 13:28:56] (step=0003830) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.7493641166112307, LR: 0.0003 +[2026-02-27 13:29:04] (step=0003831) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.7495597730385443, LR: 0.0003 +[2026-02-27 13:29:12] (step=0003832) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.7497554294658579, LR: 0.0003 +[2026-02-27 13:29:20] (step=0003833) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.7499510858931716, LR: 0.0003 +[2026-02-27 13:29:27] (step=0003834) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.7501467423204853, LR: 0.0003 +[2026-02-27 13:29:35] (step=0003835) Train Loss: 0.4776, Train Steps/Sec: 0.13, Epoch: 0.7503423987477988, LR: 0.0003 +[2026-02-27 13:29:43] (step=0003836) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.7505380551751125, LR: 0.0003 +[2026-02-27 13:29:51] (step=0003837) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.7507337116024262, LR: 0.0003 +[2026-02-27 13:29:59] (step=0003838) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.7509293680297398, LR: 0.0003 +[2026-02-27 13:30:07] (step=0003839) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.7511250244570534, LR: 0.0003 +[2026-02-27 13:30:15] (step=0003840) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.7513206808843671, LR: 0.0003 +[2026-02-27 13:30:22] (step=0003841) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 0.7515163373116807, LR: 0.0003 +[2026-02-27 13:30:30] (step=0003842) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.7517119937389943, LR: 0.0003 +[2026-02-27 13:30:38] (step=0003843) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.751907650166308, LR: 0.0003 +[2026-02-27 13:30:46] (step=0003844) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.7521033065936216, LR: 0.0003 +[2026-02-27 13:30:54] (step=0003845) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.7522989630209352, LR: 0.0003 +[2026-02-27 13:31:02] (step=0003846) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.7524946194482489, LR: 0.0003 +[2026-02-27 13:31:09] (step=0003847) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.7526902758755625, LR: 0.0003 +[2026-02-27 13:31:17] (step=0003848) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.7528859323028761, LR: 0.0003 +[2026-02-27 13:31:25] (step=0003849) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.7530815887301898, LR: 0.0003 +[2026-02-27 13:31:33] (step=0003850) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.7532772451575034, LR: 0.0003 +[2026-02-27 13:31:41] (step=0003851) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.7534729015848171, LR: 0.0003 +[2026-02-27 13:31:49] (step=0003852) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.7536685580121307, LR: 0.0003 +[2026-02-27 13:31:56] (step=0003853) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.7538642144394443, LR: 0.0003 +[2026-02-27 13:32:04] (step=0003854) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.754059870866758, LR: 0.0003 +[2026-02-27 13:32:12] (step=0003855) Train Loss: 0.4737, Train Steps/Sec: 0.13, Epoch: 0.7542555272940716, LR: 0.0003 +[2026-02-27 13:32:20] (step=0003856) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.7544511837213852, LR: 0.0003 +[2026-02-27 13:32:28] (step=0003857) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.7546468401486989, LR: 0.0003 +[2026-02-27 13:32:36] (step=0003858) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.7548424965760125, LR: 0.0003 +[2026-02-27 13:32:43] (step=0003859) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.7550381530033261, LR: 0.0003 +[2026-02-27 13:32:51] (step=0003860) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.7552338094306398, LR: 0.0003 +[2026-02-27 13:32:59] (step=0003861) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 0.7554294658579535, LR: 0.0003 +[2026-02-27 13:33:07] (step=0003862) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.7556251222852671, LR: 0.0003 +[2026-02-27 13:33:15] (step=0003863) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.7558207787125807, LR: 0.0003 +[2026-02-27 13:33:23] (step=0003864) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.7560164351398944, LR: 0.0003 +[2026-02-27 13:33:30] (step=0003865) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.756212091567208, LR: 0.0003 +[2026-02-27 13:33:38] (step=0003866) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.7564077479945216, LR: 0.0003 +[2026-02-27 13:33:46] (step=0003867) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.7566034044218353, LR: 0.0003 +[2026-02-27 13:33:54] (step=0003868) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.7567990608491489, LR: 0.0003 +[2026-02-27 13:34:02] (step=0003869) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.7569947172764625, LR: 0.0003 +[2026-02-27 13:34:10] (step=0003870) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.7571903737037762, LR: 0.0003 +[2026-02-27 13:34:18] (step=0003871) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.7573860301310898, LR: 0.0003 +[2026-02-27 13:34:25] (step=0003872) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.7575816865584034, LR: 0.0003 +[2026-02-27 13:34:33] (step=0003873) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.7577773429857171, LR: 0.0003 +[2026-02-27 13:34:41] (step=0003874) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.7579729994130308, LR: 0.0003 +[2026-02-27 13:34:49] (step=0003875) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.7581686558403443, LR: 0.0003 +[2026-02-27 13:34:57] (step=0003876) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.758364312267658, LR: 0.0003 +[2026-02-27 13:35:05] (step=0003877) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.7585599686949717, LR: 0.0003 +[2026-02-27 13:35:12] (step=0003878) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 0.7587556251222852, LR: 0.0003 +[2026-02-27 13:35:20] (step=0003879) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.7589512815495989, LR: 0.0003 +[2026-02-27 13:35:28] (step=0003880) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.7591469379769126, LR: 0.0003 +[2026-02-27 13:35:36] (step=0003881) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.7593425944042261, LR: 0.0003 +[2026-02-27 13:35:44] (step=0003882) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.7595382508315398, LR: 0.0003 +[2026-02-27 13:35:52] (step=0003883) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.7597339072588535, LR: 0.0003 +[2026-02-27 13:35:59] (step=0003884) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.759929563686167, LR: 0.0003 +[2026-02-27 13:36:07] (step=0003885) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.7601252201134807, LR: 0.0003 +[2026-02-27 13:36:15] (step=0003886) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.7603208765407944, LR: 0.0003 +[2026-02-27 13:36:23] (step=0003887) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.760516532968108, LR: 0.0003 +[2026-02-27 13:36:31] (step=0003888) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.7607121893954216, LR: 0.0003 +[2026-02-27 13:36:39] (step=0003889) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.7609078458227353, LR: 0.0003 +[2026-02-27 13:36:47] (step=0003890) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.761103502250049, LR: 0.0003 +[2026-02-27 13:36:54] (step=0003891) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.7612991586773625, LR: 0.0003 +[2026-02-27 13:37:02] (step=0003892) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.7614948151046762, LR: 0.0003 +[2026-02-27 13:37:10] (step=0003893) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 0.7616904715319899, LR: 0.0003 +[2026-02-27 13:37:18] (step=0003894) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 0.7618861279593034, LR: 0.0003 +[2026-02-27 13:37:26] (step=0003895) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.7620817843866171, LR: 0.0003 +[2026-02-27 13:37:34] (step=0003896) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.7622774408139308, LR: 0.0003 +[2026-02-27 13:37:41] (step=0003897) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 0.7624730972412443, LR: 0.0003 +[2026-02-27 13:37:49] (step=0003898) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.762668753668558, LR: 0.0003 +[2026-02-27 13:37:57] (step=0003899) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 0.7628644100958717, LR: 0.0003 +[2026-02-27 13:38:05] (step=0003900) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.7630600665231853, LR: 0.0003 +[2026-02-27 13:38:13] (step=0003901) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.7632557229504989, LR: 0.0003 +[2026-02-27 13:38:21] (step=0003902) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 0.7634513793778126, LR: 0.0003 +[2026-02-27 13:38:28] (step=0003903) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.7636470358051262, LR: 0.0003 +[2026-02-27 13:38:36] (step=0003904) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.7638426922324398, LR: 0.0003 +[2026-02-27 13:38:44] (step=0003905) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.7640383486597535, LR: 0.0003 +[2026-02-27 13:38:52] (step=0003906) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.7642340050870671, LR: 0.0003 +[2026-02-27 13:39:00] (step=0003907) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.7644296615143807, LR: 0.0003 +[2026-02-27 13:39:08] (step=0003908) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.7646253179416944, LR: 0.0003 +[2026-02-27 13:39:15] (step=0003909) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.764820974369008, LR: 0.0003 +[2026-02-27 13:39:23] (step=0003910) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.7650166307963217, LR: 0.0003 +[2026-02-27 13:39:31] (step=0003911) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.7652122872236353, LR: 0.0003 +[2026-02-27 13:39:39] (step=0003912) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.7654079436509489, LR: 0.0003 +[2026-02-27 13:39:47] (step=0003913) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.7656036000782626, LR: 0.0003 +[2026-02-27 13:39:55] (step=0003914) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.7657992565055762, LR: 0.0003 +[2026-02-27 13:40:02] (step=0003915) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.7659949129328898, LR: 0.0003 +[2026-02-27 13:40:10] (step=0003916) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.7661905693602035, LR: 0.0003 +[2026-02-27 13:40:18] (step=0003917) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.7663862257875171, LR: 0.0003 +[2026-02-27 13:40:26] (step=0003918) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.7665818822148308, LR: 0.0003 +[2026-02-27 13:40:34] (step=0003919) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.7667775386421444, LR: 0.0003 +[2026-02-27 13:40:42] (step=0003920) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.766973195069458, LR: 0.0003 +[2026-02-27 13:40:50] (step=0003921) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 0.7671688514967717, LR: 0.0003 +[2026-02-27 13:40:57] (step=0003922) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.7673645079240853, LR: 0.0003 +[2026-02-27 13:41:05] (step=0003923) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.767560164351399, LR: 0.0003 +[2026-02-27 13:41:13] (step=0003924) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.7677558207787126, LR: 0.0003 +[2026-02-27 13:41:21] (step=0003925) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.7679514772060262, LR: 0.0003 +[2026-02-27 13:41:29] (step=0003926) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.7681471336333399, LR: 0.0003 +[2026-02-27 13:41:37] (step=0003927) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.7683427900606535, LR: 0.0003 +[2026-02-27 13:41:44] (step=0003928) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.7685384464879671, LR: 0.0003 +[2026-02-27 13:41:52] (step=0003929) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.7687341029152808, LR: 0.0003 +[2026-02-27 13:42:00] (step=0003930) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.7689297593425944, LR: 0.0003 +[2026-02-27 13:42:08] (step=0003931) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.769125415769908, LR: 0.0003 +[2026-02-27 13:42:16] (step=0003932) Train Loss: 0.4759, Train Steps/Sec: 0.13, Epoch: 0.7693210721972217, LR: 0.0003 +[2026-02-27 13:42:24] (step=0003933) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.7695167286245354, LR: 0.0003 +[2026-02-27 13:42:31] (step=0003934) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.7697123850518489, LR: 0.0003 +[2026-02-27 13:42:39] (step=0003935) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.7699080414791626, LR: 0.0003 +[2026-02-27 13:42:47] (step=0003936) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.7701036979064763, LR: 0.0003 +[2026-02-27 13:42:55] (step=0003937) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 0.7702993543337898, LR: 0.0003 +[2026-02-27 13:43:03] (step=0003938) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 0.7704950107611035, LR: 0.0003 +[2026-02-27 13:43:11] (step=0003939) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.7706906671884172, LR: 0.0003 +[2026-02-27 13:43:18] (step=0003940) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 0.7708863236157307, LR: 0.0003 +[2026-02-27 13:43:26] (step=0003941) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.7710819800430444, LR: 0.0003 +[2026-02-27 13:43:34] (step=0003942) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.7712776364703581, LR: 0.0003 +[2026-02-27 13:43:42] (step=0003943) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.7714732928976716, LR: 0.0003 +[2026-02-27 13:43:50] (step=0003944) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.7716689493249853, LR: 0.0003 +[2026-02-27 13:43:58] (step=0003945) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 0.771864605752299, LR: 0.0003 +[2026-02-27 13:44:05] (step=0003946) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.7720602621796127, LR: 0.0003 +[2026-02-27 13:44:13] (step=0003947) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.7722559186069262, LR: 0.0003 +[2026-02-27 13:44:21] (step=0003948) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.7724515750342399, LR: 0.0003 +[2026-02-27 13:44:29] (step=0003949) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.7726472314615536, LR: 0.0003 +[2026-02-27 13:44:37] (step=0003950) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 0.7728428878888671, LR: 0.0003 +[2026-02-27 13:44:45] (step=0003951) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.7730385443161808, LR: 0.0003 +[2026-02-27 13:44:53] (step=0003952) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 0.7732342007434945, LR: 0.0003 +[2026-02-27 13:45:00] (step=0003953) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.773429857170808, LR: 0.0003 +[2026-02-27 13:45:08] (step=0003954) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.7736255135981217, LR: 0.0003 +[2026-02-27 13:45:16] (step=0003955) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.7738211700254354, LR: 0.0003 +[2026-02-27 13:45:24] (step=0003956) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.7740168264527489, LR: 0.0003 +[2026-02-27 13:45:32] (step=0003957) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.7742124828800626, LR: 0.0003 +[2026-02-27 13:45:40] (step=0003958) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.7744081393073763, LR: 0.0003 +[2026-02-27 13:45:47] (step=0003959) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.7746037957346898, LR: 0.0003 +[2026-02-27 13:45:55] (step=0003960) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.7747994521620035, LR: 0.0003 +[2026-02-27 13:46:03] (step=0003961) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.7749951085893172, LR: 0.0003 +[2026-02-27 13:46:11] (step=0003962) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.7751907650166308, LR: 0.0003 +[2026-02-27 13:46:19] (step=0003963) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.7753864214439444, LR: 0.0003 +[2026-02-27 13:46:27] (step=0003964) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.7755820778712581, LR: 0.0003 +[2026-02-27 13:46:35] (step=0003965) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.7757777342985717, LR: 0.0003 +[2026-02-27 13:46:42] (step=0003966) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.7759733907258853, LR: 0.0003 +[2026-02-27 13:46:50] (step=0003967) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 0.776169047153199, LR: 0.0003 +[2026-02-27 13:46:58] (step=0003968) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 0.7763647035805126, LR: 0.0003 +[2026-02-27 13:47:06] (step=0003969) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.7765603600078262, LR: 0.0003 +[2026-02-27 13:47:14] (step=0003970) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.7767560164351399, LR: 0.0003 +[2026-02-27 13:47:22] (step=0003971) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.7769516728624535, LR: 0.0003 +[2026-02-27 13:47:30] (step=0003972) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.7771473292897672, LR: 0.0003 +[2026-02-27 13:47:37] (step=0003973) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.7773429857170808, LR: 0.0003 +[2026-02-27 13:47:45] (step=0003974) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 0.7775386421443945, LR: 0.0003 +[2026-02-27 13:47:53] (step=0003975) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.7777342985717081, LR: 0.0003 +[2026-02-27 13:48:01] (step=0003976) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.7779299549990217, LR: 0.0003 +[2026-02-27 13:48:09] (step=0003977) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.7781256114263354, LR: 0.0003 +[2026-02-27 13:48:17] (step=0003978) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.778321267853649, LR: 0.0003 +[2026-02-27 13:48:24] (step=0003979) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 0.7785169242809626, LR: 0.0003 +[2026-02-27 13:48:32] (step=0003980) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.7787125807082763, LR: 0.0003 +[2026-02-27 13:48:40] (step=0003981) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.7789082371355899, LR: 0.0003 +[2026-02-27 13:48:48] (step=0003982) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.7791038935629035, LR: 0.0003 +[2026-02-27 13:48:56] (step=0003983) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.7792995499902172, LR: 0.0003 +[2026-02-27 13:49:04] (step=0003984) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.7794952064175308, LR: 0.0003 +[2026-02-27 13:49:11] (step=0003985) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.7796908628448445, LR: 0.0003 +[2026-02-27 13:49:19] (step=0003986) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.7798865192721581, LR: 0.0003 +[2026-02-27 13:49:27] (step=0003987) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.7800821756994717, LR: 0.0003 +[2026-02-27 13:49:35] (step=0003988) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 0.7802778321267854, LR: 0.0003 +[2026-02-27 13:49:43] (step=0003989) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.780473488554099, LR: 0.0003 +[2026-02-27 13:49:51] (step=0003990) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.7806691449814126, LR: 0.0003 +[2026-02-27 13:49:58] (step=0003991) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.7808648014087263, LR: 0.0003 +[2026-02-27 13:50:06] (step=0003992) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.78106045783604, LR: 0.0003 +[2026-02-27 13:50:14] (step=0003993) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.7812561142633535, LR: 0.0003 +[2026-02-27 13:50:22] (step=0003994) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.7814517706906672, LR: 0.0003 +[2026-02-27 13:50:30] (step=0003995) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.7816474271179809, LR: 0.0003 +[2026-02-27 13:50:38] (step=0003996) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 0.7818430835452944, LR: 0.0003 +[2026-02-27 13:50:45] (step=0003997) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.7820387399726081, LR: 0.0003 +[2026-02-27 13:50:53] (step=0003998) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.7822343963999218, LR: 0.0003 +[2026-02-27 13:51:01] (step=0003999) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 0.7824300528272353, LR: 0.0003 +[2026-02-27 13:51:09] (step=0004000) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.782625709254549, LR: 0.0003 +[2026-02-27 13:51:09] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0004000/ +[2026-02-27 13:51:17] (step=0004001) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.7828213656818627, LR: 0.0003 +[2026-02-27 13:51:25] (step=0004002) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 0.7830170221091763, LR: 0.0003 +[2026-02-27 13:51:32] (step=0004003) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.7832126785364899, LR: 0.0003 +[2026-02-27 13:51:40] (step=0004004) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.7834083349638036, LR: 0.0003 +[2026-02-27 13:51:48] (step=0004005) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 0.7836039913911172, LR: 0.0003 +[2026-02-27 13:51:56] (step=0004006) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.7837996478184308, LR: 0.0003 +[2026-02-27 13:52:04] (step=0004007) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.7839953042457445, LR: 0.0003 +[2026-02-27 13:52:12] (step=0004008) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 0.7841909606730582, LR: 0.0003 +[2026-02-27 13:52:20] (step=0004009) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.7843866171003717, LR: 0.0003 +[2026-02-27 13:52:27] (step=0004010) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.7845822735276854, LR: 0.0003 +[2026-02-27 13:52:35] (step=0004011) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.7847779299549991, LR: 0.0003 +[2026-02-27 13:52:43] (step=0004012) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.7849735863823126, LR: 0.0003 +[2026-02-27 13:52:51] (step=0004013) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.7851692428096263, LR: 0.0003 +[2026-02-27 13:52:59] (step=0004014) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 0.78536489923694, LR: 0.0003 +[2026-02-27 13:53:07] (step=0004015) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 0.7855605556642535, LR: 0.0003 +[2026-02-27 13:53:15] (step=0004016) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.7857562120915672, LR: 0.0003 +[2026-02-27 13:53:22] (step=0004017) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.7859518685188809, LR: 0.0003 +[2026-02-27 13:53:30] (step=0004018) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.7861475249461944, LR: 0.0003 +[2026-02-27 13:53:38] (step=0004019) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.7863431813735081, LR: 0.0003 +[2026-02-27 13:53:46] (step=0004020) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.7865388378008218, LR: 0.0003 +[2026-02-27 13:53:54] (step=0004021) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.7867344942281354, LR: 0.0003 +[2026-02-27 13:54:02] (step=0004022) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.786930150655449, LR: 0.0003 +[2026-02-27 13:54:09] (step=0004023) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.7871258070827627, LR: 0.0003 +[2026-02-27 13:54:17] (step=0004024) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 0.7873214635100763, LR: 0.0003 +[2026-02-27 13:54:25] (step=0004025) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.7875171199373899, LR: 0.0003 +[2026-02-27 13:54:33] (step=0004026) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.7877127763647036, LR: 0.0003 +[2026-02-27 13:54:41] (step=0004027) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.7879084327920172, LR: 0.0003 +[2026-02-27 13:54:49] (step=0004028) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.7881040892193308, LR: 0.0003 +[2026-02-27 13:54:57] (step=0004029) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.7882997456466445, LR: 0.0003 +[2026-02-27 13:55:04] (step=0004030) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.7884954020739582, LR: 0.0003 +[2026-02-27 13:55:12] (step=0004031) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 0.7886910585012717, LR: 0.0003 +[2026-02-27 13:55:20] (step=0004032) Train Loss: 0.4722, Train Steps/Sec: 0.13, Epoch: 0.7888867149285854, LR: 0.0003 +[2026-02-27 13:55:28] (step=0004033) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.7890823713558991, LR: 0.0003 +[2026-02-27 13:55:36] (step=0004034) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.7892780277832127, LR: 0.0003 +[2026-02-27 13:55:44] (step=0004035) Train Loss: 0.4736, Train Steps/Sec: 0.13, Epoch: 0.7894736842105263, LR: 0.0003 +[2026-02-27 13:55:51] (step=0004036) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.78966934063784, LR: 0.0003 +[2026-02-27 13:55:59] (step=0004037) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.7898649970651536, LR: 0.0003 +[2026-02-27 13:56:07] (step=0004038) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 0.7900606534924672, LR: 0.0003 +[2026-02-27 13:56:15] (step=0004039) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 0.7902563099197809, LR: 0.0003 +[2026-02-27 13:56:23] (step=0004040) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.7904519663470945, LR: 0.0003 +[2026-02-27 13:56:31] (step=0004041) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.7906476227744081, LR: 0.0003 +[2026-02-27 13:56:38] (step=0004042) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.7908432792017218, LR: 0.0003 +[2026-02-27 13:56:46] (step=0004043) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.7910389356290354, LR: 0.0003 +[2026-02-27 13:56:54] (step=0004044) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.791234592056349, LR: 0.0003 +[2026-02-27 13:57:02] (step=0004045) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.7914302484836627, LR: 0.0003 +[2026-02-27 13:57:10] (step=0004046) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.7916259049109763, LR: 0.0003 +[2026-02-27 13:57:18] (step=0004047) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.79182156133829, LR: 0.0003 +[2026-02-27 13:57:25] (step=0004048) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.7920172177656036, LR: 0.0003 +[2026-02-27 13:57:33] (step=0004049) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.7922128741929172, LR: 0.0003 +[2026-02-27 13:57:41] (step=0004050) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 0.7924085306202309, LR: 0.0003 +[2026-02-27 13:57:49] (step=0004051) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.7926041870475445, LR: 0.0003 +[2026-02-27 13:57:57] (step=0004052) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.7927998434748581, LR: 0.0003 +[2026-02-27 13:58:05] (step=0004053) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.7929954999021718, LR: 0.0003 +[2026-02-27 13:58:12] (step=0004054) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.7931911563294854, LR: 0.0003 +[2026-02-27 13:58:20] (step=0004055) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.793386812756799, LR: 0.0003 +[2026-02-27 13:58:28] (step=0004056) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.7935824691841127, LR: 0.0003 +[2026-02-27 13:58:36] (step=0004057) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.7937781256114264, LR: 0.0003 +[2026-02-27 13:58:44] (step=0004058) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.79397378203874, LR: 0.0003 +[2026-02-27 13:58:52] (step=0004059) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.7941694384660536, LR: 0.0003 +[2026-02-27 13:59:00] (step=0004060) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 0.7943650948933673, LR: 0.0003 +[2026-02-27 13:59:07] (step=0004061) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 0.7945607513206809, LR: 0.0003 +[2026-02-27 13:59:15] (step=0004062) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.7947564077479945, LR: 0.0003 +[2026-02-27 13:59:23] (step=0004063) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.7949520641753082, LR: 0.0003 +[2026-02-27 13:59:31] (step=0004064) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.7951477206026218, LR: 0.0003 +[2026-02-27 13:59:39] (step=0004065) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.7953433770299354, LR: 0.0003 +[2026-02-27 13:59:47] (step=0004066) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.7955390334572491, LR: 0.0003 +[2026-02-27 13:59:54] (step=0004067) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 0.7957346898845628, LR: 0.0003 +[2026-02-27 14:00:02] (step=0004068) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 0.7959303463118763, LR: 0.0003 +[2026-02-27 14:00:10] (step=0004069) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.79612600273919, LR: 0.0003 +[2026-02-27 14:00:18] (step=0004070) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.7963216591665037, LR: 0.0003 +[2026-02-27 14:00:26] (step=0004071) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.7965173155938172, LR: 0.0003 +[2026-02-27 14:00:34] (step=0004072) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.7967129720211309, LR: 0.0003 +[2026-02-27 14:00:42] (step=0004073) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 0.7969086284484446, LR: 0.0003 +[2026-02-27 14:00:49] (step=0004074) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.7971042848757581, LR: 0.0003 +[2026-02-27 14:00:57] (step=0004075) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.7972999413030718, LR: 0.0003 +[2026-02-27 14:01:05] (step=0004076) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.7974955977303855, LR: 0.0003 +[2026-02-27 14:01:13] (step=0004077) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.797691254157699, LR: 0.0003 +[2026-02-27 14:01:21] (step=0004078) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.7978869105850127, LR: 0.0003 +[2026-02-27 14:01:29] (step=0004079) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.7980825670123264, LR: 0.0003 +[2026-02-27 14:01:36] (step=0004080) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.79827822343964, LR: 0.0003 +[2026-02-27 14:01:44] (step=0004081) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.7984738798669536, LR: 0.0003 +[2026-02-27 14:01:52] (step=0004082) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.7986695362942673, LR: 0.0003 +[2026-02-27 14:02:00] (step=0004083) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.7988651927215809, LR: 0.0003 +[2026-02-27 14:02:08] (step=0004084) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.7990608491488945, LR: 0.0003 +[2026-02-27 14:02:16] (step=0004085) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 0.7992565055762082, LR: 0.0003 +[2026-02-27 14:02:23] (step=0004086) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.7994521620035219, LR: 0.0003 +[2026-02-27 14:02:31] (step=0004087) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 0.7996478184308354, LR: 0.0003 +[2026-02-27 14:02:39] (step=0004088) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.7998434748581491, LR: 0.0003 +[2026-02-27 14:02:47] (step=0004089) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.8000391312854628, LR: 0.0003 +[2026-02-27 14:02:55] (step=0004090) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.8002347877127763, LR: 0.0003 +[2026-02-27 14:03:03] (step=0004091) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.80043044414009, LR: 0.0003 +[2026-02-27 14:03:10] (step=0004092) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.8006261005674037, LR: 0.0003 +[2026-02-27 14:03:18] (step=0004093) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.8008217569947172, LR: 0.0003 +[2026-02-27 14:03:26] (step=0004094) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.8010174134220309, LR: 0.0003 +[2026-02-27 14:03:34] (step=0004095) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 0.8012130698493446, LR: 0.0003 +[2026-02-27 14:03:42] (step=0004096) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.8014087262766582, LR: 0.0003 +[2026-02-27 14:03:50] (step=0004097) Train Loss: 0.4696, Train Steps/Sec: 0.13, Epoch: 0.8016043827039718, LR: 0.0003 +[2026-02-27 14:03:57] (step=0004098) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.8018000391312855, LR: 0.0003 +[2026-02-27 14:04:05] (step=0004099) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.8019956955585991, LR: 0.0003 +[2026-02-27 14:04:13] (step=0004100) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 0.8021913519859127, LR: 0.0003 +[2026-02-27 14:04:21] (step=0004101) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 0.8023870084132264, LR: 0.0003 +[2026-02-27 14:04:29] (step=0004102) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 0.80258266484054, LR: 0.0003 +[2026-02-27 14:04:37] (step=0004103) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 0.8027783212678536, LR: 0.0003 +[2026-02-27 14:04:44] (step=0004104) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.8029739776951673, LR: 0.0003 +[2026-02-27 14:04:52] (step=0004105) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 0.8031696341224809, LR: 0.0003 +[2026-02-27 14:05:00] (step=0004106) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 0.8033652905497946, LR: 0.0003 +[2026-02-27 14:05:08] (step=0004107) Train Loss: 0.4757, Train Steps/Sec: 0.13, Epoch: 0.8035609469771082, LR: 0.0003 +[2026-02-27 14:05:16] (step=0004108) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.8037566034044218, LR: 0.0003 +[2026-02-27 14:05:24] (step=0004109) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 0.8039522598317355, LR: 0.0003 +[2026-02-27 14:05:32] (step=0004110) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.8041479162590491, LR: 0.0003 +[2026-02-27 14:05:39] (step=0004111) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.8043435726863627, LR: 0.0003 +[2026-02-27 14:05:47] (step=0004112) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.8045392291136764, LR: 0.0003 +[2026-02-27 14:05:55] (step=0004113) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 0.80473488554099, LR: 0.0003 +[2026-02-27 14:06:03] (step=0004114) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 0.8049305419683036, LR: 0.0003 +[2026-02-27 14:06:11] (step=0004115) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.8051261983956173, LR: 0.0003 +[2026-02-27 14:06:19] (step=0004116) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.805321854822931, LR: 0.0003 +[2026-02-27 14:06:26] (step=0004117) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.8055175112502446, LR: 0.0003 +[2026-02-27 14:06:34] (step=0004118) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.8057131676775582, LR: 0.0003 +[2026-02-27 14:06:42] (step=0004119) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 0.8059088241048719, LR: 0.0003 +[2026-02-27 14:06:50] (step=0004120) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.8061044805321855, LR: 0.0003 +[2026-02-27 14:06:58] (step=0004121) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 0.8063001369594991, LR: 0.0003 +[2026-02-27 14:07:06] (step=0004122) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.8064957933868128, LR: 0.0003 +[2026-02-27 14:07:14] (step=0004123) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.8066914498141264, LR: 0.0003 +[2026-02-27 14:07:21] (step=0004124) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.80688710624144, LR: 0.0003 +[2026-02-27 14:07:29] (step=0004125) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.8070827626687537, LR: 0.0003 +[2026-02-27 14:07:37] (step=0004126) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.8072784190960673, LR: 0.0003 +[2026-02-27 14:07:45] (step=0004127) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.8074740755233809, LR: 0.0003 +[2026-02-27 14:07:53] (step=0004128) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 0.8076697319506946, LR: 0.0003 +[2026-02-27 14:08:01] (step=0004129) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.8078653883780083, LR: 0.0003 +[2026-02-27 14:08:08] (step=0004130) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 0.8080610448053218, LR: 0.0003 +[2026-02-27 14:08:16] (step=0004131) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.8082567012326355, LR: 0.0003 +[2026-02-27 14:08:24] (step=0004132) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.8084523576599492, LR: 0.0003 +[2026-02-27 14:08:32] (step=0004133) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.8086480140872627, LR: 0.0003 +[2026-02-27 14:08:40] (step=0004134) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.8088436705145764, LR: 0.0003 +[2026-02-27 14:08:48] (step=0004135) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 0.8090393269418901, LR: 0.0003 +[2026-02-27 14:08:55] (step=0004136) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.8092349833692036, LR: 0.0003 +[2026-02-27 14:09:03] (step=0004137) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.8094306397965173, LR: 0.0003 +[2026-02-27 14:09:11] (step=0004138) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.809626296223831, LR: 0.0003 +[2026-02-27 14:09:19] (step=0004139) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.8098219526511445, LR: 0.0003 +[2026-02-27 14:09:27] (step=0004140) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 0.8100176090784582, LR: 0.0003 +[2026-02-27 14:09:35] (step=0004141) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.8102132655057719, LR: 0.0003 +[2026-02-27 14:09:42] (step=0004142) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.8104089219330854, LR: 0.0003 +[2026-02-27 14:09:50] (step=0004143) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.8106045783603991, LR: 0.0003 +[2026-02-27 14:09:58] (step=0004144) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.8108002347877128, LR: 0.0003 +[2026-02-27 14:10:06] (step=0004145) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 0.8109958912150265, LR: 0.0003 +[2026-02-27 14:10:14] (step=0004146) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.81119154764234, LR: 0.0003 +[2026-02-27 14:10:22] (step=0004147) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.8113872040696537, LR: 0.0003 +[2026-02-27 14:10:30] (step=0004148) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.8115828604969674, LR: 0.0003 +[2026-02-27 14:10:37] (step=0004149) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 0.8117785169242809, LR: 0.0003 +[2026-02-27 14:10:45] (step=0004150) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 0.8119741733515946, LR: 0.0003 +[2026-02-27 14:10:53] (step=0004151) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.8121698297789083, LR: 0.0003 +[2026-02-27 14:11:01] (step=0004152) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.8123654862062218, LR: 0.0003 +[2026-02-27 14:11:09] (step=0004153) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.8125611426335355, LR: 0.0003 +[2026-02-27 14:11:17] (step=0004154) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.8127567990608492, LR: 0.0003 +[2026-02-27 14:11:24] (step=0004155) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.8129524554881628, LR: 0.0003 +[2026-02-27 14:11:32] (step=0004156) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.8131481119154764, LR: 0.0003 +[2026-02-27 14:11:40] (step=0004157) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.8133437683427901, LR: 0.0003 +[2026-02-27 14:11:48] (step=0004158) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.8135394247701037, LR: 0.0003 +[2026-02-27 14:11:56] (step=0004159) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.8137350811974173, LR: 0.0003 +[2026-02-27 14:12:04] (step=0004160) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.813930737624731, LR: 0.0003 +[2026-02-27 14:12:11] (step=0004161) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.8141263940520446, LR: 0.0003 +[2026-02-27 14:12:19] (step=0004162) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.8143220504793582, LR: 0.0003 +[2026-02-27 14:12:27] (step=0004163) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.8145177069066719, LR: 0.0003 +[2026-02-27 14:12:35] (step=0004164) Train Loss: 0.4765, Train Steps/Sec: 0.13, Epoch: 0.8147133633339855, LR: 0.0003 +[2026-02-27 14:12:43] (step=0004165) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.8149090197612991, LR: 0.0003 +[2026-02-27 14:12:51] (step=0004166) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.8151046761886128, LR: 0.0003 +[2026-02-27 14:12:58] (step=0004167) Train Loss: 0.4696, Train Steps/Sec: 0.13, Epoch: 0.8153003326159264, LR: 0.0003 +[2026-02-27 14:13:06] (step=0004168) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.81549598904324, LR: 0.0003 +[2026-02-27 14:13:14] (step=0004169) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.8156916454705537, LR: 0.0003 +[2026-02-27 14:13:22] (step=0004170) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.8158873018978673, LR: 0.0003 +[2026-02-27 14:13:30] (step=0004171) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 0.816082958325181, LR: 0.0003 +[2026-02-27 14:13:38] (step=0004172) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.8162786147524946, LR: 0.0003 +[2026-02-27 14:13:46] (step=0004173) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.8164742711798083, LR: 0.0003 +[2026-02-27 14:13:53] (step=0004174) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.8166699276071219, LR: 0.0003 +[2026-02-27 14:14:01] (step=0004175) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.8168655840344355, LR: 0.0003 +[2026-02-27 14:14:09] (step=0004176) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.8170612404617492, LR: 0.0003 +[2026-02-27 14:14:17] (step=0004177) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.8172568968890628, LR: 0.0003 +[2026-02-27 14:14:25] (step=0004178) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.8174525533163765, LR: 0.0003 +[2026-02-27 14:14:33] (step=0004179) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.8176482097436901, LR: 0.0003 +[2026-02-27 14:14:41] (step=0004180) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.8178438661710037, LR: 0.0003 +[2026-02-27 14:14:48] (step=0004181) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 0.8180395225983174, LR: 0.0003 +[2026-02-27 14:14:56] (step=0004182) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.818235179025631, LR: 0.0003 +[2026-02-27 14:15:04] (step=0004183) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.8184308354529446, LR: 0.0003 +[2026-02-27 14:15:12] (step=0004184) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.8186264918802583, LR: 0.0003 +[2026-02-27 14:15:20] (step=0004185) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.8188221483075719, LR: 0.0003 +[2026-02-27 14:15:28] (step=0004186) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.8190178047348855, LR: 0.0003 +[2026-02-27 14:15:35] (step=0004187) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.8192134611621992, LR: 0.0003 +[2026-02-27 14:15:43] (step=0004188) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 0.8194091175895128, LR: 0.0003 +[2026-02-27 14:15:51] (step=0004189) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.8196047740168264, LR: 0.0003 +[2026-02-27 14:15:59] (step=0004190) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.8198004304441401, LR: 0.0003 +[2026-02-27 14:16:07] (step=0004191) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.8199960868714538, LR: 0.0003 +[2026-02-27 14:16:15] (step=0004192) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.8201917432987673, LR: 0.0003 +[2026-02-27 14:16:22] (step=0004193) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.820387399726081, LR: 0.0003 +[2026-02-27 14:16:30] (step=0004194) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.8205830561533947, LR: 0.0003 +[2026-02-27 14:16:38] (step=0004195) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.8207787125807082, LR: 0.0003 +[2026-02-27 14:16:46] (step=0004196) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.8209743690080219, LR: 0.0003 +[2026-02-27 14:16:54] (step=0004197) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.8211700254353356, LR: 0.0003 +[2026-02-27 14:17:02] (step=0004198) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.8213656818626491, LR: 0.0003 +[2026-02-27 14:17:09] (step=0004199) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.8215613382899628, LR: 0.0003 +[2026-02-27 14:17:17] (step=0004200) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.8217569947172765, LR: 0.0003 +[2026-02-27 14:17:25] (step=0004201) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.8219526511445902, LR: 0.0003 +[2026-02-27 14:17:33] (step=0004202) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.8221483075719037, LR: 0.0003 +[2026-02-27 14:17:41] (step=0004203) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.8223439639992174, LR: 0.0003 +[2026-02-27 14:17:49] (step=0004204) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.8225396204265311, LR: 0.0003 +[2026-02-27 14:17:57] (step=0004205) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.8227352768538446, LR: 0.0003 +[2026-02-27 14:18:04] (step=0004206) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.8229309332811583, LR: 0.0003 +[2026-02-27 14:18:12] (step=0004207) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.823126589708472, LR: 0.0003 +[2026-02-27 14:18:20] (step=0004208) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 0.8233222461357855, LR: 0.0003 +[2026-02-27 14:18:28] (step=0004209) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.8235179025630992, LR: 0.0003 +[2026-02-27 14:18:36] (step=0004210) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.8237135589904129, LR: 0.0003 +[2026-02-27 14:18:44] (step=0004211) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 0.8239092154177264, LR: 0.0003 +[2026-02-27 14:18:51] (step=0004212) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.8241048718450401, LR: 0.0003 +[2026-02-27 14:18:59] (step=0004213) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.8243005282723538, LR: 0.0003 +[2026-02-27 14:19:07] (step=0004214) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.8244961846996673, LR: 0.0003 +[2026-02-27 14:19:15] (step=0004215) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 0.824691841126981, LR: 0.0003 +[2026-02-27 14:19:23] (step=0004216) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.8248874975542947, LR: 0.0003 +[2026-02-27 14:19:31] (step=0004217) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.8250831539816083, LR: 0.0003 +[2026-02-27 14:19:39] (step=0004218) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.8252788104089219, LR: 0.0003 +[2026-02-27 14:19:46] (step=0004219) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 0.8254744668362356, LR: 0.0003 +[2026-02-27 14:19:54] (step=0004220) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.8256701232635492, LR: 0.0003 +[2026-02-27 14:20:02] (step=0004221) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.8258657796908628, LR: 0.0003 +[2026-02-27 14:20:10] (step=0004222) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.8260614361181765, LR: 0.0003 +[2026-02-27 14:20:18] (step=0004223) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.8262570925454901, LR: 0.0003 +[2026-02-27 14:20:26] (step=0004224) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.8264527489728037, LR: 0.0003 +[2026-02-27 14:20:33] (step=0004225) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.8266484054001174, LR: 0.0003 +[2026-02-27 14:20:41] (step=0004226) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.826844061827431, LR: 0.0003 +[2026-02-27 14:20:49] (step=0004227) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.8270397182547446, LR: 0.0003 +[2026-02-27 14:20:57] (step=0004228) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.8272353746820583, LR: 0.0003 +[2026-02-27 14:21:05] (step=0004229) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.827431031109372, LR: 0.0003 +[2026-02-27 14:21:13] (step=0004230) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.8276266875366856, LR: 0.0003 +[2026-02-27 14:21:20] (step=0004231) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.8278223439639992, LR: 0.0003 +[2026-02-27 14:21:28] (step=0004232) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 0.8280180003913129, LR: 0.0003 +[2026-02-27 14:21:36] (step=0004233) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.8282136568186265, LR: 0.0003 +[2026-02-27 14:21:44] (step=0004234) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.8284093132459401, LR: 0.0003 +[2026-02-27 14:21:52] (step=0004235) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.8286049696732538, LR: 0.0003 +[2026-02-27 14:22:00] (step=0004236) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.8288006261005674, LR: 0.0003 +[2026-02-27 14:22:07] (step=0004237) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 0.828996282527881, LR: 0.0003 +[2026-02-27 14:22:15] (step=0004238) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.8291919389551947, LR: 0.0003 +[2026-02-27 14:22:23] (step=0004239) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.8293875953825083, LR: 0.0003 +[2026-02-27 14:22:31] (step=0004240) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.829583251809822, LR: 0.0003 +[2026-02-27 14:22:39] (step=0004241) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.8297789082371356, LR: 0.0003 +[2026-02-27 14:22:47] (step=0004242) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.8299745646644492, LR: 0.0003 +[2026-02-27 14:22:54] (step=0004243) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.8301702210917629, LR: 0.0003 +[2026-02-27 14:23:02] (step=0004244) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.8303658775190765, LR: 0.0003 +[2026-02-27 14:23:10] (step=0004245) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.8305615339463901, LR: 0.0003 +[2026-02-27 14:23:18] (step=0004246) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 0.8307571903737038, LR: 0.0003 +[2026-02-27 14:23:26] (step=0004247) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.8309528468010174, LR: 0.0003 +[2026-02-27 14:23:34] (step=0004248) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.831148503228331, LR: 0.0003 +[2026-02-27 14:23:41] (step=0004249) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.8313441596556447, LR: 0.0003 +[2026-02-27 14:23:49] (step=0004250) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.8315398160829583, LR: 0.0003 +[2026-02-27 14:23:57] (step=0004251) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.8317354725102719, LR: 0.0003 +[2026-02-27 14:24:05] (step=0004252) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.8319311289375856, LR: 0.0003 +[2026-02-27 14:24:13] (step=0004253) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 0.8321267853648993, LR: 0.0003 +[2026-02-27 14:24:21] (step=0004254) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.8323224417922128, LR: 0.0003 +[2026-02-27 14:24:29] (step=0004255) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.8325180982195265, LR: 0.0003 +[2026-02-27 14:24:36] (step=0004256) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.8327137546468402, LR: 0.0003 +[2026-02-27 14:24:44] (step=0004257) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.8329094110741538, LR: 0.0003 +[2026-02-27 14:24:52] (step=0004258) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.8331050675014674, LR: 0.0003 +[2026-02-27 14:25:00] (step=0004259) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.8333007239287811, LR: 0.0003 +[2026-02-27 14:25:08] (step=0004260) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.8334963803560947, LR: 0.0003 +[2026-02-27 14:25:16] (step=0004261) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.8336920367834083, LR: 0.0003 +[2026-02-27 14:25:23] (step=0004262) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.833887693210722, LR: 0.0003 +[2026-02-27 14:25:31] (step=0004263) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 0.8340833496380357, LR: 0.0003 +[2026-02-27 14:25:39] (step=0004264) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.8342790060653492, LR: 0.0003 +[2026-02-27 14:25:47] (step=0004265) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.8344746624926629, LR: 0.0003 +[2026-02-27 14:25:55] (step=0004266) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.8346703189199766, LR: 0.0003 +[2026-02-27 14:26:03] (step=0004267) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.8348659753472901, LR: 0.0003 +[2026-02-27 14:26:10] (step=0004268) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.8350616317746038, LR: 0.0003 +[2026-02-27 14:26:18] (step=0004269) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.8352572882019175, LR: 0.0003 +[2026-02-27 14:26:26] (step=0004270) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.835452944629231, LR: 0.0003 +[2026-02-27 14:26:34] (step=0004271) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.8356486010565447, LR: 0.0003 +[2026-02-27 14:26:42] (step=0004272) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.8358442574838584, LR: 0.0003 +[2026-02-27 14:26:50] (step=0004273) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.8360399139111719, LR: 0.0003 +[2026-02-27 14:26:58] (step=0004274) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.8362355703384856, LR: 0.0003 +[2026-02-27 14:27:05] (step=0004275) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 0.8364312267657993, LR: 0.0003 +[2026-02-27 14:27:13] (step=0004276) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.8366268831931128, LR: 0.0003 +[2026-02-27 14:27:21] (step=0004277) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.8368225396204265, LR: 0.0003 +[2026-02-27 14:27:29] (step=0004278) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.8370181960477402, LR: 0.0003 +[2026-02-27 14:27:37] (step=0004279) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.8372138524750538, LR: 0.0003 +[2026-02-27 14:27:45] (step=0004280) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.8374095089023674, LR: 0.0003 +[2026-02-27 14:27:52] (step=0004281) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.8376051653296811, LR: 0.0003 +[2026-02-27 14:28:00] (step=0004282) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.8378008217569947, LR: 0.0003 +[2026-02-27 14:28:08] (step=0004283) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.8379964781843083, LR: 0.0003 +[2026-02-27 14:28:16] (step=0004284) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.838192134611622, LR: 0.0003 +[2026-02-27 14:28:24] (step=0004285) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.8383877910389357, LR: 0.0003 +[2026-02-27 14:28:32] (step=0004286) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.8385834474662492, LR: 0.0003 +[2026-02-27 14:28:39] (step=0004287) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.8387791038935629, LR: 0.0003 +[2026-02-27 14:28:47] (step=0004288) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.8389747603208766, LR: 0.0003 +[2026-02-27 14:28:55] (step=0004289) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 0.8391704167481902, LR: 0.0003 +[2026-02-27 14:29:03] (step=0004290) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.8393660731755038, LR: 0.0003 +[2026-02-27 14:29:11] (step=0004291) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.8395617296028175, LR: 0.0003 +[2026-02-27 14:29:19] (step=0004292) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.8397573860301311, LR: 0.0003 +[2026-02-27 14:29:26] (step=0004293) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 0.8399530424574447, LR: 0.0003 +[2026-02-27 14:29:34] (step=0004294) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.8401486988847584, LR: 0.0003 +[2026-02-27 14:29:42] (step=0004295) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.840344355312072, LR: 0.0003 +[2026-02-27 14:29:50] (step=0004296) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.8405400117393856, LR: 0.0003 +[2026-02-27 14:29:58] (step=0004297) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.8407356681666993, LR: 0.0003 +[2026-02-27 14:30:06] (step=0004298) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 0.8409313245940129, LR: 0.0003 +[2026-02-27 14:30:13] (step=0004299) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 0.8411269810213265, LR: 0.0003 +[2026-02-27 14:30:21] (step=0004300) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 0.8413226374486402, LR: 0.0003 +[2026-02-27 14:30:29] (step=0004301) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.8415182938759538, LR: 0.0003 +[2026-02-27 14:30:37] (step=0004302) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.8417139503032675, LR: 0.0003 +[2026-02-27 14:30:45] (step=0004303) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.8419096067305811, LR: 0.0003 +[2026-02-27 14:30:53] (step=0004304) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.8421052631578947, LR: 0.0003 +[2026-02-27 14:31:01] (step=0004305) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.8423009195852084, LR: 0.0003 +[2026-02-27 14:31:09] (step=0004306) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.842496576012522, LR: 0.0003 +[2026-02-27 14:31:16] (step=0004307) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.8426922324398356, LR: 0.0003 +[2026-02-27 14:31:24] (step=0004308) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.8428878888671493, LR: 0.0003 +[2026-02-27 14:31:32] (step=0004309) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 0.8430835452944629, LR: 0.0003 +[2026-02-27 14:31:40] (step=0004310) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.8432792017217765, LR: 0.0003 +[2026-02-27 14:31:48] (step=0004311) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.8434748581490902, LR: 0.0003 +[2026-02-27 14:31:56] (step=0004312) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.8436705145764039, LR: 0.0003 +[2026-02-27 14:32:03] (step=0004313) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.8438661710037175, LR: 0.0003 +[2026-02-27 14:32:11] (step=0004314) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 0.8440618274310311, LR: 0.0003 +[2026-02-27 14:32:19] (step=0004315) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 0.8442574838583448, LR: 0.0003 +[2026-02-27 14:32:27] (step=0004316) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.8444531402856584, LR: 0.0003 +[2026-02-27 14:32:35] (step=0004317) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 0.844648796712972, LR: 0.0003 +[2026-02-27 14:32:43] (step=0004318) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.8448444531402857, LR: 0.0003 +[2026-02-27 14:32:50] (step=0004319) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.8450401095675993, LR: 0.0003 +[2026-02-27 14:32:58] (step=0004320) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.8452357659949129, LR: 0.0003 +[2026-02-27 14:33:06] (step=0004321) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 0.8454314224222266, LR: 0.0003 +[2026-02-27 14:33:14] (step=0004322) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.8456270788495402, LR: 0.0003 +[2026-02-27 14:33:22] (step=0004323) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.8458227352768538, LR: 0.0003 +[2026-02-27 14:33:30] (step=0004324) Train Loss: 0.4788, Train Steps/Sec: 0.13, Epoch: 0.8460183917041675, LR: 0.0003 +[2026-02-27 14:33:38] (step=0004325) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.8462140481314812, LR: 0.0003 +[2026-02-27 14:33:45] (step=0004326) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.8464097045587947, LR: 0.0003 +[2026-02-27 14:33:53] (step=0004327) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 0.8466053609861084, LR: 0.0003 +[2026-02-27 14:34:01] (step=0004328) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.8468010174134221, LR: 0.0003 +[2026-02-27 14:34:09] (step=0004329) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 0.8469966738407356, LR: 0.0003 +[2026-02-27 14:34:17] (step=0004330) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.8471923302680493, LR: 0.0003 +[2026-02-27 14:34:25] (step=0004331) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 0.847387986695363, LR: 0.0003 +[2026-02-27 14:34:32] (step=0004332) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.8475836431226765, LR: 0.0003 +[2026-02-27 14:34:40] (step=0004333) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.8477792995499902, LR: 0.0003 +[2026-02-27 14:34:48] (step=0004334) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 0.8479749559773039, LR: 0.0003 +[2026-02-27 14:34:56] (step=0004335) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.8481706124046174, LR: 0.0003 +[2026-02-27 14:35:04] (step=0004336) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 0.8483662688319311, LR: 0.0003 +[2026-02-27 14:35:12] (step=0004337) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 0.8485619252592448, LR: 0.0003 +[2026-02-27 14:35:19] (step=0004338) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.8487575816865583, LR: 0.0003 +[2026-02-27 14:35:27] (step=0004339) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.848953238113872, LR: 0.0003 +[2026-02-27 14:35:35] (step=0004340) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.8491488945411857, LR: 0.0003 +[2026-02-27 14:35:43] (step=0004341) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.8493445509684994, LR: 0.0003 +[2026-02-27 14:35:51] (step=0004342) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.8495402073958129, LR: 0.0003 +[2026-02-27 14:35:59] (step=0004343) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.8497358638231266, LR: 0.0003 +[2026-02-27 14:36:06] (step=0004344) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.8499315202504403, LR: 0.0003 +[2026-02-27 14:36:14] (step=0004345) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.8501271766777538, LR: 0.0003 +[2026-02-27 14:36:22] (step=0004346) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.8503228331050675, LR: 0.0003 +[2026-02-27 14:36:30] (step=0004347) Train Loss: 0.4733, Train Steps/Sec: 0.13, Epoch: 0.8505184895323812, LR: 0.0003 +[2026-02-27 14:36:38] (step=0004348) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.8507141459596947, LR: 0.0003 +[2026-02-27 14:36:46] (step=0004349) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.8509098023870084, LR: 0.0003 +[2026-02-27 14:36:53] (step=0004350) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.8511054588143221, LR: 0.0003 +[2026-02-27 14:37:01] (step=0004351) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.8513011152416357, LR: 0.0003 +[2026-02-27 14:37:09] (step=0004352) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.8514967716689493, LR: 0.0003 +[2026-02-27 14:37:17] (step=0004353) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.851692428096263, LR: 0.0003 +[2026-02-27 14:37:25] (step=0004354) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 0.8518880845235766, LR: 0.0003 +[2026-02-27 14:37:32] (step=0004355) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.8520837409508902, LR: 0.0003 +[2026-02-27 14:37:40] (step=0004356) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 0.8522793973782039, LR: 0.0003 +[2026-02-27 14:37:48] (step=0004357) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.8524750538055175, LR: 0.0003 +[2026-02-27 14:37:56] (step=0004358) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.8526707102328311, LR: 0.0003 +[2026-02-27 14:38:04] (step=0004359) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.8528663666601448, LR: 0.0003 +[2026-02-27 14:38:12] (step=0004360) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 0.8530620230874584, LR: 0.0003 +[2026-02-27 14:38:20] (step=0004361) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.853257679514772, LR: 0.0003 +[2026-02-27 14:38:28] (step=0004362) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.8534533359420857, LR: 0.0003 +[2026-02-27 14:38:35] (step=0004363) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.8536489923693993, LR: 0.0003 +[2026-02-27 14:38:43] (step=0004364) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 0.853844648796713, LR: 0.0003 +[2026-02-27 14:38:51] (step=0004365) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 0.8540403052240266, LR: 0.0003 +[2026-02-27 14:38:59] (step=0004366) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 0.8542359616513402, LR: 0.0003 +[2026-02-27 14:39:07] (step=0004367) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 0.8544316180786539, LR: 0.0003 +[2026-02-27 14:39:15] (step=0004368) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.8546272745059675, LR: 0.0003 +[2026-02-27 14:39:22] (step=0004369) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.8548229309332812, LR: 0.0003 +[2026-02-27 14:39:30] (step=0004370) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 0.8550185873605948, LR: 0.0003 +[2026-02-27 14:39:38] (step=0004371) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.8552142437879084, LR: 0.0003 +[2026-02-27 14:39:46] (step=0004372) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.8554099002152221, LR: 0.0003 +[2026-02-27 14:39:54] (step=0004373) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 0.8556055566425357, LR: 0.0003 +[2026-02-27 14:40:02] (step=0004374) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 0.8558012130698494, LR: 0.0003 +[2026-02-27 14:40:09] (step=0004375) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.855996869497163, LR: 0.0003 +[2026-02-27 14:40:17] (step=0004376) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.8561925259244766, LR: 0.0003 +[2026-02-27 14:40:25] (step=0004377) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.8563881823517903, LR: 0.0003 +[2026-02-27 14:40:33] (step=0004378) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.8565838387791039, LR: 0.0003 +[2026-02-27 14:40:41] (step=0004379) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.8567794952064175, LR: 0.0003 +[2026-02-27 14:40:49] (step=0004380) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 0.8569751516337312, LR: 0.0003 +[2026-02-27 14:40:57] (step=0004381) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 0.8571708080610448, LR: 0.0003 +[2026-02-27 14:41:04] (step=0004382) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.8573664644883584, LR: 0.0003 +[2026-02-27 14:41:12] (step=0004383) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.8575621209156721, LR: 0.0003 +[2026-02-27 14:41:20] (step=0004384) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 0.8577577773429857, LR: 0.0003 +[2026-02-27 14:41:28] (step=0004385) Train Loss: 0.4696, Train Steps/Sec: 0.13, Epoch: 0.8579534337702993, LR: 0.0003 +[2026-02-27 14:41:36] (step=0004386) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.858149090197613, LR: 0.0003 +[2026-02-27 14:41:44] (step=0004387) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.8583447466249267, LR: 0.0003 +[2026-02-27 14:41:51] (step=0004388) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 0.8585404030522402, LR: 0.0003 +[2026-02-27 14:41:59] (step=0004389) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.8587360594795539, LR: 0.0003 +[2026-02-27 14:42:07] (step=0004390) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.8589317159068676, LR: 0.0003 +[2026-02-27 14:42:15] (step=0004391) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.8591273723341811, LR: 0.0003 +[2026-02-27 14:42:23] (step=0004392) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.8593230287614948, LR: 0.0003 +[2026-02-27 14:42:30] (step=0004393) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.8595186851888085, LR: 0.0003 +[2026-02-27 14:42:38] (step=0004394) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.859714341616122, LR: 0.0003 +[2026-02-27 14:42:46] (step=0004395) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 0.8599099980434357, LR: 0.0003 +[2026-02-27 14:42:54] (step=0004396) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.8601056544707494, LR: 0.0003 +[2026-02-27 14:43:02] (step=0004397) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.860301310898063, LR: 0.0003 +[2026-02-27 14:43:10] (step=0004398) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.8604969673253766, LR: 0.0003 +[2026-02-27 14:43:17] (step=0004399) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.8606926237526903, LR: 0.0003 +[2026-02-27 14:43:25] (step=0004400) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 0.860888280180004, LR: 0.0003 +[2026-02-27 14:43:33] (step=0004401) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.8610839366073175, LR: 0.0003 +[2026-02-27 14:43:41] (step=0004402) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.8612795930346312, LR: 0.0003 +[2026-02-27 14:43:49] (step=0004403) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.8614752494619449, LR: 0.0003 +[2026-02-27 14:43:57] (step=0004404) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.8616709058892584, LR: 0.0003 +[2026-02-27 14:44:05] (step=0004405) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.8618665623165721, LR: 0.0003 +[2026-02-27 14:44:12] (step=0004406) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.8620622187438858, LR: 0.0003 +[2026-02-27 14:44:20] (step=0004407) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.8622578751711993, LR: 0.0003 +[2026-02-27 14:44:28] (step=0004408) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.862453531598513, LR: 0.0003 +[2026-02-27 14:44:36] (step=0004409) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.8626491880258267, LR: 0.0003 +[2026-02-27 14:44:44] (step=0004410) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 0.8628448444531402, LR: 0.0003 +[2026-02-27 14:44:52] (step=0004411) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.8630405008804539, LR: 0.0003 +[2026-02-27 14:45:00] (step=0004412) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.8632361573077676, LR: 0.0003 +[2026-02-27 14:45:07] (step=0004413) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.8634318137350812, LR: 0.0003 +[2026-02-27 14:45:15] (step=0004414) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.8636274701623948, LR: 0.0003 +[2026-02-27 14:45:23] (step=0004415) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.8638231265897085, LR: 0.0003 +[2026-02-27 14:45:31] (step=0004416) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.8640187830170221, LR: 0.0003 +[2026-02-27 14:45:39] (step=0004417) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.8642144394443357, LR: 0.0003 +[2026-02-27 14:45:46] (step=0004418) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.8644100958716494, LR: 0.0003 +[2026-02-27 14:45:54] (step=0004419) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.864605752298963, LR: 0.0003 +[2026-02-27 14:46:02] (step=0004420) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.8648014087262766, LR: 0.0003 +[2026-02-27 14:46:10] (step=0004421) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 0.8649970651535903, LR: 0.0003 +[2026-02-27 14:46:18] (step=0004422) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.8651927215809039, LR: 0.0003 +[2026-02-27 14:46:26] (step=0004423) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.8653883780082176, LR: 0.0003 +[2026-02-27 14:46:34] (step=0004424) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.8655840344355312, LR: 0.0003 +[2026-02-27 14:46:41] (step=0004425) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.8657796908628449, LR: 0.0003 +[2026-02-27 14:46:49] (step=0004426) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.8659753472901585, LR: 0.0003 +[2026-02-27 14:46:57] (step=0004427) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.8661710037174721, LR: 0.0003 +[2026-02-27 14:47:05] (step=0004428) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 0.8663666601447858, LR: 0.0003 +[2026-02-27 14:47:13] (step=0004429) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.8665623165720994, LR: 0.0003 +[2026-02-27 14:47:21] (step=0004430) Train Loss: 0.4797, Train Steps/Sec: 0.13, Epoch: 0.866757972999413, LR: 0.0003 +[2026-02-27 14:47:28] (step=0004431) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.8669536294267267, LR: 0.0003 +[2026-02-27 14:47:36] (step=0004432) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 0.8671492858540403, LR: 0.0003 +[2026-02-27 14:47:44] (step=0004433) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.867344942281354, LR: 0.0003 +[2026-02-27 14:47:52] (step=0004434) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.8675405987086676, LR: 0.0003 +[2026-02-27 14:48:00] (step=0004435) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.8677362551359812, LR: 0.0003 +[2026-02-27 14:48:08] (step=0004436) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.8679319115632949, LR: 0.0003 +[2026-02-27 14:48:15] (step=0004437) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 0.8681275679906085, LR: 0.0003 +[2026-02-27 14:48:23] (step=0004438) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.8683232244179221, LR: 0.0003 +[2026-02-27 14:48:31] (step=0004439) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.8685188808452358, LR: 0.0003 +[2026-02-27 14:48:39] (step=0004440) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 0.8687145372725494, LR: 0.0003 +[2026-02-27 14:48:47] (step=0004441) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.868910193699863, LR: 0.0003 +[2026-02-27 14:48:55] (step=0004442) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.8691058501271767, LR: 0.0003 +[2026-02-27 14:49:03] (step=0004443) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.8693015065544903, LR: 0.0003 +[2026-02-27 14:49:10] (step=0004444) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 0.8694971629818039, LR: 0.0003 +[2026-02-27 14:49:18] (step=0004445) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.8696928194091176, LR: 0.0003 +[2026-02-27 14:49:26] (step=0004446) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 0.8698884758364313, LR: 0.0003 +[2026-02-27 14:49:34] (step=0004447) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.8700841322637448, LR: 0.0003 +[2026-02-27 14:49:42] (step=0004448) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.8702797886910585, LR: 0.0003 +[2026-02-27 14:49:50] (step=0004449) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 0.8704754451183722, LR: 0.0003 +[2026-02-27 14:49:57] (step=0004450) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.8706711015456857, LR: 0.0003 +[2026-02-27 14:50:05] (step=0004451) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.8708667579729994, LR: 0.0003 +[2026-02-27 14:50:13] (step=0004452) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.8710624144003131, LR: 0.0003 +[2026-02-27 14:50:21] (step=0004453) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.8712580708276267, LR: 0.0003 +[2026-02-27 14:50:29] (step=0004454) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.8714537272549403, LR: 0.0003 +[2026-02-27 14:50:37] (step=0004455) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.871649383682254, LR: 0.0003 +[2026-02-27 14:50:44] (step=0004456) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.8718450401095676, LR: 0.0003 +[2026-02-27 14:50:52] (step=0004457) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.8720406965368812, LR: 0.0003 +[2026-02-27 14:51:00] (step=0004458) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 0.8722363529641949, LR: 0.0003 +[2026-02-27 14:51:08] (step=0004459) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 0.8724320093915086, LR: 0.0003 +[2026-02-27 14:51:16] (step=0004460) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.8726276658188221, LR: 0.0003 +[2026-02-27 14:51:24] (step=0004461) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.8728233222461358, LR: 0.0003 +[2026-02-27 14:51:31] (step=0004462) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.8730189786734495, LR: 0.0003 +[2026-02-27 14:51:39] (step=0004463) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.873214635100763, LR: 0.0003 +[2026-02-27 14:51:47] (step=0004464) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 0.8734102915280767, LR: 0.0003 +[2026-02-27 14:51:55] (step=0004465) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 0.8736059479553904, LR: 0.0003 +[2026-02-27 14:52:03] (step=0004466) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.8738016043827039, LR: 0.0003 +[2026-02-27 14:52:11] (step=0004467) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.8739972608100176, LR: 0.0003 +[2026-02-27 14:52:19] (step=0004468) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.8741929172373313, LR: 0.0003 +[2026-02-27 14:52:26] (step=0004469) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 0.8743885736646448, LR: 0.0003 +[2026-02-27 14:52:34] (step=0004470) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.8745842300919585, LR: 0.0003 +[2026-02-27 14:52:42] (step=0004471) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.8747798865192722, LR: 0.0003 +[2026-02-27 14:52:50] (step=0004472) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.8749755429465857, LR: 0.0003 +[2026-02-27 14:52:58] (step=0004473) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.8751711993738994, LR: 0.0003 +[2026-02-27 14:53:06] (step=0004474) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.8753668558012131, LR: 0.0003 +[2026-02-27 14:53:13] (step=0004475) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.8755625122285267, LR: 0.0003 +[2026-02-27 14:53:21] (step=0004476) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 0.8757581686558403, LR: 0.0003 +[2026-02-27 14:53:29] (step=0004477) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 0.875953825083154, LR: 0.0003 +[2026-02-27 14:53:37] (step=0004478) Train Loss: 0.4739, Train Steps/Sec: 0.13, Epoch: 0.8761494815104676, LR: 0.0003 +[2026-02-27 14:53:45] (step=0004479) Train Loss: 0.4791, Train Steps/Sec: 0.13, Epoch: 0.8763451379377812, LR: 0.0003 +[2026-02-27 14:53:53] (step=0004480) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.8765407943650949, LR: 0.0003 +[2026-02-27 14:54:01] (step=0004481) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.8767364507924086, LR: 0.0003 +[2026-02-27 14:54:08] (step=0004482) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.8769321072197221, LR: 0.0003 +[2026-02-27 14:54:16] (step=0004483) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.8771277636470358, LR: 0.0003 +[2026-02-27 14:54:24] (step=0004484) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.8773234200743495, LR: 0.0003 +[2026-02-27 14:54:32] (step=0004485) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.877519076501663, LR: 0.0003 +[2026-02-27 14:54:40] (step=0004486) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.8777147329289767, LR: 0.0003 +[2026-02-27 14:54:48] (step=0004487) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.8779103893562904, LR: 0.0003 +[2026-02-27 14:54:55] (step=0004488) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.878106045783604, LR: 0.0003 +[2026-02-27 14:55:03] (step=0004489) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.8783017022109176, LR: 0.0003 +[2026-02-27 14:55:11] (step=0004490) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 0.8784973586382313, LR: 0.0003 +[2026-02-27 14:55:19] (step=0004491) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 0.8786930150655449, LR: 0.0003 +[2026-02-27 14:55:27] (step=0004492) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.8788886714928585, LR: 0.0003 +[2026-02-27 14:55:35] (step=0004493) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 0.8790843279201722, LR: 0.0003 +[2026-02-27 14:55:42] (step=0004494) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 0.8792799843474858, LR: 0.0003 +[2026-02-27 14:55:50] (step=0004495) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.8794756407747994, LR: 0.0003 +[2026-02-27 14:55:58] (step=0004496) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.8796712972021131, LR: 0.0003 +[2026-02-27 14:56:06] (step=0004497) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 0.8798669536294267, LR: 0.0003 +[2026-02-27 14:56:14] (step=0004498) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.8800626100567404, LR: 0.0003 +[2026-02-27 14:56:22] (step=0004499) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 0.880258266484054, LR: 0.0003 +[2026-02-27 14:56:29] (step=0004500) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.8804539229113676, LR: 0.0003 +[2026-02-27 14:56:30] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0004500/ +[2026-02-27 14:56:37] (step=0004501) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.8806495793386813, LR: 0.0003 +[2026-02-27 14:56:45] (step=0004502) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.8808452357659949, LR: 0.0003 +[2026-02-27 14:56:53] (step=0004503) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.8810408921933085, LR: 0.0003 +[2026-02-27 14:57:01] (step=0004504) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.8812365486206222, LR: 0.0003 +[2026-02-27 14:57:09] (step=0004505) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 0.8814322050479358, LR: 0.0003 +[2026-02-27 14:57:17] (step=0004506) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.8816278614752494, LR: 0.0003 +[2026-02-27 14:57:24] (step=0004507) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.8818235179025631, LR: 0.0003 +[2026-02-27 14:57:32] (step=0004508) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.8820191743298768, LR: 0.0003 +[2026-02-27 14:57:40] (step=0004509) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.8822148307571904, LR: 0.0003 +[2026-02-27 14:57:48] (step=0004510) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 0.882410487184504, LR: 0.0003 +[2026-02-27 14:57:56] (step=0004511) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.8826061436118177, LR: 0.0003 +[2026-02-27 14:58:04] (step=0004512) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 0.8828018000391313, LR: 0.0003 +[2026-02-27 14:58:11] (step=0004513) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.8829974564664449, LR: 0.0003 +[2026-02-27 14:58:19] (step=0004514) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.8831931128937586, LR: 0.0003 +[2026-02-27 14:58:27] (step=0004515) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.8833887693210722, LR: 0.0003 +[2026-02-27 14:58:35] (step=0004516) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 0.8835844257483858, LR: 0.0003 +[2026-02-27 14:58:43] (step=0004517) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 0.8837800821756995, LR: 0.0003 +[2026-02-27 14:58:51] (step=0004518) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.8839757386030132, LR: 0.0003 +[2026-02-27 14:58:58] (step=0004519) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.8841713950303267, LR: 0.0003 +[2026-02-27 14:59:06] (step=0004520) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 0.8843670514576404, LR: 0.0003 +[2026-02-27 14:59:14] (step=0004521) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.8845627078849541, LR: 0.0003 +[2026-02-27 14:59:22] (step=0004522) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.8847583643122676, LR: 0.0003 +[2026-02-27 14:59:30] (step=0004523) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.8849540207395813, LR: 0.0003 +[2026-02-27 14:59:38] (step=0004524) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.885149677166895, LR: 0.0003 +[2026-02-27 14:59:46] (step=0004525) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 0.8853453335942085, LR: 0.0003 +[2026-02-27 14:59:53] (step=0004526) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.8855409900215222, LR: 0.0003 +[2026-02-27 15:00:01] (step=0004527) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.8857366464488359, LR: 0.0003 +[2026-02-27 15:00:09] (step=0004528) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.8859323028761494, LR: 0.0003 +[2026-02-27 15:00:17] (step=0004529) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 0.8861279593034631, LR: 0.0003 +[2026-02-27 15:00:25] (step=0004530) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.8863236157307768, LR: 0.0003 +[2026-02-27 15:00:33] (step=0004531) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.8865192721580903, LR: 0.0003 +[2026-02-27 15:00:41] (step=0004532) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.886714928585404, LR: 0.0003 +[2026-02-27 15:00:48] (step=0004533) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.8869105850127177, LR: 0.0003 +[2026-02-27 15:00:56] (step=0004534) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.8871062414400313, LR: 0.0003 +[2026-02-27 15:01:04] (step=0004535) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.8873018978673449, LR: 0.0003 +[2026-02-27 15:01:12] (step=0004536) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 0.8874975542946586, LR: 0.0003 +[2026-02-27 15:01:20] (step=0004537) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 0.8876932107219723, LR: 0.0003 +[2026-02-27 15:01:28] (step=0004538) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.8878888671492858, LR: 0.0003 +[2026-02-27 15:01:35] (step=0004539) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.8880845235765995, LR: 0.0003 +[2026-02-27 15:01:43] (step=0004540) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.8882801800039132, LR: 0.0003 +[2026-02-27 15:01:51] (step=0004541) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.8884758364312267, LR: 0.0003 +[2026-02-27 15:01:59] (step=0004542) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.8886714928585404, LR: 0.0003 +[2026-02-27 15:02:07] (step=0004543) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.8888671492858541, LR: 0.0003 +[2026-02-27 15:02:15] (step=0004544) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 0.8890628057131676, LR: 0.0003 +[2026-02-27 15:02:22] (step=0004545) Train Loss: 0.4783, Train Steps/Sec: 0.13, Epoch: 0.8892584621404813, LR: 0.0003 +[2026-02-27 15:02:30] (step=0004546) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.889454118567795, LR: 0.0003 +[2026-02-27 15:02:38] (step=0004547) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.8896497749951086, LR: 0.0003 +[2026-02-27 15:02:46] (step=0004548) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.8898454314224222, LR: 0.0003 +[2026-02-27 15:02:54] (step=0004549) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 0.8900410878497359, LR: 0.0003 +[2026-02-27 15:03:02] (step=0004550) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 0.8902367442770495, LR: 0.0003 +[2026-02-27 15:03:09] (step=0004551) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 0.8904324007043631, LR: 0.0003 +[2026-02-27 15:03:17] (step=0004552) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.8906280571316768, LR: 0.0003 +[2026-02-27 15:03:25] (step=0004553) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 0.8908237135589904, LR: 0.0003 +[2026-02-27 15:03:33] (step=0004554) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 0.891019369986304, LR: 0.0003 +[2026-02-27 15:03:41] (step=0004555) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.8912150264136177, LR: 0.0003 +[2026-02-27 15:03:49] (step=0004556) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.8914106828409313, LR: 0.0003 +[2026-02-27 15:03:56] (step=0004557) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.891606339268245, LR: 0.0003 +[2026-02-27 15:04:04] (step=0004558) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 0.8918019956955586, LR: 0.0003 +[2026-02-27 15:04:12] (step=0004559) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.8919976521228722, LR: 0.0003 +[2026-02-27 15:04:20] (step=0004560) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.8921933085501859, LR: 0.0003 +[2026-02-27 15:04:28] (step=0004561) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.8923889649774995, LR: 0.0003 +[2026-02-27 15:04:36] (step=0004562) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.8925846214048131, LR: 0.0003 +[2026-02-27 15:04:44] (step=0004563) Train Loss: 0.4510, Train Steps/Sec: 0.12, Epoch: 0.8927802778321268, LR: 0.0003 +[2026-02-27 15:04:51] (step=0004564) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.8929759342594404, LR: 0.0003 +[2026-02-27 15:04:59] (step=0004565) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.8931715906867541, LR: 0.0003 +[2026-02-27 15:05:07] (step=0004566) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 0.8933672471140677, LR: 0.0003 +[2026-02-27 15:05:15] (step=0004567) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 0.8935629035413813, LR: 0.0003 +[2026-02-27 15:05:23] (step=0004568) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.893758559968695, LR: 0.0003 +[2026-02-27 15:05:31] (step=0004569) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.8939542163960086, LR: 0.0003 +[2026-02-27 15:05:38] (step=0004570) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 0.8941498728233223, LR: 0.0003 +[2026-02-27 15:05:46] (step=0004571) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.8943455292506359, LR: 0.0003 +[2026-02-27 15:05:54] (step=0004572) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.8945411856779495, LR: 0.0003 +[2026-02-27 15:06:02] (step=0004573) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 0.8947368421052632, LR: 0.0003 +[2026-02-27 15:06:10] (step=0004574) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.8949324985325768, LR: 0.0003 +[2026-02-27 15:06:18] (step=0004575) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.8951281549598904, LR: 0.0003 +[2026-02-27 15:06:25] (step=0004576) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.8953238113872041, LR: 0.0003 +[2026-02-27 15:06:33] (step=0004577) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.8955194678145177, LR: 0.0003 +[2026-02-27 15:06:41] (step=0004578) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.8957151242418313, LR: 0.0003 +[2026-02-27 15:06:49] (step=0004579) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.895910780669145, LR: 0.0003 +[2026-02-27 15:06:57] (step=0004580) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.8961064370964587, LR: 0.0003 +[2026-02-27 15:07:05] (step=0004581) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.8963020935237722, LR: 0.0003 +[2026-02-27 15:07:13] (step=0004582) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 0.8964977499510859, LR: 0.0003 +[2026-02-27 15:07:20] (step=0004583) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.8966934063783996, LR: 0.0003 +[2026-02-27 15:07:28] (step=0004584) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 0.8968890628057131, LR: 0.0003 +[2026-02-27 15:07:36] (step=0004585) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.8970847192330268, LR: 0.0003 +[2026-02-27 15:07:44] (step=0004586) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.8972803756603405, LR: 0.0003 +[2026-02-27 15:07:52] (step=0004587) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.897476032087654, LR: 0.0003 +[2026-02-27 15:08:00] (step=0004588) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 0.8976716885149677, LR: 0.0003 +[2026-02-27 15:08:08] (step=0004589) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 0.8978673449422814, LR: 0.0003 +[2026-02-27 15:08:15] (step=0004590) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 0.8980630013695949, LR: 0.0003 +[2026-02-27 15:08:23] (step=0004591) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 0.8982586577969086, LR: 0.0003 +[2026-02-27 15:08:31] (step=0004592) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 0.8984543142242223, LR: 0.0003 +[2026-02-27 15:08:39] (step=0004593) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.898649970651536, LR: 0.0003 +[2026-02-27 15:08:47] (step=0004594) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.8988456270788495, LR: 0.0003 +[2026-02-27 15:08:55] (step=0004595) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 0.8990412835061632, LR: 0.0003 +[2026-02-27 15:09:02] (step=0004596) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.8992369399334769, LR: 0.0003 +[2026-02-27 15:09:10] (step=0004597) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 0.8994325963607904, LR: 0.0003 +[2026-02-27 15:09:18] (step=0004598) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 0.8996282527881041, LR: 0.0003 +[2026-02-27 15:09:26] (step=0004599) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 0.8998239092154178, LR: 0.0003 +[2026-02-27 15:09:34] (step=0004600) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 0.9000195656427313, LR: 0.0003 +[2026-02-27 15:09:42] (step=0004601) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.900215222070045, LR: 0.0003 +[2026-02-27 15:09:49] (step=0004602) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.9004108784973587, LR: 0.0003 +[2026-02-27 15:09:57] (step=0004603) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.9006065349246722, LR: 0.0003 +[2026-02-27 15:10:05] (step=0004604) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 0.9008021913519859, LR: 0.0003 +[2026-02-27 15:10:13] (step=0004605) Train Loss: 0.4587, Train Steps/Sec: 0.12, Epoch: 0.9009978477792996, LR: 0.0003 +[2026-02-27 15:10:21] (step=0004606) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 0.9011935042066131, LR: 0.0003 +[2026-02-27 15:10:29] (step=0004607) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 0.9013891606339268, LR: 0.0003 +[2026-02-27 15:10:37] (step=0004608) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 0.9015848170612405, LR: 0.0003 +[2026-02-27 15:10:44] (step=0004609) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.9017804734885541, LR: 0.0003 +[2026-02-27 15:10:52] (step=0004610) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.9019761299158677, LR: 0.0003 +[2026-02-27 15:11:00] (step=0004611) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 0.9021717863431814, LR: 0.0003 +[2026-02-27 15:11:08] (step=0004612) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.902367442770495, LR: 0.0003 +[2026-02-27 15:11:16] (step=0004613) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.9025630991978086, LR: 0.0003 +[2026-02-27 15:11:24] (step=0004614) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 0.9027587556251223, LR: 0.0003 +[2026-02-27 15:11:31] (step=0004615) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.9029544120524359, LR: 0.0003 +[2026-02-27 15:11:39] (step=0004616) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.9031500684797495, LR: 0.0003 +[2026-02-27 15:11:47] (step=0004617) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.9033457249070632, LR: 0.0003 +[2026-02-27 15:11:55] (step=0004618) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.9035413813343768, LR: 0.0003 +[2026-02-27 15:12:03] (step=0004619) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 0.9037370377616905, LR: 0.0003 +[2026-02-27 15:12:11] (step=0004620) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.9039326941890041, LR: 0.0003 +[2026-02-27 15:12:18] (step=0004621) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.9041283506163178, LR: 0.0003 +[2026-02-27 15:12:26] (step=0004622) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.9043240070436314, LR: 0.0003 +[2026-02-27 15:12:34] (step=0004623) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.904519663470945, LR: 0.0003 +[2026-02-27 15:12:42] (step=0004624) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.9047153198982587, LR: 0.0003 +[2026-02-27 15:12:50] (step=0004625) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.9049109763255723, LR: 0.0003 +[2026-02-27 15:12:58] (step=0004626) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.9051066327528859, LR: 0.0003 +[2026-02-27 15:13:05] (step=0004627) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.9053022891801996, LR: 0.0003 +[2026-02-27 15:13:13] (step=0004628) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 0.9054979456075132, LR: 0.0003 +[2026-02-27 15:13:21] (step=0004629) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 0.9056936020348269, LR: 0.0003 +[2026-02-27 15:13:29] (step=0004630) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.9058892584621405, LR: 0.0003 +[2026-02-27 15:13:37] (step=0004631) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.9060849148894541, LR: 0.0003 +[2026-02-27 15:13:45] (step=0004632) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 0.9062805713167678, LR: 0.0003 +[2026-02-27 15:13:53] (step=0004633) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.9064762277440814, LR: 0.0003 +[2026-02-27 15:14:00] (step=0004634) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.906671884171395, LR: 0.0003 +[2026-02-27 15:14:08] (step=0004635) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 0.9068675405987087, LR: 0.0003 +[2026-02-27 15:14:16] (step=0004636) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.9070631970260223, LR: 0.0003 +[2026-02-27 15:14:24] (step=0004637) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.9072588534533359, LR: 0.0003 +[2026-02-27 15:14:32] (step=0004638) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 0.9074545098806496, LR: 0.0003 +[2026-02-27 15:14:40] (step=0004639) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 0.9076501663079632, LR: 0.0003 +[2026-02-27 15:14:47] (step=0004640) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 0.9078458227352768, LR: 0.0003 +[2026-02-27 15:14:55] (step=0004641) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.9080414791625905, LR: 0.0003 +[2026-02-27 15:15:03] (step=0004642) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.9082371355899042, LR: 0.0003 +[2026-02-27 15:15:11] (step=0004643) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.9084327920172177, LR: 0.0003 +[2026-02-27 15:15:19] (step=0004644) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.9086284484445314, LR: 0.0003 +[2026-02-27 15:15:27] (step=0004645) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.9088241048718451, LR: 0.0003 +[2026-02-27 15:15:34] (step=0004646) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.9090197612991586, LR: 0.0003 +[2026-02-27 15:15:42] (step=0004647) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.9092154177264723, LR: 0.0003 +[2026-02-27 15:15:50] (step=0004648) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.909411074153786, LR: 0.0003 +[2026-02-27 15:15:58] (step=0004649) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 0.9096067305810996, LR: 0.0003 +[2026-02-27 15:16:06] (step=0004650) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.9098023870084132, LR: 0.0003 +[2026-02-27 15:16:14] (step=0004651) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 0.9099980434357269, LR: 0.0003 +[2026-02-27 15:16:22] (step=0004652) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 0.9101936998630406, LR: 0.0003 +[2026-02-27 15:16:29] (step=0004653) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.9103893562903541, LR: 0.0003 +[2026-02-27 15:16:37] (step=0004654) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 0.9105850127176678, LR: 0.0003 +[2026-02-27 15:16:45] (step=0004655) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 0.9107806691449815, LR: 0.0003 +[2026-02-27 15:16:53] (step=0004656) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.910976325572295, LR: 0.0003 +[2026-02-27 15:17:01] (step=0004657) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 0.9111719819996087, LR: 0.0003 +[2026-02-27 15:17:09] (step=0004658) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.9113676384269224, LR: 0.0003 +[2026-02-27 15:17:16] (step=0004659) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.9115632948542359, LR: 0.0003 +[2026-02-27 15:17:24] (step=0004660) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 0.9117589512815496, LR: 0.0003 +[2026-02-27 15:17:32] (step=0004661) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 0.9119546077088633, LR: 0.0003 +[2026-02-27 15:17:40] (step=0004662) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.9121502641361768, LR: 0.0003 +[2026-02-27 15:17:48] (step=0004663) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 0.9123459205634905, LR: 0.0003 +[2026-02-27 15:17:56] (step=0004664) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 0.9125415769908042, LR: 0.0003 +[2026-02-27 15:18:03] (step=0004665) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.9127372334181177, LR: 0.0003 +[2026-02-27 15:18:11] (step=0004666) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.9129328898454314, LR: 0.0003 +[2026-02-27 15:18:19] (step=0004667) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.9131285462727451, LR: 0.0003 +[2026-02-27 15:18:27] (step=0004668) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.9133242027000587, LR: 0.0003 +[2026-02-27 15:18:35] (step=0004669) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.9135198591273723, LR: 0.0003 +[2026-02-27 15:18:43] (step=0004670) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 0.913715515554686, LR: 0.0003 +[2026-02-27 15:18:50] (step=0004671) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 0.9139111719819996, LR: 0.0003 +[2026-02-27 15:18:58] (step=0004672) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 0.9141068284093132, LR: 0.0003 +[2026-02-27 15:19:06] (step=0004673) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.9143024848366269, LR: 0.0003 +[2026-02-27 15:19:14] (step=0004674) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.9144981412639405, LR: 0.0003 +[2026-02-27 15:19:22] (step=0004675) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.9146937976912541, LR: 0.0003 +[2026-02-27 15:19:30] (step=0004676) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.9148894541185678, LR: 0.0003 +[2026-02-27 15:19:38] (step=0004677) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 0.9150851105458815, LR: 0.0003 +[2026-02-27 15:19:45] (step=0004678) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.915280766973195, LR: 0.0003 +[2026-02-27 15:19:53] (step=0004679) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 0.9154764234005087, LR: 0.0003 +[2026-02-27 15:20:01] (step=0004680) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 0.9156720798278224, LR: 0.0003 +[2026-02-27 15:20:09] (step=0004681) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.915867736255136, LR: 0.0003 +[2026-02-27 15:20:17] (step=0004682) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.9160633926824496, LR: 0.0003 +[2026-02-27 15:20:25] (step=0004683) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.9162590491097633, LR: 0.0003 +[2026-02-27 15:20:33] (step=0004684) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.9164547055370769, LR: 0.0003 +[2026-02-27 15:20:40] (step=0004685) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 0.9166503619643905, LR: 0.0003 +[2026-02-27 15:20:48] (step=0004686) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.9168460183917042, LR: 0.0003 +[2026-02-27 15:20:56] (step=0004687) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 0.9170416748190178, LR: 0.0003 +[2026-02-27 15:21:04] (step=0004688) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 0.9172373312463314, LR: 0.0003 +[2026-02-27 15:21:12] (step=0004689) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.9174329876736451, LR: 0.0003 +[2026-02-27 15:21:20] (step=0004690) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 0.9176286441009587, LR: 0.0003 +[2026-02-27 15:21:27] (step=0004691) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 0.9178243005282724, LR: 0.0003 +[2026-02-27 15:21:35] (step=0004692) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.918019956955586, LR: 0.0003 +[2026-02-27 15:21:43] (step=0004693) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.9182156133828996, LR: 0.0003 +[2026-02-27 15:21:51] (step=0004694) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 0.9184112698102133, LR: 0.0003 +[2026-02-27 15:21:59] (step=0004695) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 0.9186069262375269, LR: 0.0003 +[2026-02-27 15:22:07] (step=0004696) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.9188025826648405, LR: 0.0003 +[2026-02-27 15:22:14] (step=0004697) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.9189982390921542, LR: 0.0003 +[2026-02-27 15:22:22] (step=0004698) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 0.9191938955194678, LR: 0.0003 +[2026-02-27 15:22:30] (step=0004699) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 0.9193895519467814, LR: 0.0003 +[2026-02-27 15:22:38] (step=0004700) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 0.9195852083740951, LR: 0.0003 +[2026-02-27 15:22:46] (step=0004701) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.9197808648014087, LR: 0.0003 +[2026-02-27 15:22:54] (step=0004702) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.9199765212287223, LR: 0.0003 +[2026-02-27 15:23:01] (step=0004703) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.920172177656036, LR: 0.0003 +[2026-02-27 15:23:09] (step=0004704) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 0.9203678340833497, LR: 0.0003 +[2026-02-27 15:23:17] (step=0004705) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 0.9205634905106633, LR: 0.0003 +[2026-02-27 15:23:25] (step=0004706) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 0.9207591469379769, LR: 0.0003 +[2026-02-27 15:23:33] (step=0004707) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.9209548033652906, LR: 0.0003 +[2026-02-27 15:23:41] (step=0004708) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.9211504597926042, LR: 0.0003 +[2026-02-27 15:23:48] (step=0004709) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 0.9213461162199178, LR: 0.0003 +[2026-02-27 15:23:56] (step=0004710) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.9215417726472315, LR: 0.0003 +[2026-02-27 15:24:04] (step=0004711) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.9217374290745451, LR: 0.0003 +[2026-02-27 15:24:12] (step=0004712) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.9219330855018587, LR: 0.0003 +[2026-02-27 15:24:20] (step=0004713) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.9221287419291724, LR: 0.0003 +[2026-02-27 15:24:28] (step=0004714) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.922324398356486, LR: 0.0003 +[2026-02-27 15:24:35] (step=0004715) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.9225200547837996, LR: 0.0003 +[2026-02-27 15:24:43] (step=0004716) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.9227157112111133, LR: 0.0003 +[2026-02-27 15:24:51] (step=0004717) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.922911367638427, LR: 0.0003 +[2026-02-27 15:24:59] (step=0004718) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.9231070240657405, LR: 0.0003 +[2026-02-27 15:25:07] (step=0004719) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.9233026804930542, LR: 0.0003 +[2026-02-27 15:25:15] (step=0004720) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 0.9234983369203679, LR: 0.0003 +[2026-02-27 15:25:22] (step=0004721) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.9236939933476814, LR: 0.0003 +[2026-02-27 15:25:30] (step=0004722) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.9238896497749951, LR: 0.0003 +[2026-02-27 15:25:38] (step=0004723) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.9240853062023088, LR: 0.0003 +[2026-02-27 15:25:46] (step=0004724) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.9242809626296223, LR: 0.0003 +[2026-02-27 15:25:54] (step=0004725) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 0.924476619056936, LR: 0.0003 +[2026-02-27 15:26:02] (step=0004726) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 0.9246722754842497, LR: 0.0003 +[2026-02-27 15:26:09] (step=0004727) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 0.9248679319115632, LR: 0.0003 +[2026-02-27 15:26:17] (step=0004728) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.9250635883388769, LR: 0.0003 +[2026-02-27 15:26:25] (step=0004729) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.9252592447661906, LR: 0.0003 +[2026-02-27 15:26:33] (step=0004730) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 0.9254549011935042, LR: 0.0003 +[2026-02-27 15:26:41] (step=0004731) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.9256505576208178, LR: 0.0003 +[2026-02-27 15:26:49] (step=0004732) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.9258462140481315, LR: 0.0003 +[2026-02-27 15:26:57] (step=0004733) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.9260418704754452, LR: 0.0003 +[2026-02-27 15:27:04] (step=0004734) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 0.9262375269027587, LR: 0.0003 +[2026-02-27 15:27:12] (step=0004735) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.9264331833300724, LR: 0.0003 +[2026-02-27 15:27:20] (step=0004736) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.9266288397573861, LR: 0.0003 +[2026-02-27 15:27:28] (step=0004737) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.9268244961846996, LR: 0.0003 +[2026-02-27 15:27:36] (step=0004738) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 0.9270201526120133, LR: 0.0003 +[2026-02-27 15:27:44] (step=0004739) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.927215809039327, LR: 0.0003 +[2026-02-27 15:27:51] (step=0004740) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.9274114654666405, LR: 0.0003 +[2026-02-27 15:27:59] (step=0004741) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.9276071218939542, LR: 0.0003 +[2026-02-27 15:28:07] (step=0004742) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.9278027783212679, LR: 0.0003 +[2026-02-27 15:28:15] (step=0004743) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.9279984347485815, LR: 0.0003 +[2026-02-27 15:28:23] (step=0004744) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.9281940911758951, LR: 0.0003 +[2026-02-27 15:28:31] (step=0004745) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.9283897476032088, LR: 0.0003 +[2026-02-27 15:28:38] (step=0004746) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.9285854040305224, LR: 0.0003 +[2026-02-27 15:28:46] (step=0004747) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.928781060457836, LR: 0.0003 +[2026-02-27 15:28:54] (step=0004748) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.9289767168851497, LR: 0.0003 +[2026-02-27 15:29:02] (step=0004749) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.9291723733124633, LR: 0.0003 +[2026-02-27 15:29:10] (step=0004750) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.929368029739777, LR: 0.0003 +[2026-02-27 15:29:18] (step=0004751) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 0.9295636861670906, LR: 0.0003 +[2026-02-27 15:29:26] (step=0004752) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.9297593425944042, LR: 0.0003 +[2026-02-27 15:29:33] (step=0004753) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 0.9299549990217179, LR: 0.0003 +[2026-02-27 15:29:41] (step=0004754) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.9301506554490315, LR: 0.0003 +[2026-02-27 15:29:49] (step=0004755) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.9303463118763451, LR: 0.0003 +[2026-02-27 15:29:57] (step=0004756) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 0.9305419683036588, LR: 0.0003 +[2026-02-27 15:30:05] (step=0004757) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.9307376247309724, LR: 0.0003 +[2026-02-27 15:30:13] (step=0004758) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.930933281158286, LR: 0.0003 +[2026-02-27 15:30:21] (step=0004759) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.9311289375855997, LR: 0.0003 +[2026-02-27 15:30:28] (step=0004760) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.9313245940129133, LR: 0.0003 +[2026-02-27 15:30:36] (step=0004761) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 0.931520250440227, LR: 0.0003 +[2026-02-27 15:30:44] (step=0004762) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 0.9317159068675406, LR: 0.0003 +[2026-02-27 15:30:52] (step=0004763) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.9319115632948543, LR: 0.0003 +[2026-02-27 15:31:00] (step=0004764) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.9321072197221679, LR: 0.0003 +[2026-02-27 15:31:08] (step=0004765) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 0.9323028761494815, LR: 0.0003 +[2026-02-27 15:31:15] (step=0004766) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.9324985325767952, LR: 0.0003 +[2026-02-27 15:31:23] (step=0004767) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.9326941890041088, LR: 0.0003 +[2026-02-27 15:31:31] (step=0004768) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.9328898454314224, LR: 0.0003 +[2026-02-27 15:31:39] (step=0004769) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.9330855018587361, LR: 0.0003 +[2026-02-27 15:31:47] (step=0004770) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.9332811582860497, LR: 0.0003 +[2026-02-27 15:31:55] (step=0004771) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.9334768147133633, LR: 0.0003 +[2026-02-27 15:32:02] (step=0004772) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.933672471140677, LR: 0.0003 +[2026-02-27 15:32:10] (step=0004773) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 0.9338681275679906, LR: 0.0003 +[2026-02-27 15:32:18] (step=0004774) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.9340637839953042, LR: 0.0003 +[2026-02-27 15:32:26] (step=0004775) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.9342594404226179, LR: 0.0003 +[2026-02-27 15:32:34] (step=0004776) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.9344550968499316, LR: 0.0003 +[2026-02-27 15:32:42] (step=0004777) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.9346507532772451, LR: 0.0003 +[2026-02-27 15:32:50] (step=0004778) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.9348464097045588, LR: 0.0003 +[2026-02-27 15:32:57] (step=0004779) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 0.9350420661318725, LR: 0.0003 +[2026-02-27 15:33:05] (step=0004780) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.935237722559186, LR: 0.0003 +[2026-02-27 15:33:13] (step=0004781) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 0.9354333789864997, LR: 0.0003 +[2026-02-27 15:33:21] (step=0004782) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.9356290354138134, LR: 0.0003 +[2026-02-27 15:33:29] (step=0004783) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 0.9358246918411269, LR: 0.0003 +[2026-02-27 15:33:37] (step=0004784) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.9360203482684406, LR: 0.0003 +[2026-02-27 15:33:44] (step=0004785) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.9362160046957543, LR: 0.0003 +[2026-02-27 15:33:52] (step=0004786) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 0.9364116611230678, LR: 0.0003 +[2026-02-27 15:34:00] (step=0004787) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 0.9366073175503815, LR: 0.0003 +[2026-02-27 15:34:08] (step=0004788) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 0.9368029739776952, LR: 0.0003 +[2026-02-27 15:34:16] (step=0004789) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 0.9369986304050089, LR: 0.0003 +[2026-02-27 15:34:24] (step=0004790) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 0.9371942868323224, LR: 0.0003 +[2026-02-27 15:34:31] (step=0004791) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.9373899432596361, LR: 0.0003 +[2026-02-27 15:34:39] (step=0004792) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.9375855996869498, LR: 0.0003 +[2026-02-27 15:34:47] (step=0004793) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.9377812561142633, LR: 0.0003 +[2026-02-27 15:34:55] (step=0004794) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.937976912541577, LR: 0.0003 +[2026-02-27 15:35:03] (step=0004795) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 0.9381725689688907, LR: 0.0003 +[2026-02-27 15:35:11] (step=0004796) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 0.9383682253962042, LR: 0.0003 +[2026-02-27 15:35:18] (step=0004797) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.9385638818235179, LR: 0.0003 +[2026-02-27 15:35:26] (step=0004798) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.9387595382508316, LR: 0.0003 +[2026-02-27 15:35:34] (step=0004799) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.9389551946781451, LR: 0.0003 +[2026-02-27 15:35:42] (step=0004800) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 0.9391508511054588, LR: 0.0003 +[2026-02-27 15:35:50] (step=0004801) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.9393465075327725, LR: 0.0003 +[2026-02-27 15:35:58] (step=0004802) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.939542163960086, LR: 0.0003 +[2026-02-27 15:36:05] (step=0004803) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 0.9397378203873997, LR: 0.0003 +[2026-02-27 15:36:13] (step=0004804) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.9399334768147134, LR: 0.0003 +[2026-02-27 15:36:21] (step=0004805) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 0.940129133242027, LR: 0.0003 +[2026-02-27 15:36:29] (step=0004806) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.9403247896693406, LR: 0.0003 +[2026-02-27 15:36:37] (step=0004807) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 0.9405204460966543, LR: 0.0003 +[2026-02-27 15:36:45] (step=0004808) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 0.9407161025239679, LR: 0.0003 +[2026-02-27 15:36:53] (step=0004809) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 0.9409117589512815, LR: 0.0003 +[2026-02-27 15:37:00] (step=0004810) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 0.9411074153785952, LR: 0.0003 +[2026-02-27 15:37:08] (step=0004811) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 0.9413030718059088, LR: 0.0003 +[2026-02-27 15:37:16] (step=0004812) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.9414987282332224, LR: 0.0003 +[2026-02-27 15:37:24] (step=0004813) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.9416943846605361, LR: 0.0003 +[2026-02-27 15:37:32] (step=0004814) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 0.9418900410878497, LR: 0.0003 +[2026-02-27 15:37:40] (step=0004815) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.9420856975151634, LR: 0.0003 +[2026-02-27 15:37:47] (step=0004816) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 0.942281353942477, LR: 0.0003 +[2026-02-27 15:37:55] (step=0004817) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 0.9424770103697907, LR: 0.0003 +[2026-02-27 15:38:03] (step=0004818) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 0.9426726667971043, LR: 0.0003 +[2026-02-27 15:38:11] (step=0004819) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.9428683232244179, LR: 0.0003 +[2026-02-27 15:38:19] (step=0004820) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.9430639796517316, LR: 0.0003 +[2026-02-27 15:38:27] (step=0004821) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.9432596360790452, LR: 0.0003 +[2026-02-27 15:38:34] (step=0004822) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 0.9434552925063588, LR: 0.0003 +[2026-02-27 15:38:42] (step=0004823) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.9436509489336725, LR: 0.0003 +[2026-02-27 15:38:50] (step=0004824) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.9438466053609861, LR: 0.0003 +[2026-02-27 15:38:58] (step=0004825) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.9440422617882998, LR: 0.0003 +[2026-02-27 15:39:06] (step=0004826) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 0.9442379182156134, LR: 0.0003 +[2026-02-27 15:39:14] (step=0004827) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 0.944433574642927, LR: 0.0003 +[2026-02-27 15:39:21] (step=0004828) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.9446292310702407, LR: 0.0003 +[2026-02-27 15:39:29] (step=0004829) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 0.9448248874975543, LR: 0.0003 +[2026-02-27 15:39:37] (step=0004830) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.9450205439248679, LR: 0.0003 +[2026-02-27 15:39:45] (step=0004831) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.9452162003521816, LR: 0.0003 +[2026-02-27 15:39:53] (step=0004832) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 0.9454118567794952, LR: 0.0003 +[2026-02-27 15:40:01] (step=0004833) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 0.9456075132068088, LR: 0.0003 +[2026-02-27 15:40:09] (step=0004834) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 0.9458031696341225, LR: 0.0003 +[2026-02-27 15:40:16] (step=0004835) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.9459988260614361, LR: 0.0003 +[2026-02-27 15:40:24] (step=0004836) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 0.9461944824887497, LR: 0.0003 +[2026-02-27 15:40:32] (step=0004837) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 0.9463901389160634, LR: 0.0003 +[2026-02-27 15:40:40] (step=0004838) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.9465857953433771, LR: 0.0003 +[2026-02-27 15:40:48] (step=0004839) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 0.9467814517706906, LR: 0.0003 +[2026-02-27 15:40:56] (step=0004840) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.9469771081980043, LR: 0.0003 +[2026-02-27 15:41:03] (step=0004841) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.947172764625318, LR: 0.0003 +[2026-02-27 15:41:11] (step=0004842) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.9473684210526315, LR: 0.0003 +[2026-02-27 15:41:19] (step=0004843) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.9475640774799452, LR: 0.0003 +[2026-02-27 15:41:27] (step=0004844) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 0.9477597339072589, LR: 0.0003 +[2026-02-27 15:41:35] (step=0004845) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.9479553903345725, LR: 0.0003 +[2026-02-27 15:41:43] (step=0004846) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 0.9481510467618861, LR: 0.0003 +[2026-02-27 15:41:50] (step=0004847) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 0.9483467031891998, LR: 0.0003 +[2026-02-27 15:41:58] (step=0004848) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.9485423596165135, LR: 0.0003 +[2026-02-27 15:42:06] (step=0004849) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 0.948738016043827, LR: 0.0003 +[2026-02-27 15:42:14] (step=0004850) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 0.9489336724711407, LR: 0.0003 +[2026-02-27 15:42:22] (step=0004851) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.9491293288984544, LR: 0.0003 +[2026-02-27 15:42:30] (step=0004852) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.9493249853257679, LR: 0.0003 +[2026-02-27 15:42:37] (step=0004853) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.9495206417530816, LR: 0.0003 +[2026-02-27 15:42:45] (step=0004854) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.9497162981803953, LR: 0.0003 +[2026-02-27 15:42:53] (step=0004855) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 0.9499119546077088, LR: 0.0003 +[2026-02-27 15:43:01] (step=0004856) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.9501076110350225, LR: 0.0003 +[2026-02-27 15:43:09] (step=0004857) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.9503032674623362, LR: 0.0003 +[2026-02-27 15:43:17] (step=0004858) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.9504989238896497, LR: 0.0003 +[2026-02-27 15:43:25] (step=0004859) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 0.9506945803169634, LR: 0.0003 +[2026-02-27 15:43:33] (step=0004860) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.9508902367442771, LR: 0.0003 +[2026-02-27 15:43:40] (step=0004861) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.9510858931715906, LR: 0.0003 +[2026-02-27 15:43:48] (step=0004862) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 0.9512815495989043, LR: 0.0003 +[2026-02-27 15:43:56] (step=0004863) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 0.951477206026218, LR: 0.0003 +[2026-02-27 15:44:04] (step=0004864) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 0.9516728624535316, LR: 0.0003 +[2026-02-27 15:44:12] (step=0004865) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.9518685188808452, LR: 0.0003 +[2026-02-27 15:44:20] (step=0004866) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 0.9520641753081589, LR: 0.0003 +[2026-02-27 15:44:27] (step=0004867) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 0.9522598317354725, LR: 0.0003 +[2026-02-27 15:44:35] (step=0004868) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.9524554881627861, LR: 0.0003 +[2026-02-27 15:44:43] (step=0004869) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.9526511445900998, LR: 0.0003 +[2026-02-27 15:44:51] (step=0004870) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 0.9528468010174134, LR: 0.0003 +[2026-02-27 15:44:59] (step=0004871) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 0.953042457444727, LR: 0.0003 +[2026-02-27 15:45:06] (step=0004872) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 0.9532381138720407, LR: 0.0003 +[2026-02-27 15:45:14] (step=0004873) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.9534337702993544, LR: 0.0003 +[2026-02-27 15:45:22] (step=0004874) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.953629426726668, LR: 0.0003 +[2026-02-27 15:45:30] (step=0004875) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.9538250831539816, LR: 0.0003 +[2026-02-27 15:45:38] (step=0004876) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.9540207395812953, LR: 0.0003 +[2026-02-27 15:45:46] (step=0004877) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.9542163960086089, LR: 0.0003 +[2026-02-27 15:45:53] (step=0004878) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.9544120524359225, LR: 0.0003 +[2026-02-27 15:46:01] (step=0004879) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 0.9546077088632362, LR: 0.0003 +[2026-02-27 15:46:09] (step=0004880) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.9548033652905498, LR: 0.0003 +[2026-02-27 15:46:17] (step=0004881) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.9549990217178634, LR: 0.0003 +[2026-02-27 15:46:25] (step=0004882) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.9551946781451771, LR: 0.0003 +[2026-02-27 15:46:33] (step=0004883) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.9553903345724907, LR: 0.0003 +[2026-02-27 15:46:41] (step=0004884) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.9555859909998043, LR: 0.0003 +[2026-02-27 15:46:48] (step=0004885) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.955781647427118, LR: 0.0003 +[2026-02-27 15:46:56] (step=0004886) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.9559773038544316, LR: 0.0003 +[2026-02-27 15:47:04] (step=0004887) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.9561729602817453, LR: 0.0003 +[2026-02-27 15:47:12] (step=0004888) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 0.9563686167090589, LR: 0.0003 +[2026-02-27 15:47:20] (step=0004889) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 0.9565642731363725, LR: 0.0003 +[2026-02-27 15:47:28] (step=0004890) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.9567599295636862, LR: 0.0003 +[2026-02-27 15:47:35] (step=0004891) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 0.9569555859909998, LR: 0.0003 +[2026-02-27 15:47:43] (step=0004892) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 0.9571512424183134, LR: 0.0003 +[2026-02-27 15:47:51] (step=0004893) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 0.9573468988456271, LR: 0.0003 +[2026-02-27 15:47:59] (step=0004894) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 0.9575425552729407, LR: 0.0003 +[2026-02-27 15:48:07] (step=0004895) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.9577382117002543, LR: 0.0003 +[2026-02-27 15:48:15] (step=0004896) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.957933868127568, LR: 0.0003 +[2026-02-27 15:48:22] (step=0004897) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 0.9581295245548817, LR: 0.0003 +[2026-02-27 15:48:30] (step=0004898) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 0.9583251809821952, LR: 0.0003 +[2026-02-27 15:48:38] (step=0004899) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.9585208374095089, LR: 0.0003 +[2026-02-27 15:48:46] (step=0004900) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 0.9587164938368226, LR: 0.0003 +[2026-02-27 15:48:54] (step=0004901) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 0.9589121502641362, LR: 0.0003 +[2026-02-27 15:49:02] (step=0004902) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.9591078066914498, LR: 0.0003 +[2026-02-27 15:49:10] (step=0004903) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 0.9593034631187635, LR: 0.0003 +[2026-02-27 15:49:17] (step=0004904) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 0.9594991195460771, LR: 0.0003 +[2026-02-27 15:49:25] (step=0004905) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.9596947759733907, LR: 0.0003 +[2026-02-27 15:49:33] (step=0004906) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.9598904324007044, LR: 0.0003 +[2026-02-27 15:49:41] (step=0004907) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 0.960086088828018, LR: 0.0003 +[2026-02-27 15:49:49] (step=0004908) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 0.9602817452553316, LR: 0.0003 +[2026-02-27 15:49:57] (step=0004909) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 0.9604774016826453, LR: 0.0003 +[2026-02-27 15:50:04] (step=0004910) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 0.960673058109959, LR: 0.0003 +[2026-02-27 15:50:12] (step=0004911) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.9608687145372725, LR: 0.0003 +[2026-02-27 15:50:20] (step=0004912) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.9610643709645862, LR: 0.0003 +[2026-02-27 15:50:28] (step=0004913) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.9612600273918999, LR: 0.0003 +[2026-02-27 15:50:36] (step=0004914) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.9614556838192134, LR: 0.0003 +[2026-02-27 15:50:44] (step=0004915) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 0.9616513402465271, LR: 0.0003 +[2026-02-27 15:50:51] (step=0004916) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.9618469966738408, LR: 0.0003 +[2026-02-27 15:50:59] (step=0004917) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.9620426531011543, LR: 0.0003 +[2026-02-27 15:51:07] (step=0004918) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.962238309528468, LR: 0.0003 +[2026-02-27 15:51:15] (step=0004919) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 0.9624339659557817, LR: 0.0003 +[2026-02-27 15:51:23] (step=0004920) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 0.9626296223830952, LR: 0.0003 +[2026-02-27 15:51:31] (step=0004921) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.9628252788104089, LR: 0.0003 +[2026-02-27 15:51:38] (step=0004922) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 0.9630209352377226, LR: 0.0003 +[2026-02-27 15:51:46] (step=0004923) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.9632165916650361, LR: 0.0003 +[2026-02-27 15:51:54] (step=0004924) Train Loss: 0.4750, Train Steps/Sec: 0.13, Epoch: 0.9634122480923498, LR: 0.0003 +[2026-02-27 15:52:02] (step=0004925) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.9636079045196635, LR: 0.0003 +[2026-02-27 15:52:10] (step=0004926) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 0.9638035609469771, LR: 0.0003 +[2026-02-27 15:52:18] (step=0004927) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.9639992173742907, LR: 0.0003 +[2026-02-27 15:52:25] (step=0004928) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.9641948738016044, LR: 0.0003 +[2026-02-27 15:52:33] (step=0004929) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 0.9643905302289181, LR: 0.0003 +[2026-02-27 15:52:41] (step=0004930) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.9645861866562316, LR: 0.0003 +[2026-02-27 15:52:49] (step=0004931) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 0.9647818430835453, LR: 0.0003 +[2026-02-27 15:52:57] (step=0004932) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.964977499510859, LR: 0.0003 +[2026-02-27 15:53:05] (step=0004933) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.9651731559381725, LR: 0.0003 +[2026-02-27 15:53:13] (step=0004934) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 0.9653688123654862, LR: 0.0003 +[2026-02-27 15:53:20] (step=0004935) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.9655644687927999, LR: 0.0003 +[2026-02-27 15:53:28] (step=0004936) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.9657601252201135, LR: 0.0003 +[2026-02-27 15:53:36] (step=0004937) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 0.9659557816474271, LR: 0.0003 +[2026-02-27 15:53:44] (step=0004938) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.9661514380747408, LR: 0.0003 +[2026-02-27 15:53:52] (step=0004939) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.9663470945020544, LR: 0.0003 +[2026-02-27 15:54:00] (step=0004940) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.966542750929368, LR: 0.0003 +[2026-02-27 15:54:07] (step=0004941) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 0.9667384073566817, LR: 0.0003 +[2026-02-27 15:54:15] (step=0004942) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 0.9669340637839953, LR: 0.0003 +[2026-02-27 15:54:23] (step=0004943) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 0.9671297202113089, LR: 0.0003 +[2026-02-27 15:54:31] (step=0004944) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 0.9673253766386226, LR: 0.0003 +[2026-02-27 15:54:39] (step=0004945) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.9675210330659362, LR: 0.0003 +[2026-02-27 15:54:47] (step=0004946) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 0.9677166894932498, LR: 0.0003 +[2026-02-27 15:54:54] (step=0004947) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 0.9679123459205635, LR: 0.0003 +[2026-02-27 15:55:02] (step=0004948) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 0.9681080023478771, LR: 0.0003 +[2026-02-27 15:55:10] (step=0004949) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.9683036587751908, LR: 0.0003 +[2026-02-27 15:55:18] (step=0004950) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 0.9684993152025044, LR: 0.0003 +[2026-02-27 15:55:26] (step=0004951) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 0.968694971629818, LR: 0.0003 +[2026-02-27 15:55:34] (step=0004952) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 0.9688906280571317, LR: 0.0003 +[2026-02-27 15:55:42] (step=0004953) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 0.9690862844844453, LR: 0.0003 +[2026-02-27 15:55:49] (step=0004954) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 0.9692819409117589, LR: 0.0003 +[2026-02-27 15:55:57] (step=0004955) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 0.9694775973390726, LR: 0.0003 +[2026-02-27 15:56:05] (step=0004956) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.9696732537663862, LR: 0.0003 +[2026-02-27 15:56:13] (step=0004957) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 0.9698689101936999, LR: 0.0003 +[2026-02-27 15:56:21] (step=0004958) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 0.9700645666210135, LR: 0.0003 +[2026-02-27 15:56:29] (step=0004959) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 0.9702602230483272, LR: 0.0003 +[2026-02-27 15:56:36] (step=0004960) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 0.9704558794756408, LR: 0.0003 +[2026-02-27 15:56:44] (step=0004961) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 0.9706515359029544, LR: 0.0003 +[2026-02-27 15:56:52] (step=0004962) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 0.9708471923302681, LR: 0.0003 +[2026-02-27 15:57:00] (step=0004963) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.9710428487575817, LR: 0.0003 +[2026-02-27 15:57:08] (step=0004964) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.9712385051848953, LR: 0.0003 +[2026-02-27 15:57:16] (step=0004965) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.971434161612209, LR: 0.0003 +[2026-02-27 15:57:23] (step=0004966) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 0.9716298180395226, LR: 0.0003 +[2026-02-27 15:57:31] (step=0004967) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.9718254744668362, LR: 0.0003 +[2026-02-27 15:57:39] (step=0004968) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.9720211308941499, LR: 0.0003 +[2026-02-27 15:57:47] (step=0004969) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.9722167873214635, LR: 0.0003 +[2026-02-27 15:57:55] (step=0004970) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 0.9724124437487771, LR: 0.0003 +[2026-02-27 15:58:03] (step=0004971) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 0.9726081001760908, LR: 0.0003 +[2026-02-27 15:58:10] (step=0004972) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 0.9728037566034045, LR: 0.0003 +[2026-02-27 15:58:18] (step=0004973) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.972999413030718, LR: 0.0003 +[2026-02-27 15:58:26] (step=0004974) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 0.9731950694580317, LR: 0.0003 +[2026-02-27 15:58:34] (step=0004975) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 0.9733907258853454, LR: 0.0003 +[2026-02-27 15:58:42] (step=0004976) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 0.9735863823126589, LR: 0.0003 +[2026-02-27 15:58:50] (step=0004977) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 0.9737820387399726, LR: 0.0003 +[2026-02-27 15:58:57] (step=0004978) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 0.9739776951672863, LR: 0.0003 +[2026-02-27 15:59:05] (step=0004979) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.9741733515945998, LR: 0.0003 +[2026-02-27 15:59:13] (step=0004980) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 0.9743690080219135, LR: 0.0003 +[2026-02-27 15:59:21] (step=0004981) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.9745646644492272, LR: 0.0003 +[2026-02-27 15:59:29] (step=0004982) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 0.9747603208765407, LR: 0.0003 +[2026-02-27 15:59:37] (step=0004983) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 0.9749559773038544, LR: 0.0003 +[2026-02-27 15:59:45] (step=0004984) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.9751516337311681, LR: 0.0003 +[2026-02-27 15:59:52] (step=0004985) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.9753472901584818, LR: 0.0003 +[2026-02-27 16:00:00] (step=0004986) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.9755429465857953, LR: 0.0003 +[2026-02-27 16:00:08] (step=0004987) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 0.975738603013109, LR: 0.0003 +[2026-02-27 16:00:16] (step=0004988) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 0.9759342594404227, LR: 0.0003 +[2026-02-27 16:00:24] (step=0004989) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 0.9761299158677362, LR: 0.0003 +[2026-02-27 16:00:32] (step=0004990) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 0.9763255722950499, LR: 0.0003 +[2026-02-27 16:00:39] (step=0004991) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 0.9765212287223636, LR: 0.0003 +[2026-02-27 16:00:47] (step=0004992) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 0.9767168851496771, LR: 0.0003 +[2026-02-27 16:00:55] (step=0004993) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 0.9769125415769908, LR: 0.0003 +[2026-02-27 16:01:03] (step=0004994) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 0.9771081980043045, LR: 0.0003 +[2026-02-27 16:01:11] (step=0004995) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 0.977303854431618, LR: 0.0003 +[2026-02-27 16:01:19] (step=0004996) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.9774995108589317, LR: 0.0003 +[2026-02-27 16:01:27] (step=0004997) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.9776951672862454, LR: 0.0003 +[2026-02-27 16:01:34] (step=0004998) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.977890823713559, LR: 0.0003 +[2026-02-27 16:01:42] (step=0004999) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 0.9780864801408726, LR: 0.0003 +[2026-02-27 16:01:50] (step=0005000) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 0.9782821365681863, LR: 0.0003 +[2026-02-27 16:01:50] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0005000/ +[2026-02-27 16:01:58] (step=0005001) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.9784777929954999, LR: 0.0003 +[2026-02-27 16:02:06] (step=0005002) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 0.9786734494228135, LR: 0.0003 +[2026-02-27 16:02:14] (step=0005003) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.9788691058501272, LR: 0.0003 +[2026-02-27 16:02:21] (step=0005004) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.9790647622774408, LR: 0.0003 +[2026-02-27 16:02:29] (step=0005005) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.9792604187047544, LR: 0.0003 +[2026-02-27 16:02:37] (step=0005006) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 0.9794560751320681, LR: 0.0003 +[2026-02-27 16:02:45] (step=0005007) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 0.9796517315593817, LR: 0.0003 +[2026-02-27 16:02:53] (step=0005008) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 0.9798473879866954, LR: 0.0003 +[2026-02-27 16:03:01] (step=0005009) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.980043044414009, LR: 0.0003 +[2026-02-27 16:03:08] (step=0005010) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 0.9802387008413226, LR: 0.0003 +[2026-02-27 16:03:16] (step=0005011) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.9804343572686363, LR: 0.0003 +[2026-02-27 16:03:24] (step=0005012) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 0.9806300136959499, LR: 0.0003 +[2026-02-27 16:03:32] (step=0005013) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 0.9808256701232636, LR: 0.0003 +[2026-02-27 16:03:40] (step=0005014) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 0.9810213265505772, LR: 0.0003 +[2026-02-27 16:03:48] (step=0005015) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.9812169829778908, LR: 0.0003 +[2026-02-27 16:03:56] (step=0005016) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 0.9814126394052045, LR: 0.0003 +[2026-02-27 16:04:03] (step=0005017) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.9816082958325181, LR: 0.0003 +[2026-02-27 16:04:11] (step=0005018) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.9818039522598317, LR: 0.0003 +[2026-02-27 16:04:19] (step=0005019) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 0.9819996086871454, LR: 0.0003 +[2026-02-27 16:04:27] (step=0005020) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 0.982195265114459, LR: 0.0003 +[2026-02-27 16:04:35] (step=0005021) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 0.9823909215417727, LR: 0.0003 +[2026-02-27 16:04:43] (step=0005022) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 0.9825865779690863, LR: 0.0003 +[2026-02-27 16:04:50] (step=0005023) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 0.9827822343963999, LR: 0.0003 +[2026-02-27 16:04:58] (step=0005024) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 0.9829778908237136, LR: 0.0003 +[2026-02-27 16:05:06] (step=0005025) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 0.9831735472510272, LR: 0.0003 +[2026-02-27 16:05:14] (step=0005026) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.9833692036783408, LR: 0.0003 +[2026-02-27 16:05:22] (step=0005027) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 0.9835648601056545, LR: 0.0003 +[2026-02-27 16:05:30] (step=0005028) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.9837605165329681, LR: 0.0003 +[2026-02-27 16:05:38] (step=0005029) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 0.9839561729602817, LR: 0.0003 +[2026-02-27 16:05:45] (step=0005030) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.9841518293875954, LR: 0.0003 +[2026-02-27 16:05:53] (step=0005031) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.984347485814909, LR: 0.0003 +[2026-02-27 16:06:01] (step=0005032) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 0.9845431422422226, LR: 0.0003 +[2026-02-27 16:06:09] (step=0005033) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 0.9847387986695363, LR: 0.0003 +[2026-02-27 16:06:17] (step=0005034) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 0.98493445509685, LR: 0.0003 +[2026-02-27 16:06:25] (step=0005035) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 0.9851301115241635, LR: 0.0003 +[2026-02-27 16:06:32] (step=0005036) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.9853257679514772, LR: 0.0003 +[2026-02-27 16:06:40] (step=0005037) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 0.9855214243787909, LR: 0.0003 +[2026-02-27 16:06:48] (step=0005038) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 0.9857170808061044, LR: 0.0003 +[2026-02-27 16:06:56] (step=0005039) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 0.9859127372334181, LR: 0.0003 +[2026-02-27 16:07:04] (step=0005040) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 0.9861083936607318, LR: 0.0003 +[2026-02-27 16:07:12] (step=0005041) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.9863040500880454, LR: 0.0003 +[2026-02-27 16:07:19] (step=0005042) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 0.986499706515359, LR: 0.0003 +[2026-02-27 16:07:27] (step=0005043) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 0.9866953629426727, LR: 0.0003 +[2026-02-27 16:07:35] (step=0005044) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 0.9868910193699864, LR: 0.0003 +[2026-02-27 16:07:43] (step=0005045) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 0.9870866757972999, LR: 0.0003 +[2026-02-27 16:07:51] (step=0005046) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.9872823322246136, LR: 0.0003 +[2026-02-27 16:07:59] (step=0005047) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 0.9874779886519273, LR: 0.0003 +[2026-02-27 16:08:07] (step=0005048) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 0.9876736450792408, LR: 0.0003 +[2026-02-27 16:08:14] (step=0005049) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 0.9878693015065545, LR: 0.0003 +[2026-02-27 16:08:22] (step=0005050) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 0.9880649579338682, LR: 0.0003 +[2026-02-27 16:08:30] (step=0005051) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 0.9882606143611817, LR: 0.0003 +[2026-02-27 16:08:38] (step=0005052) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 0.9884562707884954, LR: 0.0003 +[2026-02-27 16:08:46] (step=0005053) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 0.9886519272158091, LR: 0.0003 +[2026-02-27 16:08:54] (step=0005054) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 0.9888475836431226, LR: 0.0003 +[2026-02-27 16:09:01] (step=0005055) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 0.9890432400704363, LR: 0.0003 +[2026-02-27 16:09:09] (step=0005056) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.98923889649775, LR: 0.0003 +[2026-02-27 16:09:17] (step=0005057) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 0.9894345529250635, LR: 0.0003 +[2026-02-27 16:09:25] (step=0005058) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 0.9896302093523772, LR: 0.0003 +[2026-02-27 16:09:33] (step=0005059) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.9898258657796909, LR: 0.0003 +[2026-02-27 16:09:41] (step=0005060) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 0.9900215222070045, LR: 0.0003 +[2026-02-27 16:09:49] (step=0005061) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 0.9902171786343181, LR: 0.0003 +[2026-02-27 16:09:56] (step=0005062) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 0.9904128350616318, LR: 0.0003 +[2026-02-27 16:10:04] (step=0005063) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.9906084914889454, LR: 0.0003 +[2026-02-27 16:10:12] (step=0005064) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.990804147916259, LR: 0.0003 +[2026-02-27 16:10:20] (step=0005065) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 0.9909998043435727, LR: 0.0003 +[2026-02-27 16:10:28] (step=0005066) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 0.9911954607708863, LR: 0.0003 +[2026-02-27 16:10:36] (step=0005067) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 0.9913911171982, LR: 0.0003 +[2026-02-27 16:10:43] (step=0005068) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 0.9915867736255136, LR: 0.0003 +[2026-02-27 16:10:51] (step=0005069) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 0.9917824300528273, LR: 0.0003 +[2026-02-27 16:10:59] (step=0005070) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 0.9919780864801409, LR: 0.0003 +[2026-02-27 16:11:07] (step=0005071) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 0.9921737429074545, LR: 0.0003 +[2026-02-27 16:11:15] (step=0005072) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 0.9923693993347682, LR: 0.0003 +[2026-02-27 16:11:23] (step=0005073) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.9925650557620818, LR: 0.0003 +[2026-02-27 16:11:30] (step=0005074) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 0.9927607121893954, LR: 0.0003 +[2026-02-27 16:11:38] (step=0005075) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 0.9929563686167091, LR: 0.0003 +[2026-02-27 16:11:46] (step=0005076) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 0.9931520250440227, LR: 0.0003 +[2026-02-27 16:11:54] (step=0005077) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 0.9933476814713363, LR: 0.0003 +[2026-02-27 16:12:02] (step=0005078) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 0.99354333789865, LR: 0.0003 +[2026-02-27 16:12:10] (step=0005079) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 0.9937389943259636, LR: 0.0003 +[2026-02-27 16:12:18] (step=0005080) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 0.9939346507532772, LR: 0.0003 +[2026-02-27 16:12:25] (step=0005081) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 0.9941303071805909, LR: 0.0003 +[2026-02-27 16:12:33] (step=0005082) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 0.9943259636079045, LR: 0.0003 +[2026-02-27 16:12:41] (step=0005083) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 0.9945216200352182, LR: 0.0003 +[2026-02-27 16:12:49] (step=0005084) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 0.9947172764625318, LR: 0.0003 +[2026-02-27 16:12:57] (step=0005085) Train Loss: 0.4765, Train Steps/Sec: 0.13, Epoch: 0.9949129328898454, LR: 0.0003 +[2026-02-27 16:13:05] (step=0005086) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.9951085893171591, LR: 0.0003 +[2026-02-27 16:13:12] (step=0005087) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 0.9953042457444727, LR: 0.0003 +[2026-02-27 16:13:20] (step=0005088) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 0.9954999021717863, LR: 0.0003 +[2026-02-27 16:13:28] (step=0005089) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 0.9956955585991, LR: 0.0003 +[2026-02-27 16:13:36] (step=0005090) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 0.9958912150264136, LR: 0.0003 +[2026-02-27 16:13:44] (step=0005091) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 0.9960868714537272, LR: 0.0003 +[2026-02-27 16:13:52] (step=0005092) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 0.9962825278810409, LR: 0.0003 +[2026-02-27 16:13:59] (step=0005093) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 0.9964781843083546, LR: 0.0003 +[2026-02-27 16:14:07] (step=0005094) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 0.9966738407356681, LR: 0.0003 +[2026-02-27 16:14:15] (step=0005095) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 0.9968694971629818, LR: 0.0003 +[2026-02-27 16:14:23] (step=0005096) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 0.9970651535902955, LR: 0.0003 +[2026-02-27 16:14:31] (step=0005097) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 0.9972608100176091, LR: 0.0003 +[2026-02-27 16:14:39] (step=0005098) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 0.9974564664449227, LR: 0.0003 +[2026-02-27 16:14:47] (step=0005099) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 0.9976521228722364, LR: 0.0003 +[2026-02-27 16:14:54] (step=0005100) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 0.99784777929955, LR: 0.0003 +[2026-02-27 16:15:02] (step=0005101) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 0.9980434357268636, LR: 0.0003 +[2026-02-27 16:15:10] (step=0005102) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 0.9982390921541773, LR: 0.0003 +[2026-02-27 16:15:18] (step=0005103) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 0.998434748581491, LR: 0.0003 +[2026-02-27 16:15:26] (step=0005104) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 0.9986304050088045, LR: 0.0003 +[2026-02-27 16:15:34] (step=0005105) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 0.9988260614361182, LR: 0.0003 +[2026-02-27 16:15:41] (step=0005106) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 0.9990217178634319, LR: 0.0003 +[2026-02-27 16:15:49] (step=0005107) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 0.9992173742907454, LR: 0.0003 +[2026-02-27 16:15:57] (step=0005108) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 0.9994130307180591, LR: 0.0003 +[2026-02-27 16:16:05] (step=0005109) Train Loss: 0.4747, Train Steps/Sec: 0.13, Epoch: 0.9996086871453728, LR: 0.0003 +[2026-02-27 16:16:13] (step=0005110) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 0.9998043435726863, LR: 0.0003 +[2026-02-27 16:16:21] (step=0005111) Train Loss: 0.4547, Train Steps/Sec: 0.12, Epoch: 1.0, LR: 0.0003 +[2026-02-27 16:16:21] Beginning epoch 1... +[2026-02-27 16:16:30] (step=0005112) Train Loss: 0.4583, Train Steps/Sec: 0.11, Epoch: 1.0001956564273136, LR: 0.0003 +[2026-02-27 16:16:38] (step=0005113) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.0003913128546273, LR: 0.0003 +[2026-02-27 16:16:46] (step=0005114) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.000586969281941, LR: 0.0003 +[2026-02-27 16:16:54] (step=0005115) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.0007826257092545, LR: 0.0003 +[2026-02-27 16:17:02] (step=0005116) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.0009782821365683, LR: 0.0003 +[2026-02-27 16:17:10] (step=0005117) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.0011739385638818, LR: 0.0003 +[2026-02-27 16:17:17] (step=0005118) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.0013695949911954, LR: 0.0003 +[2026-02-27 16:17:25] (step=0005119) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.0015652514185092, LR: 0.0003 +[2026-02-27 16:17:33] (step=0005120) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.0017609078458227, LR: 0.0003 +[2026-02-27 16:17:41] (step=0005121) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.0019565642731363, LR: 0.0003 +[2026-02-27 16:17:49] (step=0005122) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.00215222070045, LR: 0.0003 +[2026-02-27 16:17:57] (step=0005123) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.0023478771277636, LR: 0.0003 +[2026-02-27 16:18:05] (step=0005124) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 1.0025435335550772, LR: 0.0003 +[2026-02-27 16:18:12] (step=0005125) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.002739189982391, LR: 0.0003 +[2026-02-27 16:18:20] (step=0005126) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.0029348464097045, LR: 0.0003 +[2026-02-27 16:18:28] (step=0005127) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.003130502837018, LR: 0.0003 +[2026-02-27 16:18:36] (step=0005128) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.0033261592643319, LR: 0.0003 +[2026-02-27 16:18:44] (step=0005129) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.0035218156916454, LR: 0.0003 +[2026-02-27 16:18:52] (step=0005130) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.003717472118959, LR: 0.0003 +[2026-02-27 16:18:59] (step=0005131) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.0039131285462728, LR: 0.0003 +[2026-02-27 16:19:07] (step=0005132) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 1.0041087849735864, LR: 0.0003 +[2026-02-27 16:19:15] (step=0005133) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.0043044414009, LR: 0.0003 +[2026-02-27 16:19:23] (step=0005134) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.0045000978282137, LR: 0.0003 +[2026-02-27 16:19:31] (step=0005135) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.0046957542555273, LR: 0.0003 +[2026-02-27 16:19:39] (step=0005136) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.0048914106828408, LR: 0.0003 +[2026-02-27 16:19:46] (step=0005137) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.0050870671101546, LR: 0.0003 +[2026-02-27 16:19:54] (step=0005138) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.0052827235374682, LR: 0.0003 +[2026-02-27 16:20:02] (step=0005139) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.0054783799647817, LR: 0.0003 +[2026-02-27 16:20:10] (step=0005140) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.0056740363920955, LR: 0.0003 +[2026-02-27 16:20:18] (step=0005141) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.005869692819409, LR: 0.0003 +[2026-02-27 16:20:26] (step=0005142) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.0060653492467229, LR: 0.0003 +[2026-02-27 16:20:33] (step=0005143) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.0062610056740364, LR: 0.0003 +[2026-02-27 16:20:41] (step=0005144) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.00645666210135, LR: 0.0003 +[2026-02-27 16:20:49] (step=0005145) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.0066523185286638, LR: 0.0003 +[2026-02-27 16:20:57] (step=0005146) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.0068479749559773, LR: 0.0003 +[2026-02-27 16:21:05] (step=0005147) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.007043631383291, LR: 0.0003 +[2026-02-27 16:21:13] (step=0005148) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.0072392878106047, LR: 0.0003 +[2026-02-27 16:21:20] (step=0005149) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.0074349442379182, LR: 0.0003 +[2026-02-27 16:21:28] (step=0005150) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.0076306006652318, LR: 0.0003 +[2026-02-27 16:21:36] (step=0005151) Train Loss: 0.4640, Train Steps/Sec: 0.12, Epoch: 1.0078262570925456, LR: 0.0003 +[2026-02-27 16:21:44] (step=0005152) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.0080219135198591, LR: 0.0003 +[2026-02-27 16:21:52] (step=0005153) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.0082175699471727, LR: 0.0003 +[2026-02-27 16:22:00] (step=0005154) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.0084132263744865, LR: 0.0003 +[2026-02-27 16:22:08] (step=0005155) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.0086088828018, LR: 0.0003 +[2026-02-27 16:22:15] (step=0005156) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.0088045392291136, LR: 0.0003 +[2026-02-27 16:22:23] (step=0005157) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.0090001956564274, LR: 0.0003 +[2026-02-27 16:22:31] (step=0005158) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.009195852083741, LR: 0.0003 +[2026-02-27 16:22:39] (step=0005159) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.0093915085110545, LR: 0.0003 +[2026-02-27 16:22:47] (step=0005160) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.0095871649383683, LR: 0.0003 +[2026-02-27 16:22:55] (step=0005161) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.0097828213656819, LR: 0.0003 +[2026-02-27 16:23:02] (step=0005162) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.0099784777929954, LR: 0.0003 +[2026-02-27 16:23:10] (step=0005163) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.0101741342203092, LR: 0.0003 +[2026-02-27 16:23:18] (step=0005164) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.0103697906476228, LR: 0.0003 +[2026-02-27 16:23:26] (step=0005165) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.0105654470749363, LR: 0.0003 +[2026-02-27 16:23:34] (step=0005166) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.0107611035022501, LR: 0.0003 +[2026-02-27 16:23:42] (step=0005167) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.0109567599295637, LR: 0.0003 +[2026-02-27 16:23:49] (step=0005168) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.0111524163568772, LR: 0.0003 +[2026-02-27 16:23:57] (step=0005169) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.011348072784191, LR: 0.0003 +[2026-02-27 16:24:05] (step=0005170) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.0115437292115046, LR: 0.0003 +[2026-02-27 16:24:13] (step=0005171) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.0117393856388182, LR: 0.0003 +[2026-02-27 16:24:21] (step=0005172) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.011935042066132, LR: 0.0003 +[2026-02-27 16:24:29] (step=0005173) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.0121306984934455, LR: 0.0003 +[2026-02-27 16:24:37] (step=0005174) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.012326354920759, LR: 0.0003 +[2026-02-27 16:24:44] (step=0005175) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 1.0125220113480728, LR: 0.0003 +[2026-02-27 16:24:52] (step=0005176) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.0127176677753864, LR: 0.0003 +[2026-02-27 16:25:00] (step=0005177) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.0129133242027, LR: 0.0003 +[2026-02-27 16:25:08] (step=0005178) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.0131089806300138, LR: 0.0003 +[2026-02-27 16:25:16] (step=0005179) Train Loss: 0.4751, Train Steps/Sec: 0.13, Epoch: 1.0133046370573273, LR: 0.0003 +[2026-02-27 16:25:24] (step=0005180) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.0135002934846409, LR: 0.0003 +[2026-02-27 16:25:31] (step=0005181) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.0136959499119547, LR: 0.0003 +[2026-02-27 16:25:39] (step=0005182) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.0138916063392682, LR: 0.0003 +[2026-02-27 16:25:47] (step=0005183) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.0140872627665818, LR: 0.0003 +[2026-02-27 16:25:55] (step=0005184) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.0142829191938956, LR: 0.0003 +[2026-02-27 16:26:03] (step=0005185) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.0144785756212091, LR: 0.0003 +[2026-02-27 16:26:11] (step=0005186) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.0146742320485227, LR: 0.0003 +[2026-02-27 16:26:19] (step=0005187) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.0148698884758365, LR: 0.0003 +[2026-02-27 16:26:26] (step=0005188) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.01506554490315, LR: 0.0003 +[2026-02-27 16:26:34] (step=0005189) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 1.0152612013304636, LR: 0.0003 +[2026-02-27 16:26:42] (step=0005190) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.0154568577577774, LR: 0.0003 +[2026-02-27 16:26:50] (step=0005191) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.015652514185091, LR: 0.0003 +[2026-02-27 16:26:58] (step=0005192) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 1.0158481706124045, LR: 0.0003 +[2026-02-27 16:27:06] (step=0005193) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 1.0160438270397183, LR: 0.0003 +[2026-02-27 16:27:13] (step=0005194) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.0162394834670319, LR: 0.0003 +[2026-02-27 16:27:21] (step=0005195) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.0164351398943454, LR: 0.0003 +[2026-02-27 16:27:29] (step=0005196) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.0166307963216592, LR: 0.0003 +[2026-02-27 16:27:37] (step=0005197) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 1.0168264527489728, LR: 0.0003 +[2026-02-27 16:27:45] (step=0005198) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.0170221091762865, LR: 0.0003 +[2026-02-27 16:27:53] (step=0005199) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.0172177656036, LR: 0.0003 +[2026-02-27 16:28:00] (step=0005200) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.0174134220309137, LR: 0.0003 +[2026-02-27 16:28:08] (step=0005201) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.0176090784582275, LR: 0.0003 +[2026-02-27 16:28:16] (step=0005202) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.017804734885541, LR: 0.0003 +[2026-02-27 16:28:24] (step=0005203) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.0180003913128546, LR: 0.0003 +[2026-02-27 16:28:32] (step=0005204) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.0181960477401684, LR: 0.0003 +[2026-02-27 16:28:40] (step=0005205) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.018391704167482, LR: 0.0003 +[2026-02-27 16:28:47] (step=0005206) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.0185873605947955, LR: 0.0003 +[2026-02-27 16:28:55] (step=0005207) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.0187830170221093, LR: 0.0003 +[2026-02-27 16:29:03] (step=0005208) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.0189786734494228, LR: 0.0003 +[2026-02-27 16:29:11] (step=0005209) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.0191743298767364, LR: 0.0003 +[2026-02-27 16:29:19] (step=0005210) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 1.0193699863040502, LR: 0.0003 +[2026-02-27 16:29:27] (step=0005211) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.0195656427313637, LR: 0.0003 +[2026-02-27 16:29:34] (step=0005212) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.0197612991586773, LR: 0.0003 +[2026-02-27 16:29:42] (step=0005213) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.019956955585991, LR: 0.0003 +[2026-02-27 16:29:50] (step=0005214) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.0201526120133046, LR: 0.0003 +[2026-02-27 16:29:58] (step=0005215) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 1.0203482684406182, LR: 0.0003 +[2026-02-27 16:30:06] (step=0005216) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.020543924867932, LR: 0.0003 +[2026-02-27 16:30:14] (step=0005217) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.0207395812952456, LR: 0.0003 +[2026-02-27 16:30:22] (step=0005218) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.0209352377225591, LR: 0.0003 +[2026-02-27 16:30:29] (step=0005219) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.021130894149873, LR: 0.0003 +[2026-02-27 16:30:37] (step=0005220) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.0213265505771865, LR: 0.0003 +[2026-02-27 16:30:45] (step=0005221) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.0215222070045, LR: 0.0003 +[2026-02-27 16:30:53] (step=0005222) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.0217178634318138, LR: 0.0003 +[2026-02-27 16:31:01] (step=0005223) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 1.0219135198591274, LR: 0.0003 +[2026-02-27 16:31:09] (step=0005224) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 1.022109176286441, LR: 0.0003 +[2026-02-27 16:31:16] (step=0005225) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.0223048327137547, LR: 0.0003 +[2026-02-27 16:31:24] (step=0005226) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.0225004891410683, LR: 0.0003 +[2026-02-27 16:31:32] (step=0005227) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.0226961455683818, LR: 0.0003 +[2026-02-27 16:31:40] (step=0005228) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.0228918019956956, LR: 0.0003 +[2026-02-27 16:31:48] (step=0005229) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.0230874584230092, LR: 0.0003 +[2026-02-27 16:31:56] (step=0005230) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.0232831148503228, LR: 0.0003 +[2026-02-27 16:32:04] (step=0005231) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.0234787712776365, LR: 0.0003 +[2026-02-27 16:32:11] (step=0005232) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.02367442770495, LR: 0.0003 +[2026-02-27 16:32:19] (step=0005233) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.0238700841322637, LR: 0.0003 +[2026-02-27 16:32:27] (step=0005234) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.0240657405595774, LR: 0.0003 +[2026-02-27 16:32:35] (step=0005235) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.024261396986891, LR: 0.0003 +[2026-02-27 16:32:43] (step=0005236) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.0244570534142046, LR: 0.0003 +[2026-02-27 16:32:51] (step=0005237) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.0246527098415183, LR: 0.0003 +[2026-02-27 16:32:58] (step=0005238) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.024848366268832, LR: 0.0003 +[2026-02-27 16:33:06] (step=0005239) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 1.0250440226961455, LR: 0.0003 +[2026-02-27 16:33:14] (step=0005240) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.0252396791234593, LR: 0.0003 +[2026-02-27 16:33:22] (step=0005241) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 1.0254353355507728, LR: 0.0003 +[2026-02-27 16:33:30] (step=0005242) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.0256309919780864, LR: 0.0003 +[2026-02-27 16:33:38] (step=0005243) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.0258266484054002, LR: 0.0003 +[2026-02-27 16:33:45] (step=0005244) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 1.0260223048327137, LR: 0.0003 +[2026-02-27 16:33:53] (step=0005245) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.0262179612600273, LR: 0.0003 +[2026-02-27 16:34:01] (step=0005246) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 1.026413617687341, LR: 0.0003 +[2026-02-27 16:34:09] (step=0005247) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.0266092741146546, LR: 0.0003 +[2026-02-27 16:34:17] (step=0005248) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.0268049305419682, LR: 0.0003 +[2026-02-27 16:34:25] (step=0005249) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 1.027000586969282, LR: 0.0003 +[2026-02-27 16:34:33] (step=0005250) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 1.0271962433965955, LR: 0.0003 +[2026-02-27 16:34:40] (step=0005251) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 1.027391899823909, LR: 0.0003 +[2026-02-27 16:34:48] (step=0005252) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.0275875562512229, LR: 0.0003 +[2026-02-27 16:34:56] (step=0005253) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.0277832126785365, LR: 0.0003 +[2026-02-27 16:35:04] (step=0005254) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.0279788691058502, LR: 0.0003 +[2026-02-27 16:35:12] (step=0005255) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.0281745255331638, LR: 0.0003 +[2026-02-27 16:35:20] (step=0005256) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.0283701819604774, LR: 0.0003 +[2026-02-27 16:35:27] (step=0005257) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.0285658383877911, LR: 0.0003 +[2026-02-27 16:35:35] (step=0005258) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.0287614948151047, LR: 0.0003 +[2026-02-27 16:35:43] (step=0005259) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.0289571512424183, LR: 0.0003 +[2026-02-27 16:35:51] (step=0005260) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.029152807669732, LR: 0.0003 +[2026-02-27 16:35:59] (step=0005261) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.0293484640970456, LR: 0.0003 +[2026-02-27 16:36:07] (step=0005262) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.0295441205243592, LR: 0.0003 +[2026-02-27 16:36:15] (step=0005263) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.029739776951673, LR: 0.0003 +[2026-02-27 16:36:22] (step=0005264) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.0299354333789865, LR: 0.0003 +[2026-02-27 16:36:30] (step=0005265) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.0301310898063, LR: 0.0003 +[2026-02-27 16:36:38] (step=0005266) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.0303267462336139, LR: 0.0003 +[2026-02-27 16:36:46] (step=0005267) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.0305224026609274, LR: 0.0003 +[2026-02-27 16:36:54] (step=0005268) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 1.030718059088241, LR: 0.0003 +[2026-02-27 16:37:02] (step=0005269) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.0309137155155548, LR: 0.0003 +[2026-02-27 16:37:09] (step=0005270) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.0311093719428683, LR: 0.0003 +[2026-02-27 16:37:17] (step=0005271) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.031305028370182, LR: 0.0003 +[2026-02-27 16:37:25] (step=0005272) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.0315006847974957, LR: 0.0003 +[2026-02-27 16:37:33] (step=0005273) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.0316963412248092, LR: 0.0003 +[2026-02-27 16:37:41] (step=0005274) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.0318919976521228, LR: 0.0003 +[2026-02-27 16:37:49] (step=0005275) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.0320876540794366, LR: 0.0003 +[2026-02-27 16:37:57] (step=0005276) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 1.0322833105067502, LR: 0.0003 +[2026-02-27 16:38:04] (step=0005277) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 1.0324789669340637, LR: 0.0003 +[2026-02-27 16:38:12] (step=0005278) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 1.0326746233613775, LR: 0.0003 +[2026-02-27 16:38:20] (step=0005279) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.032870279788691, LR: 0.0003 +[2026-02-27 16:38:28] (step=0005280) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.0330659362160046, LR: 0.0003 +[2026-02-27 16:38:36] (step=0005281) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.0332615926433184, LR: 0.0003 +[2026-02-27 16:38:44] (step=0005282) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.033457249070632, LR: 0.0003 +[2026-02-27 16:38:52] (step=0005283) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.0336529054979455, LR: 0.0003 +[2026-02-27 16:38:59] (step=0005284) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.0338485619252593, LR: 0.0003 +[2026-02-27 16:39:07] (step=0005285) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.0340442183525729, LR: 0.0003 +[2026-02-27 16:39:15] (step=0005286) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.0342398747798864, LR: 0.0003 +[2026-02-27 16:39:23] (step=0005287) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.0344355312072002, LR: 0.0003 +[2026-02-27 16:39:31] (step=0005288) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.0346311876345138, LR: 0.0003 +[2026-02-27 16:39:39] (step=0005289) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.0348268440618273, LR: 0.0003 +[2026-02-27 16:39:46] (step=0005290) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.0350225004891411, LR: 0.0003 +[2026-02-27 16:39:54] (step=0005291) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.0352181569164547, LR: 0.0003 +[2026-02-27 16:40:02] (step=0005292) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 1.0354138133437683, LR: 0.0003 +[2026-02-27 16:40:10] (step=0005293) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 1.035609469771082, LR: 0.0003 +[2026-02-27 16:40:18] (step=0005294) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.0358051261983956, LR: 0.0003 +[2026-02-27 16:40:26] (step=0005295) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.0360007826257092, LR: 0.0003 +[2026-02-27 16:40:34] (step=0005296) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.036196439053023, LR: 0.0003 +[2026-02-27 16:40:41] (step=0005297) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.0363920954803365, LR: 0.0003 +[2026-02-27 16:40:49] (step=0005298) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.03658775190765, LR: 0.0003 +[2026-02-27 16:40:57] (step=0005299) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.0367834083349639, LR: 0.0003 +[2026-02-27 16:41:05] (step=0005300) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.0369790647622774, LR: 0.0003 +[2026-02-27 16:41:13] (step=0005301) Train Loss: 0.4610, Train Steps/Sec: 0.12, Epoch: 1.037174721189591, LR: 0.0003 +[2026-02-27 16:41:21] (step=0005302) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.0373703776169048, LR: 0.0003 +[2026-02-27 16:41:29] (step=0005303) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.0375660340442183, LR: 0.0003 +[2026-02-27 16:41:36] (step=0005304) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.0377616904715319, LR: 0.0003 +[2026-02-27 16:41:44] (step=0005305) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 1.0379573468988457, LR: 0.0003 +[2026-02-27 16:41:52] (step=0005306) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.0381530033261592, LR: 0.0003 +[2026-02-27 16:42:00] (step=0005307) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.0383486597534728, LR: 0.0003 +[2026-02-27 16:42:08] (step=0005308) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.0385443161807866, LR: 0.0003 +[2026-02-27 16:42:16] (step=0005309) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.0387399726081001, LR: 0.0003 +[2026-02-27 16:42:23] (step=0005310) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 1.038935629035414, LR: 0.0003 +[2026-02-27 16:42:31] (step=0005311) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 1.0391312854627275, LR: 0.0003 +[2026-02-27 16:42:39] (step=0005312) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.039326941890041, LR: 0.0003 +[2026-02-27 16:42:47] (step=0005313) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 1.0395225983173548, LR: 0.0003 +[2026-02-27 16:42:55] (step=0005314) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.0397182547446684, LR: 0.0003 +[2026-02-27 16:43:03] (step=0005315) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.039913911171982, LR: 0.0003 +[2026-02-27 16:43:11] (step=0005316) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.0401095675992957, LR: 0.0003 +[2026-02-27 16:43:18] (step=0005317) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 1.0403052240266093, LR: 0.0003 +[2026-02-27 16:43:26] (step=0005318) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.0405008804539229, LR: 0.0003 +[2026-02-27 16:43:34] (step=0005319) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.0406965368812366, LR: 0.0003 +[2026-02-27 16:43:42] (step=0005320) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 1.0408921933085502, LR: 0.0003 +[2026-02-27 16:43:50] (step=0005321) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.0410878497358638, LR: 0.0003 +[2026-02-27 16:43:58] (step=0005322) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.0412835061631776, LR: 0.0003 +[2026-02-27 16:44:06] (step=0005323) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.0414791625904911, LR: 0.0003 +[2026-02-27 16:44:13] (step=0005324) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.0416748190178047, LR: 0.0003 +[2026-02-27 16:44:21] (step=0005325) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.0418704754451185, LR: 0.0003 +[2026-02-27 16:44:29] (step=0005326) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.042066131872432, LR: 0.0003 +[2026-02-27 16:44:37] (step=0005327) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.0422617882997456, LR: 0.0003 +[2026-02-27 16:44:45] (step=0005328) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.0424574447270594, LR: 0.0003 +[2026-02-27 16:44:53] (step=0005329) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.042653101154373, LR: 0.0003 +[2026-02-27 16:45:01] (step=0005330) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.0428487575816865, LR: 0.0003 +[2026-02-27 16:45:08] (step=0005331) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.0430444140090003, LR: 0.0003 +[2026-02-27 16:45:16] (step=0005332) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.0432400704363138, LR: 0.0003 +[2026-02-27 16:45:24] (step=0005333) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.0434357268636274, LR: 0.0003 +[2026-02-27 16:45:32] (step=0005334) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 1.0436313832909412, LR: 0.0003 +[2026-02-27 16:45:40] (step=0005335) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.0438270397182547, LR: 0.0003 +[2026-02-27 16:45:48] (step=0005336) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.0440226961455683, LR: 0.0003 +[2026-02-27 16:45:55] (step=0005337) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.044218352572882, LR: 0.0003 +[2026-02-27 16:46:03] (step=0005338) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.0444140090001957, LR: 0.0003 +[2026-02-27 16:46:11] (step=0005339) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.0446096654275092, LR: 0.0003 +[2026-02-27 16:46:19] (step=0005340) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 1.044805321854823, LR: 0.0003 +[2026-02-27 16:46:27] (step=0005341) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.0450009782821366, LR: 0.0003 +[2026-02-27 16:46:35] (step=0005342) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.0451966347094501, LR: 0.0003 +[2026-02-27 16:46:43] (step=0005343) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.045392291136764, LR: 0.0003 +[2026-02-27 16:46:50] (step=0005344) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.0455879475640775, LR: 0.0003 +[2026-02-27 16:46:58] (step=0005345) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.045783603991391, LR: 0.0003 +[2026-02-27 16:47:06] (step=0005346) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 1.0459792604187048, LR: 0.0003 +[2026-02-27 16:47:14] (step=0005347) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.0461749168460184, LR: 0.0003 +[2026-02-27 16:47:22] (step=0005348) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.046370573273332, LR: 0.0003 +[2026-02-27 16:47:30] (step=0005349) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.0465662297006457, LR: 0.0003 +[2026-02-27 16:47:37] (step=0005350) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 1.0467618861279593, LR: 0.0003 +[2026-02-27 16:47:45] (step=0005351) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.0469575425552728, LR: 0.0003 +[2026-02-27 16:47:53] (step=0005352) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.0471531989825866, LR: 0.0003 +[2026-02-27 16:48:01] (step=0005353) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.0473488554099002, LR: 0.0003 +[2026-02-27 16:48:09] (step=0005354) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.0475445118372138, LR: 0.0003 +[2026-02-27 16:48:17] (step=0005355) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.0477401682645275, LR: 0.0003 +[2026-02-27 16:48:25] (step=0005356) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.047935824691841, LR: 0.0003 +[2026-02-27 16:48:33] (step=0005357) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 1.0481314811191547, LR: 0.0003 +[2026-02-27 16:48:40] (step=0005358) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 1.0483271375464684, LR: 0.0003 +[2026-02-27 16:48:48] (step=0005359) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.048522793973782, LR: 0.0003 +[2026-02-27 16:48:56] (step=0005360) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.0487184504010956, LR: 0.0003 +[2026-02-27 16:49:04] (step=0005361) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.0489141068284094, LR: 0.0003 +[2026-02-27 16:49:12] (step=0005362) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.049109763255723, LR: 0.0003 +[2026-02-27 16:49:20] (step=0005363) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.0493054196830365, LR: 0.0003 +[2026-02-27 16:49:27] (step=0005364) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.0495010761103503, LR: 0.0003 +[2026-02-27 16:49:35] (step=0005365) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.0496967325376638, LR: 0.0003 +[2026-02-27 16:49:43] (step=0005366) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.0498923889649776, LR: 0.0003 +[2026-02-27 16:49:51] (step=0005367) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 1.0500880453922912, LR: 0.0003 +[2026-02-27 16:49:59] (step=0005368) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.0502837018196047, LR: 0.0003 +[2026-02-27 16:50:07] (step=0005369) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.0504793582469185, LR: 0.0003 +[2026-02-27 16:50:14] (step=0005370) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.050675014674232, LR: 0.0003 +[2026-02-27 16:50:22] (step=0005371) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.0508706711015456, LR: 0.0003 +[2026-02-27 16:50:30] (step=0005372) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.0510663275288594, LR: 0.0003 +[2026-02-27 16:50:38] (step=0005373) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 1.051261983956173, LR: 0.0003 +[2026-02-27 16:50:46] (step=0005374) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.0514576403834865, LR: 0.0003 +[2026-02-27 16:50:54] (step=0005375) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.0516532968108003, LR: 0.0003 +[2026-02-27 16:51:02] (step=0005376) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.051848953238114, LR: 0.0003 +[2026-02-27 16:51:09] (step=0005377) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.0520446096654275, LR: 0.0003 +[2026-02-27 16:51:17] (step=0005378) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.0522402660927412, LR: 0.0003 +[2026-02-27 16:51:25] (step=0005379) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.0524359225200548, LR: 0.0003 +[2026-02-27 16:51:33] (step=0005380) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 1.0526315789473684, LR: 0.0003 +[2026-02-27 16:51:41] (step=0005381) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 1.0528272353746821, LR: 0.0003 +[2026-02-27 16:51:49] (step=0005382) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.0530228918019957, LR: 0.0003 +[2026-02-27 16:51:57] (step=0005383) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.0532185482293093, LR: 0.0003 +[2026-02-27 16:52:04] (step=0005384) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.053414204656623, LR: 0.0003 +[2026-02-27 16:52:12] (step=0005385) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.0536098610839366, LR: 0.0003 +[2026-02-27 16:52:20] (step=0005386) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.0538055175112502, LR: 0.0003 +[2026-02-27 16:52:28] (step=0005387) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.054001173938564, LR: 0.0003 +[2026-02-27 16:52:36] (step=0005388) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 1.0541968303658775, LR: 0.0003 +[2026-02-27 16:52:44] (step=0005389) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.054392486793191, LR: 0.0003 +[2026-02-27 16:52:52] (step=0005390) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.0545881432205049, LR: 0.0003 +[2026-02-27 16:52:59] (step=0005391) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 1.0547837996478184, LR: 0.0003 +[2026-02-27 16:53:07] (step=0005392) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.054979456075132, LR: 0.0003 +[2026-02-27 16:53:15] (step=0005393) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 1.0551751125024458, LR: 0.0003 +[2026-02-27 16:53:23] (step=0005394) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.0553707689297593, LR: 0.0003 +[2026-02-27 16:53:31] (step=0005395) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.055566425357073, LR: 0.0003 +[2026-02-27 16:53:39] (step=0005396) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 1.0557620817843867, LR: 0.0003 +[2026-02-27 16:53:46] (step=0005397) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 1.0559577382117002, LR: 0.0003 +[2026-02-27 16:53:54] (step=0005398) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 1.0561533946390138, LR: 0.0003 +[2026-02-27 16:54:02] (step=0005399) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.0563490510663276, LR: 0.0003 +[2026-02-27 16:54:10] (step=0005400) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.0565447074936412, LR: 0.0003 +[2026-02-27 16:54:18] (step=0005401) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 1.0567403639209547, LR: 0.0003 +[2026-02-27 16:54:26] (step=0005402) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 1.0569360203482685, LR: 0.0003 +[2026-02-27 16:54:34] (step=0005403) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.057131676775582, LR: 0.0003 +[2026-02-27 16:54:41] (step=0005404) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.0573273332028956, LR: 0.0003 +[2026-02-27 16:54:49] (step=0005405) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.0575229896302094, LR: 0.0003 +[2026-02-27 16:54:57] (step=0005406) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.057718646057523, LR: 0.0003 +[2026-02-27 16:55:05] (step=0005407) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.0579143024848365, LR: 0.0003 +[2026-02-27 16:55:13] (step=0005408) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.0581099589121503, LR: 0.0003 +[2026-02-27 16:55:21] (step=0005409) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.0583056153394639, LR: 0.0003 +[2026-02-27 16:55:29] (step=0005410) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.0585012717667774, LR: 0.0003 +[2026-02-27 16:55:36] (step=0005411) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.0586969281940912, LR: 0.0003 +[2026-02-27 16:55:44] (step=0005412) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.0588925846214048, LR: 0.0003 +[2026-02-27 16:55:52] (step=0005413) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.0590882410487183, LR: 0.0003 +[2026-02-27 16:56:00] (step=0005414) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.0592838974760321, LR: 0.0003 +[2026-02-27 16:56:08] (step=0005415) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.0594795539033457, LR: 0.0003 +[2026-02-27 16:56:16] (step=0005416) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.0596752103306593, LR: 0.0003 +[2026-02-27 16:56:23] (step=0005417) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.059870866757973, LR: 0.0003 +[2026-02-27 16:56:31] (step=0005418) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.0600665231852866, LR: 0.0003 +[2026-02-27 16:56:39] (step=0005419) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 1.0602621796126002, LR: 0.0003 +[2026-02-27 16:56:47] (step=0005420) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.060457836039914, LR: 0.0003 +[2026-02-27 16:56:55] (step=0005421) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.0606534924672275, LR: 0.0003 +[2026-02-27 16:57:03] (step=0005422) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 1.0608491488945413, LR: 0.0003 +[2026-02-27 16:57:10] (step=0005423) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 1.0610448053218549, LR: 0.0003 +[2026-02-27 16:57:18] (step=0005424) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.0612404617491684, LR: 0.0003 +[2026-02-27 16:57:26] (step=0005425) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.0614361181764822, LR: 0.0003 +[2026-02-27 16:57:34] (step=0005426) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.0616317746037958, LR: 0.0003 +[2026-02-27 16:57:42] (step=0005427) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.0618274310311093, LR: 0.0003 +[2026-02-27 16:57:50] (step=0005428) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 1.062023087458423, LR: 0.0003 +[2026-02-27 16:57:58] (step=0005429) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.0622187438857367, LR: 0.0003 +[2026-02-27 16:58:06] (step=0005430) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.0624144003130502, LR: 0.0003 +[2026-02-27 16:58:13] (step=0005431) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.062610056740364, LR: 0.0003 +[2026-02-27 16:58:21] (step=0005432) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 1.0628057131676776, LR: 0.0003 +[2026-02-27 16:58:29] (step=0005433) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.0630013695949911, LR: 0.0003 +[2026-02-27 16:58:37] (step=0005434) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.063197026022305, LR: 0.0003 +[2026-02-27 16:58:45] (step=0005435) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.0633926824496185, LR: 0.0003 +[2026-02-27 16:58:53] (step=0005436) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 1.063588338876932, LR: 0.0003 +[2026-02-27 16:59:00] (step=0005437) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.0637839953042458, LR: 0.0003 +[2026-02-27 16:59:08] (step=0005438) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.0639796517315594, LR: 0.0003 +[2026-02-27 16:59:16] (step=0005439) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 1.064175308158873, LR: 0.0003 +[2026-02-27 16:59:24] (step=0005440) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.0643709645861867, LR: 0.0003 +[2026-02-27 16:59:32] (step=0005441) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.0645666210135003, LR: 0.0003 +[2026-02-27 16:59:40] (step=0005442) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 1.0647622774408139, LR: 0.0003 +[2026-02-27 16:59:47] (step=0005443) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.0649579338681276, LR: 0.0003 +[2026-02-27 16:59:55] (step=0005444) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.0651535902954412, LR: 0.0003 +[2026-02-27 17:00:03] (step=0005445) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 1.0653492467227548, LR: 0.0003 +[2026-02-27 17:00:11] (step=0005446) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.0655449031500686, LR: 0.0003 +[2026-02-27 17:00:19] (step=0005447) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.0657405595773821, LR: 0.0003 +[2026-02-27 17:00:27] (step=0005448) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.0659362160046957, LR: 0.0003 +[2026-02-27 17:00:35] (step=0005449) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.0661318724320095, LR: 0.0003 +[2026-02-27 17:00:42] (step=0005450) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.066327528859323, LR: 0.0003 +[2026-02-27 17:00:50] (step=0005451) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.0665231852866366, LR: 0.0003 +[2026-02-27 17:00:58] (step=0005452) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 1.0667188417139504, LR: 0.0003 +[2026-02-27 17:01:06] (step=0005453) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 1.066914498141264, LR: 0.0003 +[2026-02-27 17:01:14] (step=0005454) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 1.0671101545685775, LR: 0.0003 +[2026-02-27 17:01:22] (step=0005455) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.0673058109958913, LR: 0.0003 +[2026-02-27 17:01:30] (step=0005456) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.0675014674232048, LR: 0.0003 +[2026-02-27 17:01:37] (step=0005457) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 1.0676971238505184, LR: 0.0003 +[2026-02-27 17:01:45] (step=0005458) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.0678927802778322, LR: 0.0003 +[2026-02-27 17:01:53] (step=0005459) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.0680884367051457, LR: 0.0003 +[2026-02-27 17:02:01] (step=0005460) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.0682840931324593, LR: 0.0003 +[2026-02-27 17:02:09] (step=0005461) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 1.068479749559773, LR: 0.0003 +[2026-02-27 17:02:17] (step=0005462) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.0686754059870867, LR: 0.0003 +[2026-02-27 17:02:24] (step=0005463) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.0688710624144002, LR: 0.0003 +[2026-02-27 17:02:32] (step=0005464) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.069066718841714, LR: 0.0003 +[2026-02-27 17:02:40] (step=0005465) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.0692623752690276, LR: 0.0003 +[2026-02-27 17:02:48] (step=0005466) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.0694580316963411, LR: 0.0003 +[2026-02-27 17:02:56] (step=0005467) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.069653688123655, LR: 0.0003 +[2026-02-27 17:03:04] (step=0005468) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.0698493445509685, LR: 0.0003 +[2026-02-27 17:03:12] (step=0005469) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.070045000978282, LR: 0.0003 +[2026-02-27 17:03:19] (step=0005470) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.0702406574055958, LR: 0.0003 +[2026-02-27 17:03:27] (step=0005471) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.0704363138329094, LR: 0.0003 +[2026-02-27 17:03:35] (step=0005472) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.070631970260223, LR: 0.0003 +[2026-02-27 17:03:43] (step=0005473) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 1.0708276266875367, LR: 0.0003 +[2026-02-27 17:03:51] (step=0005474) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.0710232831148503, LR: 0.0003 +[2026-02-27 17:03:59] (step=0005475) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.0712189395421639, LR: 0.0003 +[2026-02-27 17:04:07] (step=0005476) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.0714145959694776, LR: 0.0003 +[2026-02-27 17:04:14] (step=0005477) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.0716102523967912, LR: 0.0003 +[2026-02-27 17:04:22] (step=0005478) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 1.071805908824105, LR: 0.0003 +[2026-02-27 17:04:30] (step=0005479) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.0720015652514185, LR: 0.0003 +[2026-02-27 17:04:38] (step=0005480) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.072197221678732, LR: 0.0003 +[2026-02-27 17:04:46] (step=0005481) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 1.0723928781060459, LR: 0.0003 +[2026-02-27 17:04:54] (step=0005482) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.0725885345333595, LR: 0.0003 +[2026-02-27 17:05:01] (step=0005483) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 1.072784190960673, LR: 0.0003 +[2026-02-27 17:05:09] (step=0005484) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.0729798473879868, LR: 0.0003 +[2026-02-27 17:05:17] (step=0005485) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.0731755038153004, LR: 0.0003 +[2026-02-27 17:05:25] (step=0005486) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.073371160242614, LR: 0.0003 +[2026-02-27 17:05:33] (step=0005487) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.0735668166699277, LR: 0.0003 +[2026-02-27 17:05:41] (step=0005488) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 1.0737624730972413, LR: 0.0003 +[2026-02-27 17:05:49] (step=0005489) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.0739581295245548, LR: 0.0003 +[2026-02-27 17:05:56] (step=0005490) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.0741537859518686, LR: 0.0003 +[2026-02-27 17:06:04] (step=0005491) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.0743494423791822, LR: 0.0003 +[2026-02-27 17:06:12] (step=0005492) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.0745450988064957, LR: 0.0003 +[2026-02-27 17:06:20] (step=0005493) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.0747407552338095, LR: 0.0003 +[2026-02-27 17:06:28] (step=0005494) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.074936411661123, LR: 0.0003 +[2026-02-27 17:06:36] (step=0005495) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.0751320680884366, LR: 0.0003 +[2026-02-27 17:06:44] (step=0005496) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.0753277245157504, LR: 0.0003 +[2026-02-27 17:06:51] (step=0005497) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 1.075523380943064, LR: 0.0003 +[2026-02-27 17:06:59] (step=0005498) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 1.0757190373703776, LR: 0.0003 +[2026-02-27 17:07:07] (step=0005499) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.0759146937976913, LR: 0.0003 +[2026-02-27 17:07:15] (step=0005500) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.076110350225005, LR: 0.0003 +[2026-02-27 17:07:15] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0005500/ +[2026-02-27 17:07:23] (step=0005501) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 1.0763060066523185, LR: 0.0003 +[2026-02-27 17:07:31] (step=0005502) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 1.0765016630796322, LR: 0.0003 +[2026-02-27 17:07:39] (step=0005503) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 1.0766973195069458, LR: 0.0003 +[2026-02-27 17:07:46] (step=0005504) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.0768929759342594, LR: 0.0003 +[2026-02-27 17:07:54] (step=0005505) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.0770886323615732, LR: 0.0003 +[2026-02-27 17:08:02] (step=0005506) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.0772842887888867, LR: 0.0003 +[2026-02-27 17:08:10] (step=0005507) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 1.0774799452162003, LR: 0.0003 +[2026-02-27 17:08:18] (step=0005508) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.077675601643514, LR: 0.0003 +[2026-02-27 17:08:26] (step=0005509) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.0778712580708276, LR: 0.0003 +[2026-02-27 17:08:33] (step=0005510) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 1.0780669144981412, LR: 0.0003 +[2026-02-27 17:08:41] (step=0005511) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.078262570925455, LR: 0.0003 +[2026-02-27 17:08:49] (step=0005512) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.0784582273527685, LR: 0.0003 +[2026-02-27 17:08:57] (step=0005513) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.078653883780082, LR: 0.0003 +[2026-02-27 17:09:05] (step=0005514) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.0788495402073959, LR: 0.0003 +[2026-02-27 17:09:13] (step=0005515) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.0790451966347094, LR: 0.0003 +[2026-02-27 17:09:21] (step=0005516) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.079240853062023, LR: 0.0003 +[2026-02-27 17:09:28] (step=0005517) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.0794365094893368, LR: 0.0003 +[2026-02-27 17:09:36] (step=0005518) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.0796321659166503, LR: 0.0003 +[2026-02-27 17:09:44] (step=0005519) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.079827822343964, LR: 0.0003 +[2026-02-27 17:09:52] (step=0005520) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.0800234787712777, LR: 0.0003 +[2026-02-27 17:10:00] (step=0005521) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.0802191351985913, LR: 0.0003 +[2026-02-27 17:10:08] (step=0005522) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.0804147916259048, LR: 0.0003 +[2026-02-27 17:10:16] (step=0005523) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.0806104480532186, LR: 0.0003 +[2026-02-27 17:10:23] (step=0005524) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.0808061044805322, LR: 0.0003 +[2026-02-27 17:10:31] (step=0005525) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.0810017609078457, LR: 0.0003 +[2026-02-27 17:10:39] (step=0005526) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.0811974173351595, LR: 0.0003 +[2026-02-27 17:10:47] (step=0005527) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.081393073762473, LR: 0.0003 +[2026-02-27 17:10:55] (step=0005528) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.0815887301897866, LR: 0.0003 +[2026-02-27 17:11:03] (step=0005529) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.0817843866171004, LR: 0.0003 +[2026-02-27 17:11:11] (step=0005530) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.081980043044414, LR: 0.0003 +[2026-02-27 17:11:18] (step=0005531) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.0821756994717275, LR: 0.0003 +[2026-02-27 17:11:26] (step=0005532) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.0823713558990413, LR: 0.0003 +[2026-02-27 17:11:34] (step=0005533) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.0825670123263549, LR: 0.0003 +[2026-02-27 17:11:42] (step=0005534) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.0827626687536687, LR: 0.0003 +[2026-02-27 17:11:50] (step=0005535) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.0829583251809822, LR: 0.0003 +[2026-02-27 17:11:58] (step=0005536) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.0831539816082958, LR: 0.0003 +[2026-02-27 17:12:05] (step=0005537) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.0833496380356096, LR: 0.0003 +[2026-02-27 17:12:13] (step=0005538) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.0835452944629231, LR: 0.0003 +[2026-02-27 17:12:21] (step=0005539) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.0837409508902367, LR: 0.0003 +[2026-02-27 17:12:29] (step=0005540) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.0839366073175505, LR: 0.0003 +[2026-02-27 17:12:37] (step=0005541) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.084132263744864, LR: 0.0003 +[2026-02-27 17:12:45] (step=0005542) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.0843279201721776, LR: 0.0003 +[2026-02-27 17:12:52] (step=0005543) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.0845235765994914, LR: 0.0003 +[2026-02-27 17:13:00] (step=0005544) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.084719233026805, LR: 0.0003 +[2026-02-27 17:13:08] (step=0005545) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 1.0849148894541185, LR: 0.0003 +[2026-02-27 17:13:16] (step=0005546) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.0851105458814323, LR: 0.0003 +[2026-02-27 17:13:24] (step=0005547) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.0853062023087459, LR: 0.0003 +[2026-02-27 17:13:32] (step=0005548) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.0855018587360594, LR: 0.0003 +[2026-02-27 17:13:40] (step=0005549) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.0856975151633732, LR: 0.0003 +[2026-02-27 17:13:47] (step=0005550) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.0858931715906868, LR: 0.0003 +[2026-02-27 17:13:55] (step=0005551) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.0860888280180003, LR: 0.0003 +[2026-02-27 17:14:03] (step=0005552) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.0862844844453141, LR: 0.0003 +[2026-02-27 17:14:11] (step=0005553) Train Loss: 0.4731, Train Steps/Sec: 0.13, Epoch: 1.0864801408726277, LR: 0.0003 +[2026-02-27 17:14:19] (step=0005554) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.0866757972999412, LR: 0.0003 +[2026-02-27 17:14:27] (step=0005555) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.086871453727255, LR: 0.0003 +[2026-02-27 17:14:35] (step=0005556) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.0870671101545686, LR: 0.0003 +[2026-02-27 17:14:42] (step=0005557) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.0872627665818821, LR: 0.0003 +[2026-02-27 17:14:50] (step=0005558) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.087458423009196, LR: 0.0003 +[2026-02-27 17:14:58] (step=0005559) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.0876540794365095, LR: 0.0003 +[2026-02-27 17:15:06] (step=0005560) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.087849735863823, LR: 0.0003 +[2026-02-27 17:15:14] (step=0005561) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.0880453922911368, LR: 0.0003 +[2026-02-27 17:15:22] (step=0005562) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 1.0882410487184504, LR: 0.0003 +[2026-02-27 17:15:29] (step=0005563) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 1.088436705145764, LR: 0.0003 +[2026-02-27 17:15:37] (step=0005564) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.0886323615730777, LR: 0.0003 +[2026-02-27 17:15:45] (step=0005565) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.0888280180003913, LR: 0.0003 +[2026-02-27 17:15:53] (step=0005566) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 1.0890236744277049, LR: 0.0003 +[2026-02-27 17:16:01] (step=0005567) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.0892193308550187, LR: 0.0003 +[2026-02-27 17:16:09] (step=0005568) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.0894149872823322, LR: 0.0003 +[2026-02-27 17:16:17] (step=0005569) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.0896106437096458, LR: 0.0003 +[2026-02-27 17:16:24] (step=0005570) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.0898063001369596, LR: 0.0003 +[2026-02-27 17:16:32] (step=0005571) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 1.0900019565642731, LR: 0.0003 +[2026-02-27 17:16:40] (step=0005572) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.0901976129915867, LR: 0.0003 +[2026-02-27 17:16:48] (step=0005573) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 1.0903932694189005, LR: 0.0003 +[2026-02-27 17:16:56] (step=0005574) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.090588925846214, LR: 0.0003 +[2026-02-27 17:17:04] (step=0005575) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.0907845822735276, LR: 0.0003 +[2026-02-27 17:17:12] (step=0005576) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.0909802387008414, LR: 0.0003 +[2026-02-27 17:17:19] (step=0005577) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.091175895128155, LR: 0.0003 +[2026-02-27 17:17:27] (step=0005578) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.0913715515554685, LR: 0.0003 +[2026-02-27 17:17:35] (step=0005579) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.0915672079827823, LR: 0.0003 +[2026-02-27 17:17:43] (step=0005580) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.0917628644100958, LR: 0.0003 +[2026-02-27 17:17:51] (step=0005581) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.0919585208374094, LR: 0.0003 +[2026-02-27 17:17:59] (step=0005582) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.0921541772647232, LR: 0.0003 +[2026-02-27 17:18:07] (step=0005583) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.0923498336920368, LR: 0.0003 +[2026-02-27 17:18:14] (step=0005584) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.0925454901193503, LR: 0.0003 +[2026-02-27 17:18:22] (step=0005585) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.092741146546664, LR: 0.0003 +[2026-02-27 17:18:30] (step=0005586) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.0929368029739777, LR: 0.0003 +[2026-02-27 17:18:38] (step=0005587) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.0931324594012912, LR: 0.0003 +[2026-02-27 17:18:46] (step=0005588) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.093328115828605, LR: 0.0003 +[2026-02-27 17:18:54] (step=0005589) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.0935237722559186, LR: 0.0003 +[2026-02-27 17:19:01] (step=0005590) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.0937194286832324, LR: 0.0003 +[2026-02-27 17:19:09] (step=0005591) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.093915085110546, LR: 0.0003 +[2026-02-27 17:19:17] (step=0005592) Train Loss: 0.4611, Train Steps/Sec: 0.12, Epoch: 1.0941107415378595, LR: 0.0003 +[2026-02-27 17:19:25] (step=0005593) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.0943063979651733, LR: 0.0003 +[2026-02-27 17:19:33] (step=0005594) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.0945020543924868, LR: 0.0003 +[2026-02-27 17:19:41] (step=0005595) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.0946977108198004, LR: 0.0003 +[2026-02-27 17:19:49] (step=0005596) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.0948933672471142, LR: 0.0003 +[2026-02-27 17:19:57] (step=0005597) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 1.0950890236744277, LR: 0.0003 +[2026-02-27 17:20:04] (step=0005598) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.0952846801017413, LR: 0.0003 +[2026-02-27 17:20:12] (step=0005599) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.095480336529055, LR: 0.0003 +[2026-02-27 17:20:20] (step=0005600) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.0956759929563686, LR: 0.0003 +[2026-02-27 17:20:28] (step=0005601) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.0958716493836822, LR: 0.0003 +[2026-02-27 17:20:36] (step=0005602) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.096067305810996, LR: 0.0003 +[2026-02-27 17:20:44] (step=0005603) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.0962629622383095, LR: 0.0003 +[2026-02-27 17:20:51] (step=0005604) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.096458618665623, LR: 0.0003 +[2026-02-27 17:20:59] (step=0005605) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.096654275092937, LR: 0.0003 +[2026-02-27 17:21:07] (step=0005606) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.0968499315202505, LR: 0.0003 +[2026-02-27 17:21:15] (step=0005607) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.097045587947564, LR: 0.0003 +[2026-02-27 17:21:23] (step=0005608) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 1.0972412443748778, LR: 0.0003 +[2026-02-27 17:21:31] (step=0005609) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.0974369008021914, LR: 0.0003 +[2026-02-27 17:21:39] (step=0005610) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.097632557229505, LR: 0.0003 +[2026-02-27 17:21:46] (step=0005611) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.0978282136568187, LR: 0.0003 +[2026-02-27 17:21:54] (step=0005612) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.0980238700841323, LR: 0.0003 +[2026-02-27 17:22:02] (step=0005613) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.0982195265114458, LR: 0.0003 +[2026-02-27 17:22:10] (step=0005614) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.0984151829387596, LR: 0.0003 +[2026-02-27 17:22:18] (step=0005615) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 1.0986108393660732, LR: 0.0003 +[2026-02-27 17:22:26] (step=0005616) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.0988064957933867, LR: 0.0003 +[2026-02-27 17:22:33] (step=0005617) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 1.0990021522207005, LR: 0.0003 +[2026-02-27 17:22:41] (step=0005618) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.099197808648014, LR: 0.0003 +[2026-02-27 17:22:49] (step=0005619) Train Loss: 0.4447, Train Steps/Sec: 0.12, Epoch: 1.0993934650753276, LR: 0.0003 +[2026-02-27 17:22:57] (step=0005620) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.0995891215026414, LR: 0.0003 +[2026-02-27 17:23:05] (step=0005621) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.099784777929955, LR: 0.0003 +[2026-02-27 17:23:13] (step=0005622) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 1.0999804343572686, LR: 0.0003 +[2026-02-27 17:23:21] (step=0005623) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.1001760907845823, LR: 0.0003 +[2026-02-27 17:23:29] (step=0005624) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.100371747211896, LR: 0.0003 +[2026-02-27 17:23:36] (step=0005625) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.1005674036392095, LR: 0.0003 +[2026-02-27 17:23:44] (step=0005626) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.1007630600665232, LR: 0.0003 +[2026-02-27 17:23:52] (step=0005627) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.1009587164938368, LR: 0.0003 +[2026-02-27 17:24:00] (step=0005628) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.1011543729211504, LR: 0.0003 +[2026-02-27 17:24:08] (step=0005629) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.1013500293484642, LR: 0.0003 +[2026-02-27 17:24:16] (step=0005630) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.1015456857757777, LR: 0.0003 +[2026-02-27 17:24:24] (step=0005631) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.1017413422030913, LR: 0.0003 +[2026-02-27 17:24:31] (step=0005632) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.101936998630405, LR: 0.0003 +[2026-02-27 17:24:39] (step=0005633) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.1021326550577186, LR: 0.0003 +[2026-02-27 17:24:47] (step=0005634) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.1023283114850322, LR: 0.0003 +[2026-02-27 17:24:55] (step=0005635) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.102523967912346, LR: 0.0003 +[2026-02-27 17:25:03] (step=0005636) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.1027196243396595, LR: 0.0003 +[2026-02-27 17:25:11] (step=0005637) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.102915280766973, LR: 0.0003 +[2026-02-27 17:25:18] (step=0005638) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.1031109371942869, LR: 0.0003 +[2026-02-27 17:25:26] (step=0005639) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.1033065936216004, LR: 0.0003 +[2026-02-27 17:25:34] (step=0005640) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 1.103502250048914, LR: 0.0003 +[2026-02-27 17:25:42] (step=0005641) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.1036979064762278, LR: 0.0003 +[2026-02-27 17:25:50] (step=0005642) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.1038935629035413, LR: 0.0003 +[2026-02-27 17:25:58] (step=0005643) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 1.104089219330855, LR: 0.0003 +[2026-02-27 17:26:06] (step=0005644) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.1042848757581687, LR: 0.0003 +[2026-02-27 17:26:13] (step=0005645) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.1044805321854823, LR: 0.0003 +[2026-02-27 17:26:21] (step=0005646) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 1.104676188612796, LR: 0.0003 +[2026-02-27 17:26:29] (step=0005647) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.1048718450401096, LR: 0.0003 +[2026-02-27 17:26:37] (step=0005648) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.1050675014674232, LR: 0.0003 +[2026-02-27 17:26:45] (step=0005649) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.105263157894737, LR: 0.0003 +[2026-02-27 17:26:53] (step=0005650) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.1054588143220505, LR: 0.0003 +[2026-02-27 17:27:01] (step=0005651) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.105654470749364, LR: 0.0003 +[2026-02-27 17:27:08] (step=0005652) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.1058501271766779, LR: 0.0003 +[2026-02-27 17:27:16] (step=0005653) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.1060457836039914, LR: 0.0003 +[2026-02-27 17:27:24] (step=0005654) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.106241440031305, LR: 0.0003 +[2026-02-27 17:27:32] (step=0005655) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.1064370964586188, LR: 0.0003 +[2026-02-27 17:27:40] (step=0005656) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.1066327528859323, LR: 0.0003 +[2026-02-27 17:27:48] (step=0005657) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.1068284093132459, LR: 0.0003 +[2026-02-27 17:27:55] (step=0005658) Train Loss: 0.4763, Train Steps/Sec: 0.13, Epoch: 1.1070240657405597, LR: 0.0003 +[2026-02-27 17:28:03] (step=0005659) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.1072197221678732, LR: 0.0003 +[2026-02-27 17:28:11] (step=0005660) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.1074153785951868, LR: 0.0003 +[2026-02-27 17:28:19] (step=0005661) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.1076110350225006, LR: 0.0003 +[2026-02-27 17:28:27] (step=0005662) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.1078066914498141, LR: 0.0003 +[2026-02-27 17:28:35] (step=0005663) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 1.1080023478771277, LR: 0.0003 +[2026-02-27 17:28:43] (step=0005664) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 1.1081980043044415, LR: 0.0003 +[2026-02-27 17:28:50] (step=0005665) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.108393660731755, LR: 0.0003 +[2026-02-27 17:28:58] (step=0005666) Train Loss: 0.4605, Train Steps/Sec: 0.12, Epoch: 1.1085893171590686, LR: 0.0003 +[2026-02-27 17:29:06] (step=0005667) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.1087849735863824, LR: 0.0003 +[2026-02-27 17:29:14] (step=0005668) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.108980630013696, LR: 0.0003 +[2026-02-27 17:29:22] (step=0005669) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.1091762864410095, LR: 0.0003 +[2026-02-27 17:29:30] (step=0005670) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.1093719428683233, LR: 0.0003 +[2026-02-27 17:29:38] (step=0005671) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 1.1095675992956369, LR: 0.0003 +[2026-02-27 17:29:47] (step=0005672) Train Loss: 0.4520, Train Steps/Sec: 0.11, Epoch: 1.1097632557229504, LR: 0.0003 +[2026-02-27 17:30:00] (step=0005673) Train Loss: 0.4671, Train Steps/Sec: 0.07, Epoch: 1.1099589121502642, LR: 0.0003 +[2026-02-27 17:30:14] (step=0005674) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 1.1101545685775778, LR: 0.0003 +[2026-02-27 17:30:23] (step=0005675) Train Loss: 0.4564, Train Steps/Sec: 0.11, Epoch: 1.1103502250048913, LR: 0.0003 +[2026-02-27 17:30:31] (step=0005676) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 1.1105458814322051, LR: 0.0003 +[2026-02-27 17:30:39] (step=0005677) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.1107415378595187, LR: 0.0003 +[2026-02-27 17:30:47] (step=0005678) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.1109371942868322, LR: 0.0003 +[2026-02-27 17:30:55] (step=0005679) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.111132850714146, LR: 0.0003 +[2026-02-27 17:31:02] (step=0005680) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.1113285071414596, LR: 0.0003 +[2026-02-27 17:31:10] (step=0005681) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.1115241635687731, LR: 0.0003 +[2026-02-27 17:31:18] (step=0005682) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 1.111719819996087, LR: 0.0003 +[2026-02-27 17:31:26] (step=0005683) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.1119154764234005, LR: 0.0003 +[2026-02-27 17:31:34] (step=0005684) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.112111132850714, LR: 0.0003 +[2026-02-27 17:31:42] (step=0005685) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.1123067892780278, LR: 0.0003 +[2026-02-27 17:31:50] (step=0005686) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.1125024457053414, LR: 0.0003 +[2026-02-27 17:31:57] (step=0005687) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 1.112698102132655, LR: 0.0003 +[2026-02-27 17:32:05] (step=0005688) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.1128937585599687, LR: 0.0003 +[2026-02-27 17:32:13] (step=0005689) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 1.1130894149872823, LR: 0.0003 +[2026-02-27 17:32:21] (step=0005690) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 1.1132850714145959, LR: 0.0003 +[2026-02-27 17:32:29] (step=0005691) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.1134807278419097, LR: 0.0003 +[2026-02-27 17:32:37] (step=0005692) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 1.1136763842692232, LR: 0.0003 +[2026-02-27 17:32:45] (step=0005693) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.1138720406965368, LR: 0.0003 +[2026-02-27 17:32:52] (step=0005694) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.1140676971238506, LR: 0.0003 +[2026-02-27 17:33:00] (step=0005695) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.1142633535511641, LR: 0.0003 +[2026-02-27 17:33:08] (step=0005696) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.1144590099784777, LR: 0.0003 +[2026-02-27 17:33:16] (step=0005697) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.1146546664057915, LR: 0.0003 +[2026-02-27 17:33:24] (step=0005698) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.114850322833105, LR: 0.0003 +[2026-02-27 17:33:32] (step=0005699) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.1150459792604186, LR: 0.0003 +[2026-02-27 17:33:39] (step=0005700) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 1.1152416356877324, LR: 0.0003 +[2026-02-27 17:33:47] (step=0005701) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.115437292115046, LR: 0.0003 +[2026-02-27 17:33:55] (step=0005702) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.1156329485423597, LR: 0.0003 +[2026-02-27 17:34:03] (step=0005703) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.1158286049696733, LR: 0.0003 +[2026-02-27 17:34:11] (step=0005704) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 1.1160242613969868, LR: 0.0003 +[2026-02-27 17:34:19] (step=0005705) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.1162199178243006, LR: 0.0003 +[2026-02-27 17:34:27] (step=0005706) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.1164155742516142, LR: 0.0003 +[2026-02-27 17:34:34] (step=0005707) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.1166112306789278, LR: 0.0003 +[2026-02-27 17:34:42] (step=0005708) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 1.1168068871062415, LR: 0.0003 +[2026-02-27 17:34:50] (step=0005709) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 1.117002543533555, LR: 0.0003 +[2026-02-27 17:34:58] (step=0005710) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.1171981999608687, LR: 0.0003 +[2026-02-27 17:35:06] (step=0005711) Train Loss: 0.4813, Train Steps/Sec: 0.13, Epoch: 1.1173938563881824, LR: 0.0003 +[2026-02-27 17:35:14] (step=0005712) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.117589512815496, LR: 0.0003 +[2026-02-27 17:35:22] (step=0005713) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.1177851692428096, LR: 0.0003 +[2026-02-27 17:35:29] (step=0005714) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 1.1179808256701234, LR: 0.0003 +[2026-02-27 17:35:37] (step=0005715) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 1.118176482097437, LR: 0.0003 +[2026-02-27 17:35:45] (step=0005716) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.1183721385247505, LR: 0.0003 +[2026-02-27 17:35:53] (step=0005717) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.1185677949520643, LR: 0.0003 +[2026-02-27 17:36:01] (step=0005718) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.1187634513793778, LR: 0.0003 +[2026-02-27 17:36:10] (step=0005719) Train Loss: 0.4492, Train Steps/Sec: 0.11, Epoch: 1.1189591078066914, LR: 0.0003 +[2026-02-27 17:36:23] (step=0005720) Train Loss: 0.4706, Train Steps/Sec: 0.07, Epoch: 1.1191547642340052, LR: 0.0003 +[2026-02-27 17:36:37] (step=0005721) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 1.1193504206613187, LR: 0.0003 +[2026-02-27 17:36:46] (step=0005722) Train Loss: 0.4592, Train Steps/Sec: 0.10, Epoch: 1.1195460770886323, LR: 0.0003 +[2026-02-27 17:36:54] (step=0005723) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.119741733515946, LR: 0.0003 +[2026-02-27 17:37:02] (step=0005724) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.1199373899432596, LR: 0.0003 +[2026-02-27 17:37:10] (step=0005725) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.1201330463705732, LR: 0.0003 +[2026-02-27 17:37:18] (step=0005726) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.120328702797887, LR: 0.0003 +[2026-02-27 17:37:26] (step=0005727) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.1205243592252006, LR: 0.0003 +[2026-02-27 17:37:33] (step=0005728) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.1207200156525141, LR: 0.0003 +[2026-02-27 17:37:41] (step=0005729) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.120915672079828, LR: 0.0003 +[2026-02-27 17:37:49] (step=0005730) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.1211113285071415, LR: 0.0003 +[2026-02-27 17:37:57] (step=0005731) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.121306984934455, LR: 0.0003 +[2026-02-27 17:38:05] (step=0005732) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.1215026413617688, LR: 0.0003 +[2026-02-27 17:38:13] (step=0005733) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.1216982977890824, LR: 0.0003 +[2026-02-27 17:38:21] (step=0005734) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 1.121893954216396, LR: 0.0003 +[2026-02-27 17:38:28] (step=0005735) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.1220896106437097, LR: 0.0003 +[2026-02-27 17:38:36] (step=0005736) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.1222852670710233, LR: 0.0003 +[2026-02-27 17:38:44] (step=0005737) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.1224809234983368, LR: 0.0003 +[2026-02-27 17:38:52] (step=0005738) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.1226765799256506, LR: 0.0003 +[2026-02-27 17:39:00] (step=0005739) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.1228722363529642, LR: 0.0003 +[2026-02-27 17:39:08] (step=0005740) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.1230678927802777, LR: 0.0003 +[2026-02-27 17:39:15] (step=0005741) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.1232635492075915, LR: 0.0003 +[2026-02-27 17:39:23] (step=0005742) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.123459205634905, LR: 0.0003 +[2026-02-27 17:39:31] (step=0005743) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.1236548620622187, LR: 0.0003 +[2026-02-27 17:39:39] (step=0005744) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.1238505184895324, LR: 0.0003 +[2026-02-27 17:39:47] (step=0005745) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.124046174916846, LR: 0.0003 +[2026-02-27 17:39:55] (step=0005746) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.1242418313441596, LR: 0.0003 +[2026-02-27 17:40:03] (step=0005747) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.1244374877714733, LR: 0.0003 +[2026-02-27 17:40:11] (step=0005748) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.124633144198787, LR: 0.0003 +[2026-02-27 17:40:18] (step=0005749) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.1248288006261005, LR: 0.0003 +[2026-02-27 17:40:26] (step=0005750) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.1250244570534143, LR: 0.0003 +[2026-02-27 17:40:34] (step=0005751) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.1252201134807278, LR: 0.0003 +[2026-02-27 17:40:42] (step=0005752) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.1254157699080414, LR: 0.0003 +[2026-02-27 17:40:50] (step=0005753) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.1256114263353552, LR: 0.0003 +[2026-02-27 17:40:58] (step=0005754) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.1258070827626687, LR: 0.0003 +[2026-02-27 17:41:05] (step=0005755) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.1260027391899823, LR: 0.0003 +[2026-02-27 17:41:13] (step=0005756) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.126198395617296, LR: 0.0003 +[2026-02-27 17:41:21] (step=0005757) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 1.1263940520446096, LR: 0.0003 +[2026-02-27 17:41:29] (step=0005758) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.1265897084719234, LR: 0.0003 +[2026-02-27 17:41:37] (step=0005759) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 1.126785364899237, LR: 0.0003 +[2026-02-27 17:41:45] (step=0005760) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.1269810213265505, LR: 0.0003 +[2026-02-27 17:41:53] (step=0005761) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 1.1271766777538643, LR: 0.0003 +[2026-02-27 17:42:00] (step=0005762) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.1273723341811779, LR: 0.0003 +[2026-02-27 17:42:08] (step=0005763) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 1.1275679906084914, LR: 0.0003 +[2026-02-27 17:42:18] (step=0005764) Train Loss: 0.4557, Train Steps/Sec: 0.10, Epoch: 1.1277636470358052, LR: 0.0003 +[2026-02-27 17:42:31] (step=0005765) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.1279593034631188, LR: 0.0003 +[2026-02-27 17:42:45] (step=0005766) Train Loss: 0.4614, Train Steps/Sec: 0.07, Epoch: 1.1281549598904324, LR: 0.0003 +[2026-02-27 17:42:59] (step=0005767) Train Loss: 0.4737, Train Steps/Sec: 0.07, Epoch: 1.1283506163177461, LR: 0.0003 +[2026-02-27 17:43:12] (step=0005768) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.1285462727450597, LR: 0.0003 +[2026-02-27 17:43:26] (step=0005769) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 1.1287419291723733, LR: 0.0003 +[2026-02-27 17:43:39] (step=0005770) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 1.128937585599687, LR: 0.0003 +[2026-02-27 17:43:47] (step=0005771) Train Loss: 0.4695, Train Steps/Sec: 0.12, Epoch: 1.1291332420270006, LR: 0.0003 +[2026-02-27 17:43:55] (step=0005772) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.1293288984543142, LR: 0.0003 +[2026-02-27 17:44:03] (step=0005773) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.129524554881628, LR: 0.0003 +[2026-02-27 17:44:11] (step=0005774) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 1.1297202113089415, LR: 0.0003 +[2026-02-27 17:44:19] (step=0005775) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.129915867736255, LR: 0.0003 +[2026-02-27 17:44:27] (step=0005776) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.1301115241635689, LR: 0.0003 +[2026-02-27 17:44:35] (step=0005777) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.1303071805908824, LR: 0.0003 +[2026-02-27 17:44:42] (step=0005778) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.130502837018196, LR: 0.0003 +[2026-02-27 17:44:50] (step=0005779) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.1306984934455098, LR: 0.0003 +[2026-02-27 17:44:58] (step=0005780) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.1308941498728233, LR: 0.0003 +[2026-02-27 17:45:06] (step=0005781) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.131089806300137, LR: 0.0003 +[2026-02-27 17:45:14] (step=0005782) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 1.1312854627274507, LR: 0.0003 +[2026-02-27 17:45:22] (step=0005783) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.1314811191547642, LR: 0.0003 +[2026-02-27 17:45:29] (step=0005784) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.1316767755820778, LR: 0.0003 +[2026-02-27 17:45:37] (step=0005785) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.1318724320093916, LR: 0.0003 +[2026-02-27 17:45:45] (step=0005786) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.1320680884367051, LR: 0.0003 +[2026-02-27 17:45:53] (step=0005787) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.1322637448640187, LR: 0.0003 +[2026-02-27 17:46:01] (step=0005788) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 1.1324594012913325, LR: 0.0003 +[2026-02-27 17:46:09] (step=0005789) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.132655057718646, LR: 0.0003 +[2026-02-27 17:46:17] (step=0005790) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.1328507141459596, LR: 0.0003 +[2026-02-27 17:46:24] (step=0005791) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.1330463705732734, LR: 0.0003 +[2026-02-27 17:46:32] (step=0005792) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 1.133242027000587, LR: 0.0003 +[2026-02-27 17:46:40] (step=0005793) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.1334376834279005, LR: 0.0003 +[2026-02-27 17:46:48] (step=0005794) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 1.1336333398552143, LR: 0.0003 +[2026-02-27 17:46:56] (step=0005795) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.1338289962825279, LR: 0.0003 +[2026-02-27 17:47:04] (step=0005796) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.1340246527098414, LR: 0.0003 +[2026-02-27 17:47:12] (step=0005797) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 1.1342203091371552, LR: 0.0003 +[2026-02-27 17:47:19] (step=0005798) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.1344159655644688, LR: 0.0003 +[2026-02-27 17:47:27] (step=0005799) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 1.1346116219917823, LR: 0.0003 +[2026-02-27 17:47:35] (step=0005800) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.1348072784190961, LR: 0.0003 +[2026-02-27 17:47:43] (step=0005801) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.1350029348464097, LR: 0.0003 +[2026-02-27 17:47:51] (step=0005802) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 1.1351985912737232, LR: 0.0003 +[2026-02-27 17:47:59] (step=0005803) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.135394247701037, LR: 0.0003 +[2026-02-27 17:48:06] (step=0005804) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.1355899041283506, LR: 0.0003 +[2026-02-27 17:48:14] (step=0005805) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.1357855605556642, LR: 0.0003 +[2026-02-27 17:48:22] (step=0005806) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.135981216982978, LR: 0.0003 +[2026-02-27 17:48:30] (step=0005807) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 1.1361768734102915, LR: 0.0003 +[2026-02-27 17:48:38] (step=0005808) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.136372529837605, LR: 0.0003 +[2026-02-27 17:48:46] (step=0005809) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.1365681862649188, LR: 0.0003 +[2026-02-27 17:48:54] (step=0005810) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.1367638426922324, LR: 0.0003 +[2026-02-27 17:49:02] (step=0005811) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.136959499119546, LR: 0.0003 +[2026-02-27 17:49:09] (step=0005812) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 1.1371551555468598, LR: 0.0003 +[2026-02-27 17:49:17] (step=0005813) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.1373508119741733, LR: 0.0003 +[2026-02-27 17:49:25] (step=0005814) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.137546468401487, LR: 0.0003 +[2026-02-27 17:49:33] (step=0005815) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.1377421248288007, LR: 0.0003 +[2026-02-27 17:49:41] (step=0005816) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.1379377812561142, LR: 0.0003 +[2026-02-27 17:49:49] (step=0005817) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 1.138133437683428, LR: 0.0003 +[2026-02-27 17:49:56] (step=0005818) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.1383290941107416, LR: 0.0003 +[2026-02-27 17:50:04] (step=0005819) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 1.1385247505380551, LR: 0.0003 +[2026-02-27 17:50:12] (step=0005820) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.138720406965369, LR: 0.0003 +[2026-02-27 17:50:20] (step=0005821) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.1389160633926825, LR: 0.0003 +[2026-02-27 17:50:28] (step=0005822) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.139111719819996, LR: 0.0003 +[2026-02-27 17:50:36] (step=0005823) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.1393073762473098, LR: 0.0003 +[2026-02-27 17:50:44] (step=0005824) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.1395030326746234, LR: 0.0003 +[2026-02-27 17:50:51] (step=0005825) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.139698689101937, LR: 0.0003 +[2026-02-27 17:50:59] (step=0005826) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.1398943455292507, LR: 0.0003 +[2026-02-27 17:51:07] (step=0005827) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.1400900019565643, LR: 0.0003 +[2026-02-27 17:51:15] (step=0005828) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.1402856583838779, LR: 0.0003 +[2026-02-27 17:51:23] (step=0005829) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.1404813148111916, LR: 0.0003 +[2026-02-27 17:51:31] (step=0005830) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.1406769712385052, LR: 0.0003 +[2026-02-27 17:51:38] (step=0005831) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.1408726276658188, LR: 0.0003 +[2026-02-27 17:51:46] (step=0005832) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 1.1410682840931325, LR: 0.0003 +[2026-02-27 17:51:54] (step=0005833) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.141263940520446, LR: 0.0003 +[2026-02-27 17:52:02] (step=0005834) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.1414595969477597, LR: 0.0003 +[2026-02-27 17:52:10] (step=0005835) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.1416552533750735, LR: 0.0003 +[2026-02-27 17:52:18] (step=0005836) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.141850909802387, LR: 0.0003 +[2026-02-27 17:52:25] (step=0005837) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.1420465662297006, LR: 0.0003 +[2026-02-27 17:52:33] (step=0005838) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.1422422226570144, LR: 0.0003 +[2026-02-27 17:52:41] (step=0005839) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.142437879084328, LR: 0.0003 +[2026-02-27 17:52:49] (step=0005840) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.1426335355116415, LR: 0.0003 +[2026-02-27 17:52:57] (step=0005841) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.1428291919389553, LR: 0.0003 +[2026-02-27 17:53:05] (step=0005842) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.1430248483662688, LR: 0.0003 +[2026-02-27 17:53:13] (step=0005843) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.1432205047935824, LR: 0.0003 +[2026-02-27 17:53:20] (step=0005844) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.1434161612208962, LR: 0.0003 +[2026-02-27 17:53:28] (step=0005845) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.1436118176482097, LR: 0.0003 +[2026-02-27 17:53:36] (step=0005846) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.1438074740755233, LR: 0.0003 +[2026-02-27 17:53:44] (step=0005847) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.144003130502837, LR: 0.0003 +[2026-02-27 17:53:52] (step=0005848) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.1441987869301506, LR: 0.0003 +[2026-02-27 17:54:00] (step=0005849) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.1443944433574642, LR: 0.0003 +[2026-02-27 17:54:08] (step=0005850) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.144590099784778, LR: 0.0003 +[2026-02-27 17:54:15] (step=0005851) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.1447857562120916, LR: 0.0003 +[2026-02-27 17:54:23] (step=0005852) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 1.1449814126394051, LR: 0.0003 +[2026-02-27 17:54:31] (step=0005853) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.145177069066719, LR: 0.0003 +[2026-02-27 17:54:39] (step=0005854) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.1453727254940325, LR: 0.0003 +[2026-02-27 17:54:47] (step=0005855) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.145568381921346, LR: 0.0003 +[2026-02-27 17:54:55] (step=0005856) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.1457640383486598, LR: 0.0003 +[2026-02-27 17:55:03] (step=0005857) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.1459596947759734, LR: 0.0003 +[2026-02-27 17:55:10] (step=0005858) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.146155351203287, LR: 0.0003 +[2026-02-27 17:55:18] (step=0005859) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.1463510076306007, LR: 0.0003 +[2026-02-27 17:55:26] (step=0005860) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.1465466640579143, LR: 0.0003 +[2026-02-27 17:55:34] (step=0005861) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.1467423204852278, LR: 0.0003 +[2026-02-27 17:55:42] (step=0005862) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.1469379769125416, LR: 0.0003 +[2026-02-27 17:55:50] (step=0005863) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.1471336333398552, LR: 0.0003 +[2026-02-27 17:55:57] (step=0005864) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.1473292897671687, LR: 0.0003 +[2026-02-27 17:56:05] (step=0005865) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.1475249461944825, LR: 0.0003 +[2026-02-27 17:56:13] (step=0005866) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.147720602621796, LR: 0.0003 +[2026-02-27 17:56:21] (step=0005867) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.1479162590491097, LR: 0.0003 +[2026-02-27 17:56:29] (step=0005868) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.1481119154764234, LR: 0.0003 +[2026-02-27 17:56:37] (step=0005869) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.148307571903737, LR: 0.0003 +[2026-02-27 17:56:45] (step=0005870) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.1485032283310508, LR: 0.0003 +[2026-02-27 17:56:52] (step=0005871) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.1486988847583643, LR: 0.0003 +[2026-02-27 17:57:00] (step=0005872) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.148894541185678, LR: 0.0003 +[2026-02-27 17:57:08] (step=0005873) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.1490901976129917, LR: 0.0003 +[2026-02-27 17:57:16] (step=0005874) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.1492858540403053, LR: 0.0003 +[2026-02-27 17:57:24] (step=0005875) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.1494815104676188, LR: 0.0003 +[2026-02-27 17:57:32] (step=0005876) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 1.1496771668949326, LR: 0.0003 +[2026-02-27 17:57:39] (step=0005877) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.1498728233222462, LR: 0.0003 +[2026-02-27 17:57:47] (step=0005878) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.1500684797495597, LR: 0.0003 +[2026-02-27 17:57:55] (step=0005879) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.1502641361768735, LR: 0.0003 +[2026-02-27 17:58:03] (step=0005880) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.150459792604187, LR: 0.0003 +[2026-02-27 17:58:11] (step=0005881) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.1506554490315006, LR: 0.0003 +[2026-02-27 17:58:19] (step=0005882) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.1508511054588144, LR: 0.0003 +[2026-02-27 17:58:27] (step=0005883) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.151046761886128, LR: 0.0003 +[2026-02-27 17:58:34] (step=0005884) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.1512424183134415, LR: 0.0003 +[2026-02-27 17:58:42] (step=0005885) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.1514380747407553, LR: 0.0003 +[2026-02-27 17:58:50] (step=0005886) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.1516337311680689, LR: 0.0003 +[2026-02-27 17:58:58] (step=0005887) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.1518293875953824, LR: 0.0003 +[2026-02-27 17:59:06] (step=0005888) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 1.1520250440226962, LR: 0.0003 +[2026-02-27 17:59:14] (step=0005889) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.1522207004500098, LR: 0.0003 +[2026-02-27 17:59:21] (step=0005890) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.1524163568773234, LR: 0.0003 +[2026-02-27 17:59:29] (step=0005891) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 1.1526120133046371, LR: 0.0003 +[2026-02-27 17:59:37] (step=0005892) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.1528076697319507, LR: 0.0003 +[2026-02-27 17:59:45] (step=0005893) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.1530033261592643, LR: 0.0003 +[2026-02-27 17:59:53] (step=0005894) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.153198982586578, LR: 0.0003 +[2026-02-27 18:00:01] (step=0005895) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.1533946390138916, LR: 0.0003 +[2026-02-27 18:00:09] (step=0005896) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 1.1535902954412052, LR: 0.0003 +[2026-02-27 18:00:16] (step=0005897) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.153785951868519, LR: 0.0003 +[2026-02-27 18:00:24] (step=0005898) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.1539816082958325, LR: 0.0003 +[2026-02-27 18:00:32] (step=0005899) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 1.154177264723146, LR: 0.0003 +[2026-02-27 18:00:40] (step=0005900) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.1543729211504599, LR: 0.0003 +[2026-02-27 18:00:48] (step=0005901) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.1545685775777734, LR: 0.0003 +[2026-02-27 18:00:56] (step=0005902) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.154764234005087, LR: 0.0003 +[2026-02-27 18:01:04] (step=0005903) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.1549598904324008, LR: 0.0003 +[2026-02-27 18:01:12] (step=0005904) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.1551555468597143, LR: 0.0003 +[2026-02-27 18:01:19] (step=0005905) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.155351203287028, LR: 0.0003 +[2026-02-27 18:01:27] (step=0005906) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.1555468597143417, LR: 0.0003 +[2026-02-27 18:01:35] (step=0005907) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.1557425161416552, LR: 0.0003 +[2026-02-27 18:01:43] (step=0005908) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.1559381725689688, LR: 0.0003 +[2026-02-27 18:01:51] (step=0005909) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.1561338289962826, LR: 0.0003 +[2026-02-27 18:01:59] (step=0005910) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.1563294854235961, LR: 0.0003 +[2026-02-27 18:02:06] (step=0005911) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.1565251418509097, LR: 0.0003 +[2026-02-27 18:02:14] (step=0005912) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.1567207982782235, LR: 0.0003 +[2026-02-27 18:02:22] (step=0005913) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.156916454705537, LR: 0.0003 +[2026-02-27 18:02:30] (step=0005914) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.1571121111328506, LR: 0.0003 +[2026-02-27 18:02:38] (step=0005915) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.1573077675601644, LR: 0.0003 +[2026-02-27 18:02:46] (step=0005916) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.157503423987478, LR: 0.0003 +[2026-02-27 18:02:54] (step=0005917) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.1576990804147915, LR: 0.0003 +[2026-02-27 18:03:01] (step=0005918) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.1578947368421053, LR: 0.0003 +[2026-02-27 18:03:09] (step=0005919) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.1580903932694189, LR: 0.0003 +[2026-02-27 18:03:17] (step=0005920) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.1582860496967324, LR: 0.0003 +[2026-02-27 18:03:25] (step=0005921) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.1584817061240462, LR: 0.0003 +[2026-02-27 18:03:33] (step=0005922) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.1586773625513598, LR: 0.0003 +[2026-02-27 18:03:41] (step=0005923) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.1588730189786733, LR: 0.0003 +[2026-02-27 18:03:48] (step=0005924) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.1590686754059871, LR: 0.0003 +[2026-02-27 18:03:56] (step=0005925) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.1592643318333007, LR: 0.0003 +[2026-02-27 18:04:04] (step=0005926) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.1594599882606145, LR: 0.0003 +[2026-02-27 18:04:12] (step=0005927) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.159655644687928, LR: 0.0003 +[2026-02-27 18:04:20] (step=0005928) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.1598513011152416, LR: 0.0003 +[2026-02-27 18:04:28] (step=0005929) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.1600469575425554, LR: 0.0003 +[2026-02-27 18:04:36] (step=0005930) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.160242613969869, LR: 0.0003 +[2026-02-27 18:04:43] (step=0005931) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.1604382703971825, LR: 0.0003 +[2026-02-27 18:04:51] (step=0005932) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.1606339268244963, LR: 0.0003 +[2026-02-27 18:04:59] (step=0005933) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.1608295832518098, LR: 0.0003 +[2026-02-27 18:05:07] (step=0005934) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.1610252396791234, LR: 0.0003 +[2026-02-27 18:05:15] (step=0005935) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.1612208961064372, LR: 0.0003 +[2026-02-27 18:05:23] (step=0005936) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 1.1614165525337508, LR: 0.0003 +[2026-02-27 18:05:30] (step=0005937) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 1.1616122089610643, LR: 0.0003 +[2026-02-27 18:05:38] (step=0005938) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.161807865388378, LR: 0.0003 +[2026-02-27 18:05:46] (step=0005939) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.1620035218156917, LR: 0.0003 +[2026-02-27 18:05:54] (step=0005940) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.1621991782430052, LR: 0.0003 +[2026-02-27 18:06:02] (step=0005941) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 1.162394834670319, LR: 0.0003 +[2026-02-27 18:06:10] (step=0005942) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.1625904910976326, LR: 0.0003 +[2026-02-27 18:06:18] (step=0005943) Train Loss: 0.4755, Train Steps/Sec: 0.13, Epoch: 1.1627861475249461, LR: 0.0003 +[2026-02-27 18:06:25] (step=0005944) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.16298180395226, LR: 0.0003 +[2026-02-27 18:06:33] (step=0005945) Train Loss: 0.4679, Train Steps/Sec: 0.12, Epoch: 1.1631774603795735, LR: 0.0003 +[2026-02-27 18:06:41] (step=0005946) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 1.163373116806887, LR: 0.0003 +[2026-02-27 18:06:49] (step=0005947) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 1.1635687732342008, LR: 0.0003 +[2026-02-27 18:06:57] (step=0005948) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.1637644296615144, LR: 0.0003 +[2026-02-27 18:07:05] (step=0005949) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.163960086088828, LR: 0.0003 +[2026-02-27 18:07:13] (step=0005950) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 1.1641557425161417, LR: 0.0003 +[2026-02-27 18:07:20] (step=0005951) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.1643513989434553, LR: 0.0003 +[2026-02-27 18:07:28] (step=0005952) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.1645470553707689, LR: 0.0003 +[2026-02-27 18:07:36] (step=0005953) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.1647427117980826, LR: 0.0003 +[2026-02-27 18:07:44] (step=0005954) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.1649383682253962, LR: 0.0003 +[2026-02-27 18:07:52] (step=0005955) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.1651340246527098, LR: 0.0003 +[2026-02-27 18:08:00] (step=0005956) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.1653296810800235, LR: 0.0003 +[2026-02-27 18:08:07] (step=0005957) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.1655253375073371, LR: 0.0003 +[2026-02-27 18:08:15] (step=0005958) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.1657209939346507, LR: 0.0003 +[2026-02-27 18:08:23] (step=0005959) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.1659166503619645, LR: 0.0003 +[2026-02-27 18:08:31] (step=0005960) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.166112306789278, LR: 0.0003 +[2026-02-27 18:08:39] (step=0005961) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.1663079632165916, LR: 0.0003 +[2026-02-27 18:08:47] (step=0005962) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 1.1665036196439054, LR: 0.0003 +[2026-02-27 18:08:55] (step=0005963) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 1.166699276071219, LR: 0.0003 +[2026-02-27 18:09:02] (step=0005964) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.1668949324985325, LR: 0.0003 +[2026-02-27 18:09:10] (step=0005965) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.1670905889258463, LR: 0.0003 +[2026-02-27 18:09:18] (step=0005966) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.1672862453531598, LR: 0.0003 +[2026-02-27 18:09:26] (step=0005967) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.1674819017804734, LR: 0.0003 +[2026-02-27 18:09:34] (step=0005968) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.1676775582077872, LR: 0.0003 +[2026-02-27 18:09:42] (step=0005969) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.1678732146351007, LR: 0.0003 +[2026-02-27 18:09:50] (step=0005970) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.1680688710624143, LR: 0.0003 +[2026-02-27 18:09:57] (step=0005971) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 1.168264527489728, LR: 0.0003 +[2026-02-27 18:10:05] (step=0005972) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.1684601839170417, LR: 0.0003 +[2026-02-27 18:10:13] (step=0005973) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.1686558403443552, LR: 0.0003 +[2026-02-27 18:10:21] (step=0005974) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.168851496771669, LR: 0.0003 +[2026-02-27 18:10:29] (step=0005975) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.1690471531989826, LR: 0.0003 +[2026-02-27 18:10:37] (step=0005976) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.1692428096262961, LR: 0.0003 +[2026-02-27 18:10:44] (step=0005977) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.16943846605361, LR: 0.0003 +[2026-02-27 18:10:52] (step=0005978) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.1696341224809235, LR: 0.0003 +[2026-02-27 18:11:00] (step=0005979) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.169829778908237, LR: 0.0003 +[2026-02-27 18:11:08] (step=0005980) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.1700254353355508, LR: 0.0003 +[2026-02-27 18:11:16] (step=0005981) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.1702210917628644, LR: 0.0003 +[2026-02-27 18:11:24] (step=0005982) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.1704167481901782, LR: 0.0003 +[2026-02-27 18:11:31] (step=0005983) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.1706124046174917, LR: 0.0003 +[2026-02-27 18:11:39] (step=0005984) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.1708080610448053, LR: 0.0003 +[2026-02-27 18:11:47] (step=0005985) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 1.171003717472119, LR: 0.0003 +[2026-02-27 18:11:55] (step=0005986) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.1711993738994326, LR: 0.0003 +[2026-02-27 18:12:03] (step=0005987) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 1.1713950303267462, LR: 0.0003 +[2026-02-27 18:12:11] (step=0005988) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.17159068675406, LR: 0.0003 +[2026-02-27 18:12:18] (step=0005989) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.1717863431813735, LR: 0.0003 +[2026-02-27 18:12:26] (step=0005990) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.171981999608687, LR: 0.0003 +[2026-02-27 18:12:34] (step=0005991) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.1721776560360009, LR: 0.0003 +[2026-02-27 18:12:42] (step=0005992) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.1723733124633144, LR: 0.0003 +[2026-02-27 18:12:50] (step=0005993) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 1.172568968890628, LR: 0.0003 +[2026-02-27 18:12:58] (step=0005994) Train Loss: 0.4523, Train Steps/Sec: 0.12, Epoch: 1.1727646253179418, LR: 0.0003 +[2026-02-27 18:13:06] (step=0005995) Train Loss: 0.4749, Train Steps/Sec: 0.13, Epoch: 1.1729602817452554, LR: 0.0003 +[2026-02-27 18:13:14] (step=0005996) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.173155938172569, LR: 0.0003 +[2026-02-27 18:13:22] (step=0005997) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 1.1733515945998827, LR: 0.0003 +[2026-02-27 18:13:29] (step=0005998) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.1735472510271963, LR: 0.0003 +[2026-02-27 18:13:37] (step=0005999) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.1737429074545098, LR: 0.0003 +[2026-02-27 18:13:45] (step=0006000) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.1739385638818236, LR: 0.0003 +[2026-02-27 18:13:45] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0006000/ +[2026-02-27 18:13:53] (step=0006001) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 1.1741342203091372, LR: 0.0003 +[2026-02-27 18:14:01] (step=0006002) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.1743298767364507, LR: 0.0003 +[2026-02-27 18:14:09] (step=0006003) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.1745255331637645, LR: 0.0003 +[2026-02-27 18:14:16] (step=0006004) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 1.174721189591078, LR: 0.0003 +[2026-02-27 18:14:24] (step=0006005) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.1749168460183916, LR: 0.0003 +[2026-02-27 18:14:32] (step=0006006) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.1751125024457054, LR: 0.0003 +[2026-02-27 18:14:40] (step=0006007) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.175308158873019, LR: 0.0003 +[2026-02-27 18:14:48] (step=0006008) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.1755038153003325, LR: 0.0003 +[2026-02-27 18:14:56] (step=0006009) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.1756994717276463, LR: 0.0003 +[2026-02-27 18:15:03] (step=0006010) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.17589512815496, LR: 0.0003 +[2026-02-27 18:15:11] (step=0006011) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.1760907845822735, LR: 0.0003 +[2026-02-27 18:15:19] (step=0006012) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.1762864410095872, LR: 0.0003 +[2026-02-27 18:15:27] (step=0006013) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.1764820974369008, LR: 0.0003 +[2026-02-27 18:15:35] (step=0006014) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.1766777538642144, LR: 0.0003 +[2026-02-27 18:15:43] (step=0006015) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.1768734102915281, LR: 0.0003 +[2026-02-27 18:15:51] (step=0006016) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.1770690667188417, LR: 0.0003 +[2026-02-27 18:15:58] (step=0006017) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.1772647231461553, LR: 0.0003 +[2026-02-27 18:16:06] (step=0006018) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.177460379573469, LR: 0.0003 +[2026-02-27 18:16:14] (step=0006019) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 1.1776560360007826, LR: 0.0003 +[2026-02-27 18:16:22] (step=0006020) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.1778516924280962, LR: 0.0003 +[2026-02-27 18:16:30] (step=0006021) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.17804734885541, LR: 0.0003 +[2026-02-27 18:16:38] (step=0006022) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.1782430052827235, LR: 0.0003 +[2026-02-27 18:16:45] (step=0006023) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 1.178438661710037, LR: 0.0003 +[2026-02-27 18:16:53] (step=0006024) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.1786343181373509, LR: 0.0003 +[2026-02-27 18:17:01] (step=0006025) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.1788299745646644, LR: 0.0003 +[2026-02-27 18:17:09] (step=0006026) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.179025630991978, LR: 0.0003 +[2026-02-27 18:17:17] (step=0006027) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 1.1792212874192918, LR: 0.0003 +[2026-02-27 18:17:25] (step=0006028) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.1794169438466053, LR: 0.0003 +[2026-02-27 18:17:32] (step=0006029) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.179612600273919, LR: 0.0003 +[2026-02-27 18:17:40] (step=0006030) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.1798082567012327, LR: 0.0003 +[2026-02-27 18:17:48] (step=0006031) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.1800039131285462, LR: 0.0003 +[2026-02-27 18:17:56] (step=0006032) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.1801995695558598, LR: 0.0003 +[2026-02-27 18:18:04] (step=0006033) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 1.1803952259831736, LR: 0.0003 +[2026-02-27 18:18:12] (step=0006034) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.1805908824104872, LR: 0.0003 +[2026-02-27 18:18:20] (step=0006035) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.1807865388378007, LR: 0.0003 +[2026-02-27 18:18:27] (step=0006036) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.1809821952651145, LR: 0.0003 +[2026-02-27 18:18:35] (step=0006037) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.181177851692428, LR: 0.0003 +[2026-02-27 18:18:43] (step=0006038) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 1.1813735081197418, LR: 0.0003 +[2026-02-27 18:18:51] (step=0006039) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.1815691645470554, LR: 0.0003 +[2026-02-27 18:18:59] (step=0006040) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.181764820974369, LR: 0.0003 +[2026-02-27 18:19:07] (step=0006041) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 1.1819604774016828, LR: 0.0003 +[2026-02-27 18:19:14] (step=0006042) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.1821561338289963, LR: 0.0003 +[2026-02-27 18:19:22] (step=0006043) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.1823517902563099, LR: 0.0003 +[2026-02-27 18:19:30] (step=0006044) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.1825474466836237, LR: 0.0003 +[2026-02-27 18:19:38] (step=0006045) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.1827431031109372, LR: 0.0003 +[2026-02-27 18:19:46] (step=0006046) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.1829387595382508, LR: 0.0003 +[2026-02-27 18:19:54] (step=0006047) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.1831344159655646, LR: 0.0003 +[2026-02-27 18:20:02] (step=0006048) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.1833300723928781, LR: 0.0003 +[2026-02-27 18:20:09] (step=0006049) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.1835257288201917, LR: 0.0003 +[2026-02-27 18:20:17] (step=0006050) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.1837213852475055, LR: 0.0003 +[2026-02-27 18:20:25] (step=0006051) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.183917041674819, LR: 0.0003 +[2026-02-27 18:20:33] (step=0006052) Train Loss: 0.4642, Train Steps/Sec: 0.12, Epoch: 1.1841126981021326, LR: 0.0003 +[2026-02-27 18:20:41] (step=0006053) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.1843083545294464, LR: 0.0003 +[2026-02-27 18:20:49] (step=0006054) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.18450401095676, LR: 0.0003 +[2026-02-27 18:20:57] (step=0006055) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.1846996673840735, LR: 0.0003 +[2026-02-27 18:21:04] (step=0006056) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.1848953238113873, LR: 0.0003 +[2026-02-27 18:21:12] (step=0006057) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.1850909802387009, LR: 0.0003 +[2026-02-27 18:21:20] (step=0006058) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 1.1852866366660144, LR: 0.0003 +[2026-02-27 18:21:28] (step=0006059) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.1854822930933282, LR: 0.0003 +[2026-02-27 18:21:36] (step=0006060) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.1856779495206418, LR: 0.0003 +[2026-02-27 18:21:44] (step=0006061) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.1858736059479553, LR: 0.0003 +[2026-02-27 18:21:51] (step=0006062) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.186069262375269, LR: 0.0003 +[2026-02-27 18:21:59] (step=0006063) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.1862649188025827, LR: 0.0003 +[2026-02-27 18:22:07] (step=0006064) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.1864605752298962, LR: 0.0003 +[2026-02-27 18:22:15] (step=0006065) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.18665623165721, LR: 0.0003 +[2026-02-27 18:22:23] (step=0006066) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.1868518880845236, LR: 0.0003 +[2026-02-27 18:22:31] (step=0006067) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.1870475445118371, LR: 0.0003 +[2026-02-27 18:22:39] (step=0006068) Train Loss: 0.4721, Train Steps/Sec: 0.13, Epoch: 1.187243200939151, LR: 0.0003 +[2026-02-27 18:22:46] (step=0006069) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.1874388573664645, LR: 0.0003 +[2026-02-27 18:22:54] (step=0006070) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 1.187634513793778, LR: 0.0003 +[2026-02-27 18:23:02] (step=0006071) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.1878301702210918, LR: 0.0003 +[2026-02-27 18:23:10] (step=0006072) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.1880258266484054, LR: 0.0003 +[2026-02-27 18:23:18] (step=0006073) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.188221483075719, LR: 0.0003 +[2026-02-27 18:23:25] (step=0006074) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.1884171395030327, LR: 0.0003 +[2026-02-27 18:23:33] (step=0006075) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.1886127959303463, LR: 0.0003 +[2026-02-27 18:23:41] (step=0006076) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.1888084523576599, LR: 0.0003 +[2026-02-27 18:23:49] (step=0006077) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 1.1890041087849736, LR: 0.0003 +[2026-02-27 18:23:57] (step=0006078) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.1891997652122872, LR: 0.0003 +[2026-02-27 18:24:05] (step=0006079) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.1893954216396008, LR: 0.0003 +[2026-02-27 18:24:12] (step=0006080) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.1895910780669146, LR: 0.0003 +[2026-02-27 18:24:20] (step=0006081) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 1.1897867344942281, LR: 0.0003 +[2026-02-27 18:24:28] (step=0006082) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.1899823909215417, LR: 0.0003 +[2026-02-27 18:24:36] (step=0006083) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.1901780473488555, LR: 0.0003 +[2026-02-27 18:24:44] (step=0006084) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.190373703776169, LR: 0.0003 +[2026-02-27 18:24:51] (step=0006085) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.1905693602034826, LR: 0.0003 +[2026-02-27 18:24:59] (step=0006086) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.1907650166307964, LR: 0.0003 +[2026-02-27 18:25:07] (step=0006087) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.19096067305811, LR: 0.0003 +[2026-02-27 18:25:15] (step=0006088) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 1.1911563294854235, LR: 0.0003 +[2026-02-27 18:25:23] (step=0006089) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.1913519859127373, LR: 0.0003 +[2026-02-27 18:25:31] (step=0006090) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.1915476423400508, LR: 0.0003 +[2026-02-27 18:25:38] (step=0006091) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.1917432987673644, LR: 0.0003 +[2026-02-27 18:25:46] (step=0006092) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.1919389551946782, LR: 0.0003 +[2026-02-27 18:25:54] (step=0006093) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.1921346116219917, LR: 0.0003 +[2026-02-27 18:26:02] (step=0006094) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 1.1923302680493055, LR: 0.0003 +[2026-02-27 18:26:10] (step=0006095) Train Loss: 0.4761, Train Steps/Sec: 0.13, Epoch: 1.192525924476619, LR: 0.0003 +[2026-02-27 18:26:17] (step=0006096) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.1927215809039327, LR: 0.0003 +[2026-02-27 18:26:25] (step=0006097) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.1929172373312464, LR: 0.0003 +[2026-02-27 18:26:33] (step=0006098) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.19311289375856, LR: 0.0003 +[2026-02-27 18:26:41] (step=0006099) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.1933085501858736, LR: 0.0003 +[2026-02-27 18:26:49] (step=0006100) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.1935042066131873, LR: 0.0003 +[2026-02-27 18:26:57] (step=0006101) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.193699863040501, LR: 0.0003 +[2026-02-27 18:27:05] (step=0006102) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.1938955194678145, LR: 0.0003 +[2026-02-27 18:27:12] (step=0006103) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.1940911758951283, LR: 0.0003 +[2026-02-27 18:27:20] (step=0006104) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.1942868323224418, LR: 0.0003 +[2026-02-27 18:27:28] (step=0006105) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.1944824887497554, LR: 0.0003 +[2026-02-27 18:27:36] (step=0006106) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.1946781451770692, LR: 0.0003 +[2026-02-27 18:27:44] (step=0006107) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.1948738016043827, LR: 0.0003 +[2026-02-27 18:27:52] (step=0006108) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.1950694580316963, LR: 0.0003 +[2026-02-27 18:27:59] (step=0006109) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.19526511445901, LR: 0.0003 +[2026-02-27 18:28:07] (step=0006110) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.1954607708863236, LR: 0.0003 +[2026-02-27 18:28:15] (step=0006111) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.1956564273136372, LR: 0.0003 +[2026-02-27 18:28:23] (step=0006112) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.195852083740951, LR: 0.0003 +[2026-02-27 18:28:31] (step=0006113) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.1960477401682645, LR: 0.0003 +[2026-02-27 18:28:38] (step=0006114) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.196243396595578, LR: 0.0003 +[2026-02-27 18:28:46] (step=0006115) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.1964390530228919, LR: 0.0003 +[2026-02-27 18:28:54] (step=0006116) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.1966347094502054, LR: 0.0003 +[2026-02-27 18:29:02] (step=0006117) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.196830365877519, LR: 0.0003 +[2026-02-27 18:29:10] (step=0006118) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.1970260223048328, LR: 0.0003 +[2026-02-27 18:29:18] (step=0006119) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.1972216787321464, LR: 0.0003 +[2026-02-27 18:29:25] (step=0006120) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.19741733515946, LR: 0.0003 +[2026-02-27 18:29:33] (step=0006121) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.1976129915867737, LR: 0.0003 +[2026-02-27 18:29:41] (step=0006122) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 1.1978086480140873, LR: 0.0003 +[2026-02-27 18:29:49] (step=0006123) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.1980043044414008, LR: 0.0003 +[2026-02-27 18:29:57] (step=0006124) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 1.1981999608687146, LR: 0.0003 +[2026-02-27 18:30:05] (step=0006125) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.1983956172960282, LR: 0.0003 +[2026-02-27 18:30:12] (step=0006126) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.1985912737233417, LR: 0.0003 +[2026-02-27 18:30:20] (step=0006127) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.1987869301506555, LR: 0.0003 +[2026-02-27 18:30:28] (step=0006128) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.198982586577969, LR: 0.0003 +[2026-02-27 18:30:36] (step=0006129) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.1991782430052826, LR: 0.0003 +[2026-02-27 18:30:44] (step=0006130) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.1993738994325964, LR: 0.0003 +[2026-02-27 18:30:51] (step=0006131) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.19956955585991, LR: 0.0003 +[2026-02-27 18:30:59] (step=0006132) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.1997652122872235, LR: 0.0003 +[2026-02-27 18:31:07] (step=0006133) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.1999608687145373, LR: 0.0003 +[2026-02-27 18:31:15] (step=0006134) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.200156525141851, LR: 0.0003 +[2026-02-27 18:31:23] (step=0006135) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.2003521815691645, LR: 0.0003 +[2026-02-27 18:31:31] (step=0006136) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 1.2005478379964782, LR: 0.0003 +[2026-02-27 18:31:38] (step=0006137) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.2007434944237918, LR: 0.0003 +[2026-02-27 18:31:46] (step=0006138) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.2009391508511054, LR: 0.0003 +[2026-02-27 18:31:54] (step=0006139) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.2011348072784191, LR: 0.0003 +[2026-02-27 18:32:02] (step=0006140) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.2013304637057327, LR: 0.0003 +[2026-02-27 18:32:10] (step=0006141) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.2015261201330463, LR: 0.0003 +[2026-02-27 18:32:18] (step=0006142) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.20172177656036, LR: 0.0003 +[2026-02-27 18:32:25] (step=0006143) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.2019174329876736, LR: 0.0003 +[2026-02-27 18:32:33] (step=0006144) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.2021130894149872, LR: 0.0003 +[2026-02-27 18:32:41] (step=0006145) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 1.202308745842301, LR: 0.0003 +[2026-02-27 18:32:49] (step=0006146) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 1.2025044022696145, LR: 0.0003 +[2026-02-27 18:32:57] (step=0006147) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.202700058696928, LR: 0.0003 +[2026-02-27 18:33:05] (step=0006148) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.2028957151242419, LR: 0.0003 +[2026-02-27 18:33:13] (step=0006149) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.2030913715515554, LR: 0.0003 +[2026-02-27 18:33:20] (step=0006150) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.203287027978869, LR: 0.0003 +[2026-02-27 18:33:28] (step=0006151) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 1.2034826844061828, LR: 0.0003 +[2026-02-27 18:33:36] (step=0006152) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.2036783408334963, LR: 0.0003 +[2026-02-27 18:33:44] (step=0006153) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.2038739972608101, LR: 0.0003 +[2026-02-27 18:33:52] (step=0006154) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 1.2040696536881237, LR: 0.0003 +[2026-02-27 18:34:00] (step=0006155) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 1.2042653101154372, LR: 0.0003 +[2026-02-27 18:34:07] (step=0006156) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.204460966542751, LR: 0.0003 +[2026-02-27 18:34:15] (step=0006157) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.2046566229700646, LR: 0.0003 +[2026-02-27 18:34:23] (step=0006158) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.2048522793973782, LR: 0.0003 +[2026-02-27 18:34:31] (step=0006159) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.205047935824692, LR: 0.0003 +[2026-02-27 18:34:39] (step=0006160) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.2052435922520055, LR: 0.0003 +[2026-02-27 18:34:47] (step=0006161) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 1.205439248679319, LR: 0.0003 +[2026-02-27 18:34:54] (step=0006162) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.2056349051066328, LR: 0.0003 +[2026-02-27 18:35:02] (step=0006163) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 1.2058305615339464, LR: 0.0003 +[2026-02-27 18:35:10] (step=0006164) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.20602621796126, LR: 0.0003 +[2026-02-27 18:35:18] (step=0006165) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.2062218743885738, LR: 0.0003 +[2026-02-27 18:35:26] (step=0006166) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.2064175308158873, LR: 0.0003 +[2026-02-27 18:35:34] (step=0006167) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.2066131872432009, LR: 0.0003 +[2026-02-27 18:35:41] (step=0006168) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 1.2068088436705147, LR: 0.0003 +[2026-02-27 18:35:49] (step=0006169) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 1.2070045000978282, LR: 0.0003 +[2026-02-27 18:35:57] (step=0006170) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 1.2072001565251418, LR: 0.0003 +[2026-02-27 18:36:05] (step=0006171) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.2073958129524556, LR: 0.0003 +[2026-02-27 18:36:13] (step=0006172) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.2075914693797691, LR: 0.0003 +[2026-02-27 18:36:20] (step=0006173) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.2077871258070827, LR: 0.0003 +[2026-02-27 18:36:28] (step=0006174) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.2079827822343965, LR: 0.0003 +[2026-02-27 18:36:36] (step=0006175) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.20817843866171, LR: 0.0003 +[2026-02-27 18:36:44] (step=0006176) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.2083740950890236, LR: 0.0003 +[2026-02-27 18:36:52] (step=0006177) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.2085697515163374, LR: 0.0003 +[2026-02-27 18:37:00] (step=0006178) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 1.208765407943651, LR: 0.0003 +[2026-02-27 18:37:07] (step=0006179) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.2089610643709645, LR: 0.0003 +[2026-02-27 18:37:15] (step=0006180) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.2091567207982783, LR: 0.0003 +[2026-02-27 18:37:23] (step=0006181) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.2093523772255919, LR: 0.0003 +[2026-02-27 18:37:31] (step=0006182) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.2095480336529054, LR: 0.0003 +[2026-02-27 18:37:39] (step=0006183) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.2097436900802192, LR: 0.0003 +[2026-02-27 18:37:46] (step=0006184) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.2099393465075328, LR: 0.0003 +[2026-02-27 18:37:54] (step=0006185) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.2101350029348463, LR: 0.0003 +[2026-02-27 18:38:02] (step=0006186) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.21033065936216, LR: 0.0003 +[2026-02-27 18:38:10] (step=0006187) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.2105263157894737, LR: 0.0003 +[2026-02-27 18:38:18] (step=0006188) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.2107219722167872, LR: 0.0003 +[2026-02-27 18:38:26] (step=0006189) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.210917628644101, LR: 0.0003 +[2026-02-27 18:38:33] (step=0006190) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.2111132850714146, LR: 0.0003 +[2026-02-27 18:38:41] (step=0006191) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.2113089414987281, LR: 0.0003 +[2026-02-27 18:38:49] (step=0006192) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.211504597926042, LR: 0.0003 +[2026-02-27 18:38:57] (step=0006193) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.2117002543533555, LR: 0.0003 +[2026-02-27 18:39:05] (step=0006194) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.211895910780669, LR: 0.0003 +[2026-02-27 18:39:13] (step=0006195) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.2120915672079828, LR: 0.0003 +[2026-02-27 18:39:21] (step=0006196) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 1.2122872236352964, LR: 0.0003 +[2026-02-27 18:39:28] (step=0006197) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.21248288006261, LR: 0.0003 +[2026-02-27 18:39:36] (step=0006198) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.2126785364899237, LR: 0.0003 +[2026-02-27 18:39:44] (step=0006199) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.2128741929172373, LR: 0.0003 +[2026-02-27 18:39:52] (step=0006200) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.2130698493445509, LR: 0.0003 +[2026-02-27 18:40:00] (step=0006201) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.2132655057718646, LR: 0.0003 +[2026-02-27 18:40:08] (step=0006202) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.2134611621991782, LR: 0.0003 +[2026-02-27 18:40:16] (step=0006203) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.2136568186264918, LR: 0.0003 +[2026-02-27 18:40:23] (step=0006204) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.2138524750538056, LR: 0.0003 +[2026-02-27 18:40:31] (step=0006205) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.2140481314811191, LR: 0.0003 +[2026-02-27 18:40:39] (step=0006206) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.2142437879084327, LR: 0.0003 +[2026-02-27 18:40:47] (step=0006207) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.2144394443357465, LR: 0.0003 +[2026-02-27 18:40:55] (step=0006208) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.21463510076306, LR: 0.0003 +[2026-02-27 18:41:03] (step=0006209) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.2148307571903738, LR: 0.0003 +[2026-02-27 18:41:10] (step=0006210) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.2150264136176874, LR: 0.0003 +[2026-02-27 18:41:18] (step=0006211) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.215222070045001, LR: 0.0003 +[2026-02-27 18:41:26] (step=0006212) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 1.2154177264723147, LR: 0.0003 +[2026-02-27 18:41:34] (step=0006213) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.2156133828996283, LR: 0.0003 +[2026-02-27 18:41:42] (step=0006214) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 1.2158090393269418, LR: 0.0003 +[2026-02-27 18:41:50] (step=0006215) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.2160046957542556, LR: 0.0003 +[2026-02-27 18:41:58] (step=0006216) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.2162003521815692, LR: 0.0003 +[2026-02-27 18:42:05] (step=0006217) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.2163960086088828, LR: 0.0003 +[2026-02-27 18:42:13] (step=0006218) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.2165916650361965, LR: 0.0003 +[2026-02-27 18:42:21] (step=0006219) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.21678732146351, LR: 0.0003 +[2026-02-27 18:42:29] (step=0006220) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 1.2169829778908237, LR: 0.0003 +[2026-02-27 18:42:37] (step=0006221) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.2171786343181374, LR: 0.0003 +[2026-02-27 18:42:45] (step=0006222) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.217374290745451, LR: 0.0003 +[2026-02-27 18:42:52] (step=0006223) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.2175699471727646, LR: 0.0003 +[2026-02-27 18:43:00] (step=0006224) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.2177656036000784, LR: 0.0003 +[2026-02-27 18:43:08] (step=0006225) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.217961260027392, LR: 0.0003 +[2026-02-27 18:43:16] (step=0006226) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.2181569164547055, LR: 0.0003 +[2026-02-27 18:43:24] (step=0006227) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 1.2183525728820193, LR: 0.0003 +[2026-02-27 18:43:32] (step=0006228) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.2185482293093328, LR: 0.0003 +[2026-02-27 18:43:39] (step=0006229) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.2187438857366464, LR: 0.0003 +[2026-02-27 18:43:47] (step=0006230) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.2189395421639602, LR: 0.0003 +[2026-02-27 18:43:55] (step=0006231) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.2191351985912737, LR: 0.0003 +[2026-02-27 18:44:03] (step=0006232) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.2193308550185873, LR: 0.0003 +[2026-02-27 18:44:11] (step=0006233) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.219526511445901, LR: 0.0003 +[2026-02-27 18:44:19] (step=0006234) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.2197221678732146, LR: 0.0003 +[2026-02-27 18:44:26] (step=0006235) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 1.2199178243005282, LR: 0.0003 +[2026-02-27 18:44:34] (step=0006236) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.220113480727842, LR: 0.0003 +[2026-02-27 18:44:42] (step=0006237) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.2203091371551555, LR: 0.0003 +[2026-02-27 18:44:50] (step=0006238) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.220504793582469, LR: 0.0003 +[2026-02-27 18:44:58] (step=0006239) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.220700450009783, LR: 0.0003 +[2026-02-27 18:45:06] (step=0006240) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.2208961064370965, LR: 0.0003 +[2026-02-27 18:45:14] (step=0006241) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.22109176286441, LR: 0.0003 +[2026-02-27 18:45:22] (step=0006242) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.2212874192917238, LR: 0.0003 +[2026-02-27 18:45:29] (step=0006243) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.2214830757190374, LR: 0.0003 +[2026-02-27 18:45:37] (step=0006244) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 1.221678732146351, LR: 0.0003 +[2026-02-27 18:45:45] (step=0006245) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.2218743885736647, LR: 0.0003 +[2026-02-27 18:45:53] (step=0006246) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.2220700450009783, LR: 0.0003 +[2026-02-27 18:46:01] (step=0006247) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.2222657014282918, LR: 0.0003 +[2026-02-27 18:46:09] (step=0006248) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 1.2224613578556056, LR: 0.0003 +[2026-02-27 18:46:16] (step=0006249) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.2226570142829192, LR: 0.0003 +[2026-02-27 18:46:24] (step=0006250) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.2228526707102327, LR: 0.0003 +[2026-02-27 18:46:32] (step=0006251) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.2230483271375465, LR: 0.0003 +[2026-02-27 18:46:40] (step=0006252) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.22324398356486, LR: 0.0003 +[2026-02-27 18:46:48] (step=0006253) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.2234396399921736, LR: 0.0003 +[2026-02-27 18:46:56] (step=0006254) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.2236352964194874, LR: 0.0003 +[2026-02-27 18:47:04] (step=0006255) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.223830952846801, LR: 0.0003 +[2026-02-27 18:47:11] (step=0006256) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 1.2240266092741146, LR: 0.0003 +[2026-02-27 18:47:19] (step=0006257) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 1.2242222657014283, LR: 0.0003 +[2026-02-27 18:47:27] (step=0006258) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.224417922128742, LR: 0.0003 +[2026-02-27 18:47:35] (step=0006259) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.2246135785560555, LR: 0.0003 +[2026-02-27 18:47:43] (step=0006260) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.2248092349833692, LR: 0.0003 +[2026-02-27 18:47:51] (step=0006261) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.2250048914106828, LR: 0.0003 +[2026-02-27 18:47:59] (step=0006262) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 1.2252005478379964, LR: 0.0003 +[2026-02-27 18:48:06] (step=0006263) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.2253962042653102, LR: 0.0003 +[2026-02-27 18:48:14] (step=0006264) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.2255918606926237, LR: 0.0003 +[2026-02-27 18:48:22] (step=0006265) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.2257875171199375, LR: 0.0003 +[2026-02-27 18:48:30] (step=0006266) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.225983173547251, LR: 0.0003 +[2026-02-27 18:48:38] (step=0006267) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.2261788299745646, LR: 0.0003 +[2026-02-27 18:48:46] (step=0006268) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 1.2263744864018784, LR: 0.0003 +[2026-02-27 18:48:53] (step=0006269) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 1.226570142829192, LR: 0.0003 +[2026-02-27 18:49:01] (step=0006270) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 1.2267657992565055, LR: 0.0003 +[2026-02-27 18:49:09] (step=0006271) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.2269614556838193, LR: 0.0003 +[2026-02-27 18:49:17] (step=0006272) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.2271571121111329, LR: 0.0003 +[2026-02-27 18:49:25] (step=0006273) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.2273527685384464, LR: 0.0003 +[2026-02-27 18:49:33] (step=0006274) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.2275484249657602, LR: 0.0003 +[2026-02-27 18:49:40] (step=0006275) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.2277440813930738, LR: 0.0003 +[2026-02-27 18:49:48] (step=0006276) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.2279397378203873, LR: 0.0003 +[2026-02-27 18:49:56] (step=0006277) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.2281353942477011, LR: 0.0003 +[2026-02-27 18:50:04] (step=0006278) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.2283310506750147, LR: 0.0003 +[2026-02-27 18:50:12] (step=0006279) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.2285267071023283, LR: 0.0003 +[2026-02-27 18:50:20] (step=0006280) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 1.228722363529642, LR: 0.0003 +[2026-02-27 18:50:28] (step=0006281) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.2289180199569556, LR: 0.0003 +[2026-02-27 18:50:35] (step=0006282) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 1.2291136763842692, LR: 0.0003 +[2026-02-27 18:50:43] (step=0006283) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.229309332811583, LR: 0.0003 +[2026-02-27 18:50:51] (step=0006284) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.2295049892388965, LR: 0.0003 +[2026-02-27 18:50:59] (step=0006285) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 1.22970064566621, LR: 0.0003 +[2026-02-27 18:51:07] (step=0006286) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.2298963020935239, LR: 0.0003 +[2026-02-27 18:51:15] (step=0006287) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.2300919585208374, LR: 0.0003 +[2026-02-27 18:51:23] (step=0006288) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.230287614948151, LR: 0.0003 +[2026-02-27 18:51:30] (step=0006289) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.2304832713754648, LR: 0.0003 +[2026-02-27 18:51:38] (step=0006290) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.2306789278027783, LR: 0.0003 +[2026-02-27 18:51:46] (step=0006291) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.2308745842300919, LR: 0.0003 +[2026-02-27 18:51:54] (step=0006292) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.2310702406574057, LR: 0.0003 +[2026-02-27 18:52:02] (step=0006293) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.2312658970847192, LR: 0.0003 +[2026-02-27 18:52:10] (step=0006294) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.2314615535120328, LR: 0.0003 +[2026-02-27 18:52:17] (step=0006295) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.2316572099393466, LR: 0.0003 +[2026-02-27 18:52:25] (step=0006296) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 1.2318528663666601, LR: 0.0003 +[2026-02-27 18:52:33] (step=0006297) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.2320485227939737, LR: 0.0003 +[2026-02-27 18:52:41] (step=0006298) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.2322441792212875, LR: 0.0003 +[2026-02-27 18:52:49] (step=0006299) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.232439835648601, LR: 0.0003 +[2026-02-27 18:52:57] (step=0006300) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.2326354920759146, LR: 0.0003 +[2026-02-27 18:53:05] (step=0006301) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 1.2328311485032284, LR: 0.0003 +[2026-02-27 18:53:12] (step=0006302) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 1.233026804930542, LR: 0.0003 +[2026-02-27 18:53:20] (step=0006303) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.2332224613578555, LR: 0.0003 +[2026-02-27 18:53:28] (step=0006304) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.2334181177851693, LR: 0.0003 +[2026-02-27 18:53:36] (step=0006305) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.2336137742124829, LR: 0.0003 +[2026-02-27 18:53:44] (step=0006306) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.2338094306397964, LR: 0.0003 +[2026-02-27 18:53:52] (step=0006307) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.2340050870671102, LR: 0.0003 +[2026-02-27 18:54:00] (step=0006308) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.2342007434944238, LR: 0.0003 +[2026-02-27 18:54:07] (step=0006309) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 1.2343963999217373, LR: 0.0003 +[2026-02-27 18:54:15] (step=0006310) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.2345920563490511, LR: 0.0003 +[2026-02-27 18:54:23] (step=0006311) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.2347877127763647, LR: 0.0003 +[2026-02-27 18:54:31] (step=0006312) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.2349833692036782, LR: 0.0003 +[2026-02-27 18:54:39] (step=0006313) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.235179025630992, LR: 0.0003 +[2026-02-27 18:54:47] (step=0006314) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.2353746820583056, LR: 0.0003 +[2026-02-27 18:54:54] (step=0006315) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.2355703384856191, LR: 0.0003 +[2026-02-27 18:55:02] (step=0006316) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.235765994912933, LR: 0.0003 +[2026-02-27 18:55:10] (step=0006317) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.2359616513402465, LR: 0.0003 +[2026-02-27 18:55:18] (step=0006318) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.23615730776756, LR: 0.0003 +[2026-02-27 18:55:26] (step=0006319) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.2363529641948738, LR: 0.0003 +[2026-02-27 18:55:34] (step=0006320) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.2365486206221874, LR: 0.0003 +[2026-02-27 18:55:41] (step=0006321) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 1.2367442770495012, LR: 0.0003 +[2026-02-27 18:55:49] (step=0006322) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.2369399334768147, LR: 0.0003 +[2026-02-27 18:55:57] (step=0006323) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.2371355899041283, LR: 0.0003 +[2026-02-27 18:56:05] (step=0006324) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 1.237331246331442, LR: 0.0003 +[2026-02-27 18:56:13] (step=0006325) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.2375269027587557, LR: 0.0003 +[2026-02-27 18:56:21] (step=0006326) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.2377225591860692, LR: 0.0003 +[2026-02-27 18:56:29] (step=0006327) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.237918215613383, LR: 0.0003 +[2026-02-27 18:56:36] (step=0006328) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 1.2381138720406966, LR: 0.0003 +[2026-02-27 18:56:44] (step=0006329) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.2383095284680101, LR: 0.0003 +[2026-02-27 18:56:52] (step=0006330) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.238505184895324, LR: 0.0003 +[2026-02-27 18:57:00] (step=0006331) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.2387008413226375, LR: 0.0003 +[2026-02-27 18:57:08] (step=0006332) Train Loss: 0.4474, Train Steps/Sec: 0.12, Epoch: 1.238896497749951, LR: 0.0003 +[2026-02-27 18:57:16] (step=0006333) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.2390921541772648, LR: 0.0003 +[2026-02-27 18:57:24] (step=0006334) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.2392878106045784, LR: 0.0003 +[2026-02-27 18:57:31] (step=0006335) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.239483467031892, LR: 0.0003 +[2026-02-27 18:57:39] (step=0006336) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.2396791234592057, LR: 0.0003 +[2026-02-27 18:57:47] (step=0006337) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.2398747798865193, LR: 0.0003 +[2026-02-27 18:57:55] (step=0006338) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.2400704363138328, LR: 0.0003 +[2026-02-27 18:58:03] (step=0006339) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.2402660927411466, LR: 0.0003 +[2026-02-27 18:58:11] (step=0006340) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.2404617491684602, LR: 0.0003 +[2026-02-27 18:58:19] (step=0006341) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.2406574055957738, LR: 0.0003 +[2026-02-27 18:58:26] (step=0006342) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.2408530620230875, LR: 0.0003 +[2026-02-27 18:58:34] (step=0006343) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 1.241048718450401, LR: 0.0003 +[2026-02-27 18:58:42] (step=0006344) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.2412443748777147, LR: 0.0003 +[2026-02-27 18:58:50] (step=0006345) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.2414400313050284, LR: 0.0003 +[2026-02-27 18:58:58] (step=0006346) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.241635687732342, LR: 0.0003 +[2026-02-27 18:59:06] (step=0006347) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.2418313441596556, LR: 0.0003 +[2026-02-27 18:59:14] (step=0006348) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 1.2420270005869694, LR: 0.0003 +[2026-02-27 18:59:21] (step=0006349) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.242222657014283, LR: 0.0003 +[2026-02-27 18:59:29] (step=0006350) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.2424183134415965, LR: 0.0003 +[2026-02-27 18:59:37] (step=0006351) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.2426139698689103, LR: 0.0003 +[2026-02-27 18:59:45] (step=0006352) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.2428096262962238, LR: 0.0003 +[2026-02-27 18:59:53] (step=0006353) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 1.2430052827235374, LR: 0.0003 +[2026-02-27 19:00:01] (step=0006354) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.2432009391508512, LR: 0.0003 +[2026-02-27 19:00:09] (step=0006355) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.2433965955781647, LR: 0.0003 +[2026-02-27 19:00:16] (step=0006356) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.2435922520054783, LR: 0.0003 +[2026-02-27 19:00:24] (step=0006357) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.243787908432792, LR: 0.0003 +[2026-02-27 19:00:32] (step=0006358) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 1.2439835648601056, LR: 0.0003 +[2026-02-27 19:00:40] (step=0006359) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 1.2441792212874192, LR: 0.0003 +[2026-02-27 19:00:48] (step=0006360) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.244374877714733, LR: 0.0003 +[2026-02-27 19:00:56] (step=0006361) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 1.2445705341420465, LR: 0.0003 +[2026-02-27 19:01:03] (step=0006362) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.24476619056936, LR: 0.0003 +[2026-02-27 19:01:11] (step=0006363) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.244961846996674, LR: 0.0003 +[2026-02-27 19:01:19] (step=0006364) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.2451575034239875, LR: 0.0003 +[2026-02-27 19:01:27] (step=0006365) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.245353159851301, LR: 0.0003 +[2026-02-27 19:01:35] (step=0006366) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.2455488162786148, LR: 0.0003 +[2026-02-27 19:01:43] (step=0006367) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.2457444727059284, LR: 0.0003 +[2026-02-27 19:01:50] (step=0006368) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.245940129133242, LR: 0.0003 +[2026-02-27 19:01:58] (step=0006369) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.2461357855605557, LR: 0.0003 +[2026-02-27 19:02:06] (step=0006370) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.2463314419878693, LR: 0.0003 +[2026-02-27 19:02:14] (step=0006371) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.2465270984151828, LR: 0.0003 +[2026-02-27 19:02:22] (step=0006372) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.2467227548424966, LR: 0.0003 +[2026-02-27 19:02:30] (step=0006373) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.2469184112698102, LR: 0.0003 +[2026-02-27 19:02:38] (step=0006374) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.2471140676971237, LR: 0.0003 +[2026-02-27 19:02:45] (step=0006375) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.2473097241244375, LR: 0.0003 +[2026-02-27 19:02:53] (step=0006376) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 1.247505380551751, LR: 0.0003 +[2026-02-27 19:03:01] (step=0006377) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.2477010369790649, LR: 0.0003 +[2026-02-27 19:03:09] (step=0006378) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.2478966934063784, LR: 0.0003 +[2026-02-27 19:03:17] (step=0006379) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.248092349833692, LR: 0.0003 +[2026-02-27 19:03:25] (step=0006380) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.2482880062610058, LR: 0.0003 +[2026-02-27 19:03:33] (step=0006381) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.2484836626883193, LR: 0.0003 +[2026-02-27 19:03:40] (step=0006382) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.248679319115633, LR: 0.0003 +[2026-02-27 19:03:48] (step=0006383) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.2488749755429467, LR: 0.0003 +[2026-02-27 19:03:56] (step=0006384) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.2490706319702602, LR: 0.0003 +[2026-02-27 19:04:04] (step=0006385) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.2492662883975738, LR: 0.0003 +[2026-02-27 19:04:12] (step=0006386) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.2494619448248876, LR: 0.0003 +[2026-02-27 19:04:20] (step=0006387) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 1.2496576012522012, LR: 0.0003 +[2026-02-27 19:04:27] (step=0006388) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.2498532576795147, LR: 0.0003 +[2026-02-27 19:04:35] (step=0006389) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 1.2500489141068285, LR: 0.0003 +[2026-02-27 19:04:43] (step=0006390) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.250244570534142, LR: 0.0003 +[2026-02-27 19:04:51] (step=0006391) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.2504402269614556, LR: 0.0003 +[2026-02-27 19:04:59] (step=0006392) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.2506358833887694, LR: 0.0003 +[2026-02-27 19:05:07] (step=0006393) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.250831539816083, LR: 0.0003 +[2026-02-27 19:05:15] (step=0006394) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 1.2510271962433965, LR: 0.0003 +[2026-02-27 19:05:23] (step=0006395) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.2512228526707103, LR: 0.0003 +[2026-02-27 19:05:30] (step=0006396) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.2514185090980239, LR: 0.0003 +[2026-02-27 19:05:38] (step=0006397) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.2516141655253374, LR: 0.0003 +[2026-02-27 19:05:46] (step=0006398) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.2518098219526512, LR: 0.0003 +[2026-02-27 19:05:54] (step=0006399) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.2520054783799648, LR: 0.0003 +[2026-02-27 19:06:02] (step=0006400) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.2522011348072783, LR: 0.0003 +[2026-02-27 19:06:10] (step=0006401) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.2523967912345921, LR: 0.0003 +[2026-02-27 19:06:17] (step=0006402) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.2525924476619057, LR: 0.0003 +[2026-02-27 19:06:25] (step=0006403) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.2527881040892193, LR: 0.0003 +[2026-02-27 19:06:33] (step=0006404) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.252983760516533, LR: 0.0003 +[2026-02-27 19:06:41] (step=0006405) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.2531794169438466, LR: 0.0003 +[2026-02-27 19:06:49] (step=0006406) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.2533750733711602, LR: 0.0003 +[2026-02-27 19:06:57] (step=0006407) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.253570729798474, LR: 0.0003 +[2026-02-27 19:07:04] (step=0006408) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.2537663862257875, LR: 0.0003 +[2026-02-27 19:07:12] (step=0006409) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.253962042653101, LR: 0.0003 +[2026-02-27 19:07:20] (step=0006410) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.2541576990804149, LR: 0.0003 +[2026-02-27 19:07:28] (step=0006411) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.2543533555077284, LR: 0.0003 +[2026-02-27 19:07:36] (step=0006412) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.254549011935042, LR: 0.0003 +[2026-02-27 19:07:44] (step=0006413) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.2547446683623558, LR: 0.0003 +[2026-02-27 19:07:51] (step=0006414) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.2549403247896693, LR: 0.0003 +[2026-02-27 19:07:59] (step=0006415) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.2551359812169829, LR: 0.0003 +[2026-02-27 19:08:07] (step=0006416) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.2553316376442967, LR: 0.0003 +[2026-02-27 19:08:15] (step=0006417) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.2555272940716102, LR: 0.0003 +[2026-02-27 19:08:23] (step=0006418) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 1.2557229504989238, LR: 0.0003 +[2026-02-27 19:08:31] (step=0006419) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.2559186069262376, LR: 0.0003 +[2026-02-27 19:08:39] (step=0006420) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.2561142633535511, LR: 0.0003 +[2026-02-27 19:08:46] (step=0006421) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.2563099197808647, LR: 0.0003 +[2026-02-27 19:08:54] (step=0006422) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.2565055762081785, LR: 0.0003 +[2026-02-27 19:09:02] (step=0006423) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.256701232635492, LR: 0.0003 +[2026-02-27 19:09:10] (step=0006424) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 1.2568968890628056, LR: 0.0003 +[2026-02-27 19:09:18] (step=0006425) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.2570925454901194, LR: 0.0003 +[2026-02-27 19:09:26] (step=0006426) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.257288201917433, LR: 0.0003 +[2026-02-27 19:09:33] (step=0006427) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.2574838583447465, LR: 0.0003 +[2026-02-27 19:09:41] (step=0006428) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.2576795147720603, LR: 0.0003 +[2026-02-27 19:09:49] (step=0006429) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.2578751711993739, LR: 0.0003 +[2026-02-27 19:09:57] (step=0006430) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.2580708276266874, LR: 0.0003 +[2026-02-27 19:10:05] (step=0006431) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.2582664840540012, LR: 0.0003 +[2026-02-27 19:10:13] (step=0006432) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.2584621404813148, LR: 0.0003 +[2026-02-27 19:10:21] (step=0006433) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.2586577969086286, LR: 0.0003 +[2026-02-27 19:10:28] (step=0006434) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.2588534533359421, LR: 0.0003 +[2026-02-27 19:10:36] (step=0006435) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.2590491097632557, LR: 0.0003 +[2026-02-27 19:10:44] (step=0006436) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.2592447661905695, LR: 0.0003 +[2026-02-27 19:10:52] (step=0006437) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.259440422617883, LR: 0.0003 +[2026-02-27 19:11:00] (step=0006438) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.2596360790451966, LR: 0.0003 +[2026-02-27 19:11:08] (step=0006439) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 1.2598317354725104, LR: 0.0003 +[2026-02-27 19:11:15] (step=0006440) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.260027391899824, LR: 0.0003 +[2026-02-27 19:11:23] (step=0006441) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.2602230483271375, LR: 0.0003 +[2026-02-27 19:11:31] (step=0006442) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.2604187047544513, LR: 0.0003 +[2026-02-27 19:11:39] (step=0006443) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.2606143611817648, LR: 0.0003 +[2026-02-27 19:11:47] (step=0006444) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.2608100176090784, LR: 0.0003 +[2026-02-27 19:11:55] (step=0006445) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.2610056740363922, LR: 0.0003 +[2026-02-27 19:12:03] (step=0006446) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.2612013304637057, LR: 0.0003 +[2026-02-27 19:12:11] (step=0006447) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.2613969868910193, LR: 0.0003 +[2026-02-27 19:12:18] (step=0006448) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 1.261592643318333, LR: 0.0003 +[2026-02-27 19:12:26] (step=0006449) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.2617882997456467, LR: 0.0003 +[2026-02-27 19:12:34] (step=0006450) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.2619839561729602, LR: 0.0003 +[2026-02-27 19:12:42] (step=0006451) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.262179612600274, LR: 0.0003 +[2026-02-27 19:12:50] (step=0006452) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.2623752690275876, LR: 0.0003 +[2026-02-27 19:12:58] (step=0006453) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.2625709254549011, LR: 0.0003 +[2026-02-27 19:13:05] (step=0006454) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.262766581882215, LR: 0.0003 +[2026-02-27 19:13:13] (step=0006455) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.2629622383095285, LR: 0.0003 +[2026-02-27 19:13:21] (step=0006456) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.263157894736842, LR: 0.0003 +[2026-02-27 19:13:29] (step=0006457) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.2633535511641558, LR: 0.0003 +[2026-02-27 19:13:37] (step=0006458) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.2635492075914694, LR: 0.0003 +[2026-02-27 19:13:45] (step=0006459) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.263744864018783, LR: 0.0003 +[2026-02-27 19:13:52] (step=0006460) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 1.2639405204460967, LR: 0.0003 +[2026-02-27 19:14:00] (step=0006461) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.2641361768734103, LR: 0.0003 +[2026-02-27 19:14:08] (step=0006462) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.2643318333007239, LR: 0.0003 +[2026-02-27 19:14:16] (step=0006463) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.2645274897280376, LR: 0.0003 +[2026-02-27 19:14:24] (step=0006464) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.2647231461553512, LR: 0.0003 +[2026-02-27 19:14:32] (step=0006465) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.2649188025826648, LR: 0.0003 +[2026-02-27 19:14:40] (step=0006466) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.2651144590099785, LR: 0.0003 +[2026-02-27 19:14:47] (step=0006467) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.265310115437292, LR: 0.0003 +[2026-02-27 19:14:55] (step=0006468) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.2655057718646057, LR: 0.0003 +[2026-02-27 19:15:03] (step=0006469) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.2657014282919195, LR: 0.0003 +[2026-02-27 19:15:11] (step=0006470) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.265897084719233, LR: 0.0003 +[2026-02-27 19:15:19] (step=0006471) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.2660927411465466, LR: 0.0003 +[2026-02-27 19:15:27] (step=0006472) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 1.2662883975738604, LR: 0.0003 +[2026-02-27 19:15:34] (step=0006473) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.266484054001174, LR: 0.0003 +[2026-02-27 19:15:42] (step=0006474) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.2666797104284875, LR: 0.0003 +[2026-02-27 19:15:50] (step=0006475) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.2668753668558013, LR: 0.0003 +[2026-02-27 19:15:58] (step=0006476) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.2670710232831148, LR: 0.0003 +[2026-02-27 19:16:06] (step=0006477) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.2672666797104284, LR: 0.0003 +[2026-02-27 19:16:14] (step=0006478) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.2674623361377422, LR: 0.0003 +[2026-02-27 19:16:21] (step=0006479) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.2676579925650557, LR: 0.0003 +[2026-02-27 19:16:29] (step=0006480) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.2678536489923693, LR: 0.0003 +[2026-02-27 19:16:37] (step=0006481) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.268049305419683, LR: 0.0003 +[2026-02-27 19:16:45] (step=0006482) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.2682449618469966, LR: 0.0003 +[2026-02-27 19:16:53] (step=0006483) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.2684406182743102, LR: 0.0003 +[2026-02-27 19:17:01] (step=0006484) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.268636274701624, LR: 0.0003 +[2026-02-27 19:17:09] (step=0006485) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.2688319311289376, LR: 0.0003 +[2026-02-27 19:17:16] (step=0006486) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.2690275875562511, LR: 0.0003 +[2026-02-27 19:17:24] (step=0006487) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 1.269223243983565, LR: 0.0003 +[2026-02-27 19:17:32] (step=0006488) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.2694189004108785, LR: 0.0003 +[2026-02-27 19:17:40] (step=0006489) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.2696145568381922, LR: 0.0003 +[2026-02-27 19:17:48] (step=0006490) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 1.2698102132655058, LR: 0.0003 +[2026-02-27 19:17:56] (step=0006491) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.2700058696928194, LR: 0.0003 +[2026-02-27 19:18:04] (step=0006492) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 1.2702015261201332, LR: 0.0003 +[2026-02-27 19:18:12] (step=0006493) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.2703971825474467, LR: 0.0003 +[2026-02-27 19:18:19] (step=0006494) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 1.2705928389747603, LR: 0.0003 +[2026-02-27 19:18:27] (step=0006495) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.270788495402074, LR: 0.0003 +[2026-02-27 19:18:35] (step=0006496) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.2709841518293876, LR: 0.0003 +[2026-02-27 19:18:43] (step=0006497) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.2711798082567012, LR: 0.0003 +[2026-02-27 19:18:51] (step=0006498) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.271375464684015, LR: 0.0003 +[2026-02-27 19:18:59] (step=0006499) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.2715711211113285, LR: 0.0003 +[2026-02-27 19:19:06] (step=0006500) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.271766777538642, LR: 0.0003 +[2026-02-27 19:19:06] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0006500/ +[2026-02-27 19:19:14] (step=0006501) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 1.2719624339659559, LR: 0.0003 +[2026-02-27 19:19:22] (step=0006502) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.2721580903932694, LR: 0.0003 +[2026-02-27 19:19:30] (step=0006503) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.272353746820583, LR: 0.0003 +[2026-02-27 19:19:38] (step=0006504) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.2725494032478968, LR: 0.0003 +[2026-02-27 19:19:46] (step=0006505) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.2727450596752103, LR: 0.0003 +[2026-02-27 19:19:54] (step=0006506) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.272940716102524, LR: 0.0003 +[2026-02-27 19:20:01] (step=0006507) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.2731363725298377, LR: 0.0003 +[2026-02-27 19:20:09] (step=0006508) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.2733320289571513, LR: 0.0003 +[2026-02-27 19:20:17] (step=0006509) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.2735276853844648, LR: 0.0003 +[2026-02-27 19:20:25] (step=0006510) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.2737233418117786, LR: 0.0003 +[2026-02-27 19:20:33] (step=0006511) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.2739189982390922, LR: 0.0003 +[2026-02-27 19:20:41] (step=0006512) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.2741146546664057, LR: 0.0003 +[2026-02-27 19:20:48] (step=0006513) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.2743103110937195, LR: 0.0003 +[2026-02-27 19:20:56] (step=0006514) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.274505967521033, LR: 0.0003 +[2026-02-27 19:21:04] (step=0006515) Train Loss: 0.4718, Train Steps/Sec: 0.13, Epoch: 1.2747016239483466, LR: 0.0003 +[2026-02-27 19:21:12] (step=0006516) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.2748972803756604, LR: 0.0003 +[2026-02-27 19:21:20] (step=0006517) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.275092936802974, LR: 0.0003 +[2026-02-27 19:21:28] (step=0006518) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 1.2752885932302875, LR: 0.0003 +[2026-02-27 19:21:35] (step=0006519) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.2754842496576013, LR: 0.0003 +[2026-02-27 19:21:43] (step=0006520) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.2756799060849149, LR: 0.0003 +[2026-02-27 19:21:51] (step=0006521) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.2758755625122284, LR: 0.0003 +[2026-02-27 19:21:59] (step=0006522) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.2760712189395422, LR: 0.0003 +[2026-02-27 19:22:07] (step=0006523) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.2762668753668558, LR: 0.0003 +[2026-02-27 19:22:15] (step=0006524) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.2764625317941694, LR: 0.0003 +[2026-02-27 19:22:22] (step=0006525) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.2766581882214831, LR: 0.0003 +[2026-02-27 19:22:30] (step=0006526) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.2768538446487967, LR: 0.0003 +[2026-02-27 19:22:38] (step=0006527) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 1.2770495010761103, LR: 0.0003 +[2026-02-27 19:22:46] (step=0006528) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.277245157503424, LR: 0.0003 +[2026-02-27 19:22:54] (step=0006529) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.2774408139307376, LR: 0.0003 +[2026-02-27 19:23:02] (step=0006530) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.2776364703580512, LR: 0.0003 +[2026-02-27 19:23:10] (step=0006531) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 1.277832126785365, LR: 0.0003 +[2026-02-27 19:23:17] (step=0006532) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.2780277832126785, LR: 0.0003 +[2026-02-27 19:23:25] (step=0006533) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.278223439639992, LR: 0.0003 +[2026-02-27 19:23:33] (step=0006534) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.2784190960673059, LR: 0.0003 +[2026-02-27 19:23:41] (step=0006535) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.2786147524946194, LR: 0.0003 +[2026-02-27 19:23:49] (step=0006536) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.278810408921933, LR: 0.0003 +[2026-02-27 19:23:57] (step=0006537) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.2790060653492468, LR: 0.0003 +[2026-02-27 19:24:05] (step=0006538) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.2792017217765603, LR: 0.0003 +[2026-02-27 19:24:12] (step=0006539) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.279397378203874, LR: 0.0003 +[2026-02-27 19:24:20] (step=0006540) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 1.2795930346311877, LR: 0.0003 +[2026-02-27 19:24:28] (step=0006541) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.2797886910585012, LR: 0.0003 +[2026-02-27 19:24:36] (step=0006542) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 1.2799843474858148, LR: 0.0003 +[2026-02-27 19:24:44] (step=0006543) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.2801800039131286, LR: 0.0003 +[2026-02-27 19:24:52] (step=0006544) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.2803756603404421, LR: 0.0003 +[2026-02-27 19:25:00] (step=0006545) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.280571316767756, LR: 0.0003 +[2026-02-27 19:25:07] (step=0006546) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.2807669731950695, LR: 0.0003 +[2026-02-27 19:25:15] (step=0006547) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.280962629622383, LR: 0.0003 +[2026-02-27 19:25:23] (step=0006548) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.2811582860496968, LR: 0.0003 +[2026-02-27 19:25:31] (step=0006549) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.2813539424770104, LR: 0.0003 +[2026-02-27 19:25:39] (step=0006550) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 1.281549598904324, LR: 0.0003 +[2026-02-27 19:25:47] (step=0006551) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.2817452553316377, LR: 0.0003 +[2026-02-27 19:25:55] (step=0006552) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.2819409117589513, LR: 0.0003 +[2026-02-27 19:26:02] (step=0006553) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.2821365681862649, LR: 0.0003 +[2026-02-27 19:26:10] (step=0006554) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.2823322246135787, LR: 0.0003 +[2026-02-27 19:26:18] (step=0006555) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.2825278810408922, LR: 0.0003 +[2026-02-27 19:26:26] (step=0006556) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 1.2827235374682058, LR: 0.0003 +[2026-02-27 19:26:34] (step=0006557) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.2829191938955196, LR: 0.0003 +[2026-02-27 19:26:42] (step=0006558) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 1.2831148503228331, LR: 0.0003 +[2026-02-27 19:26:49] (step=0006559) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.2833105067501467, LR: 0.0003 +[2026-02-27 19:26:57] (step=0006560) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.2835061631774605, LR: 0.0003 +[2026-02-27 19:27:05] (step=0006561) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.283701819604774, LR: 0.0003 +[2026-02-27 19:27:13] (step=0006562) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.2838974760320876, LR: 0.0003 +[2026-02-27 19:27:21] (step=0006563) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.2840931324594014, LR: 0.0003 +[2026-02-27 19:27:29] (step=0006564) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 1.284288788886715, LR: 0.0003 +[2026-02-27 19:27:36] (step=0006565) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 1.2844844453140285, LR: 0.0003 +[2026-02-27 19:27:44] (step=0006566) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.2846801017413423, LR: 0.0003 +[2026-02-27 19:27:52] (step=0006567) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.2848757581686558, LR: 0.0003 +[2026-02-27 19:28:00] (step=0006568) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.2850714145959694, LR: 0.0003 +[2026-02-27 19:28:08] (step=0006569) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.2852670710232832, LR: 0.0003 +[2026-02-27 19:28:16] (step=0006570) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.2854627274505968, LR: 0.0003 +[2026-02-27 19:28:24] (step=0006571) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.2856583838779103, LR: 0.0003 +[2026-02-27 19:28:31] (step=0006572) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.285854040305224, LR: 0.0003 +[2026-02-27 19:28:39] (step=0006573) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.2860496967325377, LR: 0.0003 +[2026-02-27 19:28:47] (step=0006574) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.2862453531598512, LR: 0.0003 +[2026-02-27 19:28:55] (step=0006575) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 1.286441009587165, LR: 0.0003 +[2026-02-27 19:29:03] (step=0006576) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 1.2866366660144786, LR: 0.0003 +[2026-02-27 19:29:11] (step=0006577) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.2868323224417921, LR: 0.0003 +[2026-02-27 19:29:19] (step=0006578) Train Loss: 0.4549, Train Steps/Sec: 0.12, Epoch: 1.287027978869106, LR: 0.0003 +[2026-02-27 19:29:26] (step=0006579) Train Loss: 0.4782, Train Steps/Sec: 0.13, Epoch: 1.2872236352964195, LR: 0.0003 +[2026-02-27 19:29:34] (step=0006580) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.287419291723733, LR: 0.0003 +[2026-02-27 19:29:42] (step=0006581) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.2876149481510468, LR: 0.0003 +[2026-02-27 19:29:50] (step=0006582) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.2878106045783604, LR: 0.0003 +[2026-02-27 19:29:58] (step=0006583) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.288006261005674, LR: 0.0003 +[2026-02-27 19:30:06] (step=0006584) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.2882019174329877, LR: 0.0003 +[2026-02-27 19:30:13] (step=0006585) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.2883975738603013, LR: 0.0003 +[2026-02-27 19:30:21] (step=0006586) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.2885932302876149, LR: 0.0003 +[2026-02-27 19:30:29] (step=0006587) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.2887888867149286, LR: 0.0003 +[2026-02-27 19:30:37] (step=0006588) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.2889845431422422, LR: 0.0003 +[2026-02-27 19:30:45] (step=0006589) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.2891801995695558, LR: 0.0003 +[2026-02-27 19:30:53] (step=0006590) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.2893758559968695, LR: 0.0003 +[2026-02-27 19:31:01] (step=0006591) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.289571512424183, LR: 0.0003 +[2026-02-27 19:31:08] (step=0006592) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.2897671688514967, LR: 0.0003 +[2026-02-27 19:31:16] (step=0006593) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.2899628252788105, LR: 0.0003 +[2026-02-27 19:31:24] (step=0006594) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.290158481706124, LR: 0.0003 +[2026-02-27 19:31:32] (step=0006595) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.2903541381334376, LR: 0.0003 +[2026-02-27 19:31:40] (step=0006596) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.2905497945607514, LR: 0.0003 +[2026-02-27 19:31:48] (step=0006597) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.290745450988065, LR: 0.0003 +[2026-02-27 19:31:56] (step=0006598) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 1.2909411074153785, LR: 0.0003 +[2026-02-27 19:32:03] (step=0006599) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.2911367638426923, LR: 0.0003 +[2026-02-27 19:32:11] (step=0006600) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 1.2913324202700058, LR: 0.0003 +[2026-02-27 19:32:19] (step=0006601) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 1.2915280766973196, LR: 0.0003 +[2026-02-27 19:32:27] (step=0006602) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.2917237331246332, LR: 0.0003 +[2026-02-27 19:32:35] (step=0006603) Train Loss: 0.4695, Train Steps/Sec: 0.13, Epoch: 1.2919193895519467, LR: 0.0003 +[2026-02-27 19:32:43] (step=0006604) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 1.2921150459792605, LR: 0.0003 +[2026-02-27 19:32:50] (step=0006605) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.292310702406574, LR: 0.0003 +[2026-02-27 19:32:58] (step=0006606) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.2925063588338876, LR: 0.0003 +[2026-02-27 19:33:06] (step=0006607) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.2927020152612014, LR: 0.0003 +[2026-02-27 19:33:14] (step=0006608) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 1.292897671688515, LR: 0.0003 +[2026-02-27 19:33:22] (step=0006609) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.2930933281158286, LR: 0.0003 +[2026-02-27 19:33:30] (step=0006610) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.2932889845431423, LR: 0.0003 +[2026-02-27 19:33:37] (step=0006611) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.293484640970456, LR: 0.0003 +[2026-02-27 19:33:45] (step=0006612) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 1.2936802973977695, LR: 0.0003 +[2026-02-27 19:33:53] (step=0006613) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.2938759538250832, LR: 0.0003 +[2026-02-27 19:34:01] (step=0006614) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.2940716102523968, LR: 0.0003 +[2026-02-27 19:34:09] (step=0006615) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.2942672666797104, LR: 0.0003 +[2026-02-27 19:34:17] (step=0006616) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.2944629231070242, LR: 0.0003 +[2026-02-27 19:34:24] (step=0006617) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.2946585795343377, LR: 0.0003 +[2026-02-27 19:34:32] (step=0006618) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.2948542359616513, LR: 0.0003 +[2026-02-27 19:34:40] (step=0006619) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.295049892388965, LR: 0.0003 +[2026-02-27 19:34:48] (step=0006620) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.2952455488162786, LR: 0.0003 +[2026-02-27 19:34:56] (step=0006621) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.2954412052435922, LR: 0.0003 +[2026-02-27 19:35:04] (step=0006622) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.295636861670906, LR: 0.0003 +[2026-02-27 19:35:12] (step=0006623) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.2958325180982195, LR: 0.0003 +[2026-02-27 19:35:19] (step=0006624) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.296028174525533, LR: 0.0003 +[2026-02-27 19:35:27] (step=0006625) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.2962238309528469, LR: 0.0003 +[2026-02-27 19:35:35] (step=0006626) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.2964194873801604, LR: 0.0003 +[2026-02-27 19:35:43] (step=0006627) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.296615143807474, LR: 0.0003 +[2026-02-27 19:35:51] (step=0006628) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.2968108002347878, LR: 0.0003 +[2026-02-27 19:35:59] (step=0006629) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.2970064566621013, LR: 0.0003 +[2026-02-27 19:36:07] (step=0006630) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 1.297202113089415, LR: 0.0003 +[2026-02-27 19:36:14] (step=0006631) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.2973977695167287, LR: 0.0003 +[2026-02-27 19:36:22] (step=0006632) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.2975934259440423, LR: 0.0003 +[2026-02-27 19:36:30] (step=0006633) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.2977890823713558, LR: 0.0003 +[2026-02-27 19:36:38] (step=0006634) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.2979847387986696, LR: 0.0003 +[2026-02-27 19:36:46] (step=0006635) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.2981803952259832, LR: 0.0003 +[2026-02-27 19:36:54] (step=0006636) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.2983760516532967, LR: 0.0003 +[2026-02-27 19:37:02] (step=0006637) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.2985717080806105, LR: 0.0003 +[2026-02-27 19:37:10] (step=0006638) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 1.298767364507924, LR: 0.0003 +[2026-02-27 19:37:17] (step=0006639) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.2989630209352376, LR: 0.0003 +[2026-02-27 19:37:25] (step=0006640) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 1.2991586773625514, LR: 0.0003 +[2026-02-27 19:37:33] (step=0006641) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.299354333789865, LR: 0.0003 +[2026-02-27 19:37:41] (step=0006642) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.2995499902171785, LR: 0.0003 +[2026-02-27 19:37:49] (step=0006643) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.2997456466444923, LR: 0.0003 +[2026-02-27 19:37:57] (step=0006644) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.2999413030718059, LR: 0.0003 +[2026-02-27 19:38:04] (step=0006645) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.3001369594991194, LR: 0.0003 +[2026-02-27 19:38:12] (step=0006646) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.3003326159264332, LR: 0.0003 +[2026-02-27 19:38:20] (step=0006647) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.3005282723537468, LR: 0.0003 +[2026-02-27 19:38:28] (step=0006648) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.3007239287810604, LR: 0.0003 +[2026-02-27 19:38:36] (step=0006649) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.3009195852083741, LR: 0.0003 +[2026-02-27 19:38:44] (step=0006650) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.3011152416356877, LR: 0.0003 +[2026-02-27 19:38:52] (step=0006651) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 1.3013108980630013, LR: 0.0003 +[2026-02-27 19:38:59] (step=0006652) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.301506554490315, LR: 0.0003 +[2026-02-27 19:39:07] (step=0006653) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.3017022109176286, LR: 0.0003 +[2026-02-27 19:39:15] (step=0006654) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 1.3018978673449422, LR: 0.0003 +[2026-02-27 19:39:23] (step=0006655) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.302093523772256, LR: 0.0003 +[2026-02-27 19:39:31] (step=0006656) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.3022891801995695, LR: 0.0003 +[2026-02-27 19:39:39] (step=0006657) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.3024848366268833, LR: 0.0003 +[2026-02-27 19:39:46] (step=0006658) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.3026804930541969, LR: 0.0003 +[2026-02-27 19:39:54] (step=0006659) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.3028761494815104, LR: 0.0003 +[2026-02-27 19:40:02] (step=0006660) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.3030718059088242, LR: 0.0003 +[2026-02-27 19:40:10] (step=0006661) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.3032674623361378, LR: 0.0003 +[2026-02-27 19:40:18] (step=0006662) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 1.3034631187634513, LR: 0.0003 +[2026-02-27 19:40:26] (step=0006663) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.3036587751907651, LR: 0.0003 +[2026-02-27 19:40:33] (step=0006664) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.3038544316180787, LR: 0.0003 +[2026-02-27 19:40:41] (step=0006665) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.3040500880453922, LR: 0.0003 +[2026-02-27 19:40:49] (step=0006666) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.304245744472706, LR: 0.0003 +[2026-02-27 19:40:57] (step=0006667) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 1.3044414009000196, LR: 0.0003 +[2026-02-27 19:41:05] (step=0006668) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 1.3046370573273331, LR: 0.0003 +[2026-02-27 19:41:13] (step=0006669) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.304832713754647, LR: 0.0003 +[2026-02-27 19:41:21] (step=0006670) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.3050283701819605, LR: 0.0003 +[2026-02-27 19:41:28] (step=0006671) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.305224026609274, LR: 0.0003 +[2026-02-27 19:41:36] (step=0006672) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.3054196830365878, LR: 0.0003 +[2026-02-27 19:41:44] (step=0006673) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 1.3056153394639014, LR: 0.0003 +[2026-02-27 19:41:52] (step=0006674) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.305810995891215, LR: 0.0003 +[2026-02-27 19:42:00] (step=0006675) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.3060066523185287, LR: 0.0003 +[2026-02-27 19:42:08] (step=0006676) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.3062023087458423, LR: 0.0003 +[2026-02-27 19:42:16] (step=0006677) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.3063979651731559, LR: 0.0003 +[2026-02-27 19:42:23] (step=0006678) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.3065936216004697, LR: 0.0003 +[2026-02-27 19:42:31] (step=0006679) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.3067892780277832, LR: 0.0003 +[2026-02-27 19:42:39] (step=0006680) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.3069849344550968, LR: 0.0003 +[2026-02-27 19:42:47] (step=0006681) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 1.3071805908824106, LR: 0.0003 +[2026-02-27 19:42:55] (step=0006682) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.3073762473097241, LR: 0.0003 +[2026-02-27 19:43:03] (step=0006683) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.3075719037370377, LR: 0.0003 +[2026-02-27 19:43:10] (step=0006684) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.3077675601643515, LR: 0.0003 +[2026-02-27 19:43:18] (step=0006685) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.307963216591665, LR: 0.0003 +[2026-02-27 19:43:26] (step=0006686) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.3081588730189786, LR: 0.0003 +[2026-02-27 19:43:34] (step=0006687) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.3083545294462924, LR: 0.0003 +[2026-02-27 19:43:42] (step=0006688) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.308550185873606, LR: 0.0003 +[2026-02-27 19:43:50] (step=0006689) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 1.3087458423009195, LR: 0.0003 +[2026-02-27 19:43:58] (step=0006690) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.3089414987282333, LR: 0.0003 +[2026-02-27 19:44:06] (step=0006691) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.3091371551555468, LR: 0.0003 +[2026-02-27 19:44:13] (step=0006692) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.3093328115828604, LR: 0.0003 +[2026-02-27 19:44:21] (step=0006693) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.3095284680101742, LR: 0.0003 +[2026-02-27 19:44:29] (step=0006694) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.3097241244374878, LR: 0.0003 +[2026-02-27 19:44:37] (step=0006695) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.3099197808648013, LR: 0.0003 +[2026-02-27 19:44:45] (step=0006696) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.310115437292115, LR: 0.0003 +[2026-02-27 19:44:53] (step=0006697) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.3103110937194287, LR: 0.0003 +[2026-02-27 19:45:00] (step=0006698) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.3105067501467422, LR: 0.0003 +[2026-02-27 19:45:08] (step=0006699) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.310702406574056, LR: 0.0003 +[2026-02-27 19:45:16] (step=0006700) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.3108980630013696, LR: 0.0003 +[2026-02-27 19:45:24] (step=0006701) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.3110937194286831, LR: 0.0003 +[2026-02-27 19:45:32] (step=0006702) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.311289375855997, LR: 0.0003 +[2026-02-27 19:45:40] (step=0006703) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 1.3114850322833105, LR: 0.0003 +[2026-02-27 19:45:47] (step=0006704) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.311680688710624, LR: 0.0003 +[2026-02-27 19:45:55] (step=0006705) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.3118763451379378, LR: 0.0003 +[2026-02-27 19:46:03] (step=0006706) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.3120720015652514, LR: 0.0003 +[2026-02-27 19:46:11] (step=0006707) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.312267657992565, LR: 0.0003 +[2026-02-27 19:46:19] (step=0006708) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 1.3124633144198787, LR: 0.0003 +[2026-02-27 19:46:27] (step=0006709) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.3126589708471923, LR: 0.0003 +[2026-02-27 19:46:35] (step=0006710) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.3128546272745059, LR: 0.0003 +[2026-02-27 19:46:42] (step=0006711) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.3130502837018196, LR: 0.0003 +[2026-02-27 19:46:50] (step=0006712) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.3132459401291332, LR: 0.0003 +[2026-02-27 19:46:58] (step=0006713) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.313441596556447, LR: 0.0003 +[2026-02-27 19:47:06] (step=0006714) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 1.3136372529837606, LR: 0.0003 +[2026-02-27 19:47:14] (step=0006715) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.3138329094110741, LR: 0.0003 +[2026-02-27 19:47:22] (step=0006716) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.314028565838388, LR: 0.0003 +[2026-02-27 19:47:29] (step=0006717) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.3142242222657015, LR: 0.0003 +[2026-02-27 19:47:37] (step=0006718) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.314419878693015, LR: 0.0003 +[2026-02-27 19:47:45] (step=0006719) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.3146155351203288, LR: 0.0003 +[2026-02-27 19:47:53] (step=0006720) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.3148111915476424, LR: 0.0003 +[2026-02-27 19:48:01] (step=0006721) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 1.315006847974956, LR: 0.0003 +[2026-02-27 19:48:09] (step=0006722) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.3152025044022697, LR: 0.0003 +[2026-02-27 19:48:16] (step=0006723) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.3153981608295833, LR: 0.0003 +[2026-02-27 19:48:24] (step=0006724) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.3155938172568968, LR: 0.0003 +[2026-02-27 19:48:32] (step=0006725) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 1.3157894736842106, LR: 0.0003 +[2026-02-27 19:48:40] (step=0006726) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 1.3159851301115242, LR: 0.0003 +[2026-02-27 19:48:48] (step=0006727) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.3161807865388377, LR: 0.0003 +[2026-02-27 19:48:56] (step=0006728) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.3163764429661515, LR: 0.0003 +[2026-02-27 19:49:04] (step=0006729) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.316572099393465, LR: 0.0003 +[2026-02-27 19:49:12] (step=0006730) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.3167677558207787, LR: 0.0003 +[2026-02-27 19:49:19] (step=0006731) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.3169634122480924, LR: 0.0003 +[2026-02-27 19:49:27] (step=0006732) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.317159068675406, LR: 0.0003 +[2026-02-27 19:49:35] (step=0006733) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.3173547251027196, LR: 0.0003 +[2026-02-27 19:49:43] (step=0006734) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.3175503815300333, LR: 0.0003 +[2026-02-27 19:49:51] (step=0006735) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.317746037957347, LR: 0.0003 +[2026-02-27 19:49:59] (step=0006736) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.3179416943846605, LR: 0.0003 +[2026-02-27 19:50:06] (step=0006737) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.3181373508119743, LR: 0.0003 +[2026-02-27 19:50:14] (step=0006738) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.3183330072392878, LR: 0.0003 +[2026-02-27 19:50:22] (step=0006739) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.3185286636666014, LR: 0.0003 +[2026-02-27 19:50:30] (step=0006740) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.3187243200939152, LR: 0.0003 +[2026-02-27 19:50:38] (step=0006741) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.3189199765212287, LR: 0.0003 +[2026-02-27 19:50:46] (step=0006742) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.3191156329485423, LR: 0.0003 +[2026-02-27 19:50:54] (step=0006743) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.319311289375856, LR: 0.0003 +[2026-02-27 19:51:01] (step=0006744) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 1.3195069458031696, LR: 0.0003 +[2026-02-27 19:51:09] (step=0006745) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 1.3197026022304832, LR: 0.0003 +[2026-02-27 19:51:17] (step=0006746) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.319898258657797, LR: 0.0003 +[2026-02-27 19:51:25] (step=0006747) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.3200939150851105, LR: 0.0003 +[2026-02-27 19:51:33] (step=0006748) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.320289571512424, LR: 0.0003 +[2026-02-27 19:51:41] (step=0006749) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.3204852279397379, LR: 0.0003 +[2026-02-27 19:51:49] (step=0006750) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.3206808843670514, LR: 0.0003 +[2026-02-27 19:51:56] (step=0006751) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.320876540794365, LR: 0.0003 +[2026-02-27 19:52:04] (step=0006752) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.3210721972216788, LR: 0.0003 +[2026-02-27 19:52:12] (step=0006753) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 1.3212678536489924, LR: 0.0003 +[2026-02-27 19:52:20] (step=0006754) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 1.321463510076306, LR: 0.0003 +[2026-02-27 19:52:28] (step=0006755) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.3216591665036197, LR: 0.0003 +[2026-02-27 19:52:36] (step=0006756) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.3218548229309333, LR: 0.0003 +[2026-02-27 19:52:43] (step=0006757) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 1.3220504793582468, LR: 0.0003 +[2026-02-27 19:52:51] (step=0006758) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.3222461357855606, LR: 0.0003 +[2026-02-27 19:52:59] (step=0006759) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.3224417922128742, LR: 0.0003 +[2026-02-27 19:53:07] (step=0006760) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.3226374486401877, LR: 0.0003 +[2026-02-27 19:53:15] (step=0006761) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.3228331050675015, LR: 0.0003 +[2026-02-27 19:53:23] (step=0006762) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.323028761494815, LR: 0.0003 +[2026-02-27 19:53:30] (step=0006763) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.3232244179221286, LR: 0.0003 +[2026-02-27 19:53:38] (step=0006764) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.3234200743494424, LR: 0.0003 +[2026-02-27 19:53:46] (step=0006765) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.323615730776756, LR: 0.0003 +[2026-02-27 19:53:54] (step=0006766) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.3238113872040695, LR: 0.0003 +[2026-02-27 19:54:02] (step=0006767) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.3240070436313833, LR: 0.0003 +[2026-02-27 19:54:10] (step=0006768) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.324202700058697, LR: 0.0003 +[2026-02-27 19:54:17] (step=0006769) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.3243983564860107, LR: 0.0003 +[2026-02-27 19:54:25] (step=0006770) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.3245940129133242, LR: 0.0003 +[2026-02-27 19:54:33] (step=0006771) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.3247896693406378, LR: 0.0003 +[2026-02-27 19:54:41] (step=0006772) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.3249853257679516, LR: 0.0003 +[2026-02-27 19:54:49] (step=0006773) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.3251809821952651, LR: 0.0003 +[2026-02-27 19:54:57] (step=0006774) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.3253766386225787, LR: 0.0003 +[2026-02-27 19:55:05] (step=0006775) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.3255722950498925, LR: 0.0003 +[2026-02-27 19:55:13] (step=0006776) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.325767951477206, LR: 0.0003 +[2026-02-27 19:55:20] (step=0006777) Train Loss: 0.4760, Train Steps/Sec: 0.13, Epoch: 1.3259636079045196, LR: 0.0003 +[2026-02-27 19:55:28] (step=0006778) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 1.3261592643318334, LR: 0.0003 +[2026-02-27 19:55:36] (step=0006779) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 1.326354920759147, LR: 0.0003 +[2026-02-27 19:55:44] (step=0006780) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.3265505771864605, LR: 0.0003 +[2026-02-27 19:55:52] (step=0006781) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.3267462336137743, LR: 0.0003 +[2026-02-27 19:56:00] (step=0006782) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.3269418900410879, LR: 0.0003 +[2026-02-27 19:56:07] (step=0006783) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.3271375464684014, LR: 0.0003 +[2026-02-27 19:56:15] (step=0006784) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 1.3273332028957152, LR: 0.0003 +[2026-02-27 19:56:23] (step=0006785) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.3275288593230288, LR: 0.0003 +[2026-02-27 19:56:31] (step=0006786) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.3277245157503423, LR: 0.0003 +[2026-02-27 19:56:39] (step=0006787) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.3279201721776561, LR: 0.0003 +[2026-02-27 19:56:47] (step=0006788) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.3281158286049697, LR: 0.0003 +[2026-02-27 19:56:55] (step=0006789) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.3283114850322832, LR: 0.0003 +[2026-02-27 19:57:02] (step=0006790) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.328507141459597, LR: 0.0003 +[2026-02-27 19:57:10] (step=0006791) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.3287027978869106, LR: 0.0003 +[2026-02-27 19:57:18] (step=0006792) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 1.3288984543142242, LR: 0.0003 +[2026-02-27 19:57:26] (step=0006793) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.329094110741538, LR: 0.0003 +[2026-02-27 19:57:34] (step=0006794) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.3292897671688515, LR: 0.0003 +[2026-02-27 19:57:42] (step=0006795) Train Loss: 0.4599, Train Steps/Sec: 0.12, Epoch: 1.329485423596165, LR: 0.0003 +[2026-02-27 19:57:50] (step=0006796) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.3296810800234788, LR: 0.0003 +[2026-02-27 19:57:57] (step=0006797) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.3298767364507924, LR: 0.0003 +[2026-02-27 19:58:05] (step=0006798) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.330072392878106, LR: 0.0003 +[2026-02-27 19:58:13] (step=0006799) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.3302680493054198, LR: 0.0003 +[2026-02-27 19:58:21] (step=0006800) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.3304637057327333, LR: 0.0003 +[2026-02-27 19:58:29] (step=0006801) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.3306593621600469, LR: 0.0003 +[2026-02-27 19:58:37] (step=0006802) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.3308550185873607, LR: 0.0003 +[2026-02-27 19:58:44] (step=0006803) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.3310506750146742, LR: 0.0003 +[2026-02-27 19:58:52] (step=0006804) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 1.3312463314419878, LR: 0.0003 +[2026-02-27 19:59:00] (step=0006805) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 1.3314419878693016, LR: 0.0003 +[2026-02-27 19:59:08] (step=0006806) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.3316376442966151, LR: 0.0003 +[2026-02-27 19:59:16] (step=0006807) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.3318333007239287, LR: 0.0003 +[2026-02-27 19:59:24] (step=0006808) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.3320289571512425, LR: 0.0003 +[2026-02-27 19:59:32] (step=0006809) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.332224613578556, LR: 0.0003 +[2026-02-27 19:59:39] (step=0006810) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 1.3324202700058696, LR: 0.0003 +[2026-02-27 19:59:47] (step=0006811) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 1.3326159264331834, LR: 0.0003 +[2026-02-27 19:59:55] (step=0006812) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.332811582860497, LR: 0.0003 +[2026-02-27 20:00:03] (step=0006813) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.3330072392878105, LR: 0.0003 +[2026-02-27 20:00:11] (step=0006814) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.3332028957151243, LR: 0.0003 +[2026-02-27 20:00:19] (step=0006815) Train Loss: 0.4720, Train Steps/Sec: 0.13, Epoch: 1.3333985521424379, LR: 0.0003 +[2026-02-27 20:00:26] (step=0006816) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 1.3335942085697514, LR: 0.0003 +[2026-02-27 20:00:34] (step=0006817) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.3337898649970652, LR: 0.0003 +[2026-02-27 20:00:42] (step=0006818) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 1.3339855214243788, LR: 0.0003 +[2026-02-27 20:00:50] (step=0006819) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.3341811778516923, LR: 0.0003 +[2026-02-27 20:00:58] (step=0006820) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.334376834279006, LR: 0.0003 +[2026-02-27 20:01:06] (step=0006821) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.3345724907063197, LR: 0.0003 +[2026-02-27 20:01:14] (step=0006822) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.3347681471336332, LR: 0.0003 +[2026-02-27 20:01:21] (step=0006823) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.334963803560947, LR: 0.0003 +[2026-02-27 20:01:29] (step=0006824) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 1.3351594599882606, LR: 0.0003 +[2026-02-27 20:01:37] (step=0006825) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.3353551164155744, LR: 0.0003 +[2026-02-27 20:01:45] (step=0006826) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.335550772842888, LR: 0.0003 +[2026-02-27 20:01:53] (step=0006827) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.3357464292702015, LR: 0.0003 +[2026-02-27 20:02:01] (step=0006828) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.3359420856975153, LR: 0.0003 +[2026-02-27 20:02:09] (step=0006829) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.3361377421248288, LR: 0.0003 +[2026-02-27 20:02:16] (step=0006830) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.3363333985521424, LR: 0.0003 +[2026-02-27 20:02:24] (step=0006831) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.3365290549794562, LR: 0.0003 +[2026-02-27 20:02:32] (step=0006832) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.3367247114067697, LR: 0.0003 +[2026-02-27 20:02:40] (step=0006833) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.3369203678340833, LR: 0.0003 +[2026-02-27 20:02:48] (step=0006834) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.337116024261397, LR: 0.0003 +[2026-02-27 20:02:56] (step=0006835) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 1.3373116806887106, LR: 0.0003 +[2026-02-27 20:03:03] (step=0006836) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.3375073371160242, LR: 0.0003 +[2026-02-27 20:03:11] (step=0006837) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.337702993543338, LR: 0.0003 +[2026-02-27 20:03:19] (step=0006838) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 1.3378986499706516, LR: 0.0003 +[2026-02-27 20:03:27] (step=0006839) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.3380943063979651, LR: 0.0003 +[2026-02-27 20:03:35] (step=0006840) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.338289962825279, LR: 0.0003 +[2026-02-27 20:03:43] (step=0006841) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.3384856192525925, LR: 0.0003 +[2026-02-27 20:03:51] (step=0006842) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.338681275679906, LR: 0.0003 +[2026-02-27 20:03:58] (step=0006843) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 1.3388769321072198, LR: 0.0003 +[2026-02-27 20:04:06] (step=0006844) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 1.3390725885345334, LR: 0.0003 +[2026-02-27 20:04:14] (step=0006845) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.339268244961847, LR: 0.0003 +[2026-02-27 20:04:22] (step=0006846) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.3394639013891607, LR: 0.0003 +[2026-02-27 20:04:30] (step=0006847) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.3396595578164743, LR: 0.0003 +[2026-02-27 20:04:38] (step=0006848) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.3398552142437878, LR: 0.0003 +[2026-02-27 20:04:46] (step=0006849) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.3400508706711016, LR: 0.0003 +[2026-02-27 20:04:53] (step=0006850) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.3402465270984152, LR: 0.0003 +[2026-02-27 20:05:01] (step=0006851) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.3404421835257287, LR: 0.0003 +[2026-02-27 20:05:09] (step=0006852) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 1.3406378399530425, LR: 0.0003 +[2026-02-27 20:05:17] (step=0006853) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.340833496380356, LR: 0.0003 +[2026-02-27 20:05:25] (step=0006854) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 1.3410291528076697, LR: 0.0003 +[2026-02-27 20:05:33] (step=0006855) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 1.3412248092349834, LR: 0.0003 +[2026-02-27 20:05:40] (step=0006856) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.341420465662297, LR: 0.0003 +[2026-02-27 20:05:48] (step=0006857) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 1.3416161220896106, LR: 0.0003 +[2026-02-27 20:05:56] (step=0006858) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.3418117785169243, LR: 0.0003 +[2026-02-27 20:06:04] (step=0006859) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.342007434944238, LR: 0.0003 +[2026-02-27 20:06:12] (step=0006860) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.3422030913715515, LR: 0.0003 +[2026-02-27 20:06:20] (step=0006861) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.3423987477988653, LR: 0.0003 +[2026-02-27 20:06:27] (step=0006862) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.3425944042261788, LR: 0.0003 +[2026-02-27 20:06:35] (step=0006863) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.3427900606534924, LR: 0.0003 +[2026-02-27 20:06:43] (step=0006864) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 1.3429857170808062, LR: 0.0003 +[2026-02-27 20:06:51] (step=0006865) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.3431813735081197, LR: 0.0003 +[2026-02-27 20:06:59] (step=0006866) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.3433770299354333, LR: 0.0003 +[2026-02-27 20:07:07] (step=0006867) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.343572686362747, LR: 0.0003 +[2026-02-27 20:07:15] (step=0006868) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.3437683427900606, LR: 0.0003 +[2026-02-27 20:07:22] (step=0006869) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.3439639992173742, LR: 0.0003 +[2026-02-27 20:07:30] (step=0006870) Train Loss: 0.4575, Train Steps/Sec: 0.12, Epoch: 1.344159655644688, LR: 0.0003 +[2026-02-27 20:07:38] (step=0006871) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.3443553120720015, LR: 0.0003 +[2026-02-27 20:07:46] (step=0006872) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.344550968499315, LR: 0.0003 +[2026-02-27 20:07:54] (step=0006873) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 1.3447466249266289, LR: 0.0003 +[2026-02-27 20:08:02] (step=0006874) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.3449422813539424, LR: 0.0003 +[2026-02-27 20:08:10] (step=0006875) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.345137937781256, LR: 0.0003 +[2026-02-27 20:08:17] (step=0006876) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.3453335942085698, LR: 0.0003 +[2026-02-27 20:08:25] (step=0006877) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.3455292506358834, LR: 0.0003 +[2026-02-27 20:08:33] (step=0006878) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.345724907063197, LR: 0.0003 +[2026-02-27 20:08:41] (step=0006879) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.3459205634905107, LR: 0.0003 +[2026-02-27 20:08:49] (step=0006880) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.3461162199178243, LR: 0.0003 +[2026-02-27 20:08:57] (step=0006881) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.346311876345138, LR: 0.0003 +[2026-02-27 20:09:04] (step=0006882) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.3465075327724516, LR: 0.0003 +[2026-02-27 20:09:12] (step=0006883) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.3467031891997652, LR: 0.0003 +[2026-02-27 20:09:20] (step=0006884) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.346898845627079, LR: 0.0003 +[2026-02-27 20:09:28] (step=0006885) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.3470945020543925, LR: 0.0003 +[2026-02-27 20:09:36] (step=0006886) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.347290158481706, LR: 0.0003 +[2026-02-27 20:09:44] (step=0006887) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.3474858149090199, LR: 0.0003 +[2026-02-27 20:09:52] (step=0006888) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.3476814713363334, LR: 0.0003 +[2026-02-27 20:09:59] (step=0006889) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.347877127763647, LR: 0.0003 +[2026-02-27 20:10:07] (step=0006890) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 1.3480727841909608, LR: 0.0003 +[2026-02-27 20:10:15] (step=0006891) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.3482684406182743, LR: 0.0003 +[2026-02-27 20:10:23] (step=0006892) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.348464097045588, LR: 0.0003 +[2026-02-27 20:10:31] (step=0006893) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.3486597534729017, LR: 0.0003 +[2026-02-27 20:10:39] (step=0006894) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.3488554099002152, LR: 0.0003 +[2026-02-27 20:10:47] (step=0006895) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.3490510663275288, LR: 0.0003 +[2026-02-27 20:10:54] (step=0006896) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.3492467227548426, LR: 0.0003 +[2026-02-27 20:11:02] (step=0006897) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.3494423791821561, LR: 0.0003 +[2026-02-27 20:11:10] (step=0006898) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.3496380356094697, LR: 0.0003 +[2026-02-27 20:11:18] (step=0006899) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.3498336920367835, LR: 0.0003 +[2026-02-27 20:11:26] (step=0006900) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.350029348464097, LR: 0.0003 +[2026-02-27 20:11:34] (step=0006901) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.3502250048914106, LR: 0.0003 +[2026-02-27 20:11:41] (step=0006902) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.3504206613187244, LR: 0.0003 +[2026-02-27 20:11:49] (step=0006903) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 1.350616317746038, LR: 0.0003 +[2026-02-27 20:11:57] (step=0006904) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.3508119741733515, LR: 0.0003 +[2026-02-27 20:12:05] (step=0006905) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.3510076306006653, LR: 0.0003 +[2026-02-27 20:12:13] (step=0006906) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.3512032870279789, LR: 0.0003 +[2026-02-27 20:12:21] (step=0006907) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.3513989434552924, LR: 0.0003 +[2026-02-27 20:12:28] (step=0006908) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.3515945998826062, LR: 0.0003 +[2026-02-27 20:12:36] (step=0006909) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.3517902563099198, LR: 0.0003 +[2026-02-27 20:12:44] (step=0006910) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.3519859127372333, LR: 0.0003 +[2026-02-27 20:12:52] (step=0006911) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.3521815691645471, LR: 0.0003 +[2026-02-27 20:13:00] (step=0006912) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.3523772255918607, LR: 0.0003 +[2026-02-27 20:13:08] (step=0006913) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.3525728820191742, LR: 0.0003 +[2026-02-27 20:13:16] (step=0006914) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.352768538446488, LR: 0.0003 +[2026-02-27 20:13:23] (step=0006915) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.3529641948738016, LR: 0.0003 +[2026-02-27 20:13:31] (step=0006916) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.3531598513011152, LR: 0.0003 +[2026-02-27 20:13:39] (step=0006917) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 1.353355507728429, LR: 0.0003 +[2026-02-27 20:13:47] (step=0006918) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.3535511641557425, LR: 0.0003 +[2026-02-27 20:13:55] (step=0006919) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.353746820583056, LR: 0.0003 +[2026-02-27 20:14:03] (step=0006920) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.3539424770103698, LR: 0.0003 +[2026-02-27 20:14:11] (step=0006921) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.3541381334376834, LR: 0.0003 +[2026-02-27 20:14:18] (step=0006922) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.354333789864997, LR: 0.0003 +[2026-02-27 20:14:26] (step=0006923) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.3545294462923108, LR: 0.0003 +[2026-02-27 20:14:34] (step=0006924) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.3547251027196243, LR: 0.0003 +[2026-02-27 20:14:42] (step=0006925) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 1.3549207591469379, LR: 0.0003 +[2026-02-27 20:14:50] (step=0006926) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.3551164155742517, LR: 0.0003 +[2026-02-27 20:14:58] (step=0006927) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.3553120720015652, LR: 0.0003 +[2026-02-27 20:15:05] (step=0006928) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.3555077284288788, LR: 0.0003 +[2026-02-27 20:15:13] (step=0006929) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.3557033848561926, LR: 0.0003 +[2026-02-27 20:15:21] (step=0006930) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.3558990412835061, LR: 0.0003 +[2026-02-27 20:15:29] (step=0006931) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.3560946977108197, LR: 0.0003 +[2026-02-27 20:15:37] (step=0006932) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.3562903541381335, LR: 0.0003 +[2026-02-27 20:15:45] (step=0006933) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.356486010565447, LR: 0.0003 +[2026-02-27 20:15:52] (step=0006934) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.3566816669927606, LR: 0.0003 +[2026-02-27 20:16:00] (step=0006935) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.3568773234200744, LR: 0.0003 +[2026-02-27 20:16:08] (step=0006936) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.357072979847388, LR: 0.0003 +[2026-02-27 20:16:16] (step=0006937) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.3572686362747017, LR: 0.0003 +[2026-02-27 20:16:24] (step=0006938) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.3574642927020153, LR: 0.0003 +[2026-02-27 20:16:32] (step=0006939) Train Loss: 0.4577, Train Steps/Sec: 0.12, Epoch: 1.3576599491293289, LR: 0.0003 +[2026-02-27 20:16:40] (step=0006940) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.3578556055566426, LR: 0.0003 +[2026-02-27 20:16:48] (step=0006941) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.3580512619839562, LR: 0.0003 +[2026-02-27 20:16:55] (step=0006942) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.3582469184112698, LR: 0.0003 +[2026-02-27 20:17:03] (step=0006943) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.3584425748385835, LR: 0.0003 +[2026-02-27 20:17:11] (step=0006944) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.3586382312658971, LR: 0.0003 +[2026-02-27 20:17:19] (step=0006945) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.3588338876932107, LR: 0.0003 +[2026-02-27 20:17:27] (step=0006946) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.3590295441205245, LR: 0.0003 +[2026-02-27 20:17:35] (step=0006947) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.359225200547838, LR: 0.0003 +[2026-02-27 20:17:42] (step=0006948) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.3594208569751516, LR: 0.0003 +[2026-02-27 20:17:50] (step=0006949) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.3596165134024654, LR: 0.0003 +[2026-02-27 20:17:58] (step=0006950) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.359812169829779, LR: 0.0003 +[2026-02-27 20:18:06] (step=0006951) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.3600078262570925, LR: 0.0003 +[2026-02-27 20:18:14] (step=0006952) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.3602034826844063, LR: 0.0003 +[2026-02-27 20:18:22] (step=0006953) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.3603991391117198, LR: 0.0003 +[2026-02-27 20:18:29] (step=0006954) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 1.3605947955390334, LR: 0.0003 +[2026-02-27 20:18:37] (step=0006955) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.3607904519663472, LR: 0.0003 +[2026-02-27 20:18:45] (step=0006956) Train Loss: 0.4734, Train Steps/Sec: 0.13, Epoch: 1.3609861083936607, LR: 0.0003 +[2026-02-27 20:18:53] (step=0006957) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.3611817648209743, LR: 0.0003 +[2026-02-27 20:19:01] (step=0006958) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 1.361377421248288, LR: 0.0003 +[2026-02-27 20:19:09] (step=0006959) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 1.3615730776756017, LR: 0.0003 +[2026-02-27 20:19:16] (step=0006960) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 1.3617687341029152, LR: 0.0003 +[2026-02-27 20:19:24] (step=0006961) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.361964390530229, LR: 0.0003 +[2026-02-27 20:19:32] (step=0006962) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.3621600469575426, LR: 0.0003 +[2026-02-27 20:19:40] (step=0006963) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.3623557033848561, LR: 0.0003 +[2026-02-27 20:19:48] (step=0006964) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 1.36255135981217, LR: 0.0003 +[2026-02-27 20:19:56] (step=0006965) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.3627470162394835, LR: 0.0003 +[2026-02-27 20:20:04] (step=0006966) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.362942672666797, LR: 0.0003 +[2026-02-27 20:20:11] (step=0006967) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 1.3631383290941108, LR: 0.0003 +[2026-02-27 20:20:19] (step=0006968) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 1.3633339855214244, LR: 0.0003 +[2026-02-27 20:20:27] (step=0006969) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.363529641948738, LR: 0.0003 +[2026-02-27 20:20:35] (step=0006970) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.3637252983760517, LR: 0.0003 +[2026-02-27 20:20:43] (step=0006971) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.3639209548033653, LR: 0.0003 +[2026-02-27 20:20:51] (step=0006972) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.3641166112306788, LR: 0.0003 +[2026-02-27 20:20:59] (step=0006973) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 1.3643122676579926, LR: 0.0003 +[2026-02-27 20:21:06] (step=0006974) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 1.3645079240853062, LR: 0.0003 +[2026-02-27 20:21:14] (step=0006975) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.3647035805126198, LR: 0.0003 +[2026-02-27 20:21:22] (step=0006976) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.3648992369399335, LR: 0.0003 +[2026-02-27 20:21:30] (step=0006977) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 1.365094893367247, LR: 0.0003 +[2026-02-27 20:21:38] (step=0006978) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.3652905497945607, LR: 0.0003 +[2026-02-27 20:21:46] (step=0006979) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 1.3654862062218744, LR: 0.0003 +[2026-02-27 20:21:53] (step=0006980) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.365681862649188, LR: 0.0003 +[2026-02-27 20:22:01] (step=0006981) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.3658775190765016, LR: 0.0003 +[2026-02-27 20:22:09] (step=0006982) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 1.3660731755038154, LR: 0.0003 +[2026-02-27 20:22:17] (step=0006983) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.366268831931129, LR: 0.0003 +[2026-02-27 20:22:25] (step=0006984) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.3664644883584425, LR: 0.0003 +[2026-02-27 20:22:33] (step=0006985) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.3666601447857563, LR: 0.0003 +[2026-02-27 20:22:41] (step=0006986) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.3668558012130698, LR: 0.0003 +[2026-02-27 20:22:48] (step=0006987) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.3670514576403834, LR: 0.0003 +[2026-02-27 20:22:56] (step=0006988) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.3672471140676972, LR: 0.0003 +[2026-02-27 20:23:04] (step=0006989) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.3674427704950107, LR: 0.0003 +[2026-02-27 20:23:12] (step=0006990) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.3676384269223243, LR: 0.0003 +[2026-02-27 20:23:20] (step=0006991) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.367834083349638, LR: 0.0003 +[2026-02-27 20:23:28] (step=0006992) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 1.3680297397769516, LR: 0.0003 +[2026-02-27 20:23:36] (step=0006993) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.3682253962042654, LR: 0.0003 +[2026-02-27 20:23:43] (step=0006994) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 1.368421052631579, LR: 0.0003 +[2026-02-27 20:23:51] (step=0006995) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.3686167090588925, LR: 0.0003 +[2026-02-27 20:23:59] (step=0006996) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.3688123654862063, LR: 0.0003 +[2026-02-27 20:24:07] (step=0006997) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.36900802191352, LR: 0.0003 +[2026-02-27 20:24:15] (step=0006998) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.3692036783408335, LR: 0.0003 +[2026-02-27 20:24:23] (step=0006999) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.3693993347681472, LR: 0.0003 +[2026-02-27 20:24:30] (step=0007000) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 1.3695949911954608, LR: 0.0003 +[2026-02-27 20:24:30] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0007000/ +[2026-02-27 20:24:38] (step=0007001) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.3697906476227744, LR: 0.0003 +[2026-02-27 20:24:46] (step=0007002) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.3699863040500881, LR: 0.0003 +[2026-02-27 20:24:54] (step=0007003) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.3701819604774017, LR: 0.0003 +[2026-02-27 20:25:02] (step=0007004) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.3703776169047153, LR: 0.0003 +[2026-02-27 20:25:10] (step=0007005) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.370573273332029, LR: 0.0003 +[2026-02-27 20:25:17] (step=0007006) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 1.3707689297593426, LR: 0.0003 +[2026-02-27 20:25:25] (step=0007007) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.3709645861866562, LR: 0.0003 +[2026-02-27 20:25:33] (step=0007008) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 1.37116024261397, LR: 0.0003 +[2026-02-27 20:25:41] (step=0007009) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.3713558990412835, LR: 0.0003 +[2026-02-27 20:25:49] (step=0007010) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.371551555468597, LR: 0.0003 +[2026-02-27 20:25:57] (step=0007011) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.3717472118959109, LR: 0.0003 +[2026-02-27 20:26:04] (step=0007012) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 1.3719428683232244, LR: 0.0003 +[2026-02-27 20:26:12] (step=0007013) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.372138524750538, LR: 0.0003 +[2026-02-27 20:26:20] (step=0007014) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 1.3723341811778518, LR: 0.0003 +[2026-02-27 20:26:28] (step=0007015) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.3725298376051653, LR: 0.0003 +[2026-02-27 20:26:36] (step=0007016) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.372725494032479, LR: 0.0003 +[2026-02-27 20:26:44] (step=0007017) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 1.3729211504597927, LR: 0.0003 +[2026-02-27 20:26:52] (step=0007018) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.3731168068871062, LR: 0.0003 +[2026-02-27 20:27:00] (step=0007019) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.3733124633144198, LR: 0.0003 +[2026-02-27 20:27:07] (step=0007020) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.3735081197417336, LR: 0.0003 +[2026-02-27 20:27:15] (step=0007021) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.3737037761690472, LR: 0.0003 +[2026-02-27 20:27:23] (step=0007022) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.3738994325963607, LR: 0.0003 +[2026-02-27 20:27:31] (step=0007023) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.3740950890236745, LR: 0.0003 +[2026-02-27 20:27:39] (step=0007024) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.374290745450988, LR: 0.0003 +[2026-02-27 20:27:47] (step=0007025) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.3744864018783016, LR: 0.0003 +[2026-02-27 20:27:54] (step=0007026) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.3746820583056154, LR: 0.0003 +[2026-02-27 20:28:02] (step=0007027) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 1.374877714732929, LR: 0.0003 +[2026-02-27 20:28:10] (step=0007028) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.3750733711602425, LR: 0.0003 +[2026-02-27 20:28:18] (step=0007029) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.3752690275875563, LR: 0.0003 +[2026-02-27 20:28:26] (step=0007030) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.3754646840148699, LR: 0.0003 +[2026-02-27 20:28:34] (step=0007031) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.3756603404421834, LR: 0.0003 +[2026-02-27 20:28:41] (step=0007032) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.3758559968694972, LR: 0.0003 +[2026-02-27 20:28:49] (step=0007033) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.3760516532968108, LR: 0.0003 +[2026-02-27 20:28:57] (step=0007034) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.3762473097241243, LR: 0.0003 +[2026-02-27 20:29:05] (step=0007035) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.3764429661514381, LR: 0.0003 +[2026-02-27 20:29:13] (step=0007036) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.3766386225787517, LR: 0.0003 +[2026-02-27 20:29:21] (step=0007037) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.3768342790060653, LR: 0.0003 +[2026-02-27 20:29:29] (step=0007038) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.377029935433379, LR: 0.0003 +[2026-02-27 20:29:37] (step=0007039) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.3772255918606926, LR: 0.0003 +[2026-02-27 20:29:44] (step=0007040) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.3774212482880062, LR: 0.0003 +[2026-02-27 20:29:52] (step=0007041) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 1.37761690471532, LR: 0.0003 +[2026-02-27 20:30:00] (step=0007042) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.3778125611426335, LR: 0.0003 +[2026-02-27 20:30:08] (step=0007043) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.378008217569947, LR: 0.0003 +[2026-02-27 20:30:16] (step=0007044) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.3782038739972609, LR: 0.0003 +[2026-02-27 20:30:24] (step=0007045) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.3783995304245744, LR: 0.0003 +[2026-02-27 20:30:31] (step=0007046) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.378595186851888, LR: 0.0003 +[2026-02-27 20:30:39] (step=0007047) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 1.3787908432792018, LR: 0.0003 +[2026-02-27 20:30:47] (step=0007048) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.3789864997065153, LR: 0.0003 +[2026-02-27 20:30:55] (step=0007049) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.379182156133829, LR: 0.0003 +[2026-02-27 20:31:03] (step=0007050) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.3793778125611427, LR: 0.0003 +[2026-02-27 20:31:11] (step=0007051) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.3795734689884562, LR: 0.0003 +[2026-02-27 20:31:18] (step=0007052) Train Loss: 0.4737, Train Steps/Sec: 0.13, Epoch: 1.37976912541577, LR: 0.0003 +[2026-02-27 20:31:26] (step=0007053) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.3799647818430836, LR: 0.0003 +[2026-02-27 20:31:34] (step=0007054) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.3801604382703971, LR: 0.0003 +[2026-02-27 20:31:42] (step=0007055) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.380356094697711, LR: 0.0003 +[2026-02-27 20:31:50] (step=0007056) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.3805517511250245, LR: 0.0003 +[2026-02-27 20:31:58] (step=0007057) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.380747407552338, LR: 0.0003 +[2026-02-27 20:32:06] (step=0007058) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.3809430639796518, LR: 0.0003 +[2026-02-27 20:32:13] (step=0007059) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.3811387204069654, LR: 0.0003 +[2026-02-27 20:32:21] (step=0007060) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.381334376834279, LR: 0.0003 +[2026-02-27 20:32:29] (step=0007061) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.3815300332615927, LR: 0.0003 +[2026-02-27 20:32:37] (step=0007062) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.3817256896889063, LR: 0.0003 +[2026-02-27 20:32:45] (step=0007063) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.3819213461162199, LR: 0.0003 +[2026-02-27 20:32:53] (step=0007064) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.3821170025435336, LR: 0.0003 +[2026-02-27 20:33:01] (step=0007065) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.3823126589708472, LR: 0.0003 +[2026-02-27 20:33:08] (step=0007066) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.3825083153981608, LR: 0.0003 +[2026-02-27 20:33:16] (step=0007067) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.3827039718254746, LR: 0.0003 +[2026-02-27 20:33:24] (step=0007068) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.3828996282527881, LR: 0.0003 +[2026-02-27 20:33:32] (step=0007069) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.3830952846801017, LR: 0.0003 +[2026-02-27 20:33:40] (step=0007070) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.3832909411074155, LR: 0.0003 +[2026-02-27 20:33:48] (step=0007071) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.383486597534729, LR: 0.0003 +[2026-02-27 20:33:55] (step=0007072) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.3836822539620426, LR: 0.0003 +[2026-02-27 20:34:03] (step=0007073) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.3838779103893564, LR: 0.0003 +[2026-02-27 20:34:11] (step=0007074) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 1.38407356681667, LR: 0.0003 +[2026-02-27 20:34:19] (step=0007075) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.3842692232439835, LR: 0.0003 +[2026-02-27 20:34:27] (step=0007076) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.3844648796712973, LR: 0.0003 +[2026-02-27 20:34:35] (step=0007077) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.3846605360986108, LR: 0.0003 +[2026-02-27 20:34:43] (step=0007078) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.3848561925259244, LR: 0.0003 +[2026-02-27 20:34:50] (step=0007079) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.3850518489532382, LR: 0.0003 +[2026-02-27 20:34:58] (step=0007080) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.3852475053805517, LR: 0.0003 +[2026-02-27 20:35:06] (step=0007081) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.3854431618078653, LR: 0.0003 +[2026-02-27 20:35:14] (step=0007082) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.385638818235179, LR: 0.0003 +[2026-02-27 20:35:22] (step=0007083) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 1.3858344746624927, LR: 0.0003 +[2026-02-27 20:35:30] (step=0007084) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.3860301310898062, LR: 0.0003 +[2026-02-27 20:35:37] (step=0007085) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.38622578751712, LR: 0.0003 +[2026-02-27 20:35:45] (step=0007086) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.3864214439444336, LR: 0.0003 +[2026-02-27 20:35:53] (step=0007087) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.3866171003717471, LR: 0.0003 +[2026-02-27 20:36:01] (step=0007088) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 1.386812756799061, LR: 0.0003 +[2026-02-27 20:36:09] (step=0007089) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.3870084132263745, LR: 0.0003 +[2026-02-27 20:36:17] (step=0007090) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 1.387204069653688, LR: 0.0003 +[2026-02-27 20:36:25] (step=0007091) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.3873997260810018, LR: 0.0003 +[2026-02-27 20:36:32] (step=0007092) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.3875953825083154, LR: 0.0003 +[2026-02-27 20:36:40] (step=0007093) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.387791038935629, LR: 0.0003 +[2026-02-27 20:36:48] (step=0007094) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.3879866953629427, LR: 0.0003 +[2026-02-27 20:36:56] (step=0007095) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.3881823517902563, LR: 0.0003 +[2026-02-27 20:37:04] (step=0007096) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.3883780082175698, LR: 0.0003 +[2026-02-27 20:37:12] (step=0007097) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.3885736646448836, LR: 0.0003 +[2026-02-27 20:37:19] (step=0007098) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.3887693210721972, LR: 0.0003 +[2026-02-27 20:37:27] (step=0007099) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.3889649774995108, LR: 0.0003 +[2026-02-27 20:37:35] (step=0007100) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.3891606339268245, LR: 0.0003 +[2026-02-27 20:37:43] (step=0007101) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.389356290354138, LR: 0.0003 +[2026-02-27 20:37:51] (step=0007102) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.3895519467814517, LR: 0.0003 +[2026-02-27 20:37:59] (step=0007103) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.3897476032087654, LR: 0.0003 +[2026-02-27 20:38:06] (step=0007104) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.389943259636079, LR: 0.0003 +[2026-02-27 20:38:14] (step=0007105) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.3901389160633928, LR: 0.0003 +[2026-02-27 20:38:22] (step=0007106) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.3903345724907064, LR: 0.0003 +[2026-02-27 20:38:30] (step=0007107) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.39053022891802, LR: 0.0003 +[2026-02-27 20:38:38] (step=0007108) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.3907258853453337, LR: 0.0003 +[2026-02-27 20:38:46] (step=0007109) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.3909215417726473, LR: 0.0003 +[2026-02-27 20:38:54] (step=0007110) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.3911171981999608, LR: 0.0003 +[2026-02-27 20:39:01] (step=0007111) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.3913128546272746, LR: 0.0003 +[2026-02-27 20:39:09] (step=0007112) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.3915085110545882, LR: 0.0003 +[2026-02-27 20:39:17] (step=0007113) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 1.3917041674819017, LR: 0.0003 +[2026-02-27 20:39:25] (step=0007114) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.3918998239092155, LR: 0.0003 +[2026-02-27 20:39:33] (step=0007115) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.392095480336529, LR: 0.0003 +[2026-02-27 20:39:41] (step=0007116) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.3922911367638426, LR: 0.0003 +[2026-02-27 20:39:49] (step=0007117) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.3924867931911564, LR: 0.0003 +[2026-02-27 20:39:56] (step=0007118) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.39268244961847, LR: 0.0003 +[2026-02-27 20:40:04] (step=0007119) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 1.3928781060457835, LR: 0.0003 +[2026-02-27 20:40:12] (step=0007120) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.3930737624730973, LR: 0.0003 +[2026-02-27 20:40:20] (step=0007121) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.393269418900411, LR: 0.0003 +[2026-02-27 20:40:28] (step=0007122) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.3934650753277245, LR: 0.0003 +[2026-02-27 20:40:36] (step=0007123) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.3936607317550382, LR: 0.0003 +[2026-02-27 20:40:43] (step=0007124) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.3938563881823518, LR: 0.0003 +[2026-02-27 20:40:51] (step=0007125) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 1.3940520446096654, LR: 0.0003 +[2026-02-27 20:40:59] (step=0007126) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 1.3942477010369791, LR: 0.0003 +[2026-02-27 20:41:07] (step=0007127) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.3944433574642927, LR: 0.0003 +[2026-02-27 20:41:15] (step=0007128) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.3946390138916063, LR: 0.0003 +[2026-02-27 20:41:23] (step=0007129) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 1.39483467031892, LR: 0.0003 +[2026-02-27 20:41:30] (step=0007130) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.3950303267462336, LR: 0.0003 +[2026-02-27 20:41:38] (step=0007131) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.3952259831735472, LR: 0.0003 +[2026-02-27 20:41:46] (step=0007132) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.395421639600861, LR: 0.0003 +[2026-02-27 20:41:54] (step=0007133) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 1.3956172960281745, LR: 0.0003 +[2026-02-27 20:42:02] (step=0007134) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.395812952455488, LR: 0.0003 +[2026-02-27 20:42:10] (step=0007135) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.3960086088828019, LR: 0.0003 +[2026-02-27 20:42:18] (step=0007136) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.3962042653101154, LR: 0.0003 +[2026-02-27 20:42:25] (step=0007137) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.396399921737429, LR: 0.0003 +[2026-02-27 20:42:33] (step=0007138) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.3965955781647428, LR: 0.0003 +[2026-02-27 20:42:41] (step=0007139) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.3967912345920563, LR: 0.0003 +[2026-02-27 20:42:49] (step=0007140) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.39698689101937, LR: 0.0003 +[2026-02-27 20:42:57] (step=0007141) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 1.3971825474466837, LR: 0.0003 +[2026-02-27 20:43:05] (step=0007142) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.3973782038739972, LR: 0.0003 +[2026-02-27 20:43:13] (step=0007143) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.3975738603013108, LR: 0.0003 +[2026-02-27 20:43:20] (step=0007144) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.3977695167286246, LR: 0.0003 +[2026-02-27 20:43:28] (step=0007145) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.3979651731559382, LR: 0.0003 +[2026-02-27 20:43:36] (step=0007146) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.3981608295832517, LR: 0.0003 +[2026-02-27 20:43:44] (step=0007147) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.3983564860105655, LR: 0.0003 +[2026-02-27 20:43:52] (step=0007148) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.398552142437879, LR: 0.0003 +[2026-02-27 20:44:00] (step=0007149) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.3987477988651926, LR: 0.0003 +[2026-02-27 20:44:07] (step=0007150) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 1.3989434552925064, LR: 0.0003 +[2026-02-27 20:44:15] (step=0007151) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.39913911171982, LR: 0.0003 +[2026-02-27 20:44:23] (step=0007152) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.3993347681471335, LR: 0.0003 +[2026-02-27 20:44:31] (step=0007153) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.3995304245744473, LR: 0.0003 +[2026-02-27 20:44:39] (step=0007154) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.3997260810017609, LR: 0.0003 +[2026-02-27 20:44:47] (step=0007155) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.3999217374290744, LR: 0.0003 +[2026-02-27 20:44:55] (step=0007156) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.4001173938563882, LR: 0.0003 +[2026-02-27 20:45:02] (step=0007157) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.4003130502837018, LR: 0.0003 +[2026-02-27 20:45:10] (step=0007158) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.4005087067110153, LR: 0.0003 +[2026-02-27 20:45:18] (step=0007159) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.4007043631383291, LR: 0.0003 +[2026-02-27 20:45:26] (step=0007160) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.4009000195656427, LR: 0.0003 +[2026-02-27 20:45:34] (step=0007161) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 1.4010956759929563, LR: 0.0003 +[2026-02-27 20:45:42] (step=0007162) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.40129133242027, LR: 0.0003 +[2026-02-27 20:45:49] (step=0007163) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.4014869888475836, LR: 0.0003 +[2026-02-27 20:45:57] (step=0007164) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.4016826452748974, LR: 0.0003 +[2026-02-27 20:46:05] (step=0007165) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 1.401878301702211, LR: 0.0003 +[2026-02-27 20:46:13] (step=0007166) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.4020739581295245, LR: 0.0003 +[2026-02-27 20:46:21] (step=0007167) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 1.4022696145568383, LR: 0.0003 +[2026-02-27 20:46:29] (step=0007168) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.4024652709841519, LR: 0.0003 +[2026-02-27 20:46:37] (step=0007169) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.4026609274114654, LR: 0.0003 +[2026-02-27 20:46:44] (step=0007170) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.4028565838387792, LR: 0.0003 +[2026-02-27 20:46:52] (step=0007171) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 1.4030522402660928, LR: 0.0003 +[2026-02-27 20:47:00] (step=0007172) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.4032478966934063, LR: 0.0003 +[2026-02-27 20:47:08] (step=0007173) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 1.40344355312072, LR: 0.0003 +[2026-02-27 20:47:16] (step=0007174) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.4036392095480337, LR: 0.0003 +[2026-02-27 20:47:24] (step=0007175) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.4038348659753472, LR: 0.0003 +[2026-02-27 20:47:31] (step=0007176) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.404030522402661, LR: 0.0003 +[2026-02-27 20:47:39] (step=0007177) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.4042261788299746, LR: 0.0003 +[2026-02-27 20:47:47] (step=0007178) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.4044218352572881, LR: 0.0003 +[2026-02-27 20:47:55] (step=0007179) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.404617491684602, LR: 0.0003 +[2026-02-27 20:48:03] (step=0007180) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 1.4048131481119155, LR: 0.0003 +[2026-02-27 20:48:11] (step=0007181) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.405008804539229, LR: 0.0003 +[2026-02-27 20:48:19] (step=0007182) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.4052044609665428, LR: 0.0003 +[2026-02-27 20:48:26] (step=0007183) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.4054001173938564, LR: 0.0003 +[2026-02-27 20:48:34] (step=0007184) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.40559577382117, LR: 0.0003 +[2026-02-27 20:48:42] (step=0007185) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.4057914302484837, LR: 0.0003 +[2026-02-27 20:48:50] (step=0007186) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 1.4059870866757973, LR: 0.0003 +[2026-02-27 20:48:58] (step=0007187) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.4061827431031109, LR: 0.0003 +[2026-02-27 20:49:06] (step=0007188) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.4063783995304246, LR: 0.0003 +[2026-02-27 20:49:14] (step=0007189) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.4065740559577382, LR: 0.0003 +[2026-02-27 20:49:21] (step=0007190) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.4067697123850518, LR: 0.0003 +[2026-02-27 20:49:29] (step=0007191) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 1.4069653688123656, LR: 0.0003 +[2026-02-27 20:49:37] (step=0007192) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.4071610252396791, LR: 0.0003 +[2026-02-27 20:49:45] (step=0007193) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.4073566816669927, LR: 0.0003 +[2026-02-27 20:49:53] (step=0007194) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 1.4075523380943065, LR: 0.0003 +[2026-02-27 20:50:01] (step=0007195) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.40774799452162, LR: 0.0003 +[2026-02-27 20:50:08] (step=0007196) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.4079436509489336, LR: 0.0003 +[2026-02-27 20:50:16] (step=0007197) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.4081393073762474, LR: 0.0003 +[2026-02-27 20:50:24] (step=0007198) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.408334963803561, LR: 0.0003 +[2026-02-27 20:50:32] (step=0007199) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.4085306202308745, LR: 0.0003 +[2026-02-27 20:50:40] (step=0007200) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.4087262766581883, LR: 0.0003 +[2026-02-27 20:50:48] (step=0007201) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.4089219330855018, LR: 0.0003 +[2026-02-27 20:50:55] (step=0007202) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.4091175895128154, LR: 0.0003 +[2026-02-27 20:51:03] (step=0007203) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.4093132459401292, LR: 0.0003 +[2026-02-27 20:51:11] (step=0007204) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.4095089023674428, LR: 0.0003 +[2026-02-27 20:51:19] (step=0007205) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.4097045587947563, LR: 0.0003 +[2026-02-27 20:51:27] (step=0007206) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.40990021522207, LR: 0.0003 +[2026-02-27 20:51:35] (step=0007207) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 1.4100958716493837, LR: 0.0003 +[2026-02-27 20:51:42] (step=0007208) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 1.4102915280766972, LR: 0.0003 +[2026-02-27 20:51:50] (step=0007209) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 1.410487184504011, LR: 0.0003 +[2026-02-27 20:51:58] (step=0007210) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.4106828409313246, LR: 0.0003 +[2026-02-27 20:52:06] (step=0007211) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 1.4108784973586381, LR: 0.0003 +[2026-02-27 20:52:14] (step=0007212) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.411074153785952, LR: 0.0003 +[2026-02-27 20:52:22] (step=0007213) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.4112698102132655, LR: 0.0003 +[2026-02-27 20:52:30] (step=0007214) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.411465466640579, LR: 0.0003 +[2026-02-27 20:52:37] (step=0007215) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 1.4116611230678928, LR: 0.0003 +[2026-02-27 20:52:45] (step=0007216) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.4118567794952064, LR: 0.0003 +[2026-02-27 20:52:53] (step=0007217) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.41205243592252, LR: 0.0003 +[2026-02-27 20:53:01] (step=0007218) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.4122480923498337, LR: 0.0003 +[2026-02-27 20:53:09] (step=0007219) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.4124437487771473, LR: 0.0003 +[2026-02-27 20:53:17] (step=0007220) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.412639405204461, LR: 0.0003 +[2026-02-27 20:53:25] (step=0007221) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.4128350616317746, LR: 0.0003 +[2026-02-27 20:53:32] (step=0007222) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 1.4130307180590882, LR: 0.0003 +[2026-02-27 20:53:40] (step=0007223) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.413226374486402, LR: 0.0003 +[2026-02-27 20:53:48] (step=0007224) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.4134220309137155, LR: 0.0003 +[2026-02-27 20:53:56] (step=0007225) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.413617687341029, LR: 0.0003 +[2026-02-27 20:54:04] (step=0007226) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 1.413813343768343, LR: 0.0003 +[2026-02-27 20:54:12] (step=0007227) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.4140090001956565, LR: 0.0003 +[2026-02-27 20:54:19] (step=0007228) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.41420465662297, LR: 0.0003 +[2026-02-27 20:54:27] (step=0007229) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.4144003130502838, LR: 0.0003 +[2026-02-27 20:54:35] (step=0007230) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.4145959694775974, LR: 0.0003 +[2026-02-27 20:54:43] (step=0007231) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.414791625904911, LR: 0.0003 +[2026-02-27 20:54:51] (step=0007232) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.4149872823322247, LR: 0.0003 +[2026-02-27 20:54:59] (step=0007233) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.4151829387595383, LR: 0.0003 +[2026-02-27 20:55:07] (step=0007234) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.4153785951868518, LR: 0.0003 +[2026-02-27 20:55:14] (step=0007235) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.4155742516141656, LR: 0.0003 +[2026-02-27 20:55:22] (step=0007236) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.4157699080414792, LR: 0.0003 +[2026-02-27 20:55:30] (step=0007237) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.4159655644687927, LR: 0.0003 +[2026-02-27 20:55:38] (step=0007238) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.4161612208961065, LR: 0.0003 +[2026-02-27 20:55:46] (step=0007239) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.41635687732342, LR: 0.0003 +[2026-02-27 20:55:54] (step=0007240) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 1.4165525337507336, LR: 0.0003 +[2026-02-27 20:56:01] (step=0007241) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.4167481901780474, LR: 0.0003 +[2026-02-27 20:56:09] (step=0007242) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.416943846605361, LR: 0.0003 +[2026-02-27 20:56:17] (step=0007243) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.4171395030326746, LR: 0.0003 +[2026-02-27 20:56:25] (step=0007244) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.4173351594599883, LR: 0.0003 +[2026-02-27 20:56:33] (step=0007245) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.417530815887302, LR: 0.0003 +[2026-02-27 20:56:41] (step=0007246) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.4177264723146155, LR: 0.0003 +[2026-02-27 20:56:49] (step=0007247) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.4179221287419292, LR: 0.0003 +[2026-02-27 20:56:56] (step=0007248) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.4181177851692428, LR: 0.0003 +[2026-02-27 20:57:04] (step=0007249) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.4183134415965564, LR: 0.0003 +[2026-02-27 20:57:12] (step=0007250) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.4185090980238702, LR: 0.0003 +[2026-02-27 20:57:20] (step=0007251) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 1.4187047544511837, LR: 0.0003 +[2026-02-27 20:57:28] (step=0007252) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.4189004108784973, LR: 0.0003 +[2026-02-27 20:57:36] (step=0007253) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.419096067305811, LR: 0.0003 +[2026-02-27 20:57:43] (step=0007254) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.4192917237331246, LR: 0.0003 +[2026-02-27 20:57:51] (step=0007255) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 1.4194873801604382, LR: 0.0003 +[2026-02-27 20:57:59] (step=0007256) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.419683036587752, LR: 0.0003 +[2026-02-27 20:58:07] (step=0007257) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.4198786930150655, LR: 0.0003 +[2026-02-27 20:58:15] (step=0007258) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.420074349442379, LR: 0.0003 +[2026-02-27 20:58:23] (step=0007259) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.4202700058696929, LR: 0.0003 +[2026-02-27 20:58:30] (step=0007260) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.4204656622970064, LR: 0.0003 +[2026-02-27 20:58:38] (step=0007261) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.42066131872432, LR: 0.0003 +[2026-02-27 20:58:46] (step=0007262) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.4208569751516338, LR: 0.0003 +[2026-02-27 20:58:54] (step=0007263) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.4210526315789473, LR: 0.0003 +[2026-02-27 20:59:02] (step=0007264) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.421248288006261, LR: 0.0003 +[2026-02-27 20:59:10] (step=0007265) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.4214439444335747, LR: 0.0003 +[2026-02-27 20:59:18] (step=0007266) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.4216396008608883, LR: 0.0003 +[2026-02-27 20:59:26] (step=0007267) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.4218352572882018, LR: 0.0003 +[2026-02-27 20:59:33] (step=0007268) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.4220309137155156, LR: 0.0003 +[2026-02-27 20:59:41] (step=0007269) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.4222265701428292, LR: 0.0003 +[2026-02-27 20:59:49] (step=0007270) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.4224222265701427, LR: 0.0003 +[2026-02-27 20:59:57] (step=0007271) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.4226178829974565, LR: 0.0003 +[2026-02-27 21:00:05] (step=0007272) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.42281353942477, LR: 0.0003 +[2026-02-27 21:00:13] (step=0007273) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.4230091958520836, LR: 0.0003 +[2026-02-27 21:00:20] (step=0007274) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.4232048522793974, LR: 0.0003 +[2026-02-27 21:00:28] (step=0007275) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 1.423400508706711, LR: 0.0003 +[2026-02-27 21:00:36] (step=0007276) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.4235961651340248, LR: 0.0003 +[2026-02-27 21:00:44] (step=0007277) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.4237918215613383, LR: 0.0003 +[2026-02-27 21:00:52] (step=0007278) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 1.4239874779886519, LR: 0.0003 +[2026-02-27 21:01:00] (step=0007279) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.4241831344159657, LR: 0.0003 +[2026-02-27 21:01:07] (step=0007280) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.4243787908432792, LR: 0.0003 +[2026-02-27 21:01:15] (step=0007281) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.4245744472705928, LR: 0.0003 +[2026-02-27 21:01:23] (step=0007282) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.4247701036979066, LR: 0.0003 +[2026-02-27 21:01:31] (step=0007283) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.4249657601252201, LR: 0.0003 +[2026-02-27 21:01:39] (step=0007284) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.4251614165525337, LR: 0.0003 +[2026-02-27 21:01:47] (step=0007285) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.4253570729798475, LR: 0.0003 +[2026-02-27 21:01:55] (step=0007286) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.425552729407161, LR: 0.0003 +[2026-02-27 21:02:02] (step=0007287) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.4257483858344746, LR: 0.0003 +[2026-02-27 21:02:10] (step=0007288) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.4259440422617884, LR: 0.0003 +[2026-02-27 21:02:18] (step=0007289) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 1.426139698689102, LR: 0.0003 +[2026-02-27 21:02:26] (step=0007290) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.4263353551164155, LR: 0.0003 +[2026-02-27 21:02:34] (step=0007291) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 1.4265310115437293, LR: 0.0003 +[2026-02-27 21:02:42] (step=0007292) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.4267266679710429, LR: 0.0003 +[2026-02-27 21:02:49] (step=0007293) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.4269223243983564, LR: 0.0003 +[2026-02-27 21:02:57] (step=0007294) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.4271179808256702, LR: 0.0003 +[2026-02-27 21:03:05] (step=0007295) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.4273136372529838, LR: 0.0003 +[2026-02-27 21:03:13] (step=0007296) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.4275092936802973, LR: 0.0003 +[2026-02-27 21:03:21] (step=0007297) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 1.4277049501076111, LR: 0.0003 +[2026-02-27 21:03:29] (step=0007298) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.4279006065349247, LR: 0.0003 +[2026-02-27 21:03:36] (step=0007299) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.4280962629622382, LR: 0.0003 +[2026-02-27 21:03:44] (step=0007300) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.428291919389552, LR: 0.0003 +[2026-02-27 21:03:52] (step=0007301) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.4284875758168656, LR: 0.0003 +[2026-02-27 21:04:00] (step=0007302) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.4286832322441791, LR: 0.0003 +[2026-02-27 21:04:08] (step=0007303) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.428878888671493, LR: 0.0003 +[2026-02-27 21:04:16] (step=0007304) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.4290745450988065, LR: 0.0003 +[2026-02-27 21:04:24] (step=0007305) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.42927020152612, LR: 0.0003 +[2026-02-27 21:04:31] (step=0007306) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.4294658579534338, LR: 0.0003 +[2026-02-27 21:04:39] (step=0007307) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.4296615143807474, LR: 0.0003 +[2026-02-27 21:04:47] (step=0007308) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.429857170808061, LR: 0.0003 +[2026-02-27 21:04:55] (step=0007309) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 1.4300528272353747, LR: 0.0003 +[2026-02-27 21:05:03] (step=0007310) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.4302484836626883, LR: 0.0003 +[2026-02-27 21:05:11] (step=0007311) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.4304441400900019, LR: 0.0003 +[2026-02-27 21:05:19] (step=0007312) Train Loss: 0.4648, Train Steps/Sec: 0.12, Epoch: 1.4306397965173157, LR: 0.0003 +[2026-02-27 21:05:26] (step=0007313) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.4308354529446292, LR: 0.0003 +[2026-02-27 21:05:34] (step=0007314) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.4310311093719428, LR: 0.0003 +[2026-02-27 21:05:42] (step=0007315) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.4312267657992566, LR: 0.0003 +[2026-02-27 21:05:50] (step=0007316) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.4314224222265701, LR: 0.0003 +[2026-02-27 21:05:58] (step=0007317) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.4316180786538837, LR: 0.0003 +[2026-02-27 21:06:06] (step=0007318) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.4318137350811975, LR: 0.0003 +[2026-02-27 21:06:13] (step=0007319) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.432009391508511, LR: 0.0003 +[2026-02-27 21:06:21] (step=0007320) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.4322050479358246, LR: 0.0003 +[2026-02-27 21:06:29] (step=0007321) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.4324007043631384, LR: 0.0003 +[2026-02-27 21:06:37] (step=0007322) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.432596360790452, LR: 0.0003 +[2026-02-27 21:06:45] (step=0007323) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.4327920172177655, LR: 0.0003 +[2026-02-27 21:06:53] (step=0007324) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.4329876736450793, LR: 0.0003 +[2026-02-27 21:07:00] (step=0007325) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.4331833300723928, LR: 0.0003 +[2026-02-27 21:07:08] (step=0007326) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.4333789864997064, LR: 0.0003 +[2026-02-27 21:07:16] (step=0007327) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.4335746429270202, LR: 0.0003 +[2026-02-27 21:07:24] (step=0007328) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.4337702993543338, LR: 0.0003 +[2026-02-27 21:07:32] (step=0007329) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 1.4339659557816473, LR: 0.0003 +[2026-02-27 21:07:40] (step=0007330) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 1.434161612208961, LR: 0.0003 +[2026-02-27 21:07:48] (step=0007331) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.4343572686362747, LR: 0.0003 +[2026-02-27 21:07:55] (step=0007332) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.4345529250635884, LR: 0.0003 +[2026-02-27 21:08:03] (step=0007333) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.434748581490902, LR: 0.0003 +[2026-02-27 21:08:11] (step=0007334) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.4349442379182156, LR: 0.0003 +[2026-02-27 21:08:19] (step=0007335) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.4351398943455294, LR: 0.0003 +[2026-02-27 21:08:27] (step=0007336) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.435335550772843, LR: 0.0003 +[2026-02-27 21:08:35] (step=0007337) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.4355312072001565, LR: 0.0003 +[2026-02-27 21:08:43] (step=0007338) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.4357268636274703, LR: 0.0003 +[2026-02-27 21:08:50] (step=0007339) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.4359225200547838, LR: 0.0003 +[2026-02-27 21:08:58] (step=0007340) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.4361181764820974, LR: 0.0003 +[2026-02-27 21:09:06] (step=0007341) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.4363138329094112, LR: 0.0003 +[2026-02-27 21:09:14] (step=0007342) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.4365094893367247, LR: 0.0003 +[2026-02-27 21:09:22] (step=0007343) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.4367051457640383, LR: 0.0003 +[2026-02-27 21:09:30] (step=0007344) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.436900802191352, LR: 0.0003 +[2026-02-27 21:09:37] (step=0007345) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.4370964586186656, LR: 0.0003 +[2026-02-27 21:09:45] (step=0007346) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 1.4372921150459792, LR: 0.0003 +[2026-02-27 21:09:53] (step=0007347) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.437487771473293, LR: 0.0003 +[2026-02-27 21:10:01] (step=0007348) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 1.4376834279006065, LR: 0.0003 +[2026-02-27 21:10:09] (step=0007349) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.43787908432792, LR: 0.0003 +[2026-02-27 21:10:17] (step=0007350) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.438074740755234, LR: 0.0003 +[2026-02-27 21:10:25] (step=0007351) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.4382703971825475, LR: 0.0003 +[2026-02-27 21:10:32] (step=0007352) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.438466053609861, LR: 0.0003 +[2026-02-27 21:10:40] (step=0007353) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.4386617100371748, LR: 0.0003 +[2026-02-27 21:10:48] (step=0007354) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.4388573664644884, LR: 0.0003 +[2026-02-27 21:10:56] (step=0007355) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.439053022891802, LR: 0.0003 +[2026-02-27 21:11:04] (step=0007356) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.4392486793191157, LR: 0.0003 +[2026-02-27 21:11:12] (step=0007357) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.4394443357464293, LR: 0.0003 +[2026-02-27 21:11:19] (step=0007358) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.4396399921737428, LR: 0.0003 +[2026-02-27 21:11:27] (step=0007359) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.4398356486010566, LR: 0.0003 +[2026-02-27 21:11:35] (step=0007360) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.4400313050283702, LR: 0.0003 +[2026-02-27 21:11:43] (step=0007361) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.4402269614556837, LR: 0.0003 +[2026-02-27 21:11:51] (step=0007362) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.4404226178829975, LR: 0.0003 +[2026-02-27 21:11:59] (step=0007363) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.440618274310311, LR: 0.0003 +[2026-02-27 21:12:07] (step=0007364) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.4408139307376246, LR: 0.0003 +[2026-02-27 21:12:14] (step=0007365) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.4410095871649384, LR: 0.0003 +[2026-02-27 21:12:22] (step=0007366) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.441205243592252, LR: 0.0003 +[2026-02-27 21:12:30] (step=0007367) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 1.4414009000195656, LR: 0.0003 +[2026-02-27 21:12:38] (step=0007368) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.4415965564468793, LR: 0.0003 +[2026-02-27 21:12:46] (step=0007369) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.441792212874193, LR: 0.0003 +[2026-02-27 21:12:54] (step=0007370) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.4419878693015065, LR: 0.0003 +[2026-02-27 21:13:01] (step=0007371) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 1.4421835257288202, LR: 0.0003 +[2026-02-27 21:13:09] (step=0007372) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.4423791821561338, LR: 0.0003 +[2026-02-27 21:13:17] (step=0007373) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.4425748385834474, LR: 0.0003 +[2026-02-27 21:13:25] (step=0007374) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 1.4427704950107612, LR: 0.0003 +[2026-02-27 21:13:33] (step=0007375) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 1.4429661514380747, LR: 0.0003 +[2026-02-27 21:13:41] (step=0007376) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.4431618078653883, LR: 0.0003 +[2026-02-27 21:13:49] (step=0007377) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.443357464292702, LR: 0.0003 +[2026-02-27 21:13:56] (step=0007378) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.4435531207200156, LR: 0.0003 +[2026-02-27 21:14:04] (step=0007379) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.4437487771473292, LR: 0.0003 +[2026-02-27 21:14:12] (step=0007380) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.443944433574643, LR: 0.0003 +[2026-02-27 21:14:20] (step=0007381) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.4441400900019565, LR: 0.0003 +[2026-02-27 21:14:28] (step=0007382) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.44433574642927, LR: 0.0003 +[2026-02-27 21:14:36] (step=0007383) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.4445314028565839, LR: 0.0003 +[2026-02-27 21:14:43] (step=0007384) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.4447270592838974, LR: 0.0003 +[2026-02-27 21:14:51] (step=0007385) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.444922715711211, LR: 0.0003 +[2026-02-27 21:14:59] (step=0007386) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.4451183721385248, LR: 0.0003 +[2026-02-27 21:15:07] (step=0007387) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.4453140285658383, LR: 0.0003 +[2026-02-27 21:15:15] (step=0007388) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.4455096849931521, LR: 0.0003 +[2026-02-27 21:15:23] (step=0007389) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.4457053414204657, LR: 0.0003 +[2026-02-27 21:15:31] (step=0007390) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.4459009978477793, LR: 0.0003 +[2026-02-27 21:15:38] (step=0007391) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.446096654275093, LR: 0.0003 +[2026-02-27 21:15:46] (step=0007392) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.4462923107024066, LR: 0.0003 +[2026-02-27 21:15:54] (step=0007393) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.4464879671297202, LR: 0.0003 +[2026-02-27 21:16:02] (step=0007394) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.446683623557034, LR: 0.0003 +[2026-02-27 21:16:10] (step=0007395) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.4468792799843475, LR: 0.0003 +[2026-02-27 21:16:18] (step=0007396) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.447074936411661, LR: 0.0003 +[2026-02-27 21:16:26] (step=0007397) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.4472705928389749, LR: 0.0003 +[2026-02-27 21:16:33] (step=0007398) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.4474662492662884, LR: 0.0003 +[2026-02-27 21:16:41] (step=0007399) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 1.447661905693602, LR: 0.0003 +[2026-02-27 21:16:49] (step=0007400) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 1.4478575621209158, LR: 0.0003 +[2026-02-27 21:16:57] (step=0007401) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.4480532185482293, LR: 0.0003 +[2026-02-27 21:17:05] (step=0007402) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.4482488749755429, LR: 0.0003 +[2026-02-27 21:17:13] (step=0007403) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.4484445314028567, LR: 0.0003 +[2026-02-27 21:17:20] (step=0007404) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.4486401878301702, LR: 0.0003 +[2026-02-27 21:17:28] (step=0007405) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 1.4488358442574838, LR: 0.0003 +[2026-02-27 21:17:36] (step=0007406) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.4490315006847976, LR: 0.0003 +[2026-02-27 21:17:44] (step=0007407) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.4492271571121111, LR: 0.0003 +[2026-02-27 21:17:52] (step=0007408) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.4494228135394247, LR: 0.0003 +[2026-02-27 21:18:00] (step=0007409) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.4496184699667385, LR: 0.0003 +[2026-02-27 21:18:07] (step=0007410) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.449814126394052, LR: 0.0003 +[2026-02-27 21:18:15] (step=0007411) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.4500097828213656, LR: 0.0003 +[2026-02-27 21:18:23] (step=0007412) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 1.4502054392486794, LR: 0.0003 +[2026-02-27 21:18:31] (step=0007413) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.450401095675993, LR: 0.0003 +[2026-02-27 21:18:39] (step=0007414) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.4505967521033065, LR: 0.0003 +[2026-02-27 21:18:47] (step=0007415) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.4507924085306203, LR: 0.0003 +[2026-02-27 21:18:55] (step=0007416) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.4509880649579339, LR: 0.0003 +[2026-02-27 21:19:02] (step=0007417) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.4511837213852474, LR: 0.0003 +[2026-02-27 21:19:10] (step=0007418) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.4513793778125612, LR: 0.0003 +[2026-02-27 21:19:18] (step=0007419) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.4515750342398748, LR: 0.0003 +[2026-02-27 21:19:26] (step=0007420) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.4517706906671883, LR: 0.0003 +[2026-02-27 21:19:34] (step=0007421) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.4519663470945021, LR: 0.0003 +[2026-02-27 21:19:42] (step=0007422) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.4521620035218157, LR: 0.0003 +[2026-02-27 21:19:50] (step=0007423) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.4523576599491292, LR: 0.0003 +[2026-02-27 21:19:57] (step=0007424) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.452553316376443, LR: 0.0003 +[2026-02-27 21:20:05] (step=0007425) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.4527489728037566, LR: 0.0003 +[2026-02-27 21:20:13] (step=0007426) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.4529446292310702, LR: 0.0003 +[2026-02-27 21:20:21] (step=0007427) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.453140285658384, LR: 0.0003 +[2026-02-27 21:20:29] (step=0007428) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.4533359420856975, LR: 0.0003 +[2026-02-27 21:20:37] (step=0007429) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 1.453531598513011, LR: 0.0003 +[2026-02-27 21:20:44] (step=0007430) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.4537272549403248, LR: 0.0003 +[2026-02-27 21:20:52] (step=0007431) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 1.4539229113676384, LR: 0.0003 +[2026-02-27 21:21:00] (step=0007432) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.454118567794952, LR: 0.0003 +[2026-02-27 21:21:08] (step=0007433) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.4543142242222658, LR: 0.0003 +[2026-02-27 21:21:16] (step=0007434) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.4545098806495793, LR: 0.0003 +[2026-02-27 21:21:24] (step=0007435) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.4547055370768929, LR: 0.0003 +[2026-02-27 21:21:31] (step=0007436) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.4549011935042067, LR: 0.0003 +[2026-02-27 21:21:39] (step=0007437) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.4550968499315202, LR: 0.0003 +[2026-02-27 21:21:47] (step=0007438) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.4552925063588338, LR: 0.0003 +[2026-02-27 21:21:55] (step=0007439) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.4554881627861476, LR: 0.0003 +[2026-02-27 21:22:03] (step=0007440) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.4556838192134611, LR: 0.0003 +[2026-02-27 21:22:11] (step=0007441) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.4558794756407747, LR: 0.0003 +[2026-02-27 21:22:19] (step=0007442) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.4560751320680885, LR: 0.0003 +[2026-02-27 21:22:26] (step=0007443) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.456270788495402, LR: 0.0003 +[2026-02-27 21:22:34] (step=0007444) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.4564664449227158, LR: 0.0003 +[2026-02-27 21:22:42] (step=0007445) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.4566621013500294, LR: 0.0003 +[2026-02-27 21:22:50] (step=0007446) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.456857757777343, LR: 0.0003 +[2026-02-27 21:22:58] (step=0007447) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 1.4570534142046567, LR: 0.0003 +[2026-02-27 21:23:06] (step=0007448) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.4572490706319703, LR: 0.0003 +[2026-02-27 21:23:13] (step=0007449) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.4574447270592839, LR: 0.0003 +[2026-02-27 21:23:21] (step=0007450) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.4576403834865976, LR: 0.0003 +[2026-02-27 21:23:29] (step=0007451) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 1.4578360399139112, LR: 0.0003 +[2026-02-27 21:23:37] (step=0007452) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.4580316963412248, LR: 0.0003 +[2026-02-27 21:23:45] (step=0007453) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.4582273527685385, LR: 0.0003 +[2026-02-27 21:23:53] (step=0007454) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.458423009195852, LR: 0.0003 +[2026-02-27 21:24:01] (step=0007455) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.4586186656231657, LR: 0.0003 +[2026-02-27 21:24:08] (step=0007456) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.4588143220504795, LR: 0.0003 +[2026-02-27 21:24:16] (step=0007457) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.459009978477793, LR: 0.0003 +[2026-02-27 21:24:24] (step=0007458) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.4592056349051066, LR: 0.0003 +[2026-02-27 21:24:32] (step=0007459) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.4594012913324204, LR: 0.0003 +[2026-02-27 21:24:40] (step=0007460) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.459596947759734, LR: 0.0003 +[2026-02-27 21:24:48] (step=0007461) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.4597926041870475, LR: 0.0003 +[2026-02-27 21:24:56] (step=0007462) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.4599882606143613, LR: 0.0003 +[2026-02-27 21:25:03] (step=0007463) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 1.4601839170416748, LR: 0.0003 +[2026-02-27 21:25:11] (step=0007464) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 1.4603795734689884, LR: 0.0003 +[2026-02-27 21:25:19] (step=0007465) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.4605752298963022, LR: 0.0003 +[2026-02-27 21:25:27] (step=0007466) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.4607708863236157, LR: 0.0003 +[2026-02-27 21:25:35] (step=0007467) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.4609665427509293, LR: 0.0003 +[2026-02-27 21:25:43] (step=0007468) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 1.461162199178243, LR: 0.0003 +[2026-02-27 21:25:50] (step=0007469) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.4613578556055566, LR: 0.0003 +[2026-02-27 21:25:58] (step=0007470) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.4615535120328702, LR: 0.0003 +[2026-02-27 21:26:06] (step=0007471) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.461749168460184, LR: 0.0003 +[2026-02-27 21:26:14] (step=0007472) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.4619448248874976, LR: 0.0003 +[2026-02-27 21:26:22] (step=0007473) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.4621404813148111, LR: 0.0003 +[2026-02-27 21:26:30] (step=0007474) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.462336137742125, LR: 0.0003 +[2026-02-27 21:26:38] (step=0007475) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.4625317941694385, LR: 0.0003 +[2026-02-27 21:26:45] (step=0007476) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.462727450596752, LR: 0.0003 +[2026-02-27 21:26:53] (step=0007477) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 1.4629231070240658, LR: 0.0003 +[2026-02-27 21:27:01] (step=0007478) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.4631187634513794, LR: 0.0003 +[2026-02-27 21:27:09] (step=0007479) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.463314419878693, LR: 0.0003 +[2026-02-27 21:27:17] (step=0007480) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.4635100763060067, LR: 0.0003 +[2026-02-27 21:27:25] (step=0007481) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 1.4637057327333203, LR: 0.0003 +[2026-02-27 21:27:32] (step=0007482) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.4639013891606338, LR: 0.0003 +[2026-02-27 21:27:40] (step=0007483) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 1.4640970455879476, LR: 0.0003 +[2026-02-27 21:27:48] (step=0007484) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.4642927020152612, LR: 0.0003 +[2026-02-27 21:27:56] (step=0007485) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 1.4644883584425747, LR: 0.0003 +[2026-02-27 21:28:04] (step=0007486) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.4646840148698885, LR: 0.0003 +[2026-02-27 21:28:12] (step=0007487) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 1.464879671297202, LR: 0.0003 +[2026-02-27 21:28:20] (step=0007488) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.4650753277245157, LR: 0.0003 +[2026-02-27 21:28:27] (step=0007489) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.4652709841518294, LR: 0.0003 +[2026-02-27 21:28:35] (step=0007490) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.465466640579143, LR: 0.0003 +[2026-02-27 21:28:43] (step=0007491) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.4656622970064566, LR: 0.0003 +[2026-02-27 21:28:51] (step=0007492) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.4658579534337703, LR: 0.0003 +[2026-02-27 21:28:59] (step=0007493) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.466053609861084, LR: 0.0003 +[2026-02-27 21:29:07] (step=0007494) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.4662492662883975, LR: 0.0003 +[2026-02-27 21:29:14] (step=0007495) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.4664449227157113, LR: 0.0003 +[2026-02-27 21:29:22] (step=0007496) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.4666405791430248, LR: 0.0003 +[2026-02-27 21:29:30] (step=0007497) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.4668362355703384, LR: 0.0003 +[2026-02-27 21:29:38] (step=0007498) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.4670318919976522, LR: 0.0003 +[2026-02-27 21:29:46] (step=0007499) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.4672275484249657, LR: 0.0003 +[2026-02-27 21:29:54] (step=0007500) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.4674232048522795, LR: 0.0003 +[2026-02-27 21:29:54] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0007500/ +[2026-02-27 21:30:01] (step=0007501) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.467618861279593, LR: 0.0003 +[2026-02-27 21:30:09] (step=0007502) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.4678145177069066, LR: 0.0003 +[2026-02-27 21:30:17] (step=0007503) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.4680101741342204, LR: 0.0003 +[2026-02-27 21:30:25] (step=0007504) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.468205830561534, LR: 0.0003 +[2026-02-27 21:30:33] (step=0007505) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 1.4684014869888475, LR: 0.0003 +[2026-02-27 21:30:41] (step=0007506) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.4685971434161613, LR: 0.0003 +[2026-02-27 21:30:49] (step=0007507) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.4687927998434749, LR: 0.0003 +[2026-02-27 21:30:56] (step=0007508) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.4689884562707884, LR: 0.0003 +[2026-02-27 21:31:04] (step=0007509) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 1.4691841126981022, LR: 0.0003 +[2026-02-27 21:31:12] (step=0007510) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.4693797691254158, LR: 0.0003 +[2026-02-27 21:31:20] (step=0007511) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.4695754255527294, LR: 0.0003 +[2026-02-27 21:31:28] (step=0007512) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.4697710819800431, LR: 0.0003 +[2026-02-27 21:31:36] (step=0007513) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.4699667384073567, LR: 0.0003 +[2026-02-27 21:31:43] (step=0007514) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.4701623948346703, LR: 0.0003 +[2026-02-27 21:31:51] (step=0007515) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.470358051261984, LR: 0.0003 +[2026-02-27 21:31:59] (step=0007516) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.4705537076892976, LR: 0.0003 +[2026-02-27 21:32:07] (step=0007517) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.4707493641166112, LR: 0.0003 +[2026-02-27 21:32:15] (step=0007518) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.470945020543925, LR: 0.0003 +[2026-02-27 21:32:23] (step=0007519) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.4711406769712385, LR: 0.0003 +[2026-02-27 21:32:30] (step=0007520) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.471336333398552, LR: 0.0003 +[2026-02-27 21:32:38] (step=0007521) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.4715319898258659, LR: 0.0003 +[2026-02-27 21:32:46] (step=0007522) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.4717276462531794, LR: 0.0003 +[2026-02-27 21:32:54] (step=0007523) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.471923302680493, LR: 0.0003 +[2026-02-27 21:33:02] (step=0007524) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.4721189591078068, LR: 0.0003 +[2026-02-27 21:33:10] (step=0007525) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 1.4723146155351203, LR: 0.0003 +[2026-02-27 21:33:17] (step=0007526) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.472510271962434, LR: 0.0003 +[2026-02-27 21:33:25] (step=0007527) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 1.4727059283897477, LR: 0.0003 +[2026-02-27 21:33:33] (step=0007528) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.4729015848170612, LR: 0.0003 +[2026-02-27 21:33:41] (step=0007529) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.4730972412443748, LR: 0.0003 +[2026-02-27 21:33:49] (step=0007530) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.4732928976716886, LR: 0.0003 +[2026-02-27 21:33:57] (step=0007531) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.4734885540990021, LR: 0.0003 +[2026-02-27 21:34:05] (step=0007532) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.4736842105263157, LR: 0.0003 +[2026-02-27 21:34:12] (step=0007533) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.4738798669536295, LR: 0.0003 +[2026-02-27 21:34:20] (step=0007534) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.474075523380943, LR: 0.0003 +[2026-02-27 21:34:28] (step=0007535) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 1.4742711798082566, LR: 0.0003 +[2026-02-27 21:34:36] (step=0007536) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.4744668362355704, LR: 0.0003 +[2026-02-27 21:34:44] (step=0007537) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.474662492662884, LR: 0.0003 +[2026-02-27 21:34:52] (step=0007538) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.4748581490901975, LR: 0.0003 +[2026-02-27 21:35:00] (step=0007539) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 1.4750538055175113, LR: 0.0003 +[2026-02-27 21:35:07] (step=0007540) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.4752494619448249, LR: 0.0003 +[2026-02-27 21:35:15] (step=0007541) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.4754451183721384, LR: 0.0003 +[2026-02-27 21:35:23] (step=0007542) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 1.4756407747994522, LR: 0.0003 +[2026-02-27 21:35:31] (step=0007543) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 1.4758364312267658, LR: 0.0003 +[2026-02-27 21:35:39] (step=0007544) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.4760320876540793, LR: 0.0003 +[2026-02-27 21:35:47] (step=0007545) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.4762277440813931, LR: 0.0003 +[2026-02-27 21:35:54] (step=0007546) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 1.4764234005087067, LR: 0.0003 +[2026-02-27 21:36:02] (step=0007547) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.4766190569360202, LR: 0.0003 +[2026-02-27 21:36:10] (step=0007548) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.476814713363334, LR: 0.0003 +[2026-02-27 21:36:18] (step=0007549) Train Loss: 0.4521, Train Steps/Sec: 0.12, Epoch: 1.4770103697906476, LR: 0.0003 +[2026-02-27 21:36:26] (step=0007550) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.4772060262179612, LR: 0.0003 +[2026-02-27 21:36:34] (step=0007551) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 1.477401682645275, LR: 0.0003 +[2026-02-27 21:36:42] (step=0007552) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 1.4775973390725885, LR: 0.0003 +[2026-02-27 21:36:50] (step=0007553) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 1.477792995499902, LR: 0.0003 +[2026-02-27 21:36:57] (step=0007554) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 1.4779886519272158, LR: 0.0003 +[2026-02-27 21:37:05] (step=0007555) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.4781843083545294, LR: 0.0003 +[2026-02-27 21:37:13] (step=0007556) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.4783799647818432, LR: 0.0003 +[2026-02-27 21:37:21] (step=0007557) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.4785756212091568, LR: 0.0003 +[2026-02-27 21:37:29] (step=0007558) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.4787712776364703, LR: 0.0003 +[2026-02-27 21:37:37] (step=0007559) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.478966934063784, LR: 0.0003 +[2026-02-27 21:37:44] (step=0007560) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.4791625904910977, LR: 0.0003 +[2026-02-27 21:37:52] (step=0007561) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.4793582469184112, LR: 0.0003 +[2026-02-27 21:38:00] (step=0007562) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.479553903345725, LR: 0.0003 +[2026-02-27 21:38:08] (step=0007563) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.4797495597730386, LR: 0.0003 +[2026-02-27 21:38:16] (step=0007564) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.4799452162003521, LR: 0.0003 +[2026-02-27 21:38:24] (step=0007565) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.480140872627666, LR: 0.0003 +[2026-02-27 21:38:31] (step=0007566) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.4803365290549795, LR: 0.0003 +[2026-02-27 21:38:39] (step=0007567) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.480532185482293, LR: 0.0003 +[2026-02-27 21:38:47] (step=0007568) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.4807278419096068, LR: 0.0003 +[2026-02-27 21:38:55] (step=0007569) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.4809234983369204, LR: 0.0003 +[2026-02-27 21:39:03] (step=0007570) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.481119154764234, LR: 0.0003 +[2026-02-27 21:39:11] (step=0007571) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.4813148111915477, LR: 0.0003 +[2026-02-27 21:39:19] (step=0007572) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 1.4815104676188613, LR: 0.0003 +[2026-02-27 21:39:26] (step=0007573) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.4817061240461749, LR: 0.0003 +[2026-02-27 21:39:34] (step=0007574) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.4819017804734886, LR: 0.0003 +[2026-02-27 21:39:42] (step=0007575) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.4820974369008022, LR: 0.0003 +[2026-02-27 21:39:50] (step=0007576) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.4822930933281158, LR: 0.0003 +[2026-02-27 21:39:58] (step=0007577) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.4824887497554295, LR: 0.0003 +[2026-02-27 21:40:06] (step=0007578) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.482684406182743, LR: 0.0003 +[2026-02-27 21:40:13] (step=0007579) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.4828800626100567, LR: 0.0003 +[2026-02-27 21:40:21] (step=0007580) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.4830757190373705, LR: 0.0003 +[2026-02-27 21:40:29] (step=0007581) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 1.483271375464684, LR: 0.0003 +[2026-02-27 21:40:37] (step=0007582) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.4834670318919976, LR: 0.0003 +[2026-02-27 21:40:45] (step=0007583) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.4836626883193114, LR: 0.0003 +[2026-02-27 21:40:53] (step=0007584) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.483858344746625, LR: 0.0003 +[2026-02-27 21:41:00] (step=0007585) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 1.4840540011739385, LR: 0.0003 +[2026-02-27 21:41:08] (step=0007586) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.4842496576012523, LR: 0.0003 +[2026-02-27 21:41:16] (step=0007587) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.4844453140285658, LR: 0.0003 +[2026-02-27 21:41:24] (step=0007588) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.4846409704558794, LR: 0.0003 +[2026-02-27 21:41:32] (step=0007589) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.4848366268831932, LR: 0.0003 +[2026-02-27 21:41:40] (step=0007590) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.4850322833105067, LR: 0.0003 +[2026-02-27 21:41:48] (step=0007591) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.4852279397378203, LR: 0.0003 +[2026-02-27 21:41:56] (step=0007592) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.485423596165134, LR: 0.0003 +[2026-02-27 21:42:03] (step=0007593) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 1.4856192525924476, LR: 0.0003 +[2026-02-27 21:42:11] (step=0007594) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.4858149090197612, LR: 0.0003 +[2026-02-27 21:42:19] (step=0007595) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 1.486010565447075, LR: 0.0003 +[2026-02-27 21:42:27] (step=0007596) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.4862062218743886, LR: 0.0003 +[2026-02-27 21:42:35] (step=0007597) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.4864018783017021, LR: 0.0003 +[2026-02-27 21:42:43] (step=0007598) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.486597534729016, LR: 0.0003 +[2026-02-27 21:42:51] (step=0007599) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.4867931911563295, LR: 0.0003 +[2026-02-27 21:42:58] (step=0007600) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 1.486988847583643, LR: 0.0003 +[2026-02-27 21:43:06] (step=0007601) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.4871845040109568, LR: 0.0003 +[2026-02-27 21:43:14] (step=0007602) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.4873801604382704, LR: 0.0003 +[2026-02-27 21:43:22] (step=0007603) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 1.487575816865584, LR: 0.0003 +[2026-02-27 21:43:30] (step=0007604) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.4877714732928977, LR: 0.0003 +[2026-02-27 21:43:38] (step=0007605) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.4879671297202113, LR: 0.0003 +[2026-02-27 21:43:45] (step=0007606) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.4881627861475248, LR: 0.0003 +[2026-02-27 21:43:53] (step=0007607) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 1.4883584425748386, LR: 0.0003 +[2026-02-27 21:44:01] (step=0007608) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.4885540990021522, LR: 0.0003 +[2026-02-27 21:44:09] (step=0007609) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.4887497554294657, LR: 0.0003 +[2026-02-27 21:44:17] (step=0007610) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.4889454118567795, LR: 0.0003 +[2026-02-27 21:44:25] (step=0007611) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.489141068284093, LR: 0.0003 +[2026-02-27 21:44:33] (step=0007612) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.4893367247114069, LR: 0.0003 +[2026-02-27 21:44:40] (step=0007613) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.4895323811387204, LR: 0.0003 +[2026-02-27 21:44:48] (step=0007614) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 1.489728037566034, LR: 0.0003 +[2026-02-27 21:44:56] (step=0007615) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.4899236939933478, LR: 0.0003 +[2026-02-27 21:45:04] (step=0007616) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 1.4901193504206613, LR: 0.0003 +[2026-02-27 21:45:12] (step=0007617) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.490315006847975, LR: 0.0003 +[2026-02-27 21:45:20] (step=0007618) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.4905106632752887, LR: 0.0003 +[2026-02-27 21:45:27] (step=0007619) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.4907063197026023, LR: 0.0003 +[2026-02-27 21:45:35] (step=0007620) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.4909019761299158, LR: 0.0003 +[2026-02-27 21:45:43] (step=0007621) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.4910976325572296, LR: 0.0003 +[2026-02-27 21:45:51] (step=0007622) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.4912932889845432, LR: 0.0003 +[2026-02-27 21:45:59] (step=0007623) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.4914889454118567, LR: 0.0003 +[2026-02-27 21:46:07] (step=0007624) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.4916846018391705, LR: 0.0003 +[2026-02-27 21:46:14] (step=0007625) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.491880258266484, LR: 0.0003 +[2026-02-27 21:46:22] (step=0007626) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.4920759146937976, LR: 0.0003 +[2026-02-27 21:46:30] (step=0007627) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 1.4922715711211114, LR: 0.0003 +[2026-02-27 21:46:38] (step=0007628) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.492467227548425, LR: 0.0003 +[2026-02-27 21:46:46] (step=0007629) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.4926628839757385, LR: 0.0003 +[2026-02-27 21:46:54] (step=0007630) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.4928585404030523, LR: 0.0003 +[2026-02-27 21:47:02] (step=0007631) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.4930541968303659, LR: 0.0003 +[2026-02-27 21:47:09] (step=0007632) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.4932498532576794, LR: 0.0003 +[2026-02-27 21:47:17] (step=0007633) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.4934455096849932, LR: 0.0003 +[2026-02-27 21:47:25] (step=0007634) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.4936411661123068, LR: 0.0003 +[2026-02-27 21:47:33] (step=0007635) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.4938368225396204, LR: 0.0003 +[2026-02-27 21:47:41] (step=0007636) Train Loss: 0.4638, Train Steps/Sec: 0.12, Epoch: 1.4940324789669341, LR: 0.0003 +[2026-02-27 21:47:49] (step=0007637) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.4942281353942477, LR: 0.0003 +[2026-02-27 21:47:57] (step=0007638) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.4944237918215613, LR: 0.0003 +[2026-02-27 21:48:04] (step=0007639) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 1.494619448248875, LR: 0.0003 +[2026-02-27 21:48:12] (step=0007640) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.4948151046761886, LR: 0.0003 +[2026-02-27 21:48:20] (step=0007641) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.4950107611035022, LR: 0.0003 +[2026-02-27 21:48:28] (step=0007642) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.495206417530816, LR: 0.0003 +[2026-02-27 21:48:36] (step=0007643) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.4954020739581295, LR: 0.0003 +[2026-02-27 21:48:44] (step=0007644) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.495597730385443, LR: 0.0003 +[2026-02-27 21:48:51] (step=0007645) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.4957933868127569, LR: 0.0003 +[2026-02-27 21:48:59] (step=0007646) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.4959890432400704, LR: 0.0003 +[2026-02-27 21:49:07] (step=0007647) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.496184699667384, LR: 0.0003 +[2026-02-27 21:49:15] (step=0007648) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.4963803560946978, LR: 0.0003 +[2026-02-27 21:49:23] (step=0007649) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.4965760125220113, LR: 0.0003 +[2026-02-27 21:49:31] (step=0007650) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.496771668949325, LR: 0.0003 +[2026-02-27 21:49:39] (step=0007651) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.4969673253766387, LR: 0.0003 +[2026-02-27 21:49:47] (step=0007652) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.4971629818039522, LR: 0.0003 +[2026-02-27 21:49:54] (step=0007653) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.4973586382312658, LR: 0.0003 +[2026-02-27 21:50:02] (step=0007654) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.4975542946585796, LR: 0.0003 +[2026-02-27 21:50:10] (step=0007655) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.4977499510858931, LR: 0.0003 +[2026-02-27 21:50:18] (step=0007656) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.4979456075132067, LR: 0.0003 +[2026-02-27 21:50:26] (step=0007657) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.4981412639405205, LR: 0.0003 +[2026-02-27 21:50:34] (step=0007658) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.498336920367834, LR: 0.0003 +[2026-02-27 21:50:41] (step=0007659) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.4985325767951476, LR: 0.0003 +[2026-02-27 21:50:49] (step=0007660) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 1.4987282332224614, LR: 0.0003 +[2026-02-27 21:50:57] (step=0007661) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.498923889649775, LR: 0.0003 +[2026-02-27 21:51:05] (step=0007662) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.4991195460770885, LR: 0.0003 +[2026-02-27 21:51:13] (step=0007663) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 1.4993152025044023, LR: 0.0003 +[2026-02-27 21:51:21] (step=0007664) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.4995108589317159, LR: 0.0003 +[2026-02-27 21:51:28] (step=0007665) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.4997065153590294, LR: 0.0003 +[2026-02-27 21:51:36] (step=0007666) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.4999021717863432, LR: 0.0003 +[2026-02-27 21:51:44] (step=0007667) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 1.5000978282136568, LR: 0.0003 +[2026-02-27 21:51:52] (step=0007668) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.5002934846409706, LR: 0.0003 +[2026-02-27 21:52:00] (step=0007669) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.5004891410682841, LR: 0.0003 +[2026-02-27 21:52:08] (step=0007670) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.5006847974955977, LR: 0.0003 +[2026-02-27 21:52:15] (step=0007671) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.5008804539229115, LR: 0.0003 +[2026-02-27 21:52:23] (step=0007672) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.501076110350225, LR: 0.0003 +[2026-02-27 21:52:31] (step=0007673) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.5012717667775386, LR: 0.0003 +[2026-02-27 21:52:39] (step=0007674) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.5014674232048524, LR: 0.0003 +[2026-02-27 21:52:47] (step=0007675) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.501663079632166, LR: 0.0003 +[2026-02-27 21:52:55] (step=0007676) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.5018587360594795, LR: 0.0003 +[2026-02-27 21:53:03] (step=0007677) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.5020543924867933, LR: 0.0003 +[2026-02-27 21:53:10] (step=0007678) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.5022500489141069, LR: 0.0003 +[2026-02-27 21:53:18] (step=0007679) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 1.5024457053414204, LR: 0.0003 +[2026-02-27 21:53:26] (step=0007680) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.5026413617687342, LR: 0.0003 +[2026-02-27 21:53:34] (step=0007681) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.5028370181960478, LR: 0.0003 +[2026-02-27 21:53:42] (step=0007682) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.5030326746233613, LR: 0.0003 +[2026-02-27 21:53:50] (step=0007683) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.503228331050675, LR: 0.0003 +[2026-02-27 21:53:57] (step=0007684) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.5034239874779887, LR: 0.0003 +[2026-02-27 21:54:05] (step=0007685) Train Loss: 0.4514, Train Steps/Sec: 0.12, Epoch: 1.5036196439053022, LR: 0.0003 +[2026-02-27 21:54:13] (step=0007686) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.503815300332616, LR: 0.0003 +[2026-02-27 21:54:21] (step=0007687) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.5040109567599296, LR: 0.0003 +[2026-02-27 21:54:29] (step=0007688) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.5042066131872431, LR: 0.0003 +[2026-02-27 21:54:37] (step=0007689) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.504402269614557, LR: 0.0003 +[2026-02-27 21:54:45] (step=0007690) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.5045979260418705, LR: 0.0003 +[2026-02-27 21:54:52] (step=0007691) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.504793582469184, LR: 0.0003 +[2026-02-27 21:55:00] (step=0007692) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.5049892388964978, LR: 0.0003 +[2026-02-27 21:55:08] (step=0007693) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.5051848953238114, LR: 0.0003 +[2026-02-27 21:55:16] (step=0007694) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.505380551751125, LR: 0.0003 +[2026-02-27 21:55:24] (step=0007695) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 1.5055762081784387, LR: 0.0003 +[2026-02-27 21:55:32] (step=0007696) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.5057718646057523, LR: 0.0003 +[2026-02-27 21:55:40] (step=0007697) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.5059675210330659, LR: 0.0003 +[2026-02-27 21:55:48] (step=0007698) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.5061631774603796, LR: 0.0003 +[2026-02-27 21:55:55] (step=0007699) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.5063588338876932, LR: 0.0003 +[2026-02-27 21:56:03] (step=0007700) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.5065544903150068, LR: 0.0003 +[2026-02-27 21:56:11] (step=0007701) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 1.5067501467423206, LR: 0.0003 +[2026-02-27 21:56:19] (step=0007702) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.5069458031696341, LR: 0.0003 +[2026-02-27 21:56:27] (step=0007703) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.5071414595969477, LR: 0.0003 +[2026-02-27 21:56:35] (step=0007704) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.5073371160242615, LR: 0.0003 +[2026-02-27 21:56:42] (step=0007705) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.507532772451575, LR: 0.0003 +[2026-02-27 21:56:50] (step=0007706) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 1.5077284288788886, LR: 0.0003 +[2026-02-27 21:56:58] (step=0007707) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.5079240853062024, LR: 0.0003 +[2026-02-27 21:57:06] (step=0007708) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.508119741733516, LR: 0.0003 +[2026-02-27 21:57:14] (step=0007709) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.5083153981608295, LR: 0.0003 +[2026-02-27 21:57:22] (step=0007710) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.5085110545881433, LR: 0.0003 +[2026-02-27 21:57:29] (step=0007711) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.5087067110154568, LR: 0.0003 +[2026-02-27 21:57:37] (step=0007712) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 1.5089023674427704, LR: 0.0003 +[2026-02-27 21:57:45] (step=0007713) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.5090980238700842, LR: 0.0003 +[2026-02-27 21:57:53] (step=0007714) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.5092936802973977, LR: 0.0003 +[2026-02-27 21:58:01] (step=0007715) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.5094893367247113, LR: 0.0003 +[2026-02-27 21:58:09] (step=0007716) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.509684993152025, LR: 0.0003 +[2026-02-27 21:58:16] (step=0007717) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.5098806495793387, LR: 0.0003 +[2026-02-27 21:58:24] (step=0007718) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.5100763060066522, LR: 0.0003 +[2026-02-27 21:58:32] (step=0007719) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.510271962433966, LR: 0.0003 +[2026-02-27 21:58:40] (step=0007720) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 1.5104676188612796, LR: 0.0003 +[2026-02-27 21:58:48] (step=0007721) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.5106632752885931, LR: 0.0003 +[2026-02-27 21:58:56] (step=0007722) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.510858931715907, LR: 0.0003 +[2026-02-27 21:59:03] (step=0007723) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.5110545881432205, LR: 0.0003 +[2026-02-27 21:59:11] (step=0007724) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 1.5112502445705343, LR: 0.0003 +[2026-02-27 21:59:19] (step=0007725) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.5114459009978478, LR: 0.0003 +[2026-02-27 21:59:27] (step=0007726) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.5116415574251614, LR: 0.0003 +[2026-02-27 21:59:35] (step=0007727) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.5118372138524752, LR: 0.0003 +[2026-02-27 21:59:43] (step=0007728) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.5120328702797887, LR: 0.0003 +[2026-02-27 21:59:51] (step=0007729) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.5122285267071023, LR: 0.0003 +[2026-02-27 21:59:58] (step=0007730) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.512424183134416, LR: 0.0003 +[2026-02-27 22:00:06] (step=0007731) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.5126198395617296, LR: 0.0003 +[2026-02-27 22:00:14] (step=0007732) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.5128154959890432, LR: 0.0003 +[2026-02-27 22:00:22] (step=0007733) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.513011152416357, LR: 0.0003 +[2026-02-27 22:00:30] (step=0007734) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.5132068088436705, LR: 0.0003 +[2026-02-27 22:00:38] (step=0007735) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.513402465270984, LR: 0.0003 +[2026-02-27 22:00:46] (step=0007736) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.5135981216982979, LR: 0.0003 +[2026-02-27 22:00:53] (step=0007737) Train Loss: 0.4727, Train Steps/Sec: 0.13, Epoch: 1.5137937781256114, LR: 0.0003 +[2026-02-27 22:01:01] (step=0007738) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.513989434552925, LR: 0.0003 +[2026-02-27 22:01:09] (step=0007739) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.5141850909802388, LR: 0.0003 +[2026-02-27 22:01:17] (step=0007740) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.5143807474075524, LR: 0.0003 +[2026-02-27 22:01:25] (step=0007741) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 1.514576403834866, LR: 0.0003 +[2026-02-27 22:01:33] (step=0007742) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.5147720602621797, LR: 0.0003 +[2026-02-27 22:01:41] (step=0007743) Train Loss: 0.4594, Train Steps/Sec: 0.12, Epoch: 1.5149677166894933, LR: 0.0003 +[2026-02-27 22:01:48] (step=0007744) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.5151633731168068, LR: 0.0003 +[2026-02-27 22:01:56] (step=0007745) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.5153590295441206, LR: 0.0003 +[2026-02-27 22:02:04] (step=0007746) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.5155546859714342, LR: 0.0003 +[2026-02-27 22:02:12] (step=0007747) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.5157503423987477, LR: 0.0003 +[2026-02-27 22:02:20] (step=0007748) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.5159459988260615, LR: 0.0003 +[2026-02-27 22:02:28] (step=0007749) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.516141655253375, LR: 0.0003 +[2026-02-27 22:02:36] (step=0007750) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.5163373116806886, LR: 0.0003 +[2026-02-27 22:02:43] (step=0007751) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.5165329681080024, LR: 0.0003 +[2026-02-27 22:02:51] (step=0007752) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.516728624535316, LR: 0.0003 +[2026-02-27 22:02:59] (step=0007753) Train Loss: 0.4706, Train Steps/Sec: 0.13, Epoch: 1.5169242809626295, LR: 0.0003 +[2026-02-27 22:03:07] (step=0007754) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.5171199373899433, LR: 0.0003 +[2026-02-27 22:03:15] (step=0007755) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.517315593817257, LR: 0.0003 +[2026-02-27 22:03:23] (step=0007756) Train Loss: 0.4716, Train Steps/Sec: 0.13, Epoch: 1.5175112502445705, LR: 0.0003 +[2026-02-27 22:03:30] (step=0007757) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.5177069066718842, LR: 0.0003 +[2026-02-27 22:03:38] (step=0007758) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.5179025630991978, LR: 0.0003 +[2026-02-27 22:03:46] (step=0007759) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.5180982195265114, LR: 0.0003 +[2026-02-27 22:03:54] (step=0007760) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.5182938759538251, LR: 0.0003 +[2026-02-27 22:04:02] (step=0007761) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.5184895323811387, LR: 0.0003 +[2026-02-27 22:04:10] (step=0007762) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.5186851888084523, LR: 0.0003 +[2026-02-27 22:04:18] (step=0007763) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.518880845235766, LR: 0.0003 +[2026-02-27 22:04:25] (step=0007764) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.5190765016630796, LR: 0.0003 +[2026-02-27 22:04:33] (step=0007765) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.5192721580903932, LR: 0.0003 +[2026-02-27 22:04:41] (step=0007766) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.519467814517707, LR: 0.0003 +[2026-02-27 22:04:49] (step=0007767) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.5196634709450205, LR: 0.0003 +[2026-02-27 22:04:57] (step=0007768) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 1.519859127372334, LR: 0.0003 +[2026-02-27 22:05:05] (step=0007769) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.5200547837996479, LR: 0.0003 +[2026-02-27 22:05:12] (step=0007770) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.5202504402269614, LR: 0.0003 +[2026-02-27 22:05:20] (step=0007771) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 1.520446096654275, LR: 0.0003 +[2026-02-27 22:05:28] (step=0007772) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.5206417530815888, LR: 0.0003 +[2026-02-27 22:05:36] (step=0007773) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.5208374095089023, LR: 0.0003 +[2026-02-27 22:05:44] (step=0007774) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.521033065936216, LR: 0.0003 +[2026-02-27 22:05:52] (step=0007775) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.5212287223635297, LR: 0.0003 +[2026-02-27 22:05:59] (step=0007776) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.5214243787908432, LR: 0.0003 +[2026-02-27 22:06:07] (step=0007777) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.5216200352181568, LR: 0.0003 +[2026-02-27 22:06:15] (step=0007778) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.5218156916454706, LR: 0.0003 +[2026-02-27 22:06:23] (step=0007779) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.5220113480727842, LR: 0.0003 +[2026-02-27 22:06:31] (step=0007780) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.522207004500098, LR: 0.0003 +[2026-02-27 22:06:39] (step=0007781) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.5224026609274115, LR: 0.0003 +[2026-02-27 22:06:47] (step=0007782) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 1.522598317354725, LR: 0.0003 +[2026-02-27 22:06:54] (step=0007783) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 1.5227939737820388, LR: 0.0003 +[2026-02-27 22:07:02] (step=0007784) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.5229896302093524, LR: 0.0003 +[2026-02-27 22:07:10] (step=0007785) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.523185286636666, LR: 0.0003 +[2026-02-27 22:07:18] (step=0007786) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.5233809430639798, LR: 0.0003 +[2026-02-27 22:07:26] (step=0007787) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.5235765994912933, LR: 0.0003 +[2026-02-27 22:07:34] (step=0007788) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.5237722559186069, LR: 0.0003 +[2026-02-27 22:07:42] (step=0007789) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 1.5239679123459207, LR: 0.0003 +[2026-02-27 22:07:49] (step=0007790) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.5241635687732342, LR: 0.0003 +[2026-02-27 22:07:57] (step=0007791) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.5243592252005478, LR: 0.0003 +[2026-02-27 22:08:05] (step=0007792) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.5245548816278616, LR: 0.0003 +[2026-02-27 22:08:13] (step=0007793) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.5247505380551751, LR: 0.0003 +[2026-02-27 22:08:21] (step=0007794) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 1.5249461944824887, LR: 0.0003 +[2026-02-27 22:08:29] (step=0007795) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 1.5251418509098025, LR: 0.0003 +[2026-02-27 22:08:37] (step=0007796) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.525337507337116, LR: 0.0003 +[2026-02-27 22:08:45] (step=0007797) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.5255331637644296, LR: 0.0003 +[2026-02-27 22:08:52] (step=0007798) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.5257288201917434, LR: 0.0003 +[2026-02-27 22:09:00] (step=0007799) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.525924476619057, LR: 0.0003 +[2026-02-27 22:09:08] (step=0007800) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.5261201330463705, LR: 0.0003 +[2026-02-27 22:09:16] (step=0007801) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.5263157894736843, LR: 0.0003 +[2026-02-27 22:09:24] (step=0007802) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 1.5265114459009979, LR: 0.0003 +[2026-02-27 22:09:32] (step=0007803) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.5267071023283114, LR: 0.0003 +[2026-02-27 22:09:40] (step=0007804) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.5269027587556252, LR: 0.0003 +[2026-02-27 22:09:47] (step=0007805) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.5270984151829388, LR: 0.0003 +[2026-02-27 22:09:55] (step=0007806) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 1.5272940716102523, LR: 0.0003 +[2026-02-27 22:10:03] (step=0007807) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 1.527489728037566, LR: 0.0003 +[2026-02-27 22:10:11] (step=0007808) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.5276853844648797, LR: 0.0003 +[2026-02-27 22:10:19] (step=0007809) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.5278810408921932, LR: 0.0003 +[2026-02-27 22:10:27] (step=0007810) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.528076697319507, LR: 0.0003 +[2026-02-27 22:10:34] (step=0007811) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 1.5282723537468206, LR: 0.0003 +[2026-02-27 22:10:42] (step=0007812) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.5284680101741341, LR: 0.0003 +[2026-02-27 22:10:50] (step=0007813) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.528663666601448, LR: 0.0003 +[2026-02-27 22:10:58] (step=0007814) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.5288593230287615, LR: 0.0003 +[2026-02-27 22:11:06] (step=0007815) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.529054979456075, LR: 0.0003 +[2026-02-27 22:11:14] (step=0007816) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.5292506358833888, LR: 0.0003 +[2026-02-27 22:11:21] (step=0007817) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 1.5294462923107024, LR: 0.0003 +[2026-02-27 22:11:29] (step=0007818) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 1.529641948738016, LR: 0.0003 +[2026-02-27 22:11:37] (step=0007819) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.5298376051653297, LR: 0.0003 +[2026-02-27 22:11:45] (step=0007820) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.5300332615926433, LR: 0.0003 +[2026-02-27 22:11:53] (step=0007821) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.5302289180199569, LR: 0.0003 +[2026-02-27 22:12:01] (step=0007822) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.5304245744472706, LR: 0.0003 +[2026-02-27 22:12:09] (step=0007823) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.5306202308745842, LR: 0.0003 +[2026-02-27 22:12:16] (step=0007824) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.5308158873018978, LR: 0.0003 +[2026-02-27 22:12:24] (step=0007825) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.5310115437292116, LR: 0.0003 +[2026-02-27 22:12:32] (step=0007826) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.5312072001565251, LR: 0.0003 +[2026-02-27 22:12:40] (step=0007827) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.5314028565838387, LR: 0.0003 +[2026-02-27 22:12:48] (step=0007828) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.5315985130111525, LR: 0.0003 +[2026-02-27 22:12:56] (step=0007829) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.531794169438466, LR: 0.0003 +[2026-02-27 22:13:03] (step=0007830) Train Loss: 0.4708, Train Steps/Sec: 0.13, Epoch: 1.5319898258657796, LR: 0.0003 +[2026-02-27 22:13:11] (step=0007831) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.5321854822930934, LR: 0.0003 +[2026-02-27 22:13:19] (step=0007832) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 1.532381138720407, LR: 0.0003 +[2026-02-27 22:13:27] (step=0007833) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 1.5325767951477205, LR: 0.0003 +[2026-02-27 22:13:35] (step=0007834) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.5327724515750343, LR: 0.0003 +[2026-02-27 22:13:43] (step=0007835) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 1.5329681080023478, LR: 0.0003 +[2026-02-27 22:13:51] (step=0007836) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.5331637644296616, LR: 0.0003 +[2026-02-27 22:13:59] (step=0007837) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.5333594208569752, LR: 0.0003 +[2026-02-27 22:14:06] (step=0007838) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.5335550772842887, LR: 0.0003 +[2026-02-27 22:14:14] (step=0007839) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.5337507337116025, LR: 0.0003 +[2026-02-27 22:14:22] (step=0007840) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 1.533946390138916, LR: 0.0003 +[2026-02-27 22:14:30] (step=0007841) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.5341420465662297, LR: 0.0003 +[2026-02-27 22:14:38] (step=0007842) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.5343377029935434, LR: 0.0003 +[2026-02-27 22:14:46] (step=0007843) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.534533359420857, LR: 0.0003 +[2026-02-27 22:14:54] (step=0007844) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.5347290158481706, LR: 0.0003 +[2026-02-27 22:15:01] (step=0007845) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.5349246722754843, LR: 0.0003 +[2026-02-27 22:15:09] (step=0007846) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 1.535120328702798, LR: 0.0003 +[2026-02-27 22:15:17] (step=0007847) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.5353159851301115, LR: 0.0003 +[2026-02-27 22:15:25] (step=0007848) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 1.5355116415574253, LR: 0.0003 +[2026-02-27 22:15:33] (step=0007849) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.5357072979847388, LR: 0.0003 +[2026-02-27 22:15:41] (step=0007850) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.5359029544120524, LR: 0.0003 +[2026-02-27 22:15:48] (step=0007851) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.5360986108393662, LR: 0.0003 +[2026-02-27 22:15:56] (step=0007852) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.5362942672666797, LR: 0.0003 +[2026-02-27 22:16:04] (step=0007853) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.5364899236939933, LR: 0.0003 +[2026-02-27 22:16:12] (step=0007854) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 1.536685580121307, LR: 0.0003 +[2026-02-27 22:16:20] (step=0007855) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.5368812365486206, LR: 0.0003 +[2026-02-27 22:16:28] (step=0007856) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.5370768929759342, LR: 0.0003 +[2026-02-27 22:16:35] (step=0007857) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.537272549403248, LR: 0.0003 +[2026-02-27 22:16:43] (step=0007858) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.5374682058305615, LR: 0.0003 +[2026-02-27 22:16:51] (step=0007859) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.537663862257875, LR: 0.0003 +[2026-02-27 22:16:59] (step=0007860) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 1.5378595186851889, LR: 0.0003 +[2026-02-27 22:17:07] (step=0007861) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.5380551751125024, LR: 0.0003 +[2026-02-27 22:17:15] (step=0007862) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 1.538250831539816, LR: 0.0003 +[2026-02-27 22:17:23] (step=0007863) Train Loss: 0.4663, Train Steps/Sec: 0.13, Epoch: 1.5384464879671298, LR: 0.0003 +[2026-02-27 22:17:30] (step=0007864) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.5386421443944434, LR: 0.0003 +[2026-02-27 22:17:38] (step=0007865) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 1.538837800821757, LR: 0.0003 +[2026-02-27 22:17:46] (step=0007866) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.5390334572490707, LR: 0.0003 +[2026-02-27 22:17:54] (step=0007867) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.5392291136763843, LR: 0.0003 +[2026-02-27 22:18:02] (step=0007868) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.5394247701036978, LR: 0.0003 +[2026-02-27 22:18:10] (step=0007869) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.5396204265310116, LR: 0.0003 +[2026-02-27 22:18:17] (step=0007870) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.5398160829583252, LR: 0.0003 +[2026-02-27 22:18:25] (step=0007871) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.5400117393856387, LR: 0.0003 +[2026-02-27 22:18:33] (step=0007872) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.5402073958129525, LR: 0.0003 +[2026-02-27 22:18:41] (step=0007873) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.540403052240266, LR: 0.0003 +[2026-02-27 22:18:49] (step=0007874) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.5405987086675796, LR: 0.0003 +[2026-02-27 22:18:57] (step=0007875) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.5407943650948934, LR: 0.0003 +[2026-02-27 22:19:05] (step=0007876) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.540990021522207, LR: 0.0003 +[2026-02-27 22:19:12] (step=0007877) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.5411856779495205, LR: 0.0003 +[2026-02-27 22:19:20] (step=0007878) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.5413813343768343, LR: 0.0003 +[2026-02-27 22:19:28] (step=0007879) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.541576990804148, LR: 0.0003 +[2026-02-27 22:19:36] (step=0007880) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.5417726472314615, LR: 0.0003 +[2026-02-27 22:19:44] (step=0007881) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.5419683036587752, LR: 0.0003 +[2026-02-27 22:19:52] (step=0007882) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.5421639600860888, LR: 0.0003 +[2026-02-27 22:19:59] (step=0007883) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.5423596165134024, LR: 0.0003 +[2026-02-27 22:20:07] (step=0007884) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.5425552729407161, LR: 0.0003 +[2026-02-27 22:20:15] (step=0007885) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.5427509293680297, LR: 0.0003 +[2026-02-27 22:20:23] (step=0007886) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.5429465857953433, LR: 0.0003 +[2026-02-27 22:20:31] (step=0007887) Train Loss: 0.4495, Train Steps/Sec: 0.12, Epoch: 1.543142242222657, LR: 0.0003 +[2026-02-27 22:20:39] (step=0007888) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.5433378986499706, LR: 0.0003 +[2026-02-27 22:20:47] (step=0007889) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.5435335550772842, LR: 0.0003 +[2026-02-27 22:20:55] (step=0007890) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.543729211504598, LR: 0.0003 +[2026-02-27 22:21:03] (step=0007891) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.5439248679319115, LR: 0.0003 +[2026-02-27 22:21:10] (step=0007892) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.5441205243592253, LR: 0.0003 +[2026-02-27 22:21:18] (step=0007893) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.5443161807865389, LR: 0.0003 +[2026-02-27 22:21:26] (step=0007894) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 1.5445118372138524, LR: 0.0003 +[2026-02-27 22:21:34] (step=0007895) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.5447074936411662, LR: 0.0003 +[2026-02-27 22:21:42] (step=0007896) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.5449031500684798, LR: 0.0003 +[2026-02-27 22:21:50] (step=0007897) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 1.5450988064957933, LR: 0.0003 +[2026-02-27 22:21:57] (step=0007898) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.5452944629231071, LR: 0.0003 +[2026-02-27 22:22:05] (step=0007899) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.5454901193504207, LR: 0.0003 +[2026-02-27 22:22:13] (step=0007900) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.5456857757777342, LR: 0.0003 +[2026-02-27 22:22:21] (step=0007901) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.545881432205048, LR: 0.0003 +[2026-02-27 22:22:29] (step=0007902) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.5460770886323616, LR: 0.0003 +[2026-02-27 22:22:37] (step=0007903) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.5462727450596752, LR: 0.0003 +[2026-02-27 22:22:45] (step=0007904) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.546468401486989, LR: 0.0003 +[2026-02-27 22:22:52] (step=0007905) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.5466640579143025, LR: 0.0003 +[2026-02-27 22:23:00] (step=0007906) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.546859714341616, LR: 0.0003 +[2026-02-27 22:23:08] (step=0007907) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.5470553707689298, LR: 0.0003 +[2026-02-27 22:23:16] (step=0007908) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.5472510271962434, LR: 0.0003 +[2026-02-27 22:23:24] (step=0007909) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.547446683623557, LR: 0.0003 +[2026-02-27 22:23:32] (step=0007910) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.5476423400508708, LR: 0.0003 +[2026-02-27 22:23:39] (step=0007911) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.5478379964781843, LR: 0.0003 +[2026-02-27 22:23:47] (step=0007912) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.5480336529054979, LR: 0.0003 +[2026-02-27 22:23:55] (step=0007913) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.5482293093328117, LR: 0.0003 +[2026-02-27 22:24:03] (step=0007914) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 1.5484249657601252, LR: 0.0003 +[2026-02-27 22:24:11] (step=0007915) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.5486206221874388, LR: 0.0003 +[2026-02-27 22:24:19] (step=0007916) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.5488162786147526, LR: 0.0003 +[2026-02-27 22:24:27] (step=0007917) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.5490119350420661, LR: 0.0003 +[2026-02-27 22:24:34] (step=0007918) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.5492075914693797, LR: 0.0003 +[2026-02-27 22:24:42] (step=0007919) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.5494032478966935, LR: 0.0003 +[2026-02-27 22:24:50] (step=0007920) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.549598904324007, LR: 0.0003 +[2026-02-27 22:24:58] (step=0007921) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.5497945607513206, LR: 0.0003 +[2026-02-27 22:25:06] (step=0007922) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.5499902171786344, LR: 0.0003 +[2026-02-27 22:25:14] (step=0007923) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.550185873605948, LR: 0.0003 +[2026-02-27 22:25:21] (step=0007924) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.5503815300332615, LR: 0.0003 +[2026-02-27 22:25:29] (step=0007925) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.5505771864605753, LR: 0.0003 +[2026-02-27 22:25:37] (step=0007926) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.5507728428878889, LR: 0.0003 +[2026-02-27 22:25:45] (step=0007927) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.5509684993152024, LR: 0.0003 +[2026-02-27 22:25:53] (step=0007928) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.5511641557425162, LR: 0.0003 +[2026-02-27 22:26:01] (step=0007929) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.5513598121698298, LR: 0.0003 +[2026-02-27 22:26:08] (step=0007930) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 1.5515554685971433, LR: 0.0003 +[2026-02-27 22:26:16] (step=0007931) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.5517511250244571, LR: 0.0003 +[2026-02-27 22:26:24] (step=0007932) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.5519467814517707, LR: 0.0003 +[2026-02-27 22:26:32] (step=0007933) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.5521424378790842, LR: 0.0003 +[2026-02-27 22:26:40] (step=0007934) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.552338094306398, LR: 0.0003 +[2026-02-27 22:26:48] (step=0007935) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 1.5525337507337116, LR: 0.0003 +[2026-02-27 22:26:56] (step=0007936) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 1.5527294071610251, LR: 0.0003 +[2026-02-27 22:27:04] (step=0007937) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.552925063588339, LR: 0.0003 +[2026-02-27 22:27:12] (step=0007938) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.5531207200156525, LR: 0.0003 +[2026-02-27 22:27:19] (step=0007939) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.553316376442966, LR: 0.0003 +[2026-02-27 22:27:27] (step=0007940) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.5535120328702798, LR: 0.0003 +[2026-02-27 22:27:35] (step=0007941) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.5537076892975934, LR: 0.0003 +[2026-02-27 22:27:43] (step=0007942) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 1.553903345724907, LR: 0.0003 +[2026-02-27 22:27:51] (step=0007943) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.5540990021522207, LR: 0.0003 +[2026-02-27 22:27:59] (step=0007944) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.5542946585795343, LR: 0.0003 +[2026-02-27 22:28:06] (step=0007945) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.5544903150068479, LR: 0.0003 +[2026-02-27 22:28:14] (step=0007946) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.5546859714341617, LR: 0.0003 +[2026-02-27 22:28:22] (step=0007947) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.5548816278614752, LR: 0.0003 +[2026-02-27 22:28:30] (step=0007948) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 1.555077284288789, LR: 0.0003 +[2026-02-27 22:28:38] (step=0007949) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.5552729407161026, LR: 0.0003 +[2026-02-27 22:28:46] (step=0007950) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.5554685971434161, LR: 0.0003 +[2026-02-27 22:28:54] (step=0007951) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.55566425357073, LR: 0.0003 +[2026-02-27 22:29:01] (step=0007952) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.5558599099980435, LR: 0.0003 +[2026-02-27 22:29:09] (step=0007953) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.556055566425357, LR: 0.0003 +[2026-02-27 22:29:17] (step=0007954) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.5562512228526708, LR: 0.0003 +[2026-02-27 22:29:25] (step=0007955) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.5564468792799844, LR: 0.0003 +[2026-02-27 22:29:33] (step=0007956) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.556642535707298, LR: 0.0003 +[2026-02-27 22:29:41] (step=0007957) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.5568381921346117, LR: 0.0003 +[2026-02-27 22:29:48] (step=0007958) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.5570338485619253, LR: 0.0003 +[2026-02-27 22:29:56] (step=0007959) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.5572295049892388, LR: 0.0003 +[2026-02-27 22:30:04] (step=0007960) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 1.5574251614165526, LR: 0.0003 +[2026-02-27 22:30:12] (step=0007961) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.5576208178438662, LR: 0.0003 +[2026-02-27 22:30:20] (step=0007962) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.5578164742711798, LR: 0.0003 +[2026-02-27 22:30:28] (step=0007963) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.5580121306984935, LR: 0.0003 +[2026-02-27 22:30:36] (step=0007964) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.558207787125807, LR: 0.0003 +[2026-02-27 22:30:43] (step=0007965) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.5584034435531207, LR: 0.0003 +[2026-02-27 22:30:51] (step=0007966) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.5585990999804344, LR: 0.0003 +[2026-02-27 22:30:59] (step=0007967) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.558794756407748, LR: 0.0003 +[2026-02-27 22:31:07] (step=0007968) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.5589904128350616, LR: 0.0003 +[2026-02-27 22:31:15] (step=0007969) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.5591860692623754, LR: 0.0003 +[2026-02-27 22:31:23] (step=0007970) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.559381725689689, LR: 0.0003 +[2026-02-27 22:31:31] (step=0007971) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 1.5595773821170025, LR: 0.0003 +[2026-02-27 22:31:38] (step=0007972) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.5597730385443163, LR: 0.0003 +[2026-02-27 22:31:46] (step=0007973) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 1.5599686949716298, LR: 0.0003 +[2026-02-27 22:31:54] (step=0007974) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.5601643513989434, LR: 0.0003 +[2026-02-27 22:32:02] (step=0007975) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.5603600078262572, LR: 0.0003 +[2026-02-27 22:32:10] (step=0007976) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 1.5605556642535707, LR: 0.0003 +[2026-02-27 22:32:18] (step=0007977) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.5607513206808843, LR: 0.0003 +[2026-02-27 22:32:25] (step=0007978) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.560946977108198, LR: 0.0003 +[2026-02-27 22:32:33] (step=0007979) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 1.5611426335355116, LR: 0.0003 +[2026-02-27 22:32:41] (step=0007980) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.5613382899628252, LR: 0.0003 +[2026-02-27 22:32:49] (step=0007981) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.561533946390139, LR: 0.0003 +[2026-02-27 22:32:57] (step=0007982) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.5617296028174525, LR: 0.0003 +[2026-02-27 22:33:05] (step=0007983) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 1.561925259244766, LR: 0.0003 +[2026-02-27 22:33:13] (step=0007984) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.56212091567208, LR: 0.0003 +[2026-02-27 22:33:20] (step=0007985) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.5623165720993935, LR: 0.0003 +[2026-02-27 22:33:28] (step=0007986) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.562512228526707, LR: 0.0003 +[2026-02-27 22:33:36] (step=0007987) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.5627078849540208, LR: 0.0003 +[2026-02-27 22:33:44] (step=0007988) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.5629035413813344, LR: 0.0003 +[2026-02-27 22:33:52] (step=0007989) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.563099197808648, LR: 0.0003 +[2026-02-27 22:34:00] (step=0007990) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.5632948542359617, LR: 0.0003 +[2026-02-27 22:34:08] (step=0007991) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.5634905106632753, LR: 0.0003 +[2026-02-27 22:34:16] (step=0007992) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.5636861670905888, LR: 0.0003 +[2026-02-27 22:34:23] (step=0007993) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.5638818235179026, LR: 0.0003 +[2026-02-27 22:34:31] (step=0007994) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.5640774799452162, LR: 0.0003 +[2026-02-27 22:34:39] (step=0007995) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.5642731363725297, LR: 0.0003 +[2026-02-27 22:34:47] (step=0007996) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.5644687927998435, LR: 0.0003 +[2026-02-27 22:34:55] (step=0007997) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.564664449227157, LR: 0.0003 +[2026-02-27 22:35:03] (step=0007998) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.5648601056544706, LR: 0.0003 +[2026-02-27 22:35:10] (step=0007999) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.5650557620817844, LR: 0.0003 +[2026-02-27 22:35:18] (step=0008000) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 1.565251418509098, LR: 0.0003 +[2026-02-27 22:35:18] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0008000/ +[2026-02-27 22:35:26] (step=0008001) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 1.5654470749364116, LR: 0.0003 +[2026-02-27 22:35:34] (step=0008002) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.5656427313637253, LR: 0.0003 +[2026-02-27 22:35:42] (step=0008003) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.565838387791039, LR: 0.0003 +[2026-02-27 22:35:50] (step=0008004) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.5660340442183527, LR: 0.0003 +[2026-02-27 22:35:57] (step=0008005) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.5662297006456662, LR: 0.0003 +[2026-02-27 22:36:05] (step=0008006) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.5664253570729798, LR: 0.0003 +[2026-02-27 22:36:13] (step=0008007) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.5666210135002936, LR: 0.0003 +[2026-02-27 22:36:21] (step=0008008) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.5668166699276072, LR: 0.0003 +[2026-02-27 22:36:29] (step=0008009) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.5670123263549207, LR: 0.0003 +[2026-02-27 22:36:37] (step=0008010) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.5672079827822345, LR: 0.0003 +[2026-02-27 22:36:45] (step=0008011) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.567403639209548, LR: 0.0003 +[2026-02-27 22:36:52] (step=0008012) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.5675992956368616, LR: 0.0003 +[2026-02-27 22:37:00] (step=0008013) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.5677949520641754, LR: 0.0003 +[2026-02-27 22:37:08] (step=0008014) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.567990608491489, LR: 0.0003 +[2026-02-27 22:37:16] (step=0008015) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 1.5681862649188025, LR: 0.0003 +[2026-02-27 22:37:24] (step=0008016) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.5683819213461163, LR: 0.0003 +[2026-02-27 22:37:32] (step=0008017) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.5685775777734299, LR: 0.0003 +[2026-02-27 22:37:40] (step=0008018) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.5687732342007434, LR: 0.0003 +[2026-02-27 22:37:47] (step=0008019) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.5689688906280572, LR: 0.0003 +[2026-02-27 22:37:55] (step=0008020) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.5691645470553708, LR: 0.0003 +[2026-02-27 22:38:03] (step=0008021) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.5693602034826843, LR: 0.0003 +[2026-02-27 22:38:11] (step=0008022) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.5695558599099981, LR: 0.0003 +[2026-02-27 22:38:19] (step=0008023) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.5697515163373117, LR: 0.0003 +[2026-02-27 22:38:27] (step=0008024) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.5699471727646253, LR: 0.0003 +[2026-02-27 22:38:34] (step=0008025) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.570142829191939, LR: 0.0003 +[2026-02-27 22:38:42] (step=0008026) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.5703384856192526, LR: 0.0003 +[2026-02-27 22:38:50] (step=0008027) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.5705341420465662, LR: 0.0003 +[2026-02-27 22:38:58] (step=0008028) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 1.57072979847388, LR: 0.0003 +[2026-02-27 22:39:06] (step=0008029) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.5709254549011935, LR: 0.0003 +[2026-02-27 22:39:14] (step=0008030) Train Loss: 0.4548, Train Steps/Sec: 0.12, Epoch: 1.571121111328507, LR: 0.0003 +[2026-02-27 22:39:22] (step=0008031) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.5713167677558209, LR: 0.0003 +[2026-02-27 22:39:30] (step=0008032) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.5715124241831344, LR: 0.0003 +[2026-02-27 22:39:37] (step=0008033) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 1.571708080610448, LR: 0.0003 +[2026-02-27 22:39:45] (step=0008034) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.5719037370377618, LR: 0.0003 +[2026-02-27 22:39:53] (step=0008035) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.5720993934650753, LR: 0.0003 +[2026-02-27 22:40:01] (step=0008036) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.5722950498923889, LR: 0.0003 +[2026-02-27 22:40:09] (step=0008037) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.5724907063197027, LR: 0.0003 +[2026-02-27 22:40:17] (step=0008038) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.5726863627470162, LR: 0.0003 +[2026-02-27 22:40:25] (step=0008039) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.5728820191743298, LR: 0.0003 +[2026-02-27 22:40:32] (step=0008040) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.5730776756016436, LR: 0.0003 +[2026-02-27 22:40:40] (step=0008041) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.5732733320289571, LR: 0.0003 +[2026-02-27 22:40:48] (step=0008042) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.5734689884562707, LR: 0.0003 +[2026-02-27 22:40:56] (step=0008043) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.5736646448835845, LR: 0.0003 +[2026-02-27 22:41:04] (step=0008044) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 1.573860301310898, LR: 0.0003 +[2026-02-27 22:41:12] (step=0008045) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 1.5740559577382116, LR: 0.0003 +[2026-02-27 22:41:19] (step=0008046) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.5742516141655254, LR: 0.0003 +[2026-02-27 22:41:27] (step=0008047) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.574447270592839, LR: 0.0003 +[2026-02-27 22:41:35] (step=0008048) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 1.5746429270201525, LR: 0.0003 +[2026-02-27 22:41:43] (step=0008049) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.5748385834474663, LR: 0.0003 +[2026-02-27 22:41:51] (step=0008050) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.5750342398747799, LR: 0.0003 +[2026-02-27 22:41:59] (step=0008051) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.5752298963020934, LR: 0.0003 +[2026-02-27 22:42:07] (step=0008052) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.5754255527294072, LR: 0.0003 +[2026-02-27 22:42:14] (step=0008053) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.5756212091567208, LR: 0.0003 +[2026-02-27 22:42:22] (step=0008054) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.5758168655840343, LR: 0.0003 +[2026-02-27 22:42:30] (step=0008055) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.5760125220113481, LR: 0.0003 +[2026-02-27 22:42:38] (step=0008056) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.5762081784386617, LR: 0.0003 +[2026-02-27 22:42:46] (step=0008057) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.5764038348659752, LR: 0.0003 +[2026-02-27 22:42:54] (step=0008058) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.576599491293289, LR: 0.0003 +[2026-02-27 22:43:02] (step=0008059) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.5767951477206026, LR: 0.0003 +[2026-02-27 22:43:09] (step=0008060) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.5769908041479164, LR: 0.0003 +[2026-02-27 22:43:17] (step=0008061) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.57718646057523, LR: 0.0003 +[2026-02-27 22:43:25] (step=0008062) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.5773821170025435, LR: 0.0003 +[2026-02-27 22:43:33] (step=0008063) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.5775777734298573, LR: 0.0003 +[2026-02-27 22:43:41] (step=0008064) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.5777734298571708, LR: 0.0003 +[2026-02-27 22:43:49] (step=0008065) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.5779690862844844, LR: 0.0003 +[2026-02-27 22:43:56] (step=0008066) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.5781647427117982, LR: 0.0003 +[2026-02-27 22:44:04] (step=0008067) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 1.5783603991391117, LR: 0.0003 +[2026-02-27 22:44:12] (step=0008068) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.5785560555664253, LR: 0.0003 +[2026-02-27 22:44:20] (step=0008069) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.578751711993739, LR: 0.0003 +[2026-02-27 22:44:28] (step=0008070) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.5789473684210527, LR: 0.0003 +[2026-02-27 22:44:36] (step=0008071) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.5791430248483662, LR: 0.0003 +[2026-02-27 22:44:43] (step=0008072) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.57933868127568, LR: 0.0003 +[2026-02-27 22:44:51] (step=0008073) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.5795343377029936, LR: 0.0003 +[2026-02-27 22:44:59] (step=0008074) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 1.5797299941303071, LR: 0.0003 +[2026-02-27 22:45:07] (step=0008075) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.579925650557621, LR: 0.0003 +[2026-02-27 22:45:15] (step=0008076) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.5801213069849345, LR: 0.0003 +[2026-02-27 22:45:23] (step=0008077) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 1.580316963412248, LR: 0.0003 +[2026-02-27 22:45:31] (step=0008078) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 1.5805126198395618, LR: 0.0003 +[2026-02-27 22:45:39] (step=0008079) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 1.5807082762668754, LR: 0.0003 +[2026-02-27 22:45:46] (step=0008080) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.580903932694189, LR: 0.0003 +[2026-02-27 22:45:54] (step=0008081) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.5810995891215027, LR: 0.0003 +[2026-02-27 22:46:02] (step=0008082) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.5812952455488163, LR: 0.0003 +[2026-02-27 22:46:10] (step=0008083) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 1.5814909019761298, LR: 0.0003 +[2026-02-27 22:46:18] (step=0008084) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.5816865584034436, LR: 0.0003 +[2026-02-27 22:46:26] (step=0008085) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 1.5818822148307572, LR: 0.0003 +[2026-02-27 22:46:33] (step=0008086) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 1.5820778712580708, LR: 0.0003 +[2026-02-27 22:46:41] (step=0008087) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.5822735276853845, LR: 0.0003 +[2026-02-27 22:46:49] (step=0008088) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.582469184112698, LR: 0.0003 +[2026-02-27 22:46:57] (step=0008089) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.5826648405400117, LR: 0.0003 +[2026-02-27 22:47:05] (step=0008090) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 1.5828604969673254, LR: 0.0003 +[2026-02-27 22:47:13] (step=0008091) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.583056153394639, LR: 0.0003 +[2026-02-27 22:47:21] (step=0008092) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.5832518098219526, LR: 0.0003 +[2026-02-27 22:47:28] (step=0008093) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.5834474662492664, LR: 0.0003 +[2026-02-27 22:47:36] (step=0008094) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.58364312267658, LR: 0.0003 +[2026-02-27 22:47:44] (step=0008095) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.5838387791038935, LR: 0.0003 +[2026-02-27 22:47:52] (step=0008096) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.5840344355312073, LR: 0.0003 +[2026-02-27 22:48:00] (step=0008097) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 1.5842300919585208, LR: 0.0003 +[2026-02-27 22:48:08] (step=0008098) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.5844257483858344, LR: 0.0003 +[2026-02-27 22:48:16] (step=0008099) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.5846214048131482, LR: 0.0003 +[2026-02-27 22:48:23] (step=0008100) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.5848170612404617, LR: 0.0003 +[2026-02-27 22:48:31] (step=0008101) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.5850127176677753, LR: 0.0003 +[2026-02-27 22:48:39] (step=0008102) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.585208374095089, LR: 0.0003 +[2026-02-27 22:48:47] (step=0008103) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.5854040305224026, LR: 0.0003 +[2026-02-27 22:48:55] (step=0008104) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.5855996869497162, LR: 0.0003 +[2026-02-27 22:49:03] (step=0008105) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.58579534337703, LR: 0.0003 +[2026-02-27 22:49:10] (step=0008106) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.5859909998043435, LR: 0.0003 +[2026-02-27 22:49:18] (step=0008107) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.586186656231657, LR: 0.0003 +[2026-02-27 22:49:26] (step=0008108) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.586382312658971, LR: 0.0003 +[2026-02-27 22:49:34] (step=0008109) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.5865779690862845, LR: 0.0003 +[2026-02-27 22:49:42] (step=0008110) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 1.586773625513598, LR: 0.0003 +[2026-02-27 22:49:50] (step=0008111) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.5869692819409118, LR: 0.0003 +[2026-02-27 22:49:58] (step=0008112) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 1.5871649383682254, LR: 0.0003 +[2026-02-27 22:50:05] (step=0008113) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.587360594795539, LR: 0.0003 +[2026-02-27 22:50:13] (step=0008114) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.5875562512228527, LR: 0.0003 +[2026-02-27 22:50:21] (step=0008115) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 1.5877519076501663, LR: 0.0003 +[2026-02-27 22:50:29] (step=0008116) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 1.58794756407748, LR: 0.0003 +[2026-02-27 22:50:37] (step=0008117) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.5881432205047936, LR: 0.0003 +[2026-02-27 22:50:45] (step=0008118) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 1.5883388769321072, LR: 0.0003 +[2026-02-27 22:50:52] (step=0008119) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.588534533359421, LR: 0.0003 +[2026-02-27 22:51:00] (step=0008120) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.5887301897867345, LR: 0.0003 +[2026-02-27 22:51:08] (step=0008121) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 1.588925846214048, LR: 0.0003 +[2026-02-27 22:51:16] (step=0008122) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 1.5891215026413619, LR: 0.0003 +[2026-02-27 22:51:24] (step=0008123) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.5893171590686754, LR: 0.0003 +[2026-02-27 22:51:32] (step=0008124) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 1.589512815495989, LR: 0.0003 +[2026-02-27 22:51:39] (step=0008125) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.5897084719233028, LR: 0.0003 +[2026-02-27 22:51:47] (step=0008126) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.5899041283506163, LR: 0.0003 +[2026-02-27 22:51:55] (step=0008127) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.59009978477793, LR: 0.0003 +[2026-02-27 22:52:03] (step=0008128) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.5902954412052437, LR: 0.0003 +[2026-02-27 22:52:11] (step=0008129) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.5904910976325572, LR: 0.0003 +[2026-02-27 22:52:19] (step=0008130) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.5906867540598708, LR: 0.0003 +[2026-02-27 22:52:27] (step=0008131) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.5908824104871846, LR: 0.0003 +[2026-02-27 22:52:35] (step=0008132) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.5910780669144982, LR: 0.0003 +[2026-02-27 22:52:42] (step=0008133) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.5912737233418117, LR: 0.0003 +[2026-02-27 22:52:50] (step=0008134) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 1.5914693797691255, LR: 0.0003 +[2026-02-27 22:52:58] (step=0008135) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.591665036196439, LR: 0.0003 +[2026-02-27 22:53:06] (step=0008136) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.5918606926237526, LR: 0.0003 +[2026-02-27 22:53:14] (step=0008137) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 1.5920563490510664, LR: 0.0003 +[2026-02-27 22:53:22] (step=0008138) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.59225200547838, LR: 0.0003 +[2026-02-27 22:53:30] (step=0008139) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.5924476619056935, LR: 0.0003 +[2026-02-27 22:53:37] (step=0008140) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.5926433183330073, LR: 0.0003 +[2026-02-27 22:53:45] (step=0008141) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.5928389747603209, LR: 0.0003 +[2026-02-27 22:53:53] (step=0008142) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.5930346311876344, LR: 0.0003 +[2026-02-27 22:54:01] (step=0008143) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.5932302876149482, LR: 0.0003 +[2026-02-27 22:54:09] (step=0008144) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.5934259440422618, LR: 0.0003 +[2026-02-27 22:54:17] (step=0008145) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.5936216004695754, LR: 0.0003 +[2026-02-27 22:54:24] (step=0008146) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.5938172568968891, LR: 0.0003 +[2026-02-27 22:54:32] (step=0008147) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.5940129133242027, LR: 0.0003 +[2026-02-27 22:54:40] (step=0008148) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 1.5942085697515163, LR: 0.0003 +[2026-02-27 22:54:48] (step=0008149) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.59440422617883, LR: 0.0003 +[2026-02-27 22:54:56] (step=0008150) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 1.5945998826061436, LR: 0.0003 +[2026-02-27 22:55:04] (step=0008151) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.5947955390334572, LR: 0.0003 +[2026-02-27 22:55:12] (step=0008152) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.594991195460771, LR: 0.0003 +[2026-02-27 22:55:19] (step=0008153) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.5951868518880845, LR: 0.0003 +[2026-02-27 22:55:27] (step=0008154) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.595382508315398, LR: 0.0003 +[2026-02-27 22:55:35] (step=0008155) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.5955781647427119, LR: 0.0003 +[2026-02-27 22:55:43] (step=0008156) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.5957738211700254, LR: 0.0003 +[2026-02-27 22:55:51] (step=0008157) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.595969477597339, LR: 0.0003 +[2026-02-27 22:55:59] (step=0008158) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.5961651340246528, LR: 0.0003 +[2026-02-27 22:56:06] (step=0008159) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 1.5963607904519663, LR: 0.0003 +[2026-02-27 22:56:14] (step=0008160) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 1.59655644687928, LR: 0.0003 +[2026-02-27 22:56:22] (step=0008161) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.5967521033065937, LR: 0.0003 +[2026-02-27 22:56:30] (step=0008162) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.5969477597339072, LR: 0.0003 +[2026-02-27 22:56:38] (step=0008163) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.5971434161612208, LR: 0.0003 +[2026-02-27 22:56:46] (step=0008164) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.5973390725885346, LR: 0.0003 +[2026-02-27 22:56:53] (step=0008165) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 1.5975347290158481, LR: 0.0003 +[2026-02-27 22:57:01] (step=0008166) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.5977303854431617, LR: 0.0003 +[2026-02-27 22:57:09] (step=0008167) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.5979260418704755, LR: 0.0003 +[2026-02-27 22:57:17] (step=0008168) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.598121698297789, LR: 0.0003 +[2026-02-27 22:57:25] (step=0008169) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 1.5983173547251026, LR: 0.0003 +[2026-02-27 22:57:33] (step=0008170) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.5985130111524164, LR: 0.0003 +[2026-02-27 22:57:41] (step=0008171) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.59870866757973, LR: 0.0003 +[2026-02-27 22:57:48] (step=0008172) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.5989043240070437, LR: 0.0003 +[2026-02-27 22:57:56] (step=0008173) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.5990999804343573, LR: 0.0003 +[2026-02-27 22:58:04] (step=0008174) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.5992956368616709, LR: 0.0003 +[2026-02-27 22:58:12] (step=0008175) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.5994912932889847, LR: 0.0003 +[2026-02-27 22:58:20] (step=0008176) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.5996869497162982, LR: 0.0003 +[2026-02-27 22:58:28] (step=0008177) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.5998826061436118, LR: 0.0003 +[2026-02-27 22:58:35] (step=0008178) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.6000782625709256, LR: 0.0003 +[2026-02-27 22:58:43] (step=0008179) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.6002739189982391, LR: 0.0003 +[2026-02-27 22:58:51] (step=0008180) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.6004695754255527, LR: 0.0003 +[2026-02-27 22:58:59] (step=0008181) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.6006652318528665, LR: 0.0003 +[2026-02-27 22:59:07] (step=0008182) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.60086088828018, LR: 0.0003 +[2026-02-27 22:59:15] (step=0008183) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.6010565447074936, LR: 0.0003 +[2026-02-27 22:59:23] (step=0008184) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 1.6012522011348074, LR: 0.0003 +[2026-02-27 22:59:30] (step=0008185) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 1.601447857562121, LR: 0.0003 +[2026-02-27 22:59:38] (step=0008186) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.6016435139894345, LR: 0.0003 +[2026-02-27 22:59:46] (step=0008187) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.6018391704167483, LR: 0.0003 +[2026-02-27 22:59:54] (step=0008188) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 1.6020348268440618, LR: 0.0003 +[2026-02-27 23:00:02] (step=0008189) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.6022304832713754, LR: 0.0003 +[2026-02-27 23:00:10] (step=0008190) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.6024261396986892, LR: 0.0003 +[2026-02-27 23:00:18] (step=0008191) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.6026217961260028, LR: 0.0003 +[2026-02-27 23:00:25] (step=0008192) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.6028174525533163, LR: 0.0003 +[2026-02-27 23:00:33] (step=0008193) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.60301310898063, LR: 0.0003 +[2026-02-27 23:00:41] (step=0008194) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.6032087654079437, LR: 0.0003 +[2026-02-27 23:00:49] (step=0008195) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.6034044218352572, LR: 0.0003 +[2026-02-27 23:00:57] (step=0008196) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.603600078262571, LR: 0.0003 +[2026-02-27 23:01:05] (step=0008197) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.6037957346898846, LR: 0.0003 +[2026-02-27 23:01:13] (step=0008198) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.6039913911171981, LR: 0.0003 +[2026-02-27 23:01:20] (step=0008199) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.604187047544512, LR: 0.0003 +[2026-02-27 23:01:28] (step=0008200) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.6043827039718255, LR: 0.0003 +[2026-02-27 23:01:36] (step=0008201) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.604578360399139, LR: 0.0003 +[2026-02-27 23:01:44] (step=0008202) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.6047740168264528, LR: 0.0003 +[2026-02-27 23:01:52] (step=0008203) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.6049696732537664, LR: 0.0003 +[2026-02-27 23:02:00] (step=0008204) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.60516532968108, LR: 0.0003 +[2026-02-27 23:02:07] (step=0008205) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 1.6053609861083937, LR: 0.0003 +[2026-02-27 23:02:15] (step=0008206) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.6055566425357073, LR: 0.0003 +[2026-02-27 23:02:23] (step=0008207) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 1.6057522989630209, LR: 0.0003 +[2026-02-27 23:02:31] (step=0008208) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.6059479553903346, LR: 0.0003 +[2026-02-27 23:02:39] (step=0008209) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.6061436118176482, LR: 0.0003 +[2026-02-27 23:02:47] (step=0008210) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.6063392682449618, LR: 0.0003 +[2026-02-27 23:02:54] (step=0008211) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.6065349246722755, LR: 0.0003 +[2026-02-27 23:03:02] (step=0008212) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 1.606730581099589, LR: 0.0003 +[2026-02-27 23:03:10] (step=0008213) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 1.6069262375269027, LR: 0.0003 +[2026-02-27 23:03:18] (step=0008214) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.6071218939542165, LR: 0.0003 +[2026-02-27 23:03:26] (step=0008215) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.60731755038153, LR: 0.0003 +[2026-02-27 23:03:34] (step=0008216) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 1.6075132068088436, LR: 0.0003 +[2026-02-27 23:03:42] (step=0008217) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.6077088632361574, LR: 0.0003 +[2026-02-27 23:03:49] (step=0008218) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.607904519663471, LR: 0.0003 +[2026-02-27 23:03:57] (step=0008219) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.6081001760907845, LR: 0.0003 +[2026-02-27 23:04:05] (step=0008220) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 1.6082958325180983, LR: 0.0003 +[2026-02-27 23:04:13] (step=0008221) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.6084914889454118, LR: 0.0003 +[2026-02-27 23:04:21] (step=0008222) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.6086871453727254, LR: 0.0003 +[2026-02-27 23:04:29] (step=0008223) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.6088828018000392, LR: 0.0003 +[2026-02-27 23:04:36] (step=0008224) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.6090784582273527, LR: 0.0003 +[2026-02-27 23:04:44] (step=0008225) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.6092741146546663, LR: 0.0003 +[2026-02-27 23:04:52] (step=0008226) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.60946977108198, LR: 0.0003 +[2026-02-27 23:05:00] (step=0008227) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 1.6096654275092936, LR: 0.0003 +[2026-02-27 23:05:08] (step=0008228) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.6098610839366072, LR: 0.0003 +[2026-02-27 23:05:16] (step=0008229) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.610056740363921, LR: 0.0003 +[2026-02-27 23:05:24] (step=0008230) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.6102523967912346, LR: 0.0003 +[2026-02-27 23:05:31] (step=0008231) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 1.6104480532185483, LR: 0.0003 +[2026-02-27 23:05:39] (step=0008232) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.610643709645862, LR: 0.0003 +[2026-02-27 23:05:47] (step=0008233) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.6108393660731755, LR: 0.0003 +[2026-02-27 23:05:55] (step=0008234) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.6110350225004892, LR: 0.0003 +[2026-02-27 23:06:03] (step=0008235) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.6112306789278028, LR: 0.0003 +[2026-02-27 23:06:11] (step=0008236) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.6114263353551164, LR: 0.0003 +[2026-02-27 23:06:19] (step=0008237) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.6116219917824302, LR: 0.0003 +[2026-02-27 23:06:27] (step=0008238) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.6118176482097437, LR: 0.0003 +[2026-02-27 23:06:34] (step=0008239) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.6120133046370573, LR: 0.0003 +[2026-02-27 23:06:42] (step=0008240) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 1.612208961064371, LR: 0.0003 +[2026-02-27 23:06:50] (step=0008241) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.6124046174916846, LR: 0.0003 +[2026-02-27 23:06:58] (step=0008242) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.6126002739189982, LR: 0.0003 +[2026-02-27 23:07:06] (step=0008243) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.612795930346312, LR: 0.0003 +[2026-02-27 23:07:14] (step=0008244) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 1.6129915867736255, LR: 0.0003 +[2026-02-27 23:07:22] (step=0008245) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 1.613187243200939, LR: 0.0003 +[2026-02-27 23:07:29] (step=0008246) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.6133828996282529, LR: 0.0003 +[2026-02-27 23:07:37] (step=0008247) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.6135785560555664, LR: 0.0003 +[2026-02-27 23:07:45] (step=0008248) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.61377421248288, LR: 0.0003 +[2026-02-27 23:07:53] (step=0008249) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.6139698689101938, LR: 0.0003 +[2026-02-27 23:08:01] (step=0008250) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.6141655253375073, LR: 0.0003 +[2026-02-27 23:08:09] (step=0008251) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.614361181764821, LR: 0.0003 +[2026-02-27 23:08:16] (step=0008252) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.6145568381921347, LR: 0.0003 +[2026-02-27 23:08:24] (step=0008253) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.6147524946194483, LR: 0.0003 +[2026-02-27 23:08:32] (step=0008254) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.6149481510467618, LR: 0.0003 +[2026-02-27 23:08:40] (step=0008255) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.6151438074740756, LR: 0.0003 +[2026-02-27 23:08:48] (step=0008256) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.6153394639013892, LR: 0.0003 +[2026-02-27 23:08:56] (step=0008257) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.6155351203287027, LR: 0.0003 +[2026-02-27 23:09:04] (step=0008258) Train Loss: 0.4748, Train Steps/Sec: 0.13, Epoch: 1.6157307767560165, LR: 0.0003 +[2026-02-27 23:09:11] (step=0008259) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.61592643318333, LR: 0.0003 +[2026-02-27 23:09:19] (step=0008260) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.6161220896106436, LR: 0.0003 +[2026-02-27 23:09:27] (step=0008261) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.6163177460379574, LR: 0.0003 +[2026-02-27 23:09:35] (step=0008262) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.616513402465271, LR: 0.0003 +[2026-02-27 23:09:43] (step=0008263) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 1.6167090588925845, LR: 0.0003 +[2026-02-27 23:09:51] (step=0008264) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.6169047153198983, LR: 0.0003 +[2026-02-27 23:09:58] (step=0008265) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 1.6171003717472119, LR: 0.0003 +[2026-02-27 23:10:06] (step=0008266) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 1.6172960281745254, LR: 0.0003 +[2026-02-27 23:10:14] (step=0008267) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.6174916846018392, LR: 0.0003 +[2026-02-27 23:10:22] (step=0008268) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.6176873410291528, LR: 0.0003 +[2026-02-27 23:10:30] (step=0008269) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 1.6178829974564664, LR: 0.0003 +[2026-02-27 23:10:38] (step=0008270) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 1.6180786538837801, LR: 0.0003 +[2026-02-27 23:10:45] (step=0008271) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.6182743103110937, LR: 0.0003 +[2026-02-27 23:10:53] (step=0008272) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.6184699667384073, LR: 0.0003 +[2026-02-27 23:11:01] (step=0008273) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.618665623165721, LR: 0.0003 +[2026-02-27 23:11:09] (step=0008274) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.6188612795930346, LR: 0.0003 +[2026-02-27 23:11:17] (step=0008275) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 1.6190569360203482, LR: 0.0003 +[2026-02-27 23:11:25] (step=0008276) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.619252592447662, LR: 0.0003 +[2026-02-27 23:11:33] (step=0008277) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 1.6194482488749755, LR: 0.0003 +[2026-02-27 23:11:40] (step=0008278) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 1.619643905302289, LR: 0.0003 +[2026-02-27 23:11:48] (step=0008279) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.6198395617296029, LR: 0.0003 +[2026-02-27 23:11:56] (step=0008280) Train Loss: 0.4560, Train Steps/Sec: 0.12, Epoch: 1.6200352181569164, LR: 0.0003 +[2026-02-27 23:12:04] (step=0008281) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.62023087458423, LR: 0.0003 +[2026-02-27 23:12:12] (step=0008282) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.6204265310115438, LR: 0.0003 +[2026-02-27 23:12:20] (step=0008283) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.6206221874388573, LR: 0.0003 +[2026-02-27 23:12:28] (step=0008284) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 1.620817843866171, LR: 0.0003 +[2026-02-27 23:12:35] (step=0008285) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.6210135002934847, LR: 0.0003 +[2026-02-27 23:12:43] (step=0008286) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.6212091567207982, LR: 0.0003 +[2026-02-27 23:12:51] (step=0008287) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.621404813148112, LR: 0.0003 +[2026-02-27 23:12:59] (step=0008288) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.6216004695754256, LR: 0.0003 +[2026-02-27 23:13:07] (step=0008289) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.6217961260027391, LR: 0.0003 +[2026-02-27 23:13:15] (step=0008290) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.621991782430053, LR: 0.0003 +[2026-02-27 23:13:23] (step=0008291) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.6221874388573665, LR: 0.0003 +[2026-02-27 23:13:31] (step=0008292) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.62238309528468, LR: 0.0003 +[2026-02-27 23:13:38] (step=0008293) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 1.6225787517119938, LR: 0.0003 +[2026-02-27 23:13:46] (step=0008294) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.6227744081393074, LR: 0.0003 +[2026-02-27 23:13:54] (step=0008295) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.622970064566621, LR: 0.0003 +[2026-02-27 23:14:02] (step=0008296) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.6231657209939347, LR: 0.0003 +[2026-02-27 23:14:10] (step=0008297) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.6233613774212483, LR: 0.0003 +[2026-02-27 23:14:18] (step=0008298) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.6235570338485619, LR: 0.0003 +[2026-02-27 23:14:25] (step=0008299) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.6237526902758757, LR: 0.0003 +[2026-02-27 23:14:33] (step=0008300) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.6239483467031892, LR: 0.0003 +[2026-02-27 23:14:41] (step=0008301) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.6241440031305028, LR: 0.0003 +[2026-02-27 23:14:49] (step=0008302) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.6243396595578166, LR: 0.0003 +[2026-02-27 23:14:57] (step=0008303) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.6245353159851301, LR: 0.0003 +[2026-02-27 23:15:05] (step=0008304) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 1.6247309724124437, LR: 0.0003 +[2026-02-27 23:15:12] (step=0008305) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.6249266288397575, LR: 0.0003 +[2026-02-27 23:15:20] (step=0008306) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.625122285267071, LR: 0.0003 +[2026-02-27 23:15:28] (step=0008307) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.6253179416943846, LR: 0.0003 +[2026-02-27 23:15:36] (step=0008308) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.6255135981216984, LR: 0.0003 +[2026-02-27 23:15:44] (step=0008309) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.625709254549012, LR: 0.0003 +[2026-02-27 23:15:52] (step=0008310) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.6259049109763255, LR: 0.0003 +[2026-02-27 23:16:00] (step=0008311) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.6261005674036393, LR: 0.0003 +[2026-02-27 23:16:07] (step=0008312) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.6262962238309528, LR: 0.0003 +[2026-02-27 23:16:15] (step=0008313) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.6264918802582664, LR: 0.0003 +[2026-02-27 23:16:23] (step=0008314) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.6266875366855802, LR: 0.0003 +[2026-02-27 23:16:31] (step=0008315) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.6268831931128938, LR: 0.0003 +[2026-02-27 23:16:39] (step=0008316) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.6270788495402073, LR: 0.0003 +[2026-02-27 23:16:47] (step=0008317) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.627274505967521, LR: 0.0003 +[2026-02-27 23:16:54] (step=0008318) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.6274701623948347, LR: 0.0003 +[2026-02-27 23:17:02] (step=0008319) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.6276658188221482, LR: 0.0003 +[2026-02-27 23:17:10] (step=0008320) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.627861475249462, LR: 0.0003 +[2026-02-27 23:17:18] (step=0008321) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.6280571316767756, LR: 0.0003 +[2026-02-27 23:17:26] (step=0008322) Train Loss: 0.4699, Train Steps/Sec: 0.13, Epoch: 1.6282527881040891, LR: 0.0003 +[2026-02-27 23:17:34] (step=0008323) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.628448444531403, LR: 0.0003 +[2026-02-27 23:17:42] (step=0008324) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.6286441009587165, LR: 0.0003 +[2026-02-27 23:17:49] (step=0008325) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.62883975738603, LR: 0.0003 +[2026-02-27 23:17:57] (step=0008326) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.6290354138133438, LR: 0.0003 +[2026-02-27 23:18:05] (step=0008327) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.6292310702406574, LR: 0.0003 +[2026-02-27 23:18:13] (step=0008328) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 1.629426726667971, LR: 0.0003 +[2026-02-27 23:18:21] (step=0008329) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 1.6296223830952847, LR: 0.0003 +[2026-02-27 23:18:29] (step=0008330) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 1.6298180395225983, LR: 0.0003 +[2026-02-27 23:18:37] (step=0008331) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.6300136959499119, LR: 0.0003 +[2026-02-27 23:18:44] (step=0008332) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.6302093523772256, LR: 0.0003 +[2026-02-27 23:18:52] (step=0008333) Train Loss: 0.4533, Train Steps/Sec: 0.12, Epoch: 1.6304050088045392, LR: 0.0003 +[2026-02-27 23:19:00] (step=0008334) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.6306006652318528, LR: 0.0003 +[2026-02-27 23:19:08] (step=0008335) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.6307963216591665, LR: 0.0003 +[2026-02-27 23:19:16] (step=0008336) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.63099197808648, LR: 0.0003 +[2026-02-27 23:19:24] (step=0008337) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.6311876345137937, LR: 0.0003 +[2026-02-27 23:19:32] (step=0008338) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 1.6313832909411075, LR: 0.0003 +[2026-02-27 23:19:39] (step=0008339) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.631578947368421, LR: 0.0003 +[2026-02-27 23:19:47] (step=0008340) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.6317746037957346, LR: 0.0003 +[2026-02-27 23:19:55] (step=0008341) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.6319702602230484, LR: 0.0003 +[2026-02-27 23:20:03] (step=0008342) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.632165916650362, LR: 0.0003 +[2026-02-27 23:20:11] (step=0008343) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.6323615730776757, LR: 0.0003 +[2026-02-27 23:20:19] (step=0008344) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.6325572295049893, LR: 0.0003 +[2026-02-27 23:20:26] (step=0008345) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.6327528859323028, LR: 0.0003 +[2026-02-27 23:20:34] (step=0008346) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.6329485423596166, LR: 0.0003 +[2026-02-27 23:20:42] (step=0008347) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.6331441987869302, LR: 0.0003 +[2026-02-27 23:20:50] (step=0008348) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.6333398552142437, LR: 0.0003 +[2026-02-27 23:20:58] (step=0008349) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.6335355116415575, LR: 0.0003 +[2026-02-27 23:21:06] (step=0008350) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 1.633731168068871, LR: 0.0003 +[2026-02-27 23:21:13] (step=0008351) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.6339268244961846, LR: 0.0003 +[2026-02-27 23:21:21] (step=0008352) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 1.6341224809234984, LR: 0.0003 +[2026-02-27 23:21:29] (step=0008353) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.634318137350812, LR: 0.0003 +[2026-02-27 23:21:37] (step=0008354) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.6345137937781256, LR: 0.0003 +[2026-02-27 23:21:45] (step=0008355) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.6347094502054393, LR: 0.0003 +[2026-02-27 23:21:52] (step=0008356) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.634905106632753, LR: 0.0003 +[2026-02-27 23:22:00] (step=0008357) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.6351007630600665, LR: 0.0003 +[2026-02-27 23:22:08] (step=0008358) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.6352964194873802, LR: 0.0003 +[2026-02-27 23:22:16] (step=0008359) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 1.6354920759146938, LR: 0.0003 +[2026-02-27 23:22:24] (step=0008360) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.6356877323420074, LR: 0.0003 +[2026-02-27 23:22:32] (step=0008361) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.6358833887693212, LR: 0.0003 +[2026-02-27 23:22:39] (step=0008362) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.6360790451966347, LR: 0.0003 +[2026-02-27 23:22:47] (step=0008363) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 1.6362747016239483, LR: 0.0003 +[2026-02-27 23:22:55] (step=0008364) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.636470358051262, LR: 0.0003 +[2026-02-27 23:23:03] (step=0008365) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.6366660144785756, LR: 0.0003 +[2026-02-27 23:23:11] (step=0008366) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.6368616709058892, LR: 0.0003 +[2026-02-27 23:23:18] (step=0008367) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.637057327333203, LR: 0.0003 +[2026-02-27 23:23:26] (step=0008368) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.6372529837605165, LR: 0.0003 +[2026-02-27 23:23:34] (step=0008369) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.63744864018783, LR: 0.0003 +[2026-02-27 23:23:42] (step=0008370) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.6376442966151439, LR: 0.0003 +[2026-02-27 23:23:50] (step=0008371) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.6378399530424574, LR: 0.0003 +[2026-02-27 23:23:58] (step=0008372) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.638035609469771, LR: 0.0003 +[2026-02-27 23:24:05] (step=0008373) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 1.6382312658970848, LR: 0.0003 +[2026-02-27 23:24:13] (step=0008374) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.6384269223243983, LR: 0.0003 +[2026-02-27 23:24:21] (step=0008375) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.638622578751712, LR: 0.0003 +[2026-02-27 23:24:29] (step=0008376) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.6388182351790257, LR: 0.0003 +[2026-02-27 23:24:37] (step=0008377) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.6390138916063393, LR: 0.0003 +[2026-02-27 23:24:45] (step=0008378) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.6392095480336528, LR: 0.0003 +[2026-02-27 23:24:52] (step=0008379) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 1.6394052044609666, LR: 0.0003 +[2026-02-27 23:25:00] (step=0008380) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 1.6396008608882802, LR: 0.0003 +[2026-02-27 23:25:08] (step=0008381) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.6397965173155937, LR: 0.0003 +[2026-02-27 23:25:16] (step=0008382) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.6399921737429075, LR: 0.0003 +[2026-02-27 23:25:24] (step=0008383) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.640187830170221, LR: 0.0003 +[2026-02-27 23:25:32] (step=0008384) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 1.6403834865975346, LR: 0.0003 +[2026-02-27 23:25:39] (step=0008385) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 1.6405791430248484, LR: 0.0003 +[2026-02-27 23:25:47] (step=0008386) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.640774799452162, LR: 0.0003 +[2026-02-27 23:25:55] (step=0008387) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.6409704558794755, LR: 0.0003 +[2026-02-27 23:26:03] (step=0008388) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.6411661123067893, LR: 0.0003 +[2026-02-27 23:26:11] (step=0008389) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.6413617687341029, LR: 0.0003 +[2026-02-27 23:26:19] (step=0008390) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 1.6415574251614165, LR: 0.0003 +[2026-02-27 23:26:26] (step=0008391) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.6417530815887302, LR: 0.0003 +[2026-02-27 23:26:34] (step=0008392) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 1.6419487380160438, LR: 0.0003 +[2026-02-27 23:26:42] (step=0008393) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 1.6421443944433574, LR: 0.0003 +[2026-02-27 23:26:50] (step=0008394) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.6423400508706711, LR: 0.0003 +[2026-02-27 23:26:58] (step=0008395) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.6425357072979847, LR: 0.0003 +[2026-02-27 23:27:05] (step=0008396) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.6427313637252983, LR: 0.0003 +[2026-02-27 23:27:13] (step=0008397) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 1.642927020152612, LR: 0.0003 +[2026-02-27 23:27:21] (step=0008398) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.6431226765799256, LR: 0.0003 +[2026-02-27 23:27:29] (step=0008399) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.6433183330072394, LR: 0.0003 +[2026-02-27 23:27:37] (step=0008400) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 1.643513989434553, LR: 0.0003 +[2026-02-27 23:27:45] (step=0008401) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.6437096458618665, LR: 0.0003 +[2026-02-27 23:27:52] (step=0008402) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.6439053022891803, LR: 0.0003 +[2026-02-27 23:28:00] (step=0008403) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.6441009587164939, LR: 0.0003 +[2026-02-27 23:28:08] (step=0008404) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.6442966151438074, LR: 0.0003 +[2026-02-27 23:28:16] (step=0008405) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.6444922715711212, LR: 0.0003 +[2026-02-27 23:28:24] (step=0008406) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.6446879279984348, LR: 0.0003 +[2026-02-27 23:28:32] (step=0008407) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 1.6448835844257483, LR: 0.0003 +[2026-02-27 23:28:39] (step=0008408) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.6450792408530621, LR: 0.0003 +[2026-02-27 23:28:47] (step=0008409) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.6452748972803757, LR: 0.0003 +[2026-02-27 23:28:55] (step=0008410) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.6454705537076892, LR: 0.0003 +[2026-02-27 23:29:03] (step=0008411) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.645666210135003, LR: 0.0003 +[2026-02-27 23:29:11] (step=0008412) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.6458618665623166, LR: 0.0003 +[2026-02-27 23:29:19] (step=0008413) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.6460575229896302, LR: 0.0003 +[2026-02-27 23:29:26] (step=0008414) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.646253179416944, LR: 0.0003 +[2026-02-27 23:29:34] (step=0008415) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.6464488358442575, LR: 0.0003 +[2026-02-27 23:29:42] (step=0008416) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.646644492271571, LR: 0.0003 +[2026-02-27 23:29:50] (step=0008417) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 1.6468401486988848, LR: 0.0003 +[2026-02-27 23:29:58] (step=0008418) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.6470358051261984, LR: 0.0003 +[2026-02-27 23:30:06] (step=0008419) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 1.647231461553512, LR: 0.0003 +[2026-02-27 23:30:13] (step=0008420) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.6474271179808258, LR: 0.0003 +[2026-02-27 23:30:21] (step=0008421) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 1.6476227744081393, LR: 0.0003 +[2026-02-27 23:30:29] (step=0008422) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.6478184308354529, LR: 0.0003 +[2026-02-27 23:30:37] (step=0008423) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 1.6480140872627667, LR: 0.0003 +[2026-02-27 23:30:45] (step=0008424) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.6482097436900802, LR: 0.0003 +[2026-02-27 23:30:52] (step=0008425) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.6484054001173938, LR: 0.0003 +[2026-02-27 23:31:00] (step=0008426) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.6486010565447076, LR: 0.0003 +[2026-02-27 23:31:08] (step=0008427) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.6487967129720211, LR: 0.0003 +[2026-02-27 23:31:16] (step=0008428) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.6489923693993347, LR: 0.0003 +[2026-02-27 23:31:24] (step=0008429) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.6491880258266485, LR: 0.0003 +[2026-02-27 23:31:32] (step=0008430) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.649383682253962, LR: 0.0003 +[2026-02-27 23:31:40] (step=0008431) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 1.6495793386812756, LR: 0.0003 +[2026-02-27 23:31:48] (step=0008432) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.6497749951085894, LR: 0.0003 +[2026-02-27 23:31:55] (step=0008433) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 1.649970651535903, LR: 0.0003 +[2026-02-27 23:32:03] (step=0008434) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 1.6501663079632165, LR: 0.0003 +[2026-02-27 23:32:11] (step=0008435) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.6503619643905303, LR: 0.0003 +[2026-02-27 23:32:19] (step=0008436) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 1.6505576208178439, LR: 0.0003 +[2026-02-27 23:32:27] (step=0008437) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.6507532772451574, LR: 0.0003 +[2026-02-27 23:32:34] (step=0008438) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.6509489336724712, LR: 0.0003 +[2026-02-27 23:32:42] (step=0008439) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.6511445900997848, LR: 0.0003 +[2026-02-27 23:32:50] (step=0008440) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.6513402465270983, LR: 0.0003 +[2026-02-27 23:32:58] (step=0008441) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.651535902954412, LR: 0.0003 +[2026-02-27 23:33:06] (step=0008442) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 1.6517315593817257, LR: 0.0003 +[2026-02-27 23:33:13] (step=0008443) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.6519272158090392, LR: 0.0003 +[2026-02-27 23:33:21] (step=0008444) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.652122872236353, LR: 0.0003 +[2026-02-27 23:33:29] (step=0008445) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.6523185286636666, LR: 0.0003 +[2026-02-27 23:33:37] (step=0008446) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.6525141850909801, LR: 0.0003 +[2026-02-27 23:33:45] (step=0008447) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.652709841518294, LR: 0.0003 +[2026-02-27 23:33:53] (step=0008448) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.6529054979456075, LR: 0.0003 +[2026-02-27 23:34:00] (step=0008449) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 1.653101154372921, LR: 0.0003 +[2026-02-27 23:34:08] (step=0008450) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.6532968108002348, LR: 0.0003 +[2026-02-27 23:34:16] (step=0008451) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.6534924672275484, LR: 0.0003 +[2026-02-27 23:34:24] (step=0008452) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.653688123654862, LR: 0.0003 +[2026-02-27 23:34:32] (step=0008453) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.6538837800821757, LR: 0.0003 +[2026-02-27 23:34:39] (step=0008454) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.6540794365094893, LR: 0.0003 +[2026-02-27 23:34:47] (step=0008455) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.654275092936803, LR: 0.0003 +[2026-02-27 23:34:55] (step=0008456) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.6544707493641166, LR: 0.0003 +[2026-02-27 23:35:03] (step=0008457) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.6546664057914302, LR: 0.0003 +[2026-02-27 23:35:11] (step=0008458) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.654862062218744, LR: 0.0003 +[2026-02-27 23:35:19] (step=0008459) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.6550577186460576, LR: 0.0003 +[2026-02-27 23:35:26] (step=0008460) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.6552533750733711, LR: 0.0003 +[2026-02-27 23:35:34] (step=0008461) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.655449031500685, LR: 0.0003 +[2026-02-27 23:35:42] (step=0008462) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.6556446879279985, LR: 0.0003 +[2026-02-27 23:35:50] (step=0008463) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 1.655840344355312, LR: 0.0003 +[2026-02-27 23:35:58] (step=0008464) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.6560360007826258, LR: 0.0003 +[2026-02-27 23:36:06] (step=0008465) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.6562316572099394, LR: 0.0003 +[2026-02-27 23:36:13] (step=0008466) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 1.656427313637253, LR: 0.0003 +[2026-02-27 23:36:21] (step=0008467) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.6566229700645667, LR: 0.0003 +[2026-02-27 23:36:29] (step=0008468) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 1.6568186264918803, LR: 0.0003 +[2026-02-27 23:36:37] (step=0008469) Train Loss: 0.4712, Train Steps/Sec: 0.13, Epoch: 1.6570142829191938, LR: 0.0003 +[2026-02-27 23:36:45] (step=0008470) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 1.6572099393465076, LR: 0.0003 +[2026-02-27 23:36:53] (step=0008471) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.6574055957738212, LR: 0.0003 +[2026-02-27 23:37:00] (step=0008472) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.6576012522011347, LR: 0.0003 +[2026-02-27 23:37:08] (step=0008473) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.6577969086284485, LR: 0.0003 +[2026-02-27 23:37:16] (step=0008474) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.657992565055762, LR: 0.0003 +[2026-02-27 23:37:24] (step=0008475) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.6581882214830757, LR: 0.0003 +[2026-02-27 23:37:32] (step=0008476) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.6583838779103894, LR: 0.0003 +[2026-02-27 23:37:40] (step=0008477) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.658579534337703, LR: 0.0003 +[2026-02-27 23:37:47] (step=0008478) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.6587751907650166, LR: 0.0003 +[2026-02-27 23:37:55] (step=0008479) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.6589708471923303, LR: 0.0003 +[2026-02-27 23:38:03] (step=0008480) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.659166503619644, LR: 0.0003 +[2026-02-27 23:38:11] (step=0008481) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.6593621600469575, LR: 0.0003 +[2026-02-27 23:38:19] (step=0008482) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.6595578164742713, LR: 0.0003 +[2026-02-27 23:38:27] (step=0008483) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.6597534729015848, LR: 0.0003 +[2026-02-27 23:38:35] (step=0008484) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 1.6599491293288984, LR: 0.0003 +[2026-02-27 23:38:42] (step=0008485) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 1.6601447857562122, LR: 0.0003 +[2026-02-27 23:38:50] (step=0008486) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.6603404421835257, LR: 0.0003 +[2026-02-27 23:38:58] (step=0008487) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.6605360986108393, LR: 0.0003 +[2026-02-27 23:39:06] (step=0008488) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.660731755038153, LR: 0.0003 +[2026-02-27 23:39:14] (step=0008489) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.6609274114654666, LR: 0.0003 +[2026-02-27 23:39:22] (step=0008490) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 1.6611230678927802, LR: 0.0003 +[2026-02-27 23:39:29] (step=0008491) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.661318724320094, LR: 0.0003 +[2026-02-27 23:39:37] (step=0008492) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 1.6615143807474075, LR: 0.0003 +[2026-02-27 23:39:45] (step=0008493) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.661710037174721, LR: 0.0003 +[2026-02-27 23:39:53] (step=0008494) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.6619056936020349, LR: 0.0003 +[2026-02-27 23:40:01] (step=0008495) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 1.6621013500293484, LR: 0.0003 +[2026-02-27 23:40:08] (step=0008496) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 1.662297006456662, LR: 0.0003 +[2026-02-27 23:40:16] (step=0008497) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 1.6624926628839758, LR: 0.0003 +[2026-02-27 23:40:24] (step=0008498) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 1.6626883193112894, LR: 0.0003 +[2026-02-27 23:40:32] (step=0008499) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 1.662883975738603, LR: 0.0003 +[2026-02-27 23:40:40] (step=0008500) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.6630796321659167, LR: 0.0003 +[2026-02-27 23:40:40] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0008500/ +[2026-02-27 23:40:48] (step=0008501) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 1.6632752885932303, LR: 0.0003 +[2026-02-27 23:40:55] (step=0008502) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 1.6634709450205438, LR: 0.0003 +[2026-02-27 23:41:03] (step=0008503) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.6636666014478576, LR: 0.0003 +[2026-02-27 23:41:11] (step=0008504) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.6638622578751712, LR: 0.0003 +[2026-02-27 23:41:19] (step=0008505) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.6640579143024847, LR: 0.0003 +[2026-02-27 23:41:27] (step=0008506) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.6642535707297985, LR: 0.0003 +[2026-02-27 23:41:35] (step=0008507) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.664449227157112, LR: 0.0003 +[2026-02-27 23:41:42] (step=0008508) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.6646448835844256, LR: 0.0003 +[2026-02-27 23:41:50] (step=0008509) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.6648405400117394, LR: 0.0003 +[2026-02-27 23:41:58] (step=0008510) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.665036196439053, LR: 0.0003 +[2026-02-27 23:42:06] (step=0008511) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.6652318528663668, LR: 0.0003 +[2026-02-27 23:42:14] (step=0008512) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.6654275092936803, LR: 0.0003 +[2026-02-27 23:42:21] (step=0008513) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.665623165720994, LR: 0.0003 +[2026-02-27 23:42:29] (step=0008514) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.6658188221483077, LR: 0.0003 +[2026-02-27 23:42:37] (step=0008515) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.6660144785756212, LR: 0.0003 +[2026-02-27 23:42:45] (step=0008516) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 1.6662101350029348, LR: 0.0003 +[2026-02-27 23:42:53] (step=0008517) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.6664057914302486, LR: 0.0003 +[2026-02-27 23:43:01] (step=0008518) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 1.6666014478575621, LR: 0.0003 +[2026-02-27 23:43:08] (step=0008519) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.6667971042848757, LR: 0.0003 +[2026-02-27 23:43:16] (step=0008520) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.6669927607121895, LR: 0.0003 +[2026-02-27 23:43:24] (step=0008521) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.667188417139503, LR: 0.0003 +[2026-02-27 23:43:32] (step=0008522) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.6673840735668166, LR: 0.0003 +[2026-02-27 23:43:40] (step=0008523) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.6675797299941304, LR: 0.0003 +[2026-02-27 23:43:48] (step=0008524) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.667775386421444, LR: 0.0003 +[2026-02-27 23:43:55] (step=0008525) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.6679710428487575, LR: 0.0003 +[2026-02-27 23:44:03] (step=0008526) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.6681666992760713, LR: 0.0003 +[2026-02-27 23:44:11] (step=0008527) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.6683623557033849, LR: 0.0003 +[2026-02-27 23:44:19] (step=0008528) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 1.6685580121306984, LR: 0.0003 +[2026-02-27 23:44:27] (step=0008529) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 1.6687536685580122, LR: 0.0003 +[2026-02-27 23:44:35] (step=0008530) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.6689493249853258, LR: 0.0003 +[2026-02-27 23:44:43] (step=0008531) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.6691449814126393, LR: 0.0003 +[2026-02-27 23:44:50] (step=0008532) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 1.6693406378399531, LR: 0.0003 +[2026-02-27 23:44:58] (step=0008533) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.6695362942672667, LR: 0.0003 +[2026-02-27 23:45:06] (step=0008534) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.6697319506945802, LR: 0.0003 +[2026-02-27 23:45:14] (step=0008535) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.669927607121894, LR: 0.0003 +[2026-02-27 23:45:22] (step=0008536) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 1.6701232635492076, LR: 0.0003 +[2026-02-27 23:45:29] (step=0008537) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.6703189199765212, LR: 0.0003 +[2026-02-27 23:45:37] (step=0008538) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 1.670514576403835, LR: 0.0003 +[2026-02-27 23:45:45] (step=0008539) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.6707102328311485, LR: 0.0003 +[2026-02-27 23:45:53] (step=0008540) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.670905889258462, LR: 0.0003 +[2026-02-27 23:46:01] (step=0008541) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 1.6711015456857758, LR: 0.0003 +[2026-02-27 23:46:09] (step=0008542) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 1.6712972021130894, LR: 0.0003 +[2026-02-27 23:46:16] (step=0008543) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.671492858540403, LR: 0.0003 +[2026-02-27 23:46:24] (step=0008544) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.6716885149677168, LR: 0.0003 +[2026-02-27 23:46:32] (step=0008545) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.6718841713950303, LR: 0.0003 +[2026-02-27 23:46:40] (step=0008546) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.6720798278223439, LR: 0.0003 +[2026-02-27 23:46:48] (step=0008547) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.6722754842496577, LR: 0.0003 +[2026-02-27 23:46:55] (step=0008548) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.6724711406769712, LR: 0.0003 +[2026-02-27 23:47:03] (step=0008549) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 1.6726667971042848, LR: 0.0003 +[2026-02-27 23:47:11] (step=0008550) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.6728624535315986, LR: 0.0003 +[2026-02-27 23:47:19] (step=0008551) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.6730581099589121, LR: 0.0003 +[2026-02-27 23:47:27] (step=0008552) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 1.6732537663862257, LR: 0.0003 +[2026-02-27 23:47:35] (step=0008553) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.6734494228135395, LR: 0.0003 +[2026-02-27 23:47:42] (step=0008554) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.673645079240853, LR: 0.0003 +[2026-02-27 23:47:50] (step=0008555) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.6738407356681666, LR: 0.0003 +[2026-02-27 23:47:58] (step=0008556) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 1.6740363920954804, LR: 0.0003 +[2026-02-27 23:48:06] (step=0008557) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 1.674232048522794, LR: 0.0003 +[2026-02-27 23:48:14] (step=0008558) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.6744277049501075, LR: 0.0003 +[2026-02-27 23:48:21] (step=0008559) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 1.6746233613774213, LR: 0.0003 +[2026-02-27 23:48:29] (step=0008560) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.6748190178047349, LR: 0.0003 +[2026-02-27 23:48:37] (step=0008561) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.6750146742320484, LR: 0.0003 +[2026-02-27 23:48:45] (step=0008562) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.6752103306593622, LR: 0.0003 +[2026-02-27 23:48:53] (step=0008563) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.6754059870866758, LR: 0.0003 +[2026-02-27 23:49:01] (step=0008564) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.6756016435139893, LR: 0.0003 +[2026-02-27 23:49:08] (step=0008565) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.675797299941303, LR: 0.0003 +[2026-02-27 23:49:16] (step=0008566) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.6759929563686167, LR: 0.0003 +[2026-02-27 23:49:24] (step=0008567) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.6761886127959305, LR: 0.0003 +[2026-02-27 23:49:32] (step=0008568) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.676384269223244, LR: 0.0003 +[2026-02-27 23:49:40] (step=0008569) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.6765799256505576, LR: 0.0003 +[2026-02-27 23:49:48] (step=0008570) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 1.6767755820778714, LR: 0.0003 +[2026-02-27 23:49:55] (step=0008571) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.676971238505185, LR: 0.0003 +[2026-02-27 23:50:03] (step=0008572) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 1.6771668949324985, LR: 0.0003 +[2026-02-27 23:50:11] (step=0008573) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.6773625513598123, LR: 0.0003 +[2026-02-27 23:50:19] (step=0008574) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.6775582077871258, LR: 0.0003 +[2026-02-27 23:50:27] (step=0008575) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 1.6777538642144394, LR: 0.0003 +[2026-02-27 23:50:35] (step=0008576) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.6779495206417532, LR: 0.0003 +[2026-02-27 23:50:42] (step=0008577) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.6781451770690667, LR: 0.0003 +[2026-02-27 23:50:50] (step=0008578) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.6783408334963803, LR: 0.0003 +[2026-02-27 23:50:58] (step=0008579) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.678536489923694, LR: 0.0003 +[2026-02-27 23:51:06] (step=0008580) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 1.6787321463510076, LR: 0.0003 +[2026-02-27 23:51:14] (step=0008581) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.6789278027783212, LR: 0.0003 +[2026-02-27 23:51:22] (step=0008582) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.679123459205635, LR: 0.0003 +[2026-02-27 23:51:29] (step=0008583) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.6793191156329486, LR: 0.0003 +[2026-02-27 23:51:37] (step=0008584) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.6795147720602621, LR: 0.0003 +[2026-02-27 23:51:45] (step=0008585) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 1.679710428487576, LR: 0.0003 +[2026-02-27 23:51:53] (step=0008586) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 1.6799060849148895, LR: 0.0003 +[2026-02-27 23:52:01] (step=0008587) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 1.680101741342203, LR: 0.0003 +[2026-02-27 23:52:09] (step=0008588) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 1.6802973977695168, LR: 0.0003 +[2026-02-27 23:52:16] (step=0008589) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.6804930541968304, LR: 0.0003 +[2026-02-27 23:52:24] (step=0008590) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.680688710624144, LR: 0.0003 +[2026-02-27 23:52:32] (step=0008591) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 1.6808843670514577, LR: 0.0003 +[2026-02-27 23:52:40] (step=0008592) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.6810800234787713, LR: 0.0003 +[2026-02-27 23:52:48] (step=0008593) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 1.6812756799060848, LR: 0.0003 +[2026-02-27 23:52:55] (step=0008594) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.6814713363333986, LR: 0.0003 +[2026-02-27 23:53:03] (step=0008595) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.6816669927607122, LR: 0.0003 +[2026-02-27 23:53:11] (step=0008596) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.6818626491880257, LR: 0.0003 +[2026-02-27 23:53:19] (step=0008597) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.6820583056153395, LR: 0.0003 +[2026-02-27 23:53:27] (step=0008598) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.682253962042653, LR: 0.0003 +[2026-02-27 23:53:35] (step=0008599) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 1.6824496184699667, LR: 0.0003 +[2026-02-27 23:53:42] (step=0008600) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.6826452748972804, LR: 0.0003 +[2026-02-27 23:53:50] (step=0008601) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 1.682840931324594, LR: 0.0003 +[2026-02-27 23:53:58] (step=0008602) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 1.6830365877519076, LR: 0.0003 +[2026-02-27 23:54:06] (step=0008603) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.6832322441792213, LR: 0.0003 +[2026-02-27 23:54:14] (step=0008604) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.683427900606535, LR: 0.0003 +[2026-02-27 23:54:22] (step=0008605) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.6836235570338485, LR: 0.0003 +[2026-02-27 23:54:29] (step=0008606) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.6838192134611623, LR: 0.0003 +[2026-02-27 23:54:37] (step=0008607) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 1.6840148698884758, LR: 0.0003 +[2026-02-27 23:54:45] (step=0008608) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.6842105263157894, LR: 0.0003 +[2026-02-27 23:54:53] (step=0008609) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 1.6844061827431032, LR: 0.0003 +[2026-02-27 23:55:01] (step=0008610) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 1.6846018391704167, LR: 0.0003 +[2026-02-27 23:55:08] (step=0008611) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.6847974955977303, LR: 0.0003 +[2026-02-27 23:55:16] (step=0008612) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.684993152025044, LR: 0.0003 +[2026-02-27 23:55:24] (step=0008613) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.6851888084523576, LR: 0.0003 +[2026-02-27 23:55:32] (step=0008614) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.6853844648796712, LR: 0.0003 +[2026-02-27 23:55:40] (step=0008615) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.685580121306985, LR: 0.0003 +[2026-02-27 23:55:48] (step=0008616) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.6857757777342985, LR: 0.0003 +[2026-02-27 23:55:56] (step=0008617) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 1.685971434161612, LR: 0.0003 +[2026-02-27 23:56:03] (step=0008618) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.6861670905889259, LR: 0.0003 +[2026-02-27 23:56:11] (step=0008619) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.6863627470162394, LR: 0.0003 +[2026-02-27 23:56:19] (step=0008620) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.686558403443553, LR: 0.0003 +[2026-02-27 23:56:27] (step=0008621) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.6867540598708668, LR: 0.0003 +[2026-02-27 23:56:35] (step=0008622) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.6869497162981804, LR: 0.0003 +[2026-02-27 23:56:42] (step=0008623) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.6871453727254941, LR: 0.0003 +[2026-02-27 23:56:50] (step=0008624) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.6873410291528077, LR: 0.0003 +[2026-02-27 23:56:58] (step=0008625) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.6875366855801213, LR: 0.0003 +[2026-02-27 23:57:06] (step=0008626) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.687732342007435, LR: 0.0003 +[2026-02-27 23:57:14] (step=0008627) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.6879279984347486, LR: 0.0003 +[2026-02-27 23:57:22] (step=0008628) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.6881236548620622, LR: 0.0003 +[2026-02-27 23:57:29] (step=0008629) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.688319311289376, LR: 0.0003 +[2026-02-27 23:57:37] (step=0008630) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 1.6885149677166895, LR: 0.0003 +[2026-02-27 23:57:45] (step=0008631) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.688710624144003, LR: 0.0003 +[2026-02-27 23:57:53] (step=0008632) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.6889062805713169, LR: 0.0003 +[2026-02-27 23:58:01] (step=0008633) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.6891019369986304, LR: 0.0003 +[2026-02-27 23:58:09] (step=0008634) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.689297593425944, LR: 0.0003 +[2026-02-27 23:58:16] (step=0008635) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.6894932498532578, LR: 0.0003 +[2026-02-27 23:58:24] (step=0008636) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.6896889062805713, LR: 0.0003 +[2026-02-27 23:58:32] (step=0008637) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.689884562707885, LR: 0.0003 +[2026-02-27 23:58:40] (step=0008638) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.6900802191351987, LR: 0.0003 +[2026-02-27 23:58:48] (step=0008639) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.6902758755625122, LR: 0.0003 +[2026-02-27 23:58:55] (step=0008640) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.6904715319898258, LR: 0.0003 +[2026-02-27 23:59:03] (step=0008641) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.6906671884171396, LR: 0.0003 +[2026-02-27 23:59:11] (step=0008642) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.6908628448444532, LR: 0.0003 +[2026-02-27 23:59:19] (step=0008643) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.6910585012717667, LR: 0.0003 +[2026-02-27 23:59:27] (step=0008644) Train Loss: 0.4698, Train Steps/Sec: 0.13, Epoch: 1.6912541576990805, LR: 0.0003 +[2026-02-27 23:59:35] (step=0008645) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.691449814126394, LR: 0.0003 +[2026-02-27 23:59:42] (step=0008646) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 1.6916454705537076, LR: 0.0003 +[2026-02-27 23:59:50] (step=0008647) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.6918411269810214, LR: 0.0003 +[2026-02-27 23:59:58] (step=0008648) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.692036783408335, LR: 0.0003 +[2026-02-28 00:00:06] (step=0008649) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.6922324398356485, LR: 0.0003 +[2026-02-28 00:00:14] (step=0008650) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.6924280962629623, LR: 0.0003 +[2026-02-28 00:00:22] (step=0008651) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.6926237526902759, LR: 0.0003 +[2026-02-28 00:00:29] (step=0008652) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 1.6928194091175894, LR: 0.0003 +[2026-02-28 00:00:37] (step=0008653) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.6930150655449032, LR: 0.0003 +[2026-02-28 00:00:45] (step=0008654) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 1.6932107219722168, LR: 0.0003 +[2026-02-28 00:00:53] (step=0008655) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.6934063783995303, LR: 0.0003 +[2026-02-28 00:01:01] (step=0008656) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 1.6936020348268441, LR: 0.0003 +[2026-02-28 00:01:08] (step=0008657) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.6937976912541577, LR: 0.0003 +[2026-02-28 00:01:16] (step=0008658) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 1.6939933476814713, LR: 0.0003 +[2026-02-28 00:01:24] (step=0008659) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.694189004108785, LR: 0.0003 +[2026-02-28 00:01:32] (step=0008660) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 1.6943846605360986, LR: 0.0003 +[2026-02-28 00:01:40] (step=0008661) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.6945803169634122, LR: 0.0003 +[2026-02-28 00:01:48] (step=0008662) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.694775973390726, LR: 0.0003 +[2026-02-28 00:01:55] (step=0008663) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.6949716298180395, LR: 0.0003 +[2026-02-28 00:02:03] (step=0008664) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 1.695167286245353, LR: 0.0003 +[2026-02-28 00:02:11] (step=0008665) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.6953629426726669, LR: 0.0003 +[2026-02-28 00:02:19] (step=0008666) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.6955585990999804, LR: 0.0003 +[2026-02-28 00:02:27] (step=0008667) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.695754255527294, LR: 0.0003 +[2026-02-28 00:02:35] (step=0008668) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.6959499119546078, LR: 0.0003 +[2026-02-28 00:02:43] (step=0008669) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.6961455683819213, LR: 0.0003 +[2026-02-28 00:02:50] (step=0008670) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.6963412248092349, LR: 0.0003 +[2026-02-28 00:02:58] (step=0008671) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.6965368812365487, LR: 0.0003 +[2026-02-28 00:03:06] (step=0008672) Train Loss: 0.4723, Train Steps/Sec: 0.13, Epoch: 1.6967325376638622, LR: 0.0003 +[2026-02-28 00:03:14] (step=0008673) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.6969281940911758, LR: 0.0003 +[2026-02-28 00:03:22] (step=0008674) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 1.6971238505184896, LR: 0.0003 +[2026-02-28 00:03:30] (step=0008675) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.6973195069458031, LR: 0.0003 +[2026-02-28 00:03:37] (step=0008676) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 1.6975151633731167, LR: 0.0003 +[2026-02-28 00:03:45] (step=0008677) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.6977108198004305, LR: 0.0003 +[2026-02-28 00:03:53] (step=0008678) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.697906476227744, LR: 0.0003 +[2026-02-28 00:04:01] (step=0008679) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.6981021326550578, LR: 0.0003 +[2026-02-28 00:04:09] (step=0008680) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.6982977890823714, LR: 0.0003 +[2026-02-28 00:04:17] (step=0008681) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.698493445509685, LR: 0.0003 +[2026-02-28 00:04:24] (step=0008682) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.6986891019369987, LR: 0.0003 +[2026-02-28 00:04:32] (step=0008683) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.6988847583643123, LR: 0.0003 +[2026-02-28 00:04:40] (step=0008684) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.6990804147916259, LR: 0.0003 +[2026-02-28 00:04:48] (step=0008685) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.6992760712189396, LR: 0.0003 +[2026-02-28 00:04:56] (step=0008686) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 1.6994717276462532, LR: 0.0003 +[2026-02-28 00:05:04] (step=0008687) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.6996673840735668, LR: 0.0003 +[2026-02-28 00:05:11] (step=0008688) Train Loss: 0.4740, Train Steps/Sec: 0.13, Epoch: 1.6998630405008806, LR: 0.0003 +[2026-02-28 00:05:19] (step=0008689) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.7000586969281941, LR: 0.0003 +[2026-02-28 00:05:27] (step=0008690) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 1.7002543533555077, LR: 0.0003 +[2026-02-28 00:05:35] (step=0008691) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.7004500097828215, LR: 0.0003 +[2026-02-28 00:05:43] (step=0008692) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 1.700645666210135, LR: 0.0003 +[2026-02-28 00:05:51] (step=0008693) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.7008413226374486, LR: 0.0003 +[2026-02-28 00:05:58] (step=0008694) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.7010369790647624, LR: 0.0003 +[2026-02-28 00:06:06] (step=0008695) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.701232635492076, LR: 0.0003 +[2026-02-28 00:06:14] (step=0008696) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.7014282919193895, LR: 0.0003 +[2026-02-28 00:06:22] (step=0008697) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.7016239483467033, LR: 0.0003 +[2026-02-28 00:06:30] (step=0008698) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.7018196047740168, LR: 0.0003 +[2026-02-28 00:06:38] (step=0008699) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.7020152612013304, LR: 0.0003 +[2026-02-28 00:06:45] (step=0008700) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.7022109176286442, LR: 0.0003 +[2026-02-28 00:06:53] (step=0008701) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.7024065740559577, LR: 0.0003 +[2026-02-28 00:07:01] (step=0008702) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.7026022304832713, LR: 0.0003 +[2026-02-28 00:07:09] (step=0008703) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.702797886910585, LR: 0.0003 +[2026-02-28 00:07:17] (step=0008704) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.7029935433378987, LR: 0.0003 +[2026-02-28 00:07:25] (step=0008705) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.7031891997652122, LR: 0.0003 +[2026-02-28 00:07:32] (step=0008706) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.703384856192526, LR: 0.0003 +[2026-02-28 00:07:40] (step=0008707) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.7035805126198396, LR: 0.0003 +[2026-02-28 00:07:48] (step=0008708) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.7037761690471531, LR: 0.0003 +[2026-02-28 00:07:56] (step=0008709) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.703971825474467, LR: 0.0003 +[2026-02-28 00:08:04] (step=0008710) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.7041674819017805, LR: 0.0003 +[2026-02-28 00:08:12] (step=0008711) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 1.704363138329094, LR: 0.0003 +[2026-02-28 00:08:19] (step=0008712) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.7045587947564078, LR: 0.0003 +[2026-02-28 00:08:27] (step=0008713) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 1.7047544511837214, LR: 0.0003 +[2026-02-28 00:08:35] (step=0008714) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.704950107611035, LR: 0.0003 +[2026-02-28 00:08:43] (step=0008715) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.7051457640383487, LR: 0.0003 +[2026-02-28 00:08:51] (step=0008716) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.7053414204656623, LR: 0.0003 +[2026-02-28 00:08:59] (step=0008717) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.7055370768929758, LR: 0.0003 +[2026-02-28 00:09:06] (step=0008718) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.7057327333202896, LR: 0.0003 +[2026-02-28 00:09:14] (step=0008719) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.7059283897476032, LR: 0.0003 +[2026-02-28 00:09:22] (step=0008720) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.7061240461749168, LR: 0.0003 +[2026-02-28 00:09:30] (step=0008721) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.7063197026022305, LR: 0.0003 +[2026-02-28 00:09:38] (step=0008722) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 1.706515359029544, LR: 0.0003 +[2026-02-28 00:09:46] (step=0008723) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 1.7067110154568577, LR: 0.0003 +[2026-02-28 00:09:53] (step=0008724) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 1.7069066718841714, LR: 0.0003 +[2026-02-28 00:10:01] (step=0008725) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.707102328311485, LR: 0.0003 +[2026-02-28 00:10:09] (step=0008726) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.7072979847387986, LR: 0.0003 +[2026-02-28 00:10:17] (step=0008727) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 1.7074936411661124, LR: 0.0003 +[2026-02-28 00:10:25] (step=0008728) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 1.707689297593426, LR: 0.0003 +[2026-02-28 00:10:33] (step=0008729) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.7078849540207395, LR: 0.0003 +[2026-02-28 00:10:40] (step=0008730) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 1.7080806104480533, LR: 0.0003 +[2026-02-28 00:10:48] (step=0008731) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.7082762668753668, LR: 0.0003 +[2026-02-28 00:10:56] (step=0008732) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.7084719233026804, LR: 0.0003 +[2026-02-28 00:11:04] (step=0008733) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.7086675797299942, LR: 0.0003 +[2026-02-28 00:11:12] (step=0008734) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.7088632361573077, LR: 0.0003 +[2026-02-28 00:11:20] (step=0008735) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 1.7090588925846215, LR: 0.0003 +[2026-02-28 00:11:27] (step=0008736) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.709254549011935, LR: 0.0003 +[2026-02-28 00:11:35] (step=0008737) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 1.7094502054392486, LR: 0.0003 +[2026-02-28 00:11:43] (step=0008738) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.7096458618665624, LR: 0.0003 +[2026-02-28 00:11:51] (step=0008739) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 1.709841518293876, LR: 0.0003 +[2026-02-28 00:11:59] (step=0008740) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.7100371747211895, LR: 0.0003 +[2026-02-28 00:12:06] (step=0008741) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.7102328311485033, LR: 0.0003 +[2026-02-28 00:12:14] (step=0008742) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.710428487575817, LR: 0.0003 +[2026-02-28 00:12:22] (step=0008743) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 1.7106241440031305, LR: 0.0003 +[2026-02-28 00:12:30] (step=0008744) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.7108198004304442, LR: 0.0003 +[2026-02-28 00:12:38] (step=0008745) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.7110154568577578, LR: 0.0003 +[2026-02-28 00:12:45] (step=0008746) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 1.7112111132850714, LR: 0.0003 +[2026-02-28 00:12:53] (step=0008747) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 1.7114067697123851, LR: 0.0003 +[2026-02-28 00:13:01] (step=0008748) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.7116024261396987, LR: 0.0003 +[2026-02-28 00:13:09] (step=0008749) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.7117980825670123, LR: 0.0003 +[2026-02-28 00:13:17] (step=0008750) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.711993738994326, LR: 0.0003 +[2026-02-28 00:13:25] (step=0008751) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.7121893954216396, LR: 0.0003 +[2026-02-28 00:13:32] (step=0008752) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.7123850518489532, LR: 0.0003 +[2026-02-28 00:13:40] (step=0008753) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.712580708276267, LR: 0.0003 +[2026-02-28 00:13:48] (step=0008754) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 1.7127763647035805, LR: 0.0003 +[2026-02-28 00:13:56] (step=0008755) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 1.712972021130894, LR: 0.0003 +[2026-02-28 00:14:04] (step=0008756) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.7131676775582079, LR: 0.0003 +[2026-02-28 00:14:11] (step=0008757) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.7133633339855214, LR: 0.0003 +[2026-02-28 00:14:19] (step=0008758) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.713558990412835, LR: 0.0003 +[2026-02-28 00:14:27] (step=0008759) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.7137546468401488, LR: 0.0003 +[2026-02-28 00:14:35] (step=0008760) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.7139503032674623, LR: 0.0003 +[2026-02-28 00:14:43] (step=0008761) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.714145959694776, LR: 0.0003 +[2026-02-28 00:14:50] (step=0008762) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 1.7143416161220897, LR: 0.0003 +[2026-02-28 00:14:58] (step=0008763) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.7145372725494032, LR: 0.0003 +[2026-02-28 00:15:06] (step=0008764) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.7147329289767168, LR: 0.0003 +[2026-02-28 00:15:14] (step=0008765) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 1.7149285854040306, LR: 0.0003 +[2026-02-28 00:15:22] (step=0008766) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 1.7151242418313442, LR: 0.0003 +[2026-02-28 00:15:29] (step=0008767) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.7153198982586577, LR: 0.0003 +[2026-02-28 00:15:37] (step=0008768) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.7155155546859715, LR: 0.0003 +[2026-02-28 00:15:45] (step=0008769) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.715711211113285, LR: 0.0003 +[2026-02-28 00:15:53] (step=0008770) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 1.7159068675405986, LR: 0.0003 +[2026-02-28 00:16:01] (step=0008771) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.7161025239679124, LR: 0.0003 +[2026-02-28 00:16:09] (step=0008772) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.716298180395226, LR: 0.0003 +[2026-02-28 00:16:16] (step=0008773) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.7164938368225395, LR: 0.0003 +[2026-02-28 00:16:24] (step=0008774) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 1.7166894932498533, LR: 0.0003 +[2026-02-28 00:16:32] (step=0008775) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.7168851496771669, LR: 0.0003 +[2026-02-28 00:16:40] (step=0008776) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.7170808061044804, LR: 0.0003 +[2026-02-28 00:16:48] (step=0008777) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 1.7172764625317942, LR: 0.0003 +[2026-02-28 00:16:56] (step=0008778) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.7174721189591078, LR: 0.0003 +[2026-02-28 00:17:03] (step=0008779) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.7176677753864213, LR: 0.0003 +[2026-02-28 00:17:11] (step=0008780) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.7178634318137351, LR: 0.0003 +[2026-02-28 00:17:19] (step=0008781) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.7180590882410487, LR: 0.0003 +[2026-02-28 00:17:27] (step=0008782) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.7182547446683623, LR: 0.0003 +[2026-02-28 00:17:35] (step=0008783) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.718450401095676, LR: 0.0003 +[2026-02-28 00:17:43] (step=0008784) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.7186460575229896, LR: 0.0003 +[2026-02-28 00:17:50] (step=0008785) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.7188417139503032, LR: 0.0003 +[2026-02-28 00:17:58] (step=0008786) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.719037370377617, LR: 0.0003 +[2026-02-28 00:18:06] (step=0008787) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.7192330268049305, LR: 0.0003 +[2026-02-28 00:18:14] (step=0008788) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.719428683232244, LR: 0.0003 +[2026-02-28 00:18:22] (step=0008789) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.7196243396595579, LR: 0.0003 +[2026-02-28 00:18:29] (step=0008790) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 1.7198199960868714, LR: 0.0003 +[2026-02-28 00:18:37] (step=0008791) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.7200156525141852, LR: 0.0003 +[2026-02-28 00:18:45] (step=0008792) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 1.7202113089414988, LR: 0.0003 +[2026-02-28 00:18:53] (step=0008793) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.7204069653688123, LR: 0.0003 +[2026-02-28 00:19:01] (step=0008794) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.720602621796126, LR: 0.0003 +[2026-02-28 00:19:08] (step=0008795) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.7207982782234397, LR: 0.0003 +[2026-02-28 00:19:16] (step=0008796) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.7209939346507532, LR: 0.0003 +[2026-02-28 00:19:24] (step=0008797) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.721189591078067, LR: 0.0003 +[2026-02-28 00:19:32] (step=0008798) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.7213852475053806, LR: 0.0003 +[2026-02-28 00:19:40] (step=0008799) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 1.7215809039326941, LR: 0.0003 +[2026-02-28 00:19:48] (step=0008800) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 1.721776560360008, LR: 0.0003 +[2026-02-28 00:19:55] (step=0008801) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.7219722167873215, LR: 0.0003 +[2026-02-28 00:20:03] (step=0008802) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.722167873214635, LR: 0.0003 +[2026-02-28 00:20:11] (step=0008803) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.7223635296419488, LR: 0.0003 +[2026-02-28 00:20:19] (step=0008804) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 1.7225591860692624, LR: 0.0003 +[2026-02-28 00:20:27] (step=0008805) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 1.722754842496576, LR: 0.0003 +[2026-02-28 00:20:35] (step=0008806) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.7229504989238897, LR: 0.0003 +[2026-02-28 00:20:42] (step=0008807) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.7231461553512033, LR: 0.0003 +[2026-02-28 00:20:50] (step=0008808) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.7233418117785169, LR: 0.0003 +[2026-02-28 00:20:58] (step=0008809) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 1.7235374682058306, LR: 0.0003 +[2026-02-28 00:21:06] (step=0008810) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 1.7237331246331442, LR: 0.0003 +[2026-02-28 00:21:14] (step=0008811) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.7239287810604578, LR: 0.0003 +[2026-02-28 00:21:21] (step=0008812) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 1.7241244374877716, LR: 0.0003 +[2026-02-28 00:21:29] (step=0008813) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.7243200939150851, LR: 0.0003 +[2026-02-28 00:21:37] (step=0008814) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.7245157503423987, LR: 0.0003 +[2026-02-28 00:21:45] (step=0008815) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.7247114067697125, LR: 0.0003 +[2026-02-28 00:21:53] (step=0008816) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.724907063197026, LR: 0.0003 +[2026-02-28 00:22:01] (step=0008817) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.7251027196243396, LR: 0.0003 +[2026-02-28 00:22:08] (step=0008818) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 1.7252983760516534, LR: 0.0003 +[2026-02-28 00:22:16] (step=0008819) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.725494032478967, LR: 0.0003 +[2026-02-28 00:22:24] (step=0008820) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 1.7256896889062805, LR: 0.0003 +[2026-02-28 00:22:32] (step=0008821) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.7258853453335943, LR: 0.0003 +[2026-02-28 00:22:40] (step=0008822) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.7260810017609078, LR: 0.0003 +[2026-02-28 00:22:48] (step=0008823) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.7262766581882214, LR: 0.0003 +[2026-02-28 00:22:55] (step=0008824) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 1.7264723146155352, LR: 0.0003 +[2026-02-28 00:23:03] (step=0008825) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 1.7266679710428487, LR: 0.0003 +[2026-02-28 00:23:11] (step=0008826) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 1.7268636274701623, LR: 0.0003 +[2026-02-28 00:23:19] (step=0008827) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 1.727059283897476, LR: 0.0003 +[2026-02-28 00:23:27] (step=0008828) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.7272549403247897, LR: 0.0003 +[2026-02-28 00:23:35] (step=0008829) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.7274505967521032, LR: 0.0003 +[2026-02-28 00:23:42] (step=0008830) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 1.727646253179417, LR: 0.0003 +[2026-02-28 00:23:50] (step=0008831) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.7278419096067306, LR: 0.0003 +[2026-02-28 00:23:58] (step=0008832) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.7280375660340441, LR: 0.0003 +[2026-02-28 00:24:06] (step=0008833) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.728233222461358, LR: 0.0003 +[2026-02-28 00:24:14] (step=0008834) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 1.7284288788886715, LR: 0.0003 +[2026-02-28 00:24:22] (step=0008835) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.728624535315985, LR: 0.0003 +[2026-02-28 00:24:29] (step=0008836) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.7288201917432988, LR: 0.0003 +[2026-02-28 00:24:37] (step=0008837) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.7290158481706124, LR: 0.0003 +[2026-02-28 00:24:45] (step=0008838) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 1.729211504597926, LR: 0.0003 +[2026-02-28 00:24:53] (step=0008839) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 1.7294071610252397, LR: 0.0003 +[2026-02-28 00:25:01] (step=0008840) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.7296028174525533, LR: 0.0003 +[2026-02-28 00:25:08] (step=0008841) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 1.7297984738798668, LR: 0.0003 +[2026-02-28 00:25:16] (step=0008842) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.7299941303071806, LR: 0.0003 +[2026-02-28 00:25:24] (step=0008843) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 1.7301897867344942, LR: 0.0003 +[2026-02-28 00:25:32] (step=0008844) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.7303854431618078, LR: 0.0003 +[2026-02-28 00:25:40] (step=0008845) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.7305810995891215, LR: 0.0003 +[2026-02-28 00:25:48] (step=0008846) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.730776756016435, LR: 0.0003 +[2026-02-28 00:25:55] (step=0008847) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.7309724124437489, LR: 0.0003 +[2026-02-28 00:26:03] (step=0008848) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.7311680688710624, LR: 0.0003 +[2026-02-28 00:26:11] (step=0008849) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.731363725298376, LR: 0.0003 +[2026-02-28 00:26:19] (step=0008850) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.7315593817256898, LR: 0.0003 +[2026-02-28 00:26:27] (step=0008851) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 1.7317550381530034, LR: 0.0003 +[2026-02-28 00:26:34] (step=0008852) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 1.731950694580317, LR: 0.0003 +[2026-02-28 00:26:42] (step=0008853) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 1.7321463510076307, LR: 0.0003 +[2026-02-28 00:26:50] (step=0008854) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 1.7323420074349443, LR: 0.0003 +[2026-02-28 00:26:58] (step=0008855) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.7325376638622578, LR: 0.0003 +[2026-02-28 00:27:06] (step=0008856) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.7327333202895716, LR: 0.0003 +[2026-02-28 00:27:14] (step=0008857) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 1.7329289767168852, LR: 0.0003 +[2026-02-28 00:27:21] (step=0008858) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 1.7331246331441987, LR: 0.0003 +[2026-02-28 00:27:29] (step=0008859) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.7333202895715125, LR: 0.0003 +[2026-02-28 00:27:37] (step=0008860) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.733515945998826, LR: 0.0003 +[2026-02-28 00:27:45] (step=0008861) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.7337116024261396, LR: 0.0003 +[2026-02-28 00:27:53] (step=0008862) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.7339072588534534, LR: 0.0003 +[2026-02-28 00:28:01] (step=0008863) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.734102915280767, LR: 0.0003 +[2026-02-28 00:28:08] (step=0008864) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 1.7342985717080805, LR: 0.0003 +[2026-02-28 00:28:16] (step=0008865) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 1.7344942281353943, LR: 0.0003 +[2026-02-28 00:28:24] (step=0008866) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.734689884562708, LR: 0.0003 +[2026-02-28 00:28:32] (step=0008867) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.7348855409900215, LR: 0.0003 +[2026-02-28 00:28:40] (step=0008868) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.7350811974173352, LR: 0.0003 +[2026-02-28 00:28:47] (step=0008869) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.7352768538446488, LR: 0.0003 +[2026-02-28 00:28:55] (step=0008870) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.7354725102719624, LR: 0.0003 +[2026-02-28 00:29:03] (step=0008871) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.7356681666992761, LR: 0.0003 +[2026-02-28 00:29:11] (step=0008872) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 1.7358638231265897, LR: 0.0003 +[2026-02-28 00:29:19] (step=0008873) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.7360594795539033, LR: 0.0003 +[2026-02-28 00:29:27] (step=0008874) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.736255135981217, LR: 0.0003 +[2026-02-28 00:29:35] (step=0008875) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.7364507924085306, LR: 0.0003 +[2026-02-28 00:29:42] (step=0008876) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 1.7366464488358442, LR: 0.0003 +[2026-02-28 00:29:50] (step=0008877) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.736842105263158, LR: 0.0003 +[2026-02-28 00:29:58] (step=0008878) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 1.7370377616904715, LR: 0.0003 +[2026-02-28 00:30:06] (step=0008879) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 1.737233418117785, LR: 0.0003 +[2026-02-28 00:30:14] (step=0008880) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 1.7374290745450989, LR: 0.0003 +[2026-02-28 00:30:22] (step=0008881) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.7376247309724124, LR: 0.0003 +[2026-02-28 00:30:29] (step=0008882) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 1.737820387399726, LR: 0.0003 +[2026-02-28 00:30:37] (step=0008883) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 1.7380160438270398, LR: 0.0003 +[2026-02-28 00:30:45] (step=0008884) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.7382117002543533, LR: 0.0003 +[2026-02-28 00:30:53] (step=0008885) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 1.738407356681667, LR: 0.0003 +[2026-02-28 00:31:01] (step=0008886) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.7386030131089807, LR: 0.0003 +[2026-02-28 00:31:09] (step=0008887) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 1.7387986695362943, LR: 0.0003 +[2026-02-28 00:31:16] (step=0008888) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 1.7389943259636078, LR: 0.0003 +[2026-02-28 00:31:24] (step=0008889) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.7391899823909216, LR: 0.0003 +[2026-02-28 00:31:32] (step=0008890) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.7393856388182352, LR: 0.0003 +[2026-02-28 00:31:40] (step=0008891) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.7395812952455487, LR: 0.0003 +[2026-02-28 00:31:48] (step=0008892) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.7397769516728625, LR: 0.0003 +[2026-02-28 00:31:55] (step=0008893) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.739972608100176, LR: 0.0003 +[2026-02-28 00:32:03] (step=0008894) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 1.7401682645274896, LR: 0.0003 +[2026-02-28 00:32:11] (step=0008895) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.7403639209548034, LR: 0.0003 +[2026-02-28 00:32:19] (step=0008896) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.740559577382117, LR: 0.0003 +[2026-02-28 00:32:27] (step=0008897) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 1.7407552338094305, LR: 0.0003 +[2026-02-28 00:32:34] (step=0008898) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.7409508902367443, LR: 0.0003 +[2026-02-28 00:32:42] (step=0008899) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.7411465466640579, LR: 0.0003 +[2026-02-28 00:32:50] (step=0008900) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 1.7413422030913714, LR: 0.0003 +[2026-02-28 00:32:58] (step=0008901) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.7415378595186852, LR: 0.0003 +[2026-02-28 00:33:06] (step=0008902) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 1.7417335159459988, LR: 0.0003 +[2026-02-28 00:33:14] (step=0008903) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 1.7419291723733126, LR: 0.0003 +[2026-02-28 00:33:21] (step=0008904) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 1.7421248288006261, LR: 0.0003 +[2026-02-28 00:33:29] (step=0008905) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.7423204852279397, LR: 0.0003 +[2026-02-28 00:33:37] (step=0008906) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.7425161416552535, LR: 0.0003 +[2026-02-28 00:33:45] (step=0008907) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 1.742711798082567, LR: 0.0003 +[2026-02-28 00:33:53] (step=0008908) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 1.7429074545098806, LR: 0.0003 +[2026-02-28 00:34:00] (step=0008909) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.7431031109371944, LR: 0.0003 +[2026-02-28 00:34:08] (step=0008910) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 1.743298767364508, LR: 0.0003 +[2026-02-28 00:34:16] (step=0008911) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.7434944237918215, LR: 0.0003 +[2026-02-28 00:34:24] (step=0008912) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 1.7436900802191353, LR: 0.0003 +[2026-02-28 00:34:32] (step=0008913) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 1.7438857366464489, LR: 0.0003 +[2026-02-28 00:34:39] (step=0008914) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.7440813930737624, LR: 0.0003 +[2026-02-28 00:34:47] (step=0008915) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 1.7442770495010762, LR: 0.0003 +[2026-02-28 00:34:55] (step=0008916) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 1.7444727059283898, LR: 0.0003 +[2026-02-28 00:35:03] (step=0008917) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.7446683623557033, LR: 0.0003 +[2026-02-28 00:35:11] (step=0008918) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.7448640187830171, LR: 0.0003 +[2026-02-28 00:35:19] (step=0008919) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.7450596752103307, LR: 0.0003 +[2026-02-28 00:35:27] (step=0008920) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 1.7452553316376442, LR: 0.0003 +[2026-02-28 00:35:34] (step=0008921) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 1.745450988064958, LR: 0.0003 +[2026-02-28 00:35:42] (step=0008922) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.7456466444922716, LR: 0.0003 +[2026-02-28 00:35:50] (step=0008923) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.7458423009195851, LR: 0.0003 +[2026-02-28 00:35:58] (step=0008924) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.746037957346899, LR: 0.0003 +[2026-02-28 00:36:06] (step=0008925) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.7462336137742125, LR: 0.0003 +[2026-02-28 00:36:14] (step=0008926) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 1.746429270201526, LR: 0.0003 +[2026-02-28 00:36:21] (step=0008927) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 1.7466249266288398, LR: 0.0003 +[2026-02-28 00:36:29] (step=0008928) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.7468205830561534, LR: 0.0003 +[2026-02-28 00:36:37] (step=0008929) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 1.747016239483467, LR: 0.0003 +[2026-02-28 00:36:45] (step=0008930) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.7472118959107807, LR: 0.0003 +[2026-02-28 00:36:53] (step=0008931) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 1.7474075523380943, LR: 0.0003 +[2026-02-28 00:37:00] (step=0008932) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 1.7476032087654079, LR: 0.0003 +[2026-02-28 00:37:08] (step=0008933) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.7477988651927217, LR: 0.0003 +[2026-02-28 00:37:16] (step=0008934) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.7479945216200352, LR: 0.0003 +[2026-02-28 00:37:24] (step=0008935) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 1.7481901780473488, LR: 0.0003 +[2026-02-28 00:37:32] (step=0008936) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 1.7483858344746626, LR: 0.0003 +[2026-02-28 00:37:40] (step=0008937) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 1.7485814909019761, LR: 0.0003 +[2026-02-28 00:37:47] (step=0008938) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 1.7487771473292897, LR: 0.0003 +[2026-02-28 00:37:55] (step=0008939) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 1.7489728037566035, LR: 0.0003 +[2026-02-28 00:38:03] (step=0008940) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.749168460183917, LR: 0.0003 +[2026-02-28 00:38:11] (step=0008941) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.7493641166112306, LR: 0.0003 +[2026-02-28 00:38:19] (step=0008942) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 1.7495597730385444, LR: 0.0003 +[2026-02-28 00:38:26] (step=0008943) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.749755429465858, LR: 0.0003 +[2026-02-28 00:38:34] (step=0008944) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.7499510858931715, LR: 0.0003 +[2026-02-28 00:38:42] (step=0008945) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.7501467423204853, LR: 0.0003 +[2026-02-28 00:38:50] (step=0008946) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 1.7503423987477988, LR: 0.0003 +[2026-02-28 00:38:58] (step=0008947) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 1.7505380551751124, LR: 0.0003 +[2026-02-28 00:39:06] (step=0008948) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.7507337116024262, LR: 0.0003 +[2026-02-28 00:39:13] (step=0008949) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 1.7509293680297398, LR: 0.0003 +[2026-02-28 00:39:21] (step=0008950) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.7511250244570533, LR: 0.0003 +[2026-02-28 00:39:29] (step=0008951) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 1.751320680884367, LR: 0.0003 +[2026-02-28 00:39:37] (step=0008952) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 1.7515163373116807, LR: 0.0003 +[2026-02-28 00:39:45] (step=0008953) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 1.7517119937389942, LR: 0.0003 +[2026-02-28 00:39:52] (step=0008954) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 1.751907650166308, LR: 0.0003 +[2026-02-28 00:40:00] (step=0008955) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.7521033065936216, LR: 0.0003 +[2026-02-28 00:40:08] (step=0008956) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 1.7522989630209351, LR: 0.0003 +[2026-02-28 00:40:16] (step=0008957) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 1.752494619448249, LR: 0.0003 +[2026-02-28 00:40:24] (step=0008958) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.7526902758755625, LR: 0.0003 +[2026-02-28 00:40:32] (step=0008959) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.7528859323028763, LR: 0.0003 +[2026-02-28 00:40:39] (step=0008960) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.7530815887301898, LR: 0.0003 +[2026-02-28 00:40:47] (step=0008961) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 1.7532772451575034, LR: 0.0003 +[2026-02-28 00:40:55] (step=0008962) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.7534729015848172, LR: 0.0003 +[2026-02-28 00:41:03] (step=0008963) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 1.7536685580121307, LR: 0.0003 +[2026-02-28 00:41:11] (step=0008964) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 1.7538642144394443, LR: 0.0003 +[2026-02-28 00:41:19] (step=0008965) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 1.754059870866758, LR: 0.0003 +[2026-02-28 00:41:26] (step=0008966) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 1.7542555272940716, LR: 0.0003 +[2026-02-28 00:41:34] (step=0008967) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 1.7544511837213852, LR: 0.0003 +[2026-02-28 00:41:42] (step=0008968) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.754646840148699, LR: 0.0003 +[2026-02-28 00:41:50] (step=0008969) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 1.7548424965760125, LR: 0.0003 +[2026-02-28 00:41:58] (step=0008970) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 1.755038153003326, LR: 0.0003 +[2026-02-28 00:42:06] (step=0008971) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 1.75523380943064, LR: 0.0003 +[2026-02-28 00:42:13] (step=0008972) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.7554294658579535, LR: 0.0003 +[2026-02-28 00:42:21] (step=0008973) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 1.755625122285267, LR: 0.0003 +[2026-02-28 00:42:29] (step=0008974) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.7558207787125808, LR: 0.0003 +[2026-02-28 00:42:37] (step=0008975) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.7560164351398944, LR: 0.0003 +[2026-02-28 00:42:45] (step=0008976) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.756212091567208, LR: 0.0003 +[2026-02-28 00:42:53] (step=0008977) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.7564077479945217, LR: 0.0003 +[2026-02-28 00:43:00] (step=0008978) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.7566034044218353, LR: 0.0003 +[2026-02-28 00:43:08] (step=0008979) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 1.7567990608491488, LR: 0.0003 +[2026-02-28 00:43:16] (step=0008980) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.7569947172764626, LR: 0.0003 +[2026-02-28 00:43:24] (step=0008981) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 1.7571903737037762, LR: 0.0003 +[2026-02-28 00:43:32] (step=0008982) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 1.7573860301310897, LR: 0.0003 +[2026-02-28 00:43:40] (step=0008983) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 1.7575816865584035, LR: 0.0003 +[2026-02-28 00:43:47] (step=0008984) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.757777342985717, LR: 0.0003 +[2026-02-28 00:43:55] (step=0008985) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 1.7579729994130306, LR: 0.0003 +[2026-02-28 00:44:03] (step=0008986) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 1.7581686558403444, LR: 0.0003 +[2026-02-28 00:44:11] (step=0008987) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 1.758364312267658, LR: 0.0003 +[2026-02-28 00:44:19] (step=0008988) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 1.7585599686949716, LR: 0.0003 +[2026-02-28 00:44:27] (step=0008989) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 1.7587556251222853, LR: 0.0003 +[2026-02-28 00:44:34] (step=0008990) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.758951281549599, LR: 0.0003 +[2026-02-28 00:44:42] (step=0008991) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.7591469379769125, LR: 0.0003 +[2026-02-28 00:44:50] (step=0008992) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 1.7593425944042262, LR: 0.0003 +[2026-02-28 00:44:58] (step=0008993) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.7595382508315398, LR: 0.0003 +[2026-02-28 00:45:06] (step=0008994) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 1.7597339072588534, LR: 0.0003 +[2026-02-28 00:45:14] (step=0008995) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 1.7599295636861672, LR: 0.0003 +[2026-02-28 00:45:21] (step=0008996) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.7601252201134807, LR: 0.0003 +[2026-02-28 00:45:29] (step=0008997) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.7603208765407943, LR: 0.0003 +[2026-02-28 00:45:37] (step=0008998) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.760516532968108, LR: 0.0003 +[2026-02-28 00:45:45] (step=0008999) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 1.7607121893954216, LR: 0.0003 +[2026-02-28 00:45:53] (step=0009000) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 1.7609078458227352, LR: 0.0003 +[2026-02-28 00:45:53] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0009000/ +[2026-02-28 00:46:01] (step=0009001) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 1.761103502250049, LR: 0.0003 +[2026-02-28 00:46:08] (step=0009002) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 1.7612991586773625, LR: 0.0003 +[2026-02-28 00:46:16] (step=0009003) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 1.761494815104676, LR: 0.0003 +[2026-02-28 00:46:24] (step=0009004) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 1.7616904715319899, LR: 0.0003 +[2026-02-28 00:46:32] (step=0009005) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 1.7618861279593034, LR: 0.0003 +[2026-02-28 00:46:40] (step=0009006) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 1.762081784386617, LR: 0.0003 +[2026-02-28 00:46:48] (step=0009007) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.7622774408139308, LR: 0.0003 +[2026-02-28 00:46:55] (step=0009008) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 1.7624730972412443, LR: 0.0003 +[2026-02-28 00:47:03] (step=0009009) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.762668753668558, LR: 0.0003 +[2026-02-28 00:47:11] (step=0009010) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 1.7628644100958717, LR: 0.0003 +[2026-02-28 00:47:19] (step=0009011) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 1.7630600665231853, LR: 0.0003 +[2026-02-28 00:47:27] (step=0009012) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.7632557229504988, LR: 0.0003 +[2026-02-28 00:47:34] (step=0009013) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 1.7634513793778126, LR: 0.0003 +[2026-02-28 00:47:42] (step=0009014) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 1.7636470358051262, LR: 0.0003 +[2026-02-28 00:47:50] (step=0009015) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.76384269223244, LR: 0.0003 +[2026-02-28 00:47:58] (step=0009016) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.7640383486597535, LR: 0.0003 +[2026-02-28 00:48:06] (step=0009017) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.764234005087067, LR: 0.0003 +[2026-02-28 00:48:14] (step=0009018) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.7644296615143809, LR: 0.0003 +[2026-02-28 00:48:22] (step=0009019) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 1.7646253179416944, LR: 0.0003 +[2026-02-28 00:48:30] (step=0009020) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 1.764820974369008, LR: 0.0003 +[2026-02-28 00:48:37] (step=0009021) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.7650166307963218, LR: 0.0003 +[2026-02-28 00:48:45] (step=0009022) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 1.7652122872236353, LR: 0.0003 +[2026-02-28 00:48:53] (step=0009023) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.7654079436509489, LR: 0.0003 +[2026-02-28 00:49:01] (step=0009024) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 1.7656036000782627, LR: 0.0003 +[2026-02-28 00:49:09] (step=0009025) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.7657992565055762, LR: 0.0003 +[2026-02-28 00:49:17] (step=0009026) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 1.7659949129328898, LR: 0.0003 +[2026-02-28 00:49:24] (step=0009027) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.7661905693602036, LR: 0.0003 +[2026-02-28 00:49:32] (step=0009028) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.7663862257875171, LR: 0.0003 +[2026-02-28 00:49:40] (step=0009029) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 1.7665818822148307, LR: 0.0003 +[2026-02-28 00:49:48] (step=0009030) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 1.7667775386421445, LR: 0.0003 +[2026-02-28 00:49:56] (step=0009031) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 1.766973195069458, LR: 0.0003 +[2026-02-28 00:50:04] (step=0009032) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 1.7671688514967716, LR: 0.0003 +[2026-02-28 00:50:11] (step=0009033) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 1.7673645079240854, LR: 0.0003 +[2026-02-28 00:50:19] (step=0009034) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 1.767560164351399, LR: 0.0003 +[2026-02-28 00:50:27] (step=0009035) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.7677558207787125, LR: 0.0003 +[2026-02-28 00:50:35] (step=0009036) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 1.7679514772060263, LR: 0.0003 +[2026-02-28 00:50:43] (step=0009037) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 1.7681471336333399, LR: 0.0003 +[2026-02-28 00:50:51] (step=0009038) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 1.7683427900606534, LR: 0.0003 +[2026-02-28 00:50:58] (step=0009039) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 1.7685384464879672, LR: 0.0003 +[2026-02-28 00:51:06] (step=0009040) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 1.7687341029152808, LR: 0.0003 +[2026-02-28 00:51:14] (step=0009041) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.7689297593425943, LR: 0.0003 +[2026-02-28 00:51:22] (step=0009042) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 1.7691254157699081, LR: 0.0003 +[2026-02-28 00:51:30] (step=0009043) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 1.7693210721972217, LR: 0.0003 +[2026-02-28 00:51:37] (step=0009044) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.7695167286245352, LR: 0.0003 +[2026-02-28 00:51:45] (step=0009045) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.769712385051849, LR: 0.0003 +[2026-02-28 00:51:53] (step=0009046) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 1.7699080414791626, LR: 0.0003 +[2026-02-28 00:52:01] (step=0009047) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 1.7701036979064761, LR: 0.0003 +[2026-02-28 00:52:09] (step=0009048) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 1.77029935433379, LR: 0.0003 +[2026-02-28 00:52:17] (step=0009049) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 1.7704950107611035, LR: 0.0003 +[2026-02-28 00:52:24] (step=0009050) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 1.770690667188417, LR: 0.0003 +[2026-02-28 00:52:32] (step=0009051) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.7708863236157308, LR: 0.0003 +[2026-02-28 00:52:40] (step=0009052) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 1.7710819800430444, LR: 0.0003 +[2026-02-28 00:52:48] (step=0009053) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 1.771277636470358, LR: 0.0003 +[2026-02-28 00:52:56] (step=0009054) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 1.7714732928976717, LR: 0.0003 +[2026-02-28 00:53:04] (step=0009055) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 1.7716689493249853, LR: 0.0003 +[2026-02-28 00:53:11] (step=0009056) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 1.7718646057522989, LR: 0.0003 +[2026-02-28 00:53:19] (step=0009057) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 1.7720602621796127, LR: 0.0003 +[2026-02-28 00:53:27] (step=0009058) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 1.7722559186069262, LR: 0.0003 +[2026-02-28 00:53:35] (step=0009059) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.7724515750342398, LR: 0.0003 +[2026-02-28 00:53:43] (step=0009060) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 1.7726472314615536, LR: 0.0003 +[2026-02-28 00:53:50] (step=0009061) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 1.7728428878888671, LR: 0.0003 +[2026-02-28 00:53:58] (step=0009062) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 1.7730385443161807, LR: 0.0003 +[2026-02-28 00:54:06] (step=0009063) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 1.7732342007434945, LR: 0.0003 +[2026-02-28 00:54:14] (step=0009064) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 1.773429857170808, LR: 0.0003 +[2026-02-28 00:54:22] (step=0009065) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 1.7736255135981216, LR: 0.0003 +[2026-02-28 00:54:30] (step=0009066) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.7738211700254354, LR: 0.0003 +[2026-02-28 00:54:37] (step=0009067) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 1.774016826452749, LR: 0.0003 +[2026-02-28 00:54:45] (step=0009068) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.7742124828800625, LR: 0.0003 +[2026-02-28 00:54:53] (step=0009069) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.7744081393073763, LR: 0.0003 +[2026-02-28 00:55:01] (step=0009070) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.7746037957346898, LR: 0.0003 +[2026-02-28 00:55:09] (step=0009071) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.7747994521620036, LR: 0.0003 +[2026-02-28 00:55:17] (step=0009072) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 1.7749951085893172, LR: 0.0003 +[2026-02-28 00:55:24] (step=0009073) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 1.7751907650166308, LR: 0.0003 +[2026-02-28 00:55:32] (step=0009074) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 1.7753864214439445, LR: 0.0003 +[2026-02-28 00:55:40] (step=0009075) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.775582077871258, LR: 0.0003 +[2026-02-28 00:55:48] (step=0009076) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 1.7757777342985717, LR: 0.0003 +[2026-02-28 00:55:56] (step=0009077) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 1.7759733907258854, LR: 0.0003 +[2026-02-28 00:56:04] (step=0009078) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 1.776169047153199, LR: 0.0003 +[2026-02-28 00:56:11] (step=0009079) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.7763647035805126, LR: 0.0003 +[2026-02-28 00:56:19] (step=0009080) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 1.7765603600078264, LR: 0.0003 +[2026-02-28 00:56:27] (step=0009081) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.77675601643514, LR: 0.0003 +[2026-02-28 00:56:35] (step=0009082) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 1.7769516728624535, LR: 0.0003 +[2026-02-28 00:56:43] (step=0009083) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.7771473292897673, LR: 0.0003 +[2026-02-28 00:56:50] (step=0009084) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 1.7773429857170808, LR: 0.0003 +[2026-02-28 00:56:58] (step=0009085) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 1.7775386421443944, LR: 0.0003 +[2026-02-28 00:57:06] (step=0009086) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 1.7777342985717082, LR: 0.0003 +[2026-02-28 00:57:14] (step=0009087) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 1.7779299549990217, LR: 0.0003 +[2026-02-28 00:57:22] (step=0009088) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 1.7781256114263353, LR: 0.0003 +[2026-02-28 00:57:30] (step=0009089) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 1.778321267853649, LR: 0.0003 +[2026-02-28 00:57:37] (step=0009090) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 1.7785169242809626, LR: 0.0003 +[2026-02-28 00:57:45] (step=0009091) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 1.7787125807082762, LR: 0.0003 +[2026-02-28 00:57:53] (step=0009092) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 1.77890823713559, LR: 0.0003 +[2026-02-28 00:58:01] (step=0009093) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 1.7791038935629035, LR: 0.0003 +[2026-02-28 00:58:09] (step=0009094) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 1.779299549990217, LR: 0.0003 +[2026-02-28 00:58:16] (step=0009095) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 1.779495206417531, LR: 0.0003 +[2026-02-28 00:58:24] (step=0009096) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.7796908628448445, LR: 0.0003 +[2026-02-28 00:58:32] (step=0009097) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 1.779886519272158, LR: 0.0003 +[2026-02-28 00:58:40] (step=0009098) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.7800821756994718, LR: 0.0003 +[2026-02-28 00:58:48] (step=0009099) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 1.7802778321267854, LR: 0.0003 +[2026-02-28 00:58:56] (step=0009100) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 1.780473488554099, LR: 0.0003 +[2026-02-28 00:59:03] (step=0009101) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 1.7806691449814127, LR: 0.0003 +[2026-02-28 00:59:11] (step=0009102) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.7808648014087263, LR: 0.0003 +[2026-02-28 00:59:19] (step=0009103) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.7810604578360398, LR: 0.0003 +[2026-02-28 00:59:27] (step=0009104) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.7812561142633536, LR: 0.0003 +[2026-02-28 00:59:35] (step=0009105) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 1.7814517706906672, LR: 0.0003 +[2026-02-28 00:59:42] (step=0009106) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.7816474271179807, LR: 0.0003 +[2026-02-28 00:59:50] (step=0009107) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 1.7818430835452945, LR: 0.0003 +[2026-02-28 00:59:58] (step=0009108) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 1.782038739972608, LR: 0.0003 +[2026-02-28 01:00:06] (step=0009109) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 1.7822343963999216, LR: 0.0003 +[2026-02-28 01:00:14] (step=0009110) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 1.7824300528272354, LR: 0.0003 +[2026-02-28 01:00:22] (step=0009111) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 1.782625709254549, LR: 0.0003 +[2026-02-28 01:00:29] (step=0009112) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 1.7828213656818626, LR: 0.0003 +[2026-02-28 01:00:37] (step=0009113) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.7830170221091763, LR: 0.0003 +[2026-02-28 01:00:45] (step=0009114) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 1.78321267853649, LR: 0.0003 +[2026-02-28 01:00:53] (step=0009115) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 1.7834083349638035, LR: 0.0003 +[2026-02-28 01:01:01] (step=0009116) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.7836039913911172, LR: 0.0003 +[2026-02-28 01:01:09] (step=0009117) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 1.7837996478184308, LR: 0.0003 +[2026-02-28 01:01:16] (step=0009118) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 1.7839953042457444, LR: 0.0003 +[2026-02-28 01:01:24] (step=0009119) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 1.7841909606730582, LR: 0.0003 +[2026-02-28 01:01:32] (step=0009120) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 1.7843866171003717, LR: 0.0003 +[2026-02-28 01:01:40] (step=0009121) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 1.7845822735276853, LR: 0.0003 +[2026-02-28 01:01:48] (step=0009122) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.784777929954999, LR: 0.0003 +[2026-02-28 01:01:56] (step=0009123) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 1.7849735863823126, LR: 0.0003 +[2026-02-28 01:02:04] (step=0009124) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 1.7851692428096262, LR: 0.0003 +[2026-02-28 01:02:11] (step=0009125) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 1.78536489923694, LR: 0.0003 +[2026-02-28 01:02:19] (step=0009126) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 1.7855605556642535, LR: 0.0003 +[2026-02-28 01:02:27] (step=0009127) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 1.7857562120915673, LR: 0.0003 +[2026-02-28 01:02:35] (step=0009128) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 1.7859518685188809, LR: 0.0003 +[2026-02-28 01:02:43] (step=0009129) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 1.7861475249461944, LR: 0.0003 +[2026-02-28 01:02:51] (step=0009130) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.7863431813735082, LR: 0.0003 +[2026-02-28 01:02:58] (step=0009131) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 1.7865388378008218, LR: 0.0003 +[2026-02-28 01:03:06] (step=0009132) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 1.7867344942281354, LR: 0.0003 +[2026-02-28 01:03:14] (step=0009133) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 1.7869301506554491, LR: 0.0003 +[2026-02-28 01:03:22] (step=0009134) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 1.7871258070827627, LR: 0.0003 +[2026-02-28 01:03:30] (step=0009135) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 1.7873214635100763, LR: 0.0003 +[2026-02-28 01:03:37] (step=0009136) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 1.78751711993739, LR: 0.0003 +[2026-02-28 01:03:45] (step=0009137) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 1.7877127763647036, LR: 0.0003 +[2026-02-28 01:03:53] (step=0009138) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 1.7879084327920172, LR: 0.0003 +[2026-02-28 01:04:01] (step=0009139) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 1.788104089219331, LR: 0.0003 +[2026-02-28 01:04:09] (step=0009140) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 1.7882997456466445, LR: 0.0003 +[2026-02-28 01:04:16] (step=0009141) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 1.788495402073958, LR: 0.0003 +[2026-02-28 01:04:24] (step=0009142) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 1.7886910585012719, LR: 0.0003 +[2026-02-28 01:04:32] (step=0009143) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 1.7888867149285854, LR: 0.0003 +[2026-02-28 01:04:40] (step=0009144) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 1.789082371355899, LR: 0.0003 +[2026-02-28 01:04:48] (step=0009145) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 1.7892780277832128, LR: 0.0003 +[2026-02-28 01:04:56] (step=0009146) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 1.7894736842105263, LR: 0.0003 +[2026-02-28 01:05:03] (step=0009147) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 1.78966934063784, LR: 0.0003 +[2026-02-28 01:05:11] (step=0009148) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 1.7898649970651537, LR: 0.0003 +[2026-02-28 01:05:19] (step=0009149) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 1.7900606534924672, LR: 0.0003 +[2026-02-28 01:05:27] (step=0009150) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 1.7902563099197808, LR: 0.0003 +[2026-02-28 01:05:35] (step=0009151) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 1.7904519663470946, LR: 0.0003 +[2026-02-28 01:05:42] (step=0009152) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 1.7906476227744081, LR: 0.0003 +[2026-02-28 01:05:50] (step=0009153) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 1.7908432792017217, LR: 0.0003 +[2026-02-28 01:05:58] (step=0009154) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 1.7910389356290355, LR: 0.0003 +[2026-02-28 01:06:06] (step=0009155) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 1.791234592056349, LR: 0.0003 +[2026-02-28 01:06:14] (step=0009156) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 1.7914302484836626, LR: 0.0003 +[2026-02-28 01:06:22] (step=0009157) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 1.7916259049109764, LR: 0.0003 +[2026-02-28 01:06:29] (step=0009158) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 1.79182156133829, LR: 0.0003 +[2026-02-28 01:06:37] (step=0009159) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 1.7920172177656035, LR: 0.0003 +[2026-02-28 01:06:47] (step=0009160) Train Loss: 0.4627, Train Steps/Sec: 0.10, Epoch: 1.7922128741929173, LR: 0.0003 +[2026-02-28 01:07:01] (step=0009161) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 1.7924085306202309, LR: 0.0003 +[2026-02-28 01:07:15] (step=0009162) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 1.7926041870475444, LR: 0.0003 +[2026-02-28 01:07:28] (step=0009163) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 1.7927998434748582, LR: 0.0003 +[2026-02-28 01:07:42] (step=0009164) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 1.7929954999021718, LR: 0.0003 +[2026-02-28 01:07:56] (step=0009165) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 1.7931911563294853, LR: 0.0003 +[2026-02-28 01:08:09] (step=0009166) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 1.7933868127567991, LR: 0.0003 +[2026-02-28 01:08:23] (step=0009167) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.7935824691841127, LR: 0.0003 +[2026-02-28 01:08:36] (step=0009168) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.7937781256114262, LR: 0.0003 +[2026-02-28 01:08:50] (step=0009169) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 1.79397378203874, LR: 0.0003 +[2026-02-28 01:09:04] (step=0009170) Train Loss: 0.4373, Train Steps/Sec: 0.07, Epoch: 1.7941694384660536, LR: 0.0003 +[2026-02-28 01:09:18] (step=0009171) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 1.7943650948933672, LR: 0.0003 +[2026-02-28 01:09:31] (step=0009172) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.794560751320681, LR: 0.0003 +[2026-02-28 01:09:45] (step=0009173) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 1.7947564077479945, LR: 0.0003 +[2026-02-28 01:09:59] (step=0009174) Train Loss: 0.4627, Train Steps/Sec: 0.07, Epoch: 1.794952064175308, LR: 0.0003 +[2026-02-28 01:10:12] (step=0009175) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 1.7951477206026218, LR: 0.0003 +[2026-02-28 01:10:26] (step=0009176) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 1.7953433770299354, LR: 0.0003 +[2026-02-28 01:10:40] (step=0009177) Train Loss: 0.4624, Train Steps/Sec: 0.07, Epoch: 1.795539033457249, LR: 0.0003 +[2026-02-28 01:10:54] (step=0009178) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 1.7957346898845628, LR: 0.0003 +[2026-02-28 01:11:07] (step=0009179) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 1.7959303463118763, LR: 0.0003 +[2026-02-28 01:11:21] (step=0009180) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 1.7961260027391899, LR: 0.0003 +[2026-02-28 01:11:35] (step=0009181) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 1.7963216591665037, LR: 0.0003 +[2026-02-28 01:11:48] (step=0009182) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.7965173155938172, LR: 0.0003 +[2026-02-28 01:12:02] (step=0009183) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 1.796712972021131, LR: 0.0003 +[2026-02-28 01:12:15] (step=0009184) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 1.7969086284484446, LR: 0.0003 +[2026-02-28 01:12:29] (step=0009185) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 1.7971042848757581, LR: 0.0003 +[2026-02-28 01:12:43] (step=0009186) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.797299941303072, LR: 0.0003 +[2026-02-28 01:12:57] (step=0009187) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 1.7974955977303855, LR: 0.0003 +[2026-02-28 01:13:10] (step=0009188) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 1.797691254157699, LR: 0.0003 +[2026-02-28 01:13:24] (step=0009189) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 1.7978869105850128, LR: 0.0003 +[2026-02-28 01:13:38] (step=0009190) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 1.7980825670123264, LR: 0.0003 +[2026-02-28 01:13:51] (step=0009191) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 1.79827822343964, LR: 0.0003 +[2026-02-28 01:14:05] (step=0009192) Train Loss: 0.4402, Train Steps/Sec: 0.07, Epoch: 1.7984738798669537, LR: 0.0003 +[2026-02-28 01:14:19] (step=0009193) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 1.7986695362942673, LR: 0.0003 +[2026-02-28 01:14:32] (step=0009194) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 1.7988651927215809, LR: 0.0003 +[2026-02-28 01:14:46] (step=0009195) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 1.7990608491488946, LR: 0.0003 +[2026-02-28 01:15:00] (step=0009196) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 1.7992565055762082, LR: 0.0003 +[2026-02-28 01:15:14] (step=0009197) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 1.7994521620035218, LR: 0.0003 +[2026-02-28 01:15:27] (step=0009198) Train Loss: 0.4648, Train Steps/Sec: 0.07, Epoch: 1.7996478184308355, LR: 0.0003 +[2026-02-28 01:15:41] (step=0009199) Train Loss: 0.4641, Train Steps/Sec: 0.07, Epoch: 1.799843474858149, LR: 0.0003 +[2026-02-28 01:15:55] (step=0009200) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 1.8000391312854627, LR: 0.0003 +[2026-02-28 01:16:08] (step=0009201) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 1.8002347877127765, LR: 0.0003 +[2026-02-28 01:16:22] (step=0009202) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 1.80043044414009, LR: 0.0003 +[2026-02-28 01:16:36] (step=0009203) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 1.8006261005674036, LR: 0.0003 +[2026-02-28 01:16:49] (step=0009204) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.8008217569947174, LR: 0.0003 +[2026-02-28 01:17:03] (step=0009205) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.801017413422031, LR: 0.0003 +[2026-02-28 01:17:17] (step=0009206) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 1.8012130698493445, LR: 0.0003 +[2026-02-28 01:17:31] (step=0009207) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 1.8014087262766583, LR: 0.0003 +[2026-02-28 01:17:44] (step=0009208) Train Loss: 0.4368, Train Steps/Sec: 0.07, Epoch: 1.8016043827039718, LR: 0.0003 +[2026-02-28 01:17:58] (step=0009209) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 1.8018000391312854, LR: 0.0003 +[2026-02-28 01:18:12] (step=0009210) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 1.8019956955585992, LR: 0.0003 +[2026-02-28 01:18:25] (step=0009211) Train Loss: 0.4610, Train Steps/Sec: 0.07, Epoch: 1.8021913519859127, LR: 0.0003 +[2026-02-28 01:18:39] (step=0009212) Train Loss: 0.4416, Train Steps/Sec: 0.07, Epoch: 1.8023870084132263, LR: 0.0003 +[2026-02-28 01:18:53] (step=0009213) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 1.80258266484054, LR: 0.0003 +[2026-02-28 01:19:06] (step=0009214) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 1.8027783212678536, LR: 0.0003 +[2026-02-28 01:19:20] (step=0009215) Train Loss: 0.4446, Train Steps/Sec: 0.07, Epoch: 1.8029739776951672, LR: 0.0003 +[2026-02-28 01:19:34] (step=0009216) Train Loss: 0.4379, Train Steps/Sec: 0.07, Epoch: 1.803169634122481, LR: 0.0003 +[2026-02-28 01:19:47] (step=0009217) Train Loss: 0.4671, Train Steps/Sec: 0.07, Epoch: 1.8033652905497946, LR: 0.0003 +[2026-02-28 01:20:01] (step=0009218) Train Loss: 0.4607, Train Steps/Sec: 0.07, Epoch: 1.8035609469771081, LR: 0.0003 +[2026-02-28 01:20:15] (step=0009219) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 1.803756603404422, LR: 0.0003 +[2026-02-28 01:20:28] (step=0009220) Train Loss: 0.4383, Train Steps/Sec: 0.07, Epoch: 1.8039522598317355, LR: 0.0003 +[2026-02-28 01:20:42] (step=0009221) Train Loss: 0.4416, Train Steps/Sec: 0.07, Epoch: 1.804147916259049, LR: 0.0003 +[2026-02-28 01:20:56] (step=0009222) Train Loss: 0.4366, Train Steps/Sec: 0.07, Epoch: 1.8043435726863628, LR: 0.0003 +[2026-02-28 01:21:10] (step=0009223) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.8045392291136764, LR: 0.0003 +[2026-02-28 01:21:23] (step=0009224) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 1.80473488554099, LR: 0.0003 +[2026-02-28 01:21:37] (step=0009225) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 1.8049305419683037, LR: 0.0003 +[2026-02-28 01:21:51] (step=0009226) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.8051261983956173, LR: 0.0003 +[2026-02-28 01:22:04] (step=0009227) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 1.8053218548229308, LR: 0.0003 +[2026-02-28 01:22:18] (step=0009228) Train Loss: 0.4599, Train Steps/Sec: 0.07, Epoch: 1.8055175112502446, LR: 0.0003 +[2026-02-28 01:22:32] (step=0009229) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 1.8057131676775582, LR: 0.0003 +[2026-02-28 01:22:45] (step=0009230) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 1.8059088241048717, LR: 0.0003 +[2026-02-28 01:22:59] (step=0009231) Train Loss: 0.4683, Train Steps/Sec: 0.07, Epoch: 1.8061044805321855, LR: 0.0003 +[2026-02-28 01:23:13] (step=0009232) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 1.806300136959499, LR: 0.0003 +[2026-02-28 01:23:26] (step=0009233) Train Loss: 0.4698, Train Steps/Sec: 0.07, Epoch: 1.8064957933868127, LR: 0.0003 +[2026-02-28 01:23:40] (step=0009234) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 1.8066914498141264, LR: 0.0003 +[2026-02-28 01:23:54] (step=0009235) Train Loss: 0.4599, Train Steps/Sec: 0.07, Epoch: 1.80688710624144, LR: 0.0003 +[2026-02-28 01:24:07] (step=0009236) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 1.8070827626687536, LR: 0.0003 +[2026-02-28 01:24:21] (step=0009237) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 1.8072784190960673, LR: 0.0003 +[2026-02-28 01:24:35] (step=0009238) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.807474075523381, LR: 0.0003 +[2026-02-28 01:24:49] (step=0009239) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 1.8076697319506945, LR: 0.0003 +[2026-02-28 01:25:02] (step=0009240) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 1.8078653883780083, LR: 0.0003 +[2026-02-28 01:25:16] (step=0009241) Train Loss: 0.4293, Train Steps/Sec: 0.07, Epoch: 1.8080610448053218, LR: 0.0003 +[2026-02-28 01:25:29] (step=0009242) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.8082567012326356, LR: 0.0003 +[2026-02-28 01:25:43] (step=0009243) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 1.8084523576599492, LR: 0.0003 +[2026-02-28 01:25:57] (step=0009244) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 1.8086480140872627, LR: 0.0003 +[2026-02-28 01:26:11] (step=0009245) Train Loss: 0.4608, Train Steps/Sec: 0.07, Epoch: 1.8088436705145765, LR: 0.0003 +[2026-02-28 01:26:24] (step=0009246) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 1.80903932694189, LR: 0.0003 +[2026-02-28 01:26:38] (step=0009247) Train Loss: 0.4593, Train Steps/Sec: 0.07, Epoch: 1.8092349833692036, LR: 0.0003 +[2026-02-28 01:26:52] (step=0009248) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 1.8094306397965174, LR: 0.0003 +[2026-02-28 01:27:05] (step=0009249) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 1.809626296223831, LR: 0.0003 +[2026-02-28 01:27:19] (step=0009250) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 1.8098219526511445, LR: 0.0003 +[2026-02-28 01:27:33] (step=0009251) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 1.8100176090784583, LR: 0.0003 +[2026-02-28 01:27:47] (step=0009252) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 1.8102132655057719, LR: 0.0003 +[2026-02-28 01:28:00] (step=0009253) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 1.8104089219330854, LR: 0.0003 +[2026-02-28 01:28:14] (step=0009254) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 1.8106045783603992, LR: 0.0003 +[2026-02-28 01:28:28] (step=0009255) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 1.8108002347877128, LR: 0.0003 +[2026-02-28 01:28:41] (step=0009256) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 1.8109958912150264, LR: 0.0003 +[2026-02-28 01:28:55] (step=0009257) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 1.8111915476423401, LR: 0.0003 +[2026-02-28 01:29:09] (step=0009258) Train Loss: 0.4414, Train Steps/Sec: 0.07, Epoch: 1.8113872040696537, LR: 0.0003 +[2026-02-28 01:29:23] (step=0009259) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 1.8115828604969673, LR: 0.0003 +[2026-02-28 01:29:36] (step=0009260) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.811778516924281, LR: 0.0003 +[2026-02-28 01:29:50] (step=0009261) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 1.8119741733515946, LR: 0.0003 +[2026-02-28 01:30:03] (step=0009262) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.8121698297789082, LR: 0.0003 +[2026-02-28 01:30:17] (step=0009263) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.812365486206222, LR: 0.0003 +[2026-02-28 01:30:31] (step=0009264) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 1.8125611426335355, LR: 0.0003 +[2026-02-28 01:30:45] (step=0009265) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.812756799060849, LR: 0.0003 +[2026-02-28 01:30:58] (step=0009266) Train Loss: 0.4626, Train Steps/Sec: 0.07, Epoch: 1.8129524554881629, LR: 0.0003 +[2026-02-28 01:31:12] (step=0009267) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.8131481119154764, LR: 0.0003 +[2026-02-28 01:31:26] (step=0009268) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 1.81334376834279, LR: 0.0003 +[2026-02-28 01:31:39] (step=0009269) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 1.8135394247701038, LR: 0.0003 +[2026-02-28 01:31:53] (step=0009270) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 1.8137350811974173, LR: 0.0003 +[2026-02-28 01:32:07] (step=0009271) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.813930737624731, LR: 0.0003 +[2026-02-28 01:32:20] (step=0009272) Train Loss: 0.4650, Train Steps/Sec: 0.07, Epoch: 1.8141263940520447, LR: 0.0003 +[2026-02-28 01:32:34] (step=0009273) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 1.8143220504793582, LR: 0.0003 +[2026-02-28 01:32:48] (step=0009274) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.8145177069066718, LR: 0.0003 +[2026-02-28 01:33:01] (step=0009275) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.8147133633339856, LR: 0.0003 +[2026-02-28 01:33:15] (step=0009276) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 1.8149090197612991, LR: 0.0003 +[2026-02-28 01:33:29] (step=0009277) Train Loss: 0.4634, Train Steps/Sec: 0.07, Epoch: 1.8151046761886127, LR: 0.0003 +[2026-02-28 01:33:42] (step=0009278) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 1.8153003326159265, LR: 0.0003 +[2026-02-28 01:33:56] (step=0009279) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 1.81549598904324, LR: 0.0003 +[2026-02-28 01:34:10] (step=0009280) Train Loss: 0.4686, Train Steps/Sec: 0.07, Epoch: 1.8156916454705536, LR: 0.0003 +[2026-02-28 01:34:24] (step=0009281) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 1.8158873018978674, LR: 0.0003 +[2026-02-28 01:34:37] (step=0009282) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 1.816082958325181, LR: 0.0003 +[2026-02-28 01:34:51] (step=0009283) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.8162786147524945, LR: 0.0003 +[2026-02-28 01:35:05] (step=0009284) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 1.8164742711798083, LR: 0.0003 +[2026-02-28 01:35:18] (step=0009285) Train Loss: 0.4664, Train Steps/Sec: 0.07, Epoch: 1.8166699276071219, LR: 0.0003 +[2026-02-28 01:35:32] (step=0009286) Train Loss: 0.4602, Train Steps/Sec: 0.07, Epoch: 1.8168655840344354, LR: 0.0003 +[2026-02-28 01:35:46] (step=0009287) Train Loss: 0.4621, Train Steps/Sec: 0.07, Epoch: 1.8170612404617492, LR: 0.0003 +[2026-02-28 01:35:59] (step=0009288) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 1.8172568968890628, LR: 0.0003 +[2026-02-28 01:36:13] (step=0009289) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 1.8174525533163763, LR: 0.0003 +[2026-02-28 01:36:27] (step=0009290) Train Loss: 0.4602, Train Steps/Sec: 0.07, Epoch: 1.8176482097436901, LR: 0.0003 +[2026-02-28 01:36:40] (step=0009291) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 1.8178438661710037, LR: 0.0003 +[2026-02-28 01:36:54] (step=0009292) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 1.8180395225983172, LR: 0.0003 +[2026-02-28 01:37:08] (step=0009293) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 1.818235179025631, LR: 0.0003 +[2026-02-28 01:37:21] (step=0009294) Train Loss: 0.4605, Train Steps/Sec: 0.07, Epoch: 1.8184308354529446, LR: 0.0003 +[2026-02-28 01:37:35] (step=0009295) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 1.8186264918802582, LR: 0.0003 +[2026-02-28 01:37:49] (step=0009296) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 1.818822148307572, LR: 0.0003 +[2026-02-28 01:38:03] (step=0009297) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 1.8190178047348855, LR: 0.0003 +[2026-02-28 01:38:16] (step=0009298) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 1.8192134611621993, LR: 0.0003 +[2026-02-28 01:38:30] (step=0009299) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 1.8194091175895128, LR: 0.0003 +[2026-02-28 01:38:44] (step=0009300) Train Loss: 0.4681, Train Steps/Sec: 0.07, Epoch: 1.8196047740168264, LR: 0.0003 +[2026-02-28 01:38:57] (step=0009301) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 1.8198004304441402, LR: 0.0003 +[2026-02-28 01:39:11] (step=0009302) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 1.8199960868714538, LR: 0.0003 +[2026-02-28 01:39:25] (step=0009303) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 1.8201917432987673, LR: 0.0003 +[2026-02-28 01:39:38] (step=0009304) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 1.820387399726081, LR: 0.0003 +[2026-02-28 01:39:52] (step=0009305) Train Loss: 0.4467, Train Steps/Sec: 0.07, Epoch: 1.8205830561533947, LR: 0.0003 +[2026-02-28 01:40:06] (step=0009306) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 1.8207787125807082, LR: 0.0003 +[2026-02-28 01:40:20] (step=0009307) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 1.820974369008022, LR: 0.0003 +[2026-02-28 01:40:33] (step=0009308) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 1.8211700254353356, LR: 0.0003 +[2026-02-28 01:40:47] (step=0009309) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.8213656818626491, LR: 0.0003 +[2026-02-28 01:41:01] (step=0009310) Train Loss: 0.4697, Train Steps/Sec: 0.07, Epoch: 1.821561338289963, LR: 0.0003 +[2026-02-28 01:41:15] (step=0009311) Train Loss: 0.4587, Train Steps/Sec: 0.07, Epoch: 1.8217569947172765, LR: 0.0003 +[2026-02-28 01:41:28] (step=0009312) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 1.82195265114459, LR: 0.0003 +[2026-02-28 01:41:42] (step=0009313) Train Loss: 0.4646, Train Steps/Sec: 0.07, Epoch: 1.8221483075719038, LR: 0.0003 +[2026-02-28 01:41:56] (step=0009314) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.8223439639992174, LR: 0.0003 +[2026-02-28 01:42:09] (step=0009315) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 1.822539620426531, LR: 0.0003 +[2026-02-28 01:42:23] (step=0009316) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 1.8227352768538447, LR: 0.0003 +[2026-02-28 01:42:37] (step=0009317) Train Loss: 0.4616, Train Steps/Sec: 0.07, Epoch: 1.8229309332811583, LR: 0.0003 +[2026-02-28 01:42:51] (step=0009318) Train Loss: 0.4456, Train Steps/Sec: 0.07, Epoch: 1.8231265897084719, LR: 0.0003 +[2026-02-28 01:43:04] (step=0009319) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 1.8233222461357856, LR: 0.0003 +[2026-02-28 01:43:18] (step=0009320) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.8235179025630992, LR: 0.0003 +[2026-02-28 01:43:32] (step=0009321) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 1.8237135589904128, LR: 0.0003 +[2026-02-28 01:43:45] (step=0009322) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 1.8239092154177265, LR: 0.0003 +[2026-02-28 01:43:59] (step=0009323) Train Loss: 0.4609, Train Steps/Sec: 0.07, Epoch: 1.82410487184504, LR: 0.0003 +[2026-02-28 01:44:13] (step=0009324) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 1.8243005282723537, LR: 0.0003 +[2026-02-28 01:44:26] (step=0009325) Train Loss: 0.4631, Train Steps/Sec: 0.07, Epoch: 1.8244961846996675, LR: 0.0003 +[2026-02-28 01:44:40] (step=0009326) Train Loss: 0.4608, Train Steps/Sec: 0.07, Epoch: 1.824691841126981, LR: 0.0003 +[2026-02-28 01:44:54] (step=0009327) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.8248874975542946, LR: 0.0003 +[2026-02-28 01:45:07] (step=0009328) Train Loss: 0.4660, Train Steps/Sec: 0.07, Epoch: 1.8250831539816084, LR: 0.0003 +[2026-02-28 01:45:21] (step=0009329) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 1.825278810408922, LR: 0.0003 +[2026-02-28 01:45:35] (step=0009330) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 1.8254744668362355, LR: 0.0003 +[2026-02-28 01:45:49] (step=0009331) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.8256701232635493, LR: 0.0003 +[2026-02-28 01:46:02] (step=0009332) Train Loss: 0.4648, Train Steps/Sec: 0.07, Epoch: 1.8258657796908628, LR: 0.0003 +[2026-02-28 01:46:16] (step=0009333) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 1.8260614361181764, LR: 0.0003 +[2026-02-28 01:46:30] (step=0009334) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.8262570925454902, LR: 0.0003 +[2026-02-28 01:46:44] (step=0009335) Train Loss: 0.4451, Train Steps/Sec: 0.07, Epoch: 1.8264527489728037, LR: 0.0003 +[2026-02-28 01:46:57] (step=0009336) Train Loss: 0.4602, Train Steps/Sec: 0.07, Epoch: 1.8266484054001173, LR: 0.0003 +[2026-02-28 01:47:11] (step=0009337) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 1.826844061827431, LR: 0.0003 +[2026-02-28 01:47:25] (step=0009338) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.8270397182547446, LR: 0.0003 +[2026-02-28 01:47:39] (step=0009339) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.8272353746820582, LR: 0.0003 +[2026-02-28 01:47:52] (step=0009340) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 1.827431031109372, LR: 0.0003 +[2026-02-28 01:48:06] (step=0009341) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 1.8276266875366856, LR: 0.0003 +[2026-02-28 01:48:20] (step=0009342) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 1.8278223439639991, LR: 0.0003 +[2026-02-28 01:48:33] (step=0009343) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.828018000391313, LR: 0.0003 +[2026-02-28 01:48:47] (step=0009344) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 1.8282136568186265, LR: 0.0003 +[2026-02-28 01:49:01] (step=0009345) Train Loss: 0.4420, Train Steps/Sec: 0.07, Epoch: 1.82840931324594, LR: 0.0003 +[2026-02-28 01:49:14] (step=0009346) Train Loss: 0.4655, Train Steps/Sec: 0.07, Epoch: 1.8286049696732538, LR: 0.0003 +[2026-02-28 01:49:28] (step=0009347) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 1.8288006261005674, LR: 0.0003 +[2026-02-28 01:49:42] (step=0009348) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.828996282527881, LR: 0.0003 +[2026-02-28 01:49:55] (step=0009349) Train Loss: 0.4429, Train Steps/Sec: 0.07, Epoch: 1.8291919389551947, LR: 0.0003 +[2026-02-28 01:50:09] (step=0009350) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 1.8293875953825083, LR: 0.0003 +[2026-02-28 01:50:23] (step=0009351) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 1.8295832518098218, LR: 0.0003 +[2026-02-28 01:50:37] (step=0009352) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 1.8297789082371356, LR: 0.0003 +[2026-02-28 01:50:50] (step=0009353) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 1.8299745646644492, LR: 0.0003 +[2026-02-28 01:51:04] (step=0009354) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 1.830170221091763, LR: 0.0003 +[2026-02-28 01:51:18] (step=0009355) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 1.8303658775190765, LR: 0.0003 +[2026-02-28 01:51:31] (step=0009356) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 1.83056153394639, LR: 0.0003 +[2026-02-28 01:51:45] (step=0009357) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 1.8307571903737039, LR: 0.0003 +[2026-02-28 01:51:59] (step=0009358) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.8309528468010174, LR: 0.0003 +[2026-02-28 01:52:13] (step=0009359) Train Loss: 0.4400, Train Steps/Sec: 0.07, Epoch: 1.831148503228331, LR: 0.0003 +[2026-02-28 01:52:26] (step=0009360) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 1.8313441596556448, LR: 0.0003 +[2026-02-28 01:52:40] (step=0009361) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 1.8315398160829583, LR: 0.0003 +[2026-02-28 01:52:54] (step=0009362) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 1.831735472510272, LR: 0.0003 +[2026-02-28 01:53:07] (step=0009363) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 1.8319311289375857, LR: 0.0003 +[2026-02-28 01:53:21] (step=0009364) Train Loss: 0.4643, Train Steps/Sec: 0.07, Epoch: 1.8321267853648993, LR: 0.0003 +[2026-02-28 01:53:35] (step=0009365) Train Loss: 0.4638, Train Steps/Sec: 0.07, Epoch: 1.8323224417922128, LR: 0.0003 +[2026-02-28 01:53:49] (step=0009366) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 1.8325180982195266, LR: 0.0003 +[2026-02-28 01:54:02] (step=0009367) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 1.8327137546468402, LR: 0.0003 +[2026-02-28 01:54:16] (step=0009368) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 1.8329094110741537, LR: 0.0003 +[2026-02-28 01:54:30] (step=0009369) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 1.8331050675014675, LR: 0.0003 +[2026-02-28 01:54:43] (step=0009370) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.833300723928781, LR: 0.0003 +[2026-02-28 01:54:57] (step=0009371) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 1.8334963803560946, LR: 0.0003 +[2026-02-28 01:55:11] (step=0009372) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 1.8336920367834084, LR: 0.0003 +[2026-02-28 01:55:25] (step=0009373) Train Loss: 0.4534, Train Steps/Sec: 0.07, Epoch: 1.833887693210722, LR: 0.0003 +[2026-02-28 01:55:38] (step=0009374) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 1.8340833496380355, LR: 0.0003 +[2026-02-28 01:55:52] (step=0009375) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.8342790060653493, LR: 0.0003 +[2026-02-28 01:56:06] (step=0009376) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 1.8344746624926629, LR: 0.0003 +[2026-02-28 01:56:19] (step=0009377) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 1.8346703189199765, LR: 0.0003 +[2026-02-28 01:56:33] (step=0009378) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 1.8348659753472902, LR: 0.0003 +[2026-02-28 01:56:47] (step=0009379) Train Loss: 0.4404, Train Steps/Sec: 0.07, Epoch: 1.8350616317746038, LR: 0.0003 +[2026-02-28 01:57:00] (step=0009380) Train Loss: 0.4374, Train Steps/Sec: 0.07, Epoch: 1.8352572882019174, LR: 0.0003 +[2026-02-28 01:57:14] (step=0009381) Train Loss: 0.4388, Train Steps/Sec: 0.07, Epoch: 1.8354529446292311, LR: 0.0003 +[2026-02-28 01:57:28] (step=0009382) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 1.8356486010565447, LR: 0.0003 +[2026-02-28 01:57:42] (step=0009383) Train Loss: 0.4436, Train Steps/Sec: 0.07, Epoch: 1.8358442574838583, LR: 0.0003 +[2026-02-28 01:57:55] (step=0009384) Train Loss: 0.4432, Train Steps/Sec: 0.07, Epoch: 1.836039913911172, LR: 0.0003 +[2026-02-28 01:58:09] (step=0009385) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.8362355703384856, LR: 0.0003 +[2026-02-28 01:58:23] (step=0009386) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.8364312267657992, LR: 0.0003 +[2026-02-28 01:58:36] (step=0009387) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 1.836626883193113, LR: 0.0003 +[2026-02-28 01:58:50] (step=0009388) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 1.8368225396204265, LR: 0.0003 +[2026-02-28 01:59:04] (step=0009389) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 1.83701819604774, LR: 0.0003 +[2026-02-28 01:59:17] (step=0009390) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 1.8372138524750539, LR: 0.0003 +[2026-02-28 01:59:31] (step=0009391) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 1.8374095089023674, LR: 0.0003 +[2026-02-28 01:59:45] (step=0009392) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 1.837605165329681, LR: 0.0003 +[2026-02-28 01:59:58] (step=0009393) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 1.8378008217569948, LR: 0.0003 +[2026-02-28 02:00:12] (step=0009394) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.8379964781843083, LR: 0.0003 +[2026-02-28 02:00:26] (step=0009395) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.838192134611622, LR: 0.0003 +[2026-02-28 02:00:39] (step=0009396) Train Loss: 0.4613, Train Steps/Sec: 0.07, Epoch: 1.8383877910389357, LR: 0.0003 +[2026-02-28 02:00:53] (step=0009397) Train Loss: 0.4622, Train Steps/Sec: 0.07, Epoch: 1.8385834474662492, LR: 0.0003 +[2026-02-28 02:01:07] (step=0009398) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 1.8387791038935628, LR: 0.0003 +[2026-02-28 02:01:21] (step=0009399) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.8389747603208766, LR: 0.0003 +[2026-02-28 02:01:34] (step=0009400) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.8391704167481902, LR: 0.0003 +[2026-02-28 02:01:48] (step=0009401) Train Loss: 0.4579, Train Steps/Sec: 0.07, Epoch: 1.8393660731755037, LR: 0.0003 +[2026-02-28 02:02:02] (step=0009402) Train Loss: 0.4449, Train Steps/Sec: 0.07, Epoch: 1.8395617296028175, LR: 0.0003 +[2026-02-28 02:02:16] (step=0009403) Train Loss: 0.4767, Train Steps/Sec: 0.07, Epoch: 1.839757386030131, LR: 0.0003 +[2026-02-28 02:02:29] (step=0009404) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 1.8399530424574446, LR: 0.0003 +[2026-02-28 02:02:43] (step=0009405) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 1.8401486988847584, LR: 0.0003 +[2026-02-28 02:02:56] (step=0009406) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 1.840344355312072, LR: 0.0003 +[2026-02-28 02:03:10] (step=0009407) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.8405400117393855, LR: 0.0003 +[2026-02-28 02:03:24] (step=0009408) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.8407356681666993, LR: 0.0003 +[2026-02-28 02:03:37] (step=0009409) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 1.8409313245940129, LR: 0.0003 +[2026-02-28 02:03:51] (step=0009410) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.8411269810213267, LR: 0.0003 +[2026-02-28 02:04:05] (step=0009411) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 1.8413226374486402, LR: 0.0003 +[2026-02-28 02:04:19] (step=0009412) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 1.8415182938759538, LR: 0.0003 +[2026-02-28 02:04:32] (step=0009413) Train Loss: 0.4454, Train Steps/Sec: 0.07, Epoch: 1.8417139503032676, LR: 0.0003 +[2026-02-28 02:04:46] (step=0009414) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 1.8419096067305811, LR: 0.0003 +[2026-02-28 02:05:00] (step=0009415) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 1.8421052631578947, LR: 0.0003 +[2026-02-28 02:05:13] (step=0009416) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 1.8423009195852085, LR: 0.0003 +[2026-02-28 02:05:27] (step=0009417) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 1.842496576012522, LR: 0.0003 +[2026-02-28 02:05:41] (step=0009418) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 1.8426922324398356, LR: 0.0003 +[2026-02-28 02:05:54] (step=0009419) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 1.8428878888671494, LR: 0.0003 +[2026-02-28 02:06:08] (step=0009420) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 1.843083545294463, LR: 0.0003 +[2026-02-28 02:06:22] (step=0009421) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 1.8432792017217765, LR: 0.0003 +[2026-02-28 02:06:35] (step=0009422) Train Loss: 0.4382, Train Steps/Sec: 0.07, Epoch: 1.8434748581490903, LR: 0.0003 +[2026-02-28 02:06:49] (step=0009423) Train Loss: 0.4382, Train Steps/Sec: 0.07, Epoch: 1.8436705145764039, LR: 0.0003 +[2026-02-28 02:07:03] (step=0009424) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 1.8438661710037174, LR: 0.0003 +[2026-02-28 02:07:17] (step=0009425) Train Loss: 0.4642, Train Steps/Sec: 0.07, Epoch: 1.8440618274310312, LR: 0.0003 +[2026-02-28 02:07:30] (step=0009426) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 1.8442574838583448, LR: 0.0003 +[2026-02-28 02:07:44] (step=0009427) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 1.8444531402856583, LR: 0.0003 +[2026-02-28 02:07:58] (step=0009428) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 1.844648796712972, LR: 0.0003 +[2026-02-28 02:08:11] (step=0009429) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.8448444531402857, LR: 0.0003 +[2026-02-28 02:08:25] (step=0009430) Train Loss: 0.4408, Train Steps/Sec: 0.07, Epoch: 1.8450401095675992, LR: 0.0003 +[2026-02-28 02:08:39] (step=0009431) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 1.845235765994913, LR: 0.0003 +[2026-02-28 02:08:52] (step=0009432) Train Loss: 0.4661, Train Steps/Sec: 0.07, Epoch: 1.8454314224222266, LR: 0.0003 +[2026-02-28 02:09:06] (step=0009433) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 1.8456270788495401, LR: 0.0003 +[2026-02-28 02:09:20] (step=0009434) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 1.845822735276854, LR: 0.0003 +[2026-02-28 02:09:33] (step=0009435) Train Loss: 0.4451, Train Steps/Sec: 0.07, Epoch: 1.8460183917041675, LR: 0.0003 +[2026-02-28 02:09:47] (step=0009436) Train Loss: 0.4408, Train Steps/Sec: 0.07, Epoch: 1.846214048131481, LR: 0.0003 +[2026-02-28 02:10:01] (step=0009437) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 1.8464097045587948, LR: 0.0003 +[2026-02-28 02:10:14] (step=0009438) Train Loss: 0.4664, Train Steps/Sec: 0.07, Epoch: 1.8466053609861084, LR: 0.0003 +[2026-02-28 02:10:28] (step=0009439) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 1.846801017413422, LR: 0.0003 +[2026-02-28 02:10:42] (step=0009440) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 1.8469966738407357, LR: 0.0003 +[2026-02-28 02:10:56] (step=0009441) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 1.8471923302680493, LR: 0.0003 +[2026-02-28 02:11:09] (step=0009442) Train Loss: 0.4638, Train Steps/Sec: 0.07, Epoch: 1.8473879866953629, LR: 0.0003 +[2026-02-28 02:11:23] (step=0009443) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 1.8475836431226766, LR: 0.0003 +[2026-02-28 02:11:37] (step=0009444) Train Loss: 0.4467, Train Steps/Sec: 0.07, Epoch: 1.8477792995499902, LR: 0.0003 +[2026-02-28 02:11:51] (step=0009445) Train Loss: 0.4465, Train Steps/Sec: 0.07, Epoch: 1.8479749559773038, LR: 0.0003 +[2026-02-28 02:12:04] (step=0009446) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 1.8481706124046176, LR: 0.0003 +[2026-02-28 02:12:18] (step=0009447) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 1.8483662688319311, LR: 0.0003 +[2026-02-28 02:12:31] (step=0009448) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 1.8485619252592447, LR: 0.0003 +[2026-02-28 02:12:45] (step=0009449) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 1.8487575816865585, LR: 0.0003 +[2026-02-28 02:12:59] (step=0009450) Train Loss: 0.4436, Train Steps/Sec: 0.07, Epoch: 1.848953238113872, LR: 0.0003 +[2026-02-28 02:13:13] (step=0009451) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 1.8491488945411856, LR: 0.0003 +[2026-02-28 02:13:26] (step=0009452) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 1.8493445509684994, LR: 0.0003 +[2026-02-28 02:13:40] (step=0009453) Train Loss: 0.4681, Train Steps/Sec: 0.07, Epoch: 1.849540207395813, LR: 0.0003 +[2026-02-28 02:13:54] (step=0009454) Train Loss: 0.4638, Train Steps/Sec: 0.07, Epoch: 1.8497358638231265, LR: 0.0003 +[2026-02-28 02:14:08] (step=0009455) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 1.8499315202504403, LR: 0.0003 +[2026-02-28 02:14:21] (step=0009456) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 1.8501271766777538, LR: 0.0003 +[2026-02-28 02:14:35] (step=0009457) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.8503228331050674, LR: 0.0003 +[2026-02-28 02:14:49] (step=0009458) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.8505184895323812, LR: 0.0003 +[2026-02-28 02:15:02] (step=0009459) Train Loss: 0.4451, Train Steps/Sec: 0.07, Epoch: 1.8507141459596947, LR: 0.0003 +[2026-02-28 02:15:16] (step=0009460) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.8509098023870083, LR: 0.0003 +[2026-02-28 02:15:30] (step=0009461) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 1.851105458814322, LR: 0.0003 +[2026-02-28 02:15:43] (step=0009462) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 1.8513011152416357, LR: 0.0003 +[2026-02-28 02:15:57] (step=0009463) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 1.8514967716689492, LR: 0.0003 +[2026-02-28 02:16:11] (step=0009464) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 1.851692428096263, LR: 0.0003 +[2026-02-28 02:16:24] (step=0009465) Train Loss: 0.4389, Train Steps/Sec: 0.07, Epoch: 1.8518880845235766, LR: 0.0003 +[2026-02-28 02:16:38] (step=0009466) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.8520837409508903, LR: 0.0003 +[2026-02-28 02:16:52] (step=0009467) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 1.852279397378204, LR: 0.0003 +[2026-02-28 02:17:05] (step=0009468) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.8524750538055175, LR: 0.0003 +[2026-02-28 02:17:19] (step=0009469) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 1.8526707102328313, LR: 0.0003 +[2026-02-28 02:17:33] (step=0009470) Train Loss: 0.4609, Train Steps/Sec: 0.07, Epoch: 1.8528663666601448, LR: 0.0003 +[2026-02-28 02:17:47] (step=0009471) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 1.8530620230874584, LR: 0.0003 +[2026-02-28 02:18:00] (step=0009472) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 1.8532576795147722, LR: 0.0003 +[2026-02-28 02:18:14] (step=0009473) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 1.8534533359420857, LR: 0.0003 +[2026-02-28 02:18:28] (step=0009474) Train Loss: 0.4641, Train Steps/Sec: 0.07, Epoch: 1.8536489923693993, LR: 0.0003 +[2026-02-28 02:18:42] (step=0009475) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 1.853844648796713, LR: 0.0003 +[2026-02-28 02:18:55] (step=0009476) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 1.8540403052240266, LR: 0.0003 +[2026-02-28 02:19:09] (step=0009477) Train Loss: 0.4407, Train Steps/Sec: 0.07, Epoch: 1.8542359616513402, LR: 0.0003 +[2026-02-28 02:19:23] (step=0009478) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 1.854431618078654, LR: 0.0003 +[2026-02-28 02:19:36] (step=0009479) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.8546272745059675, LR: 0.0003 +[2026-02-28 02:19:50] (step=0009480) Train Loss: 0.4645, Train Steps/Sec: 0.07, Epoch: 1.854822930933281, LR: 0.0003 +[2026-02-28 02:20:04] (step=0009481) Train Loss: 0.4614, Train Steps/Sec: 0.07, Epoch: 1.8550185873605949, LR: 0.0003 +[2026-02-28 02:20:17] (step=0009482) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 1.8552142437879084, LR: 0.0003 +[2026-02-28 02:20:31] (step=0009483) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 1.855409900215222, LR: 0.0003 +[2026-02-28 02:20:45] (step=0009484) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 1.8556055566425358, LR: 0.0003 +[2026-02-28 02:20:58] (step=0009485) Train Loss: 0.4620, Train Steps/Sec: 0.07, Epoch: 1.8558012130698494, LR: 0.0003 +[2026-02-28 02:21:12] (step=0009486) Train Loss: 0.4646, Train Steps/Sec: 0.07, Epoch: 1.855996869497163, LR: 0.0003 +[2026-02-28 02:21:26] (step=0009487) Train Loss: 0.4577, Train Steps/Sec: 0.07, Epoch: 1.8561925259244767, LR: 0.0003 +[2026-02-28 02:21:40] (step=0009488) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 1.8563881823517903, LR: 0.0003 +[2026-02-28 02:21:53] (step=0009489) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 1.8565838387791038, LR: 0.0003 +[2026-02-28 02:22:07] (step=0009490) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 1.8567794952064176, LR: 0.0003 +[2026-02-28 02:22:21] (step=0009491) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.8569751516337312, LR: 0.0003 +[2026-02-28 02:22:34] (step=0009492) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 1.8571708080610447, LR: 0.0003 +[2026-02-28 02:22:48] (step=0009493) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.8573664644883585, LR: 0.0003 +[2026-02-28 02:23:02] (step=0009494) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 1.857562120915672, LR: 0.0003 +[2026-02-28 02:23:15] (step=0009495) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.8577577773429856, LR: 0.0003 +[2026-02-28 02:23:29] (step=0009496) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 1.8579534337702994, LR: 0.0003 +[2026-02-28 02:23:43] (step=0009497) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 1.858149090197613, LR: 0.0003 +[2026-02-28 02:23:57] (step=0009498) Train Loss: 0.4652, Train Steps/Sec: 0.07, Epoch: 1.8583447466249265, LR: 0.0003 +[2026-02-28 02:24:10] (step=0009499) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 1.8585404030522403, LR: 0.0003 +[2026-02-28 02:24:24] (step=0009500) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 1.858736059479554, LR: 0.0003 +[2026-02-28 02:24:24] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0009500/ +[2026-02-28 02:24:38] (step=0009501) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 1.8589317159068675, LR: 0.0003 +[2026-02-28 02:24:51] (step=0009502) Train Loss: 0.4325, Train Steps/Sec: 0.07, Epoch: 1.8591273723341812, LR: 0.0003 +[2026-02-28 02:25:05] (step=0009503) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 1.8593230287614948, LR: 0.0003 +[2026-02-28 02:25:19] (step=0009504) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 1.8595186851888084, LR: 0.0003 +[2026-02-28 02:25:32] (step=0009505) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 1.8597143416161221, LR: 0.0003 +[2026-02-28 02:25:46] (step=0009506) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 1.8599099980434357, LR: 0.0003 +[2026-02-28 02:26:00] (step=0009507) Train Loss: 0.4642, Train Steps/Sec: 0.07, Epoch: 1.8601056544707493, LR: 0.0003 +[2026-02-28 02:26:13] (step=0009508) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 1.860301310898063, LR: 0.0003 +[2026-02-28 02:26:27] (step=0009509) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 1.8604969673253766, LR: 0.0003 +[2026-02-28 02:26:41] (step=0009510) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 1.8606926237526902, LR: 0.0003 +[2026-02-28 02:26:55] (step=0009511) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.860888280180004, LR: 0.0003 +[2026-02-28 02:27:08] (step=0009512) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.8610839366073175, LR: 0.0003 +[2026-02-28 02:27:22] (step=0009513) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.861279593034631, LR: 0.0003 +[2026-02-28 02:27:35] (step=0009514) Train Loss: 0.4446, Train Steps/Sec: 0.07, Epoch: 1.8614752494619449, LR: 0.0003 +[2026-02-28 02:27:49] (step=0009515) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 1.8616709058892584, LR: 0.0003 +[2026-02-28 02:28:03] (step=0009516) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 1.861866562316572, LR: 0.0003 +[2026-02-28 02:28:17] (step=0009517) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 1.8620622187438858, LR: 0.0003 +[2026-02-28 02:28:30] (step=0009518) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.8622578751711993, LR: 0.0003 +[2026-02-28 02:28:44] (step=0009519) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.862453531598513, LR: 0.0003 +[2026-02-28 02:28:58] (step=0009520) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 1.8626491880258267, LR: 0.0003 +[2026-02-28 02:29:11] (step=0009521) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 1.8628448444531402, LR: 0.0003 +[2026-02-28 02:29:25] (step=0009522) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 1.863040500880454, LR: 0.0003 +[2026-02-28 02:29:39] (step=0009523) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 1.8632361573077676, LR: 0.0003 +[2026-02-28 02:29:53] (step=0009524) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 1.8634318137350812, LR: 0.0003 +[2026-02-28 02:30:06] (step=0009525) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 1.863627470162395, LR: 0.0003 +[2026-02-28 02:30:20] (step=0009526) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.8638231265897085, LR: 0.0003 +[2026-02-28 02:30:34] (step=0009527) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 1.864018783017022, LR: 0.0003 +[2026-02-28 02:30:47] (step=0009528) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 1.8642144394443358, LR: 0.0003 +[2026-02-28 02:31:01] (step=0009529) Train Loss: 0.4643, Train Steps/Sec: 0.07, Epoch: 1.8644100958716494, LR: 0.0003 +[2026-02-28 02:31:15] (step=0009530) Train Loss: 0.4408, Train Steps/Sec: 0.07, Epoch: 1.864605752298963, LR: 0.0003 +[2026-02-28 02:31:29] (step=0009531) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 1.8648014087262768, LR: 0.0003 +[2026-02-28 02:31:42] (step=0009532) Train Loss: 0.4419, Train Steps/Sec: 0.07, Epoch: 1.8649970651535903, LR: 0.0003 +[2026-02-28 02:31:56] (step=0009533) Train Loss: 0.4456, Train Steps/Sec: 0.07, Epoch: 1.8651927215809039, LR: 0.0003 +[2026-02-28 02:32:10] (step=0009534) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 1.8653883780082177, LR: 0.0003 +[2026-02-28 02:32:23] (step=0009535) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 1.8655840344355312, LR: 0.0003 +[2026-02-28 02:32:37] (step=0009536) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 1.8657796908628448, LR: 0.0003 +[2026-02-28 02:32:51] (step=0009537) Train Loss: 0.4450, Train Steps/Sec: 0.07, Epoch: 1.8659753472901586, LR: 0.0003 +[2026-02-28 02:33:04] (step=0009538) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 1.8661710037174721, LR: 0.0003 +[2026-02-28 02:33:18] (step=0009539) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.8663666601447857, LR: 0.0003 +[2026-02-28 02:33:32] (step=0009540) Train Loss: 0.4419, Train Steps/Sec: 0.07, Epoch: 1.8665623165720995, LR: 0.0003 +[2026-02-28 02:33:46] (step=0009541) Train Loss: 0.4378, Train Steps/Sec: 0.07, Epoch: 1.866757972999413, LR: 0.0003 +[2026-02-28 02:33:59] (step=0009542) Train Loss: 0.4434, Train Steps/Sec: 0.07, Epoch: 1.8669536294267266, LR: 0.0003 +[2026-02-28 02:34:13] (step=0009543) Train Loss: 0.4653, Train Steps/Sec: 0.07, Epoch: 1.8671492858540404, LR: 0.0003 +[2026-02-28 02:34:27] (step=0009544) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.867344942281354, LR: 0.0003 +[2026-02-28 02:34:40] (step=0009545) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 1.8675405987086675, LR: 0.0003 +[2026-02-28 02:34:54] (step=0009546) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 1.8677362551359813, LR: 0.0003 +[2026-02-28 02:35:08] (step=0009547) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 1.8679319115632949, LR: 0.0003 +[2026-02-28 02:35:21] (step=0009548) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 1.8681275679906084, LR: 0.0003 +[2026-02-28 02:35:35] (step=0009549) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 1.8683232244179222, LR: 0.0003 +[2026-02-28 02:35:49] (step=0009550) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 1.8685188808452358, LR: 0.0003 +[2026-02-28 02:36:02] (step=0009551) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 1.8687145372725493, LR: 0.0003 +[2026-02-28 02:36:16] (step=0009552) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 1.868910193699863, LR: 0.0003 +[2026-02-28 02:36:30] (step=0009553) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 1.8691058501271767, LR: 0.0003 +[2026-02-28 02:36:44] (step=0009554) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 1.8693015065544902, LR: 0.0003 +[2026-02-28 02:36:57] (step=0009555) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.869497162981804, LR: 0.0003 +[2026-02-28 02:37:11] (step=0009556) Train Loss: 0.4689, Train Steps/Sec: 0.07, Epoch: 1.8696928194091176, LR: 0.0003 +[2026-02-28 02:37:25] (step=0009557) Train Loss: 0.4436, Train Steps/Sec: 0.07, Epoch: 1.8698884758364311, LR: 0.0003 +[2026-02-28 02:37:38] (step=0009558) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.870084132263745, LR: 0.0003 +[2026-02-28 02:37:52] (step=0009559) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.8702797886910585, LR: 0.0003 +[2026-02-28 02:38:06] (step=0009560) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 1.870475445118372, LR: 0.0003 +[2026-02-28 02:38:20] (step=0009561) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 1.8706711015456858, LR: 0.0003 +[2026-02-28 02:38:33] (step=0009562) Train Loss: 0.4665, Train Steps/Sec: 0.07, Epoch: 1.8708667579729994, LR: 0.0003 +[2026-02-28 02:38:47] (step=0009563) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 1.871062414400313, LR: 0.0003 +[2026-02-28 02:39:01] (step=0009564) Train Loss: 0.4642, Train Steps/Sec: 0.07, Epoch: 1.8712580708276267, LR: 0.0003 +[2026-02-28 02:39:14] (step=0009565) Train Loss: 0.4625, Train Steps/Sec: 0.07, Epoch: 1.8714537272549403, LR: 0.0003 +[2026-02-28 02:39:28] (step=0009566) Train Loss: 0.4652, Train Steps/Sec: 0.07, Epoch: 1.8716493836822539, LR: 0.0003 +[2026-02-28 02:39:42] (step=0009567) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 1.8718450401095676, LR: 0.0003 +[2026-02-28 02:39:55] (step=0009568) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 1.8720406965368812, LR: 0.0003 +[2026-02-28 02:40:09] (step=0009569) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 1.8722363529641948, LR: 0.0003 +[2026-02-28 02:40:23] (step=0009570) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 1.8724320093915086, LR: 0.0003 +[2026-02-28 02:40:37] (step=0009571) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 1.8726276658188221, LR: 0.0003 +[2026-02-28 02:40:50] (step=0009572) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 1.8728233222461357, LR: 0.0003 +[2026-02-28 02:41:04] (step=0009573) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 1.8730189786734495, LR: 0.0003 +[2026-02-28 02:41:18] (step=0009574) Train Loss: 0.4400, Train Steps/Sec: 0.07, Epoch: 1.873214635100763, LR: 0.0003 +[2026-02-28 02:41:32] (step=0009575) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.8734102915280766, LR: 0.0003 +[2026-02-28 02:41:45] (step=0009576) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 1.8736059479553904, LR: 0.0003 +[2026-02-28 02:41:59] (step=0009577) Train Loss: 0.4644, Train Steps/Sec: 0.07, Epoch: 1.873801604382704, LR: 0.0003 +[2026-02-28 02:42:13] (step=0009578) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 1.8739972608100177, LR: 0.0003 +[2026-02-28 02:42:26] (step=0009579) Train Loss: 0.4421, Train Steps/Sec: 0.07, Epoch: 1.8741929172373313, LR: 0.0003 +[2026-02-28 02:42:40] (step=0009580) Train Loss: 0.4605, Train Steps/Sec: 0.07, Epoch: 1.8743885736646448, LR: 0.0003 +[2026-02-28 02:42:54] (step=0009581) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 1.8745842300919586, LR: 0.0003 +[2026-02-28 02:43:07] (step=0009582) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 1.8747798865192722, LR: 0.0003 +[2026-02-28 02:43:21] (step=0009583) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 1.8749755429465857, LR: 0.0003 +[2026-02-28 02:43:35] (step=0009584) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.8751711993738995, LR: 0.0003 +[2026-02-28 02:43:48] (step=0009585) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 1.875366855801213, LR: 0.0003 +[2026-02-28 02:44:02] (step=0009586) Train Loss: 0.4413, Train Steps/Sec: 0.07, Epoch: 1.8755625122285267, LR: 0.0003 +[2026-02-28 02:44:16] (step=0009587) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 1.8757581686558404, LR: 0.0003 +[2026-02-28 02:44:30] (step=0009588) Train Loss: 0.4416, Train Steps/Sec: 0.07, Epoch: 1.875953825083154, LR: 0.0003 +[2026-02-28 02:44:44] (step=0009589) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.8761494815104676, LR: 0.0003 +[2026-02-28 02:44:57] (step=0009590) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 1.8763451379377813, LR: 0.0003 +[2026-02-28 02:45:11] (step=0009591) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 1.876540794365095, LR: 0.0003 +[2026-02-28 02:45:25] (step=0009592) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 1.8767364507924085, LR: 0.0003 +[2026-02-28 02:45:38] (step=0009593) Train Loss: 0.4438, Train Steps/Sec: 0.07, Epoch: 1.8769321072197223, LR: 0.0003 +[2026-02-28 02:45:52] (step=0009594) Train Loss: 0.4708, Train Steps/Sec: 0.07, Epoch: 1.8771277636470358, LR: 0.0003 +[2026-02-28 02:46:06] (step=0009595) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 1.8773234200743494, LR: 0.0003 +[2026-02-28 02:46:19] (step=0009596) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 1.8775190765016632, LR: 0.0003 +[2026-02-28 02:46:33] (step=0009597) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 1.8777147329289767, LR: 0.0003 +[2026-02-28 02:46:47] (step=0009598) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 1.8779103893562903, LR: 0.0003 +[2026-02-28 02:47:00] (step=0009599) Train Loss: 0.4659, Train Steps/Sec: 0.07, Epoch: 1.878106045783604, LR: 0.0003 +[2026-02-28 02:47:14] (step=0009600) Train Loss: 0.4641, Train Steps/Sec: 0.07, Epoch: 1.8783017022109176, LR: 0.0003 +[2026-02-28 02:47:28] (step=0009601) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 1.8784973586382312, LR: 0.0003 +[2026-02-28 02:47:42] (step=0009602) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.878693015065545, LR: 0.0003 +[2026-02-28 02:47:56] (step=0009603) Train Loss: 0.4622, Train Steps/Sec: 0.07, Epoch: 1.8788886714928585, LR: 0.0003 +[2026-02-28 02:48:09] (step=0009604) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 1.879084327920172, LR: 0.0003 +[2026-02-28 02:48:23] (step=0009605) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.8792799843474859, LR: 0.0003 +[2026-02-28 02:48:37] (step=0009606) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 1.8794756407747994, LR: 0.0003 +[2026-02-28 02:48:50] (step=0009607) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 1.879671297202113, LR: 0.0003 +[2026-02-28 02:49:04] (step=0009608) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 1.8798669536294268, LR: 0.0003 +[2026-02-28 02:49:18] (step=0009609) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 1.8800626100567404, LR: 0.0003 +[2026-02-28 02:49:31] (step=0009610) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 1.880258266484054, LR: 0.0003 +[2026-02-28 02:49:45] (step=0009611) Train Loss: 0.4347, Train Steps/Sec: 0.07, Epoch: 1.8804539229113677, LR: 0.0003 +[2026-02-28 02:49:59] (step=0009612) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 1.8806495793386813, LR: 0.0003 +[2026-02-28 02:50:12] (step=0009613) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.8808452357659948, LR: 0.0003 +[2026-02-28 02:50:26] (step=0009614) Train Loss: 0.4395, Train Steps/Sec: 0.07, Epoch: 1.8810408921933086, LR: 0.0003 +[2026-02-28 02:50:40] (step=0009615) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.8812365486206222, LR: 0.0003 +[2026-02-28 02:50:54] (step=0009616) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 1.8814322050479357, LR: 0.0003 +[2026-02-28 02:51:07] (step=0009617) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 1.8816278614752495, LR: 0.0003 +[2026-02-28 02:51:21] (step=0009618) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 1.881823517902563, LR: 0.0003 +[2026-02-28 02:51:35] (step=0009619) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 1.8820191743298766, LR: 0.0003 +[2026-02-28 02:51:49] (step=0009620) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 1.8822148307571904, LR: 0.0003 +[2026-02-28 02:52:02] (step=0009621) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 1.882410487184504, LR: 0.0003 +[2026-02-28 02:52:16] (step=0009622) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 1.8826061436118176, LR: 0.0003 +[2026-02-28 02:52:29] (step=0009623) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.8828018000391313, LR: 0.0003 +[2026-02-28 02:52:43] (step=0009624) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 1.882997456466445, LR: 0.0003 +[2026-02-28 02:52:57] (step=0009625) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 1.8831931128937585, LR: 0.0003 +[2026-02-28 02:53:11] (step=0009626) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 1.8833887693210722, LR: 0.0003 +[2026-02-28 02:53:24] (step=0009627) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 1.8835844257483858, LR: 0.0003 +[2026-02-28 02:53:38] (step=0009628) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 1.8837800821756994, LR: 0.0003 +[2026-02-28 02:53:52] (step=0009629) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.8839757386030132, LR: 0.0003 +[2026-02-28 02:54:05] (step=0009630) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 1.8841713950303267, LR: 0.0003 +[2026-02-28 02:54:19] (step=0009631) Train Loss: 0.4357, Train Steps/Sec: 0.07, Epoch: 1.8843670514576403, LR: 0.0003 +[2026-02-28 02:54:33] (step=0009632) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 1.884562707884954, LR: 0.0003 +[2026-02-28 02:54:47] (step=0009633) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.8847583643122676, LR: 0.0003 +[2026-02-28 02:55:00] (step=0009634) Train Loss: 0.4427, Train Steps/Sec: 0.07, Epoch: 1.8849540207395814, LR: 0.0003 +[2026-02-28 02:55:14] (step=0009635) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 1.885149677166895, LR: 0.0003 +[2026-02-28 02:55:28] (step=0009636) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.8853453335942085, LR: 0.0003 +[2026-02-28 02:55:41] (step=0009637) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 1.8855409900215223, LR: 0.0003 +[2026-02-28 02:55:55] (step=0009638) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 1.8857366464488359, LR: 0.0003 +[2026-02-28 02:56:09] (step=0009639) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.8859323028761494, LR: 0.0003 +[2026-02-28 02:56:22] (step=0009640) Train Loss: 0.4417, Train Steps/Sec: 0.07, Epoch: 1.8861279593034632, LR: 0.0003 +[2026-02-28 02:56:36] (step=0009641) Train Loss: 0.4401, Train Steps/Sec: 0.07, Epoch: 1.8863236157307768, LR: 0.0003 +[2026-02-28 02:56:50] (step=0009642) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 1.8865192721580903, LR: 0.0003 +[2026-02-28 02:57:04] (step=0009643) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.8867149285854041, LR: 0.0003 +[2026-02-28 02:57:17] (step=0009644) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 1.8869105850127177, LR: 0.0003 +[2026-02-28 02:57:31] (step=0009645) Train Loss: 0.4589, Train Steps/Sec: 0.07, Epoch: 1.8871062414400313, LR: 0.0003 +[2026-02-28 02:57:45] (step=0009646) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 1.887301897867345, LR: 0.0003 +[2026-02-28 02:57:59] (step=0009647) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 1.8874975542946586, LR: 0.0003 +[2026-02-28 02:58:12] (step=0009648) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 1.8876932107219722, LR: 0.0003 +[2026-02-28 02:58:26] (step=0009649) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 1.887888867149286, LR: 0.0003 +[2026-02-28 02:58:40] (step=0009650) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.8880845235765995, LR: 0.0003 +[2026-02-28 02:58:53] (step=0009651) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 1.888280180003913, LR: 0.0003 +[2026-02-28 02:59:07] (step=0009652) Train Loss: 0.4685, Train Steps/Sec: 0.07, Epoch: 1.8884758364312269, LR: 0.0003 +[2026-02-28 02:59:21] (step=0009653) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 1.8886714928585404, LR: 0.0003 +[2026-02-28 02:59:34] (step=0009654) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 1.888867149285854, LR: 0.0003 +[2026-02-28 02:59:48] (step=0009655) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 1.8890628057131678, LR: 0.0003 +[2026-02-28 03:00:02] (step=0009656) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 1.8892584621404813, LR: 0.0003 +[2026-02-28 03:00:16] (step=0009657) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.8894541185677949, LR: 0.0003 +[2026-02-28 03:00:29] (step=0009658) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.8896497749951087, LR: 0.0003 +[2026-02-28 03:00:43] (step=0009659) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 1.8898454314224222, LR: 0.0003 +[2026-02-28 03:00:57] (step=0009660) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 1.8900410878497358, LR: 0.0003 +[2026-02-28 03:01:11] (step=0009661) Train Loss: 0.4724, Train Steps/Sec: 0.07, Epoch: 1.8902367442770496, LR: 0.0003 +[2026-02-28 03:01:25] (step=0009662) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 1.8904324007043631, LR: 0.0003 +[2026-02-28 03:01:38] (step=0009663) Train Loss: 0.4439, Train Steps/Sec: 0.07, Epoch: 1.8906280571316767, LR: 0.0003 +[2026-02-28 03:01:52] (step=0009664) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 1.8908237135589905, LR: 0.0003 +[2026-02-28 03:02:05] (step=0009665) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 1.891019369986304, LR: 0.0003 +[2026-02-28 03:02:19] (step=0009666) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 1.8912150264136176, LR: 0.0003 +[2026-02-28 03:02:33] (step=0009667) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.8914106828409314, LR: 0.0003 +[2026-02-28 03:02:46] (step=0009668) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 1.891606339268245, LR: 0.0003 +[2026-02-28 03:03:00] (step=0009669) Train Loss: 0.4605, Train Steps/Sec: 0.07, Epoch: 1.8918019956955585, LR: 0.0003 +[2026-02-28 03:03:14] (step=0009670) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 1.8919976521228723, LR: 0.0003 +[2026-02-28 03:03:27] (step=0009671) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 1.8921933085501859, LR: 0.0003 +[2026-02-28 03:03:41] (step=0009672) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 1.8923889649774994, LR: 0.0003 +[2026-02-28 03:03:55] (step=0009673) Train Loss: 0.4420, Train Steps/Sec: 0.07, Epoch: 1.8925846214048132, LR: 0.0003 +[2026-02-28 03:04:09] (step=0009674) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.8927802778321268, LR: 0.0003 +[2026-02-28 03:04:22] (step=0009675) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 1.8929759342594403, LR: 0.0003 +[2026-02-28 03:04:36] (step=0009676) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 1.8931715906867541, LR: 0.0003 +[2026-02-28 03:04:50] (step=0009677) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 1.8933672471140677, LR: 0.0003 +[2026-02-28 03:05:04] (step=0009678) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 1.8935629035413812, LR: 0.0003 +[2026-02-28 03:05:17] (step=0009679) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 1.893758559968695, LR: 0.0003 +[2026-02-28 03:05:31] (step=0009680) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 1.8939542163960086, LR: 0.0003 +[2026-02-28 03:05:45] (step=0009681) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.8941498728233221, LR: 0.0003 +[2026-02-28 03:05:58] (step=0009682) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 1.894345529250636, LR: 0.0003 +[2026-02-28 03:06:12] (step=0009683) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 1.8945411856779495, LR: 0.0003 +[2026-02-28 03:06:26] (step=0009684) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 1.894736842105263, LR: 0.0003 +[2026-02-28 03:06:39] (step=0009685) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 1.8949324985325768, LR: 0.0003 +[2026-02-28 03:06:53] (step=0009686) Train Loss: 0.4664, Train Steps/Sec: 0.07, Epoch: 1.8951281549598904, LR: 0.0003 +[2026-02-28 03:07:07] (step=0009687) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 1.895323811387204, LR: 0.0003 +[2026-02-28 03:07:20] (step=0009688) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 1.8955194678145177, LR: 0.0003 +[2026-02-28 03:07:34] (step=0009689) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.8957151242418313, LR: 0.0003 +[2026-02-28 03:07:48] (step=0009690) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 1.895910780669145, LR: 0.0003 +[2026-02-28 03:08:02] (step=0009691) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 1.8961064370964587, LR: 0.0003 +[2026-02-28 03:08:15] (step=0009692) Train Loss: 0.4411, Train Steps/Sec: 0.07, Epoch: 1.8963020935237722, LR: 0.0003 +[2026-02-28 03:08:29] (step=0009693) Train Loss: 0.4413, Train Steps/Sec: 0.07, Epoch: 1.896497749951086, LR: 0.0003 +[2026-02-28 03:08:43] (step=0009694) Train Loss: 0.4435, Train Steps/Sec: 0.07, Epoch: 1.8966934063783996, LR: 0.0003 +[2026-02-28 03:08:56] (step=0009695) Train Loss: 0.4606, Train Steps/Sec: 0.07, Epoch: 1.8968890628057131, LR: 0.0003 +[2026-02-28 03:09:10] (step=0009696) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 1.897084719233027, LR: 0.0003 +[2026-02-28 03:09:24] (step=0009697) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 1.8972803756603405, LR: 0.0003 +[2026-02-28 03:09:37] (step=0009698) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.897476032087654, LR: 0.0003 +[2026-02-28 03:09:51] (step=0009699) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.8976716885149678, LR: 0.0003 +[2026-02-28 03:10:05] (step=0009700) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 1.8978673449422814, LR: 0.0003 +[2026-02-28 03:10:18] (step=0009701) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 1.898063001369595, LR: 0.0003 +[2026-02-28 03:10:32] (step=0009702) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 1.8982586577969087, LR: 0.0003 +[2026-02-28 03:10:46] (step=0009703) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 1.8984543142242223, LR: 0.0003 +[2026-02-28 03:11:00] (step=0009704) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 1.8986499706515358, LR: 0.0003 +[2026-02-28 03:11:14] (step=0009705) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 1.8988456270788496, LR: 0.0003 +[2026-02-28 03:11:27] (step=0009706) Train Loss: 0.4414, Train Steps/Sec: 0.07, Epoch: 1.8990412835061632, LR: 0.0003 +[2026-02-28 03:11:41] (step=0009707) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 1.8992369399334768, LR: 0.0003 +[2026-02-28 03:11:55] (step=0009708) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 1.8994325963607905, LR: 0.0003 +[2026-02-28 03:12:08] (step=0009709) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 1.899628252788104, LR: 0.0003 +[2026-02-28 03:12:22] (step=0009710) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 1.8998239092154177, LR: 0.0003 +[2026-02-28 03:12:36] (step=0009711) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 1.9000195656427314, LR: 0.0003 +[2026-02-28 03:12:49] (step=0009712) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 1.900215222070045, LR: 0.0003 +[2026-02-28 03:13:03] (step=0009713) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 1.9004108784973586, LR: 0.0003 +[2026-02-28 03:13:17] (step=0009714) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 1.9006065349246724, LR: 0.0003 +[2026-02-28 03:13:30] (step=0009715) Train Loss: 0.4357, Train Steps/Sec: 0.07, Epoch: 1.900802191351986, LR: 0.0003 +[2026-02-28 03:13:44] (step=0009716) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 1.9009978477792995, LR: 0.0003 +[2026-02-28 03:13:58] (step=0009717) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 1.9011935042066133, LR: 0.0003 +[2026-02-28 03:14:12] (step=0009718) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.9013891606339268, LR: 0.0003 +[2026-02-28 03:14:26] (step=0009719) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.9015848170612404, LR: 0.0003 +[2026-02-28 03:14:39] (step=0009720) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.9017804734885542, LR: 0.0003 +[2026-02-28 03:14:53] (step=0009721) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.9019761299158677, LR: 0.0003 +[2026-02-28 03:15:06] (step=0009722) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 1.9021717863431813, LR: 0.0003 +[2026-02-28 03:15:20] (step=0009723) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.902367442770495, LR: 0.0003 +[2026-02-28 03:15:34] (step=0009724) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 1.9025630991978086, LR: 0.0003 +[2026-02-28 03:15:47] (step=0009725) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 1.9027587556251222, LR: 0.0003 +[2026-02-28 03:16:01] (step=0009726) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 1.902954412052436, LR: 0.0003 +[2026-02-28 03:16:15] (step=0009727) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 1.9031500684797495, LR: 0.0003 +[2026-02-28 03:16:29] (step=0009728) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 1.903345724907063, LR: 0.0003 +[2026-02-28 03:16:42] (step=0009729) Train Loss: 0.4653, Train Steps/Sec: 0.07, Epoch: 1.903541381334377, LR: 0.0003 +[2026-02-28 03:16:56] (step=0009730) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 1.9037370377616905, LR: 0.0003 +[2026-02-28 03:17:10] (step=0009731) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 1.903932694189004, LR: 0.0003 +[2026-02-28 03:17:24] (step=0009732) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 1.9041283506163178, LR: 0.0003 +[2026-02-28 03:17:37] (step=0009733) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 1.9043240070436314, LR: 0.0003 +[2026-02-28 03:17:51] (step=0009734) Train Loss: 0.4381, Train Steps/Sec: 0.07, Epoch: 1.904519663470945, LR: 0.0003 +[2026-02-28 03:18:05] (step=0009735) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 1.9047153198982587, LR: 0.0003 +[2026-02-28 03:18:18] (step=0009736) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 1.9049109763255723, LR: 0.0003 +[2026-02-28 03:18:32] (step=0009737) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 1.9051066327528858, LR: 0.0003 +[2026-02-28 03:18:46] (step=0009738) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 1.9053022891801996, LR: 0.0003 +[2026-02-28 03:18:59] (step=0009739) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 1.9054979456075132, LR: 0.0003 +[2026-02-28 03:19:13] (step=0009740) Train Loss: 0.4438, Train Steps/Sec: 0.07, Epoch: 1.9056936020348267, LR: 0.0003 +[2026-02-28 03:19:27] (step=0009741) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 1.9058892584621405, LR: 0.0003 +[2026-02-28 03:19:40] (step=0009742) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 1.906084914889454, LR: 0.0003 +[2026-02-28 03:19:54] (step=0009743) Train Loss: 0.4423, Train Steps/Sec: 0.07, Epoch: 1.9062805713167676, LR: 0.0003 +[2026-02-28 03:20:08] (step=0009744) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.9064762277440814, LR: 0.0003 +[2026-02-28 03:20:21] (step=0009745) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 1.906671884171395, LR: 0.0003 +[2026-02-28 03:20:35] (step=0009746) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 1.9068675405987088, LR: 0.0003 +[2026-02-28 03:20:49] (step=0009747) Train Loss: 0.4415, Train Steps/Sec: 0.07, Epoch: 1.9070631970260223, LR: 0.0003 +[2026-02-28 03:21:03] (step=0009748) Train Loss: 0.4589, Train Steps/Sec: 0.07, Epoch: 1.907258853453336, LR: 0.0003 +[2026-02-28 03:21:17] (step=0009749) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 1.9074545098806497, LR: 0.0003 +[2026-02-28 03:21:30] (step=0009750) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.9076501663079632, LR: 0.0003 +[2026-02-28 03:21:44] (step=0009751) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.9078458227352768, LR: 0.0003 +[2026-02-28 03:21:58] (step=0009752) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.9080414791625906, LR: 0.0003 +[2026-02-28 03:22:11] (step=0009753) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 1.9082371355899042, LR: 0.0003 +[2026-02-28 03:22:25] (step=0009754) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 1.9084327920172177, LR: 0.0003 +[2026-02-28 03:22:39] (step=0009755) Train Loss: 0.4400, Train Steps/Sec: 0.07, Epoch: 1.9086284484445315, LR: 0.0003 +[2026-02-28 03:22:52] (step=0009756) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 1.908824104871845, LR: 0.0003 +[2026-02-28 03:23:06] (step=0009757) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 1.9090197612991586, LR: 0.0003 +[2026-02-28 03:23:20] (step=0009758) Train Loss: 0.4672, Train Steps/Sec: 0.07, Epoch: 1.9092154177264724, LR: 0.0003 +[2026-02-28 03:23:33] (step=0009759) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 1.909411074153786, LR: 0.0003 +[2026-02-28 03:23:47] (step=0009760) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 1.9096067305810995, LR: 0.0003 +[2026-02-28 03:24:01] (step=0009761) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 1.9098023870084133, LR: 0.0003 +[2026-02-28 03:24:15] (step=0009762) Train Loss: 0.4392, Train Steps/Sec: 0.07, Epoch: 1.9099980434357269, LR: 0.0003 +[2026-02-28 03:24:28] (step=0009763) Train Loss: 0.4454, Train Steps/Sec: 0.07, Epoch: 1.9101936998630404, LR: 0.0003 +[2026-02-28 03:24:42] (step=0009764) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 1.9103893562903542, LR: 0.0003 +[2026-02-28 03:24:56] (step=0009765) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 1.9105850127176678, LR: 0.0003 +[2026-02-28 03:25:09] (step=0009766) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 1.9107806691449813, LR: 0.0003 +[2026-02-28 03:25:23] (step=0009767) Train Loss: 0.4390, Train Steps/Sec: 0.07, Epoch: 1.9109763255722951, LR: 0.0003 +[2026-02-28 03:25:37] (step=0009768) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 1.9111719819996087, LR: 0.0003 +[2026-02-28 03:25:50] (step=0009769) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 1.9113676384269223, LR: 0.0003 +[2026-02-28 03:26:04] (step=0009770) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 1.911563294854236, LR: 0.0003 +[2026-02-28 03:26:18] (step=0009771) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 1.9117589512815496, LR: 0.0003 +[2026-02-28 03:26:32] (step=0009772) Train Loss: 0.4386, Train Steps/Sec: 0.07, Epoch: 1.9119546077088632, LR: 0.0003 +[2026-02-28 03:26:45] (step=0009773) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.912150264136177, LR: 0.0003 +[2026-02-28 03:26:59] (step=0009774) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.9123459205634905, LR: 0.0003 +[2026-02-28 03:27:13] (step=0009775) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 1.912541576990804, LR: 0.0003 +[2026-02-28 03:27:27] (step=0009776) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.9127372334181179, LR: 0.0003 +[2026-02-28 03:27:40] (step=0009777) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 1.9129328898454314, LR: 0.0003 +[2026-02-28 03:27:54] (step=0009778) Train Loss: 0.4640, Train Steps/Sec: 0.07, Epoch: 1.913128546272745, LR: 0.0003 +[2026-02-28 03:28:08] (step=0009779) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 1.9133242027000588, LR: 0.0003 +[2026-02-28 03:28:21] (step=0009780) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 1.9135198591273723, LR: 0.0003 +[2026-02-28 03:28:35] (step=0009781) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 1.9137155155546859, LR: 0.0003 +[2026-02-28 03:28:49] (step=0009782) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 1.9139111719819997, LR: 0.0003 +[2026-02-28 03:29:02] (step=0009783) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.9141068284093132, LR: 0.0003 +[2026-02-28 03:29:16] (step=0009784) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 1.9143024848366268, LR: 0.0003 +[2026-02-28 03:29:30] (step=0009785) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 1.9144981412639406, LR: 0.0003 +[2026-02-28 03:29:44] (step=0009786) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 1.9146937976912541, LR: 0.0003 +[2026-02-28 03:29:57] (step=0009787) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 1.9148894541185677, LR: 0.0003 +[2026-02-28 03:30:11] (step=0009788) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 1.9150851105458815, LR: 0.0003 +[2026-02-28 03:30:25] (step=0009789) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 1.915280766973195, LR: 0.0003 +[2026-02-28 03:30:39] (step=0009790) Train Loss: 0.4605, Train Steps/Sec: 0.07, Epoch: 1.9154764234005086, LR: 0.0003 +[2026-02-28 03:30:52] (step=0009791) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 1.9156720798278224, LR: 0.0003 +[2026-02-28 03:31:06] (step=0009792) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 1.915867736255136, LR: 0.0003 +[2026-02-28 03:31:20] (step=0009793) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 1.9160633926824495, LR: 0.0003 +[2026-02-28 03:31:33] (step=0009794) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 1.9162590491097633, LR: 0.0003 +[2026-02-28 03:31:47] (step=0009795) Train Loss: 0.4427, Train Steps/Sec: 0.07, Epoch: 1.9164547055370769, LR: 0.0003 +[2026-02-28 03:32:01] (step=0009796) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 1.9166503619643904, LR: 0.0003 +[2026-02-28 03:32:14] (step=0009797) Train Loss: 0.4607, Train Steps/Sec: 0.07, Epoch: 1.9168460183917042, LR: 0.0003 +[2026-02-28 03:32:28] (step=0009798) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 1.9170416748190178, LR: 0.0003 +[2026-02-28 03:32:42] (step=0009799) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.9172373312463313, LR: 0.0003 +[2026-02-28 03:32:55] (step=0009800) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 1.9174329876736451, LR: 0.0003 +[2026-02-28 03:33:09] (step=0009801) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 1.9176286441009587, LR: 0.0003 +[2026-02-28 03:33:23] (step=0009802) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.9178243005282725, LR: 0.0003 +[2026-02-28 03:33:37] (step=0009803) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 1.918019956955586, LR: 0.0003 +[2026-02-28 03:33:50] (step=0009804) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 1.9182156133828996, LR: 0.0003 +[2026-02-28 03:34:04] (step=0009805) Train Loss: 0.4420, Train Steps/Sec: 0.07, Epoch: 1.9184112698102134, LR: 0.0003 +[2026-02-28 03:34:18] (step=0009806) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 1.918606926237527, LR: 0.0003 +[2026-02-28 03:34:31] (step=0009807) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 1.9188025826648405, LR: 0.0003 +[2026-02-28 03:34:45] (step=0009808) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.9189982390921543, LR: 0.0003 +[2026-02-28 03:34:59] (step=0009809) Train Loss: 0.4679, Train Steps/Sec: 0.07, Epoch: 1.9191938955194678, LR: 0.0003 +[2026-02-28 03:35:12] (step=0009810) Train Loss: 0.4633, Train Steps/Sec: 0.07, Epoch: 1.9193895519467814, LR: 0.0003 +[2026-02-28 03:35:26] (step=0009811) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 1.9195852083740952, LR: 0.0003 +[2026-02-28 03:35:40] (step=0009812) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 1.9197808648014087, LR: 0.0003 +[2026-02-28 03:35:54] (step=0009813) Train Loss: 0.4579, Train Steps/Sec: 0.07, Epoch: 1.9199765212287223, LR: 0.0003 +[2026-02-28 03:36:07] (step=0009814) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 1.920172177656036, LR: 0.0003 +[2026-02-28 03:36:21] (step=0009815) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 1.9203678340833497, LR: 0.0003 +[2026-02-28 03:36:35] (step=0009816) Train Loss: 0.4396, Train Steps/Sec: 0.07, Epoch: 1.9205634905106632, LR: 0.0003 +[2026-02-28 03:36:48] (step=0009817) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 1.920759146937977, LR: 0.0003 +[2026-02-28 03:37:02] (step=0009818) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.9209548033652906, LR: 0.0003 +[2026-02-28 03:37:16] (step=0009819) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 1.9211504597926041, LR: 0.0003 +[2026-02-28 03:37:30] (step=0009820) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 1.921346116219918, LR: 0.0003 +[2026-02-28 03:37:43] (step=0009821) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.9215417726472315, LR: 0.0003 +[2026-02-28 03:37:57] (step=0009822) Train Loss: 0.4599, Train Steps/Sec: 0.07, Epoch: 1.921737429074545, LR: 0.0003 +[2026-02-28 03:38:11] (step=0009823) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 1.9219330855018588, LR: 0.0003 +[2026-02-28 03:38:24] (step=0009824) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 1.9221287419291724, LR: 0.0003 +[2026-02-28 03:38:38] (step=0009825) Train Loss: 0.4378, Train Steps/Sec: 0.07, Epoch: 1.922324398356486, LR: 0.0003 +[2026-02-28 03:38:52] (step=0009826) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 1.9225200547837997, LR: 0.0003 +[2026-02-28 03:39:05] (step=0009827) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 1.9227157112111133, LR: 0.0003 +[2026-02-28 03:39:19] (step=0009828) Train Loss: 0.4481, Train Steps/Sec: 0.07, Epoch: 1.9229113676384268, LR: 0.0003 +[2026-02-28 03:39:33] (step=0009829) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 1.9231070240657406, LR: 0.0003 +[2026-02-28 03:39:46] (step=0009830) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 1.9233026804930542, LR: 0.0003 +[2026-02-28 03:40:00] (step=0009831) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 1.9234983369203678, LR: 0.0003 +[2026-02-28 03:40:14] (step=0009832) Train Loss: 0.4645, Train Steps/Sec: 0.07, Epoch: 1.9236939933476815, LR: 0.0003 +[2026-02-28 03:40:28] (step=0009833) Train Loss: 0.4408, Train Steps/Sec: 0.07, Epoch: 1.923889649774995, LR: 0.0003 +[2026-02-28 03:40:41] (step=0009834) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 1.9240853062023087, LR: 0.0003 +[2026-02-28 03:40:55] (step=0009835) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.9242809626296224, LR: 0.0003 +[2026-02-28 03:41:09] (step=0009836) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 1.924476619056936, LR: 0.0003 +[2026-02-28 03:41:22] (step=0009837) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 1.9246722754842496, LR: 0.0003 +[2026-02-28 03:41:36] (step=0009838) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 1.9248679319115634, LR: 0.0003 +[2026-02-28 03:41:50] (step=0009839) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.925063588338877, LR: 0.0003 +[2026-02-28 03:42:03] (step=0009840) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 1.9252592447661905, LR: 0.0003 +[2026-02-28 03:42:17] (step=0009841) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 1.9254549011935043, LR: 0.0003 +[2026-02-28 03:42:31] (step=0009842) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 1.9256505576208178, LR: 0.0003 +[2026-02-28 03:42:44] (step=0009843) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.9258462140481314, LR: 0.0003 +[2026-02-28 03:42:58] (step=0009844) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 1.9260418704754452, LR: 0.0003 +[2026-02-28 03:43:12] (step=0009845) Train Loss: 0.4659, Train Steps/Sec: 0.07, Epoch: 1.9262375269027587, LR: 0.0003 +[2026-02-28 03:43:26] (step=0009846) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 1.9264331833300723, LR: 0.0003 +[2026-02-28 03:43:39] (step=0009847) Train Loss: 0.4599, Train Steps/Sec: 0.07, Epoch: 1.926628839757386, LR: 0.0003 +[2026-02-28 03:43:53] (step=0009848) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 1.9268244961846996, LR: 0.0003 +[2026-02-28 03:44:07] (step=0009849) Train Loss: 0.4758, Train Steps/Sec: 0.07, Epoch: 1.9270201526120132, LR: 0.0003 +[2026-02-28 03:44:20] (step=0009850) Train Loss: 0.4589, Train Steps/Sec: 0.07, Epoch: 1.927215809039327, LR: 0.0003 +[2026-02-28 03:44:34] (step=0009851) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 1.9274114654666405, LR: 0.0003 +[2026-02-28 03:44:48] (step=0009852) Train Loss: 0.4599, Train Steps/Sec: 0.07, Epoch: 1.9276071218939541, LR: 0.0003 +[2026-02-28 03:45:01] (step=0009853) Train Loss: 0.4441, Train Steps/Sec: 0.07, Epoch: 1.927802778321268, LR: 0.0003 +[2026-02-28 03:45:15] (step=0009854) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 1.9279984347485815, LR: 0.0003 +[2026-02-28 03:45:29] (step=0009855) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 1.928194091175895, LR: 0.0003 +[2026-02-28 03:45:42] (step=0009856) Train Loss: 0.4489, Train Steps/Sec: 0.07, Epoch: 1.9283897476032088, LR: 0.0003 +[2026-02-28 03:45:56] (step=0009857) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.9285854040305224, LR: 0.0003 +[2026-02-28 03:46:10] (step=0009858) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.9287810604578361, LR: 0.0003 +[2026-02-28 03:46:24] (step=0009859) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 1.9289767168851497, LR: 0.0003 +[2026-02-28 03:46:37] (step=0009860) Train Loss: 0.4377, Train Steps/Sec: 0.07, Epoch: 1.9291723733124633, LR: 0.0003 +[2026-02-28 03:46:51] (step=0009861) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 1.929368029739777, LR: 0.0003 +[2026-02-28 03:47:05] (step=0009862) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 1.9295636861670906, LR: 0.0003 +[2026-02-28 03:47:18] (step=0009863) Train Loss: 0.4426, Train Steps/Sec: 0.07, Epoch: 1.9297593425944042, LR: 0.0003 +[2026-02-28 03:47:32] (step=0009864) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 1.929954999021718, LR: 0.0003 +[2026-02-28 03:47:46] (step=0009865) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.9301506554490315, LR: 0.0003 +[2026-02-28 03:47:59] (step=0009866) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 1.930346311876345, LR: 0.0003 +[2026-02-28 03:48:13] (step=0009867) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 1.9305419683036589, LR: 0.0003 +[2026-02-28 03:48:27] (step=0009868) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 1.9307376247309724, LR: 0.0003 +[2026-02-28 03:48:40] (step=0009869) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 1.930933281158286, LR: 0.0003 +[2026-02-28 03:48:54] (step=0009870) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 1.9311289375855998, LR: 0.0003 +[2026-02-28 03:49:08] (step=0009871) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 1.9313245940129133, LR: 0.0003 +[2026-02-28 03:49:21] (step=0009872) Train Loss: 0.4400, Train Steps/Sec: 0.07, Epoch: 1.931520250440227, LR: 0.0003 +[2026-02-28 03:49:35] (step=0009873) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 1.9317159068675407, LR: 0.0003 +[2026-02-28 03:49:49] (step=0009874) Train Loss: 0.4435, Train Steps/Sec: 0.07, Epoch: 1.9319115632948543, LR: 0.0003 +[2026-02-28 03:50:02] (step=0009875) Train Loss: 0.4666, Train Steps/Sec: 0.07, Epoch: 1.9321072197221678, LR: 0.0003 +[2026-02-28 03:50:16] (step=0009876) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 1.9323028761494816, LR: 0.0003 +[2026-02-28 03:50:30] (step=0009877) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 1.9324985325767952, LR: 0.0003 +[2026-02-28 03:50:43] (step=0009878) Train Loss: 0.4422, Train Steps/Sec: 0.07, Epoch: 1.9326941890041087, LR: 0.0003 +[2026-02-28 03:50:57] (step=0009879) Train Loss: 0.4319, Train Steps/Sec: 0.07, Epoch: 1.9328898454314225, LR: 0.0003 +[2026-02-28 03:51:11] (step=0009880) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 1.933085501858736, LR: 0.0003 +[2026-02-28 03:51:24] (step=0009881) Train Loss: 0.4616, Train Steps/Sec: 0.07, Epoch: 1.9332811582860496, LR: 0.0003 +[2026-02-28 03:51:38] (step=0009882) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.9334768147133634, LR: 0.0003 +[2026-02-28 03:51:52] (step=0009883) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 1.933672471140677, LR: 0.0003 +[2026-02-28 03:52:05] (step=0009884) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 1.9338681275679905, LR: 0.0003 +[2026-02-28 03:52:19] (step=0009885) Train Loss: 0.4345, Train Steps/Sec: 0.07, Epoch: 1.9340637839953043, LR: 0.0003 +[2026-02-28 03:52:32] (step=0009886) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 1.9342594404226179, LR: 0.0003 +[2026-02-28 03:52:46] (step=0009887) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.9344550968499314, LR: 0.0003 +[2026-02-28 03:53:00] (step=0009888) Train Loss: 0.4640, Train Steps/Sec: 0.07, Epoch: 1.9346507532772452, LR: 0.0003 +[2026-02-28 03:53:13] (step=0009889) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 1.9348464097045588, LR: 0.0003 +[2026-02-28 03:53:27] (step=0009890) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 1.9350420661318724, LR: 0.0003 +[2026-02-28 03:53:41] (step=0009891) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 1.9352377225591861, LR: 0.0003 +[2026-02-28 03:53:54] (step=0009892) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 1.9354333789864997, LR: 0.0003 +[2026-02-28 03:54:08] (step=0009893) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 1.9356290354138133, LR: 0.0003 +[2026-02-28 03:54:22] (step=0009894) Train Loss: 0.4623, Train Steps/Sec: 0.07, Epoch: 1.935824691841127, LR: 0.0003 +[2026-02-28 03:54:35] (step=0009895) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 1.9360203482684406, LR: 0.0003 +[2026-02-28 03:54:49] (step=0009896) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 1.9362160046957542, LR: 0.0003 +[2026-02-28 03:55:02] (step=0009897) Train Loss: 0.4609, Train Steps/Sec: 0.07, Epoch: 1.936411661123068, LR: 0.0003 +[2026-02-28 03:55:16] (step=0009898) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 1.9366073175503815, LR: 0.0003 +[2026-02-28 03:55:30] (step=0009899) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 1.936802973977695, LR: 0.0003 +[2026-02-28 03:55:44] (step=0009900) Train Loss: 0.4651, Train Steps/Sec: 0.07, Epoch: 1.9369986304050089, LR: 0.0003 +[2026-02-28 03:55:57] (step=0009901) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 1.9371942868323224, LR: 0.0003 +[2026-02-28 03:56:11] (step=0009902) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 1.937389943259636, LR: 0.0003 +[2026-02-28 03:56:25] (step=0009903) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 1.9375855996869498, LR: 0.0003 +[2026-02-28 03:56:38] (step=0009904) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.9377812561142633, LR: 0.0003 +[2026-02-28 03:56:52] (step=0009905) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 1.937976912541577, LR: 0.0003 +[2026-02-28 03:57:06] (step=0009906) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 1.9381725689688907, LR: 0.0003 +[2026-02-28 03:57:19] (step=0009907) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.9383682253962042, LR: 0.0003 +[2026-02-28 03:57:33] (step=0009908) Train Loss: 0.4639, Train Steps/Sec: 0.07, Epoch: 1.9385638818235178, LR: 0.0003 +[2026-02-28 03:57:47] (step=0009909) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 1.9387595382508316, LR: 0.0003 +[2026-02-28 03:58:00] (step=0009910) Train Loss: 0.4614, Train Steps/Sec: 0.07, Epoch: 1.9389551946781451, LR: 0.0003 +[2026-02-28 03:58:14] (step=0009911) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.9391508511054587, LR: 0.0003 +[2026-02-28 03:58:28] (step=0009912) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 1.9393465075327725, LR: 0.0003 +[2026-02-28 03:58:41] (step=0009913) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 1.939542163960086, LR: 0.0003 +[2026-02-28 03:58:55] (step=0009914) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 1.9397378203873998, LR: 0.0003 +[2026-02-28 03:59:08] (step=0009915) Train Loss: 0.4606, Train Steps/Sec: 0.07, Epoch: 1.9399334768147134, LR: 0.0003 +[2026-02-28 03:59:22] (step=0009916) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 1.940129133242027, LR: 0.0003 +[2026-02-28 03:59:36] (step=0009917) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.9403247896693407, LR: 0.0003 +[2026-02-28 03:59:49] (step=0009918) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 1.9405204460966543, LR: 0.0003 +[2026-02-28 04:00:03] (step=0009919) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.9407161025239679, LR: 0.0003 +[2026-02-28 04:00:17] (step=0009920) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 1.9409117589512817, LR: 0.0003 +[2026-02-28 04:00:30] (step=0009921) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.9411074153785952, LR: 0.0003 +[2026-02-28 04:00:44] (step=0009922) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 1.9413030718059088, LR: 0.0003 +[2026-02-28 04:00:58] (step=0009923) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 1.9414987282332226, LR: 0.0003 +[2026-02-28 04:01:11] (step=0009924) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.9416943846605361, LR: 0.0003 +[2026-02-28 04:01:25] (step=0009925) Train Loss: 0.4390, Train Steps/Sec: 0.07, Epoch: 1.9418900410878497, LR: 0.0003 +[2026-02-28 04:01:39] (step=0009926) Train Loss: 0.4467, Train Steps/Sec: 0.07, Epoch: 1.9420856975151635, LR: 0.0003 +[2026-02-28 04:01:52] (step=0009927) Train Loss: 0.4564, Train Steps/Sec: 0.07, Epoch: 1.942281353942477, LR: 0.0003 +[2026-02-28 04:02:06] (step=0009928) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 1.9424770103697906, LR: 0.0003 +[2026-02-28 04:02:20] (step=0009929) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 1.9426726667971044, LR: 0.0003 +[2026-02-28 04:02:33] (step=0009930) Train Loss: 0.4640, Train Steps/Sec: 0.07, Epoch: 1.942868323224418, LR: 0.0003 +[2026-02-28 04:02:47] (step=0009931) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 1.9430639796517315, LR: 0.0003 +[2026-02-28 04:03:00] (step=0009932) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.9432596360790453, LR: 0.0003 +[2026-02-28 04:03:14] (step=0009933) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 1.9434552925063588, LR: 0.0003 +[2026-02-28 04:03:28] (step=0009934) Train Loss: 0.4634, Train Steps/Sec: 0.07, Epoch: 1.9436509489336724, LR: 0.0003 +[2026-02-28 04:03:41] (step=0009935) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 1.9438466053609862, LR: 0.0003 +[2026-02-28 04:03:55] (step=0009936) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.9440422617882998, LR: 0.0003 +[2026-02-28 04:04:08] (step=0009937) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 1.9442379182156133, LR: 0.0003 +[2026-02-28 04:04:22] (step=0009938) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 1.944433574642927, LR: 0.0003 +[2026-02-28 04:04:36] (step=0009939) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 1.9446292310702407, LR: 0.0003 +[2026-02-28 04:04:50] (step=0009940) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 1.9448248874975542, LR: 0.0003 +[2026-02-28 04:05:04] (step=0009941) Train Loss: 0.4403, Train Steps/Sec: 0.07, Epoch: 1.945020543924868, LR: 0.0003 +[2026-02-28 04:05:17] (step=0009942) Train Loss: 0.4630, Train Steps/Sec: 0.07, Epoch: 1.9452162003521816, LR: 0.0003 +[2026-02-28 04:05:31] (step=0009943) Train Loss: 0.4421, Train Steps/Sec: 0.07, Epoch: 1.9454118567794951, LR: 0.0003 +[2026-02-28 04:05:44] (step=0009944) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 1.945607513206809, LR: 0.0003 +[2026-02-28 04:05:58] (step=0009945) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 1.9458031696341225, LR: 0.0003 +[2026-02-28 04:06:12] (step=0009946) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.945998826061436, LR: 0.0003 +[2026-02-28 04:06:25] (step=0009947) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 1.9461944824887498, LR: 0.0003 +[2026-02-28 04:06:39] (step=0009948) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.9463901389160634, LR: 0.0003 +[2026-02-28 04:06:53] (step=0009949) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 1.946585795343377, LR: 0.0003 +[2026-02-28 04:07:07] (step=0009950) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.9467814517706907, LR: 0.0003 +[2026-02-28 04:07:20] (step=0009951) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 1.9469771081980043, LR: 0.0003 +[2026-02-28 04:07:34] (step=0009952) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 1.9471727646253179, LR: 0.0003 +[2026-02-28 04:07:48] (step=0009953) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.9473684210526316, LR: 0.0003 +[2026-02-28 04:08:02] (step=0009954) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 1.9475640774799452, LR: 0.0003 +[2026-02-28 04:08:15] (step=0009955) Train Loss: 0.4677, Train Steps/Sec: 0.07, Epoch: 1.9477597339072588, LR: 0.0003 +[2026-02-28 04:08:29] (step=0009956) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 1.9479553903345725, LR: 0.0003 +[2026-02-28 04:08:43] (step=0009957) Train Loss: 0.4682, Train Steps/Sec: 0.07, Epoch: 1.948151046761886, LR: 0.0003 +[2026-02-28 04:08:56] (step=0009958) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 1.9483467031891997, LR: 0.0003 +[2026-02-28 04:09:10] (step=0009959) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 1.9485423596165135, LR: 0.0003 +[2026-02-28 04:09:24] (step=0009960) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 1.948738016043827, LR: 0.0003 +[2026-02-28 04:09:37] (step=0009961) Train Loss: 0.4390, Train Steps/Sec: 0.07, Epoch: 1.9489336724711406, LR: 0.0003 +[2026-02-28 04:09:51] (step=0009962) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 1.9491293288984544, LR: 0.0003 +[2026-02-28 04:10:05] (step=0009963) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.949324985325768, LR: 0.0003 +[2026-02-28 04:10:18] (step=0009964) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 1.9495206417530815, LR: 0.0003 +[2026-02-28 04:10:32] (step=0009965) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 1.9497162981803953, LR: 0.0003 +[2026-02-28 04:10:46] (step=0009966) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 1.9499119546077088, LR: 0.0003 +[2026-02-28 04:11:00] (step=0009967) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 1.9501076110350224, LR: 0.0003 +[2026-02-28 04:11:13] (step=0009968) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 1.9503032674623362, LR: 0.0003 +[2026-02-28 04:11:27] (step=0009969) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 1.9504989238896497, LR: 0.0003 +[2026-02-28 04:11:41] (step=0009970) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 1.9506945803169635, LR: 0.0003 +[2026-02-28 04:11:54] (step=0009971) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 1.950890236744277, LR: 0.0003 +[2026-02-28 04:12:08] (step=0009972) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.9510858931715906, LR: 0.0003 +[2026-02-28 04:12:22] (step=0009973) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 1.9512815495989044, LR: 0.0003 +[2026-02-28 04:12:35] (step=0009974) Train Loss: 0.4679, Train Steps/Sec: 0.07, Epoch: 1.951477206026218, LR: 0.0003 +[2026-02-28 04:12:49] (step=0009975) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.9516728624535316, LR: 0.0003 +[2026-02-28 04:13:03] (step=0009976) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 1.9518685188808453, LR: 0.0003 +[2026-02-28 04:13:16] (step=0009977) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.952064175308159, LR: 0.0003 +[2026-02-28 04:13:30] (step=0009978) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 1.9522598317354725, LR: 0.0003 +[2026-02-28 04:13:44] (step=0009979) Train Loss: 0.4433, Train Steps/Sec: 0.07, Epoch: 1.9524554881627862, LR: 0.0003 +[2026-02-28 04:13:57] (step=0009980) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 1.9526511445900998, LR: 0.0003 +[2026-02-28 04:14:11] (step=0009981) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 1.9528468010174134, LR: 0.0003 +[2026-02-28 04:14:25] (step=0009982) Train Loss: 0.4694, Train Steps/Sec: 0.07, Epoch: 1.9530424574447272, LR: 0.0003 +[2026-02-28 04:14:39] (step=0009983) Train Loss: 0.4608, Train Steps/Sec: 0.07, Epoch: 1.9532381138720407, LR: 0.0003 +[2026-02-28 04:14:53] (step=0009984) Train Loss: 0.4614, Train Steps/Sec: 0.07, Epoch: 1.9534337702993543, LR: 0.0003 +[2026-02-28 04:15:06] (step=0009985) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.953629426726668, LR: 0.0003 +[2026-02-28 04:15:20] (step=0009986) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 1.9538250831539816, LR: 0.0003 +[2026-02-28 04:15:34] (step=0009987) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 1.9540207395812952, LR: 0.0003 +[2026-02-28 04:15:47] (step=0009988) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 1.954216396008609, LR: 0.0003 +[2026-02-28 04:16:01] (step=0009989) Train Loss: 0.4635, Train Steps/Sec: 0.07, Epoch: 1.9544120524359225, LR: 0.0003 +[2026-02-28 04:16:15] (step=0009990) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 1.954607708863236, LR: 0.0003 +[2026-02-28 04:16:28] (step=0009991) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 1.9548033652905499, LR: 0.0003 +[2026-02-28 04:16:42] (step=0009992) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 1.9549990217178634, LR: 0.0003 +[2026-02-28 04:16:56] (step=0009993) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 1.955194678145177, LR: 0.0003 +[2026-02-28 04:17:09] (step=0009994) Train Loss: 0.4408, Train Steps/Sec: 0.07, Epoch: 1.9553903345724908, LR: 0.0003 +[2026-02-28 04:17:23] (step=0009995) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 1.9555859909998043, LR: 0.0003 +[2026-02-28 04:17:37] (step=0009996) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 1.955781647427118, LR: 0.0003 +[2026-02-28 04:17:51] (step=0009997) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 1.9559773038544317, LR: 0.0003 +[2026-02-28 04:18:04] (step=0009998) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.9561729602817453, LR: 0.0003 +[2026-02-28 04:18:18] (step=0009999) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 1.9563686167090588, LR: 0.0003 +[2026-02-28 04:18:32] (step=0010000) Train Loss: 0.4636, Train Steps/Sec: 0.07, Epoch: 1.9565642731363726, LR: 0.0003 +[2026-02-28 04:18:32] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0010000/ +[2026-02-28 04:18:46] (step=0010001) Train Loss: 0.4450, Train Steps/Sec: 0.07, Epoch: 1.9567599295636862, LR: 0.0003 +[2026-02-28 04:18:59] (step=0010002) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.9569555859909997, LR: 0.0003 +[2026-02-28 04:19:13] (step=0010003) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 1.9571512424183135, LR: 0.0003 +[2026-02-28 04:19:27] (step=0010004) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 1.957346898845627, LR: 0.0003 +[2026-02-28 04:19:40] (step=0010005) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 1.9575425552729406, LR: 0.0003 +[2026-02-28 04:19:54] (step=0010006) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.9577382117002544, LR: 0.0003 +[2026-02-28 04:20:08] (step=0010007) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 1.957933868127568, LR: 0.0003 +[2026-02-28 04:20:22] (step=0010008) Train Loss: 0.4741, Train Steps/Sec: 0.07, Epoch: 1.9581295245548815, LR: 0.0003 +[2026-02-28 04:20:35] (step=0010009) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 1.9583251809821953, LR: 0.0003 +[2026-02-28 04:20:49] (step=0010010) Train Loss: 0.4380, Train Steps/Sec: 0.07, Epoch: 1.9585208374095089, LR: 0.0003 +[2026-02-28 04:21:03] (step=0010011) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 1.9587164938368224, LR: 0.0003 +[2026-02-28 04:21:17] (step=0010012) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 1.9589121502641362, LR: 0.0003 +[2026-02-28 04:21:30] (step=0010013) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 1.9591078066914498, LR: 0.0003 +[2026-02-28 04:21:44] (step=0010014) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 1.9593034631187634, LR: 0.0003 +[2026-02-28 04:21:58] (step=0010015) Train Loss: 0.4622, Train Steps/Sec: 0.07, Epoch: 1.9594991195460771, LR: 0.0003 +[2026-02-28 04:22:12] (step=0010016) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 1.9596947759733907, LR: 0.0003 +[2026-02-28 04:22:25] (step=0010017) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 1.9598904324007043, LR: 0.0003 +[2026-02-28 04:22:39] (step=0010018) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 1.960086088828018, LR: 0.0003 +[2026-02-28 04:22:53] (step=0010019) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 1.9602817452553316, LR: 0.0003 +[2026-02-28 04:23:06] (step=0010020) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.9604774016826452, LR: 0.0003 +[2026-02-28 04:23:20] (step=0010021) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.960673058109959, LR: 0.0003 +[2026-02-28 04:23:34] (step=0010022) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 1.9608687145372725, LR: 0.0003 +[2026-02-28 04:23:47] (step=0010023) Train Loss: 0.4449, Train Steps/Sec: 0.07, Epoch: 1.961064370964586, LR: 0.0003 +[2026-02-28 04:24:01] (step=0010024) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 1.9612600273918999, LR: 0.0003 +[2026-02-28 04:24:15] (step=0010025) Train Loss: 0.4662, Train Steps/Sec: 0.07, Epoch: 1.9614556838192134, LR: 0.0003 +[2026-02-28 04:24:29] (step=0010026) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 1.9616513402465272, LR: 0.0003 +[2026-02-28 04:24:43] (step=0010027) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 1.9618469966738408, LR: 0.0003 +[2026-02-28 04:24:56] (step=0010028) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 1.9620426531011543, LR: 0.0003 +[2026-02-28 04:25:10] (step=0010029) Train Loss: 0.4613, Train Steps/Sec: 0.07, Epoch: 1.9622383095284681, LR: 0.0003 +[2026-02-28 04:25:24] (step=0010030) Train Loss: 0.4450, Train Steps/Sec: 0.07, Epoch: 1.9624339659557817, LR: 0.0003 +[2026-02-28 04:25:37] (step=0010031) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 1.9626296223830952, LR: 0.0003 +[2026-02-28 04:25:51] (step=0010032) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.962825278810409, LR: 0.0003 +[2026-02-28 04:26:05] (step=0010033) Train Loss: 0.4622, Train Steps/Sec: 0.07, Epoch: 1.9630209352377226, LR: 0.0003 +[2026-02-28 04:26:18] (step=0010034) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 1.9632165916650361, LR: 0.0003 +[2026-02-28 04:26:32] (step=0010035) Train Loss: 0.4465, Train Steps/Sec: 0.07, Epoch: 1.96341224809235, LR: 0.0003 +[2026-02-28 04:26:46] (step=0010036) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 1.9636079045196635, LR: 0.0003 +[2026-02-28 04:27:00] (step=0010037) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 1.963803560946977, LR: 0.0003 +[2026-02-28 04:27:13] (step=0010038) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 1.9639992173742908, LR: 0.0003 +[2026-02-28 04:27:27] (step=0010039) Train Loss: 0.4607, Train Steps/Sec: 0.07, Epoch: 1.9641948738016044, LR: 0.0003 +[2026-02-28 04:27:41] (step=0010040) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 1.964390530228918, LR: 0.0003 +[2026-02-28 04:27:55] (step=0010041) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 1.9645861866562317, LR: 0.0003 +[2026-02-28 04:28:08] (step=0010042) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 1.9647818430835453, LR: 0.0003 +[2026-02-28 04:28:22] (step=0010043) Train Loss: 0.4639, Train Steps/Sec: 0.07, Epoch: 1.9649774995108589, LR: 0.0003 +[2026-02-28 04:28:36] (step=0010044) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.9651731559381727, LR: 0.0003 +[2026-02-28 04:28:50] (step=0010045) Train Loss: 0.4597, Train Steps/Sec: 0.07, Epoch: 1.9653688123654862, LR: 0.0003 +[2026-02-28 04:29:03] (step=0010046) Train Loss: 0.4437, Train Steps/Sec: 0.07, Epoch: 1.9655644687927998, LR: 0.0003 +[2026-02-28 04:29:17] (step=0010047) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 1.9657601252201136, LR: 0.0003 +[2026-02-28 04:29:31] (step=0010048) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 1.9659557816474271, LR: 0.0003 +[2026-02-28 04:29:44] (step=0010049) Train Loss: 0.4413, Train Steps/Sec: 0.07, Epoch: 1.9661514380747407, LR: 0.0003 +[2026-02-28 04:29:58] (step=0010050) Train Loss: 0.4441, Train Steps/Sec: 0.07, Epoch: 1.9663470945020545, LR: 0.0003 +[2026-02-28 04:30:12] (step=0010051) Train Loss: 0.4481, Train Steps/Sec: 0.07, Epoch: 1.966542750929368, LR: 0.0003 +[2026-02-28 04:30:26] (step=0010052) Train Loss: 0.4661, Train Steps/Sec: 0.07, Epoch: 1.9667384073566816, LR: 0.0003 +[2026-02-28 04:30:39] (step=0010053) Train Loss: 0.4339, Train Steps/Sec: 0.07, Epoch: 1.9669340637839954, LR: 0.0003 +[2026-02-28 04:30:53] (step=0010054) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 1.967129720211309, LR: 0.0003 +[2026-02-28 04:31:07] (step=0010055) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.9673253766386225, LR: 0.0003 +[2026-02-28 04:31:21] (step=0010056) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 1.9675210330659363, LR: 0.0003 +[2026-02-28 04:31:35] (step=0010057) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 1.9677166894932498, LR: 0.0003 +[2026-02-28 04:31:48] (step=0010058) Train Loss: 0.4577, Train Steps/Sec: 0.07, Epoch: 1.9679123459205634, LR: 0.0003 +[2026-02-28 04:32:02] (step=0010059) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 1.9681080023478772, LR: 0.0003 +[2026-02-28 04:32:15] (step=0010060) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 1.9683036587751908, LR: 0.0003 +[2026-02-28 04:32:29] (step=0010061) Train Loss: 0.4489, Train Steps/Sec: 0.07, Epoch: 1.9684993152025043, LR: 0.0003 +[2026-02-28 04:32:43] (step=0010062) Train Loss: 0.4654, Train Steps/Sec: 0.07, Epoch: 1.968694971629818, LR: 0.0003 +[2026-02-28 04:32:57] (step=0010063) Train Loss: 0.4368, Train Steps/Sec: 0.07, Epoch: 1.9688906280571317, LR: 0.0003 +[2026-02-28 04:33:10] (step=0010064) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 1.9690862844844452, LR: 0.0003 +[2026-02-28 04:33:24] (step=0010065) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 1.969281940911759, LR: 0.0003 +[2026-02-28 04:33:38] (step=0010066) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 1.9694775973390726, LR: 0.0003 +[2026-02-28 04:33:52] (step=0010067) Train Loss: 0.4675, Train Steps/Sec: 0.07, Epoch: 1.9696732537663861, LR: 0.0003 +[2026-02-28 04:34:05] (step=0010068) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.9698689101937, LR: 0.0003 +[2026-02-28 04:34:19] (step=0010069) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.9700645666210135, LR: 0.0003 +[2026-02-28 04:34:33] (step=0010070) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 1.970260223048327, LR: 0.0003 +[2026-02-28 04:34:47] (step=0010071) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 1.9704558794756408, LR: 0.0003 +[2026-02-28 04:35:00] (step=0010072) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.9706515359029544, LR: 0.0003 +[2026-02-28 04:35:14] (step=0010073) Train Loss: 0.4409, Train Steps/Sec: 0.07, Epoch: 1.970847192330268, LR: 0.0003 +[2026-02-28 04:35:28] (step=0010074) Train Loss: 0.4376, Train Steps/Sec: 0.07, Epoch: 1.9710428487575817, LR: 0.0003 +[2026-02-28 04:35:41] (step=0010075) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.9712385051848953, LR: 0.0003 +[2026-02-28 04:35:55] (step=0010076) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 1.9714341616122089, LR: 0.0003 +[2026-02-28 04:36:09] (step=0010077) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 1.9716298180395226, LR: 0.0003 +[2026-02-28 04:36:23] (step=0010078) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 1.9718254744668362, LR: 0.0003 +[2026-02-28 04:36:37] (step=0010079) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 1.9720211308941498, LR: 0.0003 +[2026-02-28 04:36:50] (step=0010080) Train Loss: 0.4621, Train Steps/Sec: 0.07, Epoch: 1.9722167873214635, LR: 0.0003 +[2026-02-28 04:37:04] (step=0010081) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 1.9724124437487771, LR: 0.0003 +[2026-02-28 04:37:18] (step=0010082) Train Loss: 0.4419, Train Steps/Sec: 0.07, Epoch: 1.972608100176091, LR: 0.0003 +[2026-02-28 04:37:32] (step=0010083) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 1.9728037566034045, LR: 0.0003 +[2026-02-28 04:37:45] (step=0010084) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.972999413030718, LR: 0.0003 +[2026-02-28 04:37:59] (step=0010085) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 1.9731950694580318, LR: 0.0003 +[2026-02-28 04:38:13] (step=0010086) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 1.9733907258853454, LR: 0.0003 +[2026-02-28 04:38:26] (step=0010087) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 1.973586382312659, LR: 0.0003 +[2026-02-28 04:38:40] (step=0010088) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 1.9737820387399727, LR: 0.0003 +[2026-02-28 04:38:54] (step=0010089) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 1.9739776951672863, LR: 0.0003 +[2026-02-28 04:39:08] (step=0010090) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 1.9741733515945998, LR: 0.0003 +[2026-02-28 04:39:21] (step=0010091) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 1.9743690080219136, LR: 0.0003 +[2026-02-28 04:39:35] (step=0010092) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 1.9745646644492272, LR: 0.0003 +[2026-02-28 04:39:49] (step=0010093) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 1.9747603208765407, LR: 0.0003 +[2026-02-28 04:40:03] (step=0010094) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 1.9749559773038545, LR: 0.0003 +[2026-02-28 04:40:16] (step=0010095) Train Loss: 0.4645, Train Steps/Sec: 0.07, Epoch: 1.975151633731168, LR: 0.0003 +[2026-02-28 04:40:30] (step=0010096) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 1.9753472901584817, LR: 0.0003 +[2026-02-28 04:40:44] (step=0010097) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 1.9755429465857954, LR: 0.0003 +[2026-02-28 04:40:58] (step=0010098) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 1.975738603013109, LR: 0.0003 +[2026-02-28 04:41:11] (step=0010099) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 1.9759342594404226, LR: 0.0003 +[2026-02-28 04:41:25] (step=0010100) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.9761299158677363, LR: 0.0003 +[2026-02-28 04:41:39] (step=0010101) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 1.97632557229505, LR: 0.0003 +[2026-02-28 04:41:52] (step=0010102) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 1.9765212287223635, LR: 0.0003 +[2026-02-28 04:42:06] (step=0010103) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 1.9767168851496772, LR: 0.0003 +[2026-02-28 04:42:20] (step=0010104) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 1.9769125415769908, LR: 0.0003 +[2026-02-28 04:42:34] (step=0010105) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.9771081980043044, LR: 0.0003 +[2026-02-28 04:42:47] (step=0010106) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 1.9773038544316182, LR: 0.0003 +[2026-02-28 04:43:01] (step=0010107) Train Loss: 0.4456, Train Steps/Sec: 0.07, Epoch: 1.9774995108589317, LR: 0.0003 +[2026-02-28 04:43:15] (step=0010108) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 1.9776951672862453, LR: 0.0003 +[2026-02-28 04:43:28] (step=0010109) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 1.977890823713559, LR: 0.0003 +[2026-02-28 04:43:42] (step=0010110) Train Loss: 0.4410, Train Steps/Sec: 0.07, Epoch: 1.9780864801408726, LR: 0.0003 +[2026-02-28 04:43:56] (step=0010111) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 1.9782821365681862, LR: 0.0003 +[2026-02-28 04:44:10] (step=0010112) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.9784777929955, LR: 0.0003 +[2026-02-28 04:44:24] (step=0010113) Train Loss: 0.4629, Train Steps/Sec: 0.07, Epoch: 1.9786734494228135, LR: 0.0003 +[2026-02-28 04:44:37] (step=0010114) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 1.978869105850127, LR: 0.0003 +[2026-02-28 04:44:51] (step=0010115) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 1.9790647622774409, LR: 0.0003 +[2026-02-28 04:45:05] (step=0010116) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.9792604187047544, LR: 0.0003 +[2026-02-28 04:45:18] (step=0010117) Train Loss: 0.4439, Train Steps/Sec: 0.07, Epoch: 1.979456075132068, LR: 0.0003 +[2026-02-28 04:45:32] (step=0010118) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.9796517315593818, LR: 0.0003 +[2026-02-28 04:45:46] (step=0010119) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.9798473879866954, LR: 0.0003 +[2026-02-28 04:46:00] (step=0010120) Train Loss: 0.4620, Train Steps/Sec: 0.07, Epoch: 1.980043044414009, LR: 0.0003 +[2026-02-28 04:46:13] (step=0010121) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 1.9802387008413227, LR: 0.0003 +[2026-02-28 04:46:27] (step=0010122) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 1.9804343572686363, LR: 0.0003 +[2026-02-28 04:46:41] (step=0010123) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.9806300136959498, LR: 0.0003 +[2026-02-28 04:46:55] (step=0010124) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 1.9808256701232636, LR: 0.0003 +[2026-02-28 04:47:08] (step=0010125) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.9810213265505772, LR: 0.0003 +[2026-02-28 04:47:22] (step=0010126) Train Loss: 0.4637, Train Steps/Sec: 0.07, Epoch: 1.9812169829778907, LR: 0.0003 +[2026-02-28 04:47:36] (step=0010127) Train Loss: 0.4690, Train Steps/Sec: 0.07, Epoch: 1.9814126394052045, LR: 0.0003 +[2026-02-28 04:47:50] (step=0010128) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.981608295832518, LR: 0.0003 +[2026-02-28 04:48:03] (step=0010129) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 1.9818039522598316, LR: 0.0003 +[2026-02-28 04:48:17] (step=0010130) Train Loss: 0.4429, Train Steps/Sec: 0.07, Epoch: 1.9819996086871454, LR: 0.0003 +[2026-02-28 04:48:31] (step=0010131) Train Loss: 0.4433, Train Steps/Sec: 0.07, Epoch: 1.982195265114459, LR: 0.0003 +[2026-02-28 04:48:44] (step=0010132) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 1.9823909215417725, LR: 0.0003 +[2026-02-28 04:48:58] (step=0010133) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 1.9825865779690863, LR: 0.0003 +[2026-02-28 04:49:12] (step=0010134) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 1.9827822343964, LR: 0.0003 +[2026-02-28 04:49:26] (step=0010135) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 1.9829778908237135, LR: 0.0003 +[2026-02-28 04:49:40] (step=0010136) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 1.9831735472510272, LR: 0.0003 +[2026-02-28 04:49:53] (step=0010137) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 1.9833692036783408, LR: 0.0003 +[2026-02-28 04:50:07] (step=0010138) Train Loss: 0.4673, Train Steps/Sec: 0.07, Epoch: 1.9835648601056546, LR: 0.0003 +[2026-02-28 04:50:21] (step=0010139) Train Loss: 0.4650, Train Steps/Sec: 0.07, Epoch: 1.9837605165329681, LR: 0.0003 +[2026-02-28 04:50:34] (step=0010140) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 1.9839561729602817, LR: 0.0003 +[2026-02-28 04:50:48] (step=0010141) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 1.9841518293875955, LR: 0.0003 +[2026-02-28 04:51:02] (step=0010142) Train Loss: 0.4620, Train Steps/Sec: 0.07, Epoch: 1.984347485814909, LR: 0.0003 +[2026-02-28 04:51:16] (step=0010143) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 1.9845431422422226, LR: 0.0003 +[2026-02-28 04:51:30] (step=0010144) Train Loss: 0.4423, Train Steps/Sec: 0.07, Epoch: 1.9847387986695364, LR: 0.0003 +[2026-02-28 04:51:43] (step=0010145) Train Loss: 0.4654, Train Steps/Sec: 0.07, Epoch: 1.98493445509685, LR: 0.0003 +[2026-02-28 04:51:57] (step=0010146) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 1.9851301115241635, LR: 0.0003 +[2026-02-28 04:52:11] (step=0010147) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 1.9853257679514773, LR: 0.0003 +[2026-02-28 04:52:25] (step=0010148) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 1.9855214243787909, LR: 0.0003 +[2026-02-28 04:52:38] (step=0010149) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 1.9857170808061044, LR: 0.0003 +[2026-02-28 04:52:52] (step=0010150) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.9859127372334182, LR: 0.0003 +[2026-02-28 04:53:06] (step=0010151) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 1.9861083936607318, LR: 0.0003 +[2026-02-28 04:53:19] (step=0010152) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 1.9863040500880453, LR: 0.0003 +[2026-02-28 04:53:33] (step=0010153) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 1.9864997065153591, LR: 0.0003 +[2026-02-28 04:53:47] (step=0010154) Train Loss: 0.4410, Train Steps/Sec: 0.07, Epoch: 1.9866953629426727, LR: 0.0003 +[2026-02-28 04:54:00] (step=0010155) Train Loss: 0.4646, Train Steps/Sec: 0.07, Epoch: 1.9868910193699862, LR: 0.0003 +[2026-02-28 04:54:14] (step=0010156) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.9870866757973, LR: 0.0003 +[2026-02-28 04:54:28] (step=0010157) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 1.9872823322246136, LR: 0.0003 +[2026-02-28 04:54:42] (step=0010158) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 1.9874779886519272, LR: 0.0003 +[2026-02-28 04:54:55] (step=0010159) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 1.987673645079241, LR: 0.0003 +[2026-02-28 04:55:09] (step=0010160) Train Loss: 0.4660, Train Steps/Sec: 0.07, Epoch: 1.9878693015065545, LR: 0.0003 +[2026-02-28 04:55:23] (step=0010161) Train Loss: 0.4446, Train Steps/Sec: 0.07, Epoch: 1.988064957933868, LR: 0.0003 +[2026-02-28 04:55:37] (step=0010162) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 1.9882606143611818, LR: 0.0003 +[2026-02-28 04:55:50] (step=0010163) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 1.9884562707884954, LR: 0.0003 +[2026-02-28 04:56:04] (step=0010164) Train Loss: 0.4401, Train Steps/Sec: 0.07, Epoch: 1.988651927215809, LR: 0.0003 +[2026-02-28 04:56:18] (step=0010165) Train Loss: 0.4645, Train Steps/Sec: 0.07, Epoch: 1.9888475836431228, LR: 0.0003 +[2026-02-28 04:56:32] (step=0010166) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 1.9890432400704363, LR: 0.0003 +[2026-02-28 04:56:45] (step=0010167) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 1.9892388964977499, LR: 0.0003 +[2026-02-28 04:56:59] (step=0010168) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 1.9894345529250637, LR: 0.0003 +[2026-02-28 04:57:13] (step=0010169) Train Loss: 0.4597, Train Steps/Sec: 0.07, Epoch: 1.9896302093523772, LR: 0.0003 +[2026-02-28 04:57:26] (step=0010170) Train Loss: 0.4420, Train Steps/Sec: 0.07, Epoch: 1.9898258657796908, LR: 0.0003 +[2026-02-28 04:57:40] (step=0010171) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 1.9900215222070046, LR: 0.0003 +[2026-02-28 04:57:54] (step=0010172) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 1.9902171786343181, LR: 0.0003 +[2026-02-28 04:58:08] (step=0010173) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 1.9904128350616317, LR: 0.0003 +[2026-02-28 04:58:22] (step=0010174) Train Loss: 0.4664, Train Steps/Sec: 0.07, Epoch: 1.9906084914889455, LR: 0.0003 +[2026-02-28 04:58:35] (step=0010175) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 1.990804147916259, LR: 0.0003 +[2026-02-28 04:58:49] (step=0010176) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 1.9909998043435726, LR: 0.0003 +[2026-02-28 04:59:03] (step=0010177) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 1.9911954607708864, LR: 0.0003 +[2026-02-28 04:59:16] (step=0010178) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.9913911171982, LR: 0.0003 +[2026-02-28 04:59:30] (step=0010179) Train Loss: 0.4350, Train Steps/Sec: 0.07, Epoch: 1.9915867736255135, LR: 0.0003 +[2026-02-28 04:59:44] (step=0010180) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 1.9917824300528273, LR: 0.0003 +[2026-02-28 04:59:58] (step=0010181) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 1.9919780864801409, LR: 0.0003 +[2026-02-28 05:00:11] (step=0010182) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 1.9921737429074544, LR: 0.0003 +[2026-02-28 05:00:25] (step=0010183) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 1.9923693993347682, LR: 0.0003 +[2026-02-28 05:00:39] (step=0010184) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 1.9925650557620818, LR: 0.0003 +[2026-02-28 05:00:53] (step=0010185) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 1.9927607121893953, LR: 0.0003 +[2026-02-28 05:01:06] (step=0010186) Train Loss: 0.4599, Train Steps/Sec: 0.07, Epoch: 1.992956368616709, LR: 0.0003 +[2026-02-28 05:01:20] (step=0010187) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 1.9931520250440227, LR: 0.0003 +[2026-02-28 05:01:34] (step=0010188) Train Loss: 0.4626, Train Steps/Sec: 0.07, Epoch: 1.9933476814713362, LR: 0.0003 +[2026-02-28 05:01:48] (step=0010189) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 1.99354333789865, LR: 0.0003 +[2026-02-28 05:02:01] (step=0010190) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 1.9937389943259636, LR: 0.0003 +[2026-02-28 05:02:15] (step=0010191) Train Loss: 0.4438, Train Steps/Sec: 0.07, Epoch: 1.9939346507532771, LR: 0.0003 +[2026-02-28 05:02:29] (step=0010192) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 1.994130307180591, LR: 0.0003 +[2026-02-28 05:02:42] (step=0010193) Train Loss: 0.4599, Train Steps/Sec: 0.07, Epoch: 1.9943259636079045, LR: 0.0003 +[2026-02-28 05:02:56] (step=0010194) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 1.9945216200352183, LR: 0.0003 +[2026-02-28 05:03:10] (step=0010195) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 1.9947172764625318, LR: 0.0003 +[2026-02-28 05:03:23] (step=0010196) Train Loss: 0.4423, Train Steps/Sec: 0.07, Epoch: 1.9949129328898454, LR: 0.0003 +[2026-02-28 05:03:37] (step=0010197) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 1.9951085893171592, LR: 0.0003 +[2026-02-28 05:03:51] (step=0010198) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.9953042457444727, LR: 0.0003 +[2026-02-28 05:04:05] (step=0010199) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 1.9954999021717863, LR: 0.0003 +[2026-02-28 05:04:18] (step=0010200) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 1.9956955585991, LR: 0.0003 +[2026-02-28 05:04:32] (step=0010201) Train Loss: 0.4579, Train Steps/Sec: 0.07, Epoch: 1.9958912150264136, LR: 0.0003 +[2026-02-28 05:04:46] (step=0010202) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 1.9960868714537272, LR: 0.0003 +[2026-02-28 05:05:00] (step=0010203) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 1.996282527881041, LR: 0.0003 +[2026-02-28 05:05:13] (step=0010204) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 1.9964781843083546, LR: 0.0003 +[2026-02-28 05:05:27] (step=0010205) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 1.9966738407356681, LR: 0.0003 +[2026-02-28 05:05:41] (step=0010206) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 1.996869497162982, LR: 0.0003 +[2026-02-28 05:05:54] (step=0010207) Train Loss: 0.4647, Train Steps/Sec: 0.07, Epoch: 1.9970651535902955, LR: 0.0003 +[2026-02-28 05:06:08] (step=0010208) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 1.997260810017609, LR: 0.0003 +[2026-02-28 05:06:22] (step=0010209) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 1.9974564664449228, LR: 0.0003 +[2026-02-28 05:06:36] (step=0010210) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 1.9976521228722364, LR: 0.0003 +[2026-02-28 05:06:49] (step=0010211) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 1.99784777929955, LR: 0.0003 +[2026-02-28 05:07:03] (step=0010212) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 1.9980434357268637, LR: 0.0003 +[2026-02-28 05:07:17] (step=0010213) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 1.9982390921541773, LR: 0.0003 +[2026-02-28 05:07:31] (step=0010214) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 1.9984347485814908, LR: 0.0003 +[2026-02-28 05:07:45] (step=0010215) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 1.9986304050088046, LR: 0.0003 +[2026-02-28 05:07:58] (step=0010216) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 1.9988260614361182, LR: 0.0003 +[2026-02-28 05:08:12] (step=0010217) Train Loss: 0.4449, Train Steps/Sec: 0.07, Epoch: 1.9990217178634317, LR: 0.0003 +[2026-02-28 05:08:26] (step=0010218) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 1.9992173742907455, LR: 0.0003 +[2026-02-28 05:08:39] (step=0010219) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 1.999413030718059, LR: 0.0003 +[2026-02-28 05:08:53] (step=0010220) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 1.9996086871453727, LR: 0.0003 +[2026-02-28 05:09:07] (step=0010221) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 1.9998043435726864, LR: 0.0003 +[2026-02-28 05:09:21] (step=0010222) Train Loss: 0.4441, Train Steps/Sec: 0.07, Epoch: 2.0, LR: 0.0003 +[2026-02-28 05:09:21] Beginning epoch 2... +[2026-02-28 05:09:36] (step=0010223) Train Loss: 0.4469, Train Steps/Sec: 0.06, Epoch: 2.0001956564273136, LR: 0.0003 +[2026-02-28 05:09:50] (step=0010224) Train Loss: 0.4413, Train Steps/Sec: 0.07, Epoch: 2.000391312854627, LR: 0.0003 +[2026-02-28 05:10:04] (step=0010225) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.000586969281941, LR: 0.0003 +[2026-02-28 05:10:17] (step=0010226) Train Loss: 0.4414, Train Steps/Sec: 0.07, Epoch: 2.0007826257092547, LR: 0.0003 +[2026-02-28 05:10:31] (step=0010227) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 2.0009782821365683, LR: 0.0003 +[2026-02-28 05:10:45] (step=0010228) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.001173938563882, LR: 0.0003 +[2026-02-28 05:10:58] (step=0010229) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.0013695949911954, LR: 0.0003 +[2026-02-28 05:11:12] (step=0010230) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.001565251418509, LR: 0.0003 +[2026-02-28 05:11:26] (step=0010231) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 2.001760907845823, LR: 0.0003 +[2026-02-28 05:11:39] (step=0010232) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 2.0019565642731365, LR: 0.0003 +[2026-02-28 05:11:53] (step=0010233) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.00215222070045, LR: 0.0003 +[2026-02-28 05:12:07] (step=0010234) Train Loss: 0.4342, Train Steps/Sec: 0.07, Epoch: 2.0023478771277636, LR: 0.0003 +[2026-02-28 05:12:20] (step=0010235) Train Loss: 0.4632, Train Steps/Sec: 0.07, Epoch: 2.002543533555077, LR: 0.0003 +[2026-02-28 05:12:34] (step=0010236) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.0027391899823908, LR: 0.0003 +[2026-02-28 05:12:48] (step=0010237) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 2.0029348464097048, LR: 0.0003 +[2026-02-28 05:13:02] (step=0010238) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 2.0031305028370183, LR: 0.0003 +[2026-02-28 05:13:16] (step=0010239) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.003326159264332, LR: 0.0003 +[2026-02-28 05:13:29] (step=0010240) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.0035218156916454, LR: 0.0003 +[2026-02-28 05:13:43] (step=0010241) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.003717472118959, LR: 0.0003 +[2026-02-28 05:13:56] (step=0010242) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.0039131285462726, LR: 0.0003 +[2026-02-28 05:14:10] (step=0010243) Train Loss: 0.4383, Train Steps/Sec: 0.07, Epoch: 2.0041087849735866, LR: 0.0003 +[2026-02-28 05:14:24] (step=0010244) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 2.0043044414009, LR: 0.0003 +[2026-02-28 05:14:37] (step=0010245) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.0045000978282137, LR: 0.0003 +[2026-02-28 05:14:51] (step=0010246) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 2.0046957542555273, LR: 0.0003 +[2026-02-28 05:15:05] (step=0010247) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.004891410682841, LR: 0.0003 +[2026-02-28 05:15:18] (step=0010248) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.0050870671101544, LR: 0.0003 +[2026-02-28 05:15:32] (step=0010249) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 2.0052827235374684, LR: 0.0003 +[2026-02-28 05:15:46] (step=0010250) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 2.005478379964782, LR: 0.0003 +[2026-02-28 05:16:00] (step=0010251) Train Loss: 0.4634, Train Steps/Sec: 0.07, Epoch: 2.0056740363920955, LR: 0.0003 +[2026-02-28 05:16:13] (step=0010252) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.005869692819409, LR: 0.0003 +[2026-02-28 05:16:27] (step=0010253) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 2.0060653492467226, LR: 0.0003 +[2026-02-28 05:16:41] (step=0010254) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.006261005674036, LR: 0.0003 +[2026-02-28 05:16:55] (step=0010255) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 2.00645666210135, LR: 0.0003 +[2026-02-28 05:17:08] (step=0010256) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.0066523185286638, LR: 0.0003 +[2026-02-28 05:17:22] (step=0010257) Train Loss: 0.4613, Train Steps/Sec: 0.07, Epoch: 2.0068479749559773, LR: 0.0003 +[2026-02-28 05:17:35] (step=0010258) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 2.007043631383291, LR: 0.0003 +[2026-02-28 05:17:49] (step=0010259) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 2.0072392878106045, LR: 0.0003 +[2026-02-28 05:18:03] (step=0010260) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.007434944237918, LR: 0.0003 +[2026-02-28 05:18:16] (step=0010261) Train Loss: 0.4579, Train Steps/Sec: 0.07, Epoch: 2.007630600665232, LR: 0.0003 +[2026-02-28 05:18:30] (step=0010262) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.0078262570925456, LR: 0.0003 +[2026-02-28 05:18:44] (step=0010263) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.008021913519859, LR: 0.0003 +[2026-02-28 05:18:58] (step=0010264) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.0082175699471727, LR: 0.0003 +[2026-02-28 05:19:11] (step=0010265) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 2.0084132263744863, LR: 0.0003 +[2026-02-28 05:19:25] (step=0010266) Train Loss: 0.4635, Train Steps/Sec: 0.07, Epoch: 2.0086088828018, LR: 0.0003 +[2026-02-28 05:19:39] (step=0010267) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.008804539229114, LR: 0.0003 +[2026-02-28 05:19:52] (step=0010268) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.0090001956564274, LR: 0.0003 +[2026-02-28 05:20:06] (step=0010269) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 2.009195852083741, LR: 0.0003 +[2026-02-28 05:20:20] (step=0010270) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.0093915085110545, LR: 0.0003 +[2026-02-28 05:20:34] (step=0010271) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 2.009587164938368, LR: 0.0003 +[2026-02-28 05:20:47] (step=0010272) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 2.0097828213656816, LR: 0.0003 +[2026-02-28 05:21:01] (step=0010273) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 2.0099784777929957, LR: 0.0003 +[2026-02-28 05:21:14] (step=0010274) Train Loss: 0.4657, Train Steps/Sec: 0.07, Epoch: 2.010174134220309, LR: 0.0003 +[2026-02-28 05:21:28] (step=0010275) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.010369790647623, LR: 0.0003 +[2026-02-28 05:21:42] (step=0010276) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.0105654470749363, LR: 0.0003 +[2026-02-28 05:21:56] (step=0010277) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.01076110350225, LR: 0.0003 +[2026-02-28 05:22:09] (step=0010278) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.0109567599295635, LR: 0.0003 +[2026-02-28 05:22:23] (step=0010279) Train Loss: 0.4465, Train Steps/Sec: 0.07, Epoch: 2.0111524163568775, LR: 0.0003 +[2026-02-28 05:22:37] (step=0010280) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.011348072784191, LR: 0.0003 +[2026-02-28 05:22:50] (step=0010281) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 2.0115437292115046, LR: 0.0003 +[2026-02-28 05:23:04] (step=0010282) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.011739385638818, LR: 0.0003 +[2026-02-28 05:23:18] (step=0010283) Train Loss: 0.4446, Train Steps/Sec: 0.07, Epoch: 2.0119350420661317, LR: 0.0003 +[2026-02-28 05:23:32] (step=0010284) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 2.0121306984934457, LR: 0.0003 +[2026-02-28 05:23:46] (step=0010285) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.0123263549207593, LR: 0.0003 +[2026-02-28 05:23:59] (step=0010286) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 2.012522011348073, LR: 0.0003 +[2026-02-28 05:24:13] (step=0010287) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 2.0127176677753864, LR: 0.0003 +[2026-02-28 05:24:26] (step=0010288) Train Loss: 0.4356, Train Steps/Sec: 0.07, Epoch: 2.0129133242027, LR: 0.0003 +[2026-02-28 05:24:40] (step=0010289) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.0131089806300135, LR: 0.0003 +[2026-02-28 05:24:54] (step=0010290) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 2.0133046370573275, LR: 0.0003 +[2026-02-28 05:25:07] (step=0010291) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.013500293484641, LR: 0.0003 +[2026-02-28 05:25:21] (step=0010292) Train Loss: 0.4433, Train Steps/Sec: 0.07, Epoch: 2.0136959499119547, LR: 0.0003 +[2026-02-28 05:25:35] (step=0010293) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.0138916063392682, LR: 0.0003 +[2026-02-28 05:25:49] (step=0010294) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.014087262766582, LR: 0.0003 +[2026-02-28 05:26:02] (step=0010295) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 2.0142829191938953, LR: 0.0003 +[2026-02-28 05:26:16] (step=0010296) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.0144785756212094, LR: 0.0003 +[2026-02-28 05:26:30] (step=0010297) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.014674232048523, LR: 0.0003 +[2026-02-28 05:26:44] (step=0010298) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.0148698884758365, LR: 0.0003 +[2026-02-28 05:26:57] (step=0010299) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.01506554490315, LR: 0.0003 +[2026-02-28 05:27:11] (step=0010300) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.0152612013304636, LR: 0.0003 +[2026-02-28 05:27:25] (step=0010301) Train Loss: 0.4672, Train Steps/Sec: 0.07, Epoch: 2.015456857757777, LR: 0.0003 +[2026-02-28 05:27:38] (step=0010302) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 2.015652514185091, LR: 0.0003 +[2026-02-28 05:27:52] (step=0010303) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.0158481706124047, LR: 0.0003 +[2026-02-28 05:28:06] (step=0010304) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 2.0160438270397183, LR: 0.0003 +[2026-02-28 05:28:19] (step=0010305) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.016239483467032, LR: 0.0003 +[2026-02-28 05:28:33] (step=0010306) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 2.0164351398943454, LR: 0.0003 +[2026-02-28 05:28:47] (step=0010307) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.016630796321659, LR: 0.0003 +[2026-02-28 05:29:00] (step=0010308) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 2.016826452748973, LR: 0.0003 +[2026-02-28 05:29:14] (step=0010309) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.0170221091762865, LR: 0.0003 +[2026-02-28 05:29:28] (step=0010310) Train Loss: 0.4623, Train Steps/Sec: 0.07, Epoch: 2.0172177656036, LR: 0.0003 +[2026-02-28 05:29:41] (step=0010311) Train Loss: 0.4443, Train Steps/Sec: 0.07, Epoch: 2.0174134220309137, LR: 0.0003 +[2026-02-28 05:29:55] (step=0010312) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.0176090784582272, LR: 0.0003 +[2026-02-28 05:30:09] (step=0010313) Train Loss: 0.4636, Train Steps/Sec: 0.07, Epoch: 2.017804734885541, LR: 0.0003 +[2026-02-28 05:30:23] (step=0010314) Train Loss: 0.4630, Train Steps/Sec: 0.07, Epoch: 2.018000391312855, LR: 0.0003 +[2026-02-28 05:30:36] (step=0010315) Train Loss: 0.4577, Train Steps/Sec: 0.07, Epoch: 2.0181960477401684, LR: 0.0003 +[2026-02-28 05:30:50] (step=0010316) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 2.018391704167482, LR: 0.0003 +[2026-02-28 05:31:04] (step=0010317) Train Loss: 0.4629, Train Steps/Sec: 0.07, Epoch: 2.0185873605947955, LR: 0.0003 +[2026-02-28 05:31:17] (step=0010318) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 2.018783017022109, LR: 0.0003 +[2026-02-28 05:31:31] (step=0010319) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 2.0189786734494226, LR: 0.0003 +[2026-02-28 05:31:44] (step=0010320) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 2.0191743298767366, LR: 0.0003 +[2026-02-28 05:31:58] (step=0010321) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.01936998630405, LR: 0.0003 +[2026-02-28 05:32:12] (step=0010322) Train Loss: 0.4676, Train Steps/Sec: 0.07, Epoch: 2.0195656427313637, LR: 0.0003 +[2026-02-28 05:32:26] (step=0010323) Train Loss: 0.4587, Train Steps/Sec: 0.07, Epoch: 2.0197612991586773, LR: 0.0003 +[2026-02-28 05:32:39] (step=0010324) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.019956955585991, LR: 0.0003 +[2026-02-28 05:32:53] (step=0010325) Train Loss: 0.4348, Train Steps/Sec: 0.07, Epoch: 2.0201526120133044, LR: 0.0003 +[2026-02-28 05:33:07] (step=0010326) Train Loss: 0.4593, Train Steps/Sec: 0.07, Epoch: 2.0203482684406184, LR: 0.0003 +[2026-02-28 05:33:21] (step=0010327) Train Loss: 0.4399, Train Steps/Sec: 0.07, Epoch: 2.020543924867932, LR: 0.0003 +[2026-02-28 05:33:34] (step=0010328) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 2.0207395812952456, LR: 0.0003 +[2026-02-28 05:33:48] (step=0010329) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.020935237722559, LR: 0.0003 +[2026-02-28 05:34:02] (step=0010330) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 2.0211308941498727, LR: 0.0003 +[2026-02-28 05:34:15] (step=0010331) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.0213265505771862, LR: 0.0003 +[2026-02-28 05:34:29] (step=0010332) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 2.0215222070045002, LR: 0.0003 +[2026-02-28 05:34:43] (step=0010333) Train Loss: 0.4740, Train Steps/Sec: 0.07, Epoch: 2.021717863431814, LR: 0.0003 +[2026-02-28 05:34:56] (step=0010334) Train Loss: 0.4663, Train Steps/Sec: 0.07, Epoch: 2.0219135198591274, LR: 0.0003 +[2026-02-28 05:35:10] (step=0010335) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 2.022109176286441, LR: 0.0003 +[2026-02-28 05:35:24] (step=0010336) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.0223048327137545, LR: 0.0003 +[2026-02-28 05:35:37] (step=0010337) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 2.0225004891410685, LR: 0.0003 +[2026-02-28 05:35:51] (step=0010338) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.022696145568382, LR: 0.0003 +[2026-02-28 05:36:05] (step=0010339) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 2.0228918019956956, LR: 0.0003 +[2026-02-28 05:36:18] (step=0010340) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 2.023087458423009, LR: 0.0003 +[2026-02-28 05:36:32] (step=0010341) Train Loss: 0.4433, Train Steps/Sec: 0.07, Epoch: 2.0232831148503228, LR: 0.0003 +[2026-02-28 05:36:46] (step=0010342) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.0234787712776363, LR: 0.0003 +[2026-02-28 05:37:00] (step=0010343) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.0236744277049503, LR: 0.0003 +[2026-02-28 05:37:13] (step=0010344) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 2.023870084132264, LR: 0.0003 +[2026-02-28 05:37:27] (step=0010345) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 2.0240657405595774, LR: 0.0003 +[2026-02-28 05:37:41] (step=0010346) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.024261396986891, LR: 0.0003 +[2026-02-28 05:37:55] (step=0010347) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.0244570534142046, LR: 0.0003 +[2026-02-28 05:38:08] (step=0010348) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 2.024652709841518, LR: 0.0003 +[2026-02-28 05:38:22] (step=0010349) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.024848366268832, LR: 0.0003 +[2026-02-28 05:38:35] (step=0010350) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.0250440226961457, LR: 0.0003 +[2026-02-28 05:38:49] (step=0010351) Train Loss: 0.4700, Train Steps/Sec: 0.07, Epoch: 2.0252396791234593, LR: 0.0003 +[2026-02-28 05:39:03] (step=0010352) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.025435335550773, LR: 0.0003 +[2026-02-28 05:39:17] (step=0010353) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 2.0256309919780864, LR: 0.0003 +[2026-02-28 05:39:30] (step=0010354) Train Loss: 0.4428, Train Steps/Sec: 0.07, Epoch: 2.0258266484054, LR: 0.0003 +[2026-02-28 05:39:44] (step=0010355) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.026022304832714, LR: 0.0003 +[2026-02-28 05:39:58] (step=0010356) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.0262179612600275, LR: 0.0003 +[2026-02-28 05:40:11] (step=0010357) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.026413617687341, LR: 0.0003 +[2026-02-28 05:40:25] (step=0010358) Train Loss: 0.4610, Train Steps/Sec: 0.07, Epoch: 2.0266092741146546, LR: 0.0003 +[2026-02-28 05:40:39] (step=0010359) Train Loss: 0.4623, Train Steps/Sec: 0.07, Epoch: 2.026804930541968, LR: 0.0003 +[2026-02-28 05:40:53] (step=0010360) Train Loss: 0.4564, Train Steps/Sec: 0.07, Epoch: 2.0270005869692818, LR: 0.0003 +[2026-02-28 05:41:06] (step=0010361) Train Loss: 0.4634, Train Steps/Sec: 0.07, Epoch: 2.0271962433965958, LR: 0.0003 +[2026-02-28 05:41:20] (step=0010362) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.0273918998239093, LR: 0.0003 +[2026-02-28 05:41:34] (step=0010363) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 2.027587556251223, LR: 0.0003 +[2026-02-28 05:41:47] (step=0010364) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.0277832126785365, LR: 0.0003 +[2026-02-28 05:42:01] (step=0010365) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.02797886910585, LR: 0.0003 +[2026-02-28 05:42:15] (step=0010366) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 2.0281745255331636, LR: 0.0003 +[2026-02-28 05:42:28] (step=0010367) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.0283701819604776, LR: 0.0003 +[2026-02-28 05:42:42] (step=0010368) Train Loss: 0.4659, Train Steps/Sec: 0.07, Epoch: 2.028565838387791, LR: 0.0003 +[2026-02-28 05:42:56] (step=0010369) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 2.0287614948151047, LR: 0.0003 +[2026-02-28 05:43:09] (step=0010370) Train Loss: 0.4465, Train Steps/Sec: 0.07, Epoch: 2.0289571512424183, LR: 0.0003 +[2026-02-28 05:43:23] (step=0010371) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 2.029152807669732, LR: 0.0003 +[2026-02-28 05:43:37] (step=0010372) Train Loss: 0.4649, Train Steps/Sec: 0.07, Epoch: 2.0293484640970454, LR: 0.0003 +[2026-02-28 05:43:51] (step=0010373) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 2.0295441205243594, LR: 0.0003 +[2026-02-28 05:44:05] (step=0010374) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 2.029739776951673, LR: 0.0003 +[2026-02-28 05:44:18] (step=0010375) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.0299354333789865, LR: 0.0003 +[2026-02-28 05:44:32] (step=0010376) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 2.0301310898063, LR: 0.0003 +[2026-02-28 05:44:45] (step=0010377) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.0303267462336136, LR: 0.0003 +[2026-02-28 05:44:59] (step=0010378) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.030522402660927, LR: 0.0003 +[2026-02-28 05:45:13] (step=0010379) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.030718059088241, LR: 0.0003 +[2026-02-28 05:45:26] (step=0010380) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 2.0309137155155548, LR: 0.0003 +[2026-02-28 05:45:40] (step=0010381) Train Loss: 0.4365, Train Steps/Sec: 0.07, Epoch: 2.0311093719428683, LR: 0.0003 +[2026-02-28 05:45:54] (step=0010382) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 2.031305028370182, LR: 0.0003 +[2026-02-28 05:46:07] (step=0010383) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.0315006847974955, LR: 0.0003 +[2026-02-28 05:46:21] (step=0010384) Train Loss: 0.4633, Train Steps/Sec: 0.07, Epoch: 2.031696341224809, LR: 0.0003 +[2026-02-28 05:46:35] (step=0010385) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.031891997652123, LR: 0.0003 +[2026-02-28 05:46:49] (step=0010386) Train Loss: 0.4413, Train Steps/Sec: 0.07, Epoch: 2.0320876540794366, LR: 0.0003 +[2026-02-28 05:47:02] (step=0010387) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.03228331050675, LR: 0.0003 +[2026-02-28 05:47:16] (step=0010388) Train Loss: 0.4618, Train Steps/Sec: 0.07, Epoch: 2.0324789669340637, LR: 0.0003 +[2026-02-28 05:47:30] (step=0010389) Train Loss: 0.4408, Train Steps/Sec: 0.07, Epoch: 2.0326746233613773, LR: 0.0003 +[2026-02-28 05:47:44] (step=0010390) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 2.032870279788691, LR: 0.0003 +[2026-02-28 05:47:57] (step=0010391) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.033065936216005, LR: 0.0003 +[2026-02-28 05:48:11] (step=0010392) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.0332615926433184, LR: 0.0003 +[2026-02-28 05:48:24] (step=0010393) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 2.033457249070632, LR: 0.0003 +[2026-02-28 05:48:38] (step=0010394) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.0336529054979455, LR: 0.0003 +[2026-02-28 05:48:52] (step=0010395) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 2.033848561925259, LR: 0.0003 +[2026-02-28 05:49:05] (step=0010396) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 2.034044218352573, LR: 0.0003 +[2026-02-28 05:49:19] (step=0010397) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 2.0342398747798867, LR: 0.0003 +[2026-02-28 05:49:33] (step=0010398) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.0344355312072, LR: 0.0003 +[2026-02-28 05:49:46] (step=0010399) Train Loss: 0.4693, Train Steps/Sec: 0.07, Epoch: 2.034631187634514, LR: 0.0003 +[2026-02-28 05:50:00] (step=0010400) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 2.0348268440618273, LR: 0.0003 +[2026-02-28 05:50:14] (step=0010401) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 2.035022500489141, LR: 0.0003 +[2026-02-28 05:50:28] (step=0010402) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 2.035218156916455, LR: 0.0003 +[2026-02-28 05:50:42] (step=0010403) Train Loss: 0.4627, Train Steps/Sec: 0.07, Epoch: 2.0354138133437685, LR: 0.0003 +[2026-02-28 05:50:55] (step=0010404) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.035609469771082, LR: 0.0003 +[2026-02-28 05:51:09] (step=0010405) Train Loss: 0.4354, Train Steps/Sec: 0.07, Epoch: 2.0358051261983956, LR: 0.0003 +[2026-02-28 05:51:23] (step=0010406) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.036000782625709, LR: 0.0003 +[2026-02-28 05:51:36] (step=0010407) Train Loss: 0.4598, Train Steps/Sec: 0.07, Epoch: 2.0361964390530227, LR: 0.0003 +[2026-02-28 05:51:50] (step=0010408) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.0363920954803367, LR: 0.0003 +[2026-02-28 05:52:04] (step=0010409) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 2.0365877519076503, LR: 0.0003 +[2026-02-28 05:52:17] (step=0010410) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.036783408334964, LR: 0.0003 +[2026-02-28 05:52:31] (step=0010411) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.0369790647622774, LR: 0.0003 +[2026-02-28 05:52:45] (step=0010412) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.037174721189591, LR: 0.0003 +[2026-02-28 05:52:58] (step=0010413) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.0373703776169045, LR: 0.0003 +[2026-02-28 05:53:12] (step=0010414) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 2.0375660340442185, LR: 0.0003 +[2026-02-28 05:53:26] (step=0010415) Train Loss: 0.4589, Train Steps/Sec: 0.07, Epoch: 2.037761690471532, LR: 0.0003 +[2026-02-28 05:53:40] (step=0010416) Train Loss: 0.4389, Train Steps/Sec: 0.07, Epoch: 2.0379573468988457, LR: 0.0003 +[2026-02-28 05:53:53] (step=0010417) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.0381530033261592, LR: 0.0003 +[2026-02-28 05:54:07] (step=0010418) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.038348659753473, LR: 0.0003 +[2026-02-28 05:54:21] (step=0010419) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.0385443161807864, LR: 0.0003 +[2026-02-28 05:54:35] (step=0010420) Train Loss: 0.4564, Train Steps/Sec: 0.07, Epoch: 2.0387399726081004, LR: 0.0003 +[2026-02-28 05:54:48] (step=0010421) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.038935629035414, LR: 0.0003 +[2026-02-28 05:55:02] (step=0010422) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.0391312854627275, LR: 0.0003 +[2026-02-28 05:55:15] (step=0010423) Train Loss: 0.4433, Train Steps/Sec: 0.07, Epoch: 2.039326941890041, LR: 0.0003 +[2026-02-28 05:55:29] (step=0010424) Train Loss: 0.4685, Train Steps/Sec: 0.07, Epoch: 2.0395225983173546, LR: 0.0003 +[2026-02-28 05:55:43] (step=0010425) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 2.039718254744668, LR: 0.0003 +[2026-02-28 05:55:57] (step=0010426) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.039913911171982, LR: 0.0003 +[2026-02-28 05:56:10] (step=0010427) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 2.0401095675992957, LR: 0.0003 +[2026-02-28 05:56:24] (step=0010428) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.0403052240266093, LR: 0.0003 +[2026-02-28 05:56:37] (step=0010429) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 2.040500880453923, LR: 0.0003 +[2026-02-28 05:56:51] (step=0010430) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 2.0406965368812364, LR: 0.0003 +[2026-02-28 05:57:05] (step=0010431) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.04089219330855, LR: 0.0003 +[2026-02-28 05:57:19] (step=0010432) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.041087849735864, LR: 0.0003 +[2026-02-28 05:57:32] (step=0010433) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 2.0412835061631776, LR: 0.0003 +[2026-02-28 05:57:46] (step=0010434) Train Loss: 0.4657, Train Steps/Sec: 0.07, Epoch: 2.041479162590491, LR: 0.0003 +[2026-02-28 05:58:00] (step=0010435) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.0416748190178047, LR: 0.0003 +[2026-02-28 05:58:13] (step=0010436) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.0418704754451182, LR: 0.0003 +[2026-02-28 05:58:27] (step=0010437) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.042066131872432, LR: 0.0003 +[2026-02-28 05:58:41] (step=0010438) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.042261788299746, LR: 0.0003 +[2026-02-28 05:58:54] (step=0010439) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 2.0424574447270594, LR: 0.0003 +[2026-02-28 05:59:08] (step=0010440) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.042653101154373, LR: 0.0003 +[2026-02-28 05:59:22] (step=0010441) Train Loss: 0.4632, Train Steps/Sec: 0.07, Epoch: 2.0428487575816865, LR: 0.0003 +[2026-02-28 05:59:35] (step=0010442) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.043044414009, LR: 0.0003 +[2026-02-28 05:59:49] (step=0010443) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 2.0432400704363136, LR: 0.0003 +[2026-02-28 06:00:03] (step=0010444) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.0434357268636276, LR: 0.0003 +[2026-02-28 06:00:17] (step=0010445) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 2.043631383290941, LR: 0.0003 +[2026-02-28 06:00:30] (step=0010446) Train Loss: 0.4386, Train Steps/Sec: 0.07, Epoch: 2.0438270397182547, LR: 0.0003 +[2026-02-28 06:00:44] (step=0010447) Train Loss: 0.4422, Train Steps/Sec: 0.07, Epoch: 2.0440226961455683, LR: 0.0003 +[2026-02-28 06:00:58] (step=0010448) Train Loss: 0.4616, Train Steps/Sec: 0.07, Epoch: 2.044218352572882, LR: 0.0003 +[2026-02-28 06:01:11] (step=0010449) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 2.044414009000196, LR: 0.0003 +[2026-02-28 06:01:25] (step=0010450) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.0446096654275094, LR: 0.0003 +[2026-02-28 06:01:39] (step=0010451) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.044805321854823, LR: 0.0003 +[2026-02-28 06:01:52] (step=0010452) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 2.0450009782821366, LR: 0.0003 +[2026-02-28 06:02:06] (step=0010453) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 2.04519663470945, LR: 0.0003 +[2026-02-28 06:02:20] (step=0010454) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.0453922911367637, LR: 0.0003 +[2026-02-28 06:02:33] (step=0010455) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.0455879475640777, LR: 0.0003 +[2026-02-28 06:02:47] (step=0010456) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.0457836039913913, LR: 0.0003 +[2026-02-28 06:03:01] (step=0010457) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.045979260418705, LR: 0.0003 +[2026-02-28 06:03:14] (step=0010458) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.0461749168460184, LR: 0.0003 +[2026-02-28 06:03:28] (step=0010459) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 2.046370573273332, LR: 0.0003 +[2026-02-28 06:03:42] (step=0010460) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 2.0465662297006455, LR: 0.0003 +[2026-02-28 06:03:56] (step=0010461) Train Loss: 0.4635, Train Steps/Sec: 0.07, Epoch: 2.0467618861279595, LR: 0.0003 +[2026-02-28 06:04:09] (step=0010462) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.046957542555273, LR: 0.0003 +[2026-02-28 06:04:23] (step=0010463) Train Loss: 0.4636, Train Steps/Sec: 0.07, Epoch: 2.0471531989825866, LR: 0.0003 +[2026-02-28 06:04:36] (step=0010464) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 2.0473488554099, LR: 0.0003 +[2026-02-28 06:04:50] (step=0010465) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.0475445118372138, LR: 0.0003 +[2026-02-28 06:05:04] (step=0010466) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.0477401682645273, LR: 0.0003 +[2026-02-28 06:05:18] (step=0010467) Train Loss: 0.4649, Train Steps/Sec: 0.07, Epoch: 2.0479358246918413, LR: 0.0003 +[2026-02-28 06:05:31] (step=0010468) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.048131481119155, LR: 0.0003 +[2026-02-28 06:05:45] (step=0010469) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.0483271375464684, LR: 0.0003 +[2026-02-28 06:05:59] (step=0010470) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 2.048522793973782, LR: 0.0003 +[2026-02-28 06:06:12] (step=0010471) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 2.0487184504010956, LR: 0.0003 +[2026-02-28 06:06:26] (step=0010472) Train Loss: 0.4708, Train Steps/Sec: 0.07, Epoch: 2.048914106828409, LR: 0.0003 +[2026-02-28 06:06:40] (step=0010473) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.049109763255723, LR: 0.0003 +[2026-02-28 06:06:53] (step=0010474) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.0493054196830367, LR: 0.0003 +[2026-02-28 06:07:07] (step=0010475) Train Loss: 0.4593, Train Steps/Sec: 0.07, Epoch: 2.0495010761103503, LR: 0.0003 +[2026-02-28 06:07:21] (step=0010476) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.049696732537664, LR: 0.0003 +[2026-02-28 06:07:35] (step=0010477) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 2.0498923889649774, LR: 0.0003 +[2026-02-28 06:07:48] (step=0010478) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.050088045392291, LR: 0.0003 +[2026-02-28 06:08:02] (step=0010479) Train Loss: 0.4627, Train Steps/Sec: 0.07, Epoch: 2.050283701819605, LR: 0.0003 +[2026-02-28 06:08:16] (step=0010480) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.0504793582469185, LR: 0.0003 +[2026-02-28 06:08:29] (step=0010481) Train Loss: 0.4637, Train Steps/Sec: 0.07, Epoch: 2.050675014674232, LR: 0.0003 +[2026-02-28 06:08:43] (step=0010482) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.0508706711015456, LR: 0.0003 +[2026-02-28 06:08:56] (step=0010483) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.051066327528859, LR: 0.0003 +[2026-02-28 06:09:10] (step=0010484) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 2.0512619839561728, LR: 0.0003 +[2026-02-28 06:09:24] (step=0010485) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 2.0514576403834868, LR: 0.0003 +[2026-02-28 06:09:37] (step=0010486) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.0516532968108003, LR: 0.0003 +[2026-02-28 06:09:51] (step=0010487) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.051848953238114, LR: 0.0003 +[2026-02-28 06:10:05] (step=0010488) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.0520446096654275, LR: 0.0003 +[2026-02-28 06:10:18] (step=0010489) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.052240266092741, LR: 0.0003 +[2026-02-28 06:10:32] (step=0010490) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 2.0524359225200546, LR: 0.0003 +[2026-02-28 06:10:46] (step=0010491) Train Loss: 0.4409, Train Steps/Sec: 0.07, Epoch: 2.0526315789473686, LR: 0.0003 +[2026-02-28 06:11:00] (step=0010492) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.052827235374682, LR: 0.0003 +[2026-02-28 06:11:13] (step=0010493) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 2.0530228918019957, LR: 0.0003 +[2026-02-28 06:11:27] (step=0010494) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.0532185482293093, LR: 0.0003 +[2026-02-28 06:11:41] (step=0010495) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 2.053414204656623, LR: 0.0003 +[2026-02-28 06:11:54] (step=0010496) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.0536098610839364, LR: 0.0003 +[2026-02-28 06:12:08] (step=0010497) Train Loss: 0.4354, Train Steps/Sec: 0.07, Epoch: 2.0538055175112504, LR: 0.0003 +[2026-02-28 06:12:21] (step=0010498) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 2.054001173938564, LR: 0.0003 +[2026-02-28 06:12:35] (step=0010499) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.0541968303658775, LR: 0.0003 +[2026-02-28 06:12:49] (step=0010500) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.054392486793191, LR: 0.0003 +[2026-02-28 06:12:49] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0010500/ +[2026-02-28 06:13:03] (step=0010501) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 2.0545881432205046, LR: 0.0003 +[2026-02-28 06:13:16] (step=0010502) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.054783799647818, LR: 0.0003 +[2026-02-28 06:13:30] (step=0010503) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.054979456075132, LR: 0.0003 +[2026-02-28 06:13:44] (step=0010504) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.0551751125024458, LR: 0.0003 +[2026-02-28 06:13:57] (step=0010505) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.0553707689297593, LR: 0.0003 +[2026-02-28 06:14:11] (step=0010506) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 2.055566425357073, LR: 0.0003 +[2026-02-28 06:14:25] (step=0010507) Train Loss: 0.4427, Train Steps/Sec: 0.07, Epoch: 2.0557620817843865, LR: 0.0003 +[2026-02-28 06:14:38] (step=0010508) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.0559577382117005, LR: 0.0003 +[2026-02-28 06:14:52] (step=0010509) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.056153394639014, LR: 0.0003 +[2026-02-28 06:15:06] (step=0010510) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.0563490510663276, LR: 0.0003 +[2026-02-28 06:15:19] (step=0010511) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.056544707493641, LR: 0.0003 +[2026-02-28 06:15:33] (step=0010512) Train Loss: 0.4638, Train Steps/Sec: 0.07, Epoch: 2.0567403639209547, LR: 0.0003 +[2026-02-28 06:15:47] (step=0010513) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.0569360203482683, LR: 0.0003 +[2026-02-28 06:16:00] (step=0010514) Train Loss: 0.4593, Train Steps/Sec: 0.07, Epoch: 2.0571316767755823, LR: 0.0003 +[2026-02-28 06:16:14] (step=0010515) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.057327333202896, LR: 0.0003 +[2026-02-28 06:16:28] (step=0010516) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.0575229896302094, LR: 0.0003 +[2026-02-28 06:16:41] (step=0010517) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.057718646057523, LR: 0.0003 +[2026-02-28 06:16:55] (step=0010518) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 2.0579143024848365, LR: 0.0003 +[2026-02-28 06:17:09] (step=0010519) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.05810995891215, LR: 0.0003 +[2026-02-28 06:17:22] (step=0010520) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 2.058305615339464, LR: 0.0003 +[2026-02-28 06:17:36] (step=0010521) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 2.0585012717667777, LR: 0.0003 +[2026-02-28 06:17:50] (step=0010522) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.0586969281940912, LR: 0.0003 +[2026-02-28 06:18:03] (step=0010523) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 2.058892584621405, LR: 0.0003 +[2026-02-28 06:18:17] (step=0010524) Train Loss: 0.4668, Train Steps/Sec: 0.07, Epoch: 2.0590882410487183, LR: 0.0003 +[2026-02-28 06:18:31] (step=0010525) Train Loss: 0.4627, Train Steps/Sec: 0.07, Epoch: 2.059283897476032, LR: 0.0003 +[2026-02-28 06:18:44] (step=0010526) Train Loss: 0.4386, Train Steps/Sec: 0.07, Epoch: 2.059479553903346, LR: 0.0003 +[2026-02-28 06:18:58] (step=0010527) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.0596752103306595, LR: 0.0003 +[2026-02-28 06:19:12] (step=0010528) Train Loss: 0.4589, Train Steps/Sec: 0.07, Epoch: 2.059870866757973, LR: 0.0003 +[2026-02-28 06:19:25] (step=0010529) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 2.0600665231852866, LR: 0.0003 +[2026-02-28 06:19:39] (step=0010530) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.0602621796126, LR: 0.0003 +[2026-02-28 06:19:53] (step=0010531) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 2.0604578360399137, LR: 0.0003 +[2026-02-28 06:20:06] (step=0010532) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 2.0606534924672277, LR: 0.0003 +[2026-02-28 06:20:20] (step=0010533) Train Loss: 0.4564, Train Steps/Sec: 0.07, Epoch: 2.0608491488945413, LR: 0.0003 +[2026-02-28 06:20:34] (step=0010534) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.061044805321855, LR: 0.0003 +[2026-02-28 06:20:48] (step=0010535) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.0612404617491684, LR: 0.0003 +[2026-02-28 06:21:01] (step=0010536) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.061436118176482, LR: 0.0003 +[2026-02-28 06:21:15] (step=0010537) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.0616317746037955, LR: 0.0003 +[2026-02-28 06:21:29] (step=0010538) Train Loss: 0.4641, Train Steps/Sec: 0.07, Epoch: 2.0618274310311095, LR: 0.0003 +[2026-02-28 06:21:42] (step=0010539) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.062023087458423, LR: 0.0003 +[2026-02-28 06:21:56] (step=0010540) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.0622187438857367, LR: 0.0003 +[2026-02-28 06:22:10] (step=0010541) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.0624144003130502, LR: 0.0003 +[2026-02-28 06:22:23] (step=0010542) Train Loss: 0.4534, Train Steps/Sec: 0.07, Epoch: 2.062610056740364, LR: 0.0003 +[2026-02-28 06:22:37] (step=0010543) Train Loss: 0.4384, Train Steps/Sec: 0.07, Epoch: 2.0628057131676774, LR: 0.0003 +[2026-02-28 06:22:51] (step=0010544) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.0630013695949914, LR: 0.0003 +[2026-02-28 06:23:04] (step=0010545) Train Loss: 0.4637, Train Steps/Sec: 0.07, Epoch: 2.063197026022305, LR: 0.0003 +[2026-02-28 06:23:18] (step=0010546) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 2.0633926824496185, LR: 0.0003 +[2026-02-28 06:23:32] (step=0010547) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.063588338876932, LR: 0.0003 +[2026-02-28 06:23:46] (step=0010548) Train Loss: 0.4705, Train Steps/Sec: 0.07, Epoch: 2.0637839953042456, LR: 0.0003 +[2026-02-28 06:23:59] (step=0010549) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.063979651731559, LR: 0.0003 +[2026-02-28 06:24:13] (step=0010550) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.064175308158873, LR: 0.0003 +[2026-02-28 06:24:27] (step=0010551) Train Loss: 0.4656, Train Steps/Sec: 0.07, Epoch: 2.0643709645861867, LR: 0.0003 +[2026-02-28 06:24:40] (step=0010552) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.0645666210135003, LR: 0.0003 +[2026-02-28 06:24:54] (step=0010553) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.064762277440814, LR: 0.0003 +[2026-02-28 06:25:08] (step=0010554) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.0649579338681274, LR: 0.0003 +[2026-02-28 06:25:21] (step=0010555) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 2.065153590295441, LR: 0.0003 +[2026-02-28 06:25:35] (step=0010556) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 2.065349246722755, LR: 0.0003 +[2026-02-28 06:25:49] (step=0010557) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 2.0655449031500686, LR: 0.0003 +[2026-02-28 06:26:02] (step=0010558) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.065740559577382, LR: 0.0003 +[2026-02-28 06:26:16] (step=0010559) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.0659362160046957, LR: 0.0003 +[2026-02-28 06:26:30] (step=0010560) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.0661318724320092, LR: 0.0003 +[2026-02-28 06:26:43] (step=0010561) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 2.0663275288593232, LR: 0.0003 +[2026-02-28 06:26:57] (step=0010562) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.066523185286637, LR: 0.0003 +[2026-02-28 06:27:11] (step=0010563) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 2.0667188417139504, LR: 0.0003 +[2026-02-28 06:27:25] (step=0010564) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.066914498141264, LR: 0.0003 +[2026-02-28 06:27:38] (step=0010565) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.0671101545685775, LR: 0.0003 +[2026-02-28 06:27:52] (step=0010566) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.067305810995891, LR: 0.0003 +[2026-02-28 06:28:06] (step=0010567) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.067501467423205, LR: 0.0003 +[2026-02-28 06:28:19] (step=0010568) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.0676971238505186, LR: 0.0003 +[2026-02-28 06:28:33] (step=0010569) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.067892780277832, LR: 0.0003 +[2026-02-28 06:28:47] (step=0010570) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.0680884367051457, LR: 0.0003 +[2026-02-28 06:29:00] (step=0010571) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 2.0682840931324593, LR: 0.0003 +[2026-02-28 06:29:14] (step=0010572) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 2.068479749559773, LR: 0.0003 +[2026-02-28 06:29:28] (step=0010573) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.068675405987087, LR: 0.0003 +[2026-02-28 06:29:41] (step=0010574) Train Loss: 0.4609, Train Steps/Sec: 0.07, Epoch: 2.0688710624144004, LR: 0.0003 +[2026-02-28 06:29:55] (step=0010575) Train Loss: 0.4437, Train Steps/Sec: 0.07, Epoch: 2.069066718841714, LR: 0.0003 +[2026-02-28 06:30:09] (step=0010576) Train Loss: 0.4587, Train Steps/Sec: 0.07, Epoch: 2.0692623752690276, LR: 0.0003 +[2026-02-28 06:30:22] (step=0010577) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 2.069458031696341, LR: 0.0003 +[2026-02-28 06:30:36] (step=0010578) Train Loss: 0.4631, Train Steps/Sec: 0.07, Epoch: 2.0696536881236547, LR: 0.0003 +[2026-02-28 06:30:50] (step=0010579) Train Loss: 0.4665, Train Steps/Sec: 0.07, Epoch: 2.0698493445509687, LR: 0.0003 +[2026-02-28 06:31:04] (step=0010580) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 2.0700450009782823, LR: 0.0003 +[2026-02-28 06:31:17] (step=0010581) Train Loss: 0.4421, Train Steps/Sec: 0.07, Epoch: 2.070240657405596, LR: 0.0003 +[2026-02-28 06:31:31] (step=0010582) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.0704363138329094, LR: 0.0003 +[2026-02-28 06:31:44] (step=0010583) Train Loss: 0.4467, Train Steps/Sec: 0.07, Epoch: 2.070631970260223, LR: 0.0003 +[2026-02-28 06:31:58] (step=0010584) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 2.0708276266875365, LR: 0.0003 +[2026-02-28 06:32:12] (step=0010585) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 2.0710232831148505, LR: 0.0003 +[2026-02-28 06:32:25] (step=0010586) Train Loss: 0.4657, Train Steps/Sec: 0.07, Epoch: 2.071218939542164, LR: 0.0003 +[2026-02-28 06:32:39] (step=0010587) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 2.0714145959694776, LR: 0.0003 +[2026-02-28 06:32:53] (step=0010588) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.071610252396791, LR: 0.0003 +[2026-02-28 06:33:06] (step=0010589) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.0718059088241048, LR: 0.0003 +[2026-02-28 06:33:20] (step=0010590) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.0720015652514183, LR: 0.0003 +[2026-02-28 06:33:34] (step=0010591) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.0721972216787323, LR: 0.0003 +[2026-02-28 06:33:47] (step=0010592) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.072392878106046, LR: 0.0003 +[2026-02-28 06:34:01] (step=0010593) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.0725885345333595, LR: 0.0003 +[2026-02-28 06:34:15] (step=0010594) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.072784190960673, LR: 0.0003 +[2026-02-28 06:34:29] (step=0010595) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 2.0729798473879866, LR: 0.0003 +[2026-02-28 06:34:42] (step=0010596) Train Loss: 0.4649, Train Steps/Sec: 0.07, Epoch: 2.0731755038153, LR: 0.0003 +[2026-02-28 06:34:56] (step=0010597) Train Loss: 0.4644, Train Steps/Sec: 0.07, Epoch: 2.073371160242614, LR: 0.0003 +[2026-02-28 06:35:10] (step=0010598) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.0735668166699277, LR: 0.0003 +[2026-02-28 06:35:23] (step=0010599) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 2.0737624730972413, LR: 0.0003 +[2026-02-28 06:35:37] (step=0010600) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 2.073958129524555, LR: 0.0003 +[2026-02-28 06:35:51] (step=0010601) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.0741537859518684, LR: 0.0003 +[2026-02-28 06:36:04] (step=0010602) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.074349442379182, LR: 0.0003 +[2026-02-28 06:36:18] (step=0010603) Train Loss: 0.4609, Train Steps/Sec: 0.07, Epoch: 2.074545098806496, LR: 0.0003 +[2026-02-28 06:36:32] (step=0010604) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.0747407552338095, LR: 0.0003 +[2026-02-28 06:36:45] (step=0010605) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.074936411661123, LR: 0.0003 +[2026-02-28 06:36:59] (step=0010606) Train Loss: 0.4693, Train Steps/Sec: 0.07, Epoch: 2.0751320680884366, LR: 0.0003 +[2026-02-28 06:37:12] (step=0010607) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 2.07532772451575, LR: 0.0003 +[2026-02-28 06:37:26] (step=0010608) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 2.0755233809430638, LR: 0.0003 +[2026-02-28 06:37:40] (step=0010609) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.0757190373703778, LR: 0.0003 +[2026-02-28 06:37:54] (step=0010610) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.0759146937976913, LR: 0.0003 +[2026-02-28 06:38:07] (step=0010611) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.076110350225005, LR: 0.0003 +[2026-02-28 06:38:21] (step=0010612) Train Loss: 0.4651, Train Steps/Sec: 0.07, Epoch: 2.0763060066523185, LR: 0.0003 +[2026-02-28 06:38:35] (step=0010613) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 2.076501663079632, LR: 0.0003 +[2026-02-28 06:38:48] (step=0010614) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.0766973195069456, LR: 0.0003 +[2026-02-28 06:39:02] (step=0010615) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.0768929759342596, LR: 0.0003 +[2026-02-28 06:39:15] (step=0010616) Train Loss: 0.4386, Train Steps/Sec: 0.07, Epoch: 2.077088632361573, LR: 0.0003 +[2026-02-28 06:39:29] (step=0010617) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.0772842887888867, LR: 0.0003 +[2026-02-28 06:39:43] (step=0010618) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.0774799452162003, LR: 0.0003 +[2026-02-28 06:39:56] (step=0010619) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 2.077675601643514, LR: 0.0003 +[2026-02-28 06:40:10] (step=0010620) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.077871258070828, LR: 0.0003 +[2026-02-28 06:40:24] (step=0010621) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.0780669144981414, LR: 0.0003 +[2026-02-28 06:40:38] (step=0010622) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.078262570925455, LR: 0.0003 +[2026-02-28 06:40:51] (step=0010623) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 2.0784582273527685, LR: 0.0003 +[2026-02-28 06:41:05] (step=0010624) Train Loss: 0.4602, Train Steps/Sec: 0.07, Epoch: 2.078653883780082, LR: 0.0003 +[2026-02-28 06:41:19] (step=0010625) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.0788495402073957, LR: 0.0003 +[2026-02-28 06:41:32] (step=0010626) Train Loss: 0.4608, Train Steps/Sec: 0.07, Epoch: 2.0790451966347097, LR: 0.0003 +[2026-02-28 06:41:46] (step=0010627) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.079240853062023, LR: 0.0003 +[2026-02-28 06:42:00] (step=0010628) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.079436509489337, LR: 0.0003 +[2026-02-28 06:42:13] (step=0010629) Train Loss: 0.4396, Train Steps/Sec: 0.07, Epoch: 2.0796321659166503, LR: 0.0003 +[2026-02-28 06:42:27] (step=0010630) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.079827822343964, LR: 0.0003 +[2026-02-28 06:42:40] (step=0010631) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.0800234787712775, LR: 0.0003 +[2026-02-28 06:42:54] (step=0010632) Train Loss: 0.4640, Train Steps/Sec: 0.07, Epoch: 2.0802191351985915, LR: 0.0003 +[2026-02-28 06:43:08] (step=0010633) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 2.080414791625905, LR: 0.0003 +[2026-02-28 06:43:21] (step=0010634) Train Loss: 0.4623, Train Steps/Sec: 0.07, Epoch: 2.0806104480532186, LR: 0.0003 +[2026-02-28 06:43:35] (step=0010635) Train Loss: 0.4605, Train Steps/Sec: 0.07, Epoch: 2.080806104480532, LR: 0.0003 +[2026-02-28 06:43:49] (step=0010636) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 2.0810017609078457, LR: 0.0003 +[2026-02-28 06:44:02] (step=0010637) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.0811974173351593, LR: 0.0003 +[2026-02-28 06:44:16] (step=0010638) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.0813930737624733, LR: 0.0003 +[2026-02-28 06:44:30] (step=0010639) Train Loss: 0.4645, Train Steps/Sec: 0.07, Epoch: 2.081588730189787, LR: 0.0003 +[2026-02-28 06:44:43] (step=0010640) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 2.0817843866171004, LR: 0.0003 +[2026-02-28 06:44:57] (step=0010641) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.081980043044414, LR: 0.0003 +[2026-02-28 06:45:11] (step=0010642) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.0821756994717275, LR: 0.0003 +[2026-02-28 06:45:24] (step=0010643) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 2.082371355899041, LR: 0.0003 +[2026-02-28 06:45:38] (step=0010644) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 2.082567012326355, LR: 0.0003 +[2026-02-28 06:45:52] (step=0010645) Train Loss: 0.4657, Train Steps/Sec: 0.07, Epoch: 2.0827626687536687, LR: 0.0003 +[2026-02-28 06:46:05] (step=0010646) Train Loss: 0.4346, Train Steps/Sec: 0.07, Epoch: 2.0829583251809822, LR: 0.0003 +[2026-02-28 06:46:19] (step=0010647) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 2.083153981608296, LR: 0.0003 +[2026-02-28 06:46:32] (step=0010648) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 2.0833496380356094, LR: 0.0003 +[2026-02-28 06:46:46] (step=0010649) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 2.083545294462923, LR: 0.0003 +[2026-02-28 06:47:00] (step=0010650) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.083740950890237, LR: 0.0003 +[2026-02-28 06:47:13] (step=0010651) Train Loss: 0.4674, Train Steps/Sec: 0.07, Epoch: 2.0839366073175505, LR: 0.0003 +[2026-02-28 06:47:27] (step=0010652) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 2.084132263744864, LR: 0.0003 +[2026-02-28 06:47:41] (step=0010653) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 2.0843279201721776, LR: 0.0003 +[2026-02-28 06:47:54] (step=0010654) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 2.084523576599491, LR: 0.0003 +[2026-02-28 06:48:08] (step=0010655) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.0847192330268047, LR: 0.0003 +[2026-02-28 06:48:22] (step=0010656) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.0849148894541187, LR: 0.0003 +[2026-02-28 06:48:35] (step=0010657) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.0851105458814323, LR: 0.0003 +[2026-02-28 06:48:49] (step=0010658) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.085306202308746, LR: 0.0003 +[2026-02-28 06:49:02] (step=0010659) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 2.0855018587360594, LR: 0.0003 +[2026-02-28 06:49:16] (step=0010660) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 2.085697515163373, LR: 0.0003 +[2026-02-28 06:49:30] (step=0010661) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.0858931715906865, LR: 0.0003 +[2026-02-28 06:49:43] (step=0010662) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.0860888280180006, LR: 0.0003 +[2026-02-28 06:49:57] (step=0010663) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 2.086284484445314, LR: 0.0003 +[2026-02-28 06:50:11] (step=0010664) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.0864801408726277, LR: 0.0003 +[2026-02-28 06:50:24] (step=0010665) Train Loss: 0.4424, Train Steps/Sec: 0.07, Epoch: 2.0866757972999412, LR: 0.0003 +[2026-02-28 06:50:38] (step=0010666) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.086871453727255, LR: 0.0003 +[2026-02-28 06:50:51] (step=0010667) Train Loss: 0.4589, Train Steps/Sec: 0.07, Epoch: 2.0870671101545684, LR: 0.0003 +[2026-02-28 06:51:05] (step=0010668) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.0872627665818824, LR: 0.0003 +[2026-02-28 06:51:19] (step=0010669) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.087458423009196, LR: 0.0003 +[2026-02-28 06:51:32] (step=0010670) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.0876540794365095, LR: 0.0003 +[2026-02-28 06:51:46] (step=0010671) Train Loss: 0.4407, Train Steps/Sec: 0.07, Epoch: 2.087849735863823, LR: 0.0003 +[2026-02-28 06:52:00] (step=0010672) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.0880453922911366, LR: 0.0003 +[2026-02-28 06:52:13] (step=0010673) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 2.0882410487184506, LR: 0.0003 +[2026-02-28 06:52:27] (step=0010674) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.088436705145764, LR: 0.0003 +[2026-02-28 06:52:40] (step=0010675) Train Loss: 0.4618, Train Steps/Sec: 0.07, Epoch: 2.0886323615730777, LR: 0.0003 +[2026-02-28 06:52:54] (step=0010676) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.0888280180003913, LR: 0.0003 +[2026-02-28 06:53:08] (step=0010677) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.089023674427705, LR: 0.0003 +[2026-02-28 06:53:21] (step=0010678) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 2.0892193308550184, LR: 0.0003 +[2026-02-28 06:53:35] (step=0010679) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 2.0894149872823324, LR: 0.0003 +[2026-02-28 06:53:48] (step=0010680) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.089610643709646, LR: 0.0003 +[2026-02-28 06:54:02] (step=0010681) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.0898063001369596, LR: 0.0003 +[2026-02-28 06:54:16] (step=0010682) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.090001956564273, LR: 0.0003 +[2026-02-28 06:54:29] (step=0010683) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.0901976129915867, LR: 0.0003 +[2026-02-28 06:54:43] (step=0010684) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.0903932694189002, LR: 0.0003 +[2026-02-28 06:54:57] (step=0010685) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 2.0905889258462143, LR: 0.0003 +[2026-02-28 06:55:10] (step=0010686) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.090784582273528, LR: 0.0003 +[2026-02-28 06:55:24] (step=0010687) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.0909802387008414, LR: 0.0003 +[2026-02-28 06:55:38] (step=0010688) Train Loss: 0.4626, Train Steps/Sec: 0.07, Epoch: 2.091175895128155, LR: 0.0003 +[2026-02-28 06:55:51] (step=0010689) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 2.0913715515554685, LR: 0.0003 +[2026-02-28 06:56:05] (step=0010690) Train Loss: 0.4412, Train Steps/Sec: 0.07, Epoch: 2.091567207982782, LR: 0.0003 +[2026-02-28 06:56:18] (step=0010691) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.091762864410096, LR: 0.0003 +[2026-02-28 06:56:32] (step=0010692) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 2.0919585208374096, LR: 0.0003 +[2026-02-28 06:56:46] (step=0010693) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.092154177264723, LR: 0.0003 +[2026-02-28 06:56:59] (step=0010694) Train Loss: 0.4428, Train Steps/Sec: 0.07, Epoch: 2.0923498336920368, LR: 0.0003 +[2026-02-28 06:57:13] (step=0010695) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.0925454901193503, LR: 0.0003 +[2026-02-28 06:57:27] (step=0010696) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.092741146546664, LR: 0.0003 +[2026-02-28 06:57:40] (step=0010697) Train Loss: 0.4456, Train Steps/Sec: 0.07, Epoch: 2.092936802973978, LR: 0.0003 +[2026-02-28 06:57:54] (step=0010698) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.0931324594012914, LR: 0.0003 +[2026-02-28 06:58:07] (step=0010699) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 2.093328115828605, LR: 0.0003 +[2026-02-28 06:58:21] (step=0010700) Train Loss: 0.4597, Train Steps/Sec: 0.07, Epoch: 2.0935237722559186, LR: 0.0003 +[2026-02-28 06:58:35] (step=0010701) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 2.093719428683232, LR: 0.0003 +[2026-02-28 06:58:48] (step=0010702) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.0939150851105457, LR: 0.0003 +[2026-02-28 06:59:02] (step=0010703) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 2.0941107415378597, LR: 0.0003 +[2026-02-28 06:59:16] (step=0010704) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 2.0943063979651733, LR: 0.0003 +[2026-02-28 06:59:29] (step=0010705) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.094502054392487, LR: 0.0003 +[2026-02-28 06:59:43] (step=0010706) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.0946977108198004, LR: 0.0003 +[2026-02-28 06:59:56] (step=0010707) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 2.094893367247114, LR: 0.0003 +[2026-02-28 07:00:10] (step=0010708) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.0950890236744275, LR: 0.0003 +[2026-02-28 07:00:24] (step=0010709) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 2.0952846801017415, LR: 0.0003 +[2026-02-28 07:00:37] (step=0010710) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.095480336529055, LR: 0.0003 +[2026-02-28 07:00:51] (step=0010711) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.0956759929563686, LR: 0.0003 +[2026-02-28 07:01:04] (step=0010712) Train Loss: 0.4534, Train Steps/Sec: 0.08, Epoch: 2.095871649383682, LR: 0.0003 +[2026-02-28 07:01:18] (step=0010713) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.0960673058109958, LR: 0.0003 +[2026-02-28 07:01:32] (step=0010714) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 2.0962629622383093, LR: 0.0003 +[2026-02-28 07:01:45] (step=0010715) Train Loss: 0.4655, Train Steps/Sec: 0.07, Epoch: 2.0964586186656233, LR: 0.0003 +[2026-02-28 07:01:59] (step=0010716) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.096654275092937, LR: 0.0003 +[2026-02-28 07:02:12] (step=0010717) Train Loss: 0.4370, Train Steps/Sec: 0.07, Epoch: 2.0968499315202505, LR: 0.0003 +[2026-02-28 07:02:26] (step=0010718) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 2.097045587947564, LR: 0.0003 +[2026-02-28 07:02:40] (step=0010719) Train Loss: 0.4677, Train Steps/Sec: 0.07, Epoch: 2.0972412443748776, LR: 0.0003 +[2026-02-28 07:02:53] (step=0010720) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.097436900802191, LR: 0.0003 +[2026-02-28 07:03:07] (step=0010721) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.097632557229505, LR: 0.0003 +[2026-02-28 07:03:21] (step=0010722) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 2.0978282136568187, LR: 0.0003 +[2026-02-28 07:03:34] (step=0010723) Train Loss: 0.4608, Train Steps/Sec: 0.07, Epoch: 2.0980238700841323, LR: 0.0003 +[2026-02-28 07:03:48] (step=0010724) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 2.098219526511446, LR: 0.0003 +[2026-02-28 07:04:02] (step=0010725) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 2.0984151829387594, LR: 0.0003 +[2026-02-28 07:04:15] (step=0010726) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 2.098610839366073, LR: 0.0003 +[2026-02-28 07:04:29] (step=0010727) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 2.098806495793387, LR: 0.0003 +[2026-02-28 07:04:43] (step=0010728) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 2.0990021522207005, LR: 0.0003 +[2026-02-28 07:04:56] (step=0010729) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 2.099197808648014, LR: 0.0003 +[2026-02-28 07:05:10] (step=0010730) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.0993934650753276, LR: 0.0003 +[2026-02-28 07:05:24] (step=0010731) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.099589121502641, LR: 0.0003 +[2026-02-28 07:05:37] (step=0010732) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 2.099784777929955, LR: 0.0003 +[2026-02-28 07:05:51] (step=0010733) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.0999804343572688, LR: 0.0003 +[2026-02-28 07:06:05] (step=0010734) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.1001760907845823, LR: 0.0003 +[2026-02-28 07:06:18] (step=0010735) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.100371747211896, LR: 0.0003 +[2026-02-28 07:06:32] (step=0010736) Train Loss: 0.4621, Train Steps/Sec: 0.07, Epoch: 2.1005674036392095, LR: 0.0003 +[2026-02-28 07:06:46] (step=0010737) Train Loss: 0.4424, Train Steps/Sec: 0.07, Epoch: 2.100763060066523, LR: 0.0003 +[2026-02-28 07:06:59] (step=0010738) Train Loss: 0.4662, Train Steps/Sec: 0.07, Epoch: 2.100958716493837, LR: 0.0003 +[2026-02-28 07:07:13] (step=0010739) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 2.1011543729211506, LR: 0.0003 +[2026-02-28 07:07:26] (step=0010740) Train Loss: 0.4395, Train Steps/Sec: 0.07, Epoch: 2.101350029348464, LR: 0.0003 +[2026-02-28 07:07:40] (step=0010741) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 2.1015456857757777, LR: 0.0003 +[2026-02-28 07:07:54] (step=0010742) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 2.1017413422030913, LR: 0.0003 +[2026-02-28 07:08:07] (step=0010743) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.101936998630405, LR: 0.0003 +[2026-02-28 07:08:21] (step=0010744) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.102132655057719, LR: 0.0003 +[2026-02-28 07:08:35] (step=0010745) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.1023283114850324, LR: 0.0003 +[2026-02-28 07:08:48] (step=0010746) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.102523967912346, LR: 0.0003 +[2026-02-28 07:09:02] (step=0010747) Train Loss: 0.4684, Train Steps/Sec: 0.07, Epoch: 2.1027196243396595, LR: 0.0003 +[2026-02-28 07:09:16] (step=0010748) Train Loss: 0.4635, Train Steps/Sec: 0.07, Epoch: 2.102915280766973, LR: 0.0003 +[2026-02-28 07:09:29] (step=0010749) Train Loss: 0.4413, Train Steps/Sec: 0.07, Epoch: 2.1031109371942867, LR: 0.0003 +[2026-02-28 07:09:43] (step=0010750) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.1033065936216007, LR: 0.0003 +[2026-02-28 07:09:57] (step=0010751) Train Loss: 0.4401, Train Steps/Sec: 0.07, Epoch: 2.1035022500489142, LR: 0.0003 +[2026-02-28 07:10:10] (step=0010752) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 2.103697906476228, LR: 0.0003 +[2026-02-28 07:10:24] (step=0010753) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 2.1038935629035413, LR: 0.0003 +[2026-02-28 07:10:37] (step=0010754) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.104089219330855, LR: 0.0003 +[2026-02-28 07:10:51] (step=0010755) Train Loss: 0.4449, Train Steps/Sec: 0.07, Epoch: 2.1042848757581685, LR: 0.0003 +[2026-02-28 07:11:05] (step=0010756) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.1044805321854825, LR: 0.0003 +[2026-02-28 07:11:18] (step=0010757) Train Loss: 0.4627, Train Steps/Sec: 0.07, Epoch: 2.104676188612796, LR: 0.0003 +[2026-02-28 07:11:32] (step=0010758) Train Loss: 0.4631, Train Steps/Sec: 0.07, Epoch: 2.1048718450401096, LR: 0.0003 +[2026-02-28 07:11:46] (step=0010759) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.105067501467423, LR: 0.0003 +[2026-02-28 07:11:59] (step=0010760) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 2.1052631578947367, LR: 0.0003 +[2026-02-28 07:12:13] (step=0010761) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.1054588143220503, LR: 0.0003 +[2026-02-28 07:12:27] (step=0010762) Train Loss: 0.4340, Train Steps/Sec: 0.07, Epoch: 2.1056544707493643, LR: 0.0003 +[2026-02-28 07:12:40] (step=0010763) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 2.105850127176678, LR: 0.0003 +[2026-02-28 07:12:54] (step=0010764) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.1060457836039914, LR: 0.0003 +[2026-02-28 07:13:08] (step=0010765) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 2.106241440031305, LR: 0.0003 +[2026-02-28 07:13:21] (step=0010766) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.1064370964586185, LR: 0.0003 +[2026-02-28 07:13:35] (step=0010767) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 2.106632752885932, LR: 0.0003 +[2026-02-28 07:13:49] (step=0010768) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.106828409313246, LR: 0.0003 +[2026-02-28 07:14:02] (step=0010769) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.1070240657405597, LR: 0.0003 +[2026-02-28 07:14:16] (step=0010770) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.1072197221678732, LR: 0.0003 +[2026-02-28 07:14:30] (step=0010771) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.107415378595187, LR: 0.0003 +[2026-02-28 07:14:43] (step=0010772) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.1076110350225004, LR: 0.0003 +[2026-02-28 07:14:57] (step=0010773) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.107806691449814, LR: 0.0003 +[2026-02-28 07:15:11] (step=0010774) Train Loss: 0.4534, Train Steps/Sec: 0.07, Epoch: 2.108002347877128, LR: 0.0003 +[2026-02-28 07:15:24] (step=0010775) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.1081980043044415, LR: 0.0003 +[2026-02-28 07:15:38] (step=0010776) Train Loss: 0.4388, Train Steps/Sec: 0.07, Epoch: 2.108393660731755, LR: 0.0003 +[2026-02-28 07:15:52] (step=0010777) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.1085893171590686, LR: 0.0003 +[2026-02-28 07:16:05] (step=0010778) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.108784973586382, LR: 0.0003 +[2026-02-28 07:16:19] (step=0010779) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.1089806300136957, LR: 0.0003 +[2026-02-28 07:16:33] (step=0010780) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.1091762864410097, LR: 0.0003 +[2026-02-28 07:16:46] (step=0010781) Train Loss: 0.4404, Train Steps/Sec: 0.07, Epoch: 2.1093719428683233, LR: 0.0003 +[2026-02-28 07:17:00] (step=0010782) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.109567599295637, LR: 0.0003 +[2026-02-28 07:17:14] (step=0010783) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 2.1097632557229504, LR: 0.0003 +[2026-02-28 07:17:28] (step=0010784) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.109958912150264, LR: 0.0003 +[2026-02-28 07:17:41] (step=0010785) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.110154568577578, LR: 0.0003 +[2026-02-28 07:17:55] (step=0010786) Train Loss: 0.4410, Train Steps/Sec: 0.07, Epoch: 2.1103502250048916, LR: 0.0003 +[2026-02-28 07:18:09] (step=0010787) Train Loss: 0.4449, Train Steps/Sec: 0.07, Epoch: 2.110545881432205, LR: 0.0003 +[2026-02-28 07:18:22] (step=0010788) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.1107415378595187, LR: 0.0003 +[2026-02-28 07:18:36] (step=0010789) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.1109371942868322, LR: 0.0003 +[2026-02-28 07:18:49] (step=0010790) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.111132850714146, LR: 0.0003 +[2026-02-28 07:19:03] (step=0010791) Train Loss: 0.4577, Train Steps/Sec: 0.07, Epoch: 2.11132850714146, LR: 0.0003 +[2026-02-28 07:19:17] (step=0010792) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.1115241635687734, LR: 0.0003 +[2026-02-28 07:19:31] (step=0010793) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 2.111719819996087, LR: 0.0003 +[2026-02-28 07:19:44] (step=0010794) Train Loss: 0.4614, Train Steps/Sec: 0.07, Epoch: 2.1119154764234005, LR: 0.0003 +[2026-02-28 07:19:58] (step=0010795) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 2.112111132850714, LR: 0.0003 +[2026-02-28 07:20:12] (step=0010796) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.1123067892780276, LR: 0.0003 +[2026-02-28 07:20:25] (step=0010797) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.1125024457053416, LR: 0.0003 +[2026-02-28 07:20:39] (step=0010798) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.112698102132655, LR: 0.0003 +[2026-02-28 07:20:53] (step=0010799) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.1128937585599687, LR: 0.0003 +[2026-02-28 07:21:06] (step=0010800) Train Loss: 0.4414, Train Steps/Sec: 0.07, Epoch: 2.1130894149872823, LR: 0.0003 +[2026-02-28 07:21:20] (step=0010801) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 2.113285071414596, LR: 0.0003 +[2026-02-28 07:21:34] (step=0010802) Train Loss: 0.4456, Train Steps/Sec: 0.07, Epoch: 2.1134807278419094, LR: 0.0003 +[2026-02-28 07:21:47] (step=0010803) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.1136763842692234, LR: 0.0003 +[2026-02-28 07:22:01] (step=0010804) Train Loss: 0.4661, Train Steps/Sec: 0.07, Epoch: 2.113872040696537, LR: 0.0003 +[2026-02-28 07:22:15] (step=0010805) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.1140676971238506, LR: 0.0003 +[2026-02-28 07:22:28] (step=0010806) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.114263353551164, LR: 0.0003 +[2026-02-28 07:22:42] (step=0010807) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.1144590099784777, LR: 0.0003 +[2026-02-28 07:22:56] (step=0010808) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.1146546664057913, LR: 0.0003 +[2026-02-28 07:23:09] (step=0010809) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 2.1148503228331053, LR: 0.0003 +[2026-02-28 07:23:23] (step=0010810) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 2.115045979260419, LR: 0.0003 +[2026-02-28 07:23:37] (step=0010811) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 2.1152416356877324, LR: 0.0003 +[2026-02-28 07:23:50] (step=0010812) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.115437292115046, LR: 0.0003 +[2026-02-28 07:24:04] (step=0010813) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 2.1156329485423595, LR: 0.0003 +[2026-02-28 07:24:18] (step=0010814) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.115828604969673, LR: 0.0003 +[2026-02-28 07:24:31] (step=0010815) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 2.116024261396987, LR: 0.0003 +[2026-02-28 07:24:45] (step=0010816) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.1162199178243006, LR: 0.0003 +[2026-02-28 07:24:59] (step=0010817) Train Loss: 0.4383, Train Steps/Sec: 0.07, Epoch: 2.116415574251614, LR: 0.0003 +[2026-02-28 07:25:12] (step=0010818) Train Loss: 0.4406, Train Steps/Sec: 0.07, Epoch: 2.1166112306789278, LR: 0.0003 +[2026-02-28 07:25:26] (step=0010819) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.1168068871062413, LR: 0.0003 +[2026-02-28 07:25:40] (step=0010820) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.117002543533555, LR: 0.0003 +[2026-02-28 07:25:53] (step=0010821) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.117198199960869, LR: 0.0003 +[2026-02-28 07:26:07] (step=0010822) Train Loss: 0.4386, Train Steps/Sec: 0.07, Epoch: 2.1173938563881824, LR: 0.0003 +[2026-02-28 07:26:21] (step=0010823) Train Loss: 0.4408, Train Steps/Sec: 0.07, Epoch: 2.117589512815496, LR: 0.0003 +[2026-02-28 07:26:34] (step=0010824) Train Loss: 0.4643, Train Steps/Sec: 0.07, Epoch: 2.1177851692428096, LR: 0.0003 +[2026-02-28 07:26:48] (step=0010825) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 2.117980825670123, LR: 0.0003 +[2026-02-28 07:27:02] (step=0010826) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.1181764820974367, LR: 0.0003 +[2026-02-28 07:27:15] (step=0010827) Train Loss: 0.4645, Train Steps/Sec: 0.07, Epoch: 2.1183721385247507, LR: 0.0003 +[2026-02-28 07:27:29] (step=0010828) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 2.1185677949520643, LR: 0.0003 +[2026-02-28 07:27:43] (step=0010829) Train Loss: 0.4621, Train Steps/Sec: 0.07, Epoch: 2.118763451379378, LR: 0.0003 +[2026-02-28 07:27:57] (step=0010830) Train Loss: 0.4598, Train Steps/Sec: 0.07, Epoch: 2.1189591078066914, LR: 0.0003 +[2026-02-28 07:28:10] (step=0010831) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 2.119154764234005, LR: 0.0003 +[2026-02-28 07:28:24] (step=0010832) Train Loss: 0.4426, Train Steps/Sec: 0.07, Epoch: 2.1193504206613185, LR: 0.0003 +[2026-02-28 07:28:38] (step=0010833) Train Loss: 0.4419, Train Steps/Sec: 0.07, Epoch: 2.1195460770886325, LR: 0.0003 +[2026-02-28 07:28:51] (step=0010834) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.119741733515946, LR: 0.0003 +[2026-02-28 07:29:05] (step=0010835) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.1199373899432596, LR: 0.0003 +[2026-02-28 07:29:19] (step=0010836) Train Loss: 0.4481, Train Steps/Sec: 0.07, Epoch: 2.120133046370573, LR: 0.0003 +[2026-02-28 07:29:33] (step=0010837) Train Loss: 0.4634, Train Steps/Sec: 0.07, Epoch: 2.1203287027978868, LR: 0.0003 +[2026-02-28 07:29:46] (step=0010838) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.1205243592252003, LR: 0.0003 +[2026-02-28 07:30:00] (step=0010839) Train Loss: 0.4451, Train Steps/Sec: 0.07, Epoch: 2.1207200156525143, LR: 0.0003 +[2026-02-28 07:30:14] (step=0010840) Train Loss: 0.4642, Train Steps/Sec: 0.07, Epoch: 2.120915672079828, LR: 0.0003 +[2026-02-28 07:30:28] (step=0010841) Train Loss: 0.4597, Train Steps/Sec: 0.07, Epoch: 2.1211113285071415, LR: 0.0003 +[2026-02-28 07:30:41] (step=0010842) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.121306984934455, LR: 0.0003 +[2026-02-28 07:30:55] (step=0010843) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 2.1215026413617686, LR: 0.0003 +[2026-02-28 07:31:09] (step=0010844) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 2.1216982977890826, LR: 0.0003 +[2026-02-28 07:31:22] (step=0010845) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 2.121893954216396, LR: 0.0003 +[2026-02-28 07:31:36] (step=0010846) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.1220896106437097, LR: 0.0003 +[2026-02-28 07:31:50] (step=0010847) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.1222852670710233, LR: 0.0003 +[2026-02-28 07:32:03] (step=0010848) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 2.122480923498337, LR: 0.0003 +[2026-02-28 07:32:17] (step=0010849) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.1226765799256504, LR: 0.0003 +[2026-02-28 07:32:31] (step=0010850) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.1228722363529644, LR: 0.0003 +[2026-02-28 07:32:45] (step=0010851) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.123067892780278, LR: 0.0003 +[2026-02-28 07:32:58] (step=0010852) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 2.1232635492075915, LR: 0.0003 +[2026-02-28 07:33:12] (step=0010853) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.123459205634905, LR: 0.0003 +[2026-02-28 07:33:26] (step=0010854) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.1236548620622187, LR: 0.0003 +[2026-02-28 07:33:39] (step=0010855) Train Loss: 0.4446, Train Steps/Sec: 0.07, Epoch: 2.123850518489532, LR: 0.0003 +[2026-02-28 07:33:53] (step=0010856) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 2.124046174916846, LR: 0.0003 +[2026-02-28 07:34:07] (step=0010857) Train Loss: 0.4407, Train Steps/Sec: 0.07, Epoch: 2.12424183134416, LR: 0.0003 +[2026-02-28 07:34:21] (step=0010858) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.1244374877714733, LR: 0.0003 +[2026-02-28 07:34:34] (step=0010859) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 2.124633144198787, LR: 0.0003 +[2026-02-28 07:34:48] (step=0010860) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.1248288006261005, LR: 0.0003 +[2026-02-28 07:35:02] (step=0010861) Train Loss: 0.4640, Train Steps/Sec: 0.07, Epoch: 2.125024457053414, LR: 0.0003 +[2026-02-28 07:35:15] (step=0010862) Train Loss: 0.4673, Train Steps/Sec: 0.07, Epoch: 2.125220113480728, LR: 0.0003 +[2026-02-28 07:35:29] (step=0010863) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.1254157699080416, LR: 0.0003 +[2026-02-28 07:35:43] (step=0010864) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 2.125611426335355, LR: 0.0003 +[2026-02-28 07:35:56] (step=0010865) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.1258070827626687, LR: 0.0003 +[2026-02-28 07:36:10] (step=0010866) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.1260027391899823, LR: 0.0003 +[2026-02-28 07:36:24] (step=0010867) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.126198395617296, LR: 0.0003 +[2026-02-28 07:36:38] (step=0010868) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.12639405204461, LR: 0.0003 +[2026-02-28 07:36:51] (step=0010869) Train Loss: 0.4317, Train Steps/Sec: 0.07, Epoch: 2.1265897084719234, LR: 0.0003 +[2026-02-28 07:37:05] (step=0010870) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.126785364899237, LR: 0.0003 +[2026-02-28 07:37:19] (step=0010871) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.1269810213265505, LR: 0.0003 +[2026-02-28 07:37:33] (step=0010872) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.127176677753864, LR: 0.0003 +[2026-02-28 07:37:46] (step=0010873) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 2.1273723341811777, LR: 0.0003 +[2026-02-28 07:38:00] (step=0010874) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 2.1275679906084917, LR: 0.0003 +[2026-02-28 07:38:14] (step=0010875) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 2.1277636470358052, LR: 0.0003 +[2026-02-28 07:38:27] (step=0010876) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.127959303463119, LR: 0.0003 +[2026-02-28 07:38:41] (step=0010877) Train Loss: 0.4630, Train Steps/Sec: 0.07, Epoch: 2.1281549598904324, LR: 0.0003 +[2026-02-28 07:38:55] (step=0010878) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.128350616317746, LR: 0.0003 +[2026-02-28 07:39:09] (step=0010879) Train Loss: 0.4405, Train Steps/Sec: 0.07, Epoch: 2.1285462727450595, LR: 0.0003 +[2026-02-28 07:39:22] (step=0010880) Train Loss: 0.4437, Train Steps/Sec: 0.07, Epoch: 2.1287419291723735, LR: 0.0003 +[2026-02-28 07:39:36] (step=0010881) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 2.128937585599687, LR: 0.0003 +[2026-02-28 07:39:50] (step=0010882) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 2.1291332420270006, LR: 0.0003 +[2026-02-28 07:40:04] (step=0010883) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.129328898454314, LR: 0.0003 +[2026-02-28 07:40:18] (step=0010884) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.1295245548816277, LR: 0.0003 +[2026-02-28 07:40:31] (step=0010885) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.1297202113089413, LR: 0.0003 +[2026-02-28 07:40:45] (step=0010886) Train Loss: 0.4613, Train Steps/Sec: 0.07, Epoch: 2.1299158677362553, LR: 0.0003 +[2026-02-28 07:40:59] (step=0010887) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.130111524163569, LR: 0.0003 +[2026-02-28 07:41:12] (step=0010888) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.1303071805908824, LR: 0.0003 +[2026-02-28 07:41:26] (step=0010889) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.130502837018196, LR: 0.0003 +[2026-02-28 07:41:40] (step=0010890) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.1306984934455095, LR: 0.0003 +[2026-02-28 07:41:53] (step=0010891) Train Loss: 0.4607, Train Steps/Sec: 0.07, Epoch: 2.130894149872823, LR: 0.0003 +[2026-02-28 07:42:07] (step=0010892) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.131089806300137, LR: 0.0003 +[2026-02-28 07:42:21] (step=0010893) Train Loss: 0.4424, Train Steps/Sec: 0.07, Epoch: 2.1312854627274507, LR: 0.0003 +[2026-02-28 07:42:34] (step=0010894) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.1314811191547642, LR: 0.0003 +[2026-02-28 07:42:48] (step=0010895) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.131676775582078, LR: 0.0003 +[2026-02-28 07:43:02] (step=0010896) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.1318724320093914, LR: 0.0003 +[2026-02-28 07:43:16] (step=0010897) Train Loss: 0.4403, Train Steps/Sec: 0.07, Epoch: 2.1320680884367054, LR: 0.0003 +[2026-02-28 07:43:29] (step=0010898) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.132263744864019, LR: 0.0003 +[2026-02-28 07:43:43] (step=0010899) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 2.1324594012913325, LR: 0.0003 +[2026-02-28 07:43:57] (step=0010900) Train Loss: 0.4428, Train Steps/Sec: 0.07, Epoch: 2.132655057718646, LR: 0.0003 +[2026-02-28 07:44:11] (step=0010901) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.1328507141459596, LR: 0.0003 +[2026-02-28 07:44:24] (step=0010902) Train Loss: 0.4307, Train Steps/Sec: 0.07, Epoch: 2.133046370573273, LR: 0.0003 +[2026-02-28 07:44:38] (step=0010903) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.133242027000587, LR: 0.0003 +[2026-02-28 07:44:52] (step=0010904) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 2.1334376834279007, LR: 0.0003 +[2026-02-28 07:45:05] (step=0010905) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.1336333398552143, LR: 0.0003 +[2026-02-28 07:45:19] (step=0010906) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.133828996282528, LR: 0.0003 +[2026-02-28 07:45:33] (step=0010907) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 2.1340246527098414, LR: 0.0003 +[2026-02-28 07:45:46] (step=0010908) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 2.134220309137155, LR: 0.0003 +[2026-02-28 07:46:00] (step=0010909) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 2.134415965564469, LR: 0.0003 +[2026-02-28 07:46:14] (step=0010910) Train Loss: 0.4654, Train Steps/Sec: 0.07, Epoch: 2.1346116219917826, LR: 0.0003 +[2026-02-28 07:46:28] (step=0010911) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.134807278419096, LR: 0.0003 +[2026-02-28 07:46:41] (step=0010912) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.1350029348464097, LR: 0.0003 +[2026-02-28 07:46:55] (step=0010913) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.1351985912737232, LR: 0.0003 +[2026-02-28 07:47:09] (step=0010914) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.135394247701037, LR: 0.0003 +[2026-02-28 07:47:23] (step=0010915) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.135589904128351, LR: 0.0003 +[2026-02-28 07:47:36] (step=0010916) Train Loss: 0.4432, Train Steps/Sec: 0.07, Epoch: 2.1357855605556644, LR: 0.0003 +[2026-02-28 07:47:50] (step=0010917) Train Loss: 0.4602, Train Steps/Sec: 0.07, Epoch: 2.135981216982978, LR: 0.0003 +[2026-02-28 07:48:03] (step=0010918) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.1361768734102915, LR: 0.0003 +[2026-02-28 07:48:17] (step=0010919) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.136372529837605, LR: 0.0003 +[2026-02-28 07:48:31] (step=0010920) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.1365681862649186, LR: 0.0003 +[2026-02-28 07:48:45] (step=0010921) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 2.1367638426922326, LR: 0.0003 +[2026-02-28 07:48:58] (step=0010922) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.136959499119546, LR: 0.0003 +[2026-02-28 07:49:12] (step=0010923) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.1371551555468598, LR: 0.0003 +[2026-02-28 07:49:26] (step=0010924) Train Loss: 0.4681, Train Steps/Sec: 0.07, Epoch: 2.1373508119741733, LR: 0.0003 +[2026-02-28 07:49:40] (step=0010925) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.137546468401487, LR: 0.0003 +[2026-02-28 07:49:53] (step=0010926) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 2.1377421248288004, LR: 0.0003 +[2026-02-28 07:50:07] (step=0010927) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.1379377812561144, LR: 0.0003 +[2026-02-28 07:50:21] (step=0010928) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 2.138133437683428, LR: 0.0003 +[2026-02-28 07:50:34] (step=0010929) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.1383290941107416, LR: 0.0003 +[2026-02-28 07:50:48] (step=0010930) Train Loss: 0.4439, Train Steps/Sec: 0.07, Epoch: 2.138524750538055, LR: 0.0003 +[2026-02-28 07:51:02] (step=0010931) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 2.1387204069653687, LR: 0.0003 +[2026-02-28 07:51:15] (step=0010932) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 2.1389160633926823, LR: 0.0003 +[2026-02-28 07:51:29] (step=0010933) Train Loss: 0.4400, Train Steps/Sec: 0.07, Epoch: 2.1391117198199963, LR: 0.0003 +[2026-02-28 07:51:43] (step=0010934) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.13930737624731, LR: 0.0003 +[2026-02-28 07:51:57] (step=0010935) Train Loss: 0.4607, Train Steps/Sec: 0.07, Epoch: 2.1395030326746234, LR: 0.0003 +[2026-02-28 07:52:10] (step=0010936) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.139698689101937, LR: 0.0003 +[2026-02-28 07:52:24] (step=0010937) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.1398943455292505, LR: 0.0003 +[2026-02-28 07:52:38] (step=0010938) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.140090001956564, LR: 0.0003 +[2026-02-28 07:52:52] (step=0010939) Train Loss: 0.4403, Train Steps/Sec: 0.07, Epoch: 2.140285658383878, LR: 0.0003 +[2026-02-28 07:53:05] (step=0010940) Train Loss: 0.4625, Train Steps/Sec: 0.07, Epoch: 2.1404813148111916, LR: 0.0003 +[2026-02-28 07:53:19] (step=0010941) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.140676971238505, LR: 0.0003 +[2026-02-28 07:53:33] (step=0010942) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.1408726276658188, LR: 0.0003 +[2026-02-28 07:53:47] (step=0010943) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.1410682840931323, LR: 0.0003 +[2026-02-28 07:54:00] (step=0010944) Train Loss: 0.4465, Train Steps/Sec: 0.07, Epoch: 2.141263940520446, LR: 0.0003 +[2026-02-28 07:54:14] (step=0010945) Train Loss: 0.4372, Train Steps/Sec: 0.07, Epoch: 2.14145959694776, LR: 0.0003 +[2026-02-28 07:54:28] (step=0010946) Train Loss: 0.4426, Train Steps/Sec: 0.07, Epoch: 2.1416552533750735, LR: 0.0003 +[2026-02-28 07:54:41] (step=0010947) Train Loss: 0.4451, Train Steps/Sec: 0.07, Epoch: 2.141850909802387, LR: 0.0003 +[2026-02-28 07:54:55] (step=0010948) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 2.1420465662297006, LR: 0.0003 +[2026-02-28 07:55:09] (step=0010949) Train Loss: 0.4621, Train Steps/Sec: 0.07, Epoch: 2.142242222657014, LR: 0.0003 +[2026-02-28 07:55:22] (step=0010950) Train Loss: 0.4599, Train Steps/Sec: 0.07, Epoch: 2.1424378790843277, LR: 0.0003 +[2026-02-28 07:55:36] (step=0010951) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.1426335355116417, LR: 0.0003 +[2026-02-28 07:55:50] (step=0010952) Train Loss: 0.4414, Train Steps/Sec: 0.07, Epoch: 2.1428291919389553, LR: 0.0003 +[2026-02-28 07:56:04] (step=0010953) Train Loss: 0.4597, Train Steps/Sec: 0.07, Epoch: 2.143024848366269, LR: 0.0003 +[2026-02-28 07:56:17] (step=0010954) Train Loss: 0.4602, Train Steps/Sec: 0.07, Epoch: 2.1432205047935824, LR: 0.0003 +[2026-02-28 07:56:31] (step=0010955) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.143416161220896, LR: 0.0003 +[2026-02-28 07:56:45] (step=0010956) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.14361181764821, LR: 0.0003 +[2026-02-28 07:56:59] (step=0010957) Train Loss: 0.4638, Train Steps/Sec: 0.07, Epoch: 2.1438074740755235, LR: 0.0003 +[2026-02-28 07:57:12] (step=0010958) Train Loss: 0.4602, Train Steps/Sec: 0.07, Epoch: 2.144003130502837, LR: 0.0003 +[2026-02-28 07:57:26] (step=0010959) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.1441987869301506, LR: 0.0003 +[2026-02-28 07:57:40] (step=0010960) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 2.144394443357464, LR: 0.0003 +[2026-02-28 07:57:53] (step=0010961) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 2.1445900997847778, LR: 0.0003 +[2026-02-28 07:58:07] (step=0010962) Train Loss: 0.4405, Train Steps/Sec: 0.07, Epoch: 2.1447857562120918, LR: 0.0003 +[2026-02-28 07:58:21] (step=0010963) Train Loss: 0.4626, Train Steps/Sec: 0.07, Epoch: 2.1449814126394053, LR: 0.0003 +[2026-02-28 07:58:34] (step=0010964) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.145177069066719, LR: 0.0003 +[2026-02-28 07:58:48] (step=0010965) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.1453727254940325, LR: 0.0003 +[2026-02-28 07:59:02] (step=0010966) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 2.145568381921346, LR: 0.0003 +[2026-02-28 07:59:16] (step=0010967) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 2.1457640383486596, LR: 0.0003 +[2026-02-28 07:59:30] (step=0010968) Train Loss: 0.4606, Train Steps/Sec: 0.07, Epoch: 2.1459596947759736, LR: 0.0003 +[2026-02-28 07:59:43] (step=0010969) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 2.146155351203287, LR: 0.0003 +[2026-02-28 07:59:57] (step=0010970) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.1463510076306007, LR: 0.0003 +[2026-02-28 08:00:11] (step=0010971) Train Loss: 0.4534, Train Steps/Sec: 0.07, Epoch: 2.1465466640579143, LR: 0.0003 +[2026-02-28 08:00:25] (step=0010972) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 2.146742320485228, LR: 0.0003 +[2026-02-28 08:00:38] (step=0010973) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 2.1469379769125414, LR: 0.0003 +[2026-02-28 08:00:52] (step=0010974) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.1471336333398554, LR: 0.0003 +[2026-02-28 08:01:06] (step=0010975) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 2.147329289767169, LR: 0.0003 +[2026-02-28 08:01:19] (step=0010976) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.1475249461944825, LR: 0.0003 +[2026-02-28 08:01:33] (step=0010977) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.147720602621796, LR: 0.0003 +[2026-02-28 08:01:47] (step=0010978) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 2.1479162590491097, LR: 0.0003 +[2026-02-28 08:02:01] (step=0010979) Train Loss: 0.4436, Train Steps/Sec: 0.07, Epoch: 2.148111915476423, LR: 0.0003 +[2026-02-28 08:02:14] (step=0010980) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 2.1483075719037372, LR: 0.0003 +[2026-02-28 08:02:28] (step=0010981) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.148503228331051, LR: 0.0003 +[2026-02-28 08:02:42] (step=0010982) Train Loss: 0.4587, Train Steps/Sec: 0.07, Epoch: 2.1486988847583643, LR: 0.0003 +[2026-02-28 08:02:56] (step=0010983) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.148894541185678, LR: 0.0003 +[2026-02-28 08:03:09] (step=0010984) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.1490901976129915, LR: 0.0003 +[2026-02-28 08:03:23] (step=0010985) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 2.149285854040305, LR: 0.0003 +[2026-02-28 08:03:37] (step=0010986) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.149481510467619, LR: 0.0003 +[2026-02-28 08:03:51] (step=0010987) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.1496771668949326, LR: 0.0003 +[2026-02-28 08:04:04] (step=0010988) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.149872823322246, LR: 0.0003 +[2026-02-28 08:04:18] (step=0010989) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 2.1500684797495597, LR: 0.0003 +[2026-02-28 08:04:32] (step=0010990) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 2.1502641361768733, LR: 0.0003 +[2026-02-28 08:04:45] (step=0010991) Train Loss: 0.4351, Train Steps/Sec: 0.07, Epoch: 2.150459792604187, LR: 0.0003 +[2026-02-28 08:04:59] (step=0010992) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 2.150655449031501, LR: 0.0003 +[2026-02-28 08:05:13] (step=0010993) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.1508511054588144, LR: 0.0003 +[2026-02-28 08:05:27] (step=0010994) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 2.151046761886128, LR: 0.0003 +[2026-02-28 08:05:41] (step=0010995) Train Loss: 0.4421, Train Steps/Sec: 0.07, Epoch: 2.1512424183134415, LR: 0.0003 +[2026-02-28 08:05:54] (step=0010996) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 2.151438074740755, LR: 0.0003 +[2026-02-28 08:06:08] (step=0010997) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.1516337311680687, LR: 0.0003 +[2026-02-28 08:06:22] (step=0010998) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.1518293875953827, LR: 0.0003 +[2026-02-28 08:06:35] (step=0010999) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 2.1520250440226962, LR: 0.0003 +[2026-02-28 08:06:49] (step=0011000) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.15222070045001, LR: 0.0003 +[2026-02-28 08:06:49] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0011000/ +[2026-02-28 08:07:03] (step=0011001) Train Loss: 0.4564, Train Steps/Sec: 0.07, Epoch: 2.1524163568773234, LR: 0.0003 +[2026-02-28 08:07:17] (step=0011002) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 2.152612013304637, LR: 0.0003 +[2026-02-28 08:07:31] (step=0011003) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 2.1528076697319505, LR: 0.0003 +[2026-02-28 08:07:44] (step=0011004) Train Loss: 0.4358, Train Steps/Sec: 0.07, Epoch: 2.1530033261592645, LR: 0.0003 +[2026-02-28 08:07:58] (step=0011005) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 2.153198982586578, LR: 0.0003 +[2026-02-28 08:08:12] (step=0011006) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 2.1533946390138916, LR: 0.0003 +[2026-02-28 08:08:25] (step=0011007) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.153590295441205, LR: 0.0003 +[2026-02-28 08:08:39] (step=0011008) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.1537859518685187, LR: 0.0003 +[2026-02-28 08:08:53] (step=0011009) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.1539816082958327, LR: 0.0003 +[2026-02-28 08:09:07] (step=0011010) Train Loss: 0.4659, Train Steps/Sec: 0.07, Epoch: 2.1541772647231463, LR: 0.0003 +[2026-02-28 08:09:20] (step=0011011) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.15437292115046, LR: 0.0003 +[2026-02-28 08:09:34] (step=0011012) Train Loss: 0.4633, Train Steps/Sec: 0.07, Epoch: 2.1545685775777734, LR: 0.0003 +[2026-02-28 08:09:48] (step=0011013) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.154764234005087, LR: 0.0003 +[2026-02-28 08:10:02] (step=0011014) Train Loss: 0.4437, Train Steps/Sec: 0.07, Epoch: 2.1549598904324005, LR: 0.0003 +[2026-02-28 08:10:16] (step=0011015) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 2.1551555468597146, LR: 0.0003 +[2026-02-28 08:10:29] (step=0011016) Train Loss: 0.4390, Train Steps/Sec: 0.07, Epoch: 2.155351203287028, LR: 0.0003 +[2026-02-28 08:10:43] (step=0011017) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 2.1555468597143417, LR: 0.0003 +[2026-02-28 08:10:57] (step=0011018) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 2.1557425161416552, LR: 0.0003 +[2026-02-28 08:11:11] (step=0011019) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.155938172568969, LR: 0.0003 +[2026-02-28 08:11:24] (step=0011020) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.1561338289962824, LR: 0.0003 +[2026-02-28 08:11:38] (step=0011021) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.1563294854235964, LR: 0.0003 +[2026-02-28 08:11:52] (step=0011022) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.15652514185091, LR: 0.0003 +[2026-02-28 08:12:06] (step=0011023) Train Loss: 0.4592, Train Steps/Sec: 0.07, Epoch: 2.1567207982782235, LR: 0.0003 +[2026-02-28 08:12:19] (step=0011024) Train Loss: 0.4318, Train Steps/Sec: 0.07, Epoch: 2.156916454705537, LR: 0.0003 +[2026-02-28 08:12:33] (step=0011025) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 2.1571121111328506, LR: 0.0003 +[2026-02-28 08:12:47] (step=0011026) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 2.157307767560164, LR: 0.0003 +[2026-02-28 08:13:00] (step=0011027) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 2.157503423987478, LR: 0.0003 +[2026-02-28 08:13:14] (step=0011028) Train Loss: 0.4673, Train Steps/Sec: 0.07, Epoch: 2.1576990804147917, LR: 0.0003 +[2026-02-28 08:13:28] (step=0011029) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.1578947368421053, LR: 0.0003 +[2026-02-28 08:13:42] (step=0011030) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 2.158090393269419, LR: 0.0003 +[2026-02-28 08:13:55] (step=0011031) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 2.1582860496967324, LR: 0.0003 +[2026-02-28 08:14:09] (step=0011032) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 2.158481706124046, LR: 0.0003 +[2026-02-28 08:14:23] (step=0011033) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.15867736255136, LR: 0.0003 +[2026-02-28 08:14:36] (step=0011034) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 2.1588730189786736, LR: 0.0003 +[2026-02-28 08:14:50] (step=0011035) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.159068675405987, LR: 0.0003 +[2026-02-28 08:15:04] (step=0011036) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.1592643318333007, LR: 0.0003 +[2026-02-28 08:15:18] (step=0011037) Train Loss: 0.4624, Train Steps/Sec: 0.07, Epoch: 2.1594599882606142, LR: 0.0003 +[2026-02-28 08:15:31] (step=0011038) Train Loss: 0.4419, Train Steps/Sec: 0.07, Epoch: 2.159655644687928, LR: 0.0003 +[2026-02-28 08:15:45] (step=0011039) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.159851301115242, LR: 0.0003 +[2026-02-28 08:15:59] (step=0011040) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.1600469575425554, LR: 0.0003 +[2026-02-28 08:16:12] (step=0011041) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.160242613969869, LR: 0.0003 +[2026-02-28 08:16:26] (step=0011042) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.1604382703971825, LR: 0.0003 +[2026-02-28 08:16:40] (step=0011043) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 2.160633926824496, LR: 0.0003 +[2026-02-28 08:16:54] (step=0011044) Train Loss: 0.4660, Train Steps/Sec: 0.07, Epoch: 2.1608295832518096, LR: 0.0003 +[2026-02-28 08:17:08] (step=0011045) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.1610252396791236, LR: 0.0003 +[2026-02-28 08:17:21] (step=0011046) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 2.161220896106437, LR: 0.0003 +[2026-02-28 08:17:35] (step=0011047) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.1614165525337508, LR: 0.0003 +[2026-02-28 08:17:49] (step=0011048) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 2.1616122089610643, LR: 0.0003 +[2026-02-28 08:18:02] (step=0011049) Train Loss: 0.4433, Train Steps/Sec: 0.07, Epoch: 2.161807865388378, LR: 0.0003 +[2026-02-28 08:18:16] (step=0011050) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.1620035218156914, LR: 0.0003 +[2026-02-28 08:18:30] (step=0011051) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 2.1621991782430054, LR: 0.0003 +[2026-02-28 08:18:43] (step=0011052) Train Loss: 0.4614, Train Steps/Sec: 0.07, Epoch: 2.162394834670319, LR: 0.0003 +[2026-02-28 08:18:57] (step=0011053) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.1625904910976326, LR: 0.0003 +[2026-02-28 08:19:11] (step=0011054) Train Loss: 0.4605, Train Steps/Sec: 0.07, Epoch: 2.162786147524946, LR: 0.0003 +[2026-02-28 08:19:24] (step=0011055) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.1629818039522597, LR: 0.0003 +[2026-02-28 08:19:38] (step=0011056) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 2.1631774603795733, LR: 0.0003 +[2026-02-28 08:19:52] (step=0011057) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.1633731168068873, LR: 0.0003 +[2026-02-28 08:20:06] (step=0011058) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 2.163568773234201, LR: 0.0003 +[2026-02-28 08:20:19] (step=0011059) Train Loss: 0.4415, Train Steps/Sec: 0.07, Epoch: 2.1637644296615144, LR: 0.0003 +[2026-02-28 08:20:33] (step=0011060) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.163960086088828, LR: 0.0003 +[2026-02-28 08:20:47] (step=0011061) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 2.1641557425161415, LR: 0.0003 +[2026-02-28 08:21:01] (step=0011062) Train Loss: 0.4381, Train Steps/Sec: 0.07, Epoch: 2.164351398943455, LR: 0.0003 +[2026-02-28 08:21:14] (step=0011063) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.164547055370769, LR: 0.0003 +[2026-02-28 08:21:28] (step=0011064) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.1647427117980826, LR: 0.0003 +[2026-02-28 08:21:42] (step=0011065) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.164938368225396, LR: 0.0003 +[2026-02-28 08:21:56] (step=0011066) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.1651340246527098, LR: 0.0003 +[2026-02-28 08:22:09] (step=0011067) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 2.1653296810800233, LR: 0.0003 +[2026-02-28 08:22:23] (step=0011068) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 2.1655253375073373, LR: 0.0003 +[2026-02-28 08:22:37] (step=0011069) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.165720993934651, LR: 0.0003 +[2026-02-28 08:22:50] (step=0011070) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 2.1659166503619645, LR: 0.0003 +[2026-02-28 08:23:04] (step=0011071) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.166112306789278, LR: 0.0003 +[2026-02-28 08:23:18] (step=0011072) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 2.1663079632165916, LR: 0.0003 +[2026-02-28 08:23:31] (step=0011073) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 2.166503619643905, LR: 0.0003 +[2026-02-28 08:23:45] (step=0011074) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.166699276071219, LR: 0.0003 +[2026-02-28 08:23:59] (step=0011075) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.1668949324985327, LR: 0.0003 +[2026-02-28 08:24:13] (step=0011076) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.1670905889258463, LR: 0.0003 +[2026-02-28 08:24:27] (step=0011077) Train Loss: 0.4420, Train Steps/Sec: 0.07, Epoch: 2.16728624535316, LR: 0.0003 +[2026-02-28 08:24:40] (step=0011078) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.1674819017804734, LR: 0.0003 +[2026-02-28 08:24:54] (step=0011079) Train Loss: 0.4642, Train Steps/Sec: 0.07, Epoch: 2.167677558207787, LR: 0.0003 +[2026-02-28 08:25:08] (step=0011080) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 2.167873214635101, LR: 0.0003 +[2026-02-28 08:25:21] (step=0011081) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.1680688710624145, LR: 0.0003 +[2026-02-28 08:25:35] (step=0011082) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.168264527489728, LR: 0.0003 +[2026-02-28 08:25:49] (step=0011083) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.1684601839170417, LR: 0.0003 +[2026-02-28 08:26:02] (step=0011084) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.168655840344355, LR: 0.0003 +[2026-02-28 08:26:16] (step=0011085) Train Loss: 0.4671, Train Steps/Sec: 0.07, Epoch: 2.1688514967716688, LR: 0.0003 +[2026-02-28 08:26:30] (step=0011086) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.169047153198983, LR: 0.0003 +[2026-02-28 08:26:43] (step=0011087) Train Loss: 0.4457, Train Steps/Sec: 0.07, Epoch: 2.1692428096262963, LR: 0.0003 +[2026-02-28 08:26:57] (step=0011088) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.16943846605361, LR: 0.0003 +[2026-02-28 08:27:11] (step=0011089) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 2.1696341224809235, LR: 0.0003 +[2026-02-28 08:27:25] (step=0011090) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.169829778908237, LR: 0.0003 +[2026-02-28 08:27:38] (step=0011091) Train Loss: 0.4587, Train Steps/Sec: 0.07, Epoch: 2.1700254353355506, LR: 0.0003 +[2026-02-28 08:27:52] (step=0011092) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 2.1702210917628646, LR: 0.0003 +[2026-02-28 08:28:06] (step=0011093) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 2.170416748190178, LR: 0.0003 +[2026-02-28 08:28:20] (step=0011094) Train Loss: 0.4616, Train Steps/Sec: 0.07, Epoch: 2.1706124046174917, LR: 0.0003 +[2026-02-28 08:28:33] (step=0011095) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.1708080610448053, LR: 0.0003 +[2026-02-28 08:28:47] (step=0011096) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 2.171003717472119, LR: 0.0003 +[2026-02-28 08:29:01] (step=0011097) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.1711993738994324, LR: 0.0003 +[2026-02-28 08:29:14] (step=0011098) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 2.1713950303267464, LR: 0.0003 +[2026-02-28 08:29:28] (step=0011099) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 2.17159068675406, LR: 0.0003 +[2026-02-28 08:29:42] (step=0011100) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 2.1717863431813735, LR: 0.0003 +[2026-02-28 08:29:55] (step=0011101) Train Loss: 0.4375, Train Steps/Sec: 0.07, Epoch: 2.171981999608687, LR: 0.0003 +[2026-02-28 08:30:09] (step=0011102) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.1721776560360007, LR: 0.0003 +[2026-02-28 08:30:23] (step=0011103) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.172373312463314, LR: 0.0003 +[2026-02-28 08:30:37] (step=0011104) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.1725689688906282, LR: 0.0003 +[2026-02-28 08:30:50] (step=0011105) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 2.172764625317942, LR: 0.0003 +[2026-02-28 08:31:04] (step=0011106) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.1729602817452554, LR: 0.0003 +[2026-02-28 08:31:18] (step=0011107) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.173155938172569, LR: 0.0003 +[2026-02-28 08:31:32] (step=0011108) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.1733515945998825, LR: 0.0003 +[2026-02-28 08:31:45] (step=0011109) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 2.173547251027196, LR: 0.0003 +[2026-02-28 08:31:59] (step=0011110) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 2.17374290745451, LR: 0.0003 +[2026-02-28 08:32:13] (step=0011111) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 2.1739385638818236, LR: 0.0003 +[2026-02-28 08:32:26] (step=0011112) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.174134220309137, LR: 0.0003 +[2026-02-28 08:32:40] (step=0011113) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.1743298767364507, LR: 0.0003 +[2026-02-28 08:32:54] (step=0011114) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.1745255331637643, LR: 0.0003 +[2026-02-28 08:33:07] (step=0011115) Train Loss: 0.4649, Train Steps/Sec: 0.07, Epoch: 2.174721189591078, LR: 0.0003 +[2026-02-28 08:33:21] (step=0011116) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.174916846018392, LR: 0.0003 +[2026-02-28 08:33:35] (step=0011117) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.1751125024457054, LR: 0.0003 +[2026-02-28 08:33:49] (step=0011118) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 2.175308158873019, LR: 0.0003 +[2026-02-28 08:34:02] (step=0011119) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.1755038153003325, LR: 0.0003 +[2026-02-28 08:34:16] (step=0011120) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.175699471727646, LR: 0.0003 +[2026-02-28 08:34:30] (step=0011121) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.17589512815496, LR: 0.0003 +[2026-02-28 08:34:44] (step=0011122) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.1760907845822737, LR: 0.0003 +[2026-02-28 08:34:57] (step=0011123) Train Loss: 0.4387, Train Steps/Sec: 0.07, Epoch: 2.1762864410095872, LR: 0.0003 +[2026-02-28 08:35:11] (step=0011124) Train Loss: 0.4424, Train Steps/Sec: 0.07, Epoch: 2.176482097436901, LR: 0.0003 +[2026-02-28 08:35:25] (step=0011125) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 2.1766777538642144, LR: 0.0003 +[2026-02-28 08:35:38] (step=0011126) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.176873410291528, LR: 0.0003 +[2026-02-28 08:35:52] (step=0011127) Train Loss: 0.4609, Train Steps/Sec: 0.07, Epoch: 2.177069066718842, LR: 0.0003 +[2026-02-28 08:36:06] (step=0011128) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.1772647231461555, LR: 0.0003 +[2026-02-28 08:36:20] (step=0011129) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.177460379573469, LR: 0.0003 +[2026-02-28 08:36:33] (step=0011130) Train Loss: 0.4557, Train Steps/Sec: 0.07, Epoch: 2.1776560360007826, LR: 0.0003 +[2026-02-28 08:36:47] (step=0011131) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.177851692428096, LR: 0.0003 +[2026-02-28 08:37:01] (step=0011132) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.1780473488554097, LR: 0.0003 +[2026-02-28 08:37:15] (step=0011133) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 2.1782430052827237, LR: 0.0003 +[2026-02-28 08:37:28] (step=0011134) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.1784386617100373, LR: 0.0003 +[2026-02-28 08:37:42] (step=0011135) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 2.178634318137351, LR: 0.0003 +[2026-02-28 08:37:56] (step=0011136) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 2.1788299745646644, LR: 0.0003 +[2026-02-28 08:38:09] (step=0011137) Train Loss: 0.4391, Train Steps/Sec: 0.07, Epoch: 2.179025630991978, LR: 0.0003 +[2026-02-28 08:38:23] (step=0011138) Train Loss: 0.4589, Train Steps/Sec: 0.07, Epoch: 2.1792212874192916, LR: 0.0003 +[2026-02-28 08:38:37] (step=0011139) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 2.1794169438466056, LR: 0.0003 +[2026-02-28 08:38:50] (step=0011140) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.179612600273919, LR: 0.0003 +[2026-02-28 08:39:04] (step=0011141) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.1798082567012327, LR: 0.0003 +[2026-02-28 08:39:18] (step=0011142) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.1800039131285462, LR: 0.0003 +[2026-02-28 08:39:31] (step=0011143) Train Loss: 0.4593, Train Steps/Sec: 0.07, Epoch: 2.18019956955586, LR: 0.0003 +[2026-02-28 08:39:45] (step=0011144) Train Loss: 0.4437, Train Steps/Sec: 0.07, Epoch: 2.1803952259831734, LR: 0.0003 +[2026-02-28 08:39:59] (step=0011145) Train Loss: 0.4443, Train Steps/Sec: 0.07, Epoch: 2.1805908824104874, LR: 0.0003 +[2026-02-28 08:40:12] (step=0011146) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 2.180786538837801, LR: 0.0003 +[2026-02-28 08:40:26] (step=0011147) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.1809821952651145, LR: 0.0003 +[2026-02-28 08:40:40] (step=0011148) Train Loss: 0.4634, Train Steps/Sec: 0.07, Epoch: 2.181177851692428, LR: 0.0003 +[2026-02-28 08:40:53] (step=0011149) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 2.1813735081197416, LR: 0.0003 +[2026-02-28 08:41:07] (step=0011150) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 2.181569164547055, LR: 0.0003 +[2026-02-28 08:41:21] (step=0011151) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 2.181764820974369, LR: 0.0003 +[2026-02-28 08:41:34] (step=0011152) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 2.1819604774016828, LR: 0.0003 +[2026-02-28 08:41:48] (step=0011153) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.1821561338289963, LR: 0.0003 +[2026-02-28 08:42:02] (step=0011154) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 2.18235179025631, LR: 0.0003 +[2026-02-28 08:42:15] (step=0011155) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 2.1825474466836234, LR: 0.0003 +[2026-02-28 08:42:29] (step=0011156) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.182743103110937, LR: 0.0003 +[2026-02-28 08:42:43] (step=0011157) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.182938759538251, LR: 0.0003 +[2026-02-28 08:42:56] (step=0011158) Train Loss: 0.4597, Train Steps/Sec: 0.07, Epoch: 2.1831344159655646, LR: 0.0003 +[2026-02-28 08:43:10] (step=0011159) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 2.183330072392878, LR: 0.0003 +[2026-02-28 08:43:24] (step=0011160) Train Loss: 0.4390, Train Steps/Sec: 0.07, Epoch: 2.1835257288201917, LR: 0.0003 +[2026-02-28 08:43:37] (step=0011161) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.1837213852475053, LR: 0.0003 +[2026-02-28 08:43:51] (step=0011162) Train Loss: 0.4655, Train Steps/Sec: 0.07, Epoch: 2.183917041674819, LR: 0.0003 +[2026-02-28 08:44:05] (step=0011163) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 2.184112698102133, LR: 0.0003 +[2026-02-28 08:44:18] (step=0011164) Train Loss: 0.4399, Train Steps/Sec: 0.07, Epoch: 2.1843083545294464, LR: 0.0003 +[2026-02-28 08:44:32] (step=0011165) Train Loss: 0.4373, Train Steps/Sec: 0.07, Epoch: 2.18450401095676, LR: 0.0003 +[2026-02-28 08:44:45] (step=0011166) Train Loss: 0.4438, Train Steps/Sec: 0.07, Epoch: 2.1846996673840735, LR: 0.0003 +[2026-02-28 08:44:59] (step=0011167) Train Loss: 0.4481, Train Steps/Sec: 0.07, Epoch: 2.184895323811387, LR: 0.0003 +[2026-02-28 08:45:13] (step=0011168) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 2.1850909802387006, LR: 0.0003 +[2026-02-28 08:45:26] (step=0011169) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 2.1852866366660146, LR: 0.0003 +[2026-02-28 08:45:40] (step=0011170) Train Loss: 0.4427, Train Steps/Sec: 0.07, Epoch: 2.185482293093328, LR: 0.0003 +[2026-02-28 08:45:53] (step=0011171) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 2.1856779495206418, LR: 0.0003 +[2026-02-28 08:46:07] (step=0011172) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 2.1858736059479553, LR: 0.0003 +[2026-02-28 08:46:21] (step=0011173) Train Loss: 0.4593, Train Steps/Sec: 0.07, Epoch: 2.186069262375269, LR: 0.0003 +[2026-02-28 08:46:34] (step=0011174) Train Loss: 0.4434, Train Steps/Sec: 0.07, Epoch: 2.1862649188025824, LR: 0.0003 +[2026-02-28 08:46:48] (step=0011175) Train Loss: 0.4406, Train Steps/Sec: 0.07, Epoch: 2.1864605752298965, LR: 0.0003 +[2026-02-28 08:47:02] (step=0011176) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.18665623165721, LR: 0.0003 +[2026-02-28 08:47:15] (step=0011177) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.1868518880845236, LR: 0.0003 +[2026-02-28 08:47:29] (step=0011178) Train Loss: 0.4629, Train Steps/Sec: 0.07, Epoch: 2.187047544511837, LR: 0.0003 +[2026-02-28 08:47:43] (step=0011179) Train Loss: 0.4454, Train Steps/Sec: 0.07, Epoch: 2.1872432009391507, LR: 0.0003 +[2026-02-28 08:47:56] (step=0011180) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.1874388573664647, LR: 0.0003 +[2026-02-28 08:48:10] (step=0011181) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.1876345137937783, LR: 0.0003 +[2026-02-28 08:48:24] (step=0011182) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.187830170221092, LR: 0.0003 +[2026-02-28 08:48:37] (step=0011183) Train Loss: 0.4636, Train Steps/Sec: 0.07, Epoch: 2.1880258266484054, LR: 0.0003 +[2026-02-28 08:48:51] (step=0011184) Train Loss: 0.4481, Train Steps/Sec: 0.07, Epoch: 2.188221483075719, LR: 0.0003 +[2026-02-28 08:49:05] (step=0011185) Train Loss: 0.4418, Train Steps/Sec: 0.07, Epoch: 2.1884171395030325, LR: 0.0003 +[2026-02-28 08:49:18] (step=0011186) Train Loss: 0.4408, Train Steps/Sec: 0.07, Epoch: 2.1886127959303465, LR: 0.0003 +[2026-02-28 08:49:32] (step=0011187) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.18880845235766, LR: 0.0003 +[2026-02-28 08:49:46] (step=0011188) Train Loss: 0.4414, Train Steps/Sec: 0.07, Epoch: 2.1890041087849736, LR: 0.0003 +[2026-02-28 08:49:59] (step=0011189) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.189199765212287, LR: 0.0003 +[2026-02-28 08:50:13] (step=0011190) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.1893954216396008, LR: 0.0003 +[2026-02-28 08:50:27] (step=0011191) Train Loss: 0.4709, Train Steps/Sec: 0.07, Epoch: 2.1895910780669143, LR: 0.0003 +[2026-02-28 08:50:40] (step=0011192) Train Loss: 0.4434, Train Steps/Sec: 0.07, Epoch: 2.1897867344942283, LR: 0.0003 +[2026-02-28 08:50:54] (step=0011193) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.189982390921542, LR: 0.0003 +[2026-02-28 08:51:08] (step=0011194) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.1901780473488555, LR: 0.0003 +[2026-02-28 08:51:22] (step=0011195) Train Loss: 0.4676, Train Steps/Sec: 0.07, Epoch: 2.190373703776169, LR: 0.0003 +[2026-02-28 08:51:35] (step=0011196) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.1905693602034826, LR: 0.0003 +[2026-02-28 08:51:49] (step=0011197) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 2.190765016630796, LR: 0.0003 +[2026-02-28 08:52:03] (step=0011198) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 2.19096067305811, LR: 0.0003 +[2026-02-28 08:52:16] (step=0011199) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 2.1911563294854237, LR: 0.0003 +[2026-02-28 08:52:30] (step=0011200) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 2.1913519859127373, LR: 0.0003 +[2026-02-28 08:52:44] (step=0011201) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.191547642340051, LR: 0.0003 +[2026-02-28 08:52:57] (step=0011202) Train Loss: 0.4608, Train Steps/Sec: 0.07, Epoch: 2.1917432987673644, LR: 0.0003 +[2026-02-28 08:53:11] (step=0011203) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 2.191938955194678, LR: 0.0003 +[2026-02-28 08:53:25] (step=0011204) Train Loss: 0.4414, Train Steps/Sec: 0.07, Epoch: 2.192134611621992, LR: 0.0003 +[2026-02-28 08:53:38] (step=0011205) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.1923302680493055, LR: 0.0003 +[2026-02-28 08:53:52] (step=0011206) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 2.192525924476619, LR: 0.0003 +[2026-02-28 08:54:06] (step=0011207) Train Loss: 0.4642, Train Steps/Sec: 0.07, Epoch: 2.1927215809039327, LR: 0.0003 +[2026-02-28 08:54:20] (step=0011208) Train Loss: 0.4402, Train Steps/Sec: 0.07, Epoch: 2.192917237331246, LR: 0.0003 +[2026-02-28 08:54:33] (step=0011209) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 2.19311289375856, LR: 0.0003 +[2026-02-28 08:54:47] (step=0011210) Train Loss: 0.4638, Train Steps/Sec: 0.07, Epoch: 2.193308550185874, LR: 0.0003 +[2026-02-28 08:55:01] (step=0011211) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.1935042066131873, LR: 0.0003 +[2026-02-28 08:55:14] (step=0011212) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 2.193699863040501, LR: 0.0003 +[2026-02-28 08:55:28] (step=0011213) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 2.1938955194678145, LR: 0.0003 +[2026-02-28 08:55:42] (step=0011214) Train Loss: 0.4597, Train Steps/Sec: 0.07, Epoch: 2.194091175895128, LR: 0.0003 +[2026-02-28 08:55:55] (step=0011215) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 2.1942868323224416, LR: 0.0003 +[2026-02-28 08:56:09] (step=0011216) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.1944824887497556, LR: 0.0003 +[2026-02-28 08:56:23] (step=0011217) Train Loss: 0.4398, Train Steps/Sec: 0.07, Epoch: 2.194678145177069, LR: 0.0003 +[2026-02-28 08:56:36] (step=0011218) Train Loss: 0.4407, Train Steps/Sec: 0.07, Epoch: 2.1948738016043827, LR: 0.0003 +[2026-02-28 08:56:50] (step=0011219) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.1950694580316963, LR: 0.0003 +[2026-02-28 08:57:04] (step=0011220) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.19526511445901, LR: 0.0003 +[2026-02-28 08:57:17] (step=0011221) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 2.1954607708863234, LR: 0.0003 +[2026-02-28 08:57:31] (step=0011222) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.1956564273136374, LR: 0.0003 +[2026-02-28 08:57:45] (step=0011223) Train Loss: 0.4385, Train Steps/Sec: 0.07, Epoch: 2.195852083740951, LR: 0.0003 +[2026-02-28 08:57:59] (step=0011224) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.1960477401682645, LR: 0.0003 +[2026-02-28 08:58:13] (step=0011225) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 2.196243396595578, LR: 0.0003 +[2026-02-28 08:58:26] (step=0011226) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.1964390530228917, LR: 0.0003 +[2026-02-28 08:58:40] (step=0011227) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.1966347094502052, LR: 0.0003 +[2026-02-28 08:58:54] (step=0011228) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.1968303658775192, LR: 0.0003 +[2026-02-28 08:59:07] (step=0011229) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 2.197026022304833, LR: 0.0003 +[2026-02-28 08:59:21] (step=0011230) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 2.1972216787321464, LR: 0.0003 +[2026-02-28 08:59:35] (step=0011231) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 2.19741733515946, LR: 0.0003 +[2026-02-28 08:59:48] (step=0011232) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.1976129915867735, LR: 0.0003 +[2026-02-28 09:00:02] (step=0011233) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.1978086480140875, LR: 0.0003 +[2026-02-28 09:00:16] (step=0011234) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.198004304441401, LR: 0.0003 +[2026-02-28 09:00:29] (step=0011235) Train Loss: 0.4670, Train Steps/Sec: 0.07, Epoch: 2.1981999608687146, LR: 0.0003 +[2026-02-28 09:00:43] (step=0011236) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 2.198395617296028, LR: 0.0003 +[2026-02-28 09:00:57] (step=0011237) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.1985912737233417, LR: 0.0003 +[2026-02-28 09:01:11] (step=0011238) Train Loss: 0.4417, Train Steps/Sec: 0.07, Epoch: 2.1987869301506553, LR: 0.0003 +[2026-02-28 09:01:25] (step=0011239) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.1989825865779693, LR: 0.0003 +[2026-02-28 09:01:38] (step=0011240) Train Loss: 0.4596, Train Steps/Sec: 0.07, Epoch: 2.199178243005283, LR: 0.0003 +[2026-02-28 09:01:52] (step=0011241) Train Loss: 0.4398, Train Steps/Sec: 0.07, Epoch: 2.1993738994325964, LR: 0.0003 +[2026-02-28 09:02:06] (step=0011242) Train Loss: 0.4588, Train Steps/Sec: 0.07, Epoch: 2.19956955585991, LR: 0.0003 +[2026-02-28 09:02:19] (step=0011243) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.1997652122872235, LR: 0.0003 +[2026-02-28 09:02:33] (step=0011244) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.199960868714537, LR: 0.0003 +[2026-02-28 09:02:47] (step=0011245) Train Loss: 0.4601, Train Steps/Sec: 0.07, Epoch: 2.200156525141851, LR: 0.0003 +[2026-02-28 09:03:00] (step=0011246) Train Loss: 0.4616, Train Steps/Sec: 0.07, Epoch: 2.2003521815691647, LR: 0.0003 +[2026-02-28 09:03:14] (step=0011247) Train Loss: 0.4656, Train Steps/Sec: 0.07, Epoch: 2.2005478379964782, LR: 0.0003 +[2026-02-28 09:03:28] (step=0011248) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.200743494423792, LR: 0.0003 +[2026-02-28 09:03:42] (step=0011249) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 2.2009391508511054, LR: 0.0003 +[2026-02-28 09:03:56] (step=0011250) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.201134807278419, LR: 0.0003 +[2026-02-28 09:04:09] (step=0011251) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.201330463705733, LR: 0.0003 +[2026-02-28 09:04:23] (step=0011252) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.2015261201330465, LR: 0.0003 +[2026-02-28 09:04:37] (step=0011253) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.20172177656036, LR: 0.0003 +[2026-02-28 09:04:51] (step=0011254) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.2019174329876736, LR: 0.0003 +[2026-02-28 09:05:04] (step=0011255) Train Loss: 0.4467, Train Steps/Sec: 0.07, Epoch: 2.202113089414987, LR: 0.0003 +[2026-02-28 09:05:18] (step=0011256) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 2.2023087458423007, LR: 0.0003 +[2026-02-28 09:05:32] (step=0011257) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.2025044022696147, LR: 0.0003 +[2026-02-28 09:05:45] (step=0011258) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.2027000586969283, LR: 0.0003 +[2026-02-28 09:05:59] (step=0011259) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 2.202895715124242, LR: 0.0003 +[2026-02-28 09:06:13] (step=0011260) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 2.2030913715515554, LR: 0.0003 +[2026-02-28 09:06:27] (step=0011261) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.203287027978869, LR: 0.0003 +[2026-02-28 09:06:40] (step=0011262) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 2.2034826844061826, LR: 0.0003 +[2026-02-28 09:06:54] (step=0011263) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.2036783408334966, LR: 0.0003 +[2026-02-28 09:07:08] (step=0011264) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.20387399726081, LR: 0.0003 +[2026-02-28 09:07:22] (step=0011265) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.2040696536881237, LR: 0.0003 +[2026-02-28 09:07:36] (step=0011266) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 2.2042653101154372, LR: 0.0003 +[2026-02-28 09:07:49] (step=0011267) Train Loss: 0.4674, Train Steps/Sec: 0.07, Epoch: 2.204460966542751, LR: 0.0003 +[2026-02-28 09:08:03] (step=0011268) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.2046566229700644, LR: 0.0003 +[2026-02-28 09:08:17] (step=0011269) Train Loss: 0.4605, Train Steps/Sec: 0.07, Epoch: 2.2048522793973784, LR: 0.0003 +[2026-02-28 09:08:30] (step=0011270) Train Loss: 0.4420, Train Steps/Sec: 0.07, Epoch: 2.205047935824692, LR: 0.0003 +[2026-02-28 09:08:44] (step=0011271) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 2.2052435922520055, LR: 0.0003 +[2026-02-28 09:08:58] (step=0011272) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.205439248679319, LR: 0.0003 +[2026-02-28 09:09:11] (step=0011273) Train Loss: 0.4366, Train Steps/Sec: 0.07, Epoch: 2.2056349051066326, LR: 0.0003 +[2026-02-28 09:09:25] (step=0011274) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 2.205830561533946, LR: 0.0003 +[2026-02-28 09:09:39] (step=0011275) Train Loss: 0.4392, Train Steps/Sec: 0.07, Epoch: 2.20602621796126, LR: 0.0003 +[2026-02-28 09:09:52] (step=0011276) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.2062218743885738, LR: 0.0003 +[2026-02-28 09:10:06] (step=0011277) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.2064175308158873, LR: 0.0003 +[2026-02-28 09:10:20] (step=0011278) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.206613187243201, LR: 0.0003 +[2026-02-28 09:10:34] (step=0011279) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.2068088436705144, LR: 0.0003 +[2026-02-28 09:10:47] (step=0011280) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.207004500097828, LR: 0.0003 +[2026-02-28 09:11:01] (step=0011281) Train Loss: 0.4399, Train Steps/Sec: 0.07, Epoch: 2.207200156525142, LR: 0.0003 +[2026-02-28 09:11:15] (step=0011282) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.2073958129524556, LR: 0.0003 +[2026-02-28 09:11:29] (step=0011283) Train Loss: 0.4610, Train Steps/Sec: 0.07, Epoch: 2.207591469379769, LR: 0.0003 +[2026-02-28 09:11:42] (step=0011284) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.2077871258070827, LR: 0.0003 +[2026-02-28 09:11:56] (step=0011285) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 2.2079827822343963, LR: 0.0003 +[2026-02-28 09:12:10] (step=0011286) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.20817843866171, LR: 0.0003 +[2026-02-28 09:12:23] (step=0011287) Train Loss: 0.4641, Train Steps/Sec: 0.07, Epoch: 2.208374095089024, LR: 0.0003 +[2026-02-28 09:12:37] (step=0011288) Train Loss: 0.4629, Train Steps/Sec: 0.07, Epoch: 2.2085697515163374, LR: 0.0003 +[2026-02-28 09:12:51] (step=0011289) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.208765407943651, LR: 0.0003 +[2026-02-28 09:13:04] (step=0011290) Train Loss: 0.4595, Train Steps/Sec: 0.07, Epoch: 2.2089610643709645, LR: 0.0003 +[2026-02-28 09:13:18] (step=0011291) Train Loss: 0.4429, Train Steps/Sec: 0.07, Epoch: 2.209156720798278, LR: 0.0003 +[2026-02-28 09:13:32] (step=0011292) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.209352377225592, LR: 0.0003 +[2026-02-28 09:13:46] (step=0011293) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 2.2095480336529056, LR: 0.0003 +[2026-02-28 09:13:59] (step=0011294) Train Loss: 0.4700, Train Steps/Sec: 0.07, Epoch: 2.209743690080219, LR: 0.0003 +[2026-02-28 09:14:13] (step=0011295) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.2099393465075328, LR: 0.0003 +[2026-02-28 09:14:27] (step=0011296) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.2101350029348463, LR: 0.0003 +[2026-02-28 09:14:41] (step=0011297) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.21033065936216, LR: 0.0003 +[2026-02-28 09:14:54] (step=0011298) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.210526315789474, LR: 0.0003 +[2026-02-28 09:15:08] (step=0011299) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.2107219722167875, LR: 0.0003 +[2026-02-28 09:15:21] (step=0011300) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.210917628644101, LR: 0.0003 +[2026-02-28 09:15:35] (step=0011301) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.2111132850714146, LR: 0.0003 +[2026-02-28 09:15:49] (step=0011302) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.211308941498728, LR: 0.0003 +[2026-02-28 09:16:03] (step=0011303) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 2.2115045979260417, LR: 0.0003 +[2026-02-28 09:16:16] (step=0011304) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.2117002543533557, LR: 0.0003 +[2026-02-28 09:16:30] (step=0011305) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.2118959107806693, LR: 0.0003 +[2026-02-28 09:16:44] (step=0011306) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 2.212091567207983, LR: 0.0003 +[2026-02-28 09:16:58] (step=0011307) Train Loss: 0.4446, Train Steps/Sec: 0.07, Epoch: 2.2122872236352964, LR: 0.0003 +[2026-02-28 09:17:11] (step=0011308) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 2.21248288006261, LR: 0.0003 +[2026-02-28 09:17:25] (step=0011309) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 2.2126785364899235, LR: 0.0003 +[2026-02-28 09:17:39] (step=0011310) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 2.2128741929172375, LR: 0.0003 +[2026-02-28 09:17:53] (step=0011311) Train Loss: 0.4406, Train Steps/Sec: 0.07, Epoch: 2.213069849344551, LR: 0.0003 +[2026-02-28 09:18:06] (step=0011312) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 2.2132655057718646, LR: 0.0003 +[2026-02-28 09:18:20] (step=0011313) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.213461162199178, LR: 0.0003 +[2026-02-28 09:18:33] (step=0011314) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.2136568186264918, LR: 0.0003 +[2026-02-28 09:18:47] (step=0011315) Train Loss: 0.4428, Train Steps/Sec: 0.07, Epoch: 2.2138524750538053, LR: 0.0003 +[2026-02-28 09:19:01] (step=0011316) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.2140481314811193, LR: 0.0003 +[2026-02-28 09:19:14] (step=0011317) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 2.214243787908433, LR: 0.0003 +[2026-02-28 09:19:28] (step=0011318) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.2144394443357465, LR: 0.0003 +[2026-02-28 09:19:42] (step=0011319) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.21463510076306, LR: 0.0003 +[2026-02-28 09:19:56] (step=0011320) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.2148307571903736, LR: 0.0003 +[2026-02-28 09:20:09] (step=0011321) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.215026413617687, LR: 0.0003 +[2026-02-28 09:20:23] (step=0011322) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 2.215222070045001, LR: 0.0003 +[2026-02-28 09:20:37] (step=0011323) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 2.2154177264723147, LR: 0.0003 +[2026-02-28 09:20:51] (step=0011324) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.2156133828996283, LR: 0.0003 +[2026-02-28 09:21:05] (step=0011325) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 2.215809039326942, LR: 0.0003 +[2026-02-28 09:21:18] (step=0011326) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 2.2160046957542554, LR: 0.0003 +[2026-02-28 09:21:32] (step=0011327) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.216200352181569, LR: 0.0003 +[2026-02-28 09:21:46] (step=0011328) Train Loss: 0.4489, Train Steps/Sec: 0.07, Epoch: 2.216396008608883, LR: 0.0003 +[2026-02-28 09:21:59] (step=0011329) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 2.2165916650361965, LR: 0.0003 +[2026-02-28 09:22:13] (step=0011330) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 2.21678732146351, LR: 0.0003 +[2026-02-28 09:22:27] (step=0011331) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.2169829778908237, LR: 0.0003 +[2026-02-28 09:22:40] (step=0011332) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 2.217178634318137, LR: 0.0003 +[2026-02-28 09:22:54] (step=0011333) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 2.217374290745451, LR: 0.0003 +[2026-02-28 09:23:08] (step=0011334) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.217569947172765, LR: 0.0003 +[2026-02-28 09:23:22] (step=0011335) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.2177656036000784, LR: 0.0003 +[2026-02-28 09:23:35] (step=0011336) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.217961260027392, LR: 0.0003 +[2026-02-28 09:23:49] (step=0011337) Train Loss: 0.4632, Train Steps/Sec: 0.07, Epoch: 2.2181569164547055, LR: 0.0003 +[2026-02-28 09:24:03] (step=0011338) Train Loss: 0.4344, Train Steps/Sec: 0.07, Epoch: 2.218352572882019, LR: 0.0003 +[2026-02-28 09:24:17] (step=0011339) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.2185482293093326, LR: 0.0003 +[2026-02-28 09:24:30] (step=0011340) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 2.2187438857366466, LR: 0.0003 +[2026-02-28 09:24:44] (step=0011341) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 2.21893954216396, LR: 0.0003 +[2026-02-28 09:24:58] (step=0011342) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.2191351985912737, LR: 0.0003 +[2026-02-28 09:25:11] (step=0011343) Train Loss: 0.4401, Train Steps/Sec: 0.07, Epoch: 2.2193308550185873, LR: 0.0003 +[2026-02-28 09:25:25] (step=0011344) Train Loss: 0.4365, Train Steps/Sec: 0.07, Epoch: 2.219526511445901, LR: 0.0003 +[2026-02-28 09:25:39] (step=0011345) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.2197221678732144, LR: 0.0003 +[2026-02-28 09:25:53] (step=0011346) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 2.2199178243005284, LR: 0.0003 +[2026-02-28 09:26:06] (step=0011347) Train Loss: 0.4564, Train Steps/Sec: 0.07, Epoch: 2.220113480727842, LR: 0.0003 +[2026-02-28 09:26:20] (step=0011348) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.2203091371551555, LR: 0.0003 +[2026-02-28 09:26:34] (step=0011349) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 2.220504793582469, LR: 0.0003 +[2026-02-28 09:26:47] (step=0011350) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.2207004500097827, LR: 0.0003 +[2026-02-28 09:27:01] (step=0011351) Train Loss: 0.4460, Train Steps/Sec: 0.07, Epoch: 2.2208961064370967, LR: 0.0003 +[2026-02-28 09:27:15] (step=0011352) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.2210917628644102, LR: 0.0003 +[2026-02-28 09:27:29] (step=0011353) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 2.221287419291724, LR: 0.0003 +[2026-02-28 09:27:42] (step=0011354) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 2.2214830757190374, LR: 0.0003 +[2026-02-28 09:27:56] (step=0011355) Train Loss: 0.4271, Train Steps/Sec: 0.07, Epoch: 2.221678732146351, LR: 0.0003 +[2026-02-28 09:28:10] (step=0011356) Train Loss: 0.4444, Train Steps/Sec: 0.07, Epoch: 2.2218743885736645, LR: 0.0003 +[2026-02-28 09:28:24] (step=0011357) Train Loss: 0.4424, Train Steps/Sec: 0.07, Epoch: 2.2220700450009785, LR: 0.0003 +[2026-02-28 09:28:37] (step=0011358) Train Loss: 0.4672, Train Steps/Sec: 0.07, Epoch: 2.222265701428292, LR: 0.0003 +[2026-02-28 09:28:51] (step=0011359) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.2224613578556056, LR: 0.0003 +[2026-02-28 09:29:05] (step=0011360) Train Loss: 0.4639, Train Steps/Sec: 0.07, Epoch: 2.222657014282919, LR: 0.0003 +[2026-02-28 09:29:18] (step=0011361) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 2.2228526707102327, LR: 0.0003 +[2026-02-28 09:29:32] (step=0011362) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 2.2230483271375463, LR: 0.0003 +[2026-02-28 09:29:46] (step=0011363) Train Loss: 0.4563, Train Steps/Sec: 0.07, Epoch: 2.2232439835648603, LR: 0.0003 +[2026-02-28 09:29:59] (step=0011364) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.223439639992174, LR: 0.0003 +[2026-02-28 09:30:13] (step=0011365) Train Loss: 0.4633, Train Steps/Sec: 0.07, Epoch: 2.2236352964194874, LR: 0.0003 +[2026-02-28 09:30:27] (step=0011366) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.223830952846801, LR: 0.0003 +[2026-02-28 09:30:41] (step=0011367) Train Loss: 0.4641, Train Steps/Sec: 0.07, Epoch: 2.2240266092741146, LR: 0.0003 +[2026-02-28 09:30:54] (step=0011368) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.224222265701428, LR: 0.0003 +[2026-02-28 09:31:08] (step=0011369) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.224417922128742, LR: 0.0003 +[2026-02-28 09:31:22] (step=0011370) Train Loss: 0.4617, Train Steps/Sec: 0.07, Epoch: 2.2246135785560557, LR: 0.0003 +[2026-02-28 09:31:36] (step=0011371) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 2.2248092349833692, LR: 0.0003 +[2026-02-28 09:31:49] (step=0011372) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.225004891410683, LR: 0.0003 +[2026-02-28 09:32:03] (step=0011373) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.2252005478379964, LR: 0.0003 +[2026-02-28 09:32:17] (step=0011374) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 2.22539620426531, LR: 0.0003 +[2026-02-28 09:32:30] (step=0011375) Train Loss: 0.4397, Train Steps/Sec: 0.07, Epoch: 2.225591860692624, LR: 0.0003 +[2026-02-28 09:32:44] (step=0011376) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.2257875171199375, LR: 0.0003 +[2026-02-28 09:32:58] (step=0011377) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.225983173547251, LR: 0.0003 +[2026-02-28 09:33:12] (step=0011378) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.2261788299745646, LR: 0.0003 +[2026-02-28 09:33:25] (step=0011379) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 2.226374486401878, LR: 0.0003 +[2026-02-28 09:33:39] (step=0011380) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 2.2265701428291917, LR: 0.0003 +[2026-02-28 09:33:53] (step=0011381) Train Loss: 0.4431, Train Steps/Sec: 0.07, Epoch: 2.2267657992565058, LR: 0.0003 +[2026-02-28 09:34:07] (step=0011382) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.2269614556838193, LR: 0.0003 +[2026-02-28 09:34:20] (step=0011383) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 2.227157112111133, LR: 0.0003 +[2026-02-28 09:34:34] (step=0011384) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.2273527685384464, LR: 0.0003 +[2026-02-28 09:34:48] (step=0011385) Train Loss: 0.4350, Train Steps/Sec: 0.07, Epoch: 2.22754842496576, LR: 0.0003 +[2026-02-28 09:35:01] (step=0011386) Train Loss: 0.4343, Train Steps/Sec: 0.07, Epoch: 2.2277440813930736, LR: 0.0003 +[2026-02-28 09:35:15] (step=0011387) Train Loss: 0.4659, Train Steps/Sec: 0.07, Epoch: 2.2279397378203876, LR: 0.0003 +[2026-02-28 09:35:29] (step=0011388) Train Loss: 0.4644, Train Steps/Sec: 0.07, Epoch: 2.228135394247701, LR: 0.0003 +[2026-02-28 09:35:43] (step=0011389) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.2283310506750147, LR: 0.0003 +[2026-02-28 09:35:56] (step=0011390) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.2285267071023283, LR: 0.0003 +[2026-02-28 09:36:10] (step=0011391) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.228722363529642, LR: 0.0003 +[2026-02-28 09:36:24] (step=0011392) Train Loss: 0.4418, Train Steps/Sec: 0.07, Epoch: 2.2289180199569554, LR: 0.0003 +[2026-02-28 09:36:38] (step=0011393) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.2291136763842694, LR: 0.0003 +[2026-02-28 09:36:51] (step=0011394) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.229309332811583, LR: 0.0003 +[2026-02-28 09:37:05] (step=0011395) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 2.2295049892388965, LR: 0.0003 +[2026-02-28 09:37:19] (step=0011396) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.22970064566621, LR: 0.0003 +[2026-02-28 09:37:33] (step=0011397) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.2298963020935236, LR: 0.0003 +[2026-02-28 09:37:46] (step=0011398) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 2.230091958520837, LR: 0.0003 +[2026-02-28 09:38:00] (step=0011399) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.230287614948151, LR: 0.0003 +[2026-02-28 09:38:14] (step=0011400) Train Loss: 0.4393, Train Steps/Sec: 0.07, Epoch: 2.2304832713754648, LR: 0.0003 +[2026-02-28 09:38:27] (step=0011401) Train Loss: 0.4394, Train Steps/Sec: 0.07, Epoch: 2.2306789278027783, LR: 0.0003 +[2026-02-28 09:38:41] (step=0011402) Train Loss: 0.4651, Train Steps/Sec: 0.07, Epoch: 2.230874584230092, LR: 0.0003 +[2026-02-28 09:38:55] (step=0011403) Train Loss: 0.4647, Train Steps/Sec: 0.07, Epoch: 2.2310702406574054, LR: 0.0003 +[2026-02-28 09:39:09] (step=0011404) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.2312658970847195, LR: 0.0003 +[2026-02-28 09:39:22] (step=0011405) Train Loss: 0.4609, Train Steps/Sec: 0.07, Epoch: 2.231461553512033, LR: 0.0003 +[2026-02-28 09:39:36] (step=0011406) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 2.2316572099393466, LR: 0.0003 +[2026-02-28 09:39:50] (step=0011407) Train Loss: 0.4655, Train Steps/Sec: 0.07, Epoch: 2.23185286636666, LR: 0.0003 +[2026-02-28 09:40:04] (step=0011408) Train Loss: 0.4556, Train Steps/Sec: 0.07, Epoch: 2.2320485227939737, LR: 0.0003 +[2026-02-28 09:40:17] (step=0011409) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.2322441792212873, LR: 0.0003 +[2026-02-28 09:40:31] (step=0011410) Train Loss: 0.4620, Train Steps/Sec: 0.07, Epoch: 2.2324398356486013, LR: 0.0003 +[2026-02-28 09:40:45] (step=0011411) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.232635492075915, LR: 0.0003 +[2026-02-28 09:40:59] (step=0011412) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.2328311485032284, LR: 0.0003 +[2026-02-28 09:41:13] (step=0011413) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 2.233026804930542, LR: 0.0003 +[2026-02-28 09:41:26] (step=0011414) Train Loss: 0.4394, Train Steps/Sec: 0.07, Epoch: 2.2332224613578555, LR: 0.0003 +[2026-02-28 09:41:40] (step=0011415) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 2.233418117785169, LR: 0.0003 +[2026-02-28 09:41:54] (step=0011416) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.233613774212483, LR: 0.0003 +[2026-02-28 09:42:07] (step=0011417) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.2338094306397966, LR: 0.0003 +[2026-02-28 09:42:21] (step=0011418) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.23400508706711, LR: 0.0003 +[2026-02-28 09:42:35] (step=0011419) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.2342007434944238, LR: 0.0003 +[2026-02-28 09:42:48] (step=0011420) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.2343963999217373, LR: 0.0003 +[2026-02-28 09:43:02] (step=0011421) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.234592056349051, LR: 0.0003 +[2026-02-28 09:43:16] (step=0011422) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.234787712776365, LR: 0.0003 +[2026-02-28 09:43:30] (step=0011423) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.2349833692036785, LR: 0.0003 +[2026-02-28 09:43:44] (step=0011424) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 2.235179025630992, LR: 0.0003 +[2026-02-28 09:43:57] (step=0011425) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.2353746820583056, LR: 0.0003 +[2026-02-28 09:44:11] (step=0011426) Train Loss: 0.4429, Train Steps/Sec: 0.07, Epoch: 2.235570338485619, LR: 0.0003 +[2026-02-28 09:44:25] (step=0011427) Train Loss: 0.4564, Train Steps/Sec: 0.07, Epoch: 2.2357659949129327, LR: 0.0003 +[2026-02-28 09:44:38] (step=0011428) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 2.2359616513402467, LR: 0.0003 +[2026-02-28 09:44:52] (step=0011429) Train Loss: 0.4534, Train Steps/Sec: 0.07, Epoch: 2.2361573077675603, LR: 0.0003 +[2026-02-28 09:45:06] (step=0011430) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.236352964194874, LR: 0.0003 +[2026-02-28 09:45:20] (step=0011431) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 2.2365486206221874, LR: 0.0003 +[2026-02-28 09:45:33] (step=0011432) Train Loss: 0.4396, Train Steps/Sec: 0.07, Epoch: 2.236744277049501, LR: 0.0003 +[2026-02-28 09:45:47] (step=0011433) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 2.2369399334768145, LR: 0.0003 +[2026-02-28 09:46:01] (step=0011434) Train Loss: 0.4414, Train Steps/Sec: 0.07, Epoch: 2.2371355899041285, LR: 0.0003 +[2026-02-28 09:46:15] (step=0011435) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.237331246331442, LR: 0.0003 +[2026-02-28 09:46:28] (step=0011436) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.2375269027587557, LR: 0.0003 +[2026-02-28 09:46:42] (step=0011437) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.237722559186069, LR: 0.0003 +[2026-02-28 09:46:56] (step=0011438) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 2.2379182156133828, LR: 0.0003 +[2026-02-28 09:47:09] (step=0011439) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.2381138720406963, LR: 0.0003 +[2026-02-28 09:47:23] (step=0011440) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.2383095284680103, LR: 0.0003 +[2026-02-28 09:47:37] (step=0011441) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.238505184895324, LR: 0.0003 +[2026-02-28 09:47:51] (step=0011442) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.2387008413226375, LR: 0.0003 +[2026-02-28 09:48:04] (step=0011443) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 2.238896497749951, LR: 0.0003 +[2026-02-28 09:48:18] (step=0011444) Train Loss: 0.4411, Train Steps/Sec: 0.07, Epoch: 2.2390921541772646, LR: 0.0003 +[2026-02-28 09:48:32] (step=0011445) Train Loss: 0.4654, Train Steps/Sec: 0.07, Epoch: 2.239287810604578, LR: 0.0003 +[2026-02-28 09:48:46] (step=0011446) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 2.239483467031892, LR: 0.0003 +[2026-02-28 09:48:59] (step=0011447) Train Loss: 0.4433, Train Steps/Sec: 0.07, Epoch: 2.2396791234592057, LR: 0.0003 +[2026-02-28 09:49:13] (step=0011448) Train Loss: 0.4587, Train Steps/Sec: 0.07, Epoch: 2.2398747798865193, LR: 0.0003 +[2026-02-28 09:49:27] (step=0011449) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.240070436313833, LR: 0.0003 +[2026-02-28 09:49:41] (step=0011450) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.2402660927411464, LR: 0.0003 +[2026-02-28 09:49:54] (step=0011451) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.24046174916846, LR: 0.0003 +[2026-02-28 09:50:08] (step=0011452) Train Loss: 0.4579, Train Steps/Sec: 0.07, Epoch: 2.240657405595774, LR: 0.0003 +[2026-02-28 09:50:22] (step=0011453) Train Loss: 0.4443, Train Steps/Sec: 0.07, Epoch: 2.2408530620230875, LR: 0.0003 +[2026-02-28 09:50:36] (step=0011454) Train Loss: 0.4434, Train Steps/Sec: 0.07, Epoch: 2.241048718450401, LR: 0.0003 +[2026-02-28 09:50:50] (step=0011455) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.2412443748777147, LR: 0.0003 +[2026-02-28 09:51:03] (step=0011456) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.2414400313050282, LR: 0.0003 +[2026-02-28 09:51:17] (step=0011457) Train Loss: 0.4441, Train Steps/Sec: 0.07, Epoch: 2.241635687732342, LR: 0.0003 +[2026-02-28 09:51:31] (step=0011458) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.241831344159656, LR: 0.0003 +[2026-02-28 09:51:45] (step=0011459) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 2.2420270005869694, LR: 0.0003 +[2026-02-28 09:51:58] (step=0011460) Train Loss: 0.4619, Train Steps/Sec: 0.07, Epoch: 2.242222657014283, LR: 0.0003 +[2026-02-28 09:52:12] (step=0011461) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.2424183134415965, LR: 0.0003 +[2026-02-28 09:52:26] (step=0011462) Train Loss: 0.4463, Train Steps/Sec: 0.07, Epoch: 2.24261396986891, LR: 0.0003 +[2026-02-28 09:52:40] (step=0011463) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.242809626296224, LR: 0.0003 +[2026-02-28 09:52:53] (step=0011464) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 2.2430052827235376, LR: 0.0003 +[2026-02-28 09:53:07] (step=0011465) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.243200939150851, LR: 0.0003 +[2026-02-28 09:53:21] (step=0011466) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 2.2433965955781647, LR: 0.0003 +[2026-02-28 09:53:35] (step=0011467) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 2.2435922520054783, LR: 0.0003 +[2026-02-28 09:53:48] (step=0011468) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 2.243787908432792, LR: 0.0003 +[2026-02-28 09:54:02] (step=0011469) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.243983564860106, LR: 0.0003 +[2026-02-28 09:54:16] (step=0011470) Train Loss: 0.4623, Train Steps/Sec: 0.07, Epoch: 2.2441792212874194, LR: 0.0003 +[2026-02-28 09:54:29] (step=0011471) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.244374877714733, LR: 0.0003 +[2026-02-28 09:54:43] (step=0011472) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.2445705341420465, LR: 0.0003 +[2026-02-28 09:54:57] (step=0011473) Train Loss: 0.4465, Train Steps/Sec: 0.07, Epoch: 2.24476619056936, LR: 0.0003 +[2026-02-28 09:55:11] (step=0011474) Train Loss: 0.4658, Train Steps/Sec: 0.07, Epoch: 2.2449618469966737, LR: 0.0003 +[2026-02-28 09:55:24] (step=0011475) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.2451575034239877, LR: 0.0003 +[2026-02-28 09:55:38] (step=0011476) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.2453531598513012, LR: 0.0003 +[2026-02-28 09:55:52] (step=0011477) Train Loss: 0.4400, Train Steps/Sec: 0.07, Epoch: 2.245548816278615, LR: 0.0003 +[2026-02-28 09:56:06] (step=0011478) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 2.2457444727059284, LR: 0.0003 +[2026-02-28 09:56:19] (step=0011479) Train Loss: 0.4361, Train Steps/Sec: 0.07, Epoch: 2.245940129133242, LR: 0.0003 +[2026-02-28 09:56:33] (step=0011480) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 2.2461357855605555, LR: 0.0003 +[2026-02-28 09:56:47] (step=0011481) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.2463314419878695, LR: 0.0003 +[2026-02-28 09:57:01] (step=0011482) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.246527098415183, LR: 0.0003 +[2026-02-28 09:57:14] (step=0011483) Train Loss: 0.4396, Train Steps/Sec: 0.07, Epoch: 2.2467227548424966, LR: 0.0003 +[2026-02-28 09:57:28] (step=0011484) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 2.24691841126981, LR: 0.0003 +[2026-02-28 09:57:42] (step=0011485) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.2471140676971237, LR: 0.0003 +[2026-02-28 09:57:56] (step=0011486) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.2473097241244373, LR: 0.0003 +[2026-02-28 09:58:09] (step=0011487) Train Loss: 0.4598, Train Steps/Sec: 0.07, Epoch: 2.2475053805517513, LR: 0.0003 +[2026-02-28 09:58:23] (step=0011488) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.247701036979065, LR: 0.0003 +[2026-02-28 09:58:37] (step=0011489) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 2.2478966934063784, LR: 0.0003 +[2026-02-28 09:58:51] (step=0011490) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 2.248092349833692, LR: 0.0003 +[2026-02-28 09:59:04] (step=0011491) Train Loss: 0.4627, Train Steps/Sec: 0.07, Epoch: 2.2482880062610056, LR: 0.0003 +[2026-02-28 09:59:18] (step=0011492) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 2.248483662688319, LR: 0.0003 +[2026-02-28 09:59:32] (step=0011493) Train Loss: 0.4698, Train Steps/Sec: 0.07, Epoch: 2.248679319115633, LR: 0.0003 +[2026-02-28 09:59:46] (step=0011494) Train Loss: 0.4435, Train Steps/Sec: 0.07, Epoch: 2.2488749755429467, LR: 0.0003 +[2026-02-28 10:00:00] (step=0011495) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.2490706319702602, LR: 0.0003 +[2026-02-28 10:00:13] (step=0011496) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 2.249266288397574, LR: 0.0003 +[2026-02-28 10:00:27] (step=0011497) Train Loss: 0.4490, Train Steps/Sec: 0.07, Epoch: 2.2494619448248874, LR: 0.0003 +[2026-02-28 10:00:41] (step=0011498) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 2.249657601252201, LR: 0.0003 +[2026-02-28 10:00:55] (step=0011499) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.249853257679515, LR: 0.0003 +[2026-02-28 10:01:08] (step=0011500) Train Loss: 0.4633, Train Steps/Sec: 0.07, Epoch: 2.2500489141068285, LR: 0.0003 +[2026-02-28 10:01:08] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0011500/ +[2026-02-28 10:01:22] (step=0011501) Train Loss: 0.4587, Train Steps/Sec: 0.07, Epoch: 2.250244570534142, LR: 0.0003 +[2026-02-28 10:01:36] (step=0011502) Train Loss: 0.4646, Train Steps/Sec: 0.07, Epoch: 2.2504402269614556, LR: 0.0003 +[2026-02-28 10:01:50] (step=0011503) Train Loss: 0.4593, Train Steps/Sec: 0.07, Epoch: 2.250635883388769, LR: 0.0003 +[2026-02-28 10:02:04] (step=0011504) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 2.2508315398160827, LR: 0.0003 +[2026-02-28 10:02:17] (step=0011505) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.2510271962433968, LR: 0.0003 +[2026-02-28 10:02:31] (step=0011506) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 2.2512228526707103, LR: 0.0003 +[2026-02-28 10:02:45] (step=0011507) Train Loss: 0.4625, Train Steps/Sec: 0.07, Epoch: 2.251418509098024, LR: 0.0003 +[2026-02-28 10:02:59] (step=0011508) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 2.2516141655253374, LR: 0.0003 +[2026-02-28 10:03:12] (step=0011509) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.251809821952651, LR: 0.0003 +[2026-02-28 10:03:26] (step=0011510) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.2520054783799646, LR: 0.0003 +[2026-02-28 10:03:40] (step=0011511) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.2522011348072786, LR: 0.0003 +[2026-02-28 10:03:54] (step=0011512) Train Loss: 0.4659, Train Steps/Sec: 0.07, Epoch: 2.252396791234592, LR: 0.0003 +[2026-02-28 10:04:07] (step=0011513) Train Loss: 0.4456, Train Steps/Sec: 0.07, Epoch: 2.2525924476619057, LR: 0.0003 +[2026-02-28 10:04:21] (step=0011514) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.2527881040892193, LR: 0.0003 +[2026-02-28 10:04:35] (step=0011515) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.252983760516533, LR: 0.0003 +[2026-02-28 10:04:49] (step=0011516) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.253179416943847, LR: 0.0003 +[2026-02-28 10:05:02] (step=0011517) Train Loss: 0.4449, Train Steps/Sec: 0.07, Epoch: 2.2533750733711604, LR: 0.0003 +[2026-02-28 10:05:16] (step=0011518) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.253570729798474, LR: 0.0003 +[2026-02-28 10:05:30] (step=0011519) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.2537663862257875, LR: 0.0003 +[2026-02-28 10:05:44] (step=0011520) Train Loss: 0.4417, Train Steps/Sec: 0.07, Epoch: 2.253962042653101, LR: 0.0003 +[2026-02-28 10:05:57] (step=0011521) Train Loss: 0.4534, Train Steps/Sec: 0.07, Epoch: 2.2541576990804146, LR: 0.0003 +[2026-02-28 10:06:11] (step=0011522) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.2543533555077286, LR: 0.0003 +[2026-02-28 10:06:25] (step=0011523) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 2.254549011935042, LR: 0.0003 +[2026-02-28 10:06:38] (step=0011524) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 2.2547446683623558, LR: 0.0003 +[2026-02-28 10:06:52] (step=0011525) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 2.2549403247896693, LR: 0.0003 +[2026-02-28 10:07:06] (step=0011526) Train Loss: 0.4456, Train Steps/Sec: 0.07, Epoch: 2.255135981216983, LR: 0.0003 +[2026-02-28 10:07:20] (step=0011527) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.2553316376442964, LR: 0.0003 +[2026-02-28 10:07:33] (step=0011528) Train Loss: 0.4569, Train Steps/Sec: 0.07, Epoch: 2.2555272940716105, LR: 0.0003 +[2026-02-28 10:07:47] (step=0011529) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.255722950498924, LR: 0.0003 +[2026-02-28 10:08:01] (step=0011530) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.2559186069262376, LR: 0.0003 +[2026-02-28 10:08:15] (step=0011531) Train Loss: 0.4587, Train Steps/Sec: 0.07, Epoch: 2.256114263353551, LR: 0.0003 +[2026-02-28 10:08:28] (step=0011532) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.2563099197808647, LR: 0.0003 +[2026-02-28 10:08:42] (step=0011533) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.2565055762081783, LR: 0.0003 +[2026-02-28 10:08:56] (step=0011534) Train Loss: 0.4434, Train Steps/Sec: 0.07, Epoch: 2.2567012326354923, LR: 0.0003 +[2026-02-28 10:09:09] (step=0011535) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.256896889062806, LR: 0.0003 +[2026-02-28 10:09:23] (step=0011536) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.2570925454901194, LR: 0.0003 +[2026-02-28 10:09:37] (step=0011537) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.257288201917433, LR: 0.0003 +[2026-02-28 10:09:51] (step=0011538) Train Loss: 0.4644, Train Steps/Sec: 0.07, Epoch: 2.2574838583447465, LR: 0.0003 +[2026-02-28 10:10:04] (step=0011539) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 2.25767951477206, LR: 0.0003 +[2026-02-28 10:10:18] (step=0011540) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.257875171199374, LR: 0.0003 +[2026-02-28 10:10:32] (step=0011541) Train Loss: 0.4427, Train Steps/Sec: 0.07, Epoch: 2.2580708276266876, LR: 0.0003 +[2026-02-28 10:10:46] (step=0011542) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.258266484054001, LR: 0.0003 +[2026-02-28 10:10:59] (step=0011543) Train Loss: 0.4638, Train Steps/Sec: 0.07, Epoch: 2.2584621404813148, LR: 0.0003 +[2026-02-28 10:11:13] (step=0011544) Train Loss: 0.4400, Train Steps/Sec: 0.07, Epoch: 2.2586577969086283, LR: 0.0003 +[2026-02-28 10:11:27] (step=0011545) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.258853453335942, LR: 0.0003 +[2026-02-28 10:11:41] (step=0011546) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.259049109763256, LR: 0.0003 +[2026-02-28 10:11:54] (step=0011547) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.2592447661905695, LR: 0.0003 +[2026-02-28 10:12:08] (step=0011548) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.259440422617883, LR: 0.0003 +[2026-02-28 10:12:22] (step=0011549) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 2.2596360790451966, LR: 0.0003 +[2026-02-28 10:12:36] (step=0011550) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.25983173547251, LR: 0.0003 +[2026-02-28 10:12:49] (step=0011551) Train Loss: 0.4429, Train Steps/Sec: 0.07, Epoch: 2.2600273918998237, LR: 0.0003 +[2026-02-28 10:13:03] (step=0011552) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 2.2602230483271377, LR: 0.0003 +[2026-02-28 10:13:17] (step=0011553) Train Loss: 0.4677, Train Steps/Sec: 0.07, Epoch: 2.2604187047544513, LR: 0.0003 +[2026-02-28 10:13:31] (step=0011554) Train Loss: 0.4532, Train Steps/Sec: 0.07, Epoch: 2.260614361181765, LR: 0.0003 +[2026-02-28 10:13:44] (step=0011555) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.2608100176090784, LR: 0.0003 +[2026-02-28 10:13:58] (step=0011556) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 2.261005674036392, LR: 0.0003 +[2026-02-28 10:14:12] (step=0011557) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.2612013304637055, LR: 0.0003 +[2026-02-28 10:14:26] (step=0011558) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.2613969868910195, LR: 0.0003 +[2026-02-28 10:14:40] (step=0011559) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.261592643318333, LR: 0.0003 +[2026-02-28 10:14:53] (step=0011560) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.2617882997456467, LR: 0.0003 +[2026-02-28 10:15:07] (step=0011561) Train Loss: 0.4620, Train Steps/Sec: 0.07, Epoch: 2.26198395617296, LR: 0.0003 +[2026-02-28 10:15:21] (step=0011562) Train Loss: 0.4447, Train Steps/Sec: 0.07, Epoch: 2.262179612600274, LR: 0.0003 +[2026-02-28 10:15:35] (step=0011563) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.2623752690275873, LR: 0.0003 +[2026-02-28 10:15:48] (step=0011564) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 2.2625709254549013, LR: 0.0003 +[2026-02-28 10:16:02] (step=0011565) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 2.262766581882215, LR: 0.0003 +[2026-02-28 10:16:16] (step=0011566) Train Loss: 0.4485, Train Steps/Sec: 0.07, Epoch: 2.2629622383095285, LR: 0.0003 +[2026-02-28 10:16:30] (step=0011567) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.263157894736842, LR: 0.0003 +[2026-02-28 10:16:43] (step=0011568) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.2633535511641556, LR: 0.0003 +[2026-02-28 10:16:57] (step=0011569) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.263549207591469, LR: 0.0003 +[2026-02-28 10:17:11] (step=0011570) Train Loss: 0.4558, Train Steps/Sec: 0.07, Epoch: 2.263744864018783, LR: 0.0003 +[2026-02-28 10:17:25] (step=0011571) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.2639405204460967, LR: 0.0003 +[2026-02-28 10:17:38] (step=0011572) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 2.2641361768734103, LR: 0.0003 +[2026-02-28 10:17:52] (step=0011573) Train Loss: 0.4374, Train Steps/Sec: 0.07, Epoch: 2.264331833300724, LR: 0.0003 +[2026-02-28 10:18:06] (step=0011574) Train Loss: 0.4451, Train Steps/Sec: 0.07, Epoch: 2.2645274897280374, LR: 0.0003 +[2026-02-28 10:18:19] (step=0011575) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.2647231461553514, LR: 0.0003 +[2026-02-28 10:18:33] (step=0011576) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 2.264918802582665, LR: 0.0003 +[2026-02-28 10:18:47] (step=0011577) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.2651144590099785, LR: 0.0003 +[2026-02-28 10:19:01] (step=0011578) Train Loss: 0.4544, Train Steps/Sec: 0.07, Epoch: 2.265310115437292, LR: 0.0003 +[2026-02-28 10:19:14] (step=0011579) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 2.2655057718646057, LR: 0.0003 +[2026-02-28 10:19:28] (step=0011580) Train Loss: 0.4565, Train Steps/Sec: 0.07, Epoch: 2.2657014282919192, LR: 0.0003 +[2026-02-28 10:19:42] (step=0011581) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 2.2658970847192332, LR: 0.0003 +[2026-02-28 10:19:56] (step=0011582) Train Loss: 0.4614, Train Steps/Sec: 0.07, Epoch: 2.266092741146547, LR: 0.0003 +[2026-02-28 10:20:09] (step=0011583) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 2.2662883975738604, LR: 0.0003 +[2026-02-28 10:20:23] (step=0011584) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 2.266484054001174, LR: 0.0003 +[2026-02-28 10:20:37] (step=0011585) Train Loss: 0.4436, Train Steps/Sec: 0.07, Epoch: 2.2666797104284875, LR: 0.0003 +[2026-02-28 10:20:51] (step=0011586) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 2.266875366855801, LR: 0.0003 +[2026-02-28 10:21:04] (step=0011587) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.267071023283115, LR: 0.0003 +[2026-02-28 10:21:18] (step=0011588) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.2672666797104286, LR: 0.0003 +[2026-02-28 10:21:32] (step=0011589) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.267462336137742, LR: 0.0003 +[2026-02-28 10:21:46] (step=0011590) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.2676579925650557, LR: 0.0003 +[2026-02-28 10:21:59] (step=0011591) Train Loss: 0.4481, Train Steps/Sec: 0.07, Epoch: 2.2678536489923693, LR: 0.0003 +[2026-02-28 10:22:13] (step=0011592) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 2.268049305419683, LR: 0.0003 +[2026-02-28 10:22:27] (step=0011593) Train Loss: 0.4567, Train Steps/Sec: 0.07, Epoch: 2.268244961846997, LR: 0.0003 +[2026-02-28 10:22:40] (step=0011594) Train Loss: 0.4642, Train Steps/Sec: 0.07, Epoch: 2.2684406182743104, LR: 0.0003 +[2026-02-28 10:22:54] (step=0011595) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.268636274701624, LR: 0.0003 +[2026-02-28 10:23:08] (step=0011596) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.2688319311289376, LR: 0.0003 +[2026-02-28 10:23:22] (step=0011597) Train Loss: 0.4401, Train Steps/Sec: 0.07, Epoch: 2.269027587556251, LR: 0.0003 +[2026-02-28 10:23:36] (step=0011598) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 2.2692232439835647, LR: 0.0003 +[2026-02-28 10:23:49] (step=0011599) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 2.2694189004108787, LR: 0.0003 +[2026-02-28 10:24:03] (step=0011600) Train Loss: 0.4578, Train Steps/Sec: 0.07, Epoch: 2.2696145568381922, LR: 0.0003 +[2026-02-28 10:24:17] (step=0011601) Train Loss: 0.4415, Train Steps/Sec: 0.07, Epoch: 2.269810213265506, LR: 0.0003 +[2026-02-28 10:24:30] (step=0011602) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.2700058696928194, LR: 0.0003 +[2026-02-28 10:24:44] (step=0011603) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.270201526120133, LR: 0.0003 +[2026-02-28 10:24:58] (step=0011604) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.2703971825474465, LR: 0.0003 +[2026-02-28 10:25:12] (step=0011605) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 2.2705928389747605, LR: 0.0003 +[2026-02-28 10:25:26] (step=0011606) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 2.270788495402074, LR: 0.0003 +[2026-02-28 10:25:39] (step=0011607) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 2.2709841518293876, LR: 0.0003 +[2026-02-28 10:25:53] (step=0011608) Train Loss: 0.4650, Train Steps/Sec: 0.07, Epoch: 2.271179808256701, LR: 0.0003 +[2026-02-28 10:26:07] (step=0011609) Train Loss: 0.4437, Train Steps/Sec: 0.07, Epoch: 2.2713754646840147, LR: 0.0003 +[2026-02-28 10:26:21] (step=0011610) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 2.2715711211113283, LR: 0.0003 +[2026-02-28 10:26:34] (step=0011611) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.2717667775386423, LR: 0.0003 +[2026-02-28 10:26:48] (step=0011612) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 2.271962433965956, LR: 0.0003 +[2026-02-28 10:27:02] (step=0011613) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.2721580903932694, LR: 0.0003 +[2026-02-28 10:27:16] (step=0011614) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 2.272353746820583, LR: 0.0003 +[2026-02-28 10:27:29] (step=0011615) Train Loss: 0.4429, Train Steps/Sec: 0.07, Epoch: 2.2725494032478966, LR: 0.0003 +[2026-02-28 10:27:43] (step=0011616) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.27274505967521, LR: 0.0003 +[2026-02-28 10:27:57] (step=0011617) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 2.272940716102524, LR: 0.0003 +[2026-02-28 10:28:11] (step=0011618) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.2731363725298377, LR: 0.0003 +[2026-02-28 10:28:24] (step=0011619) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 2.2733320289571513, LR: 0.0003 +[2026-02-28 10:28:38] (step=0011620) Train Loss: 0.4725, Train Steps/Sec: 0.07, Epoch: 2.273527685384465, LR: 0.0003 +[2026-02-28 10:28:52] (step=0011621) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 2.2737233418117784, LR: 0.0003 +[2026-02-28 10:29:05] (step=0011622) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.273918998239092, LR: 0.0003 +[2026-02-28 10:29:19] (step=0011623) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.274114654666406, LR: 0.0003 +[2026-02-28 10:29:33] (step=0011624) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.2743103110937195, LR: 0.0003 +[2026-02-28 10:29:47] (step=0011625) Train Loss: 0.4435, Train Steps/Sec: 0.07, Epoch: 2.274505967521033, LR: 0.0003 +[2026-02-28 10:30:01] (step=0011626) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.2747016239483466, LR: 0.0003 +[2026-02-28 10:30:14] (step=0011627) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 2.27489728037566, LR: 0.0003 +[2026-02-28 10:30:28] (step=0011628) Train Loss: 0.4425, Train Steps/Sec: 0.07, Epoch: 2.275092936802974, LR: 0.0003 +[2026-02-28 10:30:42] (step=0011629) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.2752885932302878, LR: 0.0003 +[2026-02-28 10:30:55] (step=0011630) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.2754842496576013, LR: 0.0003 +[2026-02-28 10:31:09] (step=0011631) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.275679906084915, LR: 0.0003 +[2026-02-28 10:31:23] (step=0011632) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 2.2758755625122284, LR: 0.0003 +[2026-02-28 10:31:37] (step=0011633) Train Loss: 0.4582, Train Steps/Sec: 0.07, Epoch: 2.276071218939542, LR: 0.0003 +[2026-02-28 10:31:50] (step=0011634) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.276266875366856, LR: 0.0003 +[2026-02-28 10:32:04] (step=0011635) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.2764625317941696, LR: 0.0003 +[2026-02-28 10:32:18] (step=0011636) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 2.276658188221483, LR: 0.0003 +[2026-02-28 10:32:31] (step=0011637) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 2.2768538446487967, LR: 0.0003 +[2026-02-28 10:32:45] (step=0011638) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 2.2770495010761103, LR: 0.0003 +[2026-02-28 10:32:59] (step=0011639) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.277245157503424, LR: 0.0003 +[2026-02-28 10:33:13] (step=0011640) Train Loss: 0.4452, Train Steps/Sec: 0.07, Epoch: 2.277440813930738, LR: 0.0003 +[2026-02-28 10:33:26] (step=0011641) Train Loss: 0.4630, Train Steps/Sec: 0.07, Epoch: 2.2776364703580514, LR: 0.0003 +[2026-02-28 10:33:40] (step=0011642) Train Loss: 0.4403, Train Steps/Sec: 0.07, Epoch: 2.277832126785365, LR: 0.0003 +[2026-02-28 10:33:54] (step=0011643) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.2780277832126785, LR: 0.0003 +[2026-02-28 10:34:08] (step=0011644) Train Loss: 0.4553, Train Steps/Sec: 0.07, Epoch: 2.278223439639992, LR: 0.0003 +[2026-02-28 10:34:21] (step=0011645) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.2784190960673056, LR: 0.0003 +[2026-02-28 10:34:35] (step=0011646) Train Loss: 0.4349, Train Steps/Sec: 0.07, Epoch: 2.2786147524946196, LR: 0.0003 +[2026-02-28 10:34:49] (step=0011647) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 2.278810408921933, LR: 0.0003 +[2026-02-28 10:35:03] (step=0011648) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.2790060653492468, LR: 0.0003 +[2026-02-28 10:35:16] (step=0011649) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.2792017217765603, LR: 0.0003 +[2026-02-28 10:35:30] (step=0011650) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.279397378203874, LR: 0.0003 +[2026-02-28 10:35:44] (step=0011651) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.2795930346311875, LR: 0.0003 +[2026-02-28 10:35:57] (step=0011652) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.2797886910585015, LR: 0.0003 +[2026-02-28 10:36:11] (step=0011653) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.279984347485815, LR: 0.0003 +[2026-02-28 10:36:25] (step=0011654) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.2801800039131286, LR: 0.0003 +[2026-02-28 10:36:39] (step=0011655) Train Loss: 0.4344, Train Steps/Sec: 0.07, Epoch: 2.280375660340442, LR: 0.0003 +[2026-02-28 10:36:52] (step=0011656) Train Loss: 0.4571, Train Steps/Sec: 0.07, Epoch: 2.2805713167677557, LR: 0.0003 +[2026-02-28 10:37:06] (step=0011657) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.2807669731950693, LR: 0.0003 +[2026-02-28 10:37:20] (step=0011658) Train Loss: 0.4509, Train Steps/Sec: 0.07, Epoch: 2.2809626296223833, LR: 0.0003 +[2026-02-28 10:37:33] (step=0011659) Train Loss: 0.4613, Train Steps/Sec: 0.07, Epoch: 2.281158286049697, LR: 0.0003 +[2026-02-28 10:37:47] (step=0011660) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 2.2813539424770104, LR: 0.0003 +[2026-02-28 10:38:01] (step=0011661) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.281549598904324, LR: 0.0003 +[2026-02-28 10:38:15] (step=0011662) Train Loss: 0.4638, Train Steps/Sec: 0.07, Epoch: 2.2817452553316375, LR: 0.0003 +[2026-02-28 10:38:28] (step=0011663) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.281940911758951, LR: 0.0003 +[2026-02-28 10:38:42] (step=0011664) Train Loss: 0.4573, Train Steps/Sec: 0.07, Epoch: 2.282136568186265, LR: 0.0003 +[2026-02-28 10:38:56] (step=0011665) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 2.2823322246135787, LR: 0.0003 +[2026-02-28 10:39:10] (step=0011666) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.282527881040892, LR: 0.0003 +[2026-02-28 10:39:23] (step=0011667) Train Loss: 0.4327, Train Steps/Sec: 0.07, Epoch: 2.2827235374682058, LR: 0.0003 +[2026-02-28 10:39:37] (step=0011668) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 2.2829191938955193, LR: 0.0003 +[2026-02-28 10:39:51] (step=0011669) Train Loss: 0.4482, Train Steps/Sec: 0.07, Epoch: 2.283114850322833, LR: 0.0003 +[2026-02-28 10:40:04] (step=0011670) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 2.283310506750147, LR: 0.0003 +[2026-02-28 10:40:18] (step=0011671) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.2835061631774605, LR: 0.0003 +[2026-02-28 10:40:32] (step=0011672) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.283701819604774, LR: 0.0003 +[2026-02-28 10:40:46] (step=0011673) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.2838974760320876, LR: 0.0003 +[2026-02-28 10:40:59] (step=0011674) Train Loss: 0.4475, Train Steps/Sec: 0.07, Epoch: 2.284093132459401, LR: 0.0003 +[2026-02-28 10:41:13] (step=0011675) Train Loss: 0.4598, Train Steps/Sec: 0.07, Epoch: 2.2842887888867147, LR: 0.0003 +[2026-02-28 10:41:27] (step=0011676) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 2.2844844453140287, LR: 0.0003 +[2026-02-28 10:41:41] (step=0011677) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.2846801017413423, LR: 0.0003 +[2026-02-28 10:41:54] (step=0011678) Train Loss: 0.4576, Train Steps/Sec: 0.07, Epoch: 2.284875758168656, LR: 0.0003 +[2026-02-28 10:42:08] (step=0011679) Train Loss: 0.4365, Train Steps/Sec: 0.07, Epoch: 2.2850714145959694, LR: 0.0003 +[2026-02-28 10:42:22] (step=0011680) Train Loss: 0.4461, Train Steps/Sec: 0.07, Epoch: 2.285267071023283, LR: 0.0003 +[2026-02-28 10:42:36] (step=0011681) Train Loss: 0.4591, Train Steps/Sec: 0.07, Epoch: 2.2854627274505965, LR: 0.0003 +[2026-02-28 10:42:49] (step=0011682) Train Loss: 0.4613, Train Steps/Sec: 0.07, Epoch: 2.2856583838779105, LR: 0.0003 +[2026-02-28 10:43:03] (step=0011683) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.285854040305224, LR: 0.0003 +[2026-02-28 10:43:17] (step=0011684) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.2860496967325377, LR: 0.0003 +[2026-02-28 10:43:30] (step=0011685) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.2862453531598512, LR: 0.0003 +[2026-02-28 10:43:44] (step=0011686) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.286441009587165, LR: 0.0003 +[2026-02-28 10:43:58] (step=0011687) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 2.286636666014479, LR: 0.0003 +[2026-02-28 10:44:12] (step=0011688) Train Loss: 0.4430, Train Steps/Sec: 0.07, Epoch: 2.2868323224417924, LR: 0.0003 +[2026-02-28 10:44:25] (step=0011689) Train Loss: 0.4616, Train Steps/Sec: 0.07, Epoch: 2.287027978869106, LR: 0.0003 +[2026-02-28 10:44:39] (step=0011690) Train Loss: 0.4512, Train Steps/Sec: 0.07, Epoch: 2.2872236352964195, LR: 0.0003 +[2026-02-28 10:44:53] (step=0011691) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.287419291723733, LR: 0.0003 +[2026-02-28 10:45:07] (step=0011692) Train Loss: 0.4487, Train Steps/Sec: 0.07, Epoch: 2.2876149481510466, LR: 0.0003 +[2026-02-28 10:45:20] (step=0011693) Train Loss: 0.4478, Train Steps/Sec: 0.07, Epoch: 2.2878106045783606, LR: 0.0003 +[2026-02-28 10:45:34] (step=0011694) Train Loss: 0.4387, Train Steps/Sec: 0.07, Epoch: 2.288006261005674, LR: 0.0003 +[2026-02-28 10:45:48] (step=0011695) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.2882019174329877, LR: 0.0003 +[2026-02-28 10:46:02] (step=0011696) Train Loss: 0.4450, Train Steps/Sec: 0.07, Epoch: 2.2883975738603013, LR: 0.0003 +[2026-02-28 10:46:15] (step=0011697) Train Loss: 0.4594, Train Steps/Sec: 0.07, Epoch: 2.288593230287615, LR: 0.0003 +[2026-02-28 10:46:29] (step=0011698) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.2887888867149284, LR: 0.0003 +[2026-02-28 10:46:43] (step=0011699) Train Loss: 0.4456, Train Steps/Sec: 0.07, Epoch: 2.2889845431422424, LR: 0.0003 +[2026-02-28 10:46:56] (step=0011700) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 2.289180199569556, LR: 0.0003 +[2026-02-28 10:47:10] (step=0011701) Train Loss: 0.4416, Train Steps/Sec: 0.07, Epoch: 2.2893758559968695, LR: 0.0003 +[2026-02-28 10:47:24] (step=0011702) Train Loss: 0.4360, Train Steps/Sec: 0.07, Epoch: 2.289571512424183, LR: 0.0003 +[2026-02-28 10:47:37] (step=0011703) Train Loss: 0.4622, Train Steps/Sec: 0.07, Epoch: 2.2897671688514967, LR: 0.0003 +[2026-02-28 10:47:51] (step=0011704) Train Loss: 0.4528, Train Steps/Sec: 0.07, Epoch: 2.2899628252788102, LR: 0.0003 +[2026-02-28 10:48:05] (step=0011705) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 2.2901584817061242, LR: 0.0003 +[2026-02-28 10:48:19] (step=0011706) Train Loss: 0.4376, Train Steps/Sec: 0.07, Epoch: 2.290354138133438, LR: 0.0003 +[2026-02-28 10:48:33] (step=0011707) Train Loss: 0.4579, Train Steps/Sec: 0.07, Epoch: 2.2905497945607514, LR: 0.0003 +[2026-02-28 10:48:46] (step=0011708) Train Loss: 0.4608, Train Steps/Sec: 0.07, Epoch: 2.290745450988065, LR: 0.0003 +[2026-02-28 10:49:00] (step=0011709) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.2909411074153785, LR: 0.0003 +[2026-02-28 10:49:14] (step=0011710) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.291136763842692, LR: 0.0003 +[2026-02-28 10:49:28] (step=0011711) Train Loss: 0.4433, Train Steps/Sec: 0.07, Epoch: 2.291332420270006, LR: 0.0003 +[2026-02-28 10:49:41] (step=0011712) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.2915280766973196, LR: 0.0003 +[2026-02-28 10:49:55] (step=0011713) Train Loss: 0.4479, Train Steps/Sec: 0.07, Epoch: 2.291723733124633, LR: 0.0003 +[2026-02-28 10:50:08] (step=0011714) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.2919193895519467, LR: 0.0003 +[2026-02-28 10:50:22] (step=0011715) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.2921150459792603, LR: 0.0003 +[2026-02-28 10:50:36] (step=0011716) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.292310702406574, LR: 0.0003 +[2026-02-28 10:50:50] (step=0011717) Train Loss: 0.4455, Train Steps/Sec: 0.07, Epoch: 2.292506358833888, LR: 0.0003 +[2026-02-28 10:51:03] (step=0011718) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.2927020152612014, LR: 0.0003 +[2026-02-28 10:51:17] (step=0011719) Train Loss: 0.4505, Train Steps/Sec: 0.07, Epoch: 2.292897671688515, LR: 0.0003 +[2026-02-28 10:51:31] (step=0011720) Train Loss: 0.4610, Train Steps/Sec: 0.07, Epoch: 2.2930933281158286, LR: 0.0003 +[2026-02-28 10:51:44] (step=0011721) Train Loss: 0.4370, Train Steps/Sec: 0.07, Epoch: 2.293288984543142, LR: 0.0003 +[2026-02-28 10:51:58] (step=0011722) Train Loss: 0.4496, Train Steps/Sec: 0.07, Epoch: 2.2934846409704557, LR: 0.0003 +[2026-02-28 10:52:12] (step=0011723) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.2936802973977697, LR: 0.0003 +[2026-02-28 10:52:26] (step=0011724) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 2.2938759538250832, LR: 0.0003 +[2026-02-28 10:52:39] (step=0011725) Train Loss: 0.4458, Train Steps/Sec: 0.07, Epoch: 2.294071610252397, LR: 0.0003 +[2026-02-28 10:52:53] (step=0011726) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.2942672666797104, LR: 0.0003 +[2026-02-28 10:53:07] (step=0011727) Train Loss: 0.4626, Train Steps/Sec: 0.07, Epoch: 2.294462923107024, LR: 0.0003 +[2026-02-28 10:53:20] (step=0011728) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.2946585795343375, LR: 0.0003 +[2026-02-28 10:53:34] (step=0011729) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.2948542359616515, LR: 0.0003 +[2026-02-28 10:53:48] (step=0011730) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.295049892388965, LR: 0.0003 +[2026-02-28 10:54:01] (step=0011731) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.2952455488162786, LR: 0.0003 +[2026-02-28 10:54:15] (step=0011732) Train Loss: 0.4607, Train Steps/Sec: 0.07, Epoch: 2.295441205243592, LR: 0.0003 +[2026-02-28 10:54:29] (step=0011733) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.2956368616709057, LR: 0.0003 +[2026-02-28 10:54:42] (step=0011734) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 2.2958325180982193, LR: 0.0003 +[2026-02-28 10:54:56] (step=0011735) Train Loss: 0.4667, Train Steps/Sec: 0.07, Epoch: 2.2960281745255333, LR: 0.0003 +[2026-02-28 10:55:10] (step=0011736) Train Loss: 0.4649, Train Steps/Sec: 0.07, Epoch: 2.296223830952847, LR: 0.0003 +[2026-02-28 10:55:24] (step=0011737) Train Loss: 0.4606, Train Steps/Sec: 0.07, Epoch: 2.2964194873801604, LR: 0.0003 +[2026-02-28 10:55:37] (step=0011738) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 2.296615143807474, LR: 0.0003 +[2026-02-28 10:55:51] (step=0011739) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 2.2968108002347876, LR: 0.0003 +[2026-02-28 10:56:05] (step=0011740) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.2970064566621016, LR: 0.0003 +[2026-02-28 10:56:18] (step=0011741) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 2.297202113089415, LR: 0.0003 +[2026-02-28 10:56:32] (step=0011742) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.2973977695167287, LR: 0.0003 +[2026-02-28 10:56:46] (step=0011743) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.2975934259440423, LR: 0.0003 +[2026-02-28 10:56:59] (step=0011744) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.297789082371356, LR: 0.0003 +[2026-02-28 10:57:13] (step=0011745) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.2979847387986694, LR: 0.0003 +[2026-02-28 10:57:27] (step=0011746) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 2.2981803952259834, LR: 0.0003 +[2026-02-28 10:57:40] (step=0011747) Train Loss: 0.4584, Train Steps/Sec: 0.07, Epoch: 2.298376051653297, LR: 0.0003 +[2026-02-28 10:57:54] (step=0011748) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.2985717080806105, LR: 0.0003 +[2026-02-28 10:58:08] (step=0011749) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.298767364507924, LR: 0.0003 +[2026-02-28 10:58:21] (step=0011750) Train Loss: 0.4590, Train Steps/Sec: 0.07, Epoch: 2.2989630209352376, LR: 0.0003 +[2026-02-28 10:58:35] (step=0011751) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.299158677362551, LR: 0.0003 +[2026-02-28 10:58:49] (step=0011752) Train Loss: 0.4577, Train Steps/Sec: 0.07, Epoch: 2.299354333789865, LR: 0.0003 +[2026-02-28 10:59:02] (step=0011753) Train Loss: 0.4536, Train Steps/Sec: 0.07, Epoch: 2.2995499902171788, LR: 0.0003 +[2026-02-28 10:59:16] (step=0011754) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.2997456466444923, LR: 0.0003 +[2026-02-28 10:59:30] (step=0011755) Train Loss: 0.4489, Train Steps/Sec: 0.07, Epoch: 2.299941303071806, LR: 0.0003 +[2026-02-28 10:59:43] (step=0011756) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.3001369594991194, LR: 0.0003 +[2026-02-28 10:59:57] (step=0011757) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.300332615926433, LR: 0.0003 +[2026-02-28 11:00:11] (step=0011758) Train Loss: 0.4552, Train Steps/Sec: 0.07, Epoch: 2.300528272353747, LR: 0.0003 +[2026-02-28 11:00:24] (step=0011759) Train Loss: 0.4409, Train Steps/Sec: 0.07, Epoch: 2.3007239287810606, LR: 0.0003 +[2026-02-28 11:00:38] (step=0011760) Train Loss: 0.4586, Train Steps/Sec: 0.07, Epoch: 2.300919585208374, LR: 0.0003 +[2026-02-28 11:00:52] (step=0011761) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 2.3011152416356877, LR: 0.0003 +[2026-02-28 11:01:05] (step=0011762) Train Loss: 0.4555, Train Steps/Sec: 0.07, Epoch: 2.3013108980630013, LR: 0.0003 +[2026-02-28 11:01:19] (step=0011763) Train Loss: 0.4409, Train Steps/Sec: 0.07, Epoch: 2.301506554490315, LR: 0.0003 +[2026-02-28 11:01:33] (step=0011764) Train Loss: 0.4506, Train Steps/Sec: 0.07, Epoch: 2.301702210917629, LR: 0.0003 +[2026-02-28 11:01:46] (step=0011765) Train Loss: 0.4543, Train Steps/Sec: 0.07, Epoch: 2.3018978673449424, LR: 0.0003 +[2026-02-28 11:02:00] (step=0011766) Train Loss: 0.4533, Train Steps/Sec: 0.07, Epoch: 2.302093523772256, LR: 0.0003 +[2026-02-28 11:02:14] (step=0011767) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.3022891801995695, LR: 0.0003 +[2026-02-28 11:02:27] (step=0011768) Train Loss: 0.4685, Train Steps/Sec: 0.07, Epoch: 2.302484836626883, LR: 0.0003 +[2026-02-28 11:02:41] (step=0011769) Train Loss: 0.4453, Train Steps/Sec: 0.07, Epoch: 2.3026804930541966, LR: 0.0003 +[2026-02-28 11:02:54] (step=0011770) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.3028761494815106, LR: 0.0003 +[2026-02-28 11:03:08] (step=0011771) Train Loss: 0.4391, Train Steps/Sec: 0.07, Epoch: 2.303071805908824, LR: 0.0003 +[2026-02-28 11:03:22] (step=0011772) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.3032674623361378, LR: 0.0003 +[2026-02-28 11:03:35] (step=0011773) Train Loss: 0.4470, Train Steps/Sec: 0.07, Epoch: 2.3034631187634513, LR: 0.0003 +[2026-02-28 11:03:49] (step=0011774) Train Loss: 0.4622, Train Steps/Sec: 0.07, Epoch: 2.303658775190765, LR: 0.0003 +[2026-02-28 11:04:03] (step=0011775) Train Loss: 0.4570, Train Steps/Sec: 0.07, Epoch: 2.3038544316180785, LR: 0.0003 +[2026-02-28 11:04:16] (step=0011776) Train Loss: 0.4566, Train Steps/Sec: 0.07, Epoch: 2.3040500880453925, LR: 0.0003 +[2026-02-28 11:04:30] (step=0011777) Train Loss: 0.4464, Train Steps/Sec: 0.07, Epoch: 2.304245744472706, LR: 0.0003 +[2026-02-28 11:04:44] (step=0011778) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.3044414009000196, LR: 0.0003 +[2026-02-28 11:04:57] (step=0011779) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.304637057327333, LR: 0.0003 +[2026-02-28 11:05:11] (step=0011780) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 2.3048327137546467, LR: 0.0003 +[2026-02-28 11:05:24] (step=0011781) Train Loss: 0.4598, Train Steps/Sec: 0.07, Epoch: 2.3050283701819603, LR: 0.0003 +[2026-02-28 11:05:38] (step=0011782) Train Loss: 0.4654, Train Steps/Sec: 0.07, Epoch: 2.3052240266092743, LR: 0.0003 +[2026-02-28 11:05:52] (step=0011783) Train Loss: 0.4538, Train Steps/Sec: 0.07, Epoch: 2.305419683036588, LR: 0.0003 +[2026-02-28 11:06:06] (step=0011784) Train Loss: 0.4396, Train Steps/Sec: 0.07, Epoch: 2.3056153394639014, LR: 0.0003 +[2026-02-28 11:06:19] (step=0011785) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.305810995891215, LR: 0.0003 +[2026-02-28 11:06:33] (step=0011786) Train Loss: 0.4307, Train Steps/Sec: 0.07, Epoch: 2.3060066523185285, LR: 0.0003 +[2026-02-28 11:06:47] (step=0011787) Train Loss: 0.4559, Train Steps/Sec: 0.07, Epoch: 2.306202308745842, LR: 0.0003 +[2026-02-28 11:07:00] (step=0011788) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.306397965173156, LR: 0.0003 +[2026-02-28 11:07:14] (step=0011789) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 2.3065936216004697, LR: 0.0003 +[2026-02-28 11:07:27] (step=0011790) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.306789278027783, LR: 0.0003 +[2026-02-28 11:07:41] (step=0011791) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 2.306984934455097, LR: 0.0003 +[2026-02-28 11:07:55] (step=0011792) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.3071805908824103, LR: 0.0003 +[2026-02-28 11:08:08] (step=0011793) Train Loss: 0.4580, Train Steps/Sec: 0.07, Epoch: 2.307376247309724, LR: 0.0003 +[2026-02-28 11:08:22] (step=0011794) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.307571903737038, LR: 0.0003 +[2026-02-28 11:08:36] (step=0011795) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 2.3077675601643515, LR: 0.0003 +[2026-02-28 11:08:49] (step=0011796) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.307963216591665, LR: 0.0003 +[2026-02-28 11:09:03] (step=0011797) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.3081588730189786, LR: 0.0003 +[2026-02-28 11:09:17] (step=0011798) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.308354529446292, LR: 0.0003 +[2026-02-28 11:09:31] (step=0011799) Train Loss: 0.4547, Train Steps/Sec: 0.07, Epoch: 2.308550185873606, LR: 0.0003 +[2026-02-28 11:09:44] (step=0011800) Train Loss: 0.4363, Train Steps/Sec: 0.07, Epoch: 2.3087458423009197, LR: 0.0003 +[2026-02-28 11:09:58] (step=0011801) Train Loss: 0.4355, Train Steps/Sec: 0.07, Epoch: 2.3089414987282333, LR: 0.0003 +[2026-02-28 11:10:12] (step=0011802) Train Loss: 0.4379, Train Steps/Sec: 0.07, Epoch: 2.309137155155547, LR: 0.0003 +[2026-02-28 11:10:26] (step=0011803) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.3093328115828604, LR: 0.0003 +[2026-02-28 11:10:39] (step=0011804) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 2.309528468010174, LR: 0.0003 +[2026-02-28 11:10:53] (step=0011805) Train Loss: 0.4446, Train Steps/Sec: 0.07, Epoch: 2.309724124437488, LR: 0.0003 +[2026-02-28 11:11:06] (step=0011806) Train Loss: 0.4516, Train Steps/Sec: 0.07, Epoch: 2.3099197808648015, LR: 0.0003 +[2026-02-28 11:11:20] (step=0011807) Train Loss: 0.4722, Train Steps/Sec: 0.07, Epoch: 2.310115437292115, LR: 0.0003 +[2026-02-28 11:11:34] (step=0011808) Train Loss: 0.4581, Train Steps/Sec: 0.07, Epoch: 2.3103110937194287, LR: 0.0003 +[2026-02-28 11:11:48] (step=0011809) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 2.3105067501467422, LR: 0.0003 +[2026-02-28 11:12:01] (step=0011810) Train Loss: 0.4520, Train Steps/Sec: 0.07, Epoch: 2.310702406574056, LR: 0.0003 +[2026-02-28 11:12:15] (step=0011811) Train Loss: 0.4422, Train Steps/Sec: 0.07, Epoch: 2.31089806300137, LR: 0.0003 +[2026-02-28 11:12:29] (step=0011812) Train Loss: 0.4605, Train Steps/Sec: 0.07, Epoch: 2.3110937194286834, LR: 0.0003 +[2026-02-28 11:12:42] (step=0011813) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.311289375855997, LR: 0.0003 +[2026-02-28 11:12:56] (step=0011814) Train Loss: 0.4541, Train Steps/Sec: 0.07, Epoch: 2.3114850322833105, LR: 0.0003 +[2026-02-28 11:13:10] (step=0011815) Train Loss: 0.4616, Train Steps/Sec: 0.07, Epoch: 2.311680688710624, LR: 0.0003 +[2026-02-28 11:13:24] (step=0011816) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.3118763451379376, LR: 0.0003 +[2026-02-28 11:13:38] (step=0011817) Train Loss: 0.4632, Train Steps/Sec: 0.07, Epoch: 2.3120720015652516, LR: 0.0003 +[2026-02-28 11:13:51] (step=0011818) Train Loss: 0.4579, Train Steps/Sec: 0.07, Epoch: 2.312267657992565, LR: 0.0003 +[2026-02-28 11:14:05] (step=0011819) Train Loss: 0.4585, Train Steps/Sec: 0.07, Epoch: 2.3124633144198787, LR: 0.0003 +[2026-02-28 11:14:18] (step=0011820) Train Loss: 0.4499, Train Steps/Sec: 0.07, Epoch: 2.3126589708471923, LR: 0.0003 +[2026-02-28 11:14:32] (step=0011821) Train Loss: 0.4631, Train Steps/Sec: 0.07, Epoch: 2.312854627274506, LR: 0.0003 +[2026-02-28 11:14:46] (step=0011822) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 2.3130502837018194, LR: 0.0003 +[2026-02-28 11:15:00] (step=0011823) Train Loss: 0.4362, Train Steps/Sec: 0.07, Epoch: 2.3132459401291334, LR: 0.0003 +[2026-02-28 11:15:13] (step=0011824) Train Loss: 0.4562, Train Steps/Sec: 0.07, Epoch: 2.313441596556447, LR: 0.0003 +[2026-02-28 11:15:27] (step=0011825) Train Loss: 0.4468, Train Steps/Sec: 0.07, Epoch: 2.3136372529837606, LR: 0.0003 +[2026-02-28 11:15:41] (step=0011826) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.313832909411074, LR: 0.0003 +[2026-02-28 11:15:55] (step=0011827) Train Loss: 0.4429, Train Steps/Sec: 0.07, Epoch: 2.3140285658383877, LR: 0.0003 +[2026-02-28 11:16:08] (step=0011828) Train Loss: 0.4706, Train Steps/Sec: 0.07, Epoch: 2.3142242222657012, LR: 0.0003 +[2026-02-28 11:16:22] (step=0011829) Train Loss: 0.4667, Train Steps/Sec: 0.07, Epoch: 2.3144198786930152, LR: 0.0003 +[2026-02-28 11:16:36] (step=0011830) Train Loss: 0.4560, Train Steps/Sec: 0.07, Epoch: 2.314615535120329, LR: 0.0003 +[2026-02-28 11:16:50] (step=0011831) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 2.3148111915476424, LR: 0.0003 +[2026-02-28 11:17:03] (step=0011832) Train Loss: 0.4497, Train Steps/Sec: 0.07, Epoch: 2.315006847974956, LR: 0.0003 +[2026-02-28 11:17:17] (step=0011833) Train Loss: 0.4523, Train Steps/Sec: 0.07, Epoch: 2.3152025044022695, LR: 0.0003 +[2026-02-28 11:17:31] (step=0011834) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.315398160829583, LR: 0.0003 +[2026-02-28 11:17:44] (step=0011835) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.315593817256897, LR: 0.0003 +[2026-02-28 11:17:58] (step=0011836) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.3157894736842106, LR: 0.0003 +[2026-02-28 11:18:12] (step=0011837) Train Loss: 0.4432, Train Steps/Sec: 0.07, Epoch: 2.315985130111524, LR: 0.0003 +[2026-02-28 11:18:26] (step=0011838) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.3161807865388377, LR: 0.0003 +[2026-02-28 11:18:39] (step=0011839) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.3163764429661513, LR: 0.0003 +[2026-02-28 11:18:53] (step=0011840) Train Loss: 0.4375, Train Steps/Sec: 0.07, Epoch: 2.316572099393465, LR: 0.0003 +[2026-02-28 11:19:07] (step=0011841) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 2.316767755820779, LR: 0.0003 +[2026-02-28 11:19:21] (step=0011842) Train Loss: 0.4575, Train Steps/Sec: 0.07, Epoch: 2.3169634122480924, LR: 0.0003 +[2026-02-28 11:19:34] (step=0011843) Train Loss: 0.4472, Train Steps/Sec: 0.07, Epoch: 2.317159068675406, LR: 0.0003 +[2026-02-28 11:19:48] (step=0011844) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.3173547251027196, LR: 0.0003 +[2026-02-28 11:20:02] (step=0011845) Train Loss: 0.4508, Train Steps/Sec: 0.07, Epoch: 2.317550381530033, LR: 0.0003 +[2026-02-28 11:20:16] (step=0011846) Train Loss: 0.4370, Train Steps/Sec: 0.07, Epoch: 2.3177460379573467, LR: 0.0003 +[2026-02-28 11:20:29] (step=0011847) Train Loss: 0.4665, Train Steps/Sec: 0.07, Epoch: 2.3179416943846607, LR: 0.0003 +[2026-02-28 11:20:43] (step=0011848) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.3181373508119743, LR: 0.0003 +[2026-02-28 11:20:57] (step=0011849) Train Loss: 0.4451, Train Steps/Sec: 0.07, Epoch: 2.318333007239288, LR: 0.0003 +[2026-02-28 11:21:10] (step=0011850) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 2.3185286636666014, LR: 0.0003 +[2026-02-28 11:21:24] (step=0011851) Train Loss: 0.4522, Train Steps/Sec: 0.07, Epoch: 2.318724320093915, LR: 0.0003 +[2026-02-28 11:21:38] (step=0011852) Train Loss: 0.4527, Train Steps/Sec: 0.07, Epoch: 2.318919976521229, LR: 0.0003 +[2026-02-28 11:21:52] (step=0011853) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 2.3191156329485425, LR: 0.0003 +[2026-02-28 11:22:05] (step=0011854) Train Loss: 0.4657, Train Steps/Sec: 0.07, Epoch: 2.319311289375856, LR: 0.0003 +[2026-02-28 11:22:19] (step=0011855) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.3195069458031696, LR: 0.0003 +[2026-02-28 11:22:33] (step=0011856) Train Loss: 0.4627, Train Steps/Sec: 0.07, Epoch: 2.319702602230483, LR: 0.0003 +[2026-02-28 11:22:47] (step=0011857) Train Loss: 0.4501, Train Steps/Sec: 0.07, Epoch: 2.3198982586577968, LR: 0.0003 +[2026-02-28 11:23:00] (step=0011858) Train Loss: 0.4628, Train Steps/Sec: 0.07, Epoch: 2.3200939150851108, LR: 0.0003 +[2026-02-28 11:23:14] (step=0011859) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.3202895715124243, LR: 0.0003 +[2026-02-28 11:23:28] (step=0011860) Train Loss: 0.4450, Train Steps/Sec: 0.07, Epoch: 2.320485227939738, LR: 0.0003 +[2026-02-28 11:23:42] (step=0011861) Train Loss: 0.4378, Train Steps/Sec: 0.07, Epoch: 2.3206808843670514, LR: 0.0003 +[2026-02-28 11:23:55] (step=0011862) Train Loss: 0.4469, Train Steps/Sec: 0.07, Epoch: 2.320876540794365, LR: 0.0003 +[2026-02-28 11:24:09] (step=0011863) Train Loss: 0.4393, Train Steps/Sec: 0.07, Epoch: 2.3210721972216786, LR: 0.0003 +[2026-02-28 11:24:23] (step=0011864) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.3212678536489926, LR: 0.0003 +[2026-02-28 11:24:37] (step=0011865) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.321463510076306, LR: 0.0003 +[2026-02-28 11:24:50] (step=0011866) Train Loss: 0.4529, Train Steps/Sec: 0.07, Epoch: 2.3216591665036197, LR: 0.0003 +[2026-02-28 11:25:04] (step=0011867) Train Loss: 0.4530, Train Steps/Sec: 0.07, Epoch: 2.3218548229309333, LR: 0.0003 +[2026-02-28 11:25:18] (step=0011868) Train Loss: 0.4546, Train Steps/Sec: 0.07, Epoch: 2.322050479358247, LR: 0.0003 +[2026-02-28 11:25:32] (step=0011869) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.3222461357855604, LR: 0.0003 +[2026-02-28 11:25:45] (step=0011870) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.3224417922128744, LR: 0.0003 +[2026-02-28 11:25:59] (step=0011871) Train Loss: 0.4476, Train Steps/Sec: 0.07, Epoch: 2.322637448640188, LR: 0.0003 +[2026-02-28 11:26:13] (step=0011872) Train Loss: 0.4436, Train Steps/Sec: 0.07, Epoch: 2.3228331050675015, LR: 0.0003 +[2026-02-28 11:26:27] (step=0011873) Train Loss: 0.4405, Train Steps/Sec: 0.07, Epoch: 2.323028761494815, LR: 0.0003 +[2026-02-28 11:26:40] (step=0011874) Train Loss: 0.4451, Train Steps/Sec: 0.07, Epoch: 2.3232244179221286, LR: 0.0003 +[2026-02-28 11:26:54] (step=0011875) Train Loss: 0.4445, Train Steps/Sec: 0.07, Epoch: 2.323420074349442, LR: 0.0003 +[2026-02-28 11:27:08] (step=0011876) Train Loss: 0.4502, Train Steps/Sec: 0.07, Epoch: 2.323615730776756, LR: 0.0003 +[2026-02-28 11:27:21] (step=0011877) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.3238113872040698, LR: 0.0003 +[2026-02-28 11:27:35] (step=0011878) Train Loss: 0.4507, Train Steps/Sec: 0.07, Epoch: 2.3240070436313833, LR: 0.0003 +[2026-02-28 11:27:49] (step=0011879) Train Loss: 0.4519, Train Steps/Sec: 0.07, Epoch: 2.324202700058697, LR: 0.0003 +[2026-02-28 11:28:03] (step=0011880) Train Loss: 0.4486, Train Steps/Sec: 0.07, Epoch: 2.3243983564860105, LR: 0.0003 +[2026-02-28 11:28:17] (step=0011881) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.324594012913324, LR: 0.0003 +[2026-02-28 11:28:30] (step=0011882) Train Loss: 0.4441, Train Steps/Sec: 0.07, Epoch: 2.324789669340638, LR: 0.0003 +[2026-02-28 11:28:44] (step=0011883) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.3249853257679516, LR: 0.0003 +[2026-02-28 11:28:58] (step=0011884) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 2.325180982195265, LR: 0.0003 +[2026-02-28 11:29:12] (step=0011885) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 2.3253766386225787, LR: 0.0003 +[2026-02-28 11:29:25] (step=0011886) Train Loss: 0.4600, Train Steps/Sec: 0.07, Epoch: 2.3255722950498923, LR: 0.0003 +[2026-02-28 11:29:39] (step=0011887) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.325767951477206, LR: 0.0003 +[2026-02-28 11:29:53] (step=0011888) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.32596360790452, LR: 0.0003 +[2026-02-28 11:30:07] (step=0011889) Train Loss: 0.4597, Train Steps/Sec: 0.07, Epoch: 2.3261592643318334, LR: 0.0003 +[2026-02-28 11:30:20] (step=0011890) Train Loss: 0.4494, Train Steps/Sec: 0.07, Epoch: 2.326354920759147, LR: 0.0003 +[2026-02-28 11:30:34] (step=0011891) Train Loss: 0.4515, Train Steps/Sec: 0.07, Epoch: 2.3265505771864605, LR: 0.0003 +[2026-02-28 11:30:48] (step=0011892) Train Loss: 0.4466, Train Steps/Sec: 0.07, Epoch: 2.326746233613774, LR: 0.0003 +[2026-02-28 11:31:01] (step=0011893) Train Loss: 0.4428, Train Steps/Sec: 0.07, Epoch: 2.3269418900410876, LR: 0.0003 +[2026-02-28 11:31:15] (step=0011894) Train Loss: 0.4503, Train Steps/Sec: 0.07, Epoch: 2.3271375464684017, LR: 0.0003 +[2026-02-28 11:31:29] (step=0011895) Train Loss: 0.4572, Train Steps/Sec: 0.07, Epoch: 2.327333202895715, LR: 0.0003 +[2026-02-28 11:31:43] (step=0011896) Train Loss: 0.4480, Train Steps/Sec: 0.07, Epoch: 2.3275288593230288, LR: 0.0003 +[2026-02-28 11:31:57] (step=0011897) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.3277245157503423, LR: 0.0003 +[2026-02-28 11:32:10] (step=0011898) Train Loss: 0.4517, Train Steps/Sec: 0.07, Epoch: 2.327920172177656, LR: 0.0003 +[2026-02-28 11:32:24] (step=0011899) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.3281158286049695, LR: 0.0003 +[2026-02-28 11:32:38] (step=0011900) Train Loss: 0.4549, Train Steps/Sec: 0.07, Epoch: 2.3283114850322835, LR: 0.0003 +[2026-02-28 11:32:52] (step=0011901) Train Loss: 0.4568, Train Steps/Sec: 0.07, Epoch: 2.328507141459597, LR: 0.0003 +[2026-02-28 11:33:05] (step=0011902) Train Loss: 0.4561, Train Steps/Sec: 0.07, Epoch: 2.3287027978869106, LR: 0.0003 +[2026-02-28 11:33:19] (step=0011903) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.328898454314224, LR: 0.0003 +[2026-02-28 11:33:33] (step=0011904) Train Loss: 0.4488, Train Steps/Sec: 0.07, Epoch: 2.3290941107415377, LR: 0.0003 +[2026-02-28 11:33:47] (step=0011905) Train Loss: 0.4604, Train Steps/Sec: 0.07, Epoch: 2.3292897671688513, LR: 0.0003 +[2026-02-28 11:34:00] (step=0011906) Train Loss: 0.4429, Train Steps/Sec: 0.07, Epoch: 2.3294854235961653, LR: 0.0003 +[2026-02-28 11:34:14] (step=0011907) Train Loss: 0.4442, Train Steps/Sec: 0.07, Epoch: 2.329681080023479, LR: 0.0003 +[2026-02-28 11:34:28] (step=0011908) Train Loss: 0.4498, Train Steps/Sec: 0.07, Epoch: 2.3298767364507924, LR: 0.0003 +[2026-02-28 11:34:42] (step=0011909) Train Loss: 0.4511, Train Steps/Sec: 0.07, Epoch: 2.330072392878106, LR: 0.0003 +[2026-02-28 11:34:55] (step=0011910) Train Loss: 0.4551, Train Steps/Sec: 0.07, Epoch: 2.3302680493054195, LR: 0.0003 +[2026-02-28 11:35:09] (step=0011911) Train Loss: 0.4438, Train Steps/Sec: 0.07, Epoch: 2.3304637057327335, LR: 0.0003 +[2026-02-28 11:35:23] (step=0011912) Train Loss: 0.4422, Train Steps/Sec: 0.07, Epoch: 2.330659362160047, LR: 0.0003 +[2026-02-28 11:35:36] (step=0011913) Train Loss: 0.4471, Train Steps/Sec: 0.07, Epoch: 2.3308550185873607, LR: 0.0003 +[2026-02-28 11:35:50] (step=0011914) Train Loss: 0.4492, Train Steps/Sec: 0.07, Epoch: 2.3310506750146742, LR: 0.0003 +[2026-02-28 11:36:04] (step=0011915) Train Loss: 0.4550, Train Steps/Sec: 0.07, Epoch: 2.331246331441988, LR: 0.0003 +[2026-02-28 11:36:18] (step=0011916) Train Loss: 0.4610, Train Steps/Sec: 0.07, Epoch: 2.3314419878693013, LR: 0.0003 +[2026-02-28 11:36:32] (step=0011917) Train Loss: 0.4602, Train Steps/Sec: 0.07, Epoch: 2.3316376442966154, LR: 0.0003 +[2026-02-28 11:36:45] (step=0011918) Train Loss: 0.4698, Train Steps/Sec: 0.07, Epoch: 2.331833300723929, LR: 0.0003 +[2026-02-28 11:36:59] (step=0011919) Train Loss: 0.4524, Train Steps/Sec: 0.07, Epoch: 2.3320289571512425, LR: 0.0003 +[2026-02-28 11:37:13] (step=0011920) Train Loss: 0.4462, Train Steps/Sec: 0.07, Epoch: 2.332224613578556, LR: 0.0003 +[2026-02-28 11:37:27] (step=0011921) Train Loss: 0.4545, Train Steps/Sec: 0.07, Epoch: 2.3324202700058696, LR: 0.0003 +[2026-02-28 11:37:41] (step=0011922) Train Loss: 0.4526, Train Steps/Sec: 0.07, Epoch: 2.332615926433183, LR: 0.0003 +[2026-02-28 11:37:54] (step=0011923) Train Loss: 0.4513, Train Steps/Sec: 0.07, Epoch: 2.332811582860497, LR: 0.0003 +[2026-02-28 11:38:08] (step=0011924) Train Loss: 0.4448, Train Steps/Sec: 0.07, Epoch: 2.3330072392878107, LR: 0.0003 +[2026-02-28 11:38:22] (step=0011925) Train Loss: 0.4483, Train Steps/Sec: 0.07, Epoch: 2.3332028957151243, LR: 0.0003 +[2026-02-28 11:38:35] (step=0011926) Train Loss: 0.4539, Train Steps/Sec: 0.07, Epoch: 2.333398552142438, LR: 0.0003 +[2026-02-28 11:38:49] (step=0011927) Train Loss: 0.4419, Train Steps/Sec: 0.07, Epoch: 2.3335942085697514, LR: 0.0003 +[2026-02-28 11:39:03] (step=0011928) Train Loss: 0.4504, Train Steps/Sec: 0.07, Epoch: 2.333789864997065, LR: 0.0003 +[2026-02-28 11:39:17] (step=0011929) Train Loss: 0.4406, Train Steps/Sec: 0.07, Epoch: 2.333985521424379, LR: 0.0003 +[2026-02-28 11:39:31] (step=0011930) Train Loss: 0.4420, Train Steps/Sec: 0.07, Epoch: 2.3341811778516925, LR: 0.0003 +[2026-02-28 11:39:44] (step=0011931) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.334376834279006, LR: 0.0003 +[2026-02-28 11:39:58] (step=0011932) Train Loss: 0.4437, Train Steps/Sec: 0.07, Epoch: 2.3345724907063197, LR: 0.0003 +[2026-02-28 11:40:12] (step=0011933) Train Loss: 0.4518, Train Steps/Sec: 0.07, Epoch: 2.3347681471336332, LR: 0.0003 +[2026-02-28 11:40:25] (step=0011934) Train Loss: 0.4554, Train Steps/Sec: 0.07, Epoch: 2.334963803560947, LR: 0.0003 +[2026-02-28 11:40:39] (step=0011935) Train Loss: 0.4514, Train Steps/Sec: 0.07, Epoch: 2.335159459988261, LR: 0.0003 +[2026-02-28 11:40:53] (step=0011936) Train Loss: 0.4603, Train Steps/Sec: 0.07, Epoch: 2.3353551164155744, LR: 0.0003 +[2026-02-28 11:41:07] (step=0011937) Train Loss: 0.4535, Train Steps/Sec: 0.07, Epoch: 2.335550772842888, LR: 0.0003 +[2026-02-28 11:41:20] (step=0011938) Train Loss: 0.4709, Train Steps/Sec: 0.07, Epoch: 2.3357464292702015, LR: 0.0003 +[2026-02-28 11:41:34] (step=0011939) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.335942085697515, LR: 0.0003 +[2026-02-28 11:41:48] (step=0011940) Train Loss: 0.4574, Train Steps/Sec: 0.07, Epoch: 2.3361377421248286, LR: 0.0003 +[2026-02-28 11:42:01] (step=0011941) Train Loss: 0.4525, Train Steps/Sec: 0.07, Epoch: 2.3363333985521426, LR: 0.0003 +[2026-02-28 11:42:15] (step=0011942) Train Loss: 0.4542, Train Steps/Sec: 0.07, Epoch: 2.336529054979456, LR: 0.0003 +[2026-02-28 11:42:29] (step=0011943) Train Loss: 0.4531, Train Steps/Sec: 0.07, Epoch: 2.3367247114067697, LR: 0.0003 +[2026-02-28 11:42:43] (step=0011944) Train Loss: 0.4611, Train Steps/Sec: 0.07, Epoch: 2.3369203678340833, LR: 0.0003 +[2026-02-28 11:42:57] (step=0011945) Train Loss: 0.4615, Train Steps/Sec: 0.07, Epoch: 2.337116024261397, LR: 0.0003 +[2026-02-28 11:43:10] (step=0011946) Train Loss: 0.4440, Train Steps/Sec: 0.07, Epoch: 2.3373116806887104, LR: 0.0003 +[2026-02-28 11:43:24] (step=0011947) Train Loss: 0.4650, Train Steps/Sec: 0.07, Epoch: 2.3375073371160244, LR: 0.0003 +[2026-02-28 11:43:38] (step=0011948) Train Loss: 0.4491, Train Steps/Sec: 0.07, Epoch: 2.337702993543338, LR: 0.0003 +[2026-02-28 11:43:52] (step=0011949) Train Loss: 0.4583, Train Steps/Sec: 0.07, Epoch: 2.3378986499706516, LR: 0.0003 +[2026-02-28 11:44:05] (step=0011950) Train Loss: 0.4477, Train Steps/Sec: 0.07, Epoch: 2.338094306397965, LR: 0.0003 +[2026-02-28 11:44:19] (step=0011951) Train Loss: 0.4521, Train Steps/Sec: 0.07, Epoch: 2.3382899628252787, LR: 0.0003 +[2026-02-28 11:44:33] (step=0011952) Train Loss: 0.4537, Train Steps/Sec: 0.07, Epoch: 2.3384856192525922, LR: 0.0003 +[2026-02-28 11:44:47] (step=0011953) Train Loss: 0.4484, Train Steps/Sec: 0.07, Epoch: 2.3386812756799062, LR: 0.0003 +[2026-02-28 11:45:00] (step=0011954) Train Loss: 0.4693, Train Steps/Sec: 0.07, Epoch: 2.33887693210722, LR: 0.0003 +[2026-02-28 11:45:14] (step=0011955) Train Loss: 0.4500, Train Steps/Sec: 0.07, Epoch: 2.3390725885345334, LR: 0.0003 +[2026-02-28 11:45:28] (step=0011956) Train Loss: 0.4540, Train Steps/Sec: 0.07, Epoch: 2.339268244961847, LR: 0.0003 +[2026-02-28 11:45:41] (step=0011957) Train Loss: 0.4493, Train Steps/Sec: 0.07, Epoch: 2.3394639013891605, LR: 0.0003 +[2026-02-28 11:45:55] (step=0011958) Train Loss: 0.4612, Train Steps/Sec: 0.07, Epoch: 2.339659557816474, LR: 0.0003 +[2026-02-28 11:46:09] (step=0011959) Train Loss: 0.4473, Train Steps/Sec: 0.07, Epoch: 2.339855214243788, LR: 0.0003 +[2026-02-28 11:46:23] (step=0011960) Train Loss: 0.4495, Train Steps/Sec: 0.07, Epoch: 2.3400508706711016, LR: 0.0003 +[2026-02-28 11:46:36] (step=0011961) Train Loss: 0.4474, Train Steps/Sec: 0.07, Epoch: 2.340246527098415, LR: 0.0003 +[2026-02-28 11:46:50] (step=0011962) Train Loss: 0.4459, Train Steps/Sec: 0.07, Epoch: 2.3404421835257287, LR: 0.0003 +[2026-02-28 11:47:04] (step=0011963) Train Loss: 0.4548, Train Steps/Sec: 0.07, Epoch: 2.3406378399530423, LR: 0.0003 +[2026-02-28 11:47:18] (step=0011964) Train Loss: 0.4427, Train Steps/Sec: 0.07, Epoch: 2.3408334963803563, LR: 0.0003 +[2026-02-28 11:47:31] (step=0011965) Train Loss: 0.4647, Train Steps/Sec: 0.07, Epoch: 2.34102915280767, LR: 0.0003 +[2026-02-28 11:47:45] (step=0011966) Train Loss: 0.4510, Train Steps/Sec: 0.07, Epoch: 2.3412248092349834, LR: 0.0003 +[2026-02-28 11:47:55] (step=0011967) Train Loss: 0.4454, Train Steps/Sec: 0.10, Epoch: 2.341420465662297, LR: 0.0003 +[2026-02-28 11:48:04] (step=0011968) Train Loss: 0.4469, Train Steps/Sec: 0.12, Epoch: 2.3416161220896106, LR: 0.0003 +[2026-02-28 11:48:12] (step=0011969) Train Loss: 0.4280, Train Steps/Sec: 0.12, Epoch: 2.341811778516924, LR: 0.0003 +[2026-02-28 11:48:21] (step=0011970) Train Loss: 0.4549, Train Steps/Sec: 0.12, Epoch: 2.342007434944238, LR: 0.0003 +[2026-02-28 11:48:29] (step=0011971) Train Loss: 0.4582, Train Steps/Sec: 0.12, Epoch: 2.3422030913715517, LR: 0.0003 +[2026-02-28 11:48:38] (step=0011972) Train Loss: 0.4555, Train Steps/Sec: 0.12, Epoch: 2.3423987477988653, LR: 0.0003 +[2026-02-28 11:48:46] (step=0011973) Train Loss: 0.4561, Train Steps/Sec: 0.12, Epoch: 2.342594404226179, LR: 0.0003 +[2026-02-28 11:48:55] (step=0011974) Train Loss: 0.4675, Train Steps/Sec: 0.12, Epoch: 2.3427900606534924, LR: 0.0003 +[2026-02-28 11:49:03] (step=0011975) Train Loss: 0.4634, Train Steps/Sec: 0.12, Epoch: 2.342985717080806, LR: 0.0003 +[2026-02-28 11:49:12] (step=0011976) Train Loss: 0.4607, Train Steps/Sec: 0.12, Epoch: 2.34318137350812, LR: 0.0003 +[2026-02-28 11:49:20] (step=0011977) Train Loss: 0.4503, Train Steps/Sec: 0.12, Epoch: 2.3433770299354335, LR: 0.0003 +[2026-02-28 11:49:29] (step=0011978) Train Loss: 0.4531, Train Steps/Sec: 0.12, Epoch: 2.343572686362747, LR: 0.0003 +[2026-02-28 11:49:37] (step=0011979) Train Loss: 0.4396, Train Steps/Sec: 0.12, Epoch: 2.3437683427900606, LR: 0.0003 +[2026-02-28 11:49:46] (step=0011980) Train Loss: 0.4501, Train Steps/Sec: 0.12, Epoch: 2.343963999217374, LR: 0.0003 +[2026-02-28 11:49:54] (step=0011981) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 2.3441596556446878, LR: 0.0003 +[2026-02-28 11:50:03] (step=0011982) Train Loss: 0.4501, Train Steps/Sec: 0.12, Epoch: 2.3443553120720018, LR: 0.0003 +[2026-02-28 11:50:11] (step=0011983) Train Loss: 0.4490, Train Steps/Sec: 0.12, Epoch: 2.3445509684993153, LR: 0.0003 +[2026-02-28 11:50:20] (step=0011984) Train Loss: 0.4428, Train Steps/Sec: 0.12, Epoch: 2.344746624926629, LR: 0.0003 +[2026-02-28 11:50:29] (step=0011985) Train Loss: 0.4516, Train Steps/Sec: 0.12, Epoch: 2.3449422813539424, LR: 0.0003 +[2026-02-28 11:50:37] (step=0011986) Train Loss: 0.4449, Train Steps/Sec: 0.12, Epoch: 2.345137937781256, LR: 0.0003 +[2026-02-28 11:50:46] (step=0011987) Train Loss: 0.4570, Train Steps/Sec: 0.12, Epoch: 2.3453335942085696, LR: 0.0003 +[2026-02-28 11:50:54] (step=0011988) Train Loss: 0.4507, Train Steps/Sec: 0.12, Epoch: 2.3455292506358836, LR: 0.0003 +[2026-02-28 11:51:03] (step=0011989) Train Loss: 0.4627, Train Steps/Sec: 0.12, Epoch: 2.345724907063197, LR: 0.0003 +[2026-02-28 11:51:11] (step=0011990) Train Loss: 0.4547, Train Steps/Sec: 0.12, Epoch: 2.3459205634905107, LR: 0.0003 +[2026-02-28 11:51:20] (step=0011991) Train Loss: 0.4455, Train Steps/Sec: 0.12, Epoch: 2.3461162199178243, LR: 0.0003 +[2026-02-28 11:51:28] (step=0011992) Train Loss: 0.4466, Train Steps/Sec: 0.12, Epoch: 2.346311876345138, LR: 0.0003 +[2026-02-28 11:51:37] (step=0011993) Train Loss: 0.4575, Train Steps/Sec: 0.12, Epoch: 2.3465075327724514, LR: 0.0003 +[2026-02-28 11:51:45] (step=0011994) Train Loss: 0.4529, Train Steps/Sec: 0.12, Epoch: 2.3467031891997654, LR: 0.0003 +[2026-02-28 11:51:54] (step=0011995) Train Loss: 0.4347, Train Steps/Sec: 0.12, Epoch: 2.346898845627079, LR: 0.0003 +[2026-02-28 11:52:02] (step=0011996) Train Loss: 0.4545, Train Steps/Sec: 0.12, Epoch: 2.3470945020543925, LR: 0.0003 +[2026-02-28 11:52:10] (step=0011997) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.347290158481706, LR: 0.0003 +[2026-02-28 11:52:18] (step=0011998) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.3474858149090196, LR: 0.0003 +[2026-02-28 11:52:26] (step=0011999) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.347681471336333, LR: 0.0003 +[2026-02-28 11:52:34] (step=0012000) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 2.347877127763647, LR: 0.0003 +[2026-02-28 11:52:34] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0012000/ +[2026-02-28 11:52:42] (step=0012001) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.3480727841909608, LR: 0.0003 +[2026-02-28 11:52:50] (step=0012002) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.3482684406182743, LR: 0.0003 +[2026-02-28 11:52:57] (step=0012003) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 2.348464097045588, LR: 0.0003 +[2026-02-28 11:53:05] (step=0012004) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.3486597534729015, LR: 0.0003 +[2026-02-28 11:53:13] (step=0012005) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.348855409900215, LR: 0.0003 +[2026-02-28 11:53:21] (step=0012006) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.349051066327529, LR: 0.0003 +[2026-02-28 11:53:29] (step=0012007) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 2.3492467227548426, LR: 0.0003 +[2026-02-28 11:53:37] (step=0012008) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.349442379182156, LR: 0.0003 +[2026-02-28 11:53:44] (step=0012009) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.3496380356094697, LR: 0.0003 +[2026-02-28 11:53:52] (step=0012010) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.3498336920367833, LR: 0.0003 +[2026-02-28 11:54:00] (step=0012011) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.350029348464097, LR: 0.0003 +[2026-02-28 11:54:08] (step=0012012) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.350225004891411, LR: 0.0003 +[2026-02-28 11:54:16] (step=0012013) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.3504206613187244, LR: 0.0003 +[2026-02-28 11:54:24] (step=0012014) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.350616317746038, LR: 0.0003 +[2026-02-28 11:54:32] (step=0012015) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.3508119741733515, LR: 0.0003 +[2026-02-28 11:54:39] (step=0012016) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.351007630600665, LR: 0.0003 +[2026-02-28 11:54:47] (step=0012017) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.3512032870279787, LR: 0.0003 +[2026-02-28 11:54:55] (step=0012018) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.3513989434552927, LR: 0.0003 +[2026-02-28 11:55:03] (step=0012019) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.351594599882606, LR: 0.0003 +[2026-02-28 11:55:11] (step=0012020) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.35179025630992, LR: 0.0003 +[2026-02-28 11:55:19] (step=0012021) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.3519859127372333, LR: 0.0003 +[2026-02-28 11:55:26] (step=0012022) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.352181569164547, LR: 0.0003 +[2026-02-28 11:55:34] (step=0012023) Train Loss: 0.4518, Train Steps/Sec: 0.12, Epoch: 2.352377225591861, LR: 0.0003 +[2026-02-28 11:55:42] (step=0012024) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.3525728820191745, LR: 0.0003 +[2026-02-28 11:55:50] (step=0012025) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.352768538446488, LR: 0.0003 +[2026-02-28 11:55:58] (step=0012026) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 2.3529641948738016, LR: 0.0003 +[2026-02-28 11:56:06] (step=0012027) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.353159851301115, LR: 0.0003 +[2026-02-28 11:56:14] (step=0012028) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.3533555077284287, LR: 0.0003 +[2026-02-28 11:56:21] (step=0012029) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.3535511641557427, LR: 0.0003 +[2026-02-28 11:56:29] (step=0012030) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.3537468205830563, LR: 0.0003 +[2026-02-28 11:56:37] (step=0012031) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 2.35394247701037, LR: 0.0003 +[2026-02-28 11:56:45] (step=0012032) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.3541381334376834, LR: 0.0003 +[2026-02-28 11:56:53] (step=0012033) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.354333789864997, LR: 0.0003 +[2026-02-28 11:57:01] (step=0012034) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.3545294462923105, LR: 0.0003 +[2026-02-28 11:57:09] (step=0012035) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.3547251027196245, LR: 0.0003 +[2026-02-28 11:57:16] (step=0012036) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 2.354920759146938, LR: 0.0003 +[2026-02-28 11:57:24] (step=0012037) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 2.3551164155742517, LR: 0.0003 +[2026-02-28 11:57:32] (step=0012038) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.3553120720015652, LR: 0.0003 +[2026-02-28 11:57:40] (step=0012039) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.355507728428879, LR: 0.0003 +[2026-02-28 11:57:48] (step=0012040) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.3557033848561924, LR: 0.0003 +[2026-02-28 11:57:56] (step=0012041) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.3558990412835064, LR: 0.0003 +[2026-02-28 11:58:03] (step=0012042) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.35609469771082, LR: 0.0003 +[2026-02-28 11:58:11] (step=0012043) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.3562903541381335, LR: 0.0003 +[2026-02-28 11:58:19] (step=0012044) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.356486010565447, LR: 0.0003 +[2026-02-28 11:58:27] (step=0012045) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.3566816669927606, LR: 0.0003 +[2026-02-28 11:58:35] (step=0012046) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 2.356877323420074, LR: 0.0003 +[2026-02-28 11:58:43] (step=0012047) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.357072979847388, LR: 0.0003 +[2026-02-28 11:58:51] (step=0012048) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.3572686362747017, LR: 0.0003 +[2026-02-28 11:58:58] (step=0012049) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.3574642927020153, LR: 0.0003 +[2026-02-28 11:59:06] (step=0012050) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.357659949129329, LR: 0.0003 +[2026-02-28 11:59:14] (step=0012051) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.3578556055566424, LR: 0.0003 +[2026-02-28 11:59:22] (step=0012052) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 2.358051261983956, LR: 0.0003 +[2026-02-28 11:59:30] (step=0012053) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.35824691841127, LR: 0.0003 +[2026-02-28 11:59:38] (step=0012054) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.3584425748385835, LR: 0.0003 +[2026-02-28 11:59:45] (step=0012055) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.358638231265897, LR: 0.0003 +[2026-02-28 11:59:53] (step=0012056) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 2.3588338876932107, LR: 0.0003 +[2026-02-28 12:00:01] (step=0012057) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.3590295441205242, LR: 0.0003 +[2026-02-28 12:00:09] (step=0012058) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.359225200547838, LR: 0.0003 +[2026-02-28 12:00:17] (step=0012059) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.359420856975152, LR: 0.0003 +[2026-02-28 12:00:25] (step=0012060) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 2.3596165134024654, LR: 0.0003 +[2026-02-28 12:00:32] (step=0012061) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.359812169829779, LR: 0.0003 +[2026-02-28 12:00:40] (step=0012062) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.3600078262570925, LR: 0.0003 +[2026-02-28 12:00:48] (step=0012063) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.360203482684406, LR: 0.0003 +[2026-02-28 12:00:56] (step=0012064) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.3603991391117196, LR: 0.0003 +[2026-02-28 12:01:04] (step=0012065) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.3605947955390336, LR: 0.0003 +[2026-02-28 12:01:12] (step=0012066) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.360790451966347, LR: 0.0003 +[2026-02-28 12:01:20] (step=0012067) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 2.3609861083936607, LR: 0.0003 +[2026-02-28 12:01:27] (step=0012068) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.3611817648209743, LR: 0.0003 +[2026-02-28 12:01:35] (step=0012069) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.361377421248288, LR: 0.0003 +[2026-02-28 12:01:43] (step=0012070) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 2.3615730776756014, LR: 0.0003 +[2026-02-28 12:01:51] (step=0012071) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.3617687341029154, LR: 0.0003 +[2026-02-28 12:01:59] (step=0012072) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.361964390530229, LR: 0.0003 +[2026-02-28 12:02:07] (step=0012073) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.3621600469575426, LR: 0.0003 +[2026-02-28 12:02:15] (step=0012074) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.362355703384856, LR: 0.0003 +[2026-02-28 12:02:23] (step=0012075) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.3625513598121697, LR: 0.0003 +[2026-02-28 12:02:30] (step=0012076) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.3627470162394837, LR: 0.0003 +[2026-02-28 12:02:38] (step=0012077) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.3629426726667973, LR: 0.0003 +[2026-02-28 12:02:46] (step=0012078) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.363138329094111, LR: 0.0003 +[2026-02-28 12:02:54] (step=0012079) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.3633339855214244, LR: 0.0003 +[2026-02-28 12:03:02] (step=0012080) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.363529641948738, LR: 0.0003 +[2026-02-28 12:03:10] (step=0012081) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.3637252983760515, LR: 0.0003 +[2026-02-28 12:03:17] (step=0012082) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 2.3639209548033655, LR: 0.0003 +[2026-02-28 12:03:25] (step=0012083) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.364116611230679, LR: 0.0003 +[2026-02-28 12:03:33] (step=0012084) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.3643122676579926, LR: 0.0003 +[2026-02-28 12:03:41] (step=0012085) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.364507924085306, LR: 0.0003 +[2026-02-28 12:03:49] (step=0012086) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 2.3647035805126198, LR: 0.0003 +[2026-02-28 12:03:57] (step=0012087) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.3648992369399333, LR: 0.0003 +[2026-02-28 12:04:04] (step=0012088) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.3650948933672473, LR: 0.0003 +[2026-02-28 12:04:12] (step=0012089) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.365290549794561, LR: 0.0003 +[2026-02-28 12:04:20] (step=0012090) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.3654862062218744, LR: 0.0003 +[2026-02-28 12:04:28] (step=0012091) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.365681862649188, LR: 0.0003 +[2026-02-28 12:04:36] (step=0012092) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.3658775190765016, LR: 0.0003 +[2026-02-28 12:04:44] (step=0012093) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.366073175503815, LR: 0.0003 +[2026-02-28 12:04:52] (step=0012094) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.366268831931129, LR: 0.0003 +[2026-02-28 12:04:59] (step=0012095) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.3664644883584427, LR: 0.0003 +[2026-02-28 12:05:07] (step=0012096) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.3666601447857563, LR: 0.0003 +[2026-02-28 12:05:15] (step=0012097) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.36685580121307, LR: 0.0003 +[2026-02-28 12:05:23] (step=0012098) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.3670514576403834, LR: 0.0003 +[2026-02-28 12:05:31] (step=0012099) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.367247114067697, LR: 0.0003 +[2026-02-28 12:05:39] (step=0012100) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.367442770495011, LR: 0.0003 +[2026-02-28 12:05:46] (step=0012101) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 2.3676384269223245, LR: 0.0003 +[2026-02-28 12:05:54] (step=0012102) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.367834083349638, LR: 0.0003 +[2026-02-28 12:06:02] (step=0012103) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.3680297397769516, LR: 0.0003 +[2026-02-28 12:06:10] (step=0012104) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.368225396204265, LR: 0.0003 +[2026-02-28 12:06:18] (step=0012105) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.3684210526315788, LR: 0.0003 +[2026-02-28 12:06:26] (step=0012106) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.3686167090588928, LR: 0.0003 +[2026-02-28 12:06:33] (step=0012107) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.3688123654862063, LR: 0.0003 +[2026-02-28 12:06:41] (step=0012108) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 2.36900802191352, LR: 0.0003 +[2026-02-28 12:06:49] (step=0012109) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.3692036783408335, LR: 0.0003 +[2026-02-28 12:06:57] (step=0012110) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.369399334768147, LR: 0.0003 +[2026-02-28 12:07:05] (step=0012111) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.3695949911954606, LR: 0.0003 +[2026-02-28 12:07:13] (step=0012112) Train Loss: 0.4811, Train Steps/Sec: 0.13, Epoch: 2.3697906476227746, LR: 0.0003 +[2026-02-28 12:07:20] (step=0012113) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.369986304050088, LR: 0.0003 +[2026-02-28 12:07:28] (step=0012114) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.3701819604774017, LR: 0.0003 +[2026-02-28 12:07:36] (step=0012115) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.3703776169047153, LR: 0.0003 +[2026-02-28 12:07:44] (step=0012116) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.370573273332029, LR: 0.0003 +[2026-02-28 12:07:52] (step=0012117) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.3707689297593424, LR: 0.0003 +[2026-02-28 12:08:00] (step=0012118) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.3709645861866564, LR: 0.0003 +[2026-02-28 12:08:08] (step=0012119) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.37116024261397, LR: 0.0003 +[2026-02-28 12:08:15] (step=0012120) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 2.3713558990412835, LR: 0.0003 +[2026-02-28 12:08:23] (step=0012121) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.371551555468597, LR: 0.0003 +[2026-02-28 12:08:31] (step=0012122) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.3717472118959106, LR: 0.0003 +[2026-02-28 12:08:39] (step=0012123) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.371942868323224, LR: 0.0003 +[2026-02-28 12:08:47] (step=0012124) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.372138524750538, LR: 0.0003 +[2026-02-28 12:08:55] (step=0012125) Train Loss: 0.4702, Train Steps/Sec: 0.13, Epoch: 2.3723341811778518, LR: 0.0003 +[2026-02-28 12:09:03] (step=0012126) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 2.3725298376051653, LR: 0.0003 +[2026-02-28 12:09:10] (step=0012127) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.372725494032479, LR: 0.0003 +[2026-02-28 12:09:18] (step=0012128) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.3729211504597925, LR: 0.0003 +[2026-02-28 12:09:26] (step=0012129) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.373116806887106, LR: 0.0003 +[2026-02-28 12:09:34] (step=0012130) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.37331246331442, LR: 0.0003 +[2026-02-28 12:09:42] (step=0012131) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 2.3735081197417336, LR: 0.0003 +[2026-02-28 12:09:50] (step=0012132) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.373703776169047, LR: 0.0003 +[2026-02-28 12:09:58] (step=0012133) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.3738994325963607, LR: 0.0003 +[2026-02-28 12:10:05] (step=0012134) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.3740950890236743, LR: 0.0003 +[2026-02-28 12:10:13] (step=0012135) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.3742907454509883, LR: 0.0003 +[2026-02-28 12:10:21] (step=0012136) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.374486401878302, LR: 0.0003 +[2026-02-28 12:10:29] (step=0012137) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.3746820583056154, LR: 0.0003 +[2026-02-28 12:10:37] (step=0012138) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.374877714732929, LR: 0.0003 +[2026-02-28 12:10:45] (step=0012139) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.3750733711602425, LR: 0.0003 +[2026-02-28 12:10:52] (step=0012140) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.375269027587556, LR: 0.0003 +[2026-02-28 12:11:00] (step=0012141) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.37546468401487, LR: 0.0003 +[2026-02-28 12:11:08] (step=0012142) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 2.3756603404421837, LR: 0.0003 +[2026-02-28 12:11:16] (step=0012143) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.375855996869497, LR: 0.0003 +[2026-02-28 12:11:24] (step=0012144) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.376051653296811, LR: 0.0003 +[2026-02-28 12:11:32] (step=0012145) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.3762473097241243, LR: 0.0003 +[2026-02-28 12:11:39] (step=0012146) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.376442966151438, LR: 0.0003 +[2026-02-28 12:11:47] (step=0012147) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.376638622578752, LR: 0.0003 +[2026-02-28 12:11:55] (step=0012148) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.3768342790060655, LR: 0.0003 +[2026-02-28 12:12:03] (step=0012149) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.377029935433379, LR: 0.0003 +[2026-02-28 12:12:11] (step=0012150) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 2.3772255918606926, LR: 0.0003 +[2026-02-28 12:12:19] (step=0012151) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.377421248288006, LR: 0.0003 +[2026-02-28 12:12:27] (step=0012152) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 2.3776169047153197, LR: 0.0003 +[2026-02-28 12:12:34] (step=0012153) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.3778125611426337, LR: 0.0003 +[2026-02-28 12:12:42] (step=0012154) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.3780082175699473, LR: 0.0003 +[2026-02-28 12:12:50] (step=0012155) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.378203873997261, LR: 0.0003 +[2026-02-28 12:12:58] (step=0012156) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.3783995304245744, LR: 0.0003 +[2026-02-28 12:13:06] (step=0012157) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.378595186851888, LR: 0.0003 +[2026-02-28 12:13:14] (step=0012158) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.3787908432792015, LR: 0.0003 +[2026-02-28 12:13:21] (step=0012159) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 2.3789864997065155, LR: 0.0003 +[2026-02-28 12:13:29] (step=0012160) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 2.379182156133829, LR: 0.0003 +[2026-02-28 12:13:37] (step=0012161) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.3793778125611427, LR: 0.0003 +[2026-02-28 12:13:45] (step=0012162) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.3795734689884562, LR: 0.0003 +[2026-02-28 12:13:53] (step=0012163) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 2.37976912541577, LR: 0.0003 +[2026-02-28 12:14:01] (step=0012164) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 2.3799647818430834, LR: 0.0003 +[2026-02-28 12:14:09] (step=0012165) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.3801604382703974, LR: 0.0003 +[2026-02-28 12:14:16] (step=0012166) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 2.380356094697711, LR: 0.0003 +[2026-02-28 12:14:24] (step=0012167) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.3805517511250245, LR: 0.0003 +[2026-02-28 12:14:32] (step=0012168) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 2.380747407552338, LR: 0.0003 +[2026-02-28 12:14:40] (step=0012169) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 2.3809430639796516, LR: 0.0003 +[2026-02-28 12:14:48] (step=0012170) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.381138720406965, LR: 0.0003 +[2026-02-28 12:14:56] (step=0012171) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 2.381334376834279, LR: 0.0003 +[2026-02-28 12:15:04] (step=0012172) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.3815300332615927, LR: 0.0003 +[2026-02-28 12:15:11] (step=0012173) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.3817256896889063, LR: 0.0003 +[2026-02-28 12:15:19] (step=0012174) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.38192134611622, LR: 0.0003 +[2026-02-28 12:15:27] (step=0012175) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.3821170025435334, LR: 0.0003 +[2026-02-28 12:15:35] (step=0012176) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.382312658970847, LR: 0.0003 +[2026-02-28 12:15:43] (step=0012177) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.382508315398161, LR: 0.0003 +[2026-02-28 12:15:51] (step=0012178) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.3827039718254746, LR: 0.0003 +[2026-02-28 12:15:59] (step=0012179) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.382899628252788, LR: 0.0003 +[2026-02-28 12:16:07] (step=0012180) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.3830952846801017, LR: 0.0003 +[2026-02-28 12:16:14] (step=0012181) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.3832909411074152, LR: 0.0003 +[2026-02-28 12:16:22] (step=0012182) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 2.383486597534729, LR: 0.0003 +[2026-02-28 12:16:30] (step=0012183) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.383682253962043, LR: 0.0003 +[2026-02-28 12:16:38] (step=0012184) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.3838779103893564, LR: 0.0003 +[2026-02-28 12:16:46] (step=0012185) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.38407356681667, LR: 0.0003 +[2026-02-28 12:16:54] (step=0012186) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.3842692232439835, LR: 0.0003 +[2026-02-28 12:17:01] (step=0012187) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.384464879671297, LR: 0.0003 +[2026-02-28 12:17:09] (step=0012188) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.384660536098611, LR: 0.0003 +[2026-02-28 12:17:17] (step=0012189) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 2.3848561925259246, LR: 0.0003 +[2026-02-28 12:17:25] (step=0012190) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.385051848953238, LR: 0.0003 +[2026-02-28 12:17:33] (step=0012191) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.3852475053805517, LR: 0.0003 +[2026-02-28 12:17:41] (step=0012192) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 2.3854431618078653, LR: 0.0003 +[2026-02-28 12:17:48] (step=0012193) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.385638818235179, LR: 0.0003 +[2026-02-28 12:17:56] (step=0012194) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.385834474662493, LR: 0.0003 +[2026-02-28 12:18:04] (step=0012195) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.3860301310898064, LR: 0.0003 +[2026-02-28 12:18:12] (step=0012196) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 2.38622578751712, LR: 0.0003 +[2026-02-28 12:18:20] (step=0012197) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.3864214439444336, LR: 0.0003 +[2026-02-28 12:18:28] (step=0012198) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.386617100371747, LR: 0.0003 +[2026-02-28 12:18:36] (step=0012199) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.3868127567990607, LR: 0.0003 +[2026-02-28 12:18:43] (step=0012200) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 2.3870084132263747, LR: 0.0003 +[2026-02-28 12:18:51] (step=0012201) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 2.3872040696536883, LR: 0.0003 +[2026-02-28 12:18:59] (step=0012202) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 2.387399726081002, LR: 0.0003 +[2026-02-28 12:19:07] (step=0012203) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.3875953825083154, LR: 0.0003 +[2026-02-28 12:19:15] (step=0012204) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.387791038935629, LR: 0.0003 +[2026-02-28 12:19:23] (step=0012205) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.3879866953629425, LR: 0.0003 +[2026-02-28 12:19:30] (step=0012206) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.3881823517902565, LR: 0.0003 +[2026-02-28 12:19:38] (step=0012207) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.38837800821757, LR: 0.0003 +[2026-02-28 12:19:46] (step=0012208) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.3885736646448836, LR: 0.0003 +[2026-02-28 12:19:54] (step=0012209) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.388769321072197, LR: 0.0003 +[2026-02-28 12:20:02] (step=0012210) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.3889649774995108, LR: 0.0003 +[2026-02-28 12:20:10] (step=0012211) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.3891606339268243, LR: 0.0003 +[2026-02-28 12:20:17] (step=0012212) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.3893562903541383, LR: 0.0003 +[2026-02-28 12:20:25] (step=0012213) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.389551946781452, LR: 0.0003 +[2026-02-28 12:20:33] (step=0012214) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 2.3897476032087654, LR: 0.0003 +[2026-02-28 12:20:41] (step=0012215) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 2.389943259636079, LR: 0.0003 +[2026-02-28 12:20:49] (step=0012216) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 2.3901389160633926, LR: 0.0003 +[2026-02-28 12:20:57] (step=0012217) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 2.390334572490706, LR: 0.0003 +[2026-02-28 12:21:05] (step=0012218) Train Loss: 0.4540, Train Steps/Sec: 0.12, Epoch: 2.39053022891802, LR: 0.0003 +[2026-02-28 12:21:13] (step=0012219) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.3907258853453337, LR: 0.0003 +[2026-02-28 12:21:20] (step=0012220) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.3909215417726473, LR: 0.0003 +[2026-02-28 12:21:28] (step=0012221) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.391117198199961, LR: 0.0003 +[2026-02-28 12:21:36] (step=0012222) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.3913128546272744, LR: 0.0003 +[2026-02-28 12:21:44] (step=0012223) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.391508511054588, LR: 0.0003 +[2026-02-28 12:21:52] (step=0012224) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.391704167481902, LR: 0.0003 +[2026-02-28 12:22:00] (step=0012225) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 2.3918998239092155, LR: 0.0003 +[2026-02-28 12:22:08] (step=0012226) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.392095480336529, LR: 0.0003 +[2026-02-28 12:22:15] (step=0012227) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.3922911367638426, LR: 0.0003 +[2026-02-28 12:22:23] (step=0012228) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.392486793191156, LR: 0.0003 +[2026-02-28 12:22:31] (step=0012229) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.3926824496184698, LR: 0.0003 +[2026-02-28 12:22:39] (step=0012230) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.3928781060457838, LR: 0.0003 +[2026-02-28 12:22:47] (step=0012231) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.3930737624730973, LR: 0.0003 +[2026-02-28 12:22:55] (step=0012232) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.393269418900411, LR: 0.0003 +[2026-02-28 12:23:02] (step=0012233) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.3934650753277245, LR: 0.0003 +[2026-02-28 12:23:10] (step=0012234) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.393660731755038, LR: 0.0003 +[2026-02-28 12:23:18] (step=0012235) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.3938563881823516, LR: 0.0003 +[2026-02-28 12:23:26] (step=0012236) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.3940520446096656, LR: 0.0003 +[2026-02-28 12:23:34] (step=0012237) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.394247701036979, LR: 0.0003 +[2026-02-28 12:23:42] (step=0012238) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.3944433574642927, LR: 0.0003 +[2026-02-28 12:23:49] (step=0012239) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.3946390138916063, LR: 0.0003 +[2026-02-28 12:23:57] (step=0012240) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.39483467031892, LR: 0.0003 +[2026-02-28 12:24:05] (step=0012241) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.3950303267462334, LR: 0.0003 +[2026-02-28 12:24:13] (step=0012242) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.3952259831735474, LR: 0.0003 +[2026-02-28 12:24:21] (step=0012243) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.395421639600861, LR: 0.0003 +[2026-02-28 12:24:29] (step=0012244) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.3956172960281745, LR: 0.0003 +[2026-02-28 12:24:36] (step=0012245) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.395812952455488, LR: 0.0003 +[2026-02-28 12:24:44] (step=0012246) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 2.3960086088828016, LR: 0.0003 +[2026-02-28 12:24:52] (step=0012247) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.3962042653101157, LR: 0.0003 +[2026-02-28 12:25:00] (step=0012248) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 2.396399921737429, LR: 0.0003 +[2026-02-28 12:25:08] (step=0012249) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.396595578164743, LR: 0.0003 +[2026-02-28 12:25:16] (step=0012250) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 2.3967912345920563, LR: 0.0003 +[2026-02-28 12:25:24] (step=0012251) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.39698689101937, LR: 0.0003 +[2026-02-28 12:25:31] (step=0012252) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.3971825474466835, LR: 0.0003 +[2026-02-28 12:25:39] (step=0012253) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 2.3973782038739975, LR: 0.0003 +[2026-02-28 12:25:47] (step=0012254) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.397573860301311, LR: 0.0003 +[2026-02-28 12:25:55] (step=0012255) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.3977695167286246, LR: 0.0003 +[2026-02-28 12:26:03] (step=0012256) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.397965173155938, LR: 0.0003 +[2026-02-28 12:26:11] (step=0012257) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 2.3981608295832517, LR: 0.0003 +[2026-02-28 12:26:18] (step=0012258) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.3983564860105653, LR: 0.0003 +[2026-02-28 12:26:26] (step=0012259) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 2.3985521424378793, LR: 0.0003 +[2026-02-28 12:26:34] (step=0012260) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.398747798865193, LR: 0.0003 +[2026-02-28 12:26:42] (step=0012261) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.3989434552925064, LR: 0.0003 +[2026-02-28 12:26:50] (step=0012262) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.39913911171982, LR: 0.0003 +[2026-02-28 12:26:58] (step=0012263) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.3993347681471335, LR: 0.0003 +[2026-02-28 12:27:06] (step=0012264) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.399530424574447, LR: 0.0003 +[2026-02-28 12:27:14] (step=0012265) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.399726081001761, LR: 0.0003 +[2026-02-28 12:27:21] (step=0012266) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.3999217374290747, LR: 0.0003 +[2026-02-28 12:27:29] (step=0012267) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.4001173938563882, LR: 0.0003 +[2026-02-28 12:27:37] (step=0012268) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.400313050283702, LR: 0.0003 +[2026-02-28 12:27:45] (step=0012269) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.4005087067110153, LR: 0.0003 +[2026-02-28 12:27:53] (step=0012270) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.400704363138329, LR: 0.0003 +[2026-02-28 12:28:01] (step=0012271) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 2.400900019565643, LR: 0.0003 +[2026-02-28 12:28:09] (step=0012272) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.4010956759929565, LR: 0.0003 +[2026-02-28 12:28:16] (step=0012273) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.40129133242027, LR: 0.0003 +[2026-02-28 12:28:24] (step=0012274) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.4014869888475836, LR: 0.0003 +[2026-02-28 12:28:32] (step=0012275) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.401682645274897, LR: 0.0003 +[2026-02-28 12:28:40] (step=0012276) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.4018783017022107, LR: 0.0003 +[2026-02-28 12:28:48] (step=0012277) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 2.4020739581295247, LR: 0.0003 +[2026-02-28 12:28:56] (step=0012278) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.4022696145568383, LR: 0.0003 +[2026-02-28 12:29:03] (step=0012279) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 2.402465270984152, LR: 0.0003 +[2026-02-28 12:29:11] (step=0012280) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.4026609274114654, LR: 0.0003 +[2026-02-28 12:29:19] (step=0012281) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.402856583838779, LR: 0.0003 +[2026-02-28 12:29:27] (step=0012282) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 2.4030522402660925, LR: 0.0003 +[2026-02-28 12:29:35] (step=0012283) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.4032478966934065, LR: 0.0003 +[2026-02-28 12:29:43] (step=0012284) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.40344355312072, LR: 0.0003 +[2026-02-28 12:29:51] (step=0012285) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.4036392095480337, LR: 0.0003 +[2026-02-28 12:29:58] (step=0012286) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.4038348659753472, LR: 0.0003 +[2026-02-28 12:30:06] (step=0012287) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.404030522402661, LR: 0.0003 +[2026-02-28 12:30:14] (step=0012288) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.4042261788299744, LR: 0.0003 +[2026-02-28 12:30:22] (step=0012289) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 2.4044218352572884, LR: 0.0003 +[2026-02-28 12:30:30] (step=0012290) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.404617491684602, LR: 0.0003 +[2026-02-28 12:30:38] (step=0012291) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.4048131481119155, LR: 0.0003 +[2026-02-28 12:30:45] (step=0012292) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.405008804539229, LR: 0.0003 +[2026-02-28 12:30:53] (step=0012293) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.4052044609665426, LR: 0.0003 +[2026-02-28 12:31:01] (step=0012294) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.405400117393856, LR: 0.0003 +[2026-02-28 12:31:09] (step=0012295) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.40559577382117, LR: 0.0003 +[2026-02-28 12:31:17] (step=0012296) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.4057914302484837, LR: 0.0003 +[2026-02-28 12:31:25] (step=0012297) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.4059870866757973, LR: 0.0003 +[2026-02-28 12:31:32] (step=0012298) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.406182743103111, LR: 0.0003 +[2026-02-28 12:31:40] (step=0012299) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.4063783995304244, LR: 0.0003 +[2026-02-28 12:31:48] (step=0012300) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.406574055957738, LR: 0.0003 +[2026-02-28 12:31:56] (step=0012301) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.406769712385052, LR: 0.0003 +[2026-02-28 12:32:04] (step=0012302) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.4069653688123656, LR: 0.0003 +[2026-02-28 12:32:12] (step=0012303) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.407161025239679, LR: 0.0003 +[2026-02-28 12:32:19] (step=0012304) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.4073566816669927, LR: 0.0003 +[2026-02-28 12:32:27] (step=0012305) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.4075523380943062, LR: 0.0003 +[2026-02-28 12:32:35] (step=0012306) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.4077479945216202, LR: 0.0003 +[2026-02-28 12:32:43] (step=0012307) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.407943650948934, LR: 0.0003 +[2026-02-28 12:32:51] (step=0012308) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 2.4081393073762474, LR: 0.0003 +[2026-02-28 12:32:59] (step=0012309) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 2.408334963803561, LR: 0.0003 +[2026-02-28 12:33:07] (step=0012310) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.4085306202308745, LR: 0.0003 +[2026-02-28 12:33:14] (step=0012311) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.408726276658188, LR: 0.0003 +[2026-02-28 12:33:22] (step=0012312) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.408921933085502, LR: 0.0003 +[2026-02-28 12:33:30] (step=0012313) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.4091175895128156, LR: 0.0003 +[2026-02-28 12:33:38] (step=0012314) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.409313245940129, LR: 0.0003 +[2026-02-28 12:33:46] (step=0012315) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.4095089023674428, LR: 0.0003 +[2026-02-28 12:33:54] (step=0012316) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.4097045587947563, LR: 0.0003 +[2026-02-28 12:34:02] (step=0012317) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 2.40990021522207, LR: 0.0003 +[2026-02-28 12:34:10] (step=0012318) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 2.410095871649384, LR: 0.0003 +[2026-02-28 12:34:17] (step=0012319) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.4102915280766974, LR: 0.0003 +[2026-02-28 12:34:25] (step=0012320) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.410487184504011, LR: 0.0003 +[2026-02-28 12:34:33] (step=0012321) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.4106828409313246, LR: 0.0003 +[2026-02-28 12:34:41] (step=0012322) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.410878497358638, LR: 0.0003 +[2026-02-28 12:34:49] (step=0012323) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.4110741537859517, LR: 0.0003 +[2026-02-28 12:34:57] (step=0012324) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.4112698102132657, LR: 0.0003 +[2026-02-28 12:35:04] (step=0012325) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.4114654666405793, LR: 0.0003 +[2026-02-28 12:35:12] (step=0012326) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.411661123067893, LR: 0.0003 +[2026-02-28 12:35:20] (step=0012327) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.4118567794952064, LR: 0.0003 +[2026-02-28 12:35:28] (step=0012328) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.41205243592252, LR: 0.0003 +[2026-02-28 12:35:36] (step=0012329) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.4122480923498335, LR: 0.0003 +[2026-02-28 12:35:44] (step=0012330) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.4124437487771475, LR: 0.0003 +[2026-02-28 12:35:52] (step=0012331) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.412639405204461, LR: 0.0003 +[2026-02-28 12:35:59] (step=0012332) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.4128350616317746, LR: 0.0003 +[2026-02-28 12:36:07] (step=0012333) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.413030718059088, LR: 0.0003 +[2026-02-28 12:36:15] (step=0012334) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.4132263744864018, LR: 0.0003 +[2026-02-28 12:36:23] (step=0012335) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.4134220309137153, LR: 0.0003 +[2026-02-28 12:36:31] (step=0012336) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.4136176873410293, LR: 0.0003 +[2026-02-28 12:36:39] (step=0012337) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.413813343768343, LR: 0.0003 +[2026-02-28 12:36:46] (step=0012338) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.4140090001956565, LR: 0.0003 +[2026-02-28 12:36:54] (step=0012339) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 2.41420465662297, LR: 0.0003 +[2026-02-28 12:37:02] (step=0012340) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.4144003130502836, LR: 0.0003 +[2026-02-28 12:37:10] (step=0012341) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.414595969477597, LR: 0.0003 +[2026-02-28 12:37:18] (step=0012342) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.414791625904911, LR: 0.0003 +[2026-02-28 12:37:26] (step=0012343) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.4149872823322247, LR: 0.0003 +[2026-02-28 12:37:34] (step=0012344) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.4151829387595383, LR: 0.0003 +[2026-02-28 12:37:41] (step=0012345) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.415378595186852, LR: 0.0003 +[2026-02-28 12:37:49] (step=0012346) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.4155742516141654, LR: 0.0003 +[2026-02-28 12:37:57] (step=0012347) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.415769908041479, LR: 0.0003 +[2026-02-28 12:38:05] (step=0012348) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.415965564468793, LR: 0.0003 +[2026-02-28 12:38:13] (step=0012349) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.4161612208961065, LR: 0.0003 +[2026-02-28 12:38:21] (step=0012350) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.41635687732342, LR: 0.0003 +[2026-02-28 12:38:28] (step=0012351) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 2.4165525337507336, LR: 0.0003 +[2026-02-28 12:38:36] (step=0012352) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.416748190178047, LR: 0.0003 +[2026-02-28 12:38:44] (step=0012353) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 2.4169438466053608, LR: 0.0003 +[2026-02-28 12:38:52] (step=0012354) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.4171395030326748, LR: 0.0003 +[2026-02-28 12:39:00] (step=0012355) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.4173351594599883, LR: 0.0003 +[2026-02-28 12:39:08] (step=0012356) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 2.417530815887302, LR: 0.0003 +[2026-02-28 12:39:15] (step=0012357) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.4177264723146155, LR: 0.0003 +[2026-02-28 12:39:23] (step=0012358) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.417922128741929, LR: 0.0003 +[2026-02-28 12:39:31] (step=0012359) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 2.418117785169243, LR: 0.0003 +[2026-02-28 12:39:39] (step=0012360) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.4183134415965566, LR: 0.0003 +[2026-02-28 12:39:47] (step=0012361) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 2.41850909802387, LR: 0.0003 +[2026-02-28 12:39:55] (step=0012362) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.4187047544511837, LR: 0.0003 +[2026-02-28 12:40:03] (step=0012363) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.4189004108784973, LR: 0.0003 +[2026-02-28 12:40:10] (step=0012364) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.419096067305811, LR: 0.0003 +[2026-02-28 12:40:18] (step=0012365) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.419291723733125, LR: 0.0003 +[2026-02-28 12:40:26] (step=0012366) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.4194873801604384, LR: 0.0003 +[2026-02-28 12:40:34] (step=0012367) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.419683036587752, LR: 0.0003 +[2026-02-28 12:40:42] (step=0012368) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.4198786930150655, LR: 0.0003 +[2026-02-28 12:40:50] (step=0012369) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 2.420074349442379, LR: 0.0003 +[2026-02-28 12:40:58] (step=0012370) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 2.4202700058696927, LR: 0.0003 +[2026-02-28 12:41:06] (step=0012371) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.4204656622970067, LR: 0.0003 +[2026-02-28 12:41:13] (step=0012372) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.42066131872432, LR: 0.0003 +[2026-02-28 12:41:21] (step=0012373) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.420856975151634, LR: 0.0003 +[2026-02-28 12:41:29] (step=0012374) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 2.4210526315789473, LR: 0.0003 +[2026-02-28 12:41:37] (step=0012375) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.421248288006261, LR: 0.0003 +[2026-02-28 12:41:45] (step=0012376) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.4214439444335745, LR: 0.0003 +[2026-02-28 12:41:53] (step=0012377) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 2.4216396008608885, LR: 0.0003 +[2026-02-28 12:42:01] (step=0012378) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.421835257288202, LR: 0.0003 +[2026-02-28 12:42:08] (step=0012379) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.4220309137155156, LR: 0.0003 +[2026-02-28 12:42:16] (step=0012380) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 2.422226570142829, LR: 0.0003 +[2026-02-28 12:42:24] (step=0012381) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.4224222265701427, LR: 0.0003 +[2026-02-28 12:42:32] (step=0012382) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.4226178829974563, LR: 0.0003 +[2026-02-28 12:42:40] (step=0012383) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.4228135394247703, LR: 0.0003 +[2026-02-28 12:42:48] (step=0012384) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 2.423009195852084, LR: 0.0003 +[2026-02-28 12:42:55] (step=0012385) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.4232048522793974, LR: 0.0003 +[2026-02-28 12:43:03] (step=0012386) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.423400508706711, LR: 0.0003 +[2026-02-28 12:43:11] (step=0012387) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.4235961651340245, LR: 0.0003 +[2026-02-28 12:43:19] (step=0012388) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.423791821561338, LR: 0.0003 +[2026-02-28 12:43:27] (step=0012389) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.423987477988652, LR: 0.0003 +[2026-02-28 12:43:35] (step=0012390) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.4241831344159657, LR: 0.0003 +[2026-02-28 12:43:43] (step=0012391) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.4243787908432792, LR: 0.0003 +[2026-02-28 12:43:50] (step=0012392) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.424574447270593, LR: 0.0003 +[2026-02-28 12:43:58] (step=0012393) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.4247701036979064, LR: 0.0003 +[2026-02-28 12:44:06] (step=0012394) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.42496576012522, LR: 0.0003 +[2026-02-28 12:44:14] (step=0012395) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.425161416552534, LR: 0.0003 +[2026-02-28 12:44:22] (step=0012396) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.4253570729798475, LR: 0.0003 +[2026-02-28 12:44:30] (step=0012397) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.425552729407161, LR: 0.0003 +[2026-02-28 12:44:37] (step=0012398) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.4257483858344746, LR: 0.0003 +[2026-02-28 12:44:45] (step=0012399) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.425944042261788, LR: 0.0003 +[2026-02-28 12:44:53] (step=0012400) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.4261396986891017, LR: 0.0003 +[2026-02-28 12:45:01] (step=0012401) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.4263353551164157, LR: 0.0003 +[2026-02-28 12:45:09] (step=0012402) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.4265310115437293, LR: 0.0003 +[2026-02-28 12:45:17] (step=0012403) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.426726667971043, LR: 0.0003 +[2026-02-28 12:45:25] (step=0012404) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.4269223243983564, LR: 0.0003 +[2026-02-28 12:45:32] (step=0012405) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.42711798082567, LR: 0.0003 +[2026-02-28 12:45:40] (step=0012406) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.4273136372529835, LR: 0.0003 +[2026-02-28 12:45:48] (step=0012407) Train Loss: 0.4719, Train Steps/Sec: 0.13, Epoch: 2.4275092936802976, LR: 0.0003 +[2026-02-28 12:45:56] (step=0012408) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 2.427704950107611, LR: 0.0003 +[2026-02-28 12:46:04] (step=0012409) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.4279006065349247, LR: 0.0003 +[2026-02-28 12:46:12] (step=0012410) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.4280962629622382, LR: 0.0003 +[2026-02-28 12:46:20] (step=0012411) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.428291919389552, LR: 0.0003 +[2026-02-28 12:46:27] (step=0012412) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.4284875758168654, LR: 0.0003 +[2026-02-28 12:46:35] (step=0012413) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.4286832322441794, LR: 0.0003 +[2026-02-28 12:46:43] (step=0012414) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.428878888671493, LR: 0.0003 +[2026-02-28 12:46:51] (step=0012415) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.4290745450988065, LR: 0.0003 +[2026-02-28 12:46:59] (step=0012416) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.42927020152612, LR: 0.0003 +[2026-02-28 12:47:07] (step=0012417) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.4294658579534336, LR: 0.0003 +[2026-02-28 12:47:14] (step=0012418) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 2.4296615143807476, LR: 0.0003 +[2026-02-28 12:47:22] (step=0012419) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.429857170808061, LR: 0.0003 +[2026-02-28 12:47:30] (step=0012420) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.4300528272353747, LR: 0.0003 +[2026-02-28 12:47:38] (step=0012421) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.4302484836626883, LR: 0.0003 +[2026-02-28 12:47:46] (step=0012422) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.430444140090002, LR: 0.0003 +[2026-02-28 12:47:54] (step=0012423) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.4306397965173154, LR: 0.0003 +[2026-02-28 12:48:02] (step=0012424) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.4308354529446294, LR: 0.0003 +[2026-02-28 12:48:09] (step=0012425) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.431031109371943, LR: 0.0003 +[2026-02-28 12:48:17] (step=0012426) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 2.4312267657992566, LR: 0.0003 +[2026-02-28 12:48:25] (step=0012427) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.43142242222657, LR: 0.0003 +[2026-02-28 12:48:33] (step=0012428) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.4316180786538837, LR: 0.0003 +[2026-02-28 12:48:41] (step=0012429) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.4318137350811972, LR: 0.0003 +[2026-02-28 12:48:49] (step=0012430) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.4320093915085113, LR: 0.0003 +[2026-02-28 12:48:57] (step=0012431) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 2.432205047935825, LR: 0.0003 +[2026-02-28 12:49:04] (step=0012432) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 2.4324007043631384, LR: 0.0003 +[2026-02-28 12:49:12] (step=0012433) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.432596360790452, LR: 0.0003 +[2026-02-28 12:49:20] (step=0012434) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.4327920172177655, LR: 0.0003 +[2026-02-28 12:49:28] (step=0012435) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 2.432987673645079, LR: 0.0003 +[2026-02-28 12:49:36] (step=0012436) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.433183330072393, LR: 0.0003 +[2026-02-28 12:49:44] (step=0012437) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 2.4333789864997066, LR: 0.0003 +[2026-02-28 12:49:51] (step=0012438) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.43357464292702, LR: 0.0003 +[2026-02-28 12:49:59] (step=0012439) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.4337702993543338, LR: 0.0003 +[2026-02-28 12:50:07] (step=0012440) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.4339659557816473, LR: 0.0003 +[2026-02-28 12:50:15] (step=0012441) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.434161612208961, LR: 0.0003 +[2026-02-28 12:50:23] (step=0012442) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.434357268636275, LR: 0.0003 +[2026-02-28 12:50:31] (step=0012443) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.4345529250635884, LR: 0.0003 +[2026-02-28 12:50:38] (step=0012444) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.434748581490902, LR: 0.0003 +[2026-02-28 12:50:46] (step=0012445) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.4349442379182156, LR: 0.0003 +[2026-02-28 12:50:54] (step=0012446) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.435139894345529, LR: 0.0003 +[2026-02-28 12:51:02] (step=0012447) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.4353355507728427, LR: 0.0003 +[2026-02-28 12:51:10] (step=0012448) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.4355312072001567, LR: 0.0003 +[2026-02-28 12:51:18] (step=0012449) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 2.4357268636274703, LR: 0.0003 +[2026-02-28 12:51:25] (step=0012450) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.435922520054784, LR: 0.0003 +[2026-02-28 12:51:33] (step=0012451) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.4361181764820974, LR: 0.0003 +[2026-02-28 12:51:41] (step=0012452) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.436313832909411, LR: 0.0003 +[2026-02-28 12:51:49] (step=0012453) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.4365094893367245, LR: 0.0003 +[2026-02-28 12:51:57] (step=0012454) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.4367051457640385, LR: 0.0003 +[2026-02-28 12:52:05] (step=0012455) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 2.436900802191352, LR: 0.0003 +[2026-02-28 12:52:12] (step=0012456) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.4370964586186656, LR: 0.0003 +[2026-02-28 12:52:20] (step=0012457) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.437292115045979, LR: 0.0003 +[2026-02-28 12:52:28] (step=0012458) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.4374877714732928, LR: 0.0003 +[2026-02-28 12:52:36] (step=0012459) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.4376834279006063, LR: 0.0003 +[2026-02-28 12:52:44] (step=0012460) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.4378790843279203, LR: 0.0003 +[2026-02-28 12:52:52] (step=0012461) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.438074740755234, LR: 0.0003 +[2026-02-28 12:53:00] (step=0012462) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.4382703971825475, LR: 0.0003 +[2026-02-28 12:53:07] (step=0012463) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 2.438466053609861, LR: 0.0003 +[2026-02-28 12:53:15] (step=0012464) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.4386617100371746, LR: 0.0003 +[2026-02-28 12:53:23] (step=0012465) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 2.438857366464488, LR: 0.0003 +[2026-02-28 12:53:31] (step=0012466) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 2.439053022891802, LR: 0.0003 +[2026-02-28 12:53:39] (step=0012467) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.4392486793191157, LR: 0.0003 +[2026-02-28 12:53:47] (step=0012468) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.4394443357464293, LR: 0.0003 +[2026-02-28 12:53:55] (step=0012469) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 2.439639992173743, LR: 0.0003 +[2026-02-28 12:54:02] (step=0012470) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.4398356486010564, LR: 0.0003 +[2026-02-28 12:54:10] (step=0012471) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.4400313050283704, LR: 0.0003 +[2026-02-28 12:54:18] (step=0012472) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.440226961455684, LR: 0.0003 +[2026-02-28 12:54:26] (step=0012473) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.4404226178829975, LR: 0.0003 +[2026-02-28 12:54:34] (step=0012474) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.440618274310311, LR: 0.0003 +[2026-02-28 12:54:42] (step=0012475) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.4408139307376246, LR: 0.0003 +[2026-02-28 12:54:49] (step=0012476) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.441009587164938, LR: 0.0003 +[2026-02-28 12:54:57] (step=0012477) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.441205243592252, LR: 0.0003 +[2026-02-28 12:55:05] (step=0012478) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.441400900019566, LR: 0.0003 +[2026-02-28 12:55:13] (step=0012479) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.4415965564468793, LR: 0.0003 +[2026-02-28 12:55:21] (step=0012480) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.441792212874193, LR: 0.0003 +[2026-02-28 12:55:29] (step=0012481) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.4419878693015065, LR: 0.0003 +[2026-02-28 12:55:37] (step=0012482) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.44218352572882, LR: 0.0003 +[2026-02-28 12:55:44] (step=0012483) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 2.442379182156134, LR: 0.0003 +[2026-02-28 12:55:52] (step=0012484) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 2.4425748385834476, LR: 0.0003 +[2026-02-28 12:56:00] (step=0012485) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.442770495010761, LR: 0.0003 +[2026-02-28 12:56:08] (step=0012486) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.4429661514380747, LR: 0.0003 +[2026-02-28 12:56:16] (step=0012487) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.4431618078653883, LR: 0.0003 +[2026-02-28 12:56:24] (step=0012488) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.443357464292702, LR: 0.0003 +[2026-02-28 12:56:31] (step=0012489) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.443553120720016, LR: 0.0003 +[2026-02-28 12:56:39] (step=0012490) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.4437487771473294, LR: 0.0003 +[2026-02-28 12:56:47] (step=0012491) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.443944433574643, LR: 0.0003 +[2026-02-28 12:56:55] (step=0012492) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.4441400900019565, LR: 0.0003 +[2026-02-28 12:57:03] (step=0012493) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.44433574642927, LR: 0.0003 +[2026-02-28 12:57:11] (step=0012494) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.4445314028565837, LR: 0.0003 +[2026-02-28 12:57:18] (step=0012495) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.4447270592838977, LR: 0.0003 +[2026-02-28 12:57:26] (step=0012496) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.4449227157112112, LR: 0.0003 +[2026-02-28 12:57:34] (step=0012497) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.445118372138525, LR: 0.0003 +[2026-02-28 12:57:42] (step=0012498) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 2.4453140285658383, LR: 0.0003 +[2026-02-28 12:57:50] (step=0012499) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.445509684993152, LR: 0.0003 +[2026-02-28 12:57:58] (step=0012500) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.4457053414204655, LR: 0.0003 +[2026-02-28 12:57:58] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0012500/ +[2026-02-28 12:58:06] (step=0012501) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.4459009978477795, LR: 0.0003 +[2026-02-28 12:58:13] (step=0012502) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.446096654275093, LR: 0.0003 +[2026-02-28 12:58:21] (step=0012503) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.4462923107024066, LR: 0.0003 +[2026-02-28 12:58:29] (step=0012504) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.44648796712972, LR: 0.0003 +[2026-02-28 12:58:37] (step=0012505) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.4466836235570337, LR: 0.0003 +[2026-02-28 12:58:45] (step=0012506) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.4468792799843473, LR: 0.0003 +[2026-02-28 12:58:53] (step=0012507) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 2.4470749364116613, LR: 0.0003 +[2026-02-28 12:59:00] (step=0012508) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.447270592838975, LR: 0.0003 +[2026-02-28 12:59:08] (step=0012509) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.4474662492662884, LR: 0.0003 +[2026-02-28 12:59:16] (step=0012510) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.447661905693602, LR: 0.0003 +[2026-02-28 12:59:24] (step=0012511) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.4478575621209155, LR: 0.0003 +[2026-02-28 12:59:32] (step=0012512) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.448053218548229, LR: 0.0003 +[2026-02-28 12:59:40] (step=0012513) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.448248874975543, LR: 0.0003 +[2026-02-28 12:59:48] (step=0012514) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.4484445314028567, LR: 0.0003 +[2026-02-28 12:59:55] (step=0012515) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.4486401878301702, LR: 0.0003 +[2026-02-28 13:00:03] (step=0012516) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.448835844257484, LR: 0.0003 +[2026-02-28 13:00:11] (step=0012517) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.4490315006847974, LR: 0.0003 +[2026-02-28 13:00:19] (step=0012518) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 2.449227157112111, LR: 0.0003 +[2026-02-28 13:00:27] (step=0012519) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.449422813539425, LR: 0.0003 +[2026-02-28 13:00:35] (step=0012520) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.4496184699667385, LR: 0.0003 +[2026-02-28 13:00:42] (step=0012521) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.449814126394052, LR: 0.0003 +[2026-02-28 13:00:50] (step=0012522) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 2.4500097828213656, LR: 0.0003 +[2026-02-28 13:00:58] (step=0012523) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.450205439248679, LR: 0.0003 +[2026-02-28 13:01:06] (step=0012524) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.4504010956759927, LR: 0.0003 +[2026-02-28 13:01:14] (step=0012525) Train Loss: 0.4599, Train Steps/Sec: 0.12, Epoch: 2.4505967521033067, LR: 0.0003 +[2026-02-28 13:01:22] (step=0012526) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.4507924085306203, LR: 0.0003 +[2026-02-28 13:01:30] (step=0012527) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.450988064957934, LR: 0.0003 +[2026-02-28 13:01:37] (step=0012528) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 2.4511837213852474, LR: 0.0003 +[2026-02-28 13:01:45] (step=0012529) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.451379377812561, LR: 0.0003 +[2026-02-28 13:01:53] (step=0012530) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.451575034239875, LR: 0.0003 +[2026-02-28 13:02:01] (step=0012531) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.4517706906671886, LR: 0.0003 +[2026-02-28 13:02:09] (step=0012532) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.451966347094502, LR: 0.0003 +[2026-02-28 13:02:17] (step=0012533) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.4521620035218157, LR: 0.0003 +[2026-02-28 13:02:25] (step=0012534) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.4523576599491292, LR: 0.0003 +[2026-02-28 13:02:32] (step=0012535) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 2.452553316376443, LR: 0.0003 +[2026-02-28 13:02:40] (step=0012536) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.452748972803757, LR: 0.0003 +[2026-02-28 13:02:48] (step=0012537) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.4529446292310704, LR: 0.0003 +[2026-02-28 13:02:56] (step=0012538) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.453140285658384, LR: 0.0003 +[2026-02-28 13:03:04] (step=0012539) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 2.4533359420856975, LR: 0.0003 +[2026-02-28 13:03:12] (step=0012540) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.453531598513011, LR: 0.0003 +[2026-02-28 13:03:19] (step=0012541) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 2.4537272549403246, LR: 0.0003 +[2026-02-28 13:03:27] (step=0012542) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 2.4539229113676386, LR: 0.0003 +[2026-02-28 13:03:35] (step=0012543) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 2.454118567794952, LR: 0.0003 +[2026-02-28 13:03:43] (step=0012544) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.4543142242222658, LR: 0.0003 +[2026-02-28 13:03:51] (step=0012545) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.4545098806495793, LR: 0.0003 +[2026-02-28 13:03:59] (step=0012546) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 2.454705537076893, LR: 0.0003 +[2026-02-28 13:04:06] (step=0012547) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.4549011935042064, LR: 0.0003 +[2026-02-28 13:04:14] (step=0012548) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.4550968499315204, LR: 0.0003 +[2026-02-28 13:04:22] (step=0012549) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.455292506358834, LR: 0.0003 +[2026-02-28 13:04:30] (step=0012550) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.4554881627861476, LR: 0.0003 +[2026-02-28 13:04:38] (step=0012551) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.455683819213461, LR: 0.0003 +[2026-02-28 13:04:46] (step=0012552) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.4558794756407747, LR: 0.0003 +[2026-02-28 13:04:53] (step=0012553) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.4560751320680883, LR: 0.0003 +[2026-02-28 13:05:01] (step=0012554) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.4562707884954023, LR: 0.0003 +[2026-02-28 13:05:09] (step=0012555) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.456466444922716, LR: 0.0003 +[2026-02-28 13:05:17] (step=0012556) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 2.4566621013500294, LR: 0.0003 +[2026-02-28 13:05:25] (step=0012557) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.456857757777343, LR: 0.0003 +[2026-02-28 13:05:33] (step=0012558) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.4570534142046565, LR: 0.0003 +[2026-02-28 13:05:40] (step=0012559) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.45724907063197, LR: 0.0003 +[2026-02-28 13:05:48] (step=0012560) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 2.457444727059284, LR: 0.0003 +[2026-02-28 13:05:56] (step=0012561) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.4576403834865976, LR: 0.0003 +[2026-02-28 13:06:04] (step=0012562) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.457836039913911, LR: 0.0003 +[2026-02-28 13:06:12] (step=0012563) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.4580316963412248, LR: 0.0003 +[2026-02-28 13:06:20] (step=0012564) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.4582273527685383, LR: 0.0003 +[2026-02-28 13:06:28] (step=0012565) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 2.458423009195852, LR: 0.0003 +[2026-02-28 13:06:36] (step=0012566) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.458618665623166, LR: 0.0003 +[2026-02-28 13:06:43] (step=0012567) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.4588143220504795, LR: 0.0003 +[2026-02-28 13:06:51] (step=0012568) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 2.459009978477793, LR: 0.0003 +[2026-02-28 13:06:59] (step=0012569) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.4592056349051066, LR: 0.0003 +[2026-02-28 13:07:07] (step=0012570) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.45940129133242, LR: 0.0003 +[2026-02-28 13:07:15] (step=0012571) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 2.4595969477597337, LR: 0.0003 +[2026-02-28 13:07:23] (step=0012572) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.4597926041870477, LR: 0.0003 +[2026-02-28 13:07:30] (step=0012573) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.4599882606143613, LR: 0.0003 +[2026-02-28 13:07:38] (step=0012574) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.460183917041675, LR: 0.0003 +[2026-02-28 13:07:46] (step=0012575) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.4603795734689884, LR: 0.0003 +[2026-02-28 13:07:54] (step=0012576) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.460575229896302, LR: 0.0003 +[2026-02-28 13:08:02] (step=0012577) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 2.4607708863236155, LR: 0.0003 +[2026-02-28 13:08:10] (step=0012578) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.4609665427509295, LR: 0.0003 +[2026-02-28 13:08:18] (step=0012579) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.461162199178243, LR: 0.0003 +[2026-02-28 13:08:25] (step=0012580) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.4613578556055566, LR: 0.0003 +[2026-02-28 13:08:33] (step=0012581) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.46155351203287, LR: 0.0003 +[2026-02-28 13:08:41] (step=0012582) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 2.4617491684601838, LR: 0.0003 +[2026-02-28 13:08:49] (step=0012583) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.4619448248874978, LR: 0.0003 +[2026-02-28 13:08:57] (step=0012584) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.4621404813148113, LR: 0.0003 +[2026-02-28 13:09:05] (step=0012585) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.462336137742125, LR: 0.0003 +[2026-02-28 13:09:13] (step=0012586) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.4625317941694385, LR: 0.0003 +[2026-02-28 13:09:20] (step=0012587) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.462727450596752, LR: 0.0003 +[2026-02-28 13:09:28] (step=0012588) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.4629231070240656, LR: 0.0003 +[2026-02-28 13:09:36] (step=0012589) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 2.4631187634513796, LR: 0.0003 +[2026-02-28 13:09:44] (step=0012590) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.463314419878693, LR: 0.0003 +[2026-02-28 13:09:52] (step=0012591) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 2.4635100763060067, LR: 0.0003 +[2026-02-28 13:10:00] (step=0012592) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.4637057327333203, LR: 0.0003 +[2026-02-28 13:10:07] (step=0012593) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.463901389160634, LR: 0.0003 +[2026-02-28 13:10:15] (step=0012594) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.4640970455879474, LR: 0.0003 +[2026-02-28 13:10:23] (step=0012595) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.4642927020152614, LR: 0.0003 +[2026-02-28 13:10:31] (step=0012596) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 2.464488358442575, LR: 0.0003 +[2026-02-28 13:10:39] (step=0012597) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.4646840148698885, LR: 0.0003 +[2026-02-28 13:10:47] (step=0012598) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.464879671297202, LR: 0.0003 +[2026-02-28 13:10:54] (step=0012599) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.4650753277245157, LR: 0.0003 +[2026-02-28 13:11:02] (step=0012600) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.465270984151829, LR: 0.0003 +[2026-02-28 13:11:10] (step=0012601) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.465466640579143, LR: 0.0003 +[2026-02-28 13:11:18] (step=0012602) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.465662297006457, LR: 0.0003 +[2026-02-28 13:11:26] (step=0012603) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 2.4658579534337703, LR: 0.0003 +[2026-02-28 13:11:34] (step=0012604) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.466053609861084, LR: 0.0003 +[2026-02-28 13:11:42] (step=0012605) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.4662492662883975, LR: 0.0003 +[2026-02-28 13:11:49] (step=0012606) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.466444922715711, LR: 0.0003 +[2026-02-28 13:11:57] (step=0012607) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.466640579143025, LR: 0.0003 +[2026-02-28 13:12:05] (step=0012608) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.4668362355703386, LR: 0.0003 +[2026-02-28 13:12:13] (step=0012609) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.467031891997652, LR: 0.0003 +[2026-02-28 13:12:21] (step=0012610) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.4672275484249657, LR: 0.0003 +[2026-02-28 13:12:29] (step=0012611) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 2.4674232048522793, LR: 0.0003 +[2026-02-28 13:12:36] (step=0012612) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 2.467618861279593, LR: 0.0003 +[2026-02-28 13:12:44] (step=0012613) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.467814517706907, LR: 0.0003 +[2026-02-28 13:12:52] (step=0012614) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 2.4680101741342204, LR: 0.0003 +[2026-02-28 13:13:00] (step=0012615) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.468205830561534, LR: 0.0003 +[2026-02-28 13:13:08] (step=0012616) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.4684014869888475, LR: 0.0003 +[2026-02-28 13:13:16] (step=0012617) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.468597143416161, LR: 0.0003 +[2026-02-28 13:13:24] (step=0012618) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.4687927998434747, LR: 0.0003 +[2026-02-28 13:13:32] (step=0012619) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.4689884562707887, LR: 0.0003 +[2026-02-28 13:13:39] (step=0012620) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 2.4691841126981022, LR: 0.0003 +[2026-02-28 13:13:47] (step=0012621) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.469379769125416, LR: 0.0003 +[2026-02-28 13:13:55] (step=0012622) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.4695754255527294, LR: 0.0003 +[2026-02-28 13:14:03] (step=0012623) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 2.469771081980043, LR: 0.0003 +[2026-02-28 13:14:11] (step=0012624) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.4699667384073565, LR: 0.0003 +[2026-02-28 13:14:19] (step=0012625) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 2.4701623948346705, LR: 0.0003 +[2026-02-28 13:14:26] (step=0012626) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.470358051261984, LR: 0.0003 +[2026-02-28 13:14:34] (step=0012627) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 2.4705537076892976, LR: 0.0003 +[2026-02-28 13:14:42] (step=0012628) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.470749364116611, LR: 0.0003 +[2026-02-28 13:14:50] (step=0012629) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.4709450205439247, LR: 0.0003 +[2026-02-28 13:14:58] (step=0012630) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.4711406769712383, LR: 0.0003 +[2026-02-28 13:15:06] (step=0012631) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.4713363333985523, LR: 0.0003 +[2026-02-28 13:15:14] (step=0012632) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.471531989825866, LR: 0.0003 +[2026-02-28 13:15:21] (step=0012633) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.4717276462531794, LR: 0.0003 +[2026-02-28 13:15:29] (step=0012634) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 2.471923302680493, LR: 0.0003 +[2026-02-28 13:15:37] (step=0012635) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.4721189591078065, LR: 0.0003 +[2026-02-28 13:15:45] (step=0012636) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.47231461553512, LR: 0.0003 +[2026-02-28 13:15:53] (step=0012637) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.472510271962434, LR: 0.0003 +[2026-02-28 13:16:01] (step=0012638) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.4727059283897477, LR: 0.0003 +[2026-02-28 13:16:08] (step=0012639) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.4729015848170612, LR: 0.0003 +[2026-02-28 13:16:16] (step=0012640) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.473097241244375, LR: 0.0003 +[2026-02-28 13:16:24] (step=0012641) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 2.4732928976716884, LR: 0.0003 +[2026-02-28 13:16:32] (step=0012642) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 2.4734885540990024, LR: 0.0003 +[2026-02-28 13:16:40] (step=0012643) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.473684210526316, LR: 0.0003 +[2026-02-28 13:16:48] (step=0012644) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.4738798669536295, LR: 0.0003 +[2026-02-28 13:16:56] (step=0012645) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.474075523380943, LR: 0.0003 +[2026-02-28 13:17:03] (step=0012646) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 2.4742711798082566, LR: 0.0003 +[2026-02-28 13:17:11] (step=0012647) Train Loss: 0.4724, Train Steps/Sec: 0.13, Epoch: 2.47446683623557, LR: 0.0003 +[2026-02-28 13:17:19] (step=0012648) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.474662492662884, LR: 0.0003 +[2026-02-28 13:17:27] (step=0012649) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.4748581490901977, LR: 0.0003 +[2026-02-28 13:17:35] (step=0012650) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.4750538055175113, LR: 0.0003 +[2026-02-28 13:17:43] (step=0012651) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 2.475249461944825, LR: 0.0003 +[2026-02-28 13:17:50] (step=0012652) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.4754451183721384, LR: 0.0003 +[2026-02-28 13:17:58] (step=0012653) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.475640774799452, LR: 0.0003 +[2026-02-28 13:18:06] (step=0012654) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.475836431226766, LR: 0.0003 +[2026-02-28 13:18:14] (step=0012655) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 2.4760320876540796, LR: 0.0003 +[2026-02-28 13:18:22] (step=0012656) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.476227744081393, LR: 0.0003 +[2026-02-28 13:18:30] (step=0012657) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.4764234005087067, LR: 0.0003 +[2026-02-28 13:18:37] (step=0012658) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.4766190569360202, LR: 0.0003 +[2026-02-28 13:18:45] (step=0012659) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.476814713363334, LR: 0.0003 +[2026-02-28 13:18:53] (step=0012660) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.477010369790648, LR: 0.0003 +[2026-02-28 13:19:01] (step=0012661) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.4772060262179614, LR: 0.0003 +[2026-02-28 13:19:09] (step=0012662) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 2.477401682645275, LR: 0.0003 +[2026-02-28 13:19:17] (step=0012663) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.4775973390725885, LR: 0.0003 +[2026-02-28 13:19:25] (step=0012664) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.477792995499902, LR: 0.0003 +[2026-02-28 13:19:32] (step=0012665) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.4779886519272156, LR: 0.0003 +[2026-02-28 13:19:40] (step=0012666) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.4781843083545296, LR: 0.0003 +[2026-02-28 13:19:48] (step=0012667) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.478379964781843, LR: 0.0003 +[2026-02-28 13:19:56] (step=0012668) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.4785756212091568, LR: 0.0003 +[2026-02-28 13:20:04] (step=0012669) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.4787712776364703, LR: 0.0003 +[2026-02-28 13:20:12] (step=0012670) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.478966934063784, LR: 0.0003 +[2026-02-28 13:20:20] (step=0012671) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 2.4791625904910974, LR: 0.0003 +[2026-02-28 13:20:27] (step=0012672) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.4793582469184114, LR: 0.0003 +[2026-02-28 13:20:35] (step=0012673) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.479553903345725, LR: 0.0003 +[2026-02-28 13:20:43] (step=0012674) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.4797495597730386, LR: 0.0003 +[2026-02-28 13:20:51] (step=0012675) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 2.479945216200352, LR: 0.0003 +[2026-02-28 13:20:59] (step=0012676) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.4801408726276657, LR: 0.0003 +[2026-02-28 13:21:07] (step=0012677) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.4803365290549793, LR: 0.0003 +[2026-02-28 13:21:15] (step=0012678) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.4805321854822933, LR: 0.0003 +[2026-02-28 13:21:22] (step=0012679) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.480727841909607, LR: 0.0003 +[2026-02-28 13:21:30] (step=0012680) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.4809234983369204, LR: 0.0003 +[2026-02-28 13:21:38] (step=0012681) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.481119154764234, LR: 0.0003 +[2026-02-28 13:21:46] (step=0012682) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.4813148111915475, LR: 0.0003 +[2026-02-28 13:21:54] (step=0012683) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.481510467618861, LR: 0.0003 +[2026-02-28 13:22:02] (step=0012684) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.481706124046175, LR: 0.0003 +[2026-02-28 13:22:09] (step=0012685) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.4819017804734886, LR: 0.0003 +[2026-02-28 13:22:17] (step=0012686) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 2.482097436900802, LR: 0.0003 +[2026-02-28 13:22:25] (step=0012687) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.4822930933281158, LR: 0.0003 +[2026-02-28 13:22:33] (step=0012688) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.4824887497554293, LR: 0.0003 +[2026-02-28 13:22:41] (step=0012689) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 2.482684406182743, LR: 0.0003 +[2026-02-28 13:22:49] (step=0012690) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.482880062610057, LR: 0.0003 +[2026-02-28 13:22:57] (step=0012691) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.4830757190373705, LR: 0.0003 +[2026-02-28 13:23:04] (step=0012692) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.483271375464684, LR: 0.0003 +[2026-02-28 13:23:12] (step=0012693) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.4834670318919976, LR: 0.0003 +[2026-02-28 13:23:20] (step=0012694) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 2.483662688319311, LR: 0.0003 +[2026-02-28 13:23:28] (step=0012695) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.483858344746625, LR: 0.0003 +[2026-02-28 13:23:36] (step=0012696) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.4840540011739387, LR: 0.0003 +[2026-02-28 13:23:44] (step=0012697) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.4842496576012523, LR: 0.0003 +[2026-02-28 13:23:51] (step=0012698) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.484445314028566, LR: 0.0003 +[2026-02-28 13:23:59] (step=0012699) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.4846409704558794, LR: 0.0003 +[2026-02-28 13:24:07] (step=0012700) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.484836626883193, LR: 0.0003 +[2026-02-28 13:24:15] (step=0012701) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.485032283310507, LR: 0.0003 +[2026-02-28 13:24:23] (step=0012702) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.4852279397378205, LR: 0.0003 +[2026-02-28 13:24:31] (step=0012703) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.485423596165134, LR: 0.0003 +[2026-02-28 13:24:38] (step=0012704) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.4856192525924476, LR: 0.0003 +[2026-02-28 13:24:46] (step=0012705) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.485814909019761, LR: 0.0003 +[2026-02-28 13:24:54] (step=0012706) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.4860105654470748, LR: 0.0003 +[2026-02-28 13:25:02] (step=0012707) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.4862062218743888, LR: 0.0003 +[2026-02-28 13:25:10] (step=0012708) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.4864018783017023, LR: 0.0003 +[2026-02-28 13:25:18] (step=0012709) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.486597534729016, LR: 0.0003 +[2026-02-28 13:25:26] (step=0012710) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.4867931911563295, LR: 0.0003 +[2026-02-28 13:25:34] (step=0012711) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 2.486988847583643, LR: 0.0003 +[2026-02-28 13:25:41] (step=0012712) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.4871845040109566, LR: 0.0003 +[2026-02-28 13:25:49] (step=0012713) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.4873801604382706, LR: 0.0003 +[2026-02-28 13:25:57] (step=0012714) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.487575816865584, LR: 0.0003 +[2026-02-28 13:26:05] (step=0012715) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.4877714732928977, LR: 0.0003 +[2026-02-28 13:26:13] (step=0012716) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.4879671297202113, LR: 0.0003 +[2026-02-28 13:26:21] (step=0012717) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 2.488162786147525, LR: 0.0003 +[2026-02-28 13:26:28] (step=0012718) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 2.4883584425748384, LR: 0.0003 +[2026-02-28 13:26:36] (step=0012719) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.4885540990021524, LR: 0.0003 +[2026-02-28 13:26:44] (step=0012720) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.488749755429466, LR: 0.0003 +[2026-02-28 13:26:52] (step=0012721) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.4889454118567795, LR: 0.0003 +[2026-02-28 13:27:00] (step=0012722) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.489141068284093, LR: 0.0003 +[2026-02-28 13:27:08] (step=0012723) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.4893367247114067, LR: 0.0003 +[2026-02-28 13:27:16] (step=0012724) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.48953238113872, LR: 0.0003 +[2026-02-28 13:27:23] (step=0012725) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.4897280375660342, LR: 0.0003 +[2026-02-28 13:27:31] (step=0012726) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.489923693993348, LR: 0.0003 +[2026-02-28 13:27:39] (step=0012727) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 2.4901193504206613, LR: 0.0003 +[2026-02-28 13:27:47] (step=0012728) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 2.490315006847975, LR: 0.0003 +[2026-02-28 13:27:55] (step=0012729) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.4905106632752885, LR: 0.0003 +[2026-02-28 13:28:03] (step=0012730) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.490706319702602, LR: 0.0003 +[2026-02-28 13:28:11] (step=0012731) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.490901976129916, LR: 0.0003 +[2026-02-28 13:28:18] (step=0012732) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.4910976325572296, LR: 0.0003 +[2026-02-28 13:28:26] (step=0012733) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.491293288984543, LR: 0.0003 +[2026-02-28 13:28:34] (step=0012734) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.4914889454118567, LR: 0.0003 +[2026-02-28 13:28:42] (step=0012735) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 2.4916846018391703, LR: 0.0003 +[2026-02-28 13:28:50] (step=0012736) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.491880258266484, LR: 0.0003 +[2026-02-28 13:28:58] (step=0012737) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.492075914693798, LR: 0.0003 +[2026-02-28 13:29:05] (step=0012738) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.4922715711211114, LR: 0.0003 +[2026-02-28 13:29:13] (step=0012739) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.492467227548425, LR: 0.0003 +[2026-02-28 13:29:21] (step=0012740) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.4926628839757385, LR: 0.0003 +[2026-02-28 13:29:29] (step=0012741) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.492858540403052, LR: 0.0003 +[2026-02-28 13:29:37] (step=0012742) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.4930541968303657, LR: 0.0003 +[2026-02-28 13:29:45] (step=0012743) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.4932498532576797, LR: 0.0003 +[2026-02-28 13:29:52] (step=0012744) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.4934455096849932, LR: 0.0003 +[2026-02-28 13:30:00] (step=0012745) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 2.493641166112307, LR: 0.0003 +[2026-02-28 13:30:08] (step=0012746) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.4938368225396204, LR: 0.0003 +[2026-02-28 13:30:16] (step=0012747) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.494032478966934, LR: 0.0003 +[2026-02-28 13:30:24] (step=0012748) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.4942281353942475, LR: 0.0003 +[2026-02-28 13:30:32] (step=0012749) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 2.4944237918215615, LR: 0.0003 +[2026-02-28 13:30:40] (step=0012750) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.494619448248875, LR: 0.0003 +[2026-02-28 13:30:47] (step=0012751) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 2.4948151046761886, LR: 0.0003 +[2026-02-28 13:30:55] (step=0012752) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.495010761103502, LR: 0.0003 +[2026-02-28 13:31:03] (step=0012753) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.4952064175308157, LR: 0.0003 +[2026-02-28 13:31:11] (step=0012754) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.4954020739581297, LR: 0.0003 +[2026-02-28 13:31:19] (step=0012755) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.4955977303854433, LR: 0.0003 +[2026-02-28 13:31:27] (step=0012756) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 2.495793386812757, LR: 0.0003 +[2026-02-28 13:31:35] (step=0012757) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.4959890432400704, LR: 0.0003 +[2026-02-28 13:31:42] (step=0012758) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 2.496184699667384, LR: 0.0003 +[2026-02-28 13:31:50] (step=0012759) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.4963803560946976, LR: 0.0003 +[2026-02-28 13:31:58] (step=0012760) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.4965760125220116, LR: 0.0003 +[2026-02-28 13:32:06] (step=0012761) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.496771668949325, LR: 0.0003 +[2026-02-28 13:32:14] (step=0012762) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.4969673253766387, LR: 0.0003 +[2026-02-28 13:32:22] (step=0012763) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.4971629818039522, LR: 0.0003 +[2026-02-28 13:32:29] (step=0012764) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.497358638231266, LR: 0.0003 +[2026-02-28 13:32:37] (step=0012765) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.4975542946585794, LR: 0.0003 +[2026-02-28 13:32:45] (step=0012766) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.4977499510858934, LR: 0.0003 +[2026-02-28 13:32:53] (step=0012767) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 2.497945607513207, LR: 0.0003 +[2026-02-28 13:33:01] (step=0012768) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.4981412639405205, LR: 0.0003 +[2026-02-28 13:33:09] (step=0012769) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.498336920367834, LR: 0.0003 +[2026-02-28 13:33:17] (step=0012770) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.4985325767951476, LR: 0.0003 +[2026-02-28 13:33:24] (step=0012771) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 2.498728233222461, LR: 0.0003 +[2026-02-28 13:33:32] (step=0012772) Train Loss: 0.4728, Train Steps/Sec: 0.13, Epoch: 2.498923889649775, LR: 0.0003 +[2026-02-28 13:33:40] (step=0012773) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.4991195460770887, LR: 0.0003 +[2026-02-28 13:33:48] (step=0012774) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.4993152025044023, LR: 0.0003 +[2026-02-28 13:33:56] (step=0012775) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.499510858931716, LR: 0.0003 +[2026-02-28 13:34:04] (step=0012776) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.4997065153590294, LR: 0.0003 +[2026-02-28 13:34:11] (step=0012777) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.499902171786343, LR: 0.0003 +[2026-02-28 13:34:19] (step=0012778) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 2.500097828213657, LR: 0.0003 +[2026-02-28 13:34:27] (step=0012779) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.5002934846409706, LR: 0.0003 +[2026-02-28 13:34:35] (step=0012780) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.500489141068284, LR: 0.0003 +[2026-02-28 13:34:43] (step=0012781) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.5006847974955977, LR: 0.0003 +[2026-02-28 13:34:51] (step=0012782) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.5008804539229113, LR: 0.0003 +[2026-02-28 13:34:59] (step=0012783) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.501076110350225, LR: 0.0003 +[2026-02-28 13:35:06] (step=0012784) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 2.501271766777539, LR: 0.0003 +[2026-02-28 13:35:14] (step=0012785) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.5014674232048524, LR: 0.0003 +[2026-02-28 13:35:22] (step=0012786) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.501663079632166, LR: 0.0003 +[2026-02-28 13:35:30] (step=0012787) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.5018587360594795, LR: 0.0003 +[2026-02-28 13:35:38] (step=0012788) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.502054392486793, LR: 0.0003 +[2026-02-28 13:35:46] (step=0012789) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.5022500489141066, LR: 0.0003 +[2026-02-28 13:35:53] (step=0012790) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.5024457053414206, LR: 0.0003 +[2026-02-28 13:36:01] (step=0012791) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 2.502641361768734, LR: 0.0003 +[2026-02-28 13:36:09] (step=0012792) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.5028370181960478, LR: 0.0003 +[2026-02-28 13:36:17] (step=0012793) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.5030326746233613, LR: 0.0003 +[2026-02-28 13:36:25] (step=0012794) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 2.503228331050675, LR: 0.0003 +[2026-02-28 13:36:33] (step=0012795) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.5034239874779884, LR: 0.0003 +[2026-02-28 13:36:40] (step=0012796) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.5036196439053024, LR: 0.0003 +[2026-02-28 13:36:48] (step=0012797) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.503815300332616, LR: 0.0003 +[2026-02-28 13:36:56] (step=0012798) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.5040109567599296, LR: 0.0003 +[2026-02-28 13:37:04] (step=0012799) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.504206613187243, LR: 0.0003 +[2026-02-28 13:37:12] (step=0012800) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.5044022696145567, LR: 0.0003 +[2026-02-28 13:37:20] (step=0012801) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.5045979260418703, LR: 0.0003 +[2026-02-28 13:37:28] (step=0012802) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 2.5047935824691843, LR: 0.0003 +[2026-02-28 13:37:36] (step=0012803) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.504989238896498, LR: 0.0003 +[2026-02-28 13:37:43] (step=0012804) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.5051848953238114, LR: 0.0003 +[2026-02-28 13:37:51] (step=0012805) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 2.505380551751125, LR: 0.0003 +[2026-02-28 13:37:59] (step=0012806) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.5055762081784385, LR: 0.0003 +[2026-02-28 13:38:07] (step=0012807) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 2.5057718646057525, LR: 0.0003 +[2026-02-28 13:38:15] (step=0012808) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 2.505967521033066, LR: 0.0003 +[2026-02-28 13:38:23] (step=0012809) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.5061631774603796, LR: 0.0003 +[2026-02-28 13:38:30] (step=0012810) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.506358833887693, LR: 0.0003 +[2026-02-28 13:38:38] (step=0012811) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 2.5065544903150068, LR: 0.0003 +[2026-02-28 13:38:46] (step=0012812) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.5067501467423203, LR: 0.0003 +[2026-02-28 13:38:54] (step=0012813) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.5069458031696343, LR: 0.0003 +[2026-02-28 13:39:02] (step=0012814) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.507141459596948, LR: 0.0003 +[2026-02-28 13:39:10] (step=0012815) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.5073371160242615, LR: 0.0003 +[2026-02-28 13:39:17] (step=0012816) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.507532772451575, LR: 0.0003 +[2026-02-28 13:39:25] (step=0012817) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.5077284288788886, LR: 0.0003 +[2026-02-28 13:39:33] (step=0012818) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 2.507924085306202, LR: 0.0003 +[2026-02-28 13:39:41] (step=0012819) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 2.508119741733516, LR: 0.0003 +[2026-02-28 13:39:49] (step=0012820) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.5083153981608297, LR: 0.0003 +[2026-02-28 13:39:57] (step=0012821) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.5085110545881433, LR: 0.0003 +[2026-02-28 13:40:05] (step=0012822) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.508706711015457, LR: 0.0003 +[2026-02-28 13:40:12] (step=0012823) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 2.5089023674427704, LR: 0.0003 +[2026-02-28 13:40:20] (step=0012824) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.509098023870084, LR: 0.0003 +[2026-02-28 13:40:28] (step=0012825) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 2.509293680297398, LR: 0.0003 +[2026-02-28 13:40:36] (step=0012826) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.5094893367247115, LR: 0.0003 +[2026-02-28 13:40:44] (step=0012827) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.509684993152025, LR: 0.0003 +[2026-02-28 13:40:52] (step=0012828) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.5098806495793387, LR: 0.0003 +[2026-02-28 13:41:00] (step=0012829) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.510076306006652, LR: 0.0003 +[2026-02-28 13:41:07] (step=0012830) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 2.5102719624339658, LR: 0.0003 +[2026-02-28 13:41:15] (step=0012831) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.51046761886128, LR: 0.0003 +[2026-02-28 13:41:23] (step=0012832) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.5106632752885933, LR: 0.0003 +[2026-02-28 13:41:31] (step=0012833) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.510858931715907, LR: 0.0003 +[2026-02-28 13:41:39] (step=0012834) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.5110545881432205, LR: 0.0003 +[2026-02-28 13:41:47] (step=0012835) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 2.511250244570534, LR: 0.0003 +[2026-02-28 13:41:54] (step=0012836) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.5114459009978476, LR: 0.0003 +[2026-02-28 13:42:02] (step=0012837) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.5116415574251616, LR: 0.0003 +[2026-02-28 13:42:10] (step=0012838) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.511837213852475, LR: 0.0003 +[2026-02-28 13:42:18] (step=0012839) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.5120328702797887, LR: 0.0003 +[2026-02-28 13:42:26] (step=0012840) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.5122285267071023, LR: 0.0003 +[2026-02-28 13:42:34] (step=0012841) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.512424183134416, LR: 0.0003 +[2026-02-28 13:42:42] (step=0012842) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.5126198395617294, LR: 0.0003 +[2026-02-28 13:42:49] (step=0012843) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.5128154959890434, LR: 0.0003 +[2026-02-28 13:42:57] (step=0012844) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.513011152416357, LR: 0.0003 +[2026-02-28 13:43:05] (step=0012845) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.5132068088436705, LR: 0.0003 +[2026-02-28 13:43:13] (step=0012846) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.513402465270984, LR: 0.0003 +[2026-02-28 13:43:21] (step=0012847) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.5135981216982977, LR: 0.0003 +[2026-02-28 13:43:29] (step=0012848) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 2.5137937781256112, LR: 0.0003 +[2026-02-28 13:43:37] (step=0012849) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 2.5139894345529252, LR: 0.0003 +[2026-02-28 13:43:44] (step=0012850) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.514185090980239, LR: 0.0003 +[2026-02-28 13:43:52] (step=0012851) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.5143807474075524, LR: 0.0003 +[2026-02-28 13:44:00] (step=0012852) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.514576403834866, LR: 0.0003 +[2026-02-28 13:44:08] (step=0012853) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.5147720602621795, LR: 0.0003 +[2026-02-28 13:44:16] (step=0012854) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.514967716689493, LR: 0.0003 +[2026-02-28 13:44:24] (step=0012855) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.515163373116807, LR: 0.0003 +[2026-02-28 13:44:32] (step=0012856) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.5153590295441206, LR: 0.0003 +[2026-02-28 13:44:39] (step=0012857) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.515554685971434, LR: 0.0003 +[2026-02-28 13:44:47] (step=0012858) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.5157503423987477, LR: 0.0003 +[2026-02-28 13:44:55] (step=0012859) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.5159459988260613, LR: 0.0003 +[2026-02-28 13:45:03] (step=0012860) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 2.516141655253375, LR: 0.0003 +[2026-02-28 13:45:11] (step=0012861) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 2.516337311680689, LR: 0.0003 +[2026-02-28 13:45:19] (step=0012862) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.5165329681080024, LR: 0.0003 +[2026-02-28 13:45:26] (step=0012863) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 2.516728624535316, LR: 0.0003 +[2026-02-28 13:45:34] (step=0012864) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.5169242809626295, LR: 0.0003 +[2026-02-28 13:45:42] (step=0012865) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.517119937389943, LR: 0.0003 +[2026-02-28 13:45:50] (step=0012866) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.517315593817257, LR: 0.0003 +[2026-02-28 13:45:58] (step=0012867) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.5175112502445707, LR: 0.0003 +[2026-02-28 13:46:06] (step=0012868) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 2.5177069066718842, LR: 0.0003 +[2026-02-28 13:46:14] (step=0012869) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.517902563099198, LR: 0.0003 +[2026-02-28 13:46:21] (step=0012870) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.5180982195265114, LR: 0.0003 +[2026-02-28 13:46:29] (step=0012871) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.518293875953825, LR: 0.0003 +[2026-02-28 13:46:37] (step=0012872) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 2.518489532381139, LR: 0.0003 +[2026-02-28 13:46:45] (step=0012873) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.5186851888084525, LR: 0.0003 +[2026-02-28 13:46:53] (step=0012874) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.518880845235766, LR: 0.0003 +[2026-02-28 13:47:01] (step=0012875) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 2.5190765016630796, LR: 0.0003 +[2026-02-28 13:47:08] (step=0012876) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.519272158090393, LR: 0.0003 +[2026-02-28 13:47:16] (step=0012877) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.5194678145177067, LR: 0.0003 +[2026-02-28 13:47:24] (step=0012878) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.5196634709450207, LR: 0.0003 +[2026-02-28 13:47:32] (step=0012879) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.5198591273723343, LR: 0.0003 +[2026-02-28 13:47:40] (step=0012880) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 2.520054783799648, LR: 0.0003 +[2026-02-28 13:47:48] (step=0012881) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.5202504402269614, LR: 0.0003 +[2026-02-28 13:47:56] (step=0012882) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.520446096654275, LR: 0.0003 +[2026-02-28 13:48:03] (step=0012883) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.5206417530815886, LR: 0.0003 +[2026-02-28 13:48:11] (step=0012884) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.5208374095089026, LR: 0.0003 +[2026-02-28 13:48:19] (step=0012885) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.521033065936216, LR: 0.0003 +[2026-02-28 13:48:27] (step=0012886) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 2.5212287223635297, LR: 0.0003 +[2026-02-28 13:48:35] (step=0012887) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.5214243787908432, LR: 0.0003 +[2026-02-28 13:48:43] (step=0012888) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.521620035218157, LR: 0.0003 +[2026-02-28 13:48:50] (step=0012889) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.5218156916454704, LR: 0.0003 +[2026-02-28 13:48:58] (step=0012890) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 2.5220113480727844, LR: 0.0003 +[2026-02-28 13:49:06] (step=0012891) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.522207004500098, LR: 0.0003 +[2026-02-28 13:49:14] (step=0012892) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.5224026609274115, LR: 0.0003 +[2026-02-28 13:49:22] (step=0012893) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.522598317354725, LR: 0.0003 +[2026-02-28 13:49:30] (step=0012894) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.5227939737820386, LR: 0.0003 +[2026-02-28 13:49:38] (step=0012895) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 2.522989630209352, LR: 0.0003 +[2026-02-28 13:49:45] (step=0012896) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.523185286636666, LR: 0.0003 +[2026-02-28 13:49:53] (step=0012897) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.5233809430639798, LR: 0.0003 +[2026-02-28 13:50:01] (step=0012898) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.5235765994912933, LR: 0.0003 +[2026-02-28 13:50:09] (step=0012899) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.523772255918607, LR: 0.0003 +[2026-02-28 13:50:17] (step=0012900) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.5239679123459204, LR: 0.0003 +[2026-02-28 13:50:25] (step=0012901) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.524163568773234, LR: 0.0003 +[2026-02-28 13:50:33] (step=0012902) Train Loss: 0.4390, Train Steps/Sec: 0.12, Epoch: 2.524359225200548, LR: 0.0003 +[2026-02-28 13:50:40] (step=0012903) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.5245548816278616, LR: 0.0003 +[2026-02-28 13:50:48] (step=0012904) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.524750538055175, LR: 0.0003 +[2026-02-28 13:50:56] (step=0012905) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.5249461944824887, LR: 0.0003 +[2026-02-28 13:51:04] (step=0012906) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.5251418509098023, LR: 0.0003 +[2026-02-28 13:51:12] (step=0012907) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 2.525337507337116, LR: 0.0003 +[2026-02-28 13:51:20] (step=0012908) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 2.52553316376443, LR: 0.0003 +[2026-02-28 13:51:28] (step=0012909) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.5257288201917434, LR: 0.0003 +[2026-02-28 13:51:35] (step=0012910) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.525924476619057, LR: 0.0003 +[2026-02-28 13:51:43] (step=0012911) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 2.5261201330463705, LR: 0.0003 +[2026-02-28 13:51:51] (step=0012912) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.526315789473684, LR: 0.0003 +[2026-02-28 13:51:59] (step=0012913) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.5265114459009976, LR: 0.0003 +[2026-02-28 13:52:07] (step=0012914) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.5267071023283116, LR: 0.0003 +[2026-02-28 13:52:15] (step=0012915) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.526902758755625, LR: 0.0003 +[2026-02-28 13:52:22] (step=0012916) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.5270984151829388, LR: 0.0003 +[2026-02-28 13:52:30] (step=0012917) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 2.5272940716102523, LR: 0.0003 +[2026-02-28 13:52:38] (step=0012918) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.527489728037566, LR: 0.0003 +[2026-02-28 13:52:46] (step=0012919) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.52768538446488, LR: 0.0003 +[2026-02-28 13:52:54] (step=0012920) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 2.5278810408921935, LR: 0.0003 +[2026-02-28 13:53:02] (step=0012921) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 2.528076697319507, LR: 0.0003 +[2026-02-28 13:53:09] (step=0012922) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 2.5282723537468206, LR: 0.0003 +[2026-02-28 13:53:17] (step=0012923) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 2.528468010174134, LR: 0.0003 +[2026-02-28 13:53:25] (step=0012924) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.5286636666014477, LR: 0.0003 +[2026-02-28 13:53:33] (step=0012925) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.5288593230287617, LR: 0.0003 +[2026-02-28 13:53:41] (step=0012926) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.5290549794560753, LR: 0.0003 +[2026-02-28 13:53:49] (step=0012927) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.529250635883389, LR: 0.0003 +[2026-02-28 13:53:56] (step=0012928) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.5294462923107024, LR: 0.0003 +[2026-02-28 13:54:04] (step=0012929) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.529641948738016, LR: 0.0003 +[2026-02-28 13:54:12] (step=0012930) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 2.5298376051653295, LR: 0.0003 +[2026-02-28 13:54:20] (step=0012931) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.5300332615926435, LR: 0.0003 +[2026-02-28 13:54:28] (step=0012932) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.530228918019957, LR: 0.0003 +[2026-02-28 13:54:36] (step=0012933) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.5304245744472706, LR: 0.0003 +[2026-02-28 13:54:44] (step=0012934) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.530620230874584, LR: 0.0003 +[2026-02-28 13:54:51] (step=0012935) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.5308158873018978, LR: 0.0003 +[2026-02-28 13:54:59] (step=0012936) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.5310115437292113, LR: 0.0003 +[2026-02-28 13:55:07] (step=0012937) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.5312072001565253, LR: 0.0003 +[2026-02-28 13:55:15] (step=0012938) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.531402856583839, LR: 0.0003 +[2026-02-28 13:55:23] (step=0012939) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 2.5315985130111525, LR: 0.0003 +[2026-02-28 13:55:31] (step=0012940) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 2.531794169438466, LR: 0.0003 +[2026-02-28 13:55:38] (step=0012941) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.5319898258657796, LR: 0.0003 +[2026-02-28 13:55:46] (step=0012942) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 2.532185482293093, LR: 0.0003 +[2026-02-28 13:55:54] (step=0012943) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 2.532381138720407, LR: 0.0003 +[2026-02-28 13:56:02] (step=0012944) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.5325767951477207, LR: 0.0003 +[2026-02-28 13:56:10] (step=0012945) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.5327724515750343, LR: 0.0003 +[2026-02-28 13:56:18] (step=0012946) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 2.532968108002348, LR: 0.0003 +[2026-02-28 13:56:25] (step=0012947) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.5331637644296614, LR: 0.0003 +[2026-02-28 13:56:33] (step=0012948) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 2.533359420856975, LR: 0.0003 +[2026-02-28 13:56:41] (step=0012949) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.533555077284289, LR: 0.0003 +[2026-02-28 13:56:49] (step=0012950) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.5337507337116025, LR: 0.0003 +[2026-02-28 13:56:57] (step=0012951) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.533946390138916, LR: 0.0003 +[2026-02-28 13:57:05] (step=0012952) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.5341420465662297, LR: 0.0003 +[2026-02-28 13:57:13] (step=0012953) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.534337702993543, LR: 0.0003 +[2026-02-28 13:57:20] (step=0012954) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 2.534533359420857, LR: 0.0003 +[2026-02-28 13:57:28] (step=0012955) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.534729015848171, LR: 0.0003 +[2026-02-28 13:57:36] (step=0012956) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.5349246722754843, LR: 0.0003 +[2026-02-28 13:57:44] (step=0012957) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.535120328702798, LR: 0.0003 +[2026-02-28 13:57:52] (step=0012958) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.5353159851301115, LR: 0.0003 +[2026-02-28 13:58:00] (step=0012959) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.535511641557425, LR: 0.0003 +[2026-02-28 13:58:07] (step=0012960) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.5357072979847386, LR: 0.0003 +[2026-02-28 13:58:15] (step=0012961) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.5359029544120526, LR: 0.0003 +[2026-02-28 13:58:23] (step=0012962) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.536098610839366, LR: 0.0003 +[2026-02-28 13:58:31] (step=0012963) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 2.5362942672666797, LR: 0.0003 +[2026-02-28 13:58:39] (step=0012964) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.5364899236939933, LR: 0.0003 +[2026-02-28 13:58:47] (step=0012965) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.536685580121307, LR: 0.0003 +[2026-02-28 13:58:55] (step=0012966) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.5368812365486204, LR: 0.0003 +[2026-02-28 13:59:02] (step=0012967) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.5370768929759344, LR: 0.0003 +[2026-02-28 13:59:10] (step=0012968) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.537272549403248, LR: 0.0003 +[2026-02-28 13:59:18] (step=0012969) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 2.5374682058305615, LR: 0.0003 +[2026-02-28 13:59:26] (step=0012970) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.537663862257875, LR: 0.0003 +[2026-02-28 13:59:34] (step=0012971) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.5378595186851887, LR: 0.0003 +[2026-02-28 13:59:42] (step=0012972) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.5380551751125022, LR: 0.0003 +[2026-02-28 13:59:50] (step=0012973) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.5382508315398162, LR: 0.0003 +[2026-02-28 13:59:57] (step=0012974) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 2.53844648796713, LR: 0.0003 +[2026-02-28 14:00:05] (step=0012975) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.5386421443944434, LR: 0.0003 +[2026-02-28 14:00:13] (step=0012976) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 2.538837800821757, LR: 0.0003 +[2026-02-28 14:00:21] (step=0012977) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.5390334572490705, LR: 0.0003 +[2026-02-28 14:00:29] (step=0012978) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 2.5392291136763845, LR: 0.0003 +[2026-02-28 14:00:37] (step=0012979) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.539424770103698, LR: 0.0003 +[2026-02-28 14:00:44] (step=0012980) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.5396204265310116, LR: 0.0003 +[2026-02-28 14:00:52] (step=0012981) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.539816082958325, LR: 0.0003 +[2026-02-28 14:01:00] (step=0012982) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.5400117393856387, LR: 0.0003 +[2026-02-28 14:01:08] (step=0012983) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.5402073958129523, LR: 0.0003 +[2026-02-28 14:01:16] (step=0012984) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 2.5404030522402663, LR: 0.0003 +[2026-02-28 14:01:24] (step=0012985) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.54059870866758, LR: 0.0003 +[2026-02-28 14:01:31] (step=0012986) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.5407943650948934, LR: 0.0003 +[2026-02-28 14:01:39] (step=0012987) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.540990021522207, LR: 0.0003 +[2026-02-28 14:01:47] (step=0012988) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 2.5411856779495205, LR: 0.0003 +[2026-02-28 14:01:55] (step=0012989) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.541381334376834, LR: 0.0003 +[2026-02-28 14:02:03] (step=0012990) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.541576990804148, LR: 0.0003 +[2026-02-28 14:02:11] (step=0012991) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 2.5417726472314617, LR: 0.0003 +[2026-02-28 14:02:19] (step=0012992) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.5419683036587752, LR: 0.0003 +[2026-02-28 14:02:26] (step=0012993) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 2.542163960086089, LR: 0.0003 +[2026-02-28 14:02:34] (step=0012994) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.5423596165134024, LR: 0.0003 +[2026-02-28 14:02:42] (step=0012995) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.542555272940716, LR: 0.0003 +[2026-02-28 14:02:50] (step=0012996) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.54275092936803, LR: 0.0003 +[2026-02-28 14:02:58] (step=0012997) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.5429465857953435, LR: 0.0003 +[2026-02-28 14:03:06] (step=0012998) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 2.543142242222657, LR: 0.0003 +[2026-02-28 14:03:13] (step=0012999) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.5433378986499706, LR: 0.0003 +[2026-02-28 14:03:21] (step=0013000) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.543533555077284, LR: 0.0003 +[2026-02-28 14:03:21] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0013000/ +[2026-02-28 14:03:29] (step=0013001) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.5437292115045977, LR: 0.0003 +[2026-02-28 14:03:37] (step=0013002) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.5439248679319117, LR: 0.0003 +[2026-02-28 14:03:45] (step=0013003) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 2.5441205243592253, LR: 0.0003 +[2026-02-28 14:03:53] (step=0013004) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 2.544316180786539, LR: 0.0003 +[2026-02-28 14:04:01] (step=0013005) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 2.5445118372138524, LR: 0.0003 +[2026-02-28 14:04:08] (step=0013006) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.544707493641166, LR: 0.0003 +[2026-02-28 14:04:16] (step=0013007) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 2.5449031500684796, LR: 0.0003 +[2026-02-28 14:04:24] (step=0013008) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.5450988064957936, LR: 0.0003 +[2026-02-28 14:04:32] (step=0013009) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.545294462923107, LR: 0.0003 +[2026-02-28 14:04:40] (step=0013010) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.5454901193504207, LR: 0.0003 +[2026-02-28 14:04:48] (step=0013011) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.5456857757777342, LR: 0.0003 +[2026-02-28 14:04:56] (step=0013012) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 2.545881432205048, LR: 0.0003 +[2026-02-28 14:05:03] (step=0013013) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.5460770886323614, LR: 0.0003 +[2026-02-28 14:05:11] (step=0013014) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.5462727450596754, LR: 0.0003 +[2026-02-28 14:05:19] (step=0013015) Train Loss: 0.4738, Train Steps/Sec: 0.13, Epoch: 2.546468401486989, LR: 0.0003 +[2026-02-28 14:05:27] (step=0013016) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.5466640579143025, LR: 0.0003 +[2026-02-28 14:05:35] (step=0013017) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.546859714341616, LR: 0.0003 +[2026-02-28 14:05:43] (step=0013018) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.5470553707689296, LR: 0.0003 +[2026-02-28 14:05:51] (step=0013019) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 2.547251027196243, LR: 0.0003 +[2026-02-28 14:05:58] (step=0013020) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 2.547446683623557, LR: 0.0003 +[2026-02-28 14:06:06] (step=0013021) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 2.5476423400508708, LR: 0.0003 +[2026-02-28 14:06:14] (step=0013022) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.5478379964781843, LR: 0.0003 +[2026-02-28 14:06:22] (step=0013023) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.548033652905498, LR: 0.0003 +[2026-02-28 14:06:30] (step=0013024) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.5482293093328114, LR: 0.0003 +[2026-02-28 14:06:38] (step=0013025) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.548424965760125, LR: 0.0003 +[2026-02-28 14:06:45] (step=0013026) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 2.548620622187439, LR: 0.0003 +[2026-02-28 14:06:53] (step=0013027) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.5488162786147526, LR: 0.0003 +[2026-02-28 14:07:01] (step=0013028) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.549011935042066, LR: 0.0003 +[2026-02-28 14:07:09] (step=0013029) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.5492075914693797, LR: 0.0003 +[2026-02-28 14:07:17] (step=0013030) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.5494032478966933, LR: 0.0003 +[2026-02-28 14:07:25] (step=0013031) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.5495989043240073, LR: 0.0003 +[2026-02-28 14:07:33] (step=0013032) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.549794560751321, LR: 0.0003 +[2026-02-28 14:07:40] (step=0013033) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.5499902171786344, LR: 0.0003 +[2026-02-28 14:07:48] (step=0013034) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.550185873605948, LR: 0.0003 +[2026-02-28 14:07:56] (step=0013035) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.5503815300332615, LR: 0.0003 +[2026-02-28 14:08:04] (step=0013036) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.550577186460575, LR: 0.0003 +[2026-02-28 14:08:12] (step=0013037) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.550772842887889, LR: 0.0003 +[2026-02-28 14:08:20] (step=0013038) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.5509684993152026, LR: 0.0003 +[2026-02-28 14:08:27] (step=0013039) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.551164155742516, LR: 0.0003 +[2026-02-28 14:08:35] (step=0013040) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 2.5513598121698298, LR: 0.0003 +[2026-02-28 14:08:43] (step=0013041) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.5515554685971433, LR: 0.0003 +[2026-02-28 14:08:51] (step=0013042) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.551751125024457, LR: 0.0003 +[2026-02-28 14:08:59] (step=0013043) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.551946781451771, LR: 0.0003 +[2026-02-28 14:09:07] (step=0013044) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.5521424378790845, LR: 0.0003 +[2026-02-28 14:09:14] (step=0013045) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.552338094306398, LR: 0.0003 +[2026-02-28 14:09:22] (step=0013046) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.5525337507337116, LR: 0.0003 +[2026-02-28 14:09:30] (step=0013047) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.552729407161025, LR: 0.0003 +[2026-02-28 14:09:38] (step=0013048) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.5529250635883387, LR: 0.0003 +[2026-02-28 14:09:46] (step=0013049) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.5531207200156527, LR: 0.0003 +[2026-02-28 14:09:54] (step=0013050) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.5533163764429663, LR: 0.0003 +[2026-02-28 14:10:02] (step=0013051) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.55351203287028, LR: 0.0003 +[2026-02-28 14:10:10] (step=0013052) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.5537076892975934, LR: 0.0003 +[2026-02-28 14:10:17] (step=0013053) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.553903345724907, LR: 0.0003 +[2026-02-28 14:10:25] (step=0013054) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.5540990021522205, LR: 0.0003 +[2026-02-28 14:10:33] (step=0013055) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.5542946585795345, LR: 0.0003 +[2026-02-28 14:10:41] (step=0013056) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.554490315006848, LR: 0.0003 +[2026-02-28 14:10:49] (step=0013057) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.5546859714341617, LR: 0.0003 +[2026-02-28 14:10:57] (step=0013058) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 2.554881627861475, LR: 0.0003 +[2026-02-28 14:11:05] (step=0013059) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.5550772842887888, LR: 0.0003 +[2026-02-28 14:11:12] (step=0013060) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.5552729407161023, LR: 0.0003 +[2026-02-28 14:11:20] (step=0013061) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.5554685971434163, LR: 0.0003 +[2026-02-28 14:11:28] (step=0013062) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.55566425357073, LR: 0.0003 +[2026-02-28 14:11:36] (step=0013063) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.5558599099980435, LR: 0.0003 +[2026-02-28 14:11:44] (step=0013064) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.556055566425357, LR: 0.0003 +[2026-02-28 14:11:52] (step=0013065) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.5562512228526706, LR: 0.0003 +[2026-02-28 14:11:59] (step=0013066) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.556446879279984, LR: 0.0003 +[2026-02-28 14:12:07] (step=0013067) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.556642535707298, LR: 0.0003 +[2026-02-28 14:12:15] (step=0013068) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.5568381921346117, LR: 0.0003 +[2026-02-28 14:12:23] (step=0013069) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.5570338485619253, LR: 0.0003 +[2026-02-28 14:12:31] (step=0013070) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.557229504989239, LR: 0.0003 +[2026-02-28 14:12:39] (step=0013071) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 2.5574251614165524, LR: 0.0003 +[2026-02-28 14:12:47] (step=0013072) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.557620817843866, LR: 0.0003 +[2026-02-28 14:12:54] (step=0013073) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.55781647427118, LR: 0.0003 +[2026-02-28 14:13:02] (step=0013074) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.5580121306984935, LR: 0.0003 +[2026-02-28 14:13:10] (step=0013075) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.558207787125807, LR: 0.0003 +[2026-02-28 14:13:18] (step=0013076) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.5584034435531207, LR: 0.0003 +[2026-02-28 14:13:26] (step=0013077) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.558599099980434, LR: 0.0003 +[2026-02-28 14:13:34] (step=0013078) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.558794756407748, LR: 0.0003 +[2026-02-28 14:13:41] (step=0013079) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 2.558990412835062, LR: 0.0003 +[2026-02-28 14:13:49] (step=0013080) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 2.5591860692623754, LR: 0.0003 +[2026-02-28 14:13:57] (step=0013081) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.559381725689689, LR: 0.0003 +[2026-02-28 14:14:05] (step=0013082) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.5595773821170025, LR: 0.0003 +[2026-02-28 14:14:13] (step=0013083) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 2.559773038544316, LR: 0.0003 +[2026-02-28 14:14:21] (step=0013084) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.5599686949716296, LR: 0.0003 +[2026-02-28 14:14:28] (step=0013085) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.5601643513989436, LR: 0.0003 +[2026-02-28 14:14:36] (step=0013086) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 2.560360007826257, LR: 0.0003 +[2026-02-28 14:14:44] (step=0013087) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.5605556642535707, LR: 0.0003 +[2026-02-28 14:14:52] (step=0013088) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 2.5607513206808843, LR: 0.0003 +[2026-02-28 14:15:00] (step=0013089) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.560946977108198, LR: 0.0003 +[2026-02-28 14:15:08] (step=0013090) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.561142633535512, LR: 0.0003 +[2026-02-28 14:15:16] (step=0013091) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.5613382899628254, LR: 0.0003 +[2026-02-28 14:15:23] (step=0013092) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.561533946390139, LR: 0.0003 +[2026-02-28 14:15:31] (step=0013093) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.5617296028174525, LR: 0.0003 +[2026-02-28 14:15:39] (step=0013094) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.561925259244766, LR: 0.0003 +[2026-02-28 14:15:47] (step=0013095) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.5621209156720797, LR: 0.0003 +[2026-02-28 14:15:55] (step=0013096) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.5623165720993937, LR: 0.0003 +[2026-02-28 14:16:03] (step=0013097) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.5625122285267072, LR: 0.0003 +[2026-02-28 14:16:10] (step=0013098) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.562707884954021, LR: 0.0003 +[2026-02-28 14:16:18] (step=0013099) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.5629035413813344, LR: 0.0003 +[2026-02-28 14:16:26] (step=0013100) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.563099197808648, LR: 0.0003 +[2026-02-28 14:16:34] (step=0013101) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.5632948542359615, LR: 0.0003 +[2026-02-28 14:16:42] (step=0013102) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.5634905106632755, LR: 0.0003 +[2026-02-28 14:16:50] (step=0013103) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.563686167090589, LR: 0.0003 +[2026-02-28 14:16:58] (step=0013104) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.5638818235179026, LR: 0.0003 +[2026-02-28 14:17:05] (step=0013105) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 2.564077479945216, LR: 0.0003 +[2026-02-28 14:17:13] (step=0013106) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.5642731363725297, LR: 0.0003 +[2026-02-28 14:17:21] (step=0013107) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.5644687927998433, LR: 0.0003 +[2026-02-28 14:17:29] (step=0013108) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.5646644492271573, LR: 0.0003 +[2026-02-28 14:17:37] (step=0013109) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.564860105654471, LR: 0.0003 +[2026-02-28 14:17:45] (step=0013110) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.5650557620817844, LR: 0.0003 +[2026-02-28 14:17:53] (step=0013111) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.565251418509098, LR: 0.0003 +[2026-02-28 14:18:00] (step=0013112) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.5654470749364116, LR: 0.0003 +[2026-02-28 14:18:08] (step=0013113) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 2.565642731363725, LR: 0.0003 +[2026-02-28 14:18:16] (step=0013114) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.565838387791039, LR: 0.0003 +[2026-02-28 14:18:24] (step=0013115) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.5660340442183527, LR: 0.0003 +[2026-02-28 14:18:32] (step=0013116) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.5662297006456662, LR: 0.0003 +[2026-02-28 14:18:40] (step=0013117) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.56642535707298, LR: 0.0003 +[2026-02-28 14:18:48] (step=0013118) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.5666210135002934, LR: 0.0003 +[2026-02-28 14:18:55] (step=0013119) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.566816669927607, LR: 0.0003 +[2026-02-28 14:19:03] (step=0013120) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.567012326354921, LR: 0.0003 +[2026-02-28 14:19:11] (step=0013121) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.5672079827822345, LR: 0.0003 +[2026-02-28 14:19:19] (step=0013122) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 2.567403639209548, LR: 0.0003 +[2026-02-28 14:19:27] (step=0013123) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.5675992956368616, LR: 0.0003 +[2026-02-28 14:19:35] (step=0013124) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 2.567794952064175, LR: 0.0003 +[2026-02-28 14:19:42] (step=0013125) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.5679906084914887, LR: 0.0003 +[2026-02-28 14:19:50] (step=0013126) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.5681862649188028, LR: 0.0003 +[2026-02-28 14:19:58] (step=0013127) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.5683819213461163, LR: 0.0003 +[2026-02-28 14:20:06] (step=0013128) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.56857757777343, LR: 0.0003 +[2026-02-28 14:20:14] (step=0013129) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.5687732342007434, LR: 0.0003 +[2026-02-28 14:20:22] (step=0013130) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.568968890628057, LR: 0.0003 +[2026-02-28 14:20:30] (step=0013131) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.5691645470553706, LR: 0.0003 +[2026-02-28 14:20:37] (step=0013132) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.5693602034826846, LR: 0.0003 +[2026-02-28 14:20:45] (step=0013133) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.569555859909998, LR: 0.0003 +[2026-02-28 14:20:53] (step=0013134) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.5697515163373117, LR: 0.0003 +[2026-02-28 14:21:01] (step=0013135) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.5699471727646253, LR: 0.0003 +[2026-02-28 14:21:09] (step=0013136) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.570142829191939, LR: 0.0003 +[2026-02-28 14:21:17] (step=0013137) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.5703384856192524, LR: 0.0003 +[2026-02-28 14:21:24] (step=0013138) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.5705341420465664, LR: 0.0003 +[2026-02-28 14:21:32] (step=0013139) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.57072979847388, LR: 0.0003 +[2026-02-28 14:21:40] (step=0013140) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.5709254549011935, LR: 0.0003 +[2026-02-28 14:21:48] (step=0013141) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.571121111328507, LR: 0.0003 +[2026-02-28 14:21:56] (step=0013142) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.5713167677558206, LR: 0.0003 +[2026-02-28 14:22:04] (step=0013143) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 2.5715124241831346, LR: 0.0003 +[2026-02-28 14:22:11] (step=0013144) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.571708080610448, LR: 0.0003 +[2026-02-28 14:22:19] (step=0013145) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.5719037370377618, LR: 0.0003 +[2026-02-28 14:22:27] (step=0013146) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.5720993934650753, LR: 0.0003 +[2026-02-28 14:22:35] (step=0013147) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 2.572295049892389, LR: 0.0003 +[2026-02-28 14:22:43] (step=0013148) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.5724907063197024, LR: 0.0003 +[2026-02-28 14:22:51] (step=0013149) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.5726863627470165, LR: 0.0003 +[2026-02-28 14:22:58] (step=0013150) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.57288201917433, LR: 0.0003 +[2026-02-28 14:23:06] (step=0013151) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.5730776756016436, LR: 0.0003 +[2026-02-28 14:23:14] (step=0013152) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.573273332028957, LR: 0.0003 +[2026-02-28 14:23:22] (step=0013153) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.5734689884562707, LR: 0.0003 +[2026-02-28 14:23:30] (step=0013154) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.5736646448835843, LR: 0.0003 +[2026-02-28 14:23:38] (step=0013155) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.5738603013108983, LR: 0.0003 +[2026-02-28 14:23:46] (step=0013156) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 2.574055957738212, LR: 0.0003 +[2026-02-28 14:23:53] (step=0013157) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.5742516141655254, LR: 0.0003 +[2026-02-28 14:24:01] (step=0013158) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.574447270592839, LR: 0.0003 +[2026-02-28 14:24:09] (step=0013159) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.5746429270201525, LR: 0.0003 +[2026-02-28 14:24:17] (step=0013160) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.574838583447466, LR: 0.0003 +[2026-02-28 14:24:25] (step=0013161) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.57503423987478, LR: 0.0003 +[2026-02-28 14:24:33] (step=0013162) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.5752298963020936, LR: 0.0003 +[2026-02-28 14:24:41] (step=0013163) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.575425552729407, LR: 0.0003 +[2026-02-28 14:24:48] (step=0013164) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.5756212091567208, LR: 0.0003 +[2026-02-28 14:24:56] (step=0013165) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.5758168655840343, LR: 0.0003 +[2026-02-28 14:25:04] (step=0013166) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 2.576012522011348, LR: 0.0003 +[2026-02-28 14:25:12] (step=0013167) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.576208178438662, LR: 0.0003 +[2026-02-28 14:25:20] (step=0013168) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.5764038348659755, LR: 0.0003 +[2026-02-28 14:25:28] (step=0013169) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 2.576599491293289, LR: 0.0003 +[2026-02-28 14:25:36] (step=0013170) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 2.5767951477206026, LR: 0.0003 +[2026-02-28 14:25:43] (step=0013171) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.576990804147916, LR: 0.0003 +[2026-02-28 14:25:51] (step=0013172) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.5771864605752297, LR: 0.0003 +[2026-02-28 14:25:59] (step=0013173) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.5773821170025437, LR: 0.0003 +[2026-02-28 14:26:07] (step=0013174) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 2.5775777734298573, LR: 0.0003 +[2026-02-28 14:26:15] (step=0013175) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.577773429857171, LR: 0.0003 +[2026-02-28 14:26:23] (step=0013176) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 2.5779690862844844, LR: 0.0003 +[2026-02-28 14:26:30] (step=0013177) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.578164742711798, LR: 0.0003 +[2026-02-28 14:26:38] (step=0013178) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.5783603991391115, LR: 0.0003 +[2026-02-28 14:26:46] (step=0013179) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.5785560555664255, LR: 0.0003 +[2026-02-28 14:26:54] (step=0013180) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.578751711993739, LR: 0.0003 +[2026-02-28 14:27:02] (step=0013181) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.5789473684210527, LR: 0.0003 +[2026-02-28 14:27:10] (step=0013182) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 2.579143024848366, LR: 0.0003 +[2026-02-28 14:27:18] (step=0013183) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.57933868127568, LR: 0.0003 +[2026-02-28 14:27:25] (step=0013184) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.5795343377029933, LR: 0.0003 +[2026-02-28 14:27:33] (step=0013185) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 2.5797299941303073, LR: 0.0003 +[2026-02-28 14:27:41] (step=0013186) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.579925650557621, LR: 0.0003 +[2026-02-28 14:27:49] (step=0013187) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 2.5801213069849345, LR: 0.0003 +[2026-02-28 14:27:57] (step=0013188) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.580316963412248, LR: 0.0003 +[2026-02-28 14:28:05] (step=0013189) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.5805126198395616, LR: 0.0003 +[2026-02-28 14:28:12] (step=0013190) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.580708276266875, LR: 0.0003 +[2026-02-28 14:28:20] (step=0013191) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.580903932694189, LR: 0.0003 +[2026-02-28 14:28:28] (step=0013192) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.5810995891215027, LR: 0.0003 +[2026-02-28 14:28:36] (step=0013193) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.5812952455488163, LR: 0.0003 +[2026-02-28 14:28:44] (step=0013194) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 2.58149090197613, LR: 0.0003 +[2026-02-28 14:28:52] (step=0013195) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.5816865584034434, LR: 0.0003 +[2026-02-28 14:28:59] (step=0013196) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 2.581882214830757, LR: 0.0003 +[2026-02-28 14:29:07] (step=0013197) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 2.582077871258071, LR: 0.0003 +[2026-02-28 14:29:15] (step=0013198) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 2.5822735276853845, LR: 0.0003 +[2026-02-28 14:29:23] (step=0013199) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.582469184112698, LR: 0.0003 +[2026-02-28 14:29:31] (step=0013200) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 2.5826648405400117, LR: 0.0003 +[2026-02-28 14:29:39] (step=0013201) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.5828604969673252, LR: 0.0003 +[2026-02-28 14:29:46] (step=0013202) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.5830561533946392, LR: 0.0003 +[2026-02-28 14:29:54] (step=0013203) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 2.583251809821953, LR: 0.0003 +[2026-02-28 14:30:02] (step=0013204) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.5834474662492664, LR: 0.0003 +[2026-02-28 14:30:10] (step=0013205) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.58364312267658, LR: 0.0003 +[2026-02-28 14:30:18] (step=0013206) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.5838387791038935, LR: 0.0003 +[2026-02-28 14:30:26] (step=0013207) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.584034435531207, LR: 0.0003 +[2026-02-28 14:30:34] (step=0013208) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.584230091958521, LR: 0.0003 +[2026-02-28 14:30:42] (step=0013209) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.5844257483858346, LR: 0.0003 +[2026-02-28 14:30:49] (step=0013210) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 2.584621404813148, LR: 0.0003 +[2026-02-28 14:30:57] (step=0013211) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.5848170612404617, LR: 0.0003 +[2026-02-28 14:31:05] (step=0013212) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.5850127176677753, LR: 0.0003 +[2026-02-28 14:31:13] (step=0013213) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.585208374095089, LR: 0.0003 +[2026-02-28 14:31:21] (step=0013214) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.585404030522403, LR: 0.0003 +[2026-02-28 14:31:29] (step=0013215) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.5855996869497164, LR: 0.0003 +[2026-02-28 14:31:37] (step=0013216) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.58579534337703, LR: 0.0003 +[2026-02-28 14:31:44] (step=0013217) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.5859909998043435, LR: 0.0003 +[2026-02-28 14:31:52] (step=0013218) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.586186656231657, LR: 0.0003 +[2026-02-28 14:32:00] (step=0013219) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 2.5863823126589707, LR: 0.0003 +[2026-02-28 14:32:08] (step=0013220) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 2.5865779690862847, LR: 0.0003 +[2026-02-28 14:32:16] (step=0013221) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.5867736255135982, LR: 0.0003 +[2026-02-28 14:32:24] (step=0013222) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.586969281940912, LR: 0.0003 +[2026-02-28 14:32:31] (step=0013223) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.5871649383682254, LR: 0.0003 +[2026-02-28 14:32:39] (step=0013224) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.587360594795539, LR: 0.0003 +[2026-02-28 14:32:47] (step=0013225) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.5875562512228525, LR: 0.0003 +[2026-02-28 14:32:55] (step=0013226) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.5877519076501665, LR: 0.0003 +[2026-02-28 14:33:03] (step=0013227) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.58794756407748, LR: 0.0003 +[2026-02-28 14:33:11] (step=0013228) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.5881432205047936, LR: 0.0003 +[2026-02-28 14:33:18] (step=0013229) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.588338876932107, LR: 0.0003 +[2026-02-28 14:33:26] (step=0013230) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.5885345333594207, LR: 0.0003 +[2026-02-28 14:33:34] (step=0013231) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.5887301897867343, LR: 0.0003 +[2026-02-28 14:33:42] (step=0013232) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.5889258462140483, LR: 0.0003 +[2026-02-28 14:33:50] (step=0013233) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.589121502641362, LR: 0.0003 +[2026-02-28 14:33:58] (step=0013234) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 2.5893171590686754, LR: 0.0003 +[2026-02-28 14:34:06] (step=0013235) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.589512815495989, LR: 0.0003 +[2026-02-28 14:34:13] (step=0013236) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.5897084719233026, LR: 0.0003 +[2026-02-28 14:34:21] (step=0013237) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.589904128350616, LR: 0.0003 +[2026-02-28 14:34:29] (step=0013238) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.59009978477793, LR: 0.0003 +[2026-02-28 14:34:37] (step=0013239) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.5902954412052437, LR: 0.0003 +[2026-02-28 14:34:45] (step=0013240) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 2.5904910976325572, LR: 0.0003 +[2026-02-28 14:34:53] (step=0013241) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.590686754059871, LR: 0.0003 +[2026-02-28 14:35:00] (step=0013242) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.5908824104871844, LR: 0.0003 +[2026-02-28 14:35:08] (step=0013243) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 2.591078066914498, LR: 0.0003 +[2026-02-28 14:35:16] (step=0013244) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.591273723341812, LR: 0.0003 +[2026-02-28 14:35:24] (step=0013245) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.5914693797691255, LR: 0.0003 +[2026-02-28 14:35:32] (step=0013246) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.591665036196439, LR: 0.0003 +[2026-02-28 14:35:40] (step=0013247) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.5918606926237526, LR: 0.0003 +[2026-02-28 14:35:48] (step=0013248) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.592056349051066, LR: 0.0003 +[2026-02-28 14:35:55] (step=0013249) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.5922520054783798, LR: 0.0003 +[2026-02-28 14:36:03] (step=0013250) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.5924476619056938, LR: 0.0003 +[2026-02-28 14:36:11] (step=0013251) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.5926433183330073, LR: 0.0003 +[2026-02-28 14:36:19] (step=0013252) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.592838974760321, LR: 0.0003 +[2026-02-28 14:36:27] (step=0013253) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.5930346311876344, LR: 0.0003 +[2026-02-28 14:36:35] (step=0013254) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.593230287614948, LR: 0.0003 +[2026-02-28 14:36:43] (step=0013255) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.593425944042262, LR: 0.0003 +[2026-02-28 14:36:50] (step=0013256) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.5936216004695756, LR: 0.0003 +[2026-02-28 14:36:58] (step=0013257) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.593817256896889, LR: 0.0003 +[2026-02-28 14:37:06] (step=0013258) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.5940129133242027, LR: 0.0003 +[2026-02-28 14:37:14] (step=0013259) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.5942085697515163, LR: 0.0003 +[2026-02-28 14:37:22] (step=0013260) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.59440422617883, LR: 0.0003 +[2026-02-28 14:37:30] (step=0013261) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 2.594599882606144, LR: 0.0003 +[2026-02-28 14:37:37] (step=0013262) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.5947955390334574, LR: 0.0003 +[2026-02-28 14:37:45] (step=0013263) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.594991195460771, LR: 0.0003 +[2026-02-28 14:37:53] (step=0013264) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.5951868518880845, LR: 0.0003 +[2026-02-28 14:38:01] (step=0013265) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.595382508315398, LR: 0.0003 +[2026-02-28 14:38:09] (step=0013266) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.5955781647427116, LR: 0.0003 +[2026-02-28 14:38:17] (step=0013267) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.5957738211700256, LR: 0.0003 +[2026-02-28 14:38:25] (step=0013268) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 2.595969477597339, LR: 0.0003 +[2026-02-28 14:38:32] (step=0013269) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.5961651340246528, LR: 0.0003 +[2026-02-28 14:38:40] (step=0013270) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.5963607904519663, LR: 0.0003 +[2026-02-28 14:38:48] (step=0013271) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 2.59655644687928, LR: 0.0003 +[2026-02-28 14:38:56] (step=0013272) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.5967521033065935, LR: 0.0003 +[2026-02-28 14:39:04] (step=0013273) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 2.5969477597339075, LR: 0.0003 +[2026-02-28 14:39:12] (step=0013274) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.597143416161221, LR: 0.0003 +[2026-02-28 14:39:19] (step=0013275) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.5973390725885346, LR: 0.0003 +[2026-02-28 14:39:27] (step=0013276) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.597534729015848, LR: 0.0003 +[2026-02-28 14:39:35] (step=0013277) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 2.5977303854431617, LR: 0.0003 +[2026-02-28 14:39:43] (step=0013278) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 2.5979260418704753, LR: 0.0003 +[2026-02-28 14:39:51] (step=0013279) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.5981216982977893, LR: 0.0003 +[2026-02-28 14:39:59] (step=0013280) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.598317354725103, LR: 0.0003 +[2026-02-28 14:40:07] (step=0013281) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.5985130111524164, LR: 0.0003 +[2026-02-28 14:40:14] (step=0013282) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.59870866757973, LR: 0.0003 +[2026-02-28 14:40:22] (step=0013283) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.5989043240070435, LR: 0.0003 +[2026-02-28 14:40:30] (step=0013284) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.599099980434357, LR: 0.0003 +[2026-02-28 14:40:38] (step=0013285) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.599295636861671, LR: 0.0003 +[2026-02-28 14:40:46] (step=0013286) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.5994912932889847, LR: 0.0003 +[2026-02-28 14:40:54] (step=0013287) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.599686949716298, LR: 0.0003 +[2026-02-28 14:41:01] (step=0013288) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.5998826061436118, LR: 0.0003 +[2026-02-28 14:41:09] (step=0013289) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.6000782625709253, LR: 0.0003 +[2026-02-28 14:41:17] (step=0013290) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.600273918998239, LR: 0.0003 +[2026-02-28 14:41:25] (step=0013291) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.600469575425553, LR: 0.0003 +[2026-02-28 14:41:33] (step=0013292) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.6006652318528665, LR: 0.0003 +[2026-02-28 14:41:41] (step=0013293) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.60086088828018, LR: 0.0003 +[2026-02-28 14:41:49] (step=0013294) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.6010565447074936, LR: 0.0003 +[2026-02-28 14:41:56] (step=0013295) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.601252201134807, LR: 0.0003 +[2026-02-28 14:42:04] (step=0013296) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.6014478575621207, LR: 0.0003 +[2026-02-28 14:42:12] (step=0013297) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.6016435139894347, LR: 0.0003 +[2026-02-28 14:42:20] (step=0013298) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.6018391704167483, LR: 0.0003 +[2026-02-28 14:42:28] (step=0013299) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.602034826844062, LR: 0.0003 +[2026-02-28 14:42:36] (step=0013300) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 2.6022304832713754, LR: 0.0003 +[2026-02-28 14:42:44] (step=0013301) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.602426139698689, LR: 0.0003 +[2026-02-28 14:42:51] (step=0013302) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.6026217961260025, LR: 0.0003 +[2026-02-28 14:42:59] (step=0013303) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.6028174525533165, LR: 0.0003 +[2026-02-28 14:43:07] (step=0013304) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.60301310898063, LR: 0.0003 +[2026-02-28 14:43:15] (step=0013305) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 2.6032087654079437, LR: 0.0003 +[2026-02-28 14:43:23] (step=0013306) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.603404421835257, LR: 0.0003 +[2026-02-28 14:43:31] (step=0013307) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.603600078262571, LR: 0.0003 +[2026-02-28 14:43:38] (step=0013308) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.6037957346898843, LR: 0.0003 +[2026-02-28 14:43:46] (step=0013309) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.6039913911171984, LR: 0.0003 +[2026-02-28 14:43:54] (step=0013310) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 2.604187047544512, LR: 0.0003 +[2026-02-28 14:44:02] (step=0013311) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.6043827039718255, LR: 0.0003 +[2026-02-28 14:44:10] (step=0013312) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 2.604578360399139, LR: 0.0003 +[2026-02-28 14:44:18] (step=0013313) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.6047740168264526, LR: 0.0003 +[2026-02-28 14:44:26] (step=0013314) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.6049696732537666, LR: 0.0003 +[2026-02-28 14:44:33] (step=0013315) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.60516532968108, LR: 0.0003 +[2026-02-28 14:44:41] (step=0013316) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.6053609861083937, LR: 0.0003 +[2026-02-28 14:44:49] (step=0013317) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.6055566425357073, LR: 0.0003 +[2026-02-28 14:44:57] (step=0013318) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.605752298963021, LR: 0.0003 +[2026-02-28 14:45:05] (step=0013319) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.6059479553903344, LR: 0.0003 +[2026-02-28 14:45:13] (step=0013320) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.6061436118176484, LR: 0.0003 +[2026-02-28 14:45:21] (step=0013321) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.606339268244962, LR: 0.0003 +[2026-02-28 14:45:28] (step=0013322) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.6065349246722755, LR: 0.0003 +[2026-02-28 14:45:36] (step=0013323) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.606730581099589, LR: 0.0003 +[2026-02-28 14:45:44] (step=0013324) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.6069262375269027, LR: 0.0003 +[2026-02-28 14:45:52] (step=0013325) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.6071218939542162, LR: 0.0003 +[2026-02-28 14:46:00] (step=0013326) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.6073175503815302, LR: 0.0003 +[2026-02-28 14:46:08] (step=0013327) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.607513206808844, LR: 0.0003 +[2026-02-28 14:46:15] (step=0013328) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.6077088632361574, LR: 0.0003 +[2026-02-28 14:46:23] (step=0013329) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 2.607904519663471, LR: 0.0003 +[2026-02-28 14:46:31] (step=0013330) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 2.6081001760907845, LR: 0.0003 +[2026-02-28 14:46:39] (step=0013331) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.608295832518098, LR: 0.0003 +[2026-02-28 14:46:47] (step=0013332) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.608491488945412, LR: 0.0003 +[2026-02-28 14:46:55] (step=0013333) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.6086871453727256, LR: 0.0003 +[2026-02-28 14:47:02] (step=0013334) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 2.608882801800039, LR: 0.0003 +[2026-02-28 14:47:10] (step=0013335) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.6090784582273527, LR: 0.0003 +[2026-02-28 14:47:18] (step=0013336) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.6092741146546663, LR: 0.0003 +[2026-02-28 14:47:26] (step=0013337) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.60946977108198, LR: 0.0003 +[2026-02-28 14:47:34] (step=0013338) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 2.609665427509294, LR: 0.0003 +[2026-02-28 14:47:42] (step=0013339) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.6098610839366074, LR: 0.0003 +[2026-02-28 14:47:49] (step=0013340) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.610056740363921, LR: 0.0003 +[2026-02-28 14:47:57] (step=0013341) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.6102523967912346, LR: 0.0003 +[2026-02-28 14:48:05] (step=0013342) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.610448053218548, LR: 0.0003 +[2026-02-28 14:48:13] (step=0013343) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.6106437096458617, LR: 0.0003 +[2026-02-28 14:48:21] (step=0013344) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.6108393660731757, LR: 0.0003 +[2026-02-28 14:48:29] (step=0013345) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 2.6110350225004892, LR: 0.0003 +[2026-02-28 14:48:37] (step=0013346) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.611230678927803, LR: 0.0003 +[2026-02-28 14:48:45] (step=0013347) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 2.6114263353551164, LR: 0.0003 +[2026-02-28 14:48:52] (step=0013348) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.61162199178243, LR: 0.0003 +[2026-02-28 14:49:00] (step=0013349) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.6118176482097435, LR: 0.0003 +[2026-02-28 14:49:08] (step=0013350) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.6120133046370575, LR: 0.0003 +[2026-02-28 14:49:16] (step=0013351) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 2.612208961064371, LR: 0.0003 +[2026-02-28 14:49:24] (step=0013352) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 2.6124046174916846, LR: 0.0003 +[2026-02-28 14:49:32] (step=0013353) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.612600273918998, LR: 0.0003 +[2026-02-28 14:49:39] (step=0013354) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.6127959303463117, LR: 0.0003 +[2026-02-28 14:49:47] (step=0013355) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.6129915867736253, LR: 0.0003 +[2026-02-28 14:49:55] (step=0013356) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.6131872432009393, LR: 0.0003 +[2026-02-28 14:50:03] (step=0013357) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.613382899628253, LR: 0.0003 +[2026-02-28 14:50:11] (step=0013358) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 2.6135785560555664, LR: 0.0003 +[2026-02-28 14:50:19] (step=0013359) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 2.61377421248288, LR: 0.0003 +[2026-02-28 14:50:26] (step=0013360) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 2.6139698689101936, LR: 0.0003 +[2026-02-28 14:50:34] (step=0013361) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.614165525337507, LR: 0.0003 +[2026-02-28 14:50:42] (step=0013362) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.614361181764821, LR: 0.0003 +[2026-02-28 14:50:50] (step=0013363) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 2.6145568381921347, LR: 0.0003 +[2026-02-28 14:50:58] (step=0013364) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 2.6147524946194483, LR: 0.0003 +[2026-02-28 14:51:06] (step=0013365) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.614948151046762, LR: 0.0003 +[2026-02-28 14:51:14] (step=0013366) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.6151438074740754, LR: 0.0003 +[2026-02-28 14:51:22] (step=0013367) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.615339463901389, LR: 0.0003 +[2026-02-28 14:51:29] (step=0013368) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.615535120328703, LR: 0.0003 +[2026-02-28 14:51:37] (step=0013369) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 2.6157307767560165, LR: 0.0003 +[2026-02-28 14:51:45] (step=0013370) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.61592643318333, LR: 0.0003 +[2026-02-28 14:51:53] (step=0013371) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.6161220896106436, LR: 0.0003 +[2026-02-28 14:52:01] (step=0013372) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.616317746037957, LR: 0.0003 +[2026-02-28 14:52:09] (step=0013373) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 2.616513402465271, LR: 0.0003 +[2026-02-28 14:52:16] (step=0013374) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.6167090588925848, LR: 0.0003 +[2026-02-28 14:52:24] (step=0013375) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 2.6169047153198983, LR: 0.0003 +[2026-02-28 14:52:32] (step=0013376) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 2.617100371747212, LR: 0.0003 +[2026-02-28 14:52:40] (step=0013377) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 2.6172960281745254, LR: 0.0003 +[2026-02-28 14:52:48] (step=0013378) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.617491684601839, LR: 0.0003 +[2026-02-28 14:52:56] (step=0013379) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.617687341029153, LR: 0.0003 +[2026-02-28 14:53:04] (step=0013380) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.6178829974564666, LR: 0.0003 +[2026-02-28 14:53:11] (step=0013381) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.61807865388378, LR: 0.0003 +[2026-02-28 14:53:19] (step=0013382) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 2.6182743103110937, LR: 0.0003 +[2026-02-28 14:53:27] (step=0013383) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.6184699667384073, LR: 0.0003 +[2026-02-28 14:53:35] (step=0013384) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.618665623165721, LR: 0.0003 +[2026-02-28 14:53:43] (step=0013385) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.618861279593035, LR: 0.0003 +[2026-02-28 14:53:51] (step=0013386) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.6190569360203484, LR: 0.0003 +[2026-02-28 14:53:58] (step=0013387) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.619252592447662, LR: 0.0003 +[2026-02-28 14:54:06] (step=0013388) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 2.6194482488749755, LR: 0.0003 +[2026-02-28 14:54:14] (step=0013389) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.619643905302289, LR: 0.0003 +[2026-02-28 14:54:22] (step=0013390) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.6198395617296026, LR: 0.0003 +[2026-02-28 14:54:30] (step=0013391) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 2.6200352181569166, LR: 0.0003 +[2026-02-28 14:54:38] (step=0013392) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.62023087458423, LR: 0.0003 +[2026-02-28 14:54:46] (step=0013393) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 2.6204265310115438, LR: 0.0003 +[2026-02-28 14:54:53] (step=0013394) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.6206221874388573, LR: 0.0003 +[2026-02-28 14:55:01] (step=0013395) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.620817843866171, LR: 0.0003 +[2026-02-28 14:55:09] (step=0013396) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 2.6210135002934845, LR: 0.0003 +[2026-02-28 14:55:17] (step=0013397) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.6212091567207985, LR: 0.0003 +[2026-02-28 14:55:25] (step=0013398) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.621404813148112, LR: 0.0003 +[2026-02-28 14:55:33] (step=0013399) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.6216004695754256, LR: 0.0003 +[2026-02-28 14:55:41] (step=0013400) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.621796126002739, LR: 0.0003 +[2026-02-28 14:55:48] (step=0013401) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.6219917824300527, LR: 0.0003 +[2026-02-28 14:55:56] (step=0013402) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.6221874388573663, LR: 0.0003 +[2026-02-28 14:56:04] (step=0013403) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.6223830952846803, LR: 0.0003 +[2026-02-28 14:56:12] (step=0013404) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.622578751711994, LR: 0.0003 +[2026-02-28 14:56:20] (step=0013405) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 2.6227744081393074, LR: 0.0003 +[2026-02-28 14:56:28] (step=0013406) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.622970064566621, LR: 0.0003 +[2026-02-28 14:56:35] (step=0013407) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 2.6231657209939345, LR: 0.0003 +[2026-02-28 14:56:43] (step=0013408) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.623361377421248, LR: 0.0003 +[2026-02-28 14:56:51] (step=0013409) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.623557033848562, LR: 0.0003 +[2026-02-28 14:56:59] (step=0013410) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 2.6237526902758757, LR: 0.0003 +[2026-02-28 14:57:07] (step=0013411) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 2.623948346703189, LR: 0.0003 +[2026-02-28 14:57:15] (step=0013412) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.624144003130503, LR: 0.0003 +[2026-02-28 14:57:23] (step=0013413) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.6243396595578163, LR: 0.0003 +[2026-02-28 14:57:30] (step=0013414) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.62453531598513, LR: 0.0003 +[2026-02-28 14:57:38] (step=0013415) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.624730972412444, LR: 0.0003 +[2026-02-28 14:57:46] (step=0013416) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.6249266288397575, LR: 0.0003 +[2026-02-28 14:57:54] (step=0013417) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.625122285267071, LR: 0.0003 +[2026-02-28 14:58:02] (step=0013418) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.6253179416943846, LR: 0.0003 +[2026-02-28 14:58:10] (step=0013419) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.625513598121698, LR: 0.0003 +[2026-02-28 14:58:17] (step=0013420) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.6257092545490117, LR: 0.0003 +[2026-02-28 14:58:25] (step=0013421) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.6259049109763257, LR: 0.0003 +[2026-02-28 14:58:33] (step=0013422) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.6261005674036393, LR: 0.0003 +[2026-02-28 14:58:41] (step=0013423) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.626296223830953, LR: 0.0003 +[2026-02-28 14:58:49] (step=0013424) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.6264918802582664, LR: 0.0003 +[2026-02-28 14:58:57] (step=0013425) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 2.62668753668558, LR: 0.0003 +[2026-02-28 14:59:05] (step=0013426) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 2.626883193112894, LR: 0.0003 +[2026-02-28 14:59:12] (step=0013427) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.6270788495402075, LR: 0.0003 +[2026-02-28 14:59:20] (step=0013428) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.627274505967521, LR: 0.0003 +[2026-02-28 14:59:28] (step=0013429) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.6274701623948347, LR: 0.0003 +[2026-02-28 14:59:36] (step=0013430) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.6276658188221482, LR: 0.0003 +[2026-02-28 14:59:44] (step=0013431) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.627861475249462, LR: 0.0003 +[2026-02-28 14:59:52] (step=0013432) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.628057131676776, LR: 0.0003 +[2026-02-28 14:59:59] (step=0013433) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 2.6282527881040894, LR: 0.0003 +[2026-02-28 15:00:07] (step=0013434) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 2.628448444531403, LR: 0.0003 +[2026-02-28 15:00:15] (step=0013435) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.6286441009587165, LR: 0.0003 +[2026-02-28 15:00:23] (step=0013436) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 2.62883975738603, LR: 0.0003 +[2026-02-28 15:00:31] (step=0013437) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.6290354138133436, LR: 0.0003 +[2026-02-28 15:00:39] (step=0013438) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.6292310702406576, LR: 0.0003 +[2026-02-28 15:00:46] (step=0013439) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.629426726667971, LR: 0.0003 +[2026-02-28 15:00:54] (step=0013440) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.6296223830952847, LR: 0.0003 +[2026-02-28 15:01:02] (step=0013441) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.6298180395225983, LR: 0.0003 +[2026-02-28 15:01:10] (step=0013442) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 2.630013695949912, LR: 0.0003 +[2026-02-28 15:01:18] (step=0013443) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.6302093523772254, LR: 0.0003 +[2026-02-28 15:01:26] (step=0013444) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.6304050088045394, LR: 0.0003 +[2026-02-28 15:01:34] (step=0013445) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.630600665231853, LR: 0.0003 +[2026-02-28 15:01:41] (step=0013446) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.6307963216591665, LR: 0.0003 +[2026-02-28 15:01:49] (step=0013447) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.63099197808648, LR: 0.0003 +[2026-02-28 15:01:57] (step=0013448) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.6311876345137937, LR: 0.0003 +[2026-02-28 15:02:05] (step=0013449) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.6313832909411072, LR: 0.0003 +[2026-02-28 15:02:13] (step=0013450) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.6315789473684212, LR: 0.0003 +[2026-02-28 15:02:21] (step=0013451) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 2.631774603795735, LR: 0.0003 +[2026-02-28 15:02:29] (step=0013452) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.6319702602230484, LR: 0.0003 +[2026-02-28 15:02:36] (step=0013453) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.632165916650362, LR: 0.0003 +[2026-02-28 15:02:44] (step=0013454) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.6323615730776755, LR: 0.0003 +[2026-02-28 15:02:52] (step=0013455) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.632557229504989, LR: 0.0003 +[2026-02-28 15:03:00] (step=0013456) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.632752885932303, LR: 0.0003 +[2026-02-28 15:03:08] (step=0013457) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.6329485423596166, LR: 0.0003 +[2026-02-28 15:03:16] (step=0013458) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.63314419878693, LR: 0.0003 +[2026-02-28 15:03:23] (step=0013459) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 2.6333398552142437, LR: 0.0003 +[2026-02-28 15:03:31] (step=0013460) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.6335355116415573, LR: 0.0003 +[2026-02-28 15:03:39] (step=0013461) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.633731168068871, LR: 0.0003 +[2026-02-28 15:03:47] (step=0013462) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 2.633926824496185, LR: 0.0003 +[2026-02-28 15:03:55] (step=0013463) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.6341224809234984, LR: 0.0003 +[2026-02-28 15:04:03] (step=0013464) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 2.634318137350812, LR: 0.0003 +[2026-02-28 15:04:10] (step=0013465) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 2.6345137937781256, LR: 0.0003 +[2026-02-28 15:04:18] (step=0013466) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.634709450205439, LR: 0.0003 +[2026-02-28 15:04:26] (step=0013467) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.6349051066327527, LR: 0.0003 +[2026-02-28 15:04:34] (step=0013468) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.6351007630600667, LR: 0.0003 +[2026-02-28 15:04:42] (step=0013469) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.6352964194873802, LR: 0.0003 +[2026-02-28 15:04:50] (step=0013470) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.635492075914694, LR: 0.0003 +[2026-02-28 15:04:58] (step=0013471) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 2.6356877323420074, LR: 0.0003 +[2026-02-28 15:05:06] (step=0013472) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.635883388769321, LR: 0.0003 +[2026-02-28 15:05:13] (step=0013473) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 2.6360790451966345, LR: 0.0003 +[2026-02-28 15:05:21] (step=0013474) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.6362747016239485, LR: 0.0003 +[2026-02-28 15:05:29] (step=0013475) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 2.636470358051262, LR: 0.0003 +[2026-02-28 15:05:37] (step=0013476) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 2.6366660144785756, LR: 0.0003 +[2026-02-28 15:05:45] (step=0013477) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.636861670905889, LR: 0.0003 +[2026-02-28 15:05:53] (step=0013478) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.6370573273332027, LR: 0.0003 +[2026-02-28 15:06:00] (step=0013479) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.6372529837605163, LR: 0.0003 +[2026-02-28 15:06:08] (step=0013480) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 2.6374486401878303, LR: 0.0003 +[2026-02-28 15:06:16] (step=0013481) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.637644296615144, LR: 0.0003 +[2026-02-28 15:06:24] (step=0013482) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.6378399530424574, LR: 0.0003 +[2026-02-28 15:06:32] (step=0013483) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.638035609469771, LR: 0.0003 +[2026-02-28 15:06:40] (step=0013484) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.6382312658970846, LR: 0.0003 +[2026-02-28 15:06:48] (step=0013485) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.6384269223243986, LR: 0.0003 +[2026-02-28 15:06:55] (step=0013486) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.638622578751712, LR: 0.0003 +[2026-02-28 15:07:03] (step=0013487) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 2.6388182351790257, LR: 0.0003 +[2026-02-28 15:07:11] (step=0013488) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.6390138916063393, LR: 0.0003 +[2026-02-28 15:07:19] (step=0013489) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.639209548033653, LR: 0.0003 +[2026-02-28 15:07:27] (step=0013490) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.6394052044609664, LR: 0.0003 +[2026-02-28 15:07:35] (step=0013491) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.6396008608882804, LR: 0.0003 +[2026-02-28 15:07:43] (step=0013492) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.639796517315594, LR: 0.0003 +[2026-02-28 15:07:50] (step=0013493) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 2.6399921737429075, LR: 0.0003 +[2026-02-28 15:07:58] (step=0013494) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 2.640187830170221, LR: 0.0003 +[2026-02-28 15:08:06] (step=0013495) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 2.6403834865975346, LR: 0.0003 +[2026-02-28 15:08:14] (step=0013496) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 2.640579143024848, LR: 0.0003 +[2026-02-28 15:08:22] (step=0013497) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.640774799452162, LR: 0.0003 +[2026-02-28 15:08:30] (step=0013498) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.6409704558794758, LR: 0.0003 +[2026-02-28 15:08:37] (step=0013499) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.6411661123067893, LR: 0.0003 +[2026-02-28 15:08:45] (step=0013500) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.641361768734103, LR: 0.0003 +[2026-02-28 15:08:45] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0013500/ +[2026-02-28 15:08:53] (step=0013501) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.6415574251614165, LR: 0.0003 +[2026-02-28 15:09:01] (step=0013502) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 2.64175308158873, LR: 0.0003 +[2026-02-28 15:09:09] (step=0013503) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 2.641948738016044, LR: 0.0003 +[2026-02-28 15:09:17] (step=0013504) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.6421443944433576, LR: 0.0003 +[2026-02-28 15:09:25] (step=0013505) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.642340050870671, LR: 0.0003 +[2026-02-28 15:09:32] (step=0013506) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.6425357072979847, LR: 0.0003 +[2026-02-28 15:09:40] (step=0013507) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.6427313637252983, LR: 0.0003 +[2026-02-28 15:09:48] (step=0013508) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 2.642927020152612, LR: 0.0003 +[2026-02-28 15:09:56] (step=0013509) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.643122676579926, LR: 0.0003 +[2026-02-28 15:10:04] (step=0013510) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.6433183330072394, LR: 0.0003 +[2026-02-28 15:10:12] (step=0013511) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 2.643513989434553, LR: 0.0003 +[2026-02-28 15:10:19] (step=0013512) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.6437096458618665, LR: 0.0003 +[2026-02-28 15:10:27] (step=0013513) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.64390530228918, LR: 0.0003 +[2026-02-28 15:10:35] (step=0013514) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 2.6441009587164936, LR: 0.0003 +[2026-02-28 15:10:43] (step=0013515) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.6442966151438076, LR: 0.0003 +[2026-02-28 15:10:51] (step=0013516) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 2.644492271571121, LR: 0.0003 +[2026-02-28 15:10:59] (step=0013517) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.6446879279984348, LR: 0.0003 +[2026-02-28 15:11:07] (step=0013518) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 2.6448835844257483, LR: 0.0003 +[2026-02-28 15:11:14] (step=0013519) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 2.645079240853062, LR: 0.0003 +[2026-02-28 15:11:22] (step=0013520) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.6452748972803755, LR: 0.0003 +[2026-02-28 15:11:30] (step=0013521) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.6454705537076895, LR: 0.0003 +[2026-02-28 15:11:38] (step=0013522) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.645666210135003, LR: 0.0003 +[2026-02-28 15:11:46] (step=0013523) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 2.6458618665623166, LR: 0.0003 +[2026-02-28 15:11:54] (step=0013524) Train Loss: 0.4732, Train Steps/Sec: 0.13, Epoch: 2.64605752298963, LR: 0.0003 +[2026-02-28 15:12:02] (step=0013525) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.6462531794169437, LR: 0.0003 +[2026-02-28 15:12:09] (step=0013526) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.6464488358442573, LR: 0.0003 +[2026-02-28 15:12:17] (step=0013527) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.6466444922715713, LR: 0.0003 +[2026-02-28 15:12:25] (step=0013528) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.646840148698885, LR: 0.0003 +[2026-02-28 15:12:33] (step=0013529) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 2.6470358051261984, LR: 0.0003 +[2026-02-28 15:12:41] (step=0013530) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.647231461553512, LR: 0.0003 +[2026-02-28 15:12:49] (step=0013531) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.6474271179808255, LR: 0.0003 +[2026-02-28 15:12:56] (step=0013532) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 2.647622774408139, LR: 0.0003 +[2026-02-28 15:13:04] (step=0013533) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.647818430835453, LR: 0.0003 +[2026-02-28 15:13:12] (step=0013534) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.6480140872627667, LR: 0.0003 +[2026-02-28 15:13:20] (step=0013535) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.64820974369008, LR: 0.0003 +[2026-02-28 15:13:28] (step=0013536) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.648405400117394, LR: 0.0003 +[2026-02-28 15:13:36] (step=0013537) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.6486010565447073, LR: 0.0003 +[2026-02-28 15:13:43] (step=0013538) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.6487967129720213, LR: 0.0003 +[2026-02-28 15:13:51] (step=0013539) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.648992369399335, LR: 0.0003 +[2026-02-28 15:13:59] (step=0013540) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.6491880258266485, LR: 0.0003 +[2026-02-28 15:14:07] (step=0013541) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.649383682253962, LR: 0.0003 +[2026-02-28 15:14:15] (step=0013542) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.6495793386812756, LR: 0.0003 +[2026-02-28 15:14:23] (step=0013543) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.649774995108589, LR: 0.0003 +[2026-02-28 15:14:31] (step=0013544) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 2.649970651535903, LR: 0.0003 +[2026-02-28 15:14:38] (step=0013545) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.6501663079632167, LR: 0.0003 +[2026-02-28 15:14:46] (step=0013546) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.6503619643905303, LR: 0.0003 +[2026-02-28 15:14:54] (step=0013547) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.650557620817844, LR: 0.0003 +[2026-02-28 15:15:02] (step=0013548) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.6507532772451574, LR: 0.0003 +[2026-02-28 15:15:10] (step=0013549) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 2.650948933672471, LR: 0.0003 +[2026-02-28 15:15:18] (step=0013550) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.651144590099785, LR: 0.0003 +[2026-02-28 15:15:26] (step=0013551) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.6513402465270985, LR: 0.0003 +[2026-02-28 15:15:33] (step=0013552) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.651535902954412, LR: 0.0003 +[2026-02-28 15:15:41] (step=0013553) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.6517315593817257, LR: 0.0003 +[2026-02-28 15:15:49] (step=0013554) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.6519272158090392, LR: 0.0003 +[2026-02-28 15:15:57] (step=0013555) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.652122872236353, LR: 0.0003 +[2026-02-28 15:16:05] (step=0013556) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.652318528663667, LR: 0.0003 +[2026-02-28 15:16:13] (step=0013557) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.6525141850909804, LR: 0.0003 +[2026-02-28 15:16:20] (step=0013558) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.652709841518294, LR: 0.0003 +[2026-02-28 15:16:28] (step=0013559) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.6529054979456075, LR: 0.0003 +[2026-02-28 15:16:36] (step=0013560) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.653101154372921, LR: 0.0003 +[2026-02-28 15:16:44] (step=0013561) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.6532968108002346, LR: 0.0003 +[2026-02-28 15:16:52] (step=0013562) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.6534924672275486, LR: 0.0003 +[2026-02-28 15:17:00] (step=0013563) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.653688123654862, LR: 0.0003 +[2026-02-28 15:17:07] (step=0013564) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 2.6538837800821757, LR: 0.0003 +[2026-02-28 15:17:15] (step=0013565) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 2.6540794365094893, LR: 0.0003 +[2026-02-28 15:17:23] (step=0013566) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.654275092936803, LR: 0.0003 +[2026-02-28 15:17:31] (step=0013567) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.6544707493641164, LR: 0.0003 +[2026-02-28 15:17:39] (step=0013568) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.6546664057914304, LR: 0.0003 +[2026-02-28 15:17:47] (step=0013569) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.654862062218744, LR: 0.0003 +[2026-02-28 15:17:55] (step=0013570) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.6550577186460576, LR: 0.0003 +[2026-02-28 15:18:02] (step=0013571) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 2.655253375073371, LR: 0.0003 +[2026-02-28 15:18:10] (step=0013572) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.6554490315006847, LR: 0.0003 +[2026-02-28 15:18:18] (step=0013573) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.6556446879279982, LR: 0.0003 +[2026-02-28 15:18:26] (step=0013574) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.6558403443553122, LR: 0.0003 +[2026-02-28 15:18:34] (step=0013575) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.656036000782626, LR: 0.0003 +[2026-02-28 15:18:42] (step=0013576) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 2.6562316572099394, LR: 0.0003 +[2026-02-28 15:18:50] (step=0013577) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.656427313637253, LR: 0.0003 +[2026-02-28 15:18:57] (step=0013578) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.6566229700645665, LR: 0.0003 +[2026-02-28 15:19:05] (step=0013579) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.65681862649188, LR: 0.0003 +[2026-02-28 15:19:13] (step=0013580) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.657014282919194, LR: 0.0003 +[2026-02-28 15:19:21] (step=0013581) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.6572099393465076, LR: 0.0003 +[2026-02-28 15:19:29] (step=0013582) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.657405595773821, LR: 0.0003 +[2026-02-28 15:19:37] (step=0013583) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.6576012522011347, LR: 0.0003 +[2026-02-28 15:19:44] (step=0013584) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.6577969086284483, LR: 0.0003 +[2026-02-28 15:19:52] (step=0013585) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 2.657992565055762, LR: 0.0003 +[2026-02-28 15:20:00] (step=0013586) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.658188221483076, LR: 0.0003 +[2026-02-28 15:20:08] (step=0013587) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.6583838779103894, LR: 0.0003 +[2026-02-28 15:20:16] (step=0013588) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.658579534337703, LR: 0.0003 +[2026-02-28 15:20:24] (step=0013589) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 2.6587751907650166, LR: 0.0003 +[2026-02-28 15:20:32] (step=0013590) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.65897084719233, LR: 0.0003 +[2026-02-28 15:20:39] (step=0013591) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.6591665036196437, LR: 0.0003 +[2026-02-28 15:20:47] (step=0013592) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.6593621600469577, LR: 0.0003 +[2026-02-28 15:20:55] (step=0013593) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.6595578164742713, LR: 0.0003 +[2026-02-28 15:21:03] (step=0013594) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.659753472901585, LR: 0.0003 +[2026-02-28 15:21:11] (step=0013595) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.6599491293288984, LR: 0.0003 +[2026-02-28 15:21:19] (step=0013596) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.660144785756212, LR: 0.0003 +[2026-02-28 15:21:27] (step=0013597) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 2.660340442183526, LR: 0.0003 +[2026-02-28 15:21:34] (step=0013598) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.6605360986108395, LR: 0.0003 +[2026-02-28 15:21:42] (step=0013599) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.660731755038153, LR: 0.0003 +[2026-02-28 15:21:50] (step=0013600) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.6609274114654666, LR: 0.0003 +[2026-02-28 15:21:58] (step=0013601) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.66112306789278, LR: 0.0003 +[2026-02-28 15:22:06] (step=0013602) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 2.6613187243200938, LR: 0.0003 +[2026-02-28 15:22:14] (step=0013603) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.6615143807474078, LR: 0.0003 +[2026-02-28 15:22:21] (step=0013604) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.6617100371747213, LR: 0.0003 +[2026-02-28 15:22:29] (step=0013605) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.661905693602035, LR: 0.0003 +[2026-02-28 15:22:37] (step=0013606) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.6621013500293484, LR: 0.0003 +[2026-02-28 15:22:45] (step=0013607) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.662297006456662, LR: 0.0003 +[2026-02-28 15:22:53] (step=0013608) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.6624926628839756, LR: 0.0003 +[2026-02-28 15:23:01] (step=0013609) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.6626883193112896, LR: 0.0003 +[2026-02-28 15:23:09] (step=0013610) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.662883975738603, LR: 0.0003 +[2026-02-28 15:23:16] (step=0013611) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.6630796321659167, LR: 0.0003 +[2026-02-28 15:23:24] (step=0013612) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 2.6632752885932303, LR: 0.0003 +[2026-02-28 15:23:32] (step=0013613) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.663470945020544, LR: 0.0003 +[2026-02-28 15:23:40] (step=0013614) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.6636666014478574, LR: 0.0003 +[2026-02-28 15:23:48] (step=0013615) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 2.6638622578751714, LR: 0.0003 +[2026-02-28 15:23:56] (step=0013616) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.664057914302485, LR: 0.0003 +[2026-02-28 15:24:03] (step=0013617) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.6642535707297985, LR: 0.0003 +[2026-02-28 15:24:11] (step=0013618) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.664449227157112, LR: 0.0003 +[2026-02-28 15:24:19] (step=0013619) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.6646448835844256, LR: 0.0003 +[2026-02-28 15:24:27] (step=0013620) Train Loss: 0.4444, Train Steps/Sec: 0.12, Epoch: 2.664840540011739, LR: 0.0003 +[2026-02-28 15:24:35] (step=0013621) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.665036196439053, LR: 0.0003 +[2026-02-28 15:24:43] (step=0013622) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.6652318528663668, LR: 0.0003 +[2026-02-28 15:24:51] (step=0013623) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.6654275092936803, LR: 0.0003 +[2026-02-28 15:24:58] (step=0013624) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 2.665623165720994, LR: 0.0003 +[2026-02-28 15:25:06] (step=0013625) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.6658188221483075, LR: 0.0003 +[2026-02-28 15:25:14] (step=0013626) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.666014478575621, LR: 0.0003 +[2026-02-28 15:25:22] (step=0013627) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.666210135002935, LR: 0.0003 +[2026-02-28 15:25:30] (step=0013628) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.6664057914302486, LR: 0.0003 +[2026-02-28 15:25:38] (step=0013629) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 2.666601447857562, LR: 0.0003 +[2026-02-28 15:25:46] (step=0013630) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.6667971042848757, LR: 0.0003 +[2026-02-28 15:25:53] (step=0013631) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.6669927607121893, LR: 0.0003 +[2026-02-28 15:26:01] (step=0013632) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 2.667188417139503, LR: 0.0003 +[2026-02-28 15:26:09] (step=0013633) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.667384073566817, LR: 0.0003 +[2026-02-28 15:26:17] (step=0013634) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.6675797299941304, LR: 0.0003 +[2026-02-28 15:26:25] (step=0013635) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.667775386421444, LR: 0.0003 +[2026-02-28 15:26:33] (step=0013636) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.6679710428487575, LR: 0.0003 +[2026-02-28 15:26:41] (step=0013637) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.668166699276071, LR: 0.0003 +[2026-02-28 15:26:48] (step=0013638) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.6683623557033846, LR: 0.0003 +[2026-02-28 15:26:56] (step=0013639) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.6685580121306987, LR: 0.0003 +[2026-02-28 15:27:04] (step=0013640) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.668753668558012, LR: 0.0003 +[2026-02-28 15:27:12] (step=0013641) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 2.6689493249853258, LR: 0.0003 +[2026-02-28 15:27:20] (step=0013642) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.6691449814126393, LR: 0.0003 +[2026-02-28 15:27:28] (step=0013643) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.669340637839953, LR: 0.0003 +[2026-02-28 15:27:36] (step=0013644) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.6695362942672665, LR: 0.0003 +[2026-02-28 15:27:43] (step=0013645) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.6697319506945805, LR: 0.0003 +[2026-02-28 15:27:51] (step=0013646) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.669927607121894, LR: 0.0003 +[2026-02-28 15:27:59] (step=0013647) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 2.6701232635492076, LR: 0.0003 +[2026-02-28 15:28:07] (step=0013648) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 2.670318919976521, LR: 0.0003 +[2026-02-28 15:28:15] (step=0013649) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 2.6705145764038347, LR: 0.0003 +[2026-02-28 15:28:23] (step=0013650) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.6707102328311487, LR: 0.0003 +[2026-02-28 15:28:30] (step=0013651) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.6709058892584623, LR: 0.0003 +[2026-02-28 15:28:38] (step=0013652) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.671101545685776, LR: 0.0003 +[2026-02-28 15:28:46] (step=0013653) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.6712972021130894, LR: 0.0003 +[2026-02-28 15:28:54] (step=0013654) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 2.671492858540403, LR: 0.0003 +[2026-02-28 15:29:02] (step=0013655) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.6716885149677165, LR: 0.0003 +[2026-02-28 15:29:10] (step=0013656) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 2.6718841713950305, LR: 0.0003 +[2026-02-28 15:29:17] (step=0013657) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.672079827822344, LR: 0.0003 +[2026-02-28 15:29:25] (step=0013658) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.6722754842496577, LR: 0.0003 +[2026-02-28 15:29:33] (step=0013659) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.6724711406769712, LR: 0.0003 +[2026-02-28 15:29:41] (step=0013660) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.672666797104285, LR: 0.0003 +[2026-02-28 15:29:49] (step=0013661) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.6728624535315983, LR: 0.0003 +[2026-02-28 15:29:57] (step=0013662) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 2.6730581099589124, LR: 0.0003 +[2026-02-28 15:30:04] (step=0013663) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 2.673253766386226, LR: 0.0003 +[2026-02-28 15:30:12] (step=0013664) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 2.6734494228135395, LR: 0.0003 +[2026-02-28 15:30:20] (step=0013665) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.673645079240853, LR: 0.0003 +[2026-02-28 15:30:28] (step=0013666) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.6738407356681666, LR: 0.0003 +[2026-02-28 15:30:36] (step=0013667) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 2.67403639209548, LR: 0.0003 +[2026-02-28 15:30:44] (step=0013668) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 2.674232048522794, LR: 0.0003 +[2026-02-28 15:30:52] (step=0013669) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 2.6744277049501077, LR: 0.0003 +[2026-02-28 15:30:59] (step=0013670) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.6746233613774213, LR: 0.0003 +[2026-02-28 15:31:07] (step=0013671) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.674819017804735, LR: 0.0003 +[2026-02-28 15:31:15] (step=0013672) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 2.6750146742320484, LR: 0.0003 +[2026-02-28 15:31:23] (step=0013673) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 2.675210330659362, LR: 0.0003 +[2026-02-28 15:31:31] (step=0013674) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.675405987086676, LR: 0.0003 +[2026-02-28 15:31:39] (step=0013675) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.6756016435139895, LR: 0.0003 +[2026-02-28 15:31:47] (step=0013676) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 2.675797299941303, LR: 0.0003 +[2026-02-28 15:31:54] (step=0013677) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.6759929563686167, LR: 0.0003 +[2026-02-28 15:32:02] (step=0013678) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 2.6761886127959302, LR: 0.0003 +[2026-02-28 15:32:10] (step=0013679) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.676384269223244, LR: 0.0003 +[2026-02-28 15:32:18] (step=0013680) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 2.676579925650558, LR: 0.0003 +[2026-02-28 15:32:26] (step=0013681) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.6767755820778714, LR: 0.0003 +[2026-02-28 15:32:34] (step=0013682) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.676971238505185, LR: 0.0003 +[2026-02-28 15:32:42] (step=0013683) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.6771668949324985, LR: 0.0003 +[2026-02-28 15:32:49] (step=0013684) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.677362551359812, LR: 0.0003 +[2026-02-28 15:32:57] (step=0013685) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.6775582077871256, LR: 0.0003 +[2026-02-28 15:33:05] (step=0013686) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 2.6777538642144396, LR: 0.0003 +[2026-02-28 15:33:13] (step=0013687) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.677949520641753, LR: 0.0003 +[2026-02-28 15:33:21] (step=0013688) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 2.6781451770690667, LR: 0.0003 +[2026-02-28 15:33:29] (step=0013689) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.6783408334963803, LR: 0.0003 +[2026-02-28 15:33:36] (step=0013690) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.678536489923694, LR: 0.0003 +[2026-02-28 15:33:44] (step=0013691) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.6787321463510074, LR: 0.0003 +[2026-02-28 15:33:52] (step=0013692) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 2.6789278027783214, LR: 0.0003 +[2026-02-28 15:34:00] (step=0013693) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.679123459205635, LR: 0.0003 +[2026-02-28 15:34:08] (step=0013694) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.6793191156329486, LR: 0.0003 +[2026-02-28 15:34:16] (step=0013695) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.679514772060262, LR: 0.0003 +[2026-02-28 15:34:23] (step=0013696) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 2.6797104284875757, LR: 0.0003 +[2026-02-28 15:34:31] (step=0013697) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 2.6799060849148892, LR: 0.0003 +[2026-02-28 15:34:39] (step=0013698) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.6801017413422032, LR: 0.0003 +[2026-02-28 15:34:47] (step=0013699) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.680297397769517, LR: 0.0003 +[2026-02-28 15:34:55] (step=0013700) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.6804930541968304, LR: 0.0003 +[2026-02-28 15:35:03] (step=0013701) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.680688710624144, LR: 0.0003 +[2026-02-28 15:35:11] (step=0013702) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.6808843670514575, LR: 0.0003 +[2026-02-28 15:35:18] (step=0013703) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.681080023478771, LR: 0.0003 +[2026-02-28 15:35:26] (step=0013704) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.681275679906085, LR: 0.0003 +[2026-02-28 15:35:34] (step=0013705) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.6814713363333986, LR: 0.0003 +[2026-02-28 15:35:42] (step=0013706) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.681666992760712, LR: 0.0003 +[2026-02-28 15:35:50] (step=0013707) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.6818626491880257, LR: 0.0003 +[2026-02-28 15:35:58] (step=0013708) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 2.6820583056153393, LR: 0.0003 +[2026-02-28 15:36:05] (step=0013709) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.6822539620426533, LR: 0.0003 +[2026-02-28 15:36:13] (step=0013710) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.682449618469967, LR: 0.0003 +[2026-02-28 15:36:21] (step=0013711) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.6826452748972804, LR: 0.0003 +[2026-02-28 15:36:29] (step=0013712) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.682840931324594, LR: 0.0003 +[2026-02-28 15:36:37] (step=0013713) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 2.6830365877519076, LR: 0.0003 +[2026-02-28 15:36:45] (step=0013714) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.683232244179221, LR: 0.0003 +[2026-02-28 15:36:52] (step=0013715) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.683427900606535, LR: 0.0003 +[2026-02-28 15:37:00] (step=0013716) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.6836235570338487, LR: 0.0003 +[2026-02-28 15:37:08] (step=0013717) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.6838192134611623, LR: 0.0003 +[2026-02-28 15:37:16] (step=0013718) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 2.684014869888476, LR: 0.0003 +[2026-02-28 15:37:24] (step=0013719) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.6842105263157894, LR: 0.0003 +[2026-02-28 15:37:32] (step=0013720) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.684406182743103, LR: 0.0003 +[2026-02-28 15:37:40] (step=0013721) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 2.684601839170417, LR: 0.0003 +[2026-02-28 15:37:48] (step=0013722) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.6847974955977305, LR: 0.0003 +[2026-02-28 15:37:55] (step=0013723) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.684993152025044, LR: 0.0003 +[2026-02-28 15:38:03] (step=0013724) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 2.6851888084523576, LR: 0.0003 +[2026-02-28 15:38:11] (step=0013725) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.685384464879671, LR: 0.0003 +[2026-02-28 15:38:19] (step=0013726) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.6855801213069848, LR: 0.0003 +[2026-02-28 15:38:27] (step=0013727) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.6857757777342988, LR: 0.0003 +[2026-02-28 15:38:35] (step=0013728) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.6859714341616123, LR: 0.0003 +[2026-02-28 15:38:42] (step=0013729) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.686167090588926, LR: 0.0003 +[2026-02-28 15:38:50] (step=0013730) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 2.6863627470162394, LR: 0.0003 +[2026-02-28 15:38:58] (step=0013731) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.686558403443553, LR: 0.0003 +[2026-02-28 15:39:06] (step=0013732) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.6867540598708666, LR: 0.0003 +[2026-02-28 15:39:14] (step=0013733) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 2.6869497162981806, LR: 0.0003 +[2026-02-28 15:39:22] (step=0013734) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.687145372725494, LR: 0.0003 +[2026-02-28 15:39:29] (step=0013735) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.6873410291528077, LR: 0.0003 +[2026-02-28 15:39:37] (step=0013736) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.6875366855801213, LR: 0.0003 +[2026-02-28 15:39:45] (step=0013737) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.687732342007435, LR: 0.0003 +[2026-02-28 15:39:53] (step=0013738) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.6879279984347484, LR: 0.0003 +[2026-02-28 15:40:01] (step=0013739) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.6881236548620624, LR: 0.0003 +[2026-02-28 15:40:09] (step=0013740) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.688319311289376, LR: 0.0003 +[2026-02-28 15:40:17] (step=0013741) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 2.6885149677166895, LR: 0.0003 +[2026-02-28 15:40:24] (step=0013742) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.688710624144003, LR: 0.0003 +[2026-02-28 15:40:32] (step=0013743) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.6889062805713166, LR: 0.0003 +[2026-02-28 15:40:40] (step=0013744) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.68910193699863, LR: 0.0003 +[2026-02-28 15:40:48] (step=0013745) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.689297593425944, LR: 0.0003 +[2026-02-28 15:40:56] (step=0013746) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.6894932498532578, LR: 0.0003 +[2026-02-28 15:41:04] (step=0013747) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.6896889062805713, LR: 0.0003 +[2026-02-28 15:41:12] (step=0013748) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.689884562707885, LR: 0.0003 +[2026-02-28 15:41:19] (step=0013749) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.6900802191351985, LR: 0.0003 +[2026-02-28 15:41:27] (step=0013750) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.690275875562512, LR: 0.0003 +[2026-02-28 15:41:35] (step=0013751) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.690471531989826, LR: 0.0003 +[2026-02-28 15:41:43] (step=0013752) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.6906671884171396, LR: 0.0003 +[2026-02-28 15:41:51] (step=0013753) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.690862844844453, LR: 0.0003 +[2026-02-28 15:41:59] (step=0013754) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.6910585012717667, LR: 0.0003 +[2026-02-28 15:42:06] (step=0013755) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.6912541576990803, LR: 0.0003 +[2026-02-28 15:42:14] (step=0013756) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.691449814126394, LR: 0.0003 +[2026-02-28 15:42:22] (step=0013757) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.691645470553708, LR: 0.0003 +[2026-02-28 15:42:30] (step=0013758) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 2.6918411269810214, LR: 0.0003 +[2026-02-28 15:42:38] (step=0013759) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.692036783408335, LR: 0.0003 +[2026-02-28 15:42:46] (step=0013760) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.6922324398356485, LR: 0.0003 +[2026-02-28 15:42:53] (step=0013761) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.692428096262962, LR: 0.0003 +[2026-02-28 15:43:01] (step=0013762) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.692623752690276, LR: 0.0003 +[2026-02-28 15:43:09] (step=0013763) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.6928194091175897, LR: 0.0003 +[2026-02-28 15:43:17] (step=0013764) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.693015065544903, LR: 0.0003 +[2026-02-28 15:43:25] (step=0013765) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.693210721972217, LR: 0.0003 +[2026-02-28 15:43:33] (step=0013766) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 2.6934063783995303, LR: 0.0003 +[2026-02-28 15:43:41] (step=0013767) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 2.693602034826844, LR: 0.0003 +[2026-02-28 15:43:48] (step=0013768) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.693797691254158, LR: 0.0003 +[2026-02-28 15:43:56] (step=0013769) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.6939933476814715, LR: 0.0003 +[2026-02-28 15:44:04] (step=0013770) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.694189004108785, LR: 0.0003 +[2026-02-28 15:44:12] (step=0013771) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.6943846605360986, LR: 0.0003 +[2026-02-28 15:44:20] (step=0013772) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.694580316963412, LR: 0.0003 +[2026-02-28 15:44:28] (step=0013773) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.6947759733907257, LR: 0.0003 +[2026-02-28 15:44:35] (step=0013774) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 2.6949716298180397, LR: 0.0003 +[2026-02-28 15:44:43] (step=0013775) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 2.6951672862453533, LR: 0.0003 +[2026-02-28 15:44:51] (step=0013776) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.695362942672667, LR: 0.0003 +[2026-02-28 15:44:59] (step=0013777) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.6955585990999804, LR: 0.0003 +[2026-02-28 15:45:07] (step=0013778) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 2.695754255527294, LR: 0.0003 +[2026-02-28 15:45:15] (step=0013779) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 2.6959499119546075, LR: 0.0003 +[2026-02-28 15:45:23] (step=0013780) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.6961455683819215, LR: 0.0003 +[2026-02-28 15:45:30] (step=0013781) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.696341224809235, LR: 0.0003 +[2026-02-28 15:45:38] (step=0013782) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.6965368812365487, LR: 0.0003 +[2026-02-28 15:45:46] (step=0013783) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.6967325376638622, LR: 0.0003 +[2026-02-28 15:45:54] (step=0013784) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 2.696928194091176, LR: 0.0003 +[2026-02-28 15:46:02] (step=0013785) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.6971238505184894, LR: 0.0003 +[2026-02-28 15:46:10] (step=0013786) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 2.6973195069458034, LR: 0.0003 +[2026-02-28 15:46:18] (step=0013787) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.697515163373117, LR: 0.0003 +[2026-02-28 15:46:26] (step=0013788) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.6977108198004305, LR: 0.0003 +[2026-02-28 15:46:33] (step=0013789) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.697906476227744, LR: 0.0003 +[2026-02-28 15:46:41] (step=0013790) Train Loss: 0.4705, Train Steps/Sec: 0.13, Epoch: 2.6981021326550576, LR: 0.0003 +[2026-02-28 15:46:49] (step=0013791) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 2.698297789082371, LR: 0.0003 +[2026-02-28 15:46:57] (step=0013792) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.698493445509685, LR: 0.0003 +[2026-02-28 15:47:05] (step=0013793) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.6986891019369987, LR: 0.0003 +[2026-02-28 15:47:13] (step=0013794) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.6988847583643123, LR: 0.0003 +[2026-02-28 15:47:20] (step=0013795) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.699080414791626, LR: 0.0003 +[2026-02-28 15:47:28] (step=0013796) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.6992760712189394, LR: 0.0003 +[2026-02-28 15:47:36] (step=0013797) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.699471727646253, LR: 0.0003 +[2026-02-28 15:47:44] (step=0013798) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.699667384073567, LR: 0.0003 +[2026-02-28 15:47:52] (step=0013799) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.6998630405008806, LR: 0.0003 +[2026-02-28 15:48:00] (step=0013800) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.700058696928194, LR: 0.0003 +[2026-02-28 15:48:07] (step=0013801) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.7002543533555077, LR: 0.0003 +[2026-02-28 15:48:15] (step=0013802) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.7004500097828212, LR: 0.0003 +[2026-02-28 15:48:23] (step=0013803) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.700645666210135, LR: 0.0003 +[2026-02-28 15:48:31] (step=0013804) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.700841322637449, LR: 0.0003 +[2026-02-28 15:48:39] (step=0013805) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.7010369790647624, LR: 0.0003 +[2026-02-28 15:48:47] (step=0013806) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.701232635492076, LR: 0.0003 +[2026-02-28 15:48:54] (step=0013807) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.7014282919193895, LR: 0.0003 +[2026-02-28 15:49:02] (step=0013808) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.701623948346703, LR: 0.0003 +[2026-02-28 15:49:10] (step=0013809) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.7018196047740166, LR: 0.0003 +[2026-02-28 15:49:18] (step=0013810) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 2.7020152612013306, LR: 0.0003 +[2026-02-28 15:49:26] (step=0013811) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.702210917628644, LR: 0.0003 +[2026-02-28 15:49:34] (step=0013812) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.7024065740559577, LR: 0.0003 +[2026-02-28 15:49:41] (step=0013813) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.7026022304832713, LR: 0.0003 +[2026-02-28 15:49:49] (step=0013814) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.702797886910585, LR: 0.0003 +[2026-02-28 15:49:57] (step=0013815) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.7029935433378984, LR: 0.0003 +[2026-02-28 15:50:05] (step=0013816) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.7031891997652124, LR: 0.0003 +[2026-02-28 15:50:13] (step=0013817) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.703384856192526, LR: 0.0003 +[2026-02-28 15:50:21] (step=0013818) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 2.7035805126198396, LR: 0.0003 +[2026-02-28 15:50:28] (step=0013819) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.703776169047153, LR: 0.0003 +[2026-02-28 15:50:36] (step=0013820) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.7039718254744667, LR: 0.0003 +[2026-02-28 15:50:44] (step=0013821) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.7041674819017807, LR: 0.0003 +[2026-02-28 15:50:52] (step=0013822) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.7043631383290943, LR: 0.0003 +[2026-02-28 15:51:00] (step=0013823) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 2.704558794756408, LR: 0.0003 +[2026-02-28 15:51:08] (step=0013824) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 2.7047544511837214, LR: 0.0003 +[2026-02-28 15:51:16] (step=0013825) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.704950107611035, LR: 0.0003 +[2026-02-28 15:51:23] (step=0013826) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.7051457640383485, LR: 0.0003 +[2026-02-28 15:51:31] (step=0013827) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.7053414204656625, LR: 0.0003 +[2026-02-28 15:51:39] (step=0013828) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.705537076892976, LR: 0.0003 +[2026-02-28 15:51:47] (step=0013829) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.7057327333202896, LR: 0.0003 +[2026-02-28 15:51:55] (step=0013830) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.705928389747603, LR: 0.0003 +[2026-02-28 15:52:03] (step=0013831) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.7061240461749168, LR: 0.0003 +[2026-02-28 15:52:10] (step=0013832) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.7063197026022303, LR: 0.0003 +[2026-02-28 15:52:18] (step=0013833) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.7065153590295443, LR: 0.0003 +[2026-02-28 15:52:26] (step=0013834) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.706711015456858, LR: 0.0003 +[2026-02-28 15:52:34] (step=0013835) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 2.7069066718841714, LR: 0.0003 +[2026-02-28 15:52:42] (step=0013836) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.707102328311485, LR: 0.0003 +[2026-02-28 15:52:50] (step=0013837) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.7072979847387986, LR: 0.0003 +[2026-02-28 15:52:58] (step=0013838) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 2.707493641166112, LR: 0.0003 +[2026-02-28 15:53:05] (step=0013839) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.707689297593426, LR: 0.0003 +[2026-02-28 15:53:13] (step=0013840) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.7078849540207397, LR: 0.0003 +[2026-02-28 15:53:21] (step=0013841) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 2.7080806104480533, LR: 0.0003 +[2026-02-28 15:53:29] (step=0013842) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.708276266875367, LR: 0.0003 +[2026-02-28 15:53:37] (step=0013843) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.7084719233026804, LR: 0.0003 +[2026-02-28 15:53:45] (step=0013844) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.708667579729994, LR: 0.0003 +[2026-02-28 15:53:52] (step=0013845) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.708863236157308, LR: 0.0003 +[2026-02-28 15:54:00] (step=0013846) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.7090588925846215, LR: 0.0003 +[2026-02-28 15:54:08] (step=0013847) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 2.709254549011935, LR: 0.0003 +[2026-02-28 15:54:16] (step=0013848) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.7094502054392486, LR: 0.0003 +[2026-02-28 15:54:24] (step=0013849) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 2.709645861866562, LR: 0.0003 +[2026-02-28 15:54:32] (step=0013850) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.7098415182938758, LR: 0.0003 +[2026-02-28 15:54:39] (step=0013851) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.7100371747211898, LR: 0.0003 +[2026-02-28 15:54:47] (step=0013852) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.7102328311485033, LR: 0.0003 +[2026-02-28 15:54:55] (step=0013853) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.710428487575817, LR: 0.0003 +[2026-02-28 15:55:03] (step=0013854) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.7106241440031305, LR: 0.0003 +[2026-02-28 15:55:11] (step=0013855) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.710819800430444, LR: 0.0003 +[2026-02-28 15:55:19] (step=0013856) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.7110154568577576, LR: 0.0003 +[2026-02-28 15:55:26] (step=0013857) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.7112111132850716, LR: 0.0003 +[2026-02-28 15:55:34] (step=0013858) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.711406769712385, LR: 0.0003 +[2026-02-28 15:55:42] (step=0013859) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.7116024261396987, LR: 0.0003 +[2026-02-28 15:55:50] (step=0013860) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.7117980825670123, LR: 0.0003 +[2026-02-28 15:55:58] (step=0013861) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 2.711993738994326, LR: 0.0003 +[2026-02-28 15:56:06] (step=0013862) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.7121893954216394, LR: 0.0003 +[2026-02-28 15:56:13] (step=0013863) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.7123850518489534, LR: 0.0003 +[2026-02-28 15:56:21] (step=0013864) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 2.712580708276267, LR: 0.0003 +[2026-02-28 15:56:29] (step=0013865) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.7127763647035805, LR: 0.0003 +[2026-02-28 15:56:37] (step=0013866) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 2.712972021130894, LR: 0.0003 +[2026-02-28 15:56:45] (step=0013867) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.7131676775582076, LR: 0.0003 +[2026-02-28 15:56:53] (step=0013868) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.713363333985521, LR: 0.0003 +[2026-02-28 15:57:01] (step=0013869) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.713558990412835, LR: 0.0003 +[2026-02-28 15:57:08] (step=0013870) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.7137546468401488, LR: 0.0003 +[2026-02-28 15:57:16] (step=0013871) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.7139503032674623, LR: 0.0003 +[2026-02-28 15:57:24] (step=0013872) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.714145959694776, LR: 0.0003 +[2026-02-28 15:57:32] (step=0013873) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.7143416161220895, LR: 0.0003 +[2026-02-28 15:57:40] (step=0013874) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.7145372725494035, LR: 0.0003 +[2026-02-28 15:57:48] (step=0013875) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.714732928976717, LR: 0.0003 +[2026-02-28 15:57:55] (step=0013876) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.7149285854040306, LR: 0.0003 +[2026-02-28 15:58:03] (step=0013877) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.715124241831344, LR: 0.0003 +[2026-02-28 15:58:11] (step=0013878) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.7153198982586577, LR: 0.0003 +[2026-02-28 15:58:19] (step=0013879) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.7155155546859713, LR: 0.0003 +[2026-02-28 15:58:27] (step=0013880) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 2.7157112111132853, LR: 0.0003 +[2026-02-28 15:58:35] (step=0013881) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.715906867540599, LR: 0.0003 +[2026-02-28 15:58:42] (step=0013882) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 2.7161025239679124, LR: 0.0003 +[2026-02-28 15:58:50] (step=0013883) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.716298180395226, LR: 0.0003 +[2026-02-28 15:58:58] (step=0013884) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.7164938368225395, LR: 0.0003 +[2026-02-28 15:59:06] (step=0013885) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 2.716689493249853, LR: 0.0003 +[2026-02-28 15:59:14] (step=0013886) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.716885149677167, LR: 0.0003 +[2026-02-28 15:59:22] (step=0013887) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.7170808061044807, LR: 0.0003 +[2026-02-28 15:59:29] (step=0013888) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.7172764625317942, LR: 0.0003 +[2026-02-28 15:59:37] (step=0013889) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.717472118959108, LR: 0.0003 +[2026-02-28 15:59:45] (step=0013890) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.7176677753864213, LR: 0.0003 +[2026-02-28 15:59:53] (step=0013891) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 2.717863431813735, LR: 0.0003 +[2026-02-28 16:00:01] (step=0013892) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 2.718059088241049, LR: 0.0003 +[2026-02-28 16:00:09] (step=0013893) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.7182547446683625, LR: 0.0003 +[2026-02-28 16:00:17] (step=0013894) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.718450401095676, LR: 0.0003 +[2026-02-28 16:00:24] (step=0013895) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.7186460575229896, LR: 0.0003 +[2026-02-28 16:00:32] (step=0013896) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.718841713950303, LR: 0.0003 +[2026-02-28 16:00:40] (step=0013897) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.7190373703776167, LR: 0.0003 +[2026-02-28 16:00:48] (step=0013898) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.7192330268049307, LR: 0.0003 +[2026-02-28 16:00:56] (step=0013899) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.7194286832322443, LR: 0.0003 +[2026-02-28 16:01:04] (step=0013900) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.719624339659558, LR: 0.0003 +[2026-02-28 16:01:11] (step=0013901) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.7198199960868714, LR: 0.0003 +[2026-02-28 16:01:19] (step=0013902) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.720015652514185, LR: 0.0003 +[2026-02-28 16:01:27] (step=0013903) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 2.7202113089414985, LR: 0.0003 +[2026-02-28 16:01:35] (step=0013904) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.7204069653688125, LR: 0.0003 +[2026-02-28 16:01:43] (step=0013905) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.720602621796126, LR: 0.0003 +[2026-02-28 16:01:51] (step=0013906) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.7207982782234397, LR: 0.0003 +[2026-02-28 16:01:58] (step=0013907) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.7209939346507532, LR: 0.0003 +[2026-02-28 16:02:06] (step=0013908) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.721189591078067, LR: 0.0003 +[2026-02-28 16:02:14] (step=0013909) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.7213852475053804, LR: 0.0003 +[2026-02-28 16:02:22] (step=0013910) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 2.7215809039326944, LR: 0.0003 +[2026-02-28 16:02:30] (step=0013911) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.721776560360008, LR: 0.0003 +[2026-02-28 16:02:38] (step=0013912) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.7219722167873215, LR: 0.0003 +[2026-02-28 16:02:45] (step=0013913) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.722167873214635, LR: 0.0003 +[2026-02-28 16:02:53] (step=0013914) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.7223635296419486, LR: 0.0003 +[2026-02-28 16:03:01] (step=0013915) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 2.722559186069262, LR: 0.0003 +[2026-02-28 16:03:09] (step=0013916) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.722754842496576, LR: 0.0003 +[2026-02-28 16:03:17] (step=0013917) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 2.7229504989238897, LR: 0.0003 +[2026-02-28 16:03:25] (step=0013918) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.7231461553512033, LR: 0.0003 +[2026-02-28 16:03:33] (step=0013919) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.723341811778517, LR: 0.0003 +[2026-02-28 16:03:40] (step=0013920) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.7235374682058304, LR: 0.0003 +[2026-02-28 16:03:48] (step=0013921) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.723733124633144, LR: 0.0003 +[2026-02-28 16:03:56] (step=0013922) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 2.723928781060458, LR: 0.0003 +[2026-02-28 16:04:04] (step=0013923) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.7241244374877716, LR: 0.0003 +[2026-02-28 16:04:12] (step=0013924) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.724320093915085, LR: 0.0003 +[2026-02-28 16:04:20] (step=0013925) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.7245157503423987, LR: 0.0003 +[2026-02-28 16:04:27] (step=0013926) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 2.7247114067697122, LR: 0.0003 +[2026-02-28 16:04:35] (step=0013927) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 2.724907063197026, LR: 0.0003 +[2026-02-28 16:04:43] (step=0013928) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.72510271962434, LR: 0.0003 +[2026-02-28 16:04:51] (step=0013929) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.7252983760516534, LR: 0.0003 +[2026-02-28 16:04:59] (step=0013930) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 2.725494032478967, LR: 0.0003 +[2026-02-28 16:05:07] (step=0013931) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.7256896889062805, LR: 0.0003 +[2026-02-28 16:05:14] (step=0013932) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.725885345333594, LR: 0.0003 +[2026-02-28 16:05:22] (step=0013933) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.726081001760908, LR: 0.0003 +[2026-02-28 16:05:30] (step=0013934) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.7262766581882216, LR: 0.0003 +[2026-02-28 16:05:38] (step=0013935) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.726472314615535, LR: 0.0003 +[2026-02-28 16:05:46] (step=0013936) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.7266679710428487, LR: 0.0003 +[2026-02-28 16:05:54] (step=0013937) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.7268636274701623, LR: 0.0003 +[2026-02-28 16:06:01] (step=0013938) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.727059283897476, LR: 0.0003 +[2026-02-28 16:06:09] (step=0013939) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.72725494032479, LR: 0.0003 +[2026-02-28 16:06:17] (step=0013940) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.7274505967521034, LR: 0.0003 +[2026-02-28 16:06:25] (step=0013941) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 2.727646253179417, LR: 0.0003 +[2026-02-28 16:06:33] (step=0013942) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.7278419096067306, LR: 0.0003 +[2026-02-28 16:06:41] (step=0013943) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.728037566034044, LR: 0.0003 +[2026-02-28 16:06:49] (step=0013944) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.7282332224613577, LR: 0.0003 +[2026-02-28 16:06:56] (step=0013945) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.7284288788886717, LR: 0.0003 +[2026-02-28 16:07:04] (step=0013946) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 2.7286245353159853, LR: 0.0003 +[2026-02-28 16:07:12] (step=0013947) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.728820191743299, LR: 0.0003 +[2026-02-28 16:07:20] (step=0013948) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.7290158481706124, LR: 0.0003 +[2026-02-28 16:07:28] (step=0013949) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.729211504597926, LR: 0.0003 +[2026-02-28 16:07:36] (step=0013950) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.7294071610252395, LR: 0.0003 +[2026-02-28 16:07:43] (step=0013951) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 2.7296028174525535, LR: 0.0003 +[2026-02-28 16:07:51] (step=0013952) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 2.729798473879867, LR: 0.0003 +[2026-02-28 16:07:59] (step=0013953) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.7299941303071806, LR: 0.0003 +[2026-02-28 16:08:07] (step=0013954) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.730189786734494, LR: 0.0003 +[2026-02-28 16:08:15] (step=0013955) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.7303854431618078, LR: 0.0003 +[2026-02-28 16:08:23] (step=0013956) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.7305810995891213, LR: 0.0003 +[2026-02-28 16:08:30] (step=0013957) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 2.7307767560164353, LR: 0.0003 +[2026-02-28 16:08:38] (step=0013958) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.730972412443749, LR: 0.0003 +[2026-02-28 16:08:46] (step=0013959) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.7311680688710624, LR: 0.0003 +[2026-02-28 16:08:54] (step=0013960) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.731363725298376, LR: 0.0003 +[2026-02-28 16:09:02] (step=0013961) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.7315593817256896, LR: 0.0003 +[2026-02-28 16:09:10] (step=0013962) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.731755038153003, LR: 0.0003 +[2026-02-28 16:09:17] (step=0013963) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.731950694580317, LR: 0.0003 +[2026-02-28 16:09:25] (step=0013964) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.7321463510076307, LR: 0.0003 +[2026-02-28 16:09:33] (step=0013965) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.7323420074349443, LR: 0.0003 +[2026-02-28 16:09:41] (step=0013966) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 2.732537663862258, LR: 0.0003 +[2026-02-28 16:09:49] (step=0013967) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.7327333202895714, LR: 0.0003 +[2026-02-28 16:09:57] (step=0013968) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.732928976716885, LR: 0.0003 +[2026-02-28 16:10:04] (step=0013969) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.733124633144199, LR: 0.0003 +[2026-02-28 16:10:12] (step=0013970) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 2.7333202895715125, LR: 0.0003 +[2026-02-28 16:10:20] (step=0013971) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.733515945998826, LR: 0.0003 +[2026-02-28 16:10:28] (step=0013972) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.7337116024261396, LR: 0.0003 +[2026-02-28 16:10:36] (step=0013973) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 2.733907258853453, LR: 0.0003 +[2026-02-28 16:10:44] (step=0013974) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.7341029152807668, LR: 0.0003 +[2026-02-28 16:10:52] (step=0013975) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.7342985717080808, LR: 0.0003 +[2026-02-28 16:10:59] (step=0013976) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.7344942281353943, LR: 0.0003 +[2026-02-28 16:11:07] (step=0013977) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.734689884562708, LR: 0.0003 +[2026-02-28 16:11:15] (step=0013978) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.7348855409900215, LR: 0.0003 +[2026-02-28 16:11:23] (step=0013979) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 2.735081197417335, LR: 0.0003 +[2026-02-28 16:11:31] (step=0013980) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.7352768538446486, LR: 0.0003 +[2026-02-28 16:11:39] (step=0013981) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.7354725102719626, LR: 0.0003 +[2026-02-28 16:11:46] (step=0013982) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.735668166699276, LR: 0.0003 +[2026-02-28 16:11:54] (step=0013983) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.7358638231265897, LR: 0.0003 +[2026-02-28 16:12:02] (step=0013984) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 2.7360594795539033, LR: 0.0003 +[2026-02-28 16:12:10] (step=0013985) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.736255135981217, LR: 0.0003 +[2026-02-28 16:12:18] (step=0013986) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.736450792408531, LR: 0.0003 +[2026-02-28 16:12:26] (step=0013987) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.7366464488358444, LR: 0.0003 +[2026-02-28 16:12:33] (step=0013988) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.736842105263158, LR: 0.0003 +[2026-02-28 16:12:41] (step=0013989) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.7370377616904715, LR: 0.0003 +[2026-02-28 16:12:49] (step=0013990) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.737233418117785, LR: 0.0003 +[2026-02-28 16:12:57] (step=0013991) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.7374290745450987, LR: 0.0003 +[2026-02-28 16:13:05] (step=0013992) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.7376247309724127, LR: 0.0003 +[2026-02-28 16:13:13] (step=0013993) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.737820387399726, LR: 0.0003 +[2026-02-28 16:13:21] (step=0013994) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.73801604382704, LR: 0.0003 +[2026-02-28 16:13:28] (step=0013995) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.7382117002543533, LR: 0.0003 +[2026-02-28 16:13:36] (step=0013996) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.738407356681667, LR: 0.0003 +[2026-02-28 16:13:44] (step=0013997) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.7386030131089805, LR: 0.0003 +[2026-02-28 16:13:52] (step=0013998) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.7387986695362945, LR: 0.0003 +[2026-02-28 16:14:00] (step=0013999) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 2.738994325963608, LR: 0.0003 +[2026-02-28 16:14:08] (step=0014000) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.7391899823909216, LR: 0.0003 +[2026-02-28 16:14:08] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0014000/ +[2026-02-28 16:14:15] (step=0014001) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 2.739385638818235, LR: 0.0003 +[2026-02-28 16:14:23] (step=0014002) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.7395812952455487, LR: 0.0003 +[2026-02-28 16:14:31] (step=0014003) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.7397769516728623, LR: 0.0003 +[2026-02-28 16:14:39] (step=0014004) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.7399726081001763, LR: 0.0003 +[2026-02-28 16:14:47] (step=0014005) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.74016826452749, LR: 0.0003 +[2026-02-28 16:14:55] (step=0014006) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.7403639209548034, LR: 0.0003 +[2026-02-28 16:15:02] (step=0014007) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.740559577382117, LR: 0.0003 +[2026-02-28 16:15:10] (step=0014008) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.7407552338094305, LR: 0.0003 +[2026-02-28 16:15:18] (step=0014009) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 2.740950890236744, LR: 0.0003 +[2026-02-28 16:15:26] (step=0014010) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.741146546664058, LR: 0.0003 +[2026-02-28 16:15:34] (step=0014011) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.7413422030913717, LR: 0.0003 +[2026-02-28 16:15:42] (step=0014012) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.7415378595186852, LR: 0.0003 +[2026-02-28 16:15:49] (step=0014013) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.741733515945999, LR: 0.0003 +[2026-02-28 16:15:57] (step=0014014) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.7419291723733124, LR: 0.0003 +[2026-02-28 16:16:05] (step=0014015) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.742124828800626, LR: 0.0003 +[2026-02-28 16:16:13] (step=0014016) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.74232048522794, LR: 0.0003 +[2026-02-28 16:16:21] (step=0014017) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.7425161416552535, LR: 0.0003 +[2026-02-28 16:16:29] (step=0014018) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 2.742711798082567, LR: 0.0003 +[2026-02-28 16:16:36] (step=0014019) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.7429074545098806, LR: 0.0003 +[2026-02-28 16:16:44] (step=0014020) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.743103110937194, LR: 0.0003 +[2026-02-28 16:16:52] (step=0014021) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.7432987673645077, LR: 0.0003 +[2026-02-28 16:17:00] (step=0014022) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.7434944237918217, LR: 0.0003 +[2026-02-28 16:17:08] (step=0014023) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.7436900802191353, LR: 0.0003 +[2026-02-28 16:17:16] (step=0014024) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.743885736646449, LR: 0.0003 +[2026-02-28 16:17:24] (step=0014025) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.7440813930737624, LR: 0.0003 +[2026-02-28 16:17:32] (step=0014026) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 2.744277049501076, LR: 0.0003 +[2026-02-28 16:17:39] (step=0014027) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.7444727059283895, LR: 0.0003 +[2026-02-28 16:17:47] (step=0014028) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.7446683623557036, LR: 0.0003 +[2026-02-28 16:17:55] (step=0014029) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.744864018783017, LR: 0.0003 +[2026-02-28 16:18:03] (step=0014030) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.7450596752103307, LR: 0.0003 +[2026-02-28 16:18:11] (step=0014031) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.7452553316376442, LR: 0.0003 +[2026-02-28 16:18:19] (step=0014032) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.745450988064958, LR: 0.0003 +[2026-02-28 16:18:26] (step=0014033) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.7456466444922714, LR: 0.0003 +[2026-02-28 16:18:34] (step=0014034) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.7458423009195854, LR: 0.0003 +[2026-02-28 16:18:42] (step=0014035) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.746037957346899, LR: 0.0003 +[2026-02-28 16:18:50] (step=0014036) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.7462336137742125, LR: 0.0003 +[2026-02-28 16:18:58] (step=0014037) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.746429270201526, LR: 0.0003 +[2026-02-28 16:19:06] (step=0014038) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.7466249266288396, LR: 0.0003 +[2026-02-28 16:19:13] (step=0014039) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 2.746820583056153, LR: 0.0003 +[2026-02-28 16:19:21] (step=0014040) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.747016239483467, LR: 0.0003 +[2026-02-28 16:19:29] (step=0014041) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 2.7472118959107807, LR: 0.0003 +[2026-02-28 16:19:37] (step=0014042) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.7474075523380943, LR: 0.0003 +[2026-02-28 16:19:45] (step=0014043) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.747603208765408, LR: 0.0003 +[2026-02-28 16:19:53] (step=0014044) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.7477988651927214, LR: 0.0003 +[2026-02-28 16:20:01] (step=0014045) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 2.7479945216200354, LR: 0.0003 +[2026-02-28 16:20:08] (step=0014046) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.748190178047349, LR: 0.0003 +[2026-02-28 16:20:16] (step=0014047) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.7483858344746626, LR: 0.0003 +[2026-02-28 16:20:24] (step=0014048) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.748581490901976, LR: 0.0003 +[2026-02-28 16:20:32] (step=0014049) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.7487771473292897, LR: 0.0003 +[2026-02-28 16:20:40] (step=0014050) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.7489728037566032, LR: 0.0003 +[2026-02-28 16:20:48] (step=0014051) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.7491684601839173, LR: 0.0003 +[2026-02-28 16:20:55] (step=0014052) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.749364116611231, LR: 0.0003 +[2026-02-28 16:21:03] (step=0014053) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.7495597730385444, LR: 0.0003 +[2026-02-28 16:21:11] (step=0014054) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.749755429465858, LR: 0.0003 +[2026-02-28 16:21:19] (step=0014055) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.7499510858931715, LR: 0.0003 +[2026-02-28 16:21:27] (step=0014056) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.750146742320485, LR: 0.0003 +[2026-02-28 16:21:35] (step=0014057) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 2.750342398747799, LR: 0.0003 +[2026-02-28 16:21:42] (step=0014058) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 2.7505380551751126, LR: 0.0003 +[2026-02-28 16:21:50] (step=0014059) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.750733711602426, LR: 0.0003 +[2026-02-28 16:21:58] (step=0014060) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.7509293680297398, LR: 0.0003 +[2026-02-28 16:22:06] (step=0014061) Train Loss: 0.4666, Train Steps/Sec: 0.13, Epoch: 2.7511250244570533, LR: 0.0003 +[2026-02-28 16:22:14] (step=0014062) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.751320680884367, LR: 0.0003 +[2026-02-28 16:22:21] (step=0014063) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 2.751516337311681, LR: 0.0003 +[2026-02-28 16:22:29] (step=0014064) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.7517119937389944, LR: 0.0003 +[2026-02-28 16:22:37] (step=0014065) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.751907650166308, LR: 0.0003 +[2026-02-28 16:22:45] (step=0014066) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.7521033065936216, LR: 0.0003 +[2026-02-28 16:22:53] (step=0014067) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.752298963020935, LR: 0.0003 +[2026-02-28 16:23:01] (step=0014068) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.7524946194482487, LR: 0.0003 +[2026-02-28 16:23:09] (step=0014069) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.7526902758755627, LR: 0.0003 +[2026-02-28 16:23:16] (step=0014070) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.7528859323028763, LR: 0.0003 +[2026-02-28 16:23:24] (step=0014071) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.75308158873019, LR: 0.0003 +[2026-02-28 16:23:32] (step=0014072) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.7532772451575034, LR: 0.0003 +[2026-02-28 16:23:40] (step=0014073) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.753472901584817, LR: 0.0003 +[2026-02-28 16:23:48] (step=0014074) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 2.7536685580121305, LR: 0.0003 +[2026-02-28 16:23:56] (step=0014075) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.7538642144394445, LR: 0.0003 +[2026-02-28 16:24:04] (step=0014076) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.754059870866758, LR: 0.0003 +[2026-02-28 16:24:11] (step=0014077) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.7542555272940716, LR: 0.0003 +[2026-02-28 16:24:19] (step=0014078) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.754451183721385, LR: 0.0003 +[2026-02-28 16:24:27] (step=0014079) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.7546468401486988, LR: 0.0003 +[2026-02-28 16:24:35] (step=0014080) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.7548424965760123, LR: 0.0003 +[2026-02-28 16:24:43] (step=0014081) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.7550381530033263, LR: 0.0003 +[2026-02-28 16:24:51] (step=0014082) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 2.75523380943064, LR: 0.0003 +[2026-02-28 16:24:59] (step=0014083) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.7554294658579535, LR: 0.0003 +[2026-02-28 16:25:06] (step=0014084) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 2.755625122285267, LR: 0.0003 +[2026-02-28 16:25:14] (step=0014085) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.7558207787125806, LR: 0.0003 +[2026-02-28 16:25:22] (step=0014086) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.756016435139894, LR: 0.0003 +[2026-02-28 16:25:30] (step=0014087) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.756212091567208, LR: 0.0003 +[2026-02-28 16:25:38] (step=0014088) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 2.7564077479945217, LR: 0.0003 +[2026-02-28 16:25:46] (step=0014089) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.7566034044218353, LR: 0.0003 +[2026-02-28 16:25:53] (step=0014090) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 2.756799060849149, LR: 0.0003 +[2026-02-28 16:26:01] (step=0014091) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.7569947172764624, LR: 0.0003 +[2026-02-28 16:26:09] (step=0014092) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.757190373703776, LR: 0.0003 +[2026-02-28 16:26:17] (step=0014093) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.75738603013109, LR: 0.0003 +[2026-02-28 16:26:25] (step=0014094) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.7575816865584035, LR: 0.0003 +[2026-02-28 16:26:33] (step=0014095) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.757777342985717, LR: 0.0003 +[2026-02-28 16:26:40] (step=0014096) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.7579729994130306, LR: 0.0003 +[2026-02-28 16:26:48] (step=0014097) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.758168655840344, LR: 0.0003 +[2026-02-28 16:26:56] (step=0014098) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.758364312267658, LR: 0.0003 +[2026-02-28 16:27:04] (step=0014099) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 2.7585599686949718, LR: 0.0003 +[2026-02-28 16:27:12] (step=0014100) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.7587556251222853, LR: 0.0003 +[2026-02-28 16:27:20] (step=0014101) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.758951281549599, LR: 0.0003 +[2026-02-28 16:27:27] (step=0014102) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 2.7591469379769125, LR: 0.0003 +[2026-02-28 16:27:35] (step=0014103) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.759342594404226, LR: 0.0003 +[2026-02-28 16:27:43] (step=0014104) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.75953825083154, LR: 0.0003 +[2026-02-28 16:27:51] (step=0014105) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.7597339072588536, LR: 0.0003 +[2026-02-28 16:27:59] (step=0014106) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.759929563686167, LR: 0.0003 +[2026-02-28 16:28:07] (step=0014107) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.7601252201134807, LR: 0.0003 +[2026-02-28 16:28:14] (step=0014108) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 2.7603208765407943, LR: 0.0003 +[2026-02-28 16:28:22] (step=0014109) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.760516532968108, LR: 0.0003 +[2026-02-28 16:28:30] (step=0014110) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.760712189395422, LR: 0.0003 +[2026-02-28 16:28:38] (step=0014111) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.7609078458227354, LR: 0.0003 +[2026-02-28 16:28:46] (step=0014112) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.761103502250049, LR: 0.0003 +[2026-02-28 16:28:54] (step=0014113) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.7612991586773625, LR: 0.0003 +[2026-02-28 16:29:02] (step=0014114) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.761494815104676, LR: 0.0003 +[2026-02-28 16:29:09] (step=0014115) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.7616904715319897, LR: 0.0003 +[2026-02-28 16:29:17] (step=0014116) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.7618861279593037, LR: 0.0003 +[2026-02-28 16:29:25] (step=0014117) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.7620817843866172, LR: 0.0003 +[2026-02-28 16:29:33] (step=0014118) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.762277440813931, LR: 0.0003 +[2026-02-28 16:29:41] (step=0014119) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.7624730972412443, LR: 0.0003 +[2026-02-28 16:29:49] (step=0014120) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.762668753668558, LR: 0.0003 +[2026-02-28 16:29:57] (step=0014121) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.7628644100958715, LR: 0.0003 +[2026-02-28 16:30:04] (step=0014122) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.7630600665231855, LR: 0.0003 +[2026-02-28 16:30:12] (step=0014123) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 2.763255722950499, LR: 0.0003 +[2026-02-28 16:30:20] (step=0014124) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.7634513793778126, LR: 0.0003 +[2026-02-28 16:30:28] (step=0014125) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.763647035805126, LR: 0.0003 +[2026-02-28 16:30:36] (step=0014126) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.7638426922324397, LR: 0.0003 +[2026-02-28 16:30:44] (step=0014127) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.7640383486597533, LR: 0.0003 +[2026-02-28 16:30:51] (step=0014128) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.7642340050870673, LR: 0.0003 +[2026-02-28 16:30:59] (step=0014129) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.764429661514381, LR: 0.0003 +[2026-02-28 16:31:07] (step=0014130) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.7646253179416944, LR: 0.0003 +[2026-02-28 16:31:15] (step=0014131) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.764820974369008, LR: 0.0003 +[2026-02-28 16:31:23] (step=0014132) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.7650166307963215, LR: 0.0003 +[2026-02-28 16:31:31] (step=0014133) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.765212287223635, LR: 0.0003 +[2026-02-28 16:31:38] (step=0014134) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.765407943650949, LR: 0.0003 +[2026-02-28 16:31:46] (step=0014135) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.7656036000782627, LR: 0.0003 +[2026-02-28 16:31:54] (step=0014136) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 2.7657992565055762, LR: 0.0003 +[2026-02-28 16:32:02] (step=0014137) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.76599491293289, LR: 0.0003 +[2026-02-28 16:32:10] (step=0014138) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.7661905693602034, LR: 0.0003 +[2026-02-28 16:32:18] (step=0014139) Train Loss: 0.4696, Train Steps/Sec: 0.13, Epoch: 2.766386225787517, LR: 0.0003 +[2026-02-28 16:32:26] (step=0014140) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.766581882214831, LR: 0.0003 +[2026-02-28 16:32:33] (step=0014141) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.7667775386421445, LR: 0.0003 +[2026-02-28 16:32:41] (step=0014142) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.766973195069458, LR: 0.0003 +[2026-02-28 16:32:49] (step=0014143) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.7671688514967716, LR: 0.0003 +[2026-02-28 16:32:57] (step=0014144) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.767364507924085, LR: 0.0003 +[2026-02-28 16:33:05] (step=0014145) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.7675601643513987, LR: 0.0003 +[2026-02-28 16:33:13] (step=0014146) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.7677558207787127, LR: 0.0003 +[2026-02-28 16:33:20] (step=0014147) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 2.7679514772060263, LR: 0.0003 +[2026-02-28 16:33:28] (step=0014148) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.76814713363334, LR: 0.0003 +[2026-02-28 16:33:36] (step=0014149) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.7683427900606534, LR: 0.0003 +[2026-02-28 16:33:44] (step=0014150) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.768538446487967, LR: 0.0003 +[2026-02-28 16:33:52] (step=0014151) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.7687341029152805, LR: 0.0003 +[2026-02-28 16:34:00] (step=0014152) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 2.7689297593425946, LR: 0.0003 +[2026-02-28 16:34:07] (step=0014153) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.769125415769908, LR: 0.0003 +[2026-02-28 16:34:15] (step=0014154) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.7693210721972217, LR: 0.0003 +[2026-02-28 16:34:23] (step=0014155) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.7695167286245352, LR: 0.0003 +[2026-02-28 16:34:31] (step=0014156) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.769712385051849, LR: 0.0003 +[2026-02-28 16:34:39] (step=0014157) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.769908041479163, LR: 0.0003 +[2026-02-28 16:34:47] (step=0014158) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.7701036979064764, LR: 0.0003 +[2026-02-28 16:34:54] (step=0014159) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.77029935433379, LR: 0.0003 +[2026-02-28 16:35:02] (step=0014160) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.7704950107611035, LR: 0.0003 +[2026-02-28 16:35:10] (step=0014161) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 2.770690667188417, LR: 0.0003 +[2026-02-28 16:35:18] (step=0014162) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.7708863236157306, LR: 0.0003 +[2026-02-28 16:35:26] (step=0014163) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.7710819800430446, LR: 0.0003 +[2026-02-28 16:35:34] (step=0014164) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 2.771277636470358, LR: 0.0003 +[2026-02-28 16:35:41] (step=0014165) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.7714732928976717, LR: 0.0003 +[2026-02-28 16:35:49] (step=0014166) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.7716689493249853, LR: 0.0003 +[2026-02-28 16:35:57] (step=0014167) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 2.771864605752299, LR: 0.0003 +[2026-02-28 16:36:05] (step=0014168) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.7720602621796124, LR: 0.0003 +[2026-02-28 16:36:13] (step=0014169) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 2.7722559186069264, LR: 0.0003 +[2026-02-28 16:36:21] (step=0014170) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.77245157503424, LR: 0.0003 +[2026-02-28 16:36:28] (step=0014171) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.7726472314615536, LR: 0.0003 +[2026-02-28 16:36:36] (step=0014172) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.772842887888867, LR: 0.0003 +[2026-02-28 16:36:44] (step=0014173) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 2.7730385443161807, LR: 0.0003 +[2026-02-28 16:36:52] (step=0014174) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.7732342007434942, LR: 0.0003 +[2026-02-28 16:37:00] (step=0014175) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.7734298571708083, LR: 0.0003 +[2026-02-28 16:37:08] (step=0014176) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.773625513598122, LR: 0.0003 +[2026-02-28 16:37:16] (step=0014177) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.7738211700254354, LR: 0.0003 +[2026-02-28 16:37:23] (step=0014178) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.774016826452749, LR: 0.0003 +[2026-02-28 16:37:31] (step=0014179) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.7742124828800625, LR: 0.0003 +[2026-02-28 16:37:39] (step=0014180) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 2.774408139307376, LR: 0.0003 +[2026-02-28 16:37:47] (step=0014181) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.77460379573469, LR: 0.0003 +[2026-02-28 16:37:55] (step=0014182) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.7747994521620036, LR: 0.0003 +[2026-02-28 16:38:03] (step=0014183) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.774995108589317, LR: 0.0003 +[2026-02-28 16:38:11] (step=0014184) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.7751907650166308, LR: 0.0003 +[2026-02-28 16:38:18] (step=0014185) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.7753864214439443, LR: 0.0003 +[2026-02-28 16:38:26] (step=0014186) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.775582077871258, LR: 0.0003 +[2026-02-28 16:38:34] (step=0014187) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 2.775777734298572, LR: 0.0003 +[2026-02-28 16:38:42] (step=0014188) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.7759733907258854, LR: 0.0003 +[2026-02-28 16:38:50] (step=0014189) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.776169047153199, LR: 0.0003 +[2026-02-28 16:38:58] (step=0014190) Train Loss: 0.4735, Train Steps/Sec: 0.13, Epoch: 2.7763647035805126, LR: 0.0003 +[2026-02-28 16:39:05] (step=0014191) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.776560360007826, LR: 0.0003 +[2026-02-28 16:39:13] (step=0014192) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.7767560164351397, LR: 0.0003 +[2026-02-28 16:39:21] (step=0014193) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.7769516728624537, LR: 0.0003 +[2026-02-28 16:39:29] (step=0014194) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.7771473292897673, LR: 0.0003 +[2026-02-28 16:39:37] (step=0014195) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.777342985717081, LR: 0.0003 +[2026-02-28 16:39:45] (step=0014196) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.7775386421443944, LR: 0.0003 +[2026-02-28 16:39:53] (step=0014197) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 2.777734298571708, LR: 0.0003 +[2026-02-28 16:40:00] (step=0014198) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.7779299549990215, LR: 0.0003 +[2026-02-28 16:40:08] (step=0014199) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.7781256114263355, LR: 0.0003 +[2026-02-28 16:40:16] (step=0014200) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.778321267853649, LR: 0.0003 +[2026-02-28 16:40:24] (step=0014201) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.7785169242809626, LR: 0.0003 +[2026-02-28 16:40:32] (step=0014202) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.778712580708276, LR: 0.0003 +[2026-02-28 16:40:40] (step=0014203) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.7789082371355898, LR: 0.0003 +[2026-02-28 16:40:47] (step=0014204) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.7791038935629033, LR: 0.0003 +[2026-02-28 16:40:55] (step=0014205) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.7792995499902173, LR: 0.0003 +[2026-02-28 16:41:03] (step=0014206) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.779495206417531, LR: 0.0003 +[2026-02-28 16:41:11] (step=0014207) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.7796908628448445, LR: 0.0003 +[2026-02-28 16:41:19] (step=0014208) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 2.779886519272158, LR: 0.0003 +[2026-02-28 16:41:27] (step=0014209) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.7800821756994716, LR: 0.0003 +[2026-02-28 16:41:34] (step=0014210) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 2.7802778321267856, LR: 0.0003 +[2026-02-28 16:41:42] (step=0014211) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.780473488554099, LR: 0.0003 +[2026-02-28 16:41:50] (step=0014212) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.7806691449814127, LR: 0.0003 +[2026-02-28 16:41:58] (step=0014213) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 2.7808648014087263, LR: 0.0003 +[2026-02-28 16:42:06] (step=0014214) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 2.78106045783604, LR: 0.0003 +[2026-02-28 16:42:14] (step=0014215) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.7812561142633534, LR: 0.0003 +[2026-02-28 16:42:21] (step=0014216) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.7814517706906674, LR: 0.0003 +[2026-02-28 16:42:29] (step=0014217) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.781647427117981, LR: 0.0003 +[2026-02-28 16:42:37] (step=0014218) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.7818430835452945, LR: 0.0003 +[2026-02-28 16:42:45] (step=0014219) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 2.782038739972608, LR: 0.0003 +[2026-02-28 16:42:53] (step=0014220) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.7822343963999216, LR: 0.0003 +[2026-02-28 16:43:00] (step=0014221) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.782430052827235, LR: 0.0003 +[2026-02-28 16:43:08] (step=0014222) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.782625709254549, LR: 0.0003 +[2026-02-28 16:43:16] (step=0014223) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 2.782821365681863, LR: 0.0003 +[2026-02-28 16:43:24] (step=0014224) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 2.7830170221091763, LR: 0.0003 +[2026-02-28 16:43:32] (step=0014225) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.78321267853649, LR: 0.0003 +[2026-02-28 16:43:40] (step=0014226) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.7834083349638035, LR: 0.0003 +[2026-02-28 16:43:48] (step=0014227) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.783603991391117, LR: 0.0003 +[2026-02-28 16:43:55] (step=0014228) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 2.783799647818431, LR: 0.0003 +[2026-02-28 16:44:03] (step=0014229) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 2.7839953042457446, LR: 0.0003 +[2026-02-28 16:44:11] (step=0014230) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.784190960673058, LR: 0.0003 +[2026-02-28 16:44:19] (step=0014231) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.7843866171003717, LR: 0.0003 +[2026-02-28 16:44:27] (step=0014232) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.7845822735276853, LR: 0.0003 +[2026-02-28 16:44:35] (step=0014233) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.784777929954999, LR: 0.0003 +[2026-02-28 16:44:43] (step=0014234) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.784973586382313, LR: 0.0003 +[2026-02-28 16:44:50] (step=0014235) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.7851692428096264, LR: 0.0003 +[2026-02-28 16:44:58] (step=0014236) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.78536489923694, LR: 0.0003 +[2026-02-28 16:45:06] (step=0014237) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 2.7855605556642535, LR: 0.0003 +[2026-02-28 16:45:14] (step=0014238) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.785756212091567, LR: 0.0003 +[2026-02-28 16:45:22] (step=0014239) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.7859518685188807, LR: 0.0003 +[2026-02-28 16:45:30] (step=0014240) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 2.7861475249461947, LR: 0.0003 +[2026-02-28 16:45:37] (step=0014241) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.7863431813735082, LR: 0.0003 +[2026-02-28 16:45:45] (step=0014242) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.786538837800822, LR: 0.0003 +[2026-02-28 16:45:53] (step=0014243) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.7867344942281354, LR: 0.0003 +[2026-02-28 16:46:01] (step=0014244) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.786930150655449, LR: 0.0003 +[2026-02-28 16:46:09] (step=0014245) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.7871258070827625, LR: 0.0003 +[2026-02-28 16:46:17] (step=0014246) Train Loss: 0.4662, Train Steps/Sec: 0.13, Epoch: 2.7873214635100765, LR: 0.0003 +[2026-02-28 16:46:24] (step=0014247) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.78751711993739, LR: 0.0003 +[2026-02-28 16:46:32] (step=0014248) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.7877127763647036, LR: 0.0003 +[2026-02-28 16:46:40] (step=0014249) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.787908432792017, LR: 0.0003 +[2026-02-28 16:46:48] (step=0014250) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.7881040892193307, LR: 0.0003 +[2026-02-28 16:46:56] (step=0014251) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.7882997456466443, LR: 0.0003 +[2026-02-28 16:47:04] (step=0014252) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.7884954020739583, LR: 0.0003 +[2026-02-28 16:47:11] (step=0014253) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.788691058501272, LR: 0.0003 +[2026-02-28 16:47:19] (step=0014254) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.7888867149285854, LR: 0.0003 +[2026-02-28 16:47:27] (step=0014255) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.789082371355899, LR: 0.0003 +[2026-02-28 16:47:35] (step=0014256) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.7892780277832125, LR: 0.0003 +[2026-02-28 16:47:43] (step=0014257) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.789473684210526, LR: 0.0003 +[2026-02-28 16:47:51] (step=0014258) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.78966934063784, LR: 0.0003 +[2026-02-28 16:47:58] (step=0014259) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.7898649970651537, LR: 0.0003 +[2026-02-28 16:48:06] (step=0014260) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.7900606534924672, LR: 0.0003 +[2026-02-28 16:48:14] (step=0014261) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.790256309919781, LR: 0.0003 +[2026-02-28 16:48:22] (step=0014262) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.7904519663470944, LR: 0.0003 +[2026-02-28 16:48:30] (step=0014263) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.790647622774408, LR: 0.0003 +[2026-02-28 16:48:38] (step=0014264) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.790843279201722, LR: 0.0003 +[2026-02-28 16:48:46] (step=0014265) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.7910389356290355, LR: 0.0003 +[2026-02-28 16:48:53] (step=0014266) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.791234592056349, LR: 0.0003 +[2026-02-28 16:49:01] (step=0014267) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.7914302484836626, LR: 0.0003 +[2026-02-28 16:49:09] (step=0014268) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.791625904910976, LR: 0.0003 +[2026-02-28 16:49:17] (step=0014269) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.79182156133829, LR: 0.0003 +[2026-02-28 16:49:25] (step=0014270) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.7920172177656037, LR: 0.0003 +[2026-02-28 16:49:33] (step=0014271) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.7922128741929173, LR: 0.0003 +[2026-02-28 16:49:41] (step=0014272) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.792408530620231, LR: 0.0003 +[2026-02-28 16:49:48] (step=0014273) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.7926041870475444, LR: 0.0003 +[2026-02-28 16:49:56] (step=0014274) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 2.792799843474858, LR: 0.0003 +[2026-02-28 16:50:04] (step=0014275) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.792995499902172, LR: 0.0003 +[2026-02-28 16:50:12] (step=0014276) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.7931911563294856, LR: 0.0003 +[2026-02-28 16:50:20] (step=0014277) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.793386812756799, LR: 0.0003 +[2026-02-28 16:50:28] (step=0014278) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.7935824691841127, LR: 0.0003 +[2026-02-28 16:50:35] (step=0014279) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.7937781256114262, LR: 0.0003 +[2026-02-28 16:50:43] (step=0014280) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.79397378203874, LR: 0.0003 +[2026-02-28 16:50:51] (step=0014281) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.794169438466054, LR: 0.0003 +[2026-02-28 16:50:59] (step=0014282) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.7943650948933674, LR: 0.0003 +[2026-02-28 16:51:07] (step=0014283) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.794560751320681, LR: 0.0003 +[2026-02-28 16:51:15] (step=0014284) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.7947564077479945, LR: 0.0003 +[2026-02-28 16:51:23] (step=0014285) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 2.794952064175308, LR: 0.0003 +[2026-02-28 16:51:30] (step=0014286) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.7951477206026216, LR: 0.0003 +[2026-02-28 16:51:38] (step=0014287) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 2.7953433770299356, LR: 0.0003 +[2026-02-28 16:51:46] (step=0014288) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.795539033457249, LR: 0.0003 +[2026-02-28 16:51:54] (step=0014289) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.7957346898845628, LR: 0.0003 +[2026-02-28 16:52:02] (step=0014290) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.7959303463118763, LR: 0.0003 +[2026-02-28 16:52:10] (step=0014291) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.79612600273919, LR: 0.0003 +[2026-02-28 16:52:17] (step=0014292) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.7963216591665034, LR: 0.0003 +[2026-02-28 16:52:25] (step=0014293) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.7965173155938174, LR: 0.0003 +[2026-02-28 16:52:33] (step=0014294) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.796712972021131, LR: 0.0003 +[2026-02-28 16:52:41] (step=0014295) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 2.7969086284484446, LR: 0.0003 +[2026-02-28 16:52:49] (step=0014296) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.797104284875758, LR: 0.0003 +[2026-02-28 16:52:57] (step=0014297) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.7972999413030717, LR: 0.0003 +[2026-02-28 16:53:04] (step=0014298) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 2.7974955977303853, LR: 0.0003 +[2026-02-28 16:53:12] (step=0014299) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.7976912541576993, LR: 0.0003 +[2026-02-28 16:53:20] (step=0014300) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.797886910585013, LR: 0.0003 +[2026-02-28 16:53:28] (step=0014301) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.7980825670123264, LR: 0.0003 +[2026-02-28 16:53:36] (step=0014302) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.79827822343964, LR: 0.0003 +[2026-02-28 16:53:44] (step=0014303) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.7984738798669535, LR: 0.0003 +[2026-02-28 16:53:51] (step=0014304) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.798669536294267, LR: 0.0003 +[2026-02-28 16:53:59] (step=0014305) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.798865192721581, LR: 0.0003 +[2026-02-28 16:54:07] (step=0014306) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.7990608491488946, LR: 0.0003 +[2026-02-28 16:54:15] (step=0014307) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 2.799256505576208, LR: 0.0003 +[2026-02-28 16:54:23] (step=0014308) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 2.7994521620035218, LR: 0.0003 +[2026-02-28 16:54:30] (step=0014309) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.7996478184308353, LR: 0.0003 +[2026-02-28 16:54:38] (step=0014310) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.799843474858149, LR: 0.0003 +[2026-02-28 16:54:46] (step=0014311) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.800039131285463, LR: 0.0003 +[2026-02-28 16:54:54] (step=0014312) Train Loss: 0.4476, Train Steps/Sec: 0.12, Epoch: 2.8002347877127765, LR: 0.0003 +[2026-02-28 16:55:02] (step=0014313) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.80043044414009, LR: 0.0003 +[2026-02-28 16:55:10] (step=0014314) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.8006261005674036, LR: 0.0003 +[2026-02-28 16:55:18] (step=0014315) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.800821756994717, LR: 0.0003 +[2026-02-28 16:55:26] (step=0014316) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.8010174134220307, LR: 0.0003 +[2026-02-28 16:55:33] (step=0014317) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.8012130698493447, LR: 0.0003 +[2026-02-28 16:55:41] (step=0014318) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.8014087262766583, LR: 0.0003 +[2026-02-28 16:55:49] (step=0014319) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.801604382703972, LR: 0.0003 +[2026-02-28 16:55:57] (step=0014320) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 2.8018000391312854, LR: 0.0003 +[2026-02-28 16:56:05] (step=0014321) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.801995695558599, LR: 0.0003 +[2026-02-28 16:56:12] (step=0014322) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.8021913519859125, LR: 0.0003 +[2026-02-28 16:56:20] (step=0014323) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.8023870084132265, LR: 0.0003 +[2026-02-28 16:56:28] (step=0014324) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.80258266484054, LR: 0.0003 +[2026-02-28 16:56:36] (step=0014325) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.8027783212678536, LR: 0.0003 +[2026-02-28 16:56:44] (step=0014326) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.802973977695167, LR: 0.0003 +[2026-02-28 16:56:52] (step=0014327) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.8031696341224808, LR: 0.0003 +[2026-02-28 16:56:59] (step=0014328) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 2.8033652905497948, LR: 0.0003 +[2026-02-28 16:57:07] (step=0014329) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 2.8035609469771083, LR: 0.0003 +[2026-02-28 16:57:15] (step=0014330) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.803756603404422, LR: 0.0003 +[2026-02-28 16:57:23] (step=0014331) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.8039522598317355, LR: 0.0003 +[2026-02-28 16:57:31] (step=0014332) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.804147916259049, LR: 0.0003 +[2026-02-28 16:57:39] (step=0014333) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.8043435726863626, LR: 0.0003 +[2026-02-28 16:57:46] (step=0014334) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.8045392291136766, LR: 0.0003 +[2026-02-28 16:57:54] (step=0014335) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.80473488554099, LR: 0.0003 +[2026-02-28 16:58:02] (step=0014336) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.8049305419683037, LR: 0.0003 +[2026-02-28 16:58:10] (step=0014337) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.8051261983956173, LR: 0.0003 +[2026-02-28 16:58:18] (step=0014338) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.805321854822931, LR: 0.0003 +[2026-02-28 16:58:26] (step=0014339) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.8055175112502444, LR: 0.0003 +[2026-02-28 16:58:34] (step=0014340) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 2.8057131676775584, LR: 0.0003 +[2026-02-28 16:58:41] (step=0014341) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.805908824104872, LR: 0.0003 +[2026-02-28 16:58:49] (step=0014342) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.8061044805321855, LR: 0.0003 +[2026-02-28 16:58:57] (step=0014343) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 2.806300136959499, LR: 0.0003 +[2026-02-28 16:59:05] (step=0014344) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.8064957933868127, LR: 0.0003 +[2026-02-28 16:59:13] (step=0014345) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.806691449814126, LR: 0.0003 +[2026-02-28 16:59:21] (step=0014346) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.80688710624144, LR: 0.0003 +[2026-02-28 16:59:28] (step=0014347) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 2.807082762668754, LR: 0.0003 +[2026-02-28 16:59:36] (step=0014348) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.8072784190960673, LR: 0.0003 +[2026-02-28 16:59:44] (step=0014349) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.807474075523381, LR: 0.0003 +[2026-02-28 16:59:52] (step=0014350) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.8076697319506945, LR: 0.0003 +[2026-02-28 17:00:00] (step=0014351) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.807865388378008, LR: 0.0003 +[2026-02-28 17:00:08] (step=0014352) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 2.808061044805322, LR: 0.0003 +[2026-02-28 17:00:15] (step=0014353) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.8082567012326356, LR: 0.0003 +[2026-02-28 17:00:23] (step=0014354) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.808452357659949, LR: 0.0003 +[2026-02-28 17:00:31] (step=0014355) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.8086480140872627, LR: 0.0003 +[2026-02-28 17:00:39] (step=0014356) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.8088436705145763, LR: 0.0003 +[2026-02-28 17:00:47] (step=0014357) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.80903932694189, LR: 0.0003 +[2026-02-28 17:00:55] (step=0014358) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 2.809234983369204, LR: 0.0003 +[2026-02-28 17:01:02] (step=0014359) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.8094306397965174, LR: 0.0003 +[2026-02-28 17:01:10] (step=0014360) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.809626296223831, LR: 0.0003 +[2026-02-28 17:01:18] (step=0014361) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.8098219526511445, LR: 0.0003 +[2026-02-28 17:01:26] (step=0014362) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.810017609078458, LR: 0.0003 +[2026-02-28 17:01:34] (step=0014363) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.8102132655057717, LR: 0.0003 +[2026-02-28 17:01:42] (step=0014364) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.8104089219330857, LR: 0.0003 +[2026-02-28 17:01:50] (step=0014365) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.8106045783603992, LR: 0.0003 +[2026-02-28 17:01:57] (step=0014366) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.810800234787713, LR: 0.0003 +[2026-02-28 17:02:05] (step=0014367) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.8109958912150264, LR: 0.0003 +[2026-02-28 17:02:13] (step=0014368) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.81119154764234, LR: 0.0003 +[2026-02-28 17:02:21] (step=0014369) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 2.8113872040696535, LR: 0.0003 +[2026-02-28 17:02:29] (step=0014370) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.8115828604969675, LR: 0.0003 +[2026-02-28 17:02:37] (step=0014371) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.811778516924281, LR: 0.0003 +[2026-02-28 17:02:44] (step=0014372) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.8119741733515946, LR: 0.0003 +[2026-02-28 17:02:52] (step=0014373) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.812169829778908, LR: 0.0003 +[2026-02-28 17:03:00] (step=0014374) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.8123654862062217, LR: 0.0003 +[2026-02-28 17:03:08] (step=0014375) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.8125611426335353, LR: 0.0003 +[2026-02-28 17:03:16] (step=0014376) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.8127567990608493, LR: 0.0003 +[2026-02-28 17:03:24] (step=0014377) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 2.812952455488163, LR: 0.0003 +[2026-02-28 17:03:31] (step=0014378) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 2.8131481119154764, LR: 0.0003 +[2026-02-28 17:03:39] (step=0014379) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.81334376834279, LR: 0.0003 +[2026-02-28 17:03:47] (step=0014380) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.8135394247701035, LR: 0.0003 +[2026-02-28 17:03:55] (step=0014381) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 2.8137350811974176, LR: 0.0003 +[2026-02-28 17:04:03] (step=0014382) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.813930737624731, LR: 0.0003 +[2026-02-28 17:04:11] (step=0014383) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.8141263940520447, LR: 0.0003 +[2026-02-28 17:04:19] (step=0014384) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.8143220504793582, LR: 0.0003 +[2026-02-28 17:04:26] (step=0014385) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.814517706906672, LR: 0.0003 +[2026-02-28 17:04:34] (step=0014386) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.8147133633339854, LR: 0.0003 +[2026-02-28 17:04:42] (step=0014387) Train Loss: 0.4693, Train Steps/Sec: 0.13, Epoch: 2.8149090197612994, LR: 0.0003 +[2026-02-28 17:04:50] (step=0014388) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.815104676188613, LR: 0.0003 +[2026-02-28 17:04:58] (step=0014389) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.8153003326159265, LR: 0.0003 +[2026-02-28 17:05:06] (step=0014390) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.81549598904324, LR: 0.0003 +[2026-02-28 17:05:13] (step=0014391) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 2.8156916454705536, LR: 0.0003 +[2026-02-28 17:05:21] (step=0014392) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 2.815887301897867, LR: 0.0003 +[2026-02-28 17:05:29] (step=0014393) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.816082958325181, LR: 0.0003 +[2026-02-28 17:05:37] (step=0014394) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.8162786147524947, LR: 0.0003 +[2026-02-28 17:05:45] (step=0014395) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.8164742711798083, LR: 0.0003 +[2026-02-28 17:05:53] (step=0014396) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 2.816669927607122, LR: 0.0003 +[2026-02-28 17:06:00] (step=0014397) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.8168655840344354, LR: 0.0003 +[2026-02-28 17:06:08] (step=0014398) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 2.817061240461749, LR: 0.0003 +[2026-02-28 17:06:16] (step=0014399) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.817256896889063, LR: 0.0003 +[2026-02-28 17:06:24] (step=0014400) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.8174525533163766, LR: 0.0003 +[2026-02-28 17:06:32] (step=0014401) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.81764820974369, LR: 0.0003 +[2026-02-28 17:06:40] (step=0014402) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 2.8178438661710037, LR: 0.0003 +[2026-02-28 17:06:47] (step=0014403) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.8180395225983172, LR: 0.0003 +[2026-02-28 17:06:55] (step=0014404) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.818235179025631, LR: 0.0003 +[2026-02-28 17:07:03] (step=0014405) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 2.818430835452945, LR: 0.0003 +[2026-02-28 17:07:11] (step=0014406) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 2.8186264918802584, LR: 0.0003 +[2026-02-28 17:07:19] (step=0014407) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.818822148307572, LR: 0.0003 +[2026-02-28 17:07:27] (step=0014408) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 2.8190178047348855, LR: 0.0003 +[2026-02-28 17:07:35] (step=0014409) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.819213461162199, LR: 0.0003 +[2026-02-28 17:07:42] (step=0014410) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.8194091175895126, LR: 0.0003 +[2026-02-28 17:07:50] (step=0014411) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.8196047740168266, LR: 0.0003 +[2026-02-28 17:07:58] (step=0014412) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.81980043044414, LR: 0.0003 +[2026-02-28 17:08:06] (step=0014413) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.8199960868714538, LR: 0.0003 +[2026-02-28 17:08:14] (step=0014414) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 2.8201917432987673, LR: 0.0003 +[2026-02-28 17:08:22] (step=0014415) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.820387399726081, LR: 0.0003 +[2026-02-28 17:08:29] (step=0014416) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.8205830561533944, LR: 0.0003 +[2026-02-28 17:08:37] (step=0014417) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.8207787125807084, LR: 0.0003 +[2026-02-28 17:08:45] (step=0014418) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.820974369008022, LR: 0.0003 +[2026-02-28 17:08:53] (step=0014419) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.8211700254353356, LR: 0.0003 +[2026-02-28 17:09:01] (step=0014420) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.821365681862649, LR: 0.0003 +[2026-02-28 17:09:09] (step=0014421) Train Loss: 0.4715, Train Steps/Sec: 0.13, Epoch: 2.8215613382899627, LR: 0.0003 +[2026-02-28 17:09:16] (step=0014422) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.8217569947172763, LR: 0.0003 +[2026-02-28 17:09:24] (step=0014423) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.8219526511445903, LR: 0.0003 +[2026-02-28 17:09:32] (step=0014424) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.822148307571904, LR: 0.0003 +[2026-02-28 17:09:40] (step=0014425) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 2.8223439639992174, LR: 0.0003 +[2026-02-28 17:09:48] (step=0014426) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.822539620426531, LR: 0.0003 +[2026-02-28 17:09:56] (step=0014427) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.8227352768538445, LR: 0.0003 +[2026-02-28 17:10:03] (step=0014428) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 2.822930933281158, LR: 0.0003 +[2026-02-28 17:10:11] (step=0014429) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.823126589708472, LR: 0.0003 +[2026-02-28 17:10:19] (step=0014430) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.8233222461357856, LR: 0.0003 +[2026-02-28 17:10:27] (step=0014431) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 2.823517902563099, LR: 0.0003 +[2026-02-28 17:10:35] (step=0014432) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.8237135589904128, LR: 0.0003 +[2026-02-28 17:10:43] (step=0014433) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.8239092154177263, LR: 0.0003 +[2026-02-28 17:10:51] (step=0014434) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 2.82410487184504, LR: 0.0003 +[2026-02-28 17:10:58] (step=0014435) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.824300528272354, LR: 0.0003 +[2026-02-28 17:11:06] (step=0014436) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.8244961846996675, LR: 0.0003 +[2026-02-28 17:11:14] (step=0014437) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.824691841126981, LR: 0.0003 +[2026-02-28 17:11:22] (step=0014438) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.8248874975542946, LR: 0.0003 +[2026-02-28 17:11:30] (step=0014439) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.825083153981608, LR: 0.0003 +[2026-02-28 17:11:38] (step=0014440) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.825278810408922, LR: 0.0003 +[2026-02-28 17:11:45] (step=0014441) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.8254744668362357, LR: 0.0003 +[2026-02-28 17:11:53] (step=0014442) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.8256701232635493, LR: 0.0003 +[2026-02-28 17:12:01] (step=0014443) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.825865779690863, LR: 0.0003 +[2026-02-28 17:12:09] (step=0014444) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 2.8260614361181764, LR: 0.0003 +[2026-02-28 17:12:17] (step=0014445) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.82625709254549, LR: 0.0003 +[2026-02-28 17:12:25] (step=0014446) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.826452748972804, LR: 0.0003 +[2026-02-28 17:12:32] (step=0014447) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.8266484054001175, LR: 0.0003 +[2026-02-28 17:12:40] (step=0014448) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.826844061827431, LR: 0.0003 +[2026-02-28 17:12:48] (step=0014449) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.8270397182547446, LR: 0.0003 +[2026-02-28 17:12:56] (step=0014450) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.827235374682058, LR: 0.0003 +[2026-02-28 17:13:04] (step=0014451) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.8274310311093718, LR: 0.0003 +[2026-02-28 17:13:12] (step=0014452) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 2.827626687536686, LR: 0.0003 +[2026-02-28 17:13:19] (step=0014453) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 2.8278223439639993, LR: 0.0003 +[2026-02-28 17:13:27] (step=0014454) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.828018000391313, LR: 0.0003 +[2026-02-28 17:13:35] (step=0014455) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.8282136568186265, LR: 0.0003 +[2026-02-28 17:13:43] (step=0014456) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.82840931324594, LR: 0.0003 +[2026-02-28 17:13:51] (step=0014457) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 2.8286049696732536, LR: 0.0003 +[2026-02-28 17:13:59] (step=0014458) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.8288006261005676, LR: 0.0003 +[2026-02-28 17:14:07] (step=0014459) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.828996282527881, LR: 0.0003 +[2026-02-28 17:14:14] (step=0014460) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.8291919389551947, LR: 0.0003 +[2026-02-28 17:14:22] (step=0014461) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.8293875953825083, LR: 0.0003 +[2026-02-28 17:14:30] (step=0014462) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.829583251809822, LR: 0.0003 +[2026-02-28 17:14:38] (step=0014463) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.8297789082371354, LR: 0.0003 +[2026-02-28 17:14:46] (step=0014464) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.8299745646644494, LR: 0.0003 +[2026-02-28 17:14:54] (step=0014465) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 2.830170221091763, LR: 0.0003 +[2026-02-28 17:15:01] (step=0014466) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.8303658775190765, LR: 0.0003 +[2026-02-28 17:15:09] (step=0014467) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.83056153394639, LR: 0.0003 +[2026-02-28 17:15:17] (step=0014468) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.8307571903737037, LR: 0.0003 +[2026-02-28 17:15:25] (step=0014469) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.830952846801017, LR: 0.0003 +[2026-02-28 17:15:33] (step=0014470) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.8311485032283312, LR: 0.0003 +[2026-02-28 17:15:41] (step=0014471) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.831344159655645, LR: 0.0003 +[2026-02-28 17:15:49] (step=0014472) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.8315398160829583, LR: 0.0003 +[2026-02-28 17:15:56] (step=0014473) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.831735472510272, LR: 0.0003 +[2026-02-28 17:16:04] (step=0014474) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.8319311289375855, LR: 0.0003 +[2026-02-28 17:16:12] (step=0014475) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.832126785364899, LR: 0.0003 +[2026-02-28 17:16:20] (step=0014476) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.832322441792213, LR: 0.0003 +[2026-02-28 17:16:28] (step=0014477) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 2.8325180982195266, LR: 0.0003 +[2026-02-28 17:16:36] (step=0014478) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 2.83271375464684, LR: 0.0003 +[2026-02-28 17:16:43] (step=0014479) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.8329094110741537, LR: 0.0003 +[2026-02-28 17:16:51] (step=0014480) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.8331050675014673, LR: 0.0003 +[2026-02-28 17:16:59] (step=0014481) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.833300723928781, LR: 0.0003 +[2026-02-28 17:17:07] (step=0014482) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.833496380356095, LR: 0.0003 +[2026-02-28 17:17:15] (step=0014483) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.8336920367834084, LR: 0.0003 +[2026-02-28 17:17:23] (step=0014484) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.833887693210722, LR: 0.0003 +[2026-02-28 17:17:31] (step=0014485) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.8340833496380355, LR: 0.0003 +[2026-02-28 17:17:38] (step=0014486) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 2.834279006065349, LR: 0.0003 +[2026-02-28 17:17:46] (step=0014487) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.8344746624926627, LR: 0.0003 +[2026-02-28 17:17:54] (step=0014488) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 2.8346703189199767, LR: 0.0003 +[2026-02-28 17:18:02] (step=0014489) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.8348659753472902, LR: 0.0003 +[2026-02-28 17:18:10] (step=0014490) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 2.835061631774604, LR: 0.0003 +[2026-02-28 17:18:18] (step=0014491) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.8352572882019174, LR: 0.0003 +[2026-02-28 17:18:25] (step=0014492) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 2.835452944629231, LR: 0.0003 +[2026-02-28 17:18:33] (step=0014493) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 2.835648601056545, LR: 0.0003 +[2026-02-28 17:18:41] (step=0014494) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.8358442574838585, LR: 0.0003 +[2026-02-28 17:18:49] (step=0014495) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.836039913911172, LR: 0.0003 +[2026-02-28 17:18:57] (step=0014496) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.8362355703384856, LR: 0.0003 +[2026-02-28 17:19:05] (step=0014497) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.836431226765799, LR: 0.0003 +[2026-02-28 17:19:13] (step=0014498) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 2.8366268831931127, LR: 0.0003 +[2026-02-28 17:19:20] (step=0014499) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.8368225396204267, LR: 0.0003 +[2026-02-28 17:19:28] (step=0014500) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.8370181960477403, LR: 0.0003 +[2026-02-28 17:19:28] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0014500/ +[2026-02-28 17:19:36] (step=0014501) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.837213852475054, LR: 0.0003 +[2026-02-28 17:19:44] (step=0014502) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.8374095089023674, LR: 0.0003 +[2026-02-28 17:19:52] (step=0014503) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.837605165329681, LR: 0.0003 +[2026-02-28 17:20:00] (step=0014504) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.8378008217569946, LR: 0.0003 +[2026-02-28 17:20:08] (step=0014505) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.8379964781843086, LR: 0.0003 +[2026-02-28 17:20:15] (step=0014506) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 2.838192134611622, LR: 0.0003 +[2026-02-28 17:20:23] (step=0014507) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.8383877910389357, LR: 0.0003 +[2026-02-28 17:20:31] (step=0014508) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.8385834474662492, LR: 0.0003 +[2026-02-28 17:20:39] (step=0014509) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.838779103893563, LR: 0.0003 +[2026-02-28 17:20:47] (step=0014510) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 2.8389747603208764, LR: 0.0003 +[2026-02-28 17:20:55] (step=0014511) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 2.8391704167481904, LR: 0.0003 +[2026-02-28 17:21:03] (step=0014512) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.839366073175504, LR: 0.0003 +[2026-02-28 17:21:10] (step=0014513) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.8395617296028175, LR: 0.0003 +[2026-02-28 17:21:18] (step=0014514) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.839757386030131, LR: 0.0003 +[2026-02-28 17:21:26] (step=0014515) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.8399530424574446, LR: 0.0003 +[2026-02-28 17:21:34] (step=0014516) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.840148698884758, LR: 0.0003 +[2026-02-28 17:21:42] (step=0014517) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.840344355312072, LR: 0.0003 +[2026-02-28 17:21:50] (step=0014518) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.8405400117393858, LR: 0.0003 +[2026-02-28 17:21:58] (step=0014519) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.8407356681666993, LR: 0.0003 +[2026-02-28 17:22:05] (step=0014520) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.840931324594013, LR: 0.0003 +[2026-02-28 17:22:13] (step=0014521) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.8411269810213264, LR: 0.0003 +[2026-02-28 17:22:21] (step=0014522) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 2.84132263744864, LR: 0.0003 +[2026-02-28 17:22:29] (step=0014523) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.841518293875954, LR: 0.0003 +[2026-02-28 17:22:37] (step=0014524) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.8417139503032676, LR: 0.0003 +[2026-02-28 17:22:45] (step=0014525) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.841909606730581, LR: 0.0003 +[2026-02-28 17:22:52] (step=0014526) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.8421052631578947, LR: 0.0003 +[2026-02-28 17:23:00] (step=0014527) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.8423009195852083, LR: 0.0003 +[2026-02-28 17:23:08] (step=0014528) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.842496576012522, LR: 0.0003 +[2026-02-28 17:23:16] (step=0014529) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 2.842692232439836, LR: 0.0003 +[2026-02-28 17:23:24] (step=0014530) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.8428878888671494, LR: 0.0003 +[2026-02-28 17:23:32] (step=0014531) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.843083545294463, LR: 0.0003 +[2026-02-28 17:23:40] (step=0014532) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.8432792017217765, LR: 0.0003 +[2026-02-28 17:23:48] (step=0014533) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.84347485814909, LR: 0.0003 +[2026-02-28 17:23:55] (step=0014534) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.8436705145764036, LR: 0.0003 +[2026-02-28 17:24:03] (step=0014535) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.8438661710037176, LR: 0.0003 +[2026-02-28 17:24:11] (step=0014536) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 2.844061827431031, LR: 0.0003 +[2026-02-28 17:24:19] (step=0014537) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.8442574838583448, LR: 0.0003 +[2026-02-28 17:24:27] (step=0014538) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.8444531402856583, LR: 0.0003 +[2026-02-28 17:24:35] (step=0014539) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.844648796712972, LR: 0.0003 +[2026-02-28 17:24:42] (step=0014540) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.8448444531402854, LR: 0.0003 +[2026-02-28 17:24:50] (step=0014541) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.8450401095675995, LR: 0.0003 +[2026-02-28 17:24:58] (step=0014542) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.845235765994913, LR: 0.0003 +[2026-02-28 17:25:06] (step=0014543) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.8454314224222266, LR: 0.0003 +[2026-02-28 17:25:14] (step=0014544) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.84562707884954, LR: 0.0003 +[2026-02-28 17:25:22] (step=0014545) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 2.8458227352768537, LR: 0.0003 +[2026-02-28 17:25:30] (step=0014546) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.8460183917041673, LR: 0.0003 +[2026-02-28 17:25:37] (step=0014547) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.8462140481314813, LR: 0.0003 +[2026-02-28 17:25:45] (step=0014548) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.846409704558795, LR: 0.0003 +[2026-02-28 17:25:53] (step=0014549) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.8466053609861084, LR: 0.0003 +[2026-02-28 17:26:01] (step=0014550) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 2.846801017413422, LR: 0.0003 +[2026-02-28 17:26:09] (step=0014551) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.8469966738407355, LR: 0.0003 +[2026-02-28 17:26:17] (step=0014552) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.8471923302680495, LR: 0.0003 +[2026-02-28 17:26:25] (step=0014553) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.847387986695363, LR: 0.0003 +[2026-02-28 17:26:32] (step=0014554) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.8475836431226766, LR: 0.0003 +[2026-02-28 17:26:40] (step=0014555) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 2.84777929954999, LR: 0.0003 +[2026-02-28 17:26:48] (step=0014556) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.8479749559773038, LR: 0.0003 +[2026-02-28 17:26:56] (step=0014557) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.8481706124046173, LR: 0.0003 +[2026-02-28 17:27:04] (step=0014558) Train Loss: 0.4752, Train Steps/Sec: 0.13, Epoch: 2.8483662688319313, LR: 0.0003 +[2026-02-28 17:27:12] (step=0014559) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.848561925259245, LR: 0.0003 +[2026-02-28 17:27:20] (step=0014560) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.8487575816865585, LR: 0.0003 +[2026-02-28 17:27:27] (step=0014561) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.848953238113872, LR: 0.0003 +[2026-02-28 17:27:35] (step=0014562) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 2.8491488945411856, LR: 0.0003 +[2026-02-28 17:27:43] (step=0014563) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.849344550968499, LR: 0.0003 +[2026-02-28 17:27:51] (step=0014564) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.849540207395813, LR: 0.0003 +[2026-02-28 17:27:59] (step=0014565) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 2.8497358638231267, LR: 0.0003 +[2026-02-28 17:28:07] (step=0014566) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.8499315202504403, LR: 0.0003 +[2026-02-28 17:28:14] (step=0014567) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.850127176677754, LR: 0.0003 +[2026-02-28 17:28:22] (step=0014568) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 2.8503228331050674, LR: 0.0003 +[2026-02-28 17:28:30] (step=0014569) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.850518489532381, LR: 0.0003 +[2026-02-28 17:28:38] (step=0014570) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.850714145959695, LR: 0.0003 +[2026-02-28 17:28:46] (step=0014571) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.8509098023870085, LR: 0.0003 +[2026-02-28 17:28:54] (step=0014572) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 2.851105458814322, LR: 0.0003 +[2026-02-28 17:29:02] (step=0014573) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.8513011152416357, LR: 0.0003 +[2026-02-28 17:29:09] (step=0014574) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.851496771668949, LR: 0.0003 +[2026-02-28 17:29:17] (step=0014575) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.8516924280962628, LR: 0.0003 +[2026-02-28 17:29:25] (step=0014576) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.851888084523577, LR: 0.0003 +[2026-02-28 17:29:33] (step=0014577) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.8520837409508903, LR: 0.0003 +[2026-02-28 17:29:41] (step=0014578) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.852279397378204, LR: 0.0003 +[2026-02-28 17:29:49] (step=0014579) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.8524750538055175, LR: 0.0003 +[2026-02-28 17:29:57] (step=0014580) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.852670710232831, LR: 0.0003 +[2026-02-28 17:30:04] (step=0014581) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.8528663666601446, LR: 0.0003 +[2026-02-28 17:30:12] (step=0014582) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.8530620230874586, LR: 0.0003 +[2026-02-28 17:30:20] (step=0014583) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 2.853257679514772, LR: 0.0003 +[2026-02-28 17:30:28] (step=0014584) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.8534533359420857, LR: 0.0003 +[2026-02-28 17:30:36] (step=0014585) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.8536489923693993, LR: 0.0003 +[2026-02-28 17:30:44] (step=0014586) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.853844648796713, LR: 0.0003 +[2026-02-28 17:30:52] (step=0014587) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 2.8540403052240264, LR: 0.0003 +[2026-02-28 17:30:59] (step=0014588) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.8542359616513404, LR: 0.0003 +[2026-02-28 17:31:07] (step=0014589) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 2.854431618078654, LR: 0.0003 +[2026-02-28 17:31:15] (step=0014590) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.8546272745059675, LR: 0.0003 +[2026-02-28 17:31:23] (step=0014591) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.854822930933281, LR: 0.0003 +[2026-02-28 17:31:31] (step=0014592) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.8550185873605947, LR: 0.0003 +[2026-02-28 17:31:39] (step=0014593) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.8552142437879082, LR: 0.0003 +[2026-02-28 17:31:46] (step=0014594) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 2.8554099002152222, LR: 0.0003 +[2026-02-28 17:31:54] (step=0014595) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.855605556642536, LR: 0.0003 +[2026-02-28 17:32:02] (step=0014596) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 2.8558012130698494, LR: 0.0003 +[2026-02-28 17:32:10] (step=0014597) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 2.855996869497163, LR: 0.0003 +[2026-02-28 17:32:18] (step=0014598) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.8561925259244765, LR: 0.0003 +[2026-02-28 17:32:26] (step=0014599) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.85638818235179, LR: 0.0003 +[2026-02-28 17:32:34] (step=0014600) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.856583838779104, LR: 0.0003 +[2026-02-28 17:32:41] (step=0014601) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.8567794952064176, LR: 0.0003 +[2026-02-28 17:32:49] (step=0014602) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.856975151633731, LR: 0.0003 +[2026-02-28 17:32:57] (step=0014603) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 2.8571708080610447, LR: 0.0003 +[2026-02-28 17:33:05] (step=0014604) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.8573664644883583, LR: 0.0003 +[2026-02-28 17:33:13] (step=0014605) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.8575621209156723, LR: 0.0003 +[2026-02-28 17:33:21] (step=0014606) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.857757777342986, LR: 0.0003 +[2026-02-28 17:33:29] (step=0014607) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.8579534337702994, LR: 0.0003 +[2026-02-28 17:33:36] (step=0014608) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.858149090197613, LR: 0.0003 +[2026-02-28 17:33:44] (step=0014609) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.8583447466249265, LR: 0.0003 +[2026-02-28 17:33:52] (step=0014610) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.85854040305224, LR: 0.0003 +[2026-02-28 17:34:00] (step=0014611) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.858736059479554, LR: 0.0003 +[2026-02-28 17:34:08] (step=0014612) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.8589317159068677, LR: 0.0003 +[2026-02-28 17:34:16] (step=0014613) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.8591273723341812, LR: 0.0003 +[2026-02-28 17:34:24] (step=0014614) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.859323028761495, LR: 0.0003 +[2026-02-28 17:34:31] (step=0014615) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 2.8595186851888084, LR: 0.0003 +[2026-02-28 17:34:39] (step=0014616) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.859714341616122, LR: 0.0003 +[2026-02-28 17:34:47] (step=0014617) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.859909998043436, LR: 0.0003 +[2026-02-28 17:34:55] (step=0014618) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.8601056544707495, LR: 0.0003 +[2026-02-28 17:35:03] (step=0014619) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 2.860301310898063, LR: 0.0003 +[2026-02-28 17:35:11] (step=0014620) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.8604969673253766, LR: 0.0003 +[2026-02-28 17:35:18] (step=0014621) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.86069262375269, LR: 0.0003 +[2026-02-28 17:35:26] (step=0014622) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.8608882801800037, LR: 0.0003 +[2026-02-28 17:35:34] (step=0014623) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.8610839366073177, LR: 0.0003 +[2026-02-28 17:35:42] (step=0014624) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.8612795930346313, LR: 0.0003 +[2026-02-28 17:35:50] (step=0014625) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 2.861475249461945, LR: 0.0003 +[2026-02-28 17:35:58] (step=0014626) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.8616709058892584, LR: 0.0003 +[2026-02-28 17:36:05] (step=0014627) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.861866562316572, LR: 0.0003 +[2026-02-28 17:36:13] (step=0014628) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.8620622187438856, LR: 0.0003 +[2026-02-28 17:36:21] (step=0014629) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.8622578751711996, LR: 0.0003 +[2026-02-28 17:36:29] (step=0014630) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.862453531598513, LR: 0.0003 +[2026-02-28 17:36:37] (step=0014631) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.8626491880258267, LR: 0.0003 +[2026-02-28 17:36:45] (step=0014632) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.8628448444531402, LR: 0.0003 +[2026-02-28 17:36:53] (step=0014633) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.863040500880454, LR: 0.0003 +[2026-02-28 17:37:01] (step=0014634) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.8632361573077674, LR: 0.0003 +[2026-02-28 17:37:08] (step=0014635) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.8634318137350814, LR: 0.0003 +[2026-02-28 17:37:16] (step=0014636) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 2.863627470162395, LR: 0.0003 +[2026-02-28 17:37:24] (step=0014637) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.8638231265897085, LR: 0.0003 +[2026-02-28 17:37:32] (step=0014638) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.864018783017022, LR: 0.0003 +[2026-02-28 17:37:40] (step=0014639) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.8642144394443356, LR: 0.0003 +[2026-02-28 17:37:48] (step=0014640) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 2.864410095871649, LR: 0.0003 +[2026-02-28 17:37:55] (step=0014641) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 2.864605752298963, LR: 0.0003 +[2026-02-28 17:38:03] (step=0014642) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 2.8648014087262768, LR: 0.0003 +[2026-02-28 17:38:11] (step=0014643) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.8649970651535903, LR: 0.0003 +[2026-02-28 17:38:19] (step=0014644) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.865192721580904, LR: 0.0003 +[2026-02-28 17:38:27] (step=0014645) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.8653883780082174, LR: 0.0003 +[2026-02-28 17:38:35] (step=0014646) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.865584034435531, LR: 0.0003 +[2026-02-28 17:38:43] (step=0014647) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.865779690862845, LR: 0.0003 +[2026-02-28 17:38:50] (step=0014648) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.8659753472901586, LR: 0.0003 +[2026-02-28 17:38:58] (step=0014649) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.866171003717472, LR: 0.0003 +[2026-02-28 17:39:06] (step=0014650) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.8663666601447857, LR: 0.0003 +[2026-02-28 17:39:14] (step=0014651) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 2.8665623165720993, LR: 0.0003 +[2026-02-28 17:39:22] (step=0014652) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.866757972999413, LR: 0.0003 +[2026-02-28 17:39:30] (step=0014653) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.866953629426727, LR: 0.0003 +[2026-02-28 17:39:38] (step=0014654) Train Loss: 0.4584, Train Steps/Sec: 0.12, Epoch: 2.8671492858540404, LR: 0.0003 +[2026-02-28 17:39:45] (step=0014655) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.867344942281354, LR: 0.0003 +[2026-02-28 17:39:53] (step=0014656) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.8675405987086675, LR: 0.0003 +[2026-02-28 17:40:01] (step=0014657) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.867736255135981, LR: 0.0003 +[2026-02-28 17:40:09] (step=0014658) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.8679319115632946, LR: 0.0003 +[2026-02-28 17:40:17] (step=0014659) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.8681275679906086, LR: 0.0003 +[2026-02-28 17:40:25] (step=0014660) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 2.868323224417922, LR: 0.0003 +[2026-02-28 17:40:32] (step=0014661) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 2.8685188808452358, LR: 0.0003 +[2026-02-28 17:40:40] (step=0014662) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.8687145372725493, LR: 0.0003 +[2026-02-28 17:40:48] (step=0014663) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 2.868910193699863, LR: 0.0003 +[2026-02-28 17:40:56] (step=0014664) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 2.869105850127177, LR: 0.0003 +[2026-02-28 17:41:04] (step=0014665) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.8693015065544905, LR: 0.0003 +[2026-02-28 17:41:12] (step=0014666) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.869497162981804, LR: 0.0003 +[2026-02-28 17:41:19] (step=0014667) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.8696928194091176, LR: 0.0003 +[2026-02-28 17:41:27] (step=0014668) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.869888475836431, LR: 0.0003 +[2026-02-28 17:41:35] (step=0014669) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.8700841322637447, LR: 0.0003 +[2026-02-28 17:41:43] (step=0014670) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.8702797886910587, LR: 0.0003 +[2026-02-28 17:41:51] (step=0014671) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.8704754451183723, LR: 0.0003 +[2026-02-28 17:41:59] (step=0014672) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 2.870671101545686, LR: 0.0003 +[2026-02-28 17:42:07] (step=0014673) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 2.8708667579729994, LR: 0.0003 +[2026-02-28 17:42:14] (step=0014674) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.871062414400313, LR: 0.0003 +[2026-02-28 17:42:22] (step=0014675) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 2.8712580708276265, LR: 0.0003 +[2026-02-28 17:42:30] (step=0014676) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.8714537272549405, LR: 0.0003 +[2026-02-28 17:42:38] (step=0014677) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.871649383682254, LR: 0.0003 +[2026-02-28 17:42:46] (step=0014678) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.8718450401095676, LR: 0.0003 +[2026-02-28 17:42:54] (step=0014679) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 2.872040696536881, LR: 0.0003 +[2026-02-28 17:43:02] (step=0014680) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.8722363529641948, LR: 0.0003 +[2026-02-28 17:43:09] (step=0014681) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.8724320093915083, LR: 0.0003 +[2026-02-28 17:43:17] (step=0014682) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 2.8726276658188223, LR: 0.0003 +[2026-02-28 17:43:25] (step=0014683) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.872823322246136, LR: 0.0003 +[2026-02-28 17:43:33] (step=0014684) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.8730189786734495, LR: 0.0003 +[2026-02-28 17:43:41] (step=0014685) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.873214635100763, LR: 0.0003 +[2026-02-28 17:43:49] (step=0014686) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.8734102915280766, LR: 0.0003 +[2026-02-28 17:43:57] (step=0014687) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.87360594795539, LR: 0.0003 +[2026-02-28 17:44:04] (step=0014688) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.873801604382704, LR: 0.0003 +[2026-02-28 17:44:12] (step=0014689) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 2.8739972608100177, LR: 0.0003 +[2026-02-28 17:44:20] (step=0014690) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 2.8741929172373313, LR: 0.0003 +[2026-02-28 17:44:28] (step=0014691) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 2.874388573664645, LR: 0.0003 +[2026-02-28 17:44:36] (step=0014692) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.8745842300919584, LR: 0.0003 +[2026-02-28 17:44:44] (step=0014693) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.874779886519272, LR: 0.0003 +[2026-02-28 17:44:51] (step=0014694) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.874975542946586, LR: 0.0003 +[2026-02-28 17:44:59] (step=0014695) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.8751711993738995, LR: 0.0003 +[2026-02-28 17:45:07] (step=0014696) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.875366855801213, LR: 0.0003 +[2026-02-28 17:45:15] (step=0014697) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.8755625122285267, LR: 0.0003 +[2026-02-28 17:45:23] (step=0014698) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 2.87575816865584, LR: 0.0003 +[2026-02-28 17:45:31] (step=0014699) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 2.875953825083154, LR: 0.0003 +[2026-02-28 17:45:38] (step=0014700) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.876149481510468, LR: 0.0003 +[2026-02-28 17:45:46] (step=0014701) Train Loss: 0.4641, Train Steps/Sec: 0.12, Epoch: 2.8763451379377813, LR: 0.0003 +[2026-02-28 17:45:54] (step=0014702) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.876540794365095, LR: 0.0003 +[2026-02-28 17:46:02] (step=0014703) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 2.8767364507924085, LR: 0.0003 +[2026-02-28 17:46:10] (step=0014704) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.876932107219722, LR: 0.0003 +[2026-02-28 17:46:18] (step=0014705) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.8771277636470356, LR: 0.0003 +[2026-02-28 17:46:26] (step=0014706) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.8773234200743496, LR: 0.0003 +[2026-02-28 17:46:34] (step=0014707) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.877519076501663, LR: 0.0003 +[2026-02-28 17:46:41] (step=0014708) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.8777147329289767, LR: 0.0003 +[2026-02-28 17:46:49] (step=0014709) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.8779103893562903, LR: 0.0003 +[2026-02-28 17:46:57] (step=0014710) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.878106045783604, LR: 0.0003 +[2026-02-28 17:47:05] (step=0014711) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.8783017022109174, LR: 0.0003 +[2026-02-28 17:47:13] (step=0014712) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.8784973586382314, LR: 0.0003 +[2026-02-28 17:47:21] (step=0014713) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.878693015065545, LR: 0.0003 +[2026-02-28 17:47:28] (step=0014714) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.8788886714928585, LR: 0.0003 +[2026-02-28 17:47:36] (step=0014715) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.879084327920172, LR: 0.0003 +[2026-02-28 17:47:44] (step=0014716) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.8792799843474857, LR: 0.0003 +[2026-02-28 17:47:52] (step=0014717) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.8794756407747997, LR: 0.0003 +[2026-02-28 17:48:00] (step=0014718) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 2.8796712972021132, LR: 0.0003 +[2026-02-28 17:48:08] (step=0014719) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.879866953629427, LR: 0.0003 +[2026-02-28 17:48:15] (step=0014720) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.8800626100567404, LR: 0.0003 +[2026-02-28 17:48:23] (step=0014721) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 2.880258266484054, LR: 0.0003 +[2026-02-28 17:48:31] (step=0014722) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.8804539229113675, LR: 0.0003 +[2026-02-28 17:48:39] (step=0014723) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.8806495793386815, LR: 0.0003 +[2026-02-28 17:48:47] (step=0014724) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 2.880845235765995, LR: 0.0003 +[2026-02-28 17:48:55] (step=0014725) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.8810408921933086, LR: 0.0003 +[2026-02-28 17:49:02] (step=0014726) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 2.881236548620622, LR: 0.0003 +[2026-02-28 17:49:10] (step=0014727) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.8814322050479357, LR: 0.0003 +[2026-02-28 17:49:18] (step=0014728) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 2.8816278614752493, LR: 0.0003 +[2026-02-28 17:49:26] (step=0014729) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.8818235179025633, LR: 0.0003 +[2026-02-28 17:49:34] (step=0014730) Train Loss: 0.4646, Train Steps/Sec: 0.12, Epoch: 2.882019174329877, LR: 0.0003 +[2026-02-28 17:49:42] (step=0014731) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 2.8822148307571904, LR: 0.0003 +[2026-02-28 17:49:50] (step=0014732) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.882410487184504, LR: 0.0003 +[2026-02-28 17:49:58] (step=0014733) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.8826061436118176, LR: 0.0003 +[2026-02-28 17:50:05] (step=0014734) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.882801800039131, LR: 0.0003 +[2026-02-28 17:50:13] (step=0014735) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.882997456466445, LR: 0.0003 +[2026-02-28 17:50:21] (step=0014736) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.8831931128937587, LR: 0.0003 +[2026-02-28 17:50:29] (step=0014737) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.8833887693210722, LR: 0.0003 +[2026-02-28 17:50:37] (step=0014738) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.883584425748386, LR: 0.0003 +[2026-02-28 17:50:45] (step=0014739) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 2.8837800821756994, LR: 0.0003 +[2026-02-28 17:50:52] (step=0014740) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.883975738603013, LR: 0.0003 +[2026-02-28 17:51:00] (step=0014741) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.884171395030327, LR: 0.0003 +[2026-02-28 17:51:08] (step=0014742) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.8843670514576405, LR: 0.0003 +[2026-02-28 17:51:16] (step=0014743) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.884562707884954, LR: 0.0003 +[2026-02-28 17:51:24] (step=0014744) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 2.8847583643122676, LR: 0.0003 +[2026-02-28 17:51:32] (step=0014745) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.884954020739581, LR: 0.0003 +[2026-02-28 17:51:40] (step=0014746) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.8851496771668947, LR: 0.0003 +[2026-02-28 17:51:47] (step=0014747) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 2.8853453335942087, LR: 0.0003 +[2026-02-28 17:51:55] (step=0014748) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.8855409900215223, LR: 0.0003 +[2026-02-28 17:52:03] (step=0014749) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.885736646448836, LR: 0.0003 +[2026-02-28 17:52:11] (step=0014750) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 2.8859323028761494, LR: 0.0003 +[2026-02-28 17:52:19] (step=0014751) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 2.886127959303463, LR: 0.0003 +[2026-02-28 17:52:27] (step=0014752) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.8863236157307766, LR: 0.0003 +[2026-02-28 17:52:35] (step=0014753) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.8865192721580906, LR: 0.0003 +[2026-02-28 17:52:42] (step=0014754) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.886714928585404, LR: 0.0003 +[2026-02-28 17:52:50] (step=0014755) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.8869105850127177, LR: 0.0003 +[2026-02-28 17:52:58] (step=0014756) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.8871062414400313, LR: 0.0003 +[2026-02-28 17:53:06] (step=0014757) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 2.887301897867345, LR: 0.0003 +[2026-02-28 17:53:14] (step=0014758) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 2.8874975542946584, LR: 0.0003 +[2026-02-28 17:53:22] (step=0014759) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.8876932107219724, LR: 0.0003 +[2026-02-28 17:53:29] (step=0014760) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.887888867149286, LR: 0.0003 +[2026-02-28 17:53:37] (step=0014761) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.8880845235765995, LR: 0.0003 +[2026-02-28 17:53:45] (step=0014762) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.888280180003913, LR: 0.0003 +[2026-02-28 17:53:53] (step=0014763) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.8884758364312266, LR: 0.0003 +[2026-02-28 17:54:01] (step=0014764) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.88867149285854, LR: 0.0003 +[2026-02-28 17:54:09] (step=0014765) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.888867149285854, LR: 0.0003 +[2026-02-28 17:54:17] (step=0014766) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.8890628057131678, LR: 0.0003 +[2026-02-28 17:54:24] (step=0014767) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.8892584621404813, LR: 0.0003 +[2026-02-28 17:54:32] (step=0014768) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.889454118567795, LR: 0.0003 +[2026-02-28 17:54:40] (step=0014769) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.8896497749951084, LR: 0.0003 +[2026-02-28 17:54:48] (step=0014770) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.889845431422422, LR: 0.0003 +[2026-02-28 17:54:56] (step=0014771) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.890041087849736, LR: 0.0003 +[2026-02-28 17:55:04] (step=0014772) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.8902367442770496, LR: 0.0003 +[2026-02-28 17:55:11] (step=0014773) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 2.890432400704363, LR: 0.0003 +[2026-02-28 17:55:19] (step=0014774) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 2.8906280571316767, LR: 0.0003 +[2026-02-28 17:55:27] (step=0014775) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.8908237135589903, LR: 0.0003 +[2026-02-28 17:55:35] (step=0014776) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 2.8910193699863043, LR: 0.0003 +[2026-02-28 17:55:43] (step=0014777) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.891215026413618, LR: 0.0003 +[2026-02-28 17:55:51] (step=0014778) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.8914106828409314, LR: 0.0003 +[2026-02-28 17:55:59] (step=0014779) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.891606339268245, LR: 0.0003 +[2026-02-28 17:56:06] (step=0014780) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 2.8918019956955585, LR: 0.0003 +[2026-02-28 17:56:14] (step=0014781) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.891997652122872, LR: 0.0003 +[2026-02-28 17:56:22] (step=0014782) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.892193308550186, LR: 0.0003 +[2026-02-28 17:56:30] (step=0014783) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.8923889649774996, LR: 0.0003 +[2026-02-28 17:56:38] (step=0014784) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 2.892584621404813, LR: 0.0003 +[2026-02-28 17:56:46] (step=0014785) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 2.8927802778321268, LR: 0.0003 +[2026-02-28 17:56:54] (step=0014786) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.8929759342594403, LR: 0.0003 +[2026-02-28 17:57:01] (step=0014787) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.893171590686754, LR: 0.0003 +[2026-02-28 17:57:09] (step=0014788) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.893367247114068, LR: 0.0003 +[2026-02-28 17:57:17] (step=0014789) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.8935629035413815, LR: 0.0003 +[2026-02-28 17:57:25] (step=0014790) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.893758559968695, LR: 0.0003 +[2026-02-28 17:57:33] (step=0014791) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.8939542163960086, LR: 0.0003 +[2026-02-28 17:57:41] (step=0014792) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 2.894149872823322, LR: 0.0003 +[2026-02-28 17:57:49] (step=0014793) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.8943455292506357, LR: 0.0003 +[2026-02-28 17:57:56] (step=0014794) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.8945411856779497, LR: 0.0003 +[2026-02-28 17:58:04] (step=0014795) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.8947368421052633, LR: 0.0003 +[2026-02-28 17:58:12] (step=0014796) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.894932498532577, LR: 0.0003 +[2026-02-28 17:58:20] (step=0014797) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.8951281549598904, LR: 0.0003 +[2026-02-28 17:58:28] (step=0014798) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.895323811387204, LR: 0.0003 +[2026-02-28 17:58:36] (step=0014799) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 2.8955194678145175, LR: 0.0003 +[2026-02-28 17:58:44] (step=0014800) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 2.8957151242418315, LR: 0.0003 +[2026-02-28 17:58:52] (step=0014801) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.895910780669145, LR: 0.0003 +[2026-02-28 17:58:59] (step=0014802) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 2.8961064370964587, LR: 0.0003 +[2026-02-28 17:59:07] (step=0014803) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.896302093523772, LR: 0.0003 +[2026-02-28 17:59:15] (step=0014804) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 2.8964977499510858, LR: 0.0003 +[2026-02-28 17:59:23] (step=0014805) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.8966934063783993, LR: 0.0003 +[2026-02-28 17:59:31] (step=0014806) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.8968890628057133, LR: 0.0003 +[2026-02-28 17:59:39] (step=0014807) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.897084719233027, LR: 0.0003 +[2026-02-28 17:59:46] (step=0014808) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.8972803756603405, LR: 0.0003 +[2026-02-28 17:59:54] (step=0014809) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.897476032087654, LR: 0.0003 +[2026-02-28 18:00:02] (step=0014810) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 2.8976716885149676, LR: 0.0003 +[2026-02-28 18:00:10] (step=0014811) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 2.897867344942281, LR: 0.0003 +[2026-02-28 18:00:18] (step=0014812) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.898063001369595, LR: 0.0003 +[2026-02-28 18:00:26] (step=0014813) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 2.8982586577969087, LR: 0.0003 +[2026-02-28 18:00:34] (step=0014814) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.8984543142242223, LR: 0.0003 +[2026-02-28 18:00:41] (step=0014815) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.898649970651536, LR: 0.0003 +[2026-02-28 18:00:49] (step=0014816) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.8988456270788494, LR: 0.0003 +[2026-02-28 18:00:57] (step=0014817) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.899041283506163, LR: 0.0003 +[2026-02-28 18:01:05] (step=0014818) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.899236939933477, LR: 0.0003 +[2026-02-28 18:01:13] (step=0014819) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.8994325963607905, LR: 0.0003 +[2026-02-28 18:01:21] (step=0014820) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.899628252788104, LR: 0.0003 +[2026-02-28 18:01:28] (step=0014821) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.8998239092154177, LR: 0.0003 +[2026-02-28 18:01:36] (step=0014822) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.9000195656427312, LR: 0.0003 +[2026-02-28 18:01:44] (step=0014823) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 2.900215222070045, LR: 0.0003 +[2026-02-28 18:01:52] (step=0014824) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.900410878497359, LR: 0.0003 +[2026-02-28 18:02:00] (step=0014825) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 2.9006065349246724, LR: 0.0003 +[2026-02-28 18:02:08] (step=0014826) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.900802191351986, LR: 0.0003 +[2026-02-28 18:02:16] (step=0014827) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.9009978477792995, LR: 0.0003 +[2026-02-28 18:02:24] (step=0014828) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.901193504206613, LR: 0.0003 +[2026-02-28 18:02:31] (step=0014829) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 2.901389160633927, LR: 0.0003 +[2026-02-28 18:02:39] (step=0014830) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 2.9015848170612406, LR: 0.0003 +[2026-02-28 18:02:47] (step=0014831) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.901780473488554, LR: 0.0003 +[2026-02-28 18:02:55] (step=0014832) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.9019761299158677, LR: 0.0003 +[2026-02-28 18:03:03] (step=0014833) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.9021717863431813, LR: 0.0003 +[2026-02-28 18:03:11] (step=0014834) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.902367442770495, LR: 0.0003 +[2026-02-28 18:03:18] (step=0014835) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.902563099197809, LR: 0.0003 +[2026-02-28 18:03:26] (step=0014836) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 2.9027587556251224, LR: 0.0003 +[2026-02-28 18:03:34] (step=0014837) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.902954412052436, LR: 0.0003 +[2026-02-28 18:03:42] (step=0014838) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.9031500684797495, LR: 0.0003 +[2026-02-28 18:03:50] (step=0014839) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.903345724907063, LR: 0.0003 +[2026-02-28 18:03:58] (step=0014840) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.9035413813343767, LR: 0.0003 +[2026-02-28 18:04:05] (step=0014841) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.9037370377616907, LR: 0.0003 +[2026-02-28 18:04:13] (step=0014842) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.9039326941890042, LR: 0.0003 +[2026-02-28 18:04:21] (step=0014843) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 2.904128350616318, LR: 0.0003 +[2026-02-28 18:04:29] (step=0014844) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.9043240070436314, LR: 0.0003 +[2026-02-28 18:04:37] (step=0014845) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.904519663470945, LR: 0.0003 +[2026-02-28 18:04:45] (step=0014846) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 2.9047153198982585, LR: 0.0003 +[2026-02-28 18:04:53] (step=0014847) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 2.9049109763255725, LR: 0.0003 +[2026-02-28 18:05:00] (step=0014848) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.905106632752886, LR: 0.0003 +[2026-02-28 18:05:08] (step=0014849) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 2.9053022891801996, LR: 0.0003 +[2026-02-28 18:05:16] (step=0014850) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.905497945607513, LR: 0.0003 +[2026-02-28 18:05:24] (step=0014851) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.9056936020348267, LR: 0.0003 +[2026-02-28 18:05:32] (step=0014852) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 2.9058892584621403, LR: 0.0003 +[2026-02-28 18:05:40] (step=0014853) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.9060849148894543, LR: 0.0003 +[2026-02-28 18:05:48] (step=0014854) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.906280571316768, LR: 0.0003 +[2026-02-28 18:05:55] (step=0014855) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.9064762277440814, LR: 0.0003 +[2026-02-28 18:06:03] (step=0014856) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.906671884171395, LR: 0.0003 +[2026-02-28 18:06:11] (step=0014857) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 2.9068675405987086, LR: 0.0003 +[2026-02-28 18:06:19] (step=0014858) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.907063197026022, LR: 0.0003 +[2026-02-28 18:06:27] (step=0014859) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 2.907258853453336, LR: 0.0003 +[2026-02-28 18:06:35] (step=0014860) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.9074545098806497, LR: 0.0003 +[2026-02-28 18:06:42] (step=0014861) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.9076501663079632, LR: 0.0003 +[2026-02-28 18:06:50] (step=0014862) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.907845822735277, LR: 0.0003 +[2026-02-28 18:06:58] (step=0014863) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 2.9080414791625904, LR: 0.0003 +[2026-02-28 18:07:06] (step=0014864) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.908237135589904, LR: 0.0003 +[2026-02-28 18:07:14] (step=0014865) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.908432792017218, LR: 0.0003 +[2026-02-28 18:07:22] (step=0014866) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 2.9086284484445315, LR: 0.0003 +[2026-02-28 18:07:29] (step=0014867) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 2.908824104871845, LR: 0.0003 +[2026-02-28 18:07:37] (step=0014868) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 2.9090197612991586, LR: 0.0003 +[2026-02-28 18:07:45] (step=0014869) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 2.909215417726472, LR: 0.0003 +[2026-02-28 18:07:53] (step=0014870) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.9094110741537857, LR: 0.0003 +[2026-02-28 18:08:01] (step=0014871) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.9096067305810998, LR: 0.0003 +[2026-02-28 18:08:09] (step=0014872) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.9098023870084133, LR: 0.0003 +[2026-02-28 18:08:16] (step=0014873) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.909998043435727, LR: 0.0003 +[2026-02-28 18:08:24] (step=0014874) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.9101936998630404, LR: 0.0003 +[2026-02-28 18:08:32] (step=0014875) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.910389356290354, LR: 0.0003 +[2026-02-28 18:08:40] (step=0014876) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.9105850127176676, LR: 0.0003 +[2026-02-28 18:08:48] (step=0014877) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 2.9107806691449816, LR: 0.0003 +[2026-02-28 18:08:56] (step=0014878) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 2.910976325572295, LR: 0.0003 +[2026-02-28 18:09:04] (step=0014879) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 2.9111719819996087, LR: 0.0003 +[2026-02-28 18:09:11] (step=0014880) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.9113676384269223, LR: 0.0003 +[2026-02-28 18:09:19] (step=0014881) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.911563294854236, LR: 0.0003 +[2026-02-28 18:09:27] (step=0014882) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 2.9117589512815494, LR: 0.0003 +[2026-02-28 18:09:35] (step=0014883) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 2.9119546077088634, LR: 0.0003 +[2026-02-28 18:09:43] (step=0014884) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.912150264136177, LR: 0.0003 +[2026-02-28 18:09:51] (step=0014885) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.9123459205634905, LR: 0.0003 +[2026-02-28 18:09:58] (step=0014886) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.912541576990804, LR: 0.0003 +[2026-02-28 18:10:06] (step=0014887) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 2.9127372334181176, LR: 0.0003 +[2026-02-28 18:10:14] (step=0014888) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.9129328898454316, LR: 0.0003 +[2026-02-28 18:10:22] (step=0014889) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.913128546272745, LR: 0.0003 +[2026-02-28 18:10:30] (step=0014890) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 2.9133242027000588, LR: 0.0003 +[2026-02-28 18:10:38] (step=0014891) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 2.9135198591273723, LR: 0.0003 +[2026-02-28 18:10:45] (step=0014892) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 2.913715515554686, LR: 0.0003 +[2026-02-28 18:10:53] (step=0014893) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.9139111719819994, LR: 0.0003 +[2026-02-28 18:11:01] (step=0014894) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.9141068284093135, LR: 0.0003 +[2026-02-28 18:11:09] (step=0014895) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 2.914302484836627, LR: 0.0003 +[2026-02-28 18:11:17] (step=0014896) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.9144981412639406, LR: 0.0003 +[2026-02-28 18:11:25] (step=0014897) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 2.914693797691254, LR: 0.0003 +[2026-02-28 18:11:33] (step=0014898) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 2.9148894541185677, LR: 0.0003 +[2026-02-28 18:11:40] (step=0014899) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.9150851105458813, LR: 0.0003 +[2026-02-28 18:11:48] (step=0014900) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.9152807669731953, LR: 0.0003 +[2026-02-28 18:11:56] (step=0014901) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.915476423400509, LR: 0.0003 +[2026-02-28 18:12:04] (step=0014902) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.9156720798278224, LR: 0.0003 +[2026-02-28 18:12:12] (step=0014903) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.915867736255136, LR: 0.0003 +[2026-02-28 18:12:20] (step=0014904) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.9160633926824495, LR: 0.0003 +[2026-02-28 18:12:27] (step=0014905) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.916259049109763, LR: 0.0003 +[2026-02-28 18:12:35] (step=0014906) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 2.916454705537077, LR: 0.0003 +[2026-02-28 18:12:43] (step=0014907) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.9166503619643906, LR: 0.0003 +[2026-02-28 18:12:51] (step=0014908) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.916846018391704, LR: 0.0003 +[2026-02-28 18:12:59] (step=0014909) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.9170416748190178, LR: 0.0003 +[2026-02-28 18:13:07] (step=0014910) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.9172373312463313, LR: 0.0003 +[2026-02-28 18:13:14] (step=0014911) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.917432987673645, LR: 0.0003 +[2026-02-28 18:13:22] (step=0014912) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.917628644100959, LR: 0.0003 +[2026-02-28 18:13:30] (step=0014913) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 2.9178243005282725, LR: 0.0003 +[2026-02-28 18:13:38] (step=0014914) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 2.918019956955586, LR: 0.0003 +[2026-02-28 18:13:46] (step=0014915) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 2.9182156133828996, LR: 0.0003 +[2026-02-28 18:13:54] (step=0014916) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 2.918411269810213, LR: 0.0003 +[2026-02-28 18:14:01] (step=0014917) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 2.9186069262375267, LR: 0.0003 +[2026-02-28 18:14:09] (step=0014918) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 2.9188025826648407, LR: 0.0003 +[2026-02-28 18:14:17] (step=0014919) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.9189982390921543, LR: 0.0003 +[2026-02-28 18:14:25] (step=0014920) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 2.919193895519468, LR: 0.0003 +[2026-02-28 18:14:33] (step=0014921) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.9193895519467814, LR: 0.0003 +[2026-02-28 18:14:41] (step=0014922) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 2.919585208374095, LR: 0.0003 +[2026-02-28 18:14:48] (step=0014923) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.9197808648014085, LR: 0.0003 +[2026-02-28 18:14:56] (step=0014924) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.9199765212287225, LR: 0.0003 +[2026-02-28 18:15:04] (step=0014925) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.920172177656036, LR: 0.0003 +[2026-02-28 18:15:12] (step=0014926) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.9203678340833497, LR: 0.0003 +[2026-02-28 18:15:20] (step=0014927) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.920563490510663, LR: 0.0003 +[2026-02-28 18:15:28] (step=0014928) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.920759146937977, LR: 0.0003 +[2026-02-28 18:15:36] (step=0014929) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 2.9209548033652903, LR: 0.0003 +[2026-02-28 18:15:43] (step=0014930) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.9211504597926043, LR: 0.0003 +[2026-02-28 18:15:51] (step=0014931) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.921346116219918, LR: 0.0003 +[2026-02-28 18:15:59] (step=0014932) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.9215417726472315, LR: 0.0003 +[2026-02-28 18:16:07] (step=0014933) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.921737429074545, LR: 0.0003 +[2026-02-28 18:16:15] (step=0014934) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.9219330855018586, LR: 0.0003 +[2026-02-28 18:16:23] (step=0014935) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.922128741929172, LR: 0.0003 +[2026-02-28 18:16:30] (step=0014936) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.922324398356486, LR: 0.0003 +[2026-02-28 18:16:38] (step=0014937) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.9225200547837997, LR: 0.0003 +[2026-02-28 18:16:46] (step=0014938) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.9227157112111133, LR: 0.0003 +[2026-02-28 18:16:54] (step=0014939) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 2.922911367638427, LR: 0.0003 +[2026-02-28 18:17:02] (step=0014940) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.9231070240657404, LR: 0.0003 +[2026-02-28 18:17:10] (step=0014941) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.9233026804930544, LR: 0.0003 +[2026-02-28 18:17:17] (step=0014942) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.923498336920368, LR: 0.0003 +[2026-02-28 18:17:25] (step=0014943) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 2.9236939933476815, LR: 0.0003 +[2026-02-28 18:17:33] (step=0014944) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.923889649774995, LR: 0.0003 +[2026-02-28 18:17:41] (step=0014945) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.9240853062023087, LR: 0.0003 +[2026-02-28 18:17:49] (step=0014946) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.9242809626296222, LR: 0.0003 +[2026-02-28 18:17:57] (step=0014947) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.9244766190569362, LR: 0.0003 +[2026-02-28 18:18:05] (step=0014948) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.92467227548425, LR: 0.0003 +[2026-02-28 18:18:12] (step=0014949) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 2.9248679319115634, LR: 0.0003 +[2026-02-28 18:18:20] (step=0014950) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 2.925063588338877, LR: 0.0003 +[2026-02-28 18:18:28] (step=0014951) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.9252592447661905, LR: 0.0003 +[2026-02-28 18:18:36] (step=0014952) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.925454901193504, LR: 0.0003 +[2026-02-28 18:18:44] (step=0014953) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.925650557620818, LR: 0.0003 +[2026-02-28 18:18:52] (step=0014954) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.9258462140481316, LR: 0.0003 +[2026-02-28 18:19:00] (step=0014955) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.926041870475445, LR: 0.0003 +[2026-02-28 18:19:07] (step=0014956) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.9262375269027587, LR: 0.0003 +[2026-02-28 18:19:15] (step=0014957) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.9264331833300723, LR: 0.0003 +[2026-02-28 18:19:23] (step=0014958) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.926628839757386, LR: 0.0003 +[2026-02-28 18:19:31] (step=0014959) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.9268244961847, LR: 0.0003 +[2026-02-28 18:19:39] (step=0014960) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.9270201526120134, LR: 0.0003 +[2026-02-28 18:19:47] (step=0014961) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.927215809039327, LR: 0.0003 +[2026-02-28 18:19:54] (step=0014962) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.9274114654666405, LR: 0.0003 +[2026-02-28 18:20:02] (step=0014963) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.927607121893954, LR: 0.0003 +[2026-02-28 18:20:10] (step=0014964) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 2.9278027783212677, LR: 0.0003 +[2026-02-28 18:20:18] (step=0014965) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.9279984347485817, LR: 0.0003 +[2026-02-28 18:20:26] (step=0014966) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 2.9281940911758952, LR: 0.0003 +[2026-02-28 18:20:34] (step=0014967) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.928389747603209, LR: 0.0003 +[2026-02-28 18:20:41] (step=0014968) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 2.9285854040305224, LR: 0.0003 +[2026-02-28 18:20:49] (step=0014969) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.928781060457836, LR: 0.0003 +[2026-02-28 18:20:57] (step=0014970) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 2.9289767168851495, LR: 0.0003 +[2026-02-28 18:21:05] (step=0014971) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 2.9291723733124635, LR: 0.0003 +[2026-02-28 18:21:13] (step=0014972) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.929368029739777, LR: 0.0003 +[2026-02-28 18:21:21] (step=0014973) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.9295636861670906, LR: 0.0003 +[2026-02-28 18:21:29] (step=0014974) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.929759342594404, LR: 0.0003 +[2026-02-28 18:21:36] (step=0014975) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.9299549990217177, LR: 0.0003 +[2026-02-28 18:21:44] (step=0014976) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.9301506554490313, LR: 0.0003 +[2026-02-28 18:21:52] (step=0014977) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 2.9303463118763453, LR: 0.0003 +[2026-02-28 18:22:00] (step=0014978) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.930541968303659, LR: 0.0003 +[2026-02-28 18:22:08] (step=0014979) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 2.9307376247309724, LR: 0.0003 +[2026-02-28 18:22:16] (step=0014980) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 2.930933281158286, LR: 0.0003 +[2026-02-28 18:22:23] (step=0014981) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.9311289375855996, LR: 0.0003 +[2026-02-28 18:22:31] (step=0014982) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.931324594012913, LR: 0.0003 +[2026-02-28 18:22:39] (step=0014983) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 2.931520250440227, LR: 0.0003 +[2026-02-28 18:22:47] (step=0014984) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.9317159068675407, LR: 0.0003 +[2026-02-28 18:22:55] (step=0014985) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.9319115632948543, LR: 0.0003 +[2026-02-28 18:23:03] (step=0014986) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.932107219722168, LR: 0.0003 +[2026-02-28 18:23:10] (step=0014987) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.9323028761494814, LR: 0.0003 +[2026-02-28 18:23:18] (step=0014988) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.932498532576795, LR: 0.0003 +[2026-02-28 18:23:26] (step=0014989) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.932694189004109, LR: 0.0003 +[2026-02-28 18:23:34] (step=0014990) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.9328898454314225, LR: 0.0003 +[2026-02-28 18:23:42] (step=0014991) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 2.933085501858736, LR: 0.0003 +[2026-02-28 18:23:50] (step=0014992) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.9332811582860496, LR: 0.0003 +[2026-02-28 18:23:58] (step=0014993) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.933476814713363, LR: 0.0003 +[2026-02-28 18:24:05] (step=0014994) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 2.9336724711406768, LR: 0.0003 +[2026-02-28 18:24:13] (step=0014995) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 2.9338681275679908, LR: 0.0003 +[2026-02-28 18:24:21] (step=0014996) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 2.9340637839953043, LR: 0.0003 +[2026-02-28 18:24:29] (step=0014997) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.934259440422618, LR: 0.0003 +[2026-02-28 18:24:37] (step=0014998) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.9344550968499314, LR: 0.0003 +[2026-02-28 18:24:45] (step=0014999) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 2.934650753277245, LR: 0.0003 +[2026-02-28 18:24:52] (step=0015000) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.934846409704559, LR: 0.0003 +[2026-02-28 18:24:52] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0015000/ +[2026-02-28 18:25:00] (step=0015001) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.9350420661318726, LR: 0.0003 +[2026-02-28 18:25:08] (step=0015002) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 2.935237722559186, LR: 0.0003 +[2026-02-28 18:25:16] (step=0015003) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 2.9354333789864997, LR: 0.0003 +[2026-02-28 18:25:24] (step=0015004) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 2.9356290354138133, LR: 0.0003 +[2026-02-28 18:25:32] (step=0015005) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.935824691841127, LR: 0.0003 +[2026-02-28 18:25:40] (step=0015006) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 2.936020348268441, LR: 0.0003 +[2026-02-28 18:25:47] (step=0015007) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.9362160046957544, LR: 0.0003 +[2026-02-28 18:25:55] (step=0015008) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.936411661123068, LR: 0.0003 +[2026-02-28 18:26:03] (step=0015009) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.9366073175503815, LR: 0.0003 +[2026-02-28 18:26:11] (step=0015010) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.936802973977695, LR: 0.0003 +[2026-02-28 18:26:19] (step=0015011) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 2.9369986304050086, LR: 0.0003 +[2026-02-28 18:26:26] (step=0015012) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.9371942868323226, LR: 0.0003 +[2026-02-28 18:26:34] (step=0015013) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 2.937389943259636, LR: 0.0003 +[2026-02-28 18:26:42] (step=0015014) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.9375855996869498, LR: 0.0003 +[2026-02-28 18:26:50] (step=0015015) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.9377812561142633, LR: 0.0003 +[2026-02-28 18:26:58] (step=0015016) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.937976912541577, LR: 0.0003 +[2026-02-28 18:27:06] (step=0015017) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.9381725689688905, LR: 0.0003 +[2026-02-28 18:27:14] (step=0015018) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 2.9383682253962045, LR: 0.0003 +[2026-02-28 18:27:21] (step=0015019) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.938563881823518, LR: 0.0003 +[2026-02-28 18:27:29] (step=0015020) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.9387595382508316, LR: 0.0003 +[2026-02-28 18:27:37] (step=0015021) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.938955194678145, LR: 0.0003 +[2026-02-28 18:27:45] (step=0015022) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.9391508511054587, LR: 0.0003 +[2026-02-28 18:27:53] (step=0015023) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.9393465075327723, LR: 0.0003 +[2026-02-28 18:28:01] (step=0015024) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 2.9395421639600863, LR: 0.0003 +[2026-02-28 18:28:09] (step=0015025) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 2.9397378203874, LR: 0.0003 +[2026-02-28 18:28:16] (step=0015026) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.9399334768147134, LR: 0.0003 +[2026-02-28 18:28:24] (step=0015027) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 2.940129133242027, LR: 0.0003 +[2026-02-28 18:28:32] (step=0015028) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.9403247896693405, LR: 0.0003 +[2026-02-28 18:28:40] (step=0015029) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.940520446096654, LR: 0.0003 +[2026-02-28 18:28:48] (step=0015030) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.940716102523968, LR: 0.0003 +[2026-02-28 18:28:56] (step=0015031) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 2.9409117589512817, LR: 0.0003 +[2026-02-28 18:29:03] (step=0015032) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.941107415378595, LR: 0.0003 +[2026-02-28 18:29:11] (step=0015033) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.9413030718059088, LR: 0.0003 +[2026-02-28 18:29:19] (step=0015034) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.9414987282332223, LR: 0.0003 +[2026-02-28 18:29:27] (step=0015035) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.941694384660536, LR: 0.0003 +[2026-02-28 18:29:35] (step=0015036) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.94189004108785, LR: 0.0003 +[2026-02-28 18:29:43] (step=0015037) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.9420856975151635, LR: 0.0003 +[2026-02-28 18:29:50] (step=0015038) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.942281353942477, LR: 0.0003 +[2026-02-28 18:29:58] (step=0015039) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 2.9424770103697906, LR: 0.0003 +[2026-02-28 18:30:06] (step=0015040) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 2.942672666797104, LR: 0.0003 +[2026-02-28 18:30:14] (step=0015041) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.9428683232244177, LR: 0.0003 +[2026-02-28 18:30:22] (step=0015042) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 2.9430639796517317, LR: 0.0003 +[2026-02-28 18:30:30] (step=0015043) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.9432596360790453, LR: 0.0003 +[2026-02-28 18:30:38] (step=0015044) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 2.943455292506359, LR: 0.0003 +[2026-02-28 18:30:45] (step=0015045) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.9436509489336724, LR: 0.0003 +[2026-02-28 18:30:53] (step=0015046) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.943846605360986, LR: 0.0003 +[2026-02-28 18:31:01] (step=0015047) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 2.9440422617882995, LR: 0.0003 +[2026-02-28 18:31:09] (step=0015048) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.9442379182156135, LR: 0.0003 +[2026-02-28 18:31:17] (step=0015049) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 2.944433574642927, LR: 0.0003 +[2026-02-28 18:31:25] (step=0015050) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.9446292310702407, LR: 0.0003 +[2026-02-28 18:31:32] (step=0015051) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.9448248874975542, LR: 0.0003 +[2026-02-28 18:31:40] (step=0015052) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.945020543924868, LR: 0.0003 +[2026-02-28 18:31:48] (step=0015053) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 2.945216200352182, LR: 0.0003 +[2026-02-28 18:31:56] (step=0015054) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 2.9454118567794954, LR: 0.0003 +[2026-02-28 18:32:04] (step=0015055) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 2.945607513206809, LR: 0.0003 +[2026-02-28 18:32:12] (step=0015056) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 2.9458031696341225, LR: 0.0003 +[2026-02-28 18:32:19] (step=0015057) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 2.945998826061436, LR: 0.0003 +[2026-02-28 18:32:27] (step=0015058) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 2.9461944824887496, LR: 0.0003 +[2026-02-28 18:32:35] (step=0015059) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.9463901389160636, LR: 0.0003 +[2026-02-28 18:32:43] (step=0015060) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 2.946585795343377, LR: 0.0003 +[2026-02-28 18:32:51] (step=0015061) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 2.9467814517706907, LR: 0.0003 +[2026-02-28 18:32:59] (step=0015062) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 2.9469771081980043, LR: 0.0003 +[2026-02-28 18:33:06] (step=0015063) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.947172764625318, LR: 0.0003 +[2026-02-28 18:33:14] (step=0015064) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 2.9473684210526314, LR: 0.0003 +[2026-02-28 18:33:22] (step=0015065) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 2.9475640774799454, LR: 0.0003 +[2026-02-28 18:33:30] (step=0015066) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.947759733907259, LR: 0.0003 +[2026-02-28 18:33:38] (step=0015067) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.9479553903345725, LR: 0.0003 +[2026-02-28 18:33:46] (step=0015068) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.948151046761886, LR: 0.0003 +[2026-02-28 18:33:53] (step=0015069) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 2.9483467031891997, LR: 0.0003 +[2026-02-28 18:34:01] (step=0015070) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.9485423596165132, LR: 0.0003 +[2026-02-28 18:34:09] (step=0015071) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 2.9487380160438272, LR: 0.0003 +[2026-02-28 18:34:17] (step=0015072) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.948933672471141, LR: 0.0003 +[2026-02-28 18:34:25] (step=0015073) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 2.9491293288984544, LR: 0.0003 +[2026-02-28 18:34:33] (step=0015074) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.949324985325768, LR: 0.0003 +[2026-02-28 18:34:41] (step=0015075) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.9495206417530815, LR: 0.0003 +[2026-02-28 18:34:48] (step=0015076) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 2.949716298180395, LR: 0.0003 +[2026-02-28 18:34:56] (step=0015077) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.949911954607709, LR: 0.0003 +[2026-02-28 18:35:04] (step=0015078) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 2.9501076110350226, LR: 0.0003 +[2026-02-28 18:35:12] (step=0015079) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.950303267462336, LR: 0.0003 +[2026-02-28 18:35:20] (step=0015080) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 2.9504989238896497, LR: 0.0003 +[2026-02-28 18:35:28] (step=0015081) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.9506945803169633, LR: 0.0003 +[2026-02-28 18:35:35] (step=0015082) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 2.950890236744277, LR: 0.0003 +[2026-02-28 18:35:43] (step=0015083) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.951085893171591, LR: 0.0003 +[2026-02-28 18:35:51] (step=0015084) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.9512815495989044, LR: 0.0003 +[2026-02-28 18:35:59] (step=0015085) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.951477206026218, LR: 0.0003 +[2026-02-28 18:36:07] (step=0015086) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 2.9516728624535316, LR: 0.0003 +[2026-02-28 18:36:15] (step=0015087) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 2.951868518880845, LR: 0.0003 +[2026-02-28 18:36:22] (step=0015088) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 2.9520641753081587, LR: 0.0003 +[2026-02-28 18:36:30] (step=0015089) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 2.9522598317354727, LR: 0.0003 +[2026-02-28 18:36:38] (step=0015090) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.9524554881627862, LR: 0.0003 +[2026-02-28 18:36:46] (step=0015091) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 2.9526511445901, LR: 0.0003 +[2026-02-28 18:36:54] (step=0015092) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.9528468010174134, LR: 0.0003 +[2026-02-28 18:37:02] (step=0015093) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 2.953042457444727, LR: 0.0003 +[2026-02-28 18:37:10] (step=0015094) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.9532381138720405, LR: 0.0003 +[2026-02-28 18:37:18] (step=0015095) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.9534337702993545, LR: 0.0003 +[2026-02-28 18:37:25] (step=0015096) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.953629426726668, LR: 0.0003 +[2026-02-28 18:37:33] (step=0015097) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.9538250831539816, LR: 0.0003 +[2026-02-28 18:37:41] (step=0015098) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 2.954020739581295, LR: 0.0003 +[2026-02-28 18:37:49] (step=0015099) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.9542163960086087, LR: 0.0003 +[2026-02-28 18:37:57] (step=0015100) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 2.9544120524359223, LR: 0.0003 +[2026-02-28 18:38:04] (step=0015101) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 2.9546077088632363, LR: 0.0003 +[2026-02-28 18:38:12] (step=0015102) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.95480336529055, LR: 0.0003 +[2026-02-28 18:38:20] (step=0015103) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.9549990217178634, LR: 0.0003 +[2026-02-28 18:38:28] (step=0015104) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 2.955194678145177, LR: 0.0003 +[2026-02-28 18:38:36] (step=0015105) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 2.9553903345724906, LR: 0.0003 +[2026-02-28 18:38:44] (step=0015106) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.955585990999804, LR: 0.0003 +[2026-02-28 18:38:52] (step=0015107) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 2.955781647427118, LR: 0.0003 +[2026-02-28 18:38:59] (step=0015108) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.9559773038544317, LR: 0.0003 +[2026-02-28 18:39:07] (step=0015109) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.9561729602817453, LR: 0.0003 +[2026-02-28 18:39:15] (step=0015110) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 2.956368616709059, LR: 0.0003 +[2026-02-28 18:39:23] (step=0015111) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.9565642731363724, LR: 0.0003 +[2026-02-28 18:39:31] (step=0015112) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.9567599295636864, LR: 0.0003 +[2026-02-28 18:39:39] (step=0015113) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.956955585991, LR: 0.0003 +[2026-02-28 18:39:46] (step=0015114) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.9571512424183135, LR: 0.0003 +[2026-02-28 18:39:54] (step=0015115) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.957346898845627, LR: 0.0003 +[2026-02-28 18:40:02] (step=0015116) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 2.9575425552729406, LR: 0.0003 +[2026-02-28 18:40:10] (step=0015117) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.957738211700254, LR: 0.0003 +[2026-02-28 18:40:18] (step=0015118) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 2.957933868127568, LR: 0.0003 +[2026-02-28 18:40:25] (step=0015119) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.9581295245548818, LR: 0.0003 +[2026-02-28 18:40:33] (step=0015120) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 2.9583251809821953, LR: 0.0003 +[2026-02-28 18:40:41] (step=0015121) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.958520837409509, LR: 0.0003 +[2026-02-28 18:40:49] (step=0015122) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 2.9587164938368224, LR: 0.0003 +[2026-02-28 18:40:57] (step=0015123) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 2.958912150264136, LR: 0.0003 +[2026-02-28 18:41:05] (step=0015124) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.95910780669145, LR: 0.0003 +[2026-02-28 18:41:13] (step=0015125) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 2.9593034631187636, LR: 0.0003 +[2026-02-28 18:41:21] (step=0015126) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 2.959499119546077, LR: 0.0003 +[2026-02-28 18:41:28] (step=0015127) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.9596947759733907, LR: 0.0003 +[2026-02-28 18:41:36] (step=0015128) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 2.9598904324007043, LR: 0.0003 +[2026-02-28 18:41:44] (step=0015129) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 2.960086088828018, LR: 0.0003 +[2026-02-28 18:41:52] (step=0015130) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.960281745255332, LR: 0.0003 +[2026-02-28 18:42:00] (step=0015131) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.9604774016826454, LR: 0.0003 +[2026-02-28 18:42:08] (step=0015132) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 2.960673058109959, LR: 0.0003 +[2026-02-28 18:42:15] (step=0015133) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.9608687145372725, LR: 0.0003 +[2026-02-28 18:42:23] (step=0015134) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 2.961064370964586, LR: 0.0003 +[2026-02-28 18:42:31] (step=0015135) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.9612600273918996, LR: 0.0003 +[2026-02-28 18:42:39] (step=0015136) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.9614556838192136, LR: 0.0003 +[2026-02-28 18:42:47] (step=0015137) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.961651340246527, LR: 0.0003 +[2026-02-28 18:42:55] (step=0015138) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.9618469966738408, LR: 0.0003 +[2026-02-28 18:43:02] (step=0015139) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 2.9620426531011543, LR: 0.0003 +[2026-02-28 18:43:10] (step=0015140) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 2.962238309528468, LR: 0.0003 +[2026-02-28 18:43:18] (step=0015141) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.9624339659557815, LR: 0.0003 +[2026-02-28 18:43:26] (step=0015142) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.9626296223830955, LR: 0.0003 +[2026-02-28 18:43:34] (step=0015143) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 2.962825278810409, LR: 0.0003 +[2026-02-28 18:43:42] (step=0015144) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 2.9630209352377226, LR: 0.0003 +[2026-02-28 18:43:50] (step=0015145) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 2.963216591665036, LR: 0.0003 +[2026-02-28 18:43:57] (step=0015146) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 2.9634122480923497, LR: 0.0003 +[2026-02-28 18:44:05] (step=0015147) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 2.9636079045196633, LR: 0.0003 +[2026-02-28 18:44:13] (step=0015148) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.9638035609469773, LR: 0.0003 +[2026-02-28 18:44:21] (step=0015149) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.963999217374291, LR: 0.0003 +[2026-02-28 18:44:29] (step=0015150) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.9641948738016044, LR: 0.0003 +[2026-02-28 18:44:37] (step=0015151) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 2.964390530228918, LR: 0.0003 +[2026-02-28 18:44:44] (step=0015152) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 2.9645861866562315, LR: 0.0003 +[2026-02-28 18:44:52] (step=0015153) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.964781843083545, LR: 0.0003 +[2026-02-28 18:45:00] (step=0015154) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.964977499510859, LR: 0.0003 +[2026-02-28 18:45:08] (step=0015155) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 2.9651731559381727, LR: 0.0003 +[2026-02-28 18:45:16] (step=0015156) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 2.965368812365486, LR: 0.0003 +[2026-02-28 18:45:24] (step=0015157) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 2.9655644687928, LR: 0.0003 +[2026-02-28 18:45:31] (step=0015158) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.9657601252201133, LR: 0.0003 +[2026-02-28 18:45:39] (step=0015159) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.965955781647427, LR: 0.0003 +[2026-02-28 18:45:47] (step=0015160) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.966151438074741, LR: 0.0003 +[2026-02-28 18:45:55] (step=0015161) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 2.9663470945020545, LR: 0.0003 +[2026-02-28 18:46:03] (step=0015162) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.966542750929368, LR: 0.0003 +[2026-02-28 18:46:11] (step=0015163) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.9667384073566816, LR: 0.0003 +[2026-02-28 18:46:18] (step=0015164) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 2.966934063783995, LR: 0.0003 +[2026-02-28 18:46:26] (step=0015165) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 2.967129720211309, LR: 0.0003 +[2026-02-28 18:46:34] (step=0015166) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.9673253766386227, LR: 0.0003 +[2026-02-28 18:46:42] (step=0015167) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 2.9675210330659363, LR: 0.0003 +[2026-02-28 18:46:50] (step=0015168) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.96771668949325, LR: 0.0003 +[2026-02-28 18:46:58] (step=0015169) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 2.9679123459205634, LR: 0.0003 +[2026-02-28 18:47:05] (step=0015170) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.968108002347877, LR: 0.0003 +[2026-02-28 18:47:13] (step=0015171) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 2.968303658775191, LR: 0.0003 +[2026-02-28 18:47:21] (step=0015172) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.9684993152025045, LR: 0.0003 +[2026-02-28 18:47:29] (step=0015173) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 2.968694971629818, LR: 0.0003 +[2026-02-28 18:47:37] (step=0015174) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 2.9688906280571317, LR: 0.0003 +[2026-02-28 18:47:45] (step=0015175) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.9690862844844452, LR: 0.0003 +[2026-02-28 18:47:53] (step=0015176) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.969281940911759, LR: 0.0003 +[2026-02-28 18:48:00] (step=0015177) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.969477597339073, LR: 0.0003 +[2026-02-28 18:48:08] (step=0015178) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.9696732537663864, LR: 0.0003 +[2026-02-28 18:48:16] (step=0015179) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.9698689101937, LR: 0.0003 +[2026-02-28 18:48:24] (step=0015180) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.9700645666210135, LR: 0.0003 +[2026-02-28 18:48:32] (step=0015181) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.970260223048327, LR: 0.0003 +[2026-02-28 18:48:40] (step=0015182) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.9704558794756406, LR: 0.0003 +[2026-02-28 18:48:48] (step=0015183) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.9706515359029546, LR: 0.0003 +[2026-02-28 18:48:55] (step=0015184) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 2.970847192330268, LR: 0.0003 +[2026-02-28 18:49:03] (step=0015185) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 2.9710428487575817, LR: 0.0003 +[2026-02-28 18:49:11] (step=0015186) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 2.9712385051848953, LR: 0.0003 +[2026-02-28 18:49:19] (step=0015187) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 2.971434161612209, LR: 0.0003 +[2026-02-28 18:49:27] (step=0015188) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.9716298180395224, LR: 0.0003 +[2026-02-28 18:49:35] (step=0015189) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.9718254744668364, LR: 0.0003 +[2026-02-28 18:49:42] (step=0015190) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.97202113089415, LR: 0.0003 +[2026-02-28 18:49:50] (step=0015191) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 2.9722167873214635, LR: 0.0003 +[2026-02-28 18:49:58] (step=0015192) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 2.972412443748777, LR: 0.0003 +[2026-02-28 18:50:06] (step=0015193) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 2.9726081001760907, LR: 0.0003 +[2026-02-28 18:50:14] (step=0015194) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 2.9728037566034042, LR: 0.0003 +[2026-02-28 18:50:22] (step=0015195) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.9729994130307182, LR: 0.0003 +[2026-02-28 18:50:29] (step=0015196) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 2.973195069458032, LR: 0.0003 +[2026-02-28 18:50:37] (step=0015197) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.9733907258853454, LR: 0.0003 +[2026-02-28 18:50:45] (step=0015198) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 2.973586382312659, LR: 0.0003 +[2026-02-28 18:50:53] (step=0015199) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.9737820387399725, LR: 0.0003 +[2026-02-28 18:51:01] (step=0015200) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 2.973977695167286, LR: 0.0003 +[2026-02-28 18:51:09] (step=0015201) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.9741733515946, LR: 0.0003 +[2026-02-28 18:51:17] (step=0015202) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 2.9743690080219136, LR: 0.0003 +[2026-02-28 18:51:24] (step=0015203) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 2.974564664449227, LR: 0.0003 +[2026-02-28 18:51:32] (step=0015204) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.9747603208765407, LR: 0.0003 +[2026-02-28 18:51:40] (step=0015205) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.9749559773038543, LR: 0.0003 +[2026-02-28 18:51:48] (step=0015206) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 2.975151633731168, LR: 0.0003 +[2026-02-28 18:51:56] (step=0015207) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 2.975347290158482, LR: 0.0003 +[2026-02-28 18:52:04] (step=0015208) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 2.9755429465857954, LR: 0.0003 +[2026-02-28 18:52:11] (step=0015209) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.975738603013109, LR: 0.0003 +[2026-02-28 18:52:19] (step=0015210) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.9759342594404226, LR: 0.0003 +[2026-02-28 18:52:27] (step=0015211) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.976129915867736, LR: 0.0003 +[2026-02-28 18:52:35] (step=0015212) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 2.9763255722950497, LR: 0.0003 +[2026-02-28 18:52:43] (step=0015213) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 2.9765212287223637, LR: 0.0003 +[2026-02-28 18:52:51] (step=0015214) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 2.9767168851496772, LR: 0.0003 +[2026-02-28 18:52:58] (step=0015215) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.976912541576991, LR: 0.0003 +[2026-02-28 18:53:06] (step=0015216) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 2.9771081980043044, LR: 0.0003 +[2026-02-28 18:53:14] (step=0015217) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 2.977303854431618, LR: 0.0003 +[2026-02-28 18:53:22] (step=0015218) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 2.9774995108589315, LR: 0.0003 +[2026-02-28 18:53:30] (step=0015219) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 2.9776951672862455, LR: 0.0003 +[2026-02-28 18:53:37] (step=0015220) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 2.977890823713559, LR: 0.0003 +[2026-02-28 18:53:45] (step=0015221) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 2.9780864801408726, LR: 0.0003 +[2026-02-28 18:53:53] (step=0015222) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.978282136568186, LR: 0.0003 +[2026-02-28 18:54:01] (step=0015223) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 2.9784777929954998, LR: 0.0003 +[2026-02-28 18:54:09] (step=0015224) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 2.9786734494228138, LR: 0.0003 +[2026-02-28 18:54:17] (step=0015225) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.9788691058501273, LR: 0.0003 +[2026-02-28 18:54:25] (step=0015226) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.979064762277441, LR: 0.0003 +[2026-02-28 18:54:32] (step=0015227) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 2.9792604187047544, LR: 0.0003 +[2026-02-28 18:54:40] (step=0015228) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 2.979456075132068, LR: 0.0003 +[2026-02-28 18:54:48] (step=0015229) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.9796517315593816, LR: 0.0003 +[2026-02-28 18:54:56] (step=0015230) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 2.9798473879866956, LR: 0.0003 +[2026-02-28 18:55:04] (step=0015231) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.980043044414009, LR: 0.0003 +[2026-02-28 18:55:12] (step=0015232) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.9802387008413227, LR: 0.0003 +[2026-02-28 18:55:20] (step=0015233) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 2.9804343572686363, LR: 0.0003 +[2026-02-28 18:55:27] (step=0015234) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 2.98063001369595, LR: 0.0003 +[2026-02-28 18:55:35] (step=0015235) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.9808256701232634, LR: 0.0003 +[2026-02-28 18:55:43] (step=0015236) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.9810213265505774, LR: 0.0003 +[2026-02-28 18:55:51] (step=0015237) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 2.981216982977891, LR: 0.0003 +[2026-02-28 18:55:59] (step=0015238) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 2.9814126394052045, LR: 0.0003 +[2026-02-28 18:56:07] (step=0015239) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.981608295832518, LR: 0.0003 +[2026-02-28 18:56:14] (step=0015240) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 2.9818039522598316, LR: 0.0003 +[2026-02-28 18:56:22] (step=0015241) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 2.981999608687145, LR: 0.0003 +[2026-02-28 18:56:30] (step=0015242) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.982195265114459, LR: 0.0003 +[2026-02-28 18:56:38] (step=0015243) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 2.9823909215417728, LR: 0.0003 +[2026-02-28 18:56:46] (step=0015244) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 2.9825865779690863, LR: 0.0003 +[2026-02-28 18:56:54] (step=0015245) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 2.9827822343964, LR: 0.0003 +[2026-02-28 18:57:01] (step=0015246) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.9829778908237135, LR: 0.0003 +[2026-02-28 18:57:09] (step=0015247) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 2.983173547251027, LR: 0.0003 +[2026-02-28 18:57:17] (step=0015248) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.983369203678341, LR: 0.0003 +[2026-02-28 18:57:25] (step=0015249) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 2.9835648601056546, LR: 0.0003 +[2026-02-28 18:57:33] (step=0015250) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 2.983760516532968, LR: 0.0003 +[2026-02-28 18:57:41] (step=0015251) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.9839561729602817, LR: 0.0003 +[2026-02-28 18:57:49] (step=0015252) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.9841518293875953, LR: 0.0003 +[2026-02-28 18:57:56] (step=0015253) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.984347485814909, LR: 0.0003 +[2026-02-28 18:58:04] (step=0015254) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 2.984543142242223, LR: 0.0003 +[2026-02-28 18:58:12] (step=0015255) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 2.9847387986695364, LR: 0.0003 +[2026-02-28 18:58:20] (step=0015256) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 2.98493445509685, LR: 0.0003 +[2026-02-28 18:58:28] (step=0015257) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 2.9851301115241635, LR: 0.0003 +[2026-02-28 18:58:36] (step=0015258) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 2.985325767951477, LR: 0.0003 +[2026-02-28 18:58:43] (step=0015259) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 2.9855214243787906, LR: 0.0003 +[2026-02-28 18:58:51] (step=0015260) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 2.9857170808061047, LR: 0.0003 +[2026-02-28 18:58:59] (step=0015261) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.985912737233418, LR: 0.0003 +[2026-02-28 18:59:07] (step=0015262) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 2.9861083936607318, LR: 0.0003 +[2026-02-28 18:59:15] (step=0015263) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 2.9863040500880453, LR: 0.0003 +[2026-02-28 18:59:23] (step=0015264) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 2.986499706515359, LR: 0.0003 +[2026-02-28 18:59:30] (step=0015265) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 2.9866953629426725, LR: 0.0003 +[2026-02-28 18:59:38] (step=0015266) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 2.9868910193699865, LR: 0.0003 +[2026-02-28 18:59:46] (step=0015267) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 2.9870866757973, LR: 0.0003 +[2026-02-28 18:59:54] (step=0015268) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 2.9872823322246136, LR: 0.0003 +[2026-02-28 19:00:02] (step=0015269) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 2.987477988651927, LR: 0.0003 +[2026-02-28 19:00:10] (step=0015270) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 2.9876736450792407, LR: 0.0003 +[2026-02-28 19:00:17] (step=0015271) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 2.9878693015065543, LR: 0.0003 +[2026-02-28 19:00:25] (step=0015272) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.9880649579338683, LR: 0.0003 +[2026-02-28 19:00:33] (step=0015273) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.988260614361182, LR: 0.0003 +[2026-02-28 19:00:41] (step=0015274) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 2.9884562707884954, LR: 0.0003 +[2026-02-28 19:00:49] (step=0015275) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 2.988651927215809, LR: 0.0003 +[2026-02-28 19:00:57] (step=0015276) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 2.9888475836431225, LR: 0.0003 +[2026-02-28 19:01:05] (step=0015277) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 2.9890432400704365, LR: 0.0003 +[2026-02-28 19:01:12] (step=0015278) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 2.98923889649775, LR: 0.0003 +[2026-02-28 19:01:20] (step=0015279) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 2.9894345529250637, LR: 0.0003 +[2026-02-28 19:01:28] (step=0015280) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 2.989630209352377, LR: 0.0003 +[2026-02-28 19:01:36] (step=0015281) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.989825865779691, LR: 0.0003 +[2026-02-28 19:01:44] (step=0015282) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 2.9900215222070043, LR: 0.0003 +[2026-02-28 19:01:52] (step=0015283) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 2.9902171786343184, LR: 0.0003 +[2026-02-28 19:01:59] (step=0015284) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 2.990412835061632, LR: 0.0003 +[2026-02-28 19:02:07] (step=0015285) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 2.9906084914889455, LR: 0.0003 +[2026-02-28 19:02:15] (step=0015286) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.990804147916259, LR: 0.0003 +[2026-02-28 19:02:23] (step=0015287) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 2.9909998043435726, LR: 0.0003 +[2026-02-28 19:02:31] (step=0015288) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 2.991195460770886, LR: 0.0003 +[2026-02-28 19:02:39] (step=0015289) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 2.9913911171982, LR: 0.0003 +[2026-02-28 19:02:47] (step=0015290) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.9915867736255137, LR: 0.0003 +[2026-02-28 19:02:54] (step=0015291) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.9917824300528273, LR: 0.0003 +[2026-02-28 19:03:02] (step=0015292) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 2.991978086480141, LR: 0.0003 +[2026-02-28 19:03:10] (step=0015293) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 2.9921737429074544, LR: 0.0003 +[2026-02-28 19:03:18] (step=0015294) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 2.992369399334768, LR: 0.0003 +[2026-02-28 19:03:26] (step=0015295) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 2.992565055762082, LR: 0.0003 +[2026-02-28 19:03:34] (step=0015296) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 2.9927607121893955, LR: 0.0003 +[2026-02-28 19:03:41] (step=0015297) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 2.992956368616709, LR: 0.0003 +[2026-02-28 19:03:49] (step=0015298) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 2.9931520250440227, LR: 0.0003 +[2026-02-28 19:03:57] (step=0015299) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 2.9933476814713362, LR: 0.0003 +[2026-02-28 19:04:05] (step=0015300) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 2.99354333789865, LR: 0.0003 +[2026-02-28 19:04:13] (step=0015301) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 2.993738994325964, LR: 0.0003 +[2026-02-28 19:04:21] (step=0015302) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.9939346507532774, LR: 0.0003 +[2026-02-28 19:04:28] (step=0015303) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 2.994130307180591, LR: 0.0003 +[2026-02-28 19:04:36] (step=0015304) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 2.9943259636079045, LR: 0.0003 +[2026-02-28 19:04:44] (step=0015305) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.994521620035218, LR: 0.0003 +[2026-02-28 19:04:52] (step=0015306) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 2.9947172764625316, LR: 0.0003 +[2026-02-28 19:05:00] (step=0015307) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 2.9949129328898456, LR: 0.0003 +[2026-02-28 19:05:08] (step=0015308) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 2.995108589317159, LR: 0.0003 +[2026-02-28 19:05:15] (step=0015309) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 2.9953042457444727, LR: 0.0003 +[2026-02-28 19:05:23] (step=0015310) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 2.9954999021717863, LR: 0.0003 +[2026-02-28 19:05:31] (step=0015311) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 2.9956955585991, LR: 0.0003 +[2026-02-28 19:05:39] (step=0015312) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 2.9958912150264134, LR: 0.0003 +[2026-02-28 19:05:47] (step=0015313) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 2.9960868714537274, LR: 0.0003 +[2026-02-28 19:05:55] (step=0015314) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 2.996282527881041, LR: 0.0003 +[2026-02-28 19:06:02] (step=0015315) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 2.9964781843083546, LR: 0.0003 +[2026-02-28 19:06:10] (step=0015316) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 2.996673840735668, LR: 0.0003 +[2026-02-28 19:06:18] (step=0015317) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 2.9968694971629817, LR: 0.0003 +[2026-02-28 19:06:26] (step=0015318) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 2.9970651535902952, LR: 0.0003 +[2026-02-28 19:06:34] (step=0015319) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 2.9972608100176092, LR: 0.0003 +[2026-02-28 19:06:42] (step=0015320) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 2.997456466444923, LR: 0.0003 +[2026-02-28 19:06:50] (step=0015321) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 2.9976521228722364, LR: 0.0003 +[2026-02-28 19:06:57] (step=0015322) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 2.99784777929955, LR: 0.0003 +[2026-02-28 19:07:05] (step=0015323) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 2.9980434357268635, LR: 0.0003 +[2026-02-28 19:07:13] (step=0015324) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 2.998239092154177, LR: 0.0003 +[2026-02-28 19:07:21] (step=0015325) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 2.998434748581491, LR: 0.0003 +[2026-02-28 19:07:29] (step=0015326) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 2.9986304050088046, LR: 0.0003 +[2026-02-28 19:07:37] (step=0015327) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 2.998826061436118, LR: 0.0003 +[2026-02-28 19:07:45] (step=0015328) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 2.9990217178634317, LR: 0.0003 +[2026-02-28 19:07:52] (step=0015329) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 2.9992173742907453, LR: 0.0003 +[2026-02-28 19:08:00] (step=0015330) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 2.999413030718059, LR: 0.0003 +[2026-02-28 19:08:08] (step=0015331) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 2.999608687145373, LR: 0.0003 +[2026-02-28 19:08:16] (step=0015332) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 2.9998043435726864, LR: 0.0003 +[2026-02-28 19:08:24] (step=0015333) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.0, LR: 0.0003 +[2026-02-28 19:08:24] Beginning epoch 3... +[2026-02-28 19:08:34] (step=0015334) Train Loss: 0.4509, Train Steps/Sec: 0.10, Epoch: 3.0001956564273136, LR: 0.0003 +[2026-02-28 19:08:41] (step=0015335) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.000391312854627, LR: 0.0003 +[2026-02-28 19:08:49] (step=0015336) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.000586969281941, LR: 0.0003 +[2026-02-28 19:08:57] (step=0015337) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 3.0007826257092547, LR: 0.0003 +[2026-02-28 19:09:05] (step=0015338) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.0009782821365683, LR: 0.0003 +[2026-02-28 19:09:13] (step=0015339) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.001173938563882, LR: 0.0003 +[2026-02-28 19:09:21] (step=0015340) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.0013695949911954, LR: 0.0003 +[2026-02-28 19:09:29] (step=0015341) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 3.001565251418509, LR: 0.0003 +[2026-02-28 19:09:36] (step=0015342) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.001760907845823, LR: 0.0003 +[2026-02-28 19:09:44] (step=0015343) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.0019565642731365, LR: 0.0003 +[2026-02-28 19:09:52] (step=0015344) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.00215222070045, LR: 0.0003 +[2026-02-28 19:10:00] (step=0015345) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.0023478771277636, LR: 0.0003 +[2026-02-28 19:10:08] (step=0015346) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.002543533555077, LR: 0.0003 +[2026-02-28 19:10:16] (step=0015347) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.0027391899823908, LR: 0.0003 +[2026-02-28 19:10:23] (step=0015348) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.0029348464097048, LR: 0.0003 +[2026-02-28 19:10:31] (step=0015349) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.0031305028370183, LR: 0.0003 +[2026-02-28 19:10:39] (step=0015350) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.003326159264332, LR: 0.0003 +[2026-02-28 19:10:47] (step=0015351) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.0035218156916454, LR: 0.0003 +[2026-02-28 19:10:55] (step=0015352) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.003717472118959, LR: 0.0003 +[2026-02-28 19:11:03] (step=0015353) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.0039131285462726, LR: 0.0003 +[2026-02-28 19:11:10] (step=0015354) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.0041087849735866, LR: 0.0003 +[2026-02-28 19:11:18] (step=0015355) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.0043044414009, LR: 0.0003 +[2026-02-28 19:11:26] (step=0015356) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 3.0045000978282137, LR: 0.0003 +[2026-02-28 19:11:34] (step=0015357) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 3.0046957542555273, LR: 0.0003 +[2026-02-28 19:11:42] (step=0015358) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.004891410682841, LR: 0.0003 +[2026-02-28 19:11:50] (step=0015359) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.0050870671101544, LR: 0.0003 +[2026-02-28 19:11:58] (step=0015360) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.0052827235374684, LR: 0.0003 +[2026-02-28 19:12:05] (step=0015361) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.005478379964782, LR: 0.0003 +[2026-02-28 19:12:13] (step=0015362) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 3.0056740363920955, LR: 0.0003 +[2026-02-28 19:12:21] (step=0015363) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.005869692819409, LR: 0.0003 +[2026-02-28 19:12:29] (step=0015364) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.0060653492467226, LR: 0.0003 +[2026-02-28 19:12:37] (step=0015365) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.006261005674036, LR: 0.0003 +[2026-02-28 19:12:45] (step=0015366) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.00645666210135, LR: 0.0003 +[2026-02-28 19:12:52] (step=0015367) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.0066523185286638, LR: 0.0003 +[2026-02-28 19:13:00] (step=0015368) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.0068479749559773, LR: 0.0003 +[2026-02-28 19:13:08] (step=0015369) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.007043631383291, LR: 0.0003 +[2026-02-28 19:13:16] (step=0015370) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.0072392878106045, LR: 0.0003 +[2026-02-28 19:13:24] (step=0015371) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.007434944237918, LR: 0.0003 +[2026-02-28 19:13:32] (step=0015372) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 3.007630600665232, LR: 0.0003 +[2026-02-28 19:13:40] (step=0015373) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.0078262570925456, LR: 0.0003 +[2026-02-28 19:13:47] (step=0015374) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.008021913519859, LR: 0.0003 +[2026-02-28 19:13:55] (step=0015375) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.0082175699471727, LR: 0.0003 +[2026-02-28 19:14:03] (step=0015376) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.0084132263744863, LR: 0.0003 +[2026-02-28 19:14:11] (step=0015377) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 3.0086088828018, LR: 0.0003 +[2026-02-28 19:14:19] (step=0015378) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.008804539229114, LR: 0.0003 +[2026-02-28 19:14:27] (step=0015379) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 3.0090001956564274, LR: 0.0003 +[2026-02-28 19:14:34] (step=0015380) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.009195852083741, LR: 0.0003 +[2026-02-28 19:14:42] (step=0015381) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.0093915085110545, LR: 0.0003 +[2026-02-28 19:14:50] (step=0015382) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.009587164938368, LR: 0.0003 +[2026-02-28 19:14:58] (step=0015383) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.0097828213656816, LR: 0.0003 +[2026-02-28 19:15:06] (step=0015384) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 3.0099784777929957, LR: 0.0003 +[2026-02-28 19:15:14] (step=0015385) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.010174134220309, LR: 0.0003 +[2026-02-28 19:15:22] (step=0015386) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.010369790647623, LR: 0.0003 +[2026-02-28 19:15:29] (step=0015387) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.0105654470749363, LR: 0.0003 +[2026-02-28 19:15:37] (step=0015388) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.01076110350225, LR: 0.0003 +[2026-02-28 19:15:45] (step=0015389) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.0109567599295635, LR: 0.0003 +[2026-02-28 19:15:53] (step=0015390) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.0111524163568775, LR: 0.0003 +[2026-02-28 19:16:01] (step=0015391) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.011348072784191, LR: 0.0003 +[2026-02-28 19:16:09] (step=0015392) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.0115437292115046, LR: 0.0003 +[2026-02-28 19:16:16] (step=0015393) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.011739385638818, LR: 0.0003 +[2026-02-28 19:16:24] (step=0015394) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.0119350420661317, LR: 0.0003 +[2026-02-28 19:16:32] (step=0015395) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.0121306984934457, LR: 0.0003 +[2026-02-28 19:16:40] (step=0015396) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.0123263549207593, LR: 0.0003 +[2026-02-28 19:16:48] (step=0015397) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.012522011348073, LR: 0.0003 +[2026-02-28 19:16:56] (step=0015398) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.0127176677753864, LR: 0.0003 +[2026-02-28 19:17:03] (step=0015399) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.0129133242027, LR: 0.0003 +[2026-02-28 19:17:11] (step=0015400) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.0131089806300135, LR: 0.0003 +[2026-02-28 19:17:19] (step=0015401) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.0133046370573275, LR: 0.0003 +[2026-02-28 19:17:27] (step=0015402) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.013500293484641, LR: 0.0003 +[2026-02-28 19:17:35] (step=0015403) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.0136959499119547, LR: 0.0003 +[2026-02-28 19:17:43] (step=0015404) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.0138916063392682, LR: 0.0003 +[2026-02-28 19:17:50] (step=0015405) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.014087262766582, LR: 0.0003 +[2026-02-28 19:17:58] (step=0015406) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.0142829191938953, LR: 0.0003 +[2026-02-28 19:18:06] (step=0015407) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 3.0144785756212094, LR: 0.0003 +[2026-02-28 19:18:14] (step=0015408) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.014674232048523, LR: 0.0003 +[2026-02-28 19:18:22] (step=0015409) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.0148698884758365, LR: 0.0003 +[2026-02-28 19:18:30] (step=0015410) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.01506554490315, LR: 0.0003 +[2026-02-28 19:18:37] (step=0015411) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 3.0152612013304636, LR: 0.0003 +[2026-02-28 19:18:45] (step=0015412) Train Loss: 0.4696, Train Steps/Sec: 0.13, Epoch: 3.015456857757777, LR: 0.0003 +[2026-02-28 19:18:53] (step=0015413) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.015652514185091, LR: 0.0003 +[2026-02-28 19:19:01] (step=0015414) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 3.0158481706124047, LR: 0.0003 +[2026-02-28 19:19:09] (step=0015415) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 3.0160438270397183, LR: 0.0003 +[2026-02-28 19:19:17] (step=0015416) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.016239483467032, LR: 0.0003 +[2026-02-28 19:19:24] (step=0015417) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.0164351398943454, LR: 0.0003 +[2026-02-28 19:19:32] (step=0015418) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.016630796321659, LR: 0.0003 +[2026-02-28 19:19:40] (step=0015419) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.016826452748973, LR: 0.0003 +[2026-02-28 19:19:48] (step=0015420) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.0170221091762865, LR: 0.0003 +[2026-02-28 19:19:56] (step=0015421) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.0172177656036, LR: 0.0003 +[2026-02-28 19:20:04] (step=0015422) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.0174134220309137, LR: 0.0003 +[2026-02-28 19:20:11] (step=0015423) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.0176090784582272, LR: 0.0003 +[2026-02-28 19:20:19] (step=0015424) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.017804734885541, LR: 0.0003 +[2026-02-28 19:20:27] (step=0015425) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.018000391312855, LR: 0.0003 +[2026-02-28 19:20:35] (step=0015426) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.0181960477401684, LR: 0.0003 +[2026-02-28 19:20:43] (step=0015427) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.018391704167482, LR: 0.0003 +[2026-02-28 19:20:51] (step=0015428) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 3.0185873605947955, LR: 0.0003 +[2026-02-28 19:20:59] (step=0015429) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.018783017022109, LR: 0.0003 +[2026-02-28 19:21:06] (step=0015430) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.0189786734494226, LR: 0.0003 +[2026-02-28 19:21:14] (step=0015431) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.0191743298767366, LR: 0.0003 +[2026-02-28 19:21:22] (step=0015432) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.01936998630405, LR: 0.0003 +[2026-02-28 19:21:30] (step=0015433) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.0195656427313637, LR: 0.0003 +[2026-02-28 19:21:38] (step=0015434) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.0197612991586773, LR: 0.0003 +[2026-02-28 19:21:46] (step=0015435) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.019956955585991, LR: 0.0003 +[2026-02-28 19:21:54] (step=0015436) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 3.0201526120133044, LR: 0.0003 +[2026-02-28 19:22:01] (step=0015437) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.0203482684406184, LR: 0.0003 +[2026-02-28 19:22:09] (step=0015438) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.020543924867932, LR: 0.0003 +[2026-02-28 19:22:17] (step=0015439) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.0207395812952456, LR: 0.0003 +[2026-02-28 19:22:25] (step=0015440) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.020935237722559, LR: 0.0003 +[2026-02-28 19:22:33] (step=0015441) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.0211308941498727, LR: 0.0003 +[2026-02-28 19:22:41] (step=0015442) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 3.0213265505771862, LR: 0.0003 +[2026-02-28 19:22:48] (step=0015443) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 3.0215222070045002, LR: 0.0003 +[2026-02-28 19:22:56] (step=0015444) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.021717863431814, LR: 0.0003 +[2026-02-28 19:23:04] (step=0015445) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.0219135198591274, LR: 0.0003 +[2026-02-28 19:23:12] (step=0015446) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.022109176286441, LR: 0.0003 +[2026-02-28 19:23:20] (step=0015447) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.0223048327137545, LR: 0.0003 +[2026-02-28 19:23:28] (step=0015448) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.0225004891410685, LR: 0.0003 +[2026-02-28 19:23:35] (step=0015449) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.022696145568382, LR: 0.0003 +[2026-02-28 19:23:43] (step=0015450) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.0228918019956956, LR: 0.0003 +[2026-02-28 19:23:51] (step=0015451) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.023087458423009, LR: 0.0003 +[2026-02-28 19:23:59] (step=0015452) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.0232831148503228, LR: 0.0003 +[2026-02-28 19:24:07] (step=0015453) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.0234787712776363, LR: 0.0003 +[2026-02-28 19:24:15] (step=0015454) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.0236744277049503, LR: 0.0003 +[2026-02-28 19:24:22] (step=0015455) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 3.023870084132264, LR: 0.0003 +[2026-02-28 19:24:30] (step=0015456) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.0240657405595774, LR: 0.0003 +[2026-02-28 19:24:38] (step=0015457) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 3.024261396986891, LR: 0.0003 +[2026-02-28 19:24:46] (step=0015458) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.0244570534142046, LR: 0.0003 +[2026-02-28 19:24:54] (step=0015459) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.024652709841518, LR: 0.0003 +[2026-02-28 19:25:02] (step=0015460) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.024848366268832, LR: 0.0003 +[2026-02-28 19:25:09] (step=0015461) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.0250440226961457, LR: 0.0003 +[2026-02-28 19:25:17] (step=0015462) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.0252396791234593, LR: 0.0003 +[2026-02-28 19:25:25] (step=0015463) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.025435335550773, LR: 0.0003 +[2026-02-28 19:25:33] (step=0015464) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.0256309919780864, LR: 0.0003 +[2026-02-28 19:25:41] (step=0015465) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.0258266484054, LR: 0.0003 +[2026-02-28 19:25:49] (step=0015466) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.026022304832714, LR: 0.0003 +[2026-02-28 19:25:56] (step=0015467) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.0262179612600275, LR: 0.0003 +[2026-02-28 19:26:04] (step=0015468) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.026413617687341, LR: 0.0003 +[2026-02-28 19:26:12] (step=0015469) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 3.0266092741146546, LR: 0.0003 +[2026-02-28 19:26:20] (step=0015470) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.026804930541968, LR: 0.0003 +[2026-02-28 19:26:28] (step=0015471) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.0270005869692818, LR: 0.0003 +[2026-02-28 19:26:36] (step=0015472) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.0271962433965958, LR: 0.0003 +[2026-02-28 19:26:44] (step=0015473) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.0273918998239093, LR: 0.0003 +[2026-02-28 19:26:51] (step=0015474) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.027587556251223, LR: 0.0003 +[2026-02-28 19:26:59] (step=0015475) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.0277832126785365, LR: 0.0003 +[2026-02-28 19:27:07] (step=0015476) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.02797886910585, LR: 0.0003 +[2026-02-28 19:27:15] (step=0015477) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.0281745255331636, LR: 0.0003 +[2026-02-28 19:27:23] (step=0015478) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.0283701819604776, LR: 0.0003 +[2026-02-28 19:27:31] (step=0015479) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.028565838387791, LR: 0.0003 +[2026-02-28 19:27:38] (step=0015480) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.0287614948151047, LR: 0.0003 +[2026-02-28 19:27:46] (step=0015481) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.0289571512424183, LR: 0.0003 +[2026-02-28 19:27:54] (step=0015482) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.029152807669732, LR: 0.0003 +[2026-02-28 19:28:02] (step=0015483) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.0293484640970454, LR: 0.0003 +[2026-02-28 19:28:10] (step=0015484) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.0295441205243594, LR: 0.0003 +[2026-02-28 19:28:18] (step=0015485) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.029739776951673, LR: 0.0003 +[2026-02-28 19:28:26] (step=0015486) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.0299354333789865, LR: 0.0003 +[2026-02-28 19:28:33] (step=0015487) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.0301310898063, LR: 0.0003 +[2026-02-28 19:28:41] (step=0015488) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.0303267462336136, LR: 0.0003 +[2026-02-28 19:28:49] (step=0015489) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.030522402660927, LR: 0.0003 +[2026-02-28 19:28:57] (step=0015490) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.030718059088241, LR: 0.0003 +[2026-02-28 19:29:05] (step=0015491) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.0309137155155548, LR: 0.0003 +[2026-02-28 19:29:13] (step=0015492) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.0311093719428683, LR: 0.0003 +[2026-02-28 19:29:20] (step=0015493) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.031305028370182, LR: 0.0003 +[2026-02-28 19:29:28] (step=0015494) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.0315006847974955, LR: 0.0003 +[2026-02-28 19:29:36] (step=0015495) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.031696341224809, LR: 0.0003 +[2026-02-28 19:29:44] (step=0015496) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.031891997652123, LR: 0.0003 +[2026-02-28 19:29:52] (step=0015497) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.0320876540794366, LR: 0.0003 +[2026-02-28 19:30:00] (step=0015498) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.03228331050675, LR: 0.0003 +[2026-02-28 19:30:08] (step=0015499) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 3.0324789669340637, LR: 0.0003 +[2026-02-28 19:30:15] (step=0015500) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.0326746233613773, LR: 0.0003 +[2026-02-28 19:30:15] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0015500/ +[2026-02-28 19:30:23] (step=0015501) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.032870279788691, LR: 0.0003 +[2026-02-28 19:30:31] (step=0015502) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.033065936216005, LR: 0.0003 +[2026-02-28 19:30:39] (step=0015503) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.0332615926433184, LR: 0.0003 +[2026-02-28 19:30:47] (step=0015504) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.033457249070632, LR: 0.0003 +[2026-02-28 19:30:55] (step=0015505) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.0336529054979455, LR: 0.0003 +[2026-02-28 19:31:02] (step=0015506) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 3.033848561925259, LR: 0.0003 +[2026-02-28 19:31:10] (step=0015507) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.034044218352573, LR: 0.0003 +[2026-02-28 19:31:18] (step=0015508) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.0342398747798867, LR: 0.0003 +[2026-02-28 19:31:26] (step=0015509) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.0344355312072, LR: 0.0003 +[2026-02-28 19:31:34] (step=0015510) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.034631187634514, LR: 0.0003 +[2026-02-28 19:31:42] (step=0015511) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.0348268440618273, LR: 0.0003 +[2026-02-28 19:31:49] (step=0015512) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.035022500489141, LR: 0.0003 +[2026-02-28 19:31:57] (step=0015513) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.035218156916455, LR: 0.0003 +[2026-02-28 19:32:05] (step=0015514) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.0354138133437685, LR: 0.0003 +[2026-02-28 19:32:13] (step=0015515) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.035609469771082, LR: 0.0003 +[2026-02-28 19:32:21] (step=0015516) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.0358051261983956, LR: 0.0003 +[2026-02-28 19:32:29] (step=0015517) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.036000782625709, LR: 0.0003 +[2026-02-28 19:32:36] (step=0015518) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.0361964390530227, LR: 0.0003 +[2026-02-28 19:32:44] (step=0015519) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 3.0363920954803367, LR: 0.0003 +[2026-02-28 19:32:52] (step=0015520) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.0365877519076503, LR: 0.0003 +[2026-02-28 19:33:00] (step=0015521) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.036783408334964, LR: 0.0003 +[2026-02-28 19:33:08] (step=0015522) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.0369790647622774, LR: 0.0003 +[2026-02-28 19:33:16] (step=0015523) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.037174721189591, LR: 0.0003 +[2026-02-28 19:33:24] (step=0015524) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.0373703776169045, LR: 0.0003 +[2026-02-28 19:33:31] (step=0015525) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.0375660340442185, LR: 0.0003 +[2026-02-28 19:33:39] (step=0015526) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.037761690471532, LR: 0.0003 +[2026-02-28 19:33:47] (step=0015527) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 3.0379573468988457, LR: 0.0003 +[2026-02-28 19:33:55] (step=0015528) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.0381530033261592, LR: 0.0003 +[2026-02-28 19:34:03] (step=0015529) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.038348659753473, LR: 0.0003 +[2026-02-28 19:34:11] (step=0015530) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.0385443161807864, LR: 0.0003 +[2026-02-28 19:34:19] (step=0015531) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.0387399726081004, LR: 0.0003 +[2026-02-28 19:34:26] (step=0015532) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.038935629035414, LR: 0.0003 +[2026-02-28 19:34:34] (step=0015533) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.0391312854627275, LR: 0.0003 +[2026-02-28 19:34:42] (step=0015534) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.039326941890041, LR: 0.0003 +[2026-02-28 19:34:50] (step=0015535) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 3.0395225983173546, LR: 0.0003 +[2026-02-28 19:34:58] (step=0015536) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.039718254744668, LR: 0.0003 +[2026-02-28 19:35:06] (step=0015537) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.039913911171982, LR: 0.0003 +[2026-02-28 19:35:13] (step=0015538) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.0401095675992957, LR: 0.0003 +[2026-02-28 19:35:21] (step=0015539) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.0403052240266093, LR: 0.0003 +[2026-02-28 19:35:29] (step=0015540) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.040500880453923, LR: 0.0003 +[2026-02-28 19:35:37] (step=0015541) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.0406965368812364, LR: 0.0003 +[2026-02-28 19:35:45] (step=0015542) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.04089219330855, LR: 0.0003 +[2026-02-28 19:35:53] (step=0015543) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.041087849735864, LR: 0.0003 +[2026-02-28 19:36:00] (step=0015544) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 3.0412835061631776, LR: 0.0003 +[2026-02-28 19:36:08] (step=0015545) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.041479162590491, LR: 0.0003 +[2026-02-28 19:36:16] (step=0015546) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 3.0416748190178047, LR: 0.0003 +[2026-02-28 19:36:24] (step=0015547) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 3.0418704754451182, LR: 0.0003 +[2026-02-28 19:36:32] (step=0015548) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.042066131872432, LR: 0.0003 +[2026-02-28 19:36:40] (step=0015549) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.042261788299746, LR: 0.0003 +[2026-02-28 19:36:47] (step=0015550) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.0424574447270594, LR: 0.0003 +[2026-02-28 19:36:55] (step=0015551) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.042653101154373, LR: 0.0003 +[2026-02-28 19:37:03] (step=0015552) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.0428487575816865, LR: 0.0003 +[2026-02-28 19:37:11] (step=0015553) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.043044414009, LR: 0.0003 +[2026-02-28 19:37:19] (step=0015554) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.0432400704363136, LR: 0.0003 +[2026-02-28 19:37:27] (step=0015555) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.0434357268636276, LR: 0.0003 +[2026-02-28 19:37:34] (step=0015556) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.043631383290941, LR: 0.0003 +[2026-02-28 19:37:42] (step=0015557) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.0438270397182547, LR: 0.0003 +[2026-02-28 19:37:50] (step=0015558) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.0440226961455683, LR: 0.0003 +[2026-02-28 19:37:58] (step=0015559) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.044218352572882, LR: 0.0003 +[2026-02-28 19:38:06] (step=0015560) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.044414009000196, LR: 0.0003 +[2026-02-28 19:38:14] (step=0015561) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.0446096654275094, LR: 0.0003 +[2026-02-28 19:38:21] (step=0015562) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.044805321854823, LR: 0.0003 +[2026-02-28 19:38:29] (step=0015563) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.0450009782821366, LR: 0.0003 +[2026-02-28 19:38:37] (step=0015564) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.04519663470945, LR: 0.0003 +[2026-02-28 19:38:45] (step=0015565) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.0453922911367637, LR: 0.0003 +[2026-02-28 19:38:53] (step=0015566) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.0455879475640777, LR: 0.0003 +[2026-02-28 19:39:01] (step=0015567) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.0457836039913913, LR: 0.0003 +[2026-02-28 19:39:08] (step=0015568) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 3.045979260418705, LR: 0.0003 +[2026-02-28 19:39:16] (step=0015569) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.0461749168460184, LR: 0.0003 +[2026-02-28 19:39:24] (step=0015570) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.046370573273332, LR: 0.0003 +[2026-02-28 19:39:32] (step=0015571) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.0465662297006455, LR: 0.0003 +[2026-02-28 19:39:40] (step=0015572) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.0467618861279595, LR: 0.0003 +[2026-02-28 19:39:48] (step=0015573) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.046957542555273, LR: 0.0003 +[2026-02-28 19:39:56] (step=0015574) Train Loss: 0.4726, Train Steps/Sec: 0.13, Epoch: 3.0471531989825866, LR: 0.0003 +[2026-02-28 19:40:04] (step=0015575) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.0473488554099, LR: 0.0003 +[2026-02-28 19:40:11] (step=0015576) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 3.0475445118372138, LR: 0.0003 +[2026-02-28 19:40:19] (step=0015577) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.0477401682645273, LR: 0.0003 +[2026-02-28 19:40:27] (step=0015578) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.0479358246918413, LR: 0.0003 +[2026-02-28 19:40:35] (step=0015579) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.048131481119155, LR: 0.0003 +[2026-02-28 19:40:43] (step=0015580) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.0483271375464684, LR: 0.0003 +[2026-02-28 19:40:51] (step=0015581) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.048522793973782, LR: 0.0003 +[2026-02-28 19:40:58] (step=0015582) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 3.0487184504010956, LR: 0.0003 +[2026-02-28 19:41:06] (step=0015583) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.048914106828409, LR: 0.0003 +[2026-02-28 19:41:14] (step=0015584) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.049109763255723, LR: 0.0003 +[2026-02-28 19:41:22] (step=0015585) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.0493054196830367, LR: 0.0003 +[2026-02-28 19:41:30] (step=0015586) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.0495010761103503, LR: 0.0003 +[2026-02-28 19:41:38] (step=0015587) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.049696732537664, LR: 0.0003 +[2026-02-28 19:41:46] (step=0015588) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.0498923889649774, LR: 0.0003 +[2026-02-28 19:41:53] (step=0015589) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.050088045392291, LR: 0.0003 +[2026-02-28 19:42:01] (step=0015590) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.050283701819605, LR: 0.0003 +[2026-02-28 19:42:09] (step=0015591) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.0504793582469185, LR: 0.0003 +[2026-02-28 19:42:17] (step=0015592) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 3.050675014674232, LR: 0.0003 +[2026-02-28 19:42:25] (step=0015593) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.0508706711015456, LR: 0.0003 +[2026-02-28 19:42:33] (step=0015594) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.051066327528859, LR: 0.0003 +[2026-02-28 19:42:40] (step=0015595) Train Loss: 0.4714, Train Steps/Sec: 0.13, Epoch: 3.0512619839561728, LR: 0.0003 +[2026-02-28 19:42:48] (step=0015596) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.0514576403834868, LR: 0.0003 +[2026-02-28 19:42:56] (step=0015597) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 3.0516532968108003, LR: 0.0003 +[2026-02-28 19:43:04] (step=0015598) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.051848953238114, LR: 0.0003 +[2026-02-28 19:43:12] (step=0015599) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.0520446096654275, LR: 0.0003 +[2026-02-28 19:43:20] (step=0015600) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.052240266092741, LR: 0.0003 +[2026-02-28 19:43:27] (step=0015601) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.0524359225200546, LR: 0.0003 +[2026-02-28 19:43:35] (step=0015602) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.0526315789473686, LR: 0.0003 +[2026-02-28 19:43:43] (step=0015603) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.052827235374682, LR: 0.0003 +[2026-02-28 19:43:51] (step=0015604) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.0530228918019957, LR: 0.0003 +[2026-02-28 19:43:59] (step=0015605) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.0532185482293093, LR: 0.0003 +[2026-02-28 19:44:06] (step=0015606) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.053414204656623, LR: 0.0003 +[2026-02-28 19:44:14] (step=0015607) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 3.0536098610839364, LR: 0.0003 +[2026-02-28 19:44:22] (step=0015608) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.0538055175112504, LR: 0.0003 +[2026-02-28 19:44:30] (step=0015609) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.054001173938564, LR: 0.0003 +[2026-02-28 19:44:38] (step=0015610) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.0541968303658775, LR: 0.0003 +[2026-02-28 19:44:46] (step=0015611) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 3.054392486793191, LR: 0.0003 +[2026-02-28 19:44:54] (step=0015612) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.0545881432205046, LR: 0.0003 +[2026-02-28 19:45:01] (step=0015613) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.054783799647818, LR: 0.0003 +[2026-02-28 19:45:09] (step=0015614) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.054979456075132, LR: 0.0003 +[2026-02-28 19:45:17] (step=0015615) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.0551751125024458, LR: 0.0003 +[2026-02-28 19:45:25] (step=0015616) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.0553707689297593, LR: 0.0003 +[2026-02-28 19:45:33] (step=0015617) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.055566425357073, LR: 0.0003 +[2026-02-28 19:45:41] (step=0015618) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.0557620817843865, LR: 0.0003 +[2026-02-28 19:45:48] (step=0015619) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.0559577382117005, LR: 0.0003 +[2026-02-28 19:45:56] (step=0015620) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 3.056153394639014, LR: 0.0003 +[2026-02-28 19:46:04] (step=0015621) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.0563490510663276, LR: 0.0003 +[2026-02-28 19:46:12] (step=0015622) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.056544707493641, LR: 0.0003 +[2026-02-28 19:46:20] (step=0015623) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 3.0567403639209547, LR: 0.0003 +[2026-02-28 19:46:28] (step=0015624) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.0569360203482683, LR: 0.0003 +[2026-02-28 19:46:36] (step=0015625) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.0571316767755823, LR: 0.0003 +[2026-02-28 19:46:43] (step=0015626) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.057327333202896, LR: 0.0003 +[2026-02-28 19:46:51] (step=0015627) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.0575229896302094, LR: 0.0003 +[2026-02-28 19:46:59] (step=0015628) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.057718646057523, LR: 0.0003 +[2026-02-28 19:47:07] (step=0015629) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.0579143024848365, LR: 0.0003 +[2026-02-28 19:47:15] (step=0015630) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.05810995891215, LR: 0.0003 +[2026-02-28 19:47:23] (step=0015631) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.058305615339464, LR: 0.0003 +[2026-02-28 19:47:31] (step=0015632) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.0585012717667777, LR: 0.0003 +[2026-02-28 19:47:38] (step=0015633) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.0586969281940912, LR: 0.0003 +[2026-02-28 19:47:46] (step=0015634) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.058892584621405, LR: 0.0003 +[2026-02-28 19:47:54] (step=0015635) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.0590882410487183, LR: 0.0003 +[2026-02-28 19:48:02] (step=0015636) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.059283897476032, LR: 0.0003 +[2026-02-28 19:48:10] (step=0015637) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.059479553903346, LR: 0.0003 +[2026-02-28 19:48:18] (step=0015638) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.0596752103306595, LR: 0.0003 +[2026-02-28 19:48:25] (step=0015639) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.059870866757973, LR: 0.0003 +[2026-02-28 19:48:33] (step=0015640) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.0600665231852866, LR: 0.0003 +[2026-02-28 19:48:41] (step=0015641) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.0602621796126, LR: 0.0003 +[2026-02-28 19:48:49] (step=0015642) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.0604578360399137, LR: 0.0003 +[2026-02-28 19:48:57] (step=0015643) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.0606534924672277, LR: 0.0003 +[2026-02-28 19:49:05] (step=0015644) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.0608491488945413, LR: 0.0003 +[2026-02-28 19:49:12] (step=0015645) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.061044805321855, LR: 0.0003 +[2026-02-28 19:49:20] (step=0015646) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 3.0612404617491684, LR: 0.0003 +[2026-02-28 19:49:28] (step=0015647) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.061436118176482, LR: 0.0003 +[2026-02-28 19:49:36] (step=0015648) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.0616317746037955, LR: 0.0003 +[2026-02-28 19:49:44] (step=0015649) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.0618274310311095, LR: 0.0003 +[2026-02-28 19:49:51] (step=0015650) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.062023087458423, LR: 0.0003 +[2026-02-28 19:49:59] (step=0015651) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.0622187438857367, LR: 0.0003 +[2026-02-28 19:50:07] (step=0015652) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 3.0624144003130502, LR: 0.0003 +[2026-02-28 19:50:15] (step=0015653) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.062610056740364, LR: 0.0003 +[2026-02-28 19:50:23] (step=0015654) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.0628057131676774, LR: 0.0003 +[2026-02-28 19:50:31] (step=0015655) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.0630013695949914, LR: 0.0003 +[2026-02-28 19:50:39] (step=0015656) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.063197026022305, LR: 0.0003 +[2026-02-28 19:50:46] (step=0015657) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.0633926824496185, LR: 0.0003 +[2026-02-28 19:50:54] (step=0015658) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.063588338876932, LR: 0.0003 +[2026-02-28 19:51:02] (step=0015659) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.0637839953042456, LR: 0.0003 +[2026-02-28 19:51:10] (step=0015660) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.063979651731559, LR: 0.0003 +[2026-02-28 19:51:18] (step=0015661) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.064175308158873, LR: 0.0003 +[2026-02-28 19:51:26] (step=0015662) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.0643709645861867, LR: 0.0003 +[2026-02-28 19:51:33] (step=0015663) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.0645666210135003, LR: 0.0003 +[2026-02-28 19:51:41] (step=0015664) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.064762277440814, LR: 0.0003 +[2026-02-28 19:51:49] (step=0015665) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.0649579338681274, LR: 0.0003 +[2026-02-28 19:51:57] (step=0015666) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.065153590295441, LR: 0.0003 +[2026-02-28 19:52:05] (step=0015667) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.065349246722755, LR: 0.0003 +[2026-02-28 19:52:13] (step=0015668) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.0655449031500686, LR: 0.0003 +[2026-02-28 19:52:20] (step=0015669) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.065740559577382, LR: 0.0003 +[2026-02-28 19:52:28] (step=0015670) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.0659362160046957, LR: 0.0003 +[2026-02-28 19:52:36] (step=0015671) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.0661318724320092, LR: 0.0003 +[2026-02-28 19:52:44] (step=0015672) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.0663275288593232, LR: 0.0003 +[2026-02-28 19:52:52] (step=0015673) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.066523185286637, LR: 0.0003 +[2026-02-28 19:53:00] (step=0015674) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.0667188417139504, LR: 0.0003 +[2026-02-28 19:53:08] (step=0015675) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.066914498141264, LR: 0.0003 +[2026-02-28 19:53:15] (step=0015676) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.0671101545685775, LR: 0.0003 +[2026-02-28 19:53:23] (step=0015677) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.067305810995891, LR: 0.0003 +[2026-02-28 19:53:31] (step=0015678) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.067501467423205, LR: 0.0003 +[2026-02-28 19:53:39] (step=0015679) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 3.0676971238505186, LR: 0.0003 +[2026-02-28 19:53:47] (step=0015680) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.067892780277832, LR: 0.0003 +[2026-02-28 19:53:55] (step=0015681) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.0680884367051457, LR: 0.0003 +[2026-02-28 19:54:03] (step=0015682) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.0682840931324593, LR: 0.0003 +[2026-02-28 19:54:10] (step=0015683) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.068479749559773, LR: 0.0003 +[2026-02-28 19:54:18] (step=0015684) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.068675405987087, LR: 0.0003 +[2026-02-28 19:54:26] (step=0015685) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.0688710624144004, LR: 0.0003 +[2026-02-28 19:54:34] (step=0015686) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.069066718841714, LR: 0.0003 +[2026-02-28 19:54:42] (step=0015687) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.0692623752690276, LR: 0.0003 +[2026-02-28 19:54:50] (step=0015688) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.069458031696341, LR: 0.0003 +[2026-02-28 19:54:57] (step=0015689) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.0696536881236547, LR: 0.0003 +[2026-02-28 19:55:05] (step=0015690) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.0698493445509687, LR: 0.0003 +[2026-02-28 19:55:13] (step=0015691) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.0700450009782823, LR: 0.0003 +[2026-02-28 19:55:21] (step=0015692) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.070240657405596, LR: 0.0003 +[2026-02-28 19:55:29] (step=0015693) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.0704363138329094, LR: 0.0003 +[2026-02-28 19:55:37] (step=0015694) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.070631970260223, LR: 0.0003 +[2026-02-28 19:55:44] (step=0015695) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.0708276266875365, LR: 0.0003 +[2026-02-28 19:55:52] (step=0015696) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.0710232831148505, LR: 0.0003 +[2026-02-28 19:56:00] (step=0015697) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.071218939542164, LR: 0.0003 +[2026-02-28 19:56:08] (step=0015698) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.0714145959694776, LR: 0.0003 +[2026-02-28 19:56:16] (step=0015699) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 3.071610252396791, LR: 0.0003 +[2026-02-28 19:56:24] (step=0015700) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.0718059088241048, LR: 0.0003 +[2026-02-28 19:56:31] (step=0015701) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.0720015652514183, LR: 0.0003 +[2026-02-28 19:56:39] (step=0015702) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 3.0721972216787323, LR: 0.0003 +[2026-02-28 19:56:47] (step=0015703) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 3.072392878106046, LR: 0.0003 +[2026-02-28 19:56:55] (step=0015704) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.0725885345333595, LR: 0.0003 +[2026-02-28 19:57:03] (step=0015705) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.072784190960673, LR: 0.0003 +[2026-02-28 19:57:11] (step=0015706) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.0729798473879866, LR: 0.0003 +[2026-02-28 19:57:19] (step=0015707) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.0731755038153, LR: 0.0003 +[2026-02-28 19:57:26] (step=0015708) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.073371160242614, LR: 0.0003 +[2026-02-28 19:57:34] (step=0015709) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.0735668166699277, LR: 0.0003 +[2026-02-28 19:57:42] (step=0015710) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.0737624730972413, LR: 0.0003 +[2026-02-28 19:57:50] (step=0015711) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.073958129524555, LR: 0.0003 +[2026-02-28 19:57:58] (step=0015712) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.0741537859518684, LR: 0.0003 +[2026-02-28 19:58:06] (step=0015713) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.074349442379182, LR: 0.0003 +[2026-02-28 19:58:13] (step=0015714) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.074545098806496, LR: 0.0003 +[2026-02-28 19:58:21] (step=0015715) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.0747407552338095, LR: 0.0003 +[2026-02-28 19:58:29] (step=0015716) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.074936411661123, LR: 0.0003 +[2026-02-28 19:58:37] (step=0015717) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.0751320680884366, LR: 0.0003 +[2026-02-28 19:58:45] (step=0015718) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.07532772451575, LR: 0.0003 +[2026-02-28 19:58:53] (step=0015719) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.0755233809430638, LR: 0.0003 +[2026-02-28 19:59:00] (step=0015720) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.0757190373703778, LR: 0.0003 +[2026-02-28 19:59:08] (step=0015721) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.0759146937976913, LR: 0.0003 +[2026-02-28 19:59:16] (step=0015722) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.076110350225005, LR: 0.0003 +[2026-02-28 19:59:24] (step=0015723) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.0763060066523185, LR: 0.0003 +[2026-02-28 19:59:32] (step=0015724) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.076501663079632, LR: 0.0003 +[2026-02-28 19:59:40] (step=0015725) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.0766973195069456, LR: 0.0003 +[2026-02-28 19:59:47] (step=0015726) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.0768929759342596, LR: 0.0003 +[2026-02-28 19:59:55] (step=0015727) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.077088632361573, LR: 0.0003 +[2026-02-28 20:00:03] (step=0015728) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.0772842887888867, LR: 0.0003 +[2026-02-28 20:00:11] (step=0015729) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 3.0774799452162003, LR: 0.0003 +[2026-02-28 20:00:19] (step=0015730) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.077675601643514, LR: 0.0003 +[2026-02-28 20:00:27] (step=0015731) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.077871258070828, LR: 0.0003 +[2026-02-28 20:00:35] (step=0015732) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.0780669144981414, LR: 0.0003 +[2026-02-28 20:00:42] (step=0015733) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.078262570925455, LR: 0.0003 +[2026-02-28 20:00:50] (step=0015734) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.0784582273527685, LR: 0.0003 +[2026-02-28 20:00:58] (step=0015735) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 3.078653883780082, LR: 0.0003 +[2026-02-28 20:01:06] (step=0015736) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.0788495402073957, LR: 0.0003 +[2026-02-28 20:01:14] (step=0015737) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 3.0790451966347097, LR: 0.0003 +[2026-02-28 20:01:22] (step=0015738) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.079240853062023, LR: 0.0003 +[2026-02-28 20:01:29] (step=0015739) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.079436509489337, LR: 0.0003 +[2026-02-28 20:01:37] (step=0015740) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.0796321659166503, LR: 0.0003 +[2026-02-28 20:01:45] (step=0015741) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 3.079827822343964, LR: 0.0003 +[2026-02-28 20:01:53] (step=0015742) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.0800234787712775, LR: 0.0003 +[2026-02-28 20:02:01] (step=0015743) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.0802191351985915, LR: 0.0003 +[2026-02-28 20:02:09] (step=0015744) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.080414791625905, LR: 0.0003 +[2026-02-28 20:02:16] (step=0015745) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.0806104480532186, LR: 0.0003 +[2026-02-28 20:02:24] (step=0015746) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.080806104480532, LR: 0.0003 +[2026-02-28 20:02:32] (step=0015747) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.0810017609078457, LR: 0.0003 +[2026-02-28 20:02:40] (step=0015748) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.0811974173351593, LR: 0.0003 +[2026-02-28 20:02:48] (step=0015749) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.0813930737624733, LR: 0.0003 +[2026-02-28 20:02:56] (step=0015750) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.081588730189787, LR: 0.0003 +[2026-02-28 20:03:03] (step=0015751) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.0817843866171004, LR: 0.0003 +[2026-02-28 20:03:11] (step=0015752) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.081980043044414, LR: 0.0003 +[2026-02-28 20:03:19] (step=0015753) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 3.0821756994717275, LR: 0.0003 +[2026-02-28 20:03:27] (step=0015754) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.082371355899041, LR: 0.0003 +[2026-02-28 20:03:35] (step=0015755) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.082567012326355, LR: 0.0003 +[2026-02-28 20:03:43] (step=0015756) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 3.0827626687536687, LR: 0.0003 +[2026-02-28 20:03:50] (step=0015757) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.0829583251809822, LR: 0.0003 +[2026-02-28 20:03:58] (step=0015758) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.083153981608296, LR: 0.0003 +[2026-02-28 20:04:06] (step=0015759) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.0833496380356094, LR: 0.0003 +[2026-02-28 20:04:14] (step=0015760) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.083545294462923, LR: 0.0003 +[2026-02-28 20:04:22] (step=0015761) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.083740950890237, LR: 0.0003 +[2026-02-28 20:04:30] (step=0015762) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.0839366073175505, LR: 0.0003 +[2026-02-28 20:04:37] (step=0015763) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.084132263744864, LR: 0.0003 +[2026-02-28 20:04:45] (step=0015764) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.0843279201721776, LR: 0.0003 +[2026-02-28 20:04:53] (step=0015765) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 3.084523576599491, LR: 0.0003 +[2026-02-28 20:05:01] (step=0015766) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 3.0847192330268047, LR: 0.0003 +[2026-02-28 20:05:09] (step=0015767) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.0849148894541187, LR: 0.0003 +[2026-02-28 20:05:17] (step=0015768) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.0851105458814323, LR: 0.0003 +[2026-02-28 20:05:24] (step=0015769) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.085306202308746, LR: 0.0003 +[2026-02-28 20:05:32] (step=0015770) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.0855018587360594, LR: 0.0003 +[2026-02-28 20:05:40] (step=0015771) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.085697515163373, LR: 0.0003 +[2026-02-28 20:05:48] (step=0015772) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.0858931715906865, LR: 0.0003 +[2026-02-28 20:05:56] (step=0015773) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.0860888280180006, LR: 0.0003 +[2026-02-28 20:06:04] (step=0015774) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.086284484445314, LR: 0.0003 +[2026-02-28 20:06:11] (step=0015775) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.0864801408726277, LR: 0.0003 +[2026-02-28 20:06:19] (step=0015776) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.0866757972999412, LR: 0.0003 +[2026-02-28 20:06:27] (step=0015777) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.086871453727255, LR: 0.0003 +[2026-02-28 20:06:35] (step=0015778) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 3.0870671101545684, LR: 0.0003 +[2026-02-28 20:06:43] (step=0015779) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.0872627665818824, LR: 0.0003 +[2026-02-28 20:06:51] (step=0015780) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.087458423009196, LR: 0.0003 +[2026-02-28 20:06:59] (step=0015781) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.0876540794365095, LR: 0.0003 +[2026-02-28 20:07:07] (step=0015782) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.087849735863823, LR: 0.0003 +[2026-02-28 20:07:14] (step=0015783) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.0880453922911366, LR: 0.0003 +[2026-02-28 20:07:22] (step=0015784) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.0882410487184506, LR: 0.0003 +[2026-02-28 20:07:30] (step=0015785) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.088436705145764, LR: 0.0003 +[2026-02-28 20:07:38] (step=0015786) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.0886323615730777, LR: 0.0003 +[2026-02-28 20:07:46] (step=0015787) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.0888280180003913, LR: 0.0003 +[2026-02-28 20:07:54] (step=0015788) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.089023674427705, LR: 0.0003 +[2026-02-28 20:08:01] (step=0015789) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.0892193308550184, LR: 0.0003 +[2026-02-28 20:08:09] (step=0015790) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 3.0894149872823324, LR: 0.0003 +[2026-02-28 20:08:17] (step=0015791) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.089610643709646, LR: 0.0003 +[2026-02-28 20:08:25] (step=0015792) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.0898063001369596, LR: 0.0003 +[2026-02-28 20:08:33] (step=0015793) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.090001956564273, LR: 0.0003 +[2026-02-28 20:08:41] (step=0015794) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.0901976129915867, LR: 0.0003 +[2026-02-28 20:08:48] (step=0015795) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.0903932694189002, LR: 0.0003 +[2026-02-28 20:08:56] (step=0015796) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.0905889258462143, LR: 0.0003 +[2026-02-28 20:09:04] (step=0015797) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.090784582273528, LR: 0.0003 +[2026-02-28 20:09:12] (step=0015798) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.0909802387008414, LR: 0.0003 +[2026-02-28 20:09:20] (step=0015799) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.091175895128155, LR: 0.0003 +[2026-02-28 20:09:28] (step=0015800) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.0913715515554685, LR: 0.0003 +[2026-02-28 20:09:35] (step=0015801) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 3.091567207982782, LR: 0.0003 +[2026-02-28 20:09:43] (step=0015802) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.091762864410096, LR: 0.0003 +[2026-02-28 20:09:51] (step=0015803) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.0919585208374096, LR: 0.0003 +[2026-02-28 20:09:59] (step=0015804) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.092154177264723, LR: 0.0003 +[2026-02-28 20:10:07] (step=0015805) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.0923498336920368, LR: 0.0003 +[2026-02-28 20:10:15] (step=0015806) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.0925454901193503, LR: 0.0003 +[2026-02-28 20:10:23] (step=0015807) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.092741146546664, LR: 0.0003 +[2026-02-28 20:10:30] (step=0015808) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.092936802973978, LR: 0.0003 +[2026-02-28 20:10:38] (step=0015809) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.0931324594012914, LR: 0.0003 +[2026-02-28 20:10:46] (step=0015810) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.093328115828605, LR: 0.0003 +[2026-02-28 20:10:54] (step=0015811) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.0935237722559186, LR: 0.0003 +[2026-02-28 20:11:02] (step=0015812) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.093719428683232, LR: 0.0003 +[2026-02-28 20:11:10] (step=0015813) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 3.0939150851105457, LR: 0.0003 +[2026-02-28 20:11:17] (step=0015814) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.0941107415378597, LR: 0.0003 +[2026-02-28 20:11:25] (step=0015815) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.0943063979651733, LR: 0.0003 +[2026-02-28 20:11:33] (step=0015816) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.094502054392487, LR: 0.0003 +[2026-02-28 20:11:41] (step=0015817) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.0946977108198004, LR: 0.0003 +[2026-02-28 20:11:49] (step=0015818) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.094893367247114, LR: 0.0003 +[2026-02-28 20:11:57] (step=0015819) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.0950890236744275, LR: 0.0003 +[2026-02-28 20:12:04] (step=0015820) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.0952846801017415, LR: 0.0003 +[2026-02-28 20:12:12] (step=0015821) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.095480336529055, LR: 0.0003 +[2026-02-28 20:12:20] (step=0015822) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 3.0956759929563686, LR: 0.0003 +[2026-02-28 20:12:28] (step=0015823) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.095871649383682, LR: 0.0003 +[2026-02-28 20:12:36] (step=0015824) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.0960673058109958, LR: 0.0003 +[2026-02-28 20:12:44] (step=0015825) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 3.0962629622383093, LR: 0.0003 +[2026-02-28 20:12:51] (step=0015826) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.0964586186656233, LR: 0.0003 +[2026-02-28 20:12:59] (step=0015827) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.096654275092937, LR: 0.0003 +[2026-02-28 20:13:07] (step=0015828) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.0968499315202505, LR: 0.0003 +[2026-02-28 20:13:15] (step=0015829) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 3.097045587947564, LR: 0.0003 +[2026-02-28 20:13:23] (step=0015830) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.0972412443748776, LR: 0.0003 +[2026-02-28 20:13:31] (step=0015831) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.097436900802191, LR: 0.0003 +[2026-02-28 20:13:39] (step=0015832) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.097632557229505, LR: 0.0003 +[2026-02-28 20:13:47] (step=0015833) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.0978282136568187, LR: 0.0003 +[2026-02-28 20:13:54] (step=0015834) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.0980238700841323, LR: 0.0003 +[2026-02-28 20:14:02] (step=0015835) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.098219526511446, LR: 0.0003 +[2026-02-28 20:14:10] (step=0015836) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.0984151829387594, LR: 0.0003 +[2026-02-28 20:14:18] (step=0015837) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.098610839366073, LR: 0.0003 +[2026-02-28 20:14:26] (step=0015838) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.098806495793387, LR: 0.0003 +[2026-02-28 20:14:34] (step=0015839) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.0990021522207005, LR: 0.0003 +[2026-02-28 20:14:41] (step=0015840) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.099197808648014, LR: 0.0003 +[2026-02-28 20:14:49] (step=0015841) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.0993934650753276, LR: 0.0003 +[2026-02-28 20:14:57] (step=0015842) Train Loss: 0.4691, Train Steps/Sec: 0.13, Epoch: 3.099589121502641, LR: 0.0003 +[2026-02-28 20:15:05] (step=0015843) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.099784777929955, LR: 0.0003 +[2026-02-28 20:15:13] (step=0015844) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.0999804343572688, LR: 0.0003 +[2026-02-28 20:15:21] (step=0015845) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.1001760907845823, LR: 0.0003 +[2026-02-28 20:15:28] (step=0015846) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.100371747211896, LR: 0.0003 +[2026-02-28 20:15:36] (step=0015847) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 3.1005674036392095, LR: 0.0003 +[2026-02-28 20:15:44] (step=0015848) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.100763060066523, LR: 0.0003 +[2026-02-28 20:15:52] (step=0015849) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.100958716493837, LR: 0.0003 +[2026-02-28 20:16:00] (step=0015850) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.1011543729211506, LR: 0.0003 +[2026-02-28 20:16:08] (step=0015851) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.101350029348464, LR: 0.0003 +[2026-02-28 20:16:15] (step=0015852) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.1015456857757777, LR: 0.0003 +[2026-02-28 20:16:23] (step=0015853) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.1017413422030913, LR: 0.0003 +[2026-02-28 20:16:31] (step=0015854) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.101936998630405, LR: 0.0003 +[2026-02-28 20:16:39] (step=0015855) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 3.102132655057719, LR: 0.0003 +[2026-02-28 20:16:47] (step=0015856) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.1023283114850324, LR: 0.0003 +[2026-02-28 20:16:55] (step=0015857) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.102523967912346, LR: 0.0003 +[2026-02-28 20:17:02] (step=0015858) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.1027196243396595, LR: 0.0003 +[2026-02-28 20:17:10] (step=0015859) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.102915280766973, LR: 0.0003 +[2026-02-28 20:17:18] (step=0015860) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 3.1031109371942867, LR: 0.0003 +[2026-02-28 20:17:26] (step=0015861) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.1033065936216007, LR: 0.0003 +[2026-02-28 20:17:34] (step=0015862) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 3.1035022500489142, LR: 0.0003 +[2026-02-28 20:17:42] (step=0015863) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.103697906476228, LR: 0.0003 +[2026-02-28 20:17:49] (step=0015864) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.1038935629035413, LR: 0.0003 +[2026-02-28 20:17:57] (step=0015865) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.104089219330855, LR: 0.0003 +[2026-02-28 20:18:05] (step=0015866) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.1042848757581685, LR: 0.0003 +[2026-02-28 20:18:13] (step=0015867) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.1044805321854825, LR: 0.0003 +[2026-02-28 20:18:21] (step=0015868) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 3.104676188612796, LR: 0.0003 +[2026-02-28 20:18:29] (step=0015869) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.1048718450401096, LR: 0.0003 +[2026-02-28 20:18:36] (step=0015870) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 3.105067501467423, LR: 0.0003 +[2026-02-28 20:18:44] (step=0015871) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 3.1052631578947367, LR: 0.0003 +[2026-02-28 20:18:52] (step=0015872) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.1054588143220503, LR: 0.0003 +[2026-02-28 20:19:00] (step=0015873) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.1056544707493643, LR: 0.0003 +[2026-02-28 20:19:08] (step=0015874) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.105850127176678, LR: 0.0003 +[2026-02-28 20:19:16] (step=0015875) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 3.1060457836039914, LR: 0.0003 +[2026-02-28 20:19:24] (step=0015876) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.106241440031305, LR: 0.0003 +[2026-02-28 20:19:31] (step=0015877) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.1064370964586185, LR: 0.0003 +[2026-02-28 20:19:39] (step=0015878) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.106632752885932, LR: 0.0003 +[2026-02-28 20:19:47] (step=0015879) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.106828409313246, LR: 0.0003 +[2026-02-28 20:19:55] (step=0015880) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.1070240657405597, LR: 0.0003 +[2026-02-28 20:20:03] (step=0015881) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.1072197221678732, LR: 0.0003 +[2026-02-28 20:20:11] (step=0015882) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.107415378595187, LR: 0.0003 +[2026-02-28 20:20:19] (step=0015883) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.1076110350225004, LR: 0.0003 +[2026-02-28 20:20:26] (step=0015884) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.107806691449814, LR: 0.0003 +[2026-02-28 20:20:34] (step=0015885) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 3.108002347877128, LR: 0.0003 +[2026-02-28 20:20:42] (step=0015886) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.1081980043044415, LR: 0.0003 +[2026-02-28 20:20:50] (step=0015887) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.108393660731755, LR: 0.0003 +[2026-02-28 20:20:58] (step=0015888) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.1085893171590686, LR: 0.0003 +[2026-02-28 20:21:06] (step=0015889) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.108784973586382, LR: 0.0003 +[2026-02-28 20:21:13] (step=0015890) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.1089806300136957, LR: 0.0003 +[2026-02-28 20:21:21] (step=0015891) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.1091762864410097, LR: 0.0003 +[2026-02-28 20:21:29] (step=0015892) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.1093719428683233, LR: 0.0003 +[2026-02-28 20:21:37] (step=0015893) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 3.109567599295637, LR: 0.0003 +[2026-02-28 20:21:45] (step=0015894) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.1097632557229504, LR: 0.0003 +[2026-02-28 20:21:53] (step=0015895) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.109958912150264, LR: 0.0003 +[2026-02-28 20:22:00] (step=0015896) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.110154568577578, LR: 0.0003 +[2026-02-28 20:22:08] (step=0015897) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.1103502250048916, LR: 0.0003 +[2026-02-28 20:22:16] (step=0015898) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.110545881432205, LR: 0.0003 +[2026-02-28 20:22:24] (step=0015899) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.1107415378595187, LR: 0.0003 +[2026-02-28 20:22:32] (step=0015900) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.1109371942868322, LR: 0.0003 +[2026-02-28 20:22:40] (step=0015901) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.111132850714146, LR: 0.0003 +[2026-02-28 20:22:47] (step=0015902) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 3.11132850714146, LR: 0.0003 +[2026-02-28 20:22:55] (step=0015903) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.1115241635687734, LR: 0.0003 +[2026-02-28 20:23:03] (step=0015904) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.111719819996087, LR: 0.0003 +[2026-02-28 20:23:11] (step=0015905) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.1119154764234005, LR: 0.0003 +[2026-02-28 20:23:19] (step=0015906) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 3.112111132850714, LR: 0.0003 +[2026-02-28 20:23:27] (step=0015907) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.1123067892780276, LR: 0.0003 +[2026-02-28 20:23:34] (step=0015908) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.1125024457053416, LR: 0.0003 +[2026-02-28 20:23:42] (step=0015909) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.112698102132655, LR: 0.0003 +[2026-02-28 20:23:50] (step=0015910) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.1128937585599687, LR: 0.0003 +[2026-02-28 20:23:58] (step=0015911) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.1130894149872823, LR: 0.0003 +[2026-02-28 20:24:06] (step=0015912) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.113285071414596, LR: 0.0003 +[2026-02-28 20:24:14] (step=0015913) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.1134807278419094, LR: 0.0003 +[2026-02-28 20:24:21] (step=0015914) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.1136763842692234, LR: 0.0003 +[2026-02-28 20:24:29] (step=0015915) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.113872040696537, LR: 0.0003 +[2026-02-28 20:24:37] (step=0015916) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.1140676971238506, LR: 0.0003 +[2026-02-28 20:24:45] (step=0015917) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.114263353551164, LR: 0.0003 +[2026-02-28 20:24:53] (step=0015918) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.1144590099784777, LR: 0.0003 +[2026-02-28 20:25:01] (step=0015919) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.1146546664057913, LR: 0.0003 +[2026-02-28 20:25:08] (step=0015920) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.1148503228331053, LR: 0.0003 +[2026-02-28 20:25:16] (step=0015921) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.115045979260419, LR: 0.0003 +[2026-02-28 20:25:24] (step=0015922) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.1152416356877324, LR: 0.0003 +[2026-02-28 20:25:32] (step=0015923) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.115437292115046, LR: 0.0003 +[2026-02-28 20:25:40] (step=0015924) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.1156329485423595, LR: 0.0003 +[2026-02-28 20:25:48] (step=0015925) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.115828604969673, LR: 0.0003 +[2026-02-28 20:25:55] (step=0015926) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.116024261396987, LR: 0.0003 +[2026-02-28 20:26:03] (step=0015927) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.1162199178243006, LR: 0.0003 +[2026-02-28 20:26:11] (step=0015928) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.116415574251614, LR: 0.0003 +[2026-02-28 20:26:19] (step=0015929) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.1166112306789278, LR: 0.0003 +[2026-02-28 20:26:27] (step=0015930) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.1168068871062413, LR: 0.0003 +[2026-02-28 20:26:35] (step=0015931) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.117002543533555, LR: 0.0003 +[2026-02-28 20:26:43] (step=0015932) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 3.117198199960869, LR: 0.0003 +[2026-02-28 20:26:51] (step=0015933) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 3.1173938563881824, LR: 0.0003 +[2026-02-28 20:26:58] (step=0015934) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.117589512815496, LR: 0.0003 +[2026-02-28 20:27:06] (step=0015935) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.1177851692428096, LR: 0.0003 +[2026-02-28 20:27:14] (step=0015936) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.117980825670123, LR: 0.0003 +[2026-02-28 20:27:22] (step=0015937) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.1181764820974367, LR: 0.0003 +[2026-02-28 20:27:30] (step=0015938) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.1183721385247507, LR: 0.0003 +[2026-02-28 20:27:38] (step=0015939) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.1185677949520643, LR: 0.0003 +[2026-02-28 20:27:46] (step=0015940) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.118763451379378, LR: 0.0003 +[2026-02-28 20:27:53] (step=0015941) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 3.1189591078066914, LR: 0.0003 +[2026-02-28 20:28:01] (step=0015942) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.119154764234005, LR: 0.0003 +[2026-02-28 20:28:09] (step=0015943) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.1193504206613185, LR: 0.0003 +[2026-02-28 20:28:17] (step=0015944) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.1195460770886325, LR: 0.0003 +[2026-02-28 20:28:25] (step=0015945) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.119741733515946, LR: 0.0003 +[2026-02-28 20:28:33] (step=0015946) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.1199373899432596, LR: 0.0003 +[2026-02-28 20:28:40] (step=0015947) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.120133046370573, LR: 0.0003 +[2026-02-28 20:28:48] (step=0015948) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.1203287027978868, LR: 0.0003 +[2026-02-28 20:28:56] (step=0015949) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.1205243592252003, LR: 0.0003 +[2026-02-28 20:29:04] (step=0015950) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.1207200156525143, LR: 0.0003 +[2026-02-28 20:29:12] (step=0015951) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.120915672079828, LR: 0.0003 +[2026-02-28 20:29:20] (step=0015952) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.1211113285071415, LR: 0.0003 +[2026-02-28 20:29:27] (step=0015953) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.121306984934455, LR: 0.0003 +[2026-02-28 20:29:35] (step=0015954) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.1215026413617686, LR: 0.0003 +[2026-02-28 20:29:43] (step=0015955) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.1216982977890826, LR: 0.0003 +[2026-02-28 20:29:51] (step=0015956) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.121893954216396, LR: 0.0003 +[2026-02-28 20:29:59] (step=0015957) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.1220896106437097, LR: 0.0003 +[2026-02-28 20:30:07] (step=0015958) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.1222852670710233, LR: 0.0003 +[2026-02-28 20:30:14] (step=0015959) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.122480923498337, LR: 0.0003 +[2026-02-28 20:30:22] (step=0015960) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.1226765799256504, LR: 0.0003 +[2026-02-28 20:30:30] (step=0015961) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 3.1228722363529644, LR: 0.0003 +[2026-02-28 20:30:38] (step=0015962) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.123067892780278, LR: 0.0003 +[2026-02-28 20:30:46] (step=0015963) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.1232635492075915, LR: 0.0003 +[2026-02-28 20:30:54] (step=0015964) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.123459205634905, LR: 0.0003 +[2026-02-28 20:31:01] (step=0015965) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.1236548620622187, LR: 0.0003 +[2026-02-28 20:31:09] (step=0015966) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.123850518489532, LR: 0.0003 +[2026-02-28 20:31:17] (step=0015967) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.124046174916846, LR: 0.0003 +[2026-02-28 20:31:25] (step=0015968) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.12424183134416, LR: 0.0003 +[2026-02-28 20:31:33] (step=0015969) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.1244374877714733, LR: 0.0003 +[2026-02-28 20:31:41] (step=0015970) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.124633144198787, LR: 0.0003 +[2026-02-28 20:31:48] (step=0015971) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.1248288006261005, LR: 0.0003 +[2026-02-28 20:31:56] (step=0015972) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 3.125024457053414, LR: 0.0003 +[2026-02-28 20:32:04] (step=0015973) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 3.125220113480728, LR: 0.0003 +[2026-02-28 20:32:12] (step=0015974) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.1254157699080416, LR: 0.0003 +[2026-02-28 20:32:20] (step=0015975) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.125611426335355, LR: 0.0003 +[2026-02-28 20:32:28] (step=0015976) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.1258070827626687, LR: 0.0003 +[2026-02-28 20:32:36] (step=0015977) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.1260027391899823, LR: 0.0003 +[2026-02-28 20:32:43] (step=0015978) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.126198395617296, LR: 0.0003 +[2026-02-28 20:32:51] (step=0015979) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.12639405204461, LR: 0.0003 +[2026-02-28 20:32:59] (step=0015980) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 3.1265897084719234, LR: 0.0003 +[2026-02-28 20:33:07] (step=0015981) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.126785364899237, LR: 0.0003 +[2026-02-28 20:33:15] (step=0015982) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 3.1269810213265505, LR: 0.0003 +[2026-02-28 20:33:23] (step=0015983) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.127176677753864, LR: 0.0003 +[2026-02-28 20:33:31] (step=0015984) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.1273723341811777, LR: 0.0003 +[2026-02-28 20:33:38] (step=0015985) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.1275679906084917, LR: 0.0003 +[2026-02-28 20:33:46] (step=0015986) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.1277636470358052, LR: 0.0003 +[2026-02-28 20:33:54] (step=0015987) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 3.127959303463119, LR: 0.0003 +[2026-02-28 20:34:02] (step=0015988) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.1281549598904324, LR: 0.0003 +[2026-02-28 20:34:10] (step=0015989) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 3.128350616317746, LR: 0.0003 +[2026-02-28 20:34:18] (step=0015990) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 3.1285462727450595, LR: 0.0003 +[2026-02-28 20:34:25] (step=0015991) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.1287419291723735, LR: 0.0003 +[2026-02-28 20:34:33] (step=0015992) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.128937585599687, LR: 0.0003 +[2026-02-28 20:34:41] (step=0015993) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 3.1291332420270006, LR: 0.0003 +[2026-02-28 20:34:49] (step=0015994) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.129328898454314, LR: 0.0003 +[2026-02-28 20:34:57] (step=0015995) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.1295245548816277, LR: 0.0003 +[2026-02-28 20:35:05] (step=0015996) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.1297202113089413, LR: 0.0003 +[2026-02-28 20:35:12] (step=0015997) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.1299158677362553, LR: 0.0003 +[2026-02-28 20:35:20] (step=0015998) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.130111524163569, LR: 0.0003 +[2026-02-28 20:35:28] (step=0015999) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.1303071805908824, LR: 0.0003 +[2026-02-28 20:35:36] (step=0016000) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.130502837018196, LR: 0.0003 +[2026-02-28 20:35:36] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0016000/ +[2026-02-28 20:35:44] (step=0016001) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.1306984934455095, LR: 0.0003 +[2026-02-28 20:35:52] (step=0016002) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.130894149872823, LR: 0.0003 +[2026-02-28 20:35:59] (step=0016003) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.131089806300137, LR: 0.0003 +[2026-02-28 20:36:07] (step=0016004) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.1312854627274507, LR: 0.0003 +[2026-02-28 20:36:15] (step=0016005) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.1314811191547642, LR: 0.0003 +[2026-02-28 20:36:23] (step=0016006) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.131676775582078, LR: 0.0003 +[2026-02-28 20:36:31] (step=0016007) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.1318724320093914, LR: 0.0003 +[2026-02-28 20:36:39] (step=0016008) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.1320680884367054, LR: 0.0003 +[2026-02-28 20:36:47] (step=0016009) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.132263744864019, LR: 0.0003 +[2026-02-28 20:36:54] (step=0016010) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 3.1324594012913325, LR: 0.0003 +[2026-02-28 20:37:02] (step=0016011) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.132655057718646, LR: 0.0003 +[2026-02-28 20:37:10] (step=0016012) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.1328507141459596, LR: 0.0003 +[2026-02-28 20:37:18] (step=0016013) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.133046370573273, LR: 0.0003 +[2026-02-28 20:37:26] (step=0016014) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.133242027000587, LR: 0.0003 +[2026-02-28 20:37:34] (step=0016015) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.1334376834279007, LR: 0.0003 +[2026-02-28 20:37:41] (step=0016016) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.1336333398552143, LR: 0.0003 +[2026-02-28 20:37:49] (step=0016017) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.133828996282528, LR: 0.0003 +[2026-02-28 20:37:57] (step=0016018) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.1340246527098414, LR: 0.0003 +[2026-02-28 20:38:05] (step=0016019) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.134220309137155, LR: 0.0003 +[2026-02-28 20:38:13] (step=0016020) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.134415965564469, LR: 0.0003 +[2026-02-28 20:38:21] (step=0016021) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.1346116219917826, LR: 0.0003 +[2026-02-28 20:38:28] (step=0016022) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.134807278419096, LR: 0.0003 +[2026-02-28 20:38:36] (step=0016023) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.1350029348464097, LR: 0.0003 +[2026-02-28 20:38:44] (step=0016024) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.1351985912737232, LR: 0.0003 +[2026-02-28 20:38:52] (step=0016025) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.135394247701037, LR: 0.0003 +[2026-02-28 20:39:00] (step=0016026) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.135589904128351, LR: 0.0003 +[2026-02-28 20:39:08] (step=0016027) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 3.1357855605556644, LR: 0.0003 +[2026-02-28 20:39:16] (step=0016028) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.135981216982978, LR: 0.0003 +[2026-02-28 20:39:24] (step=0016029) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.1361768734102915, LR: 0.0003 +[2026-02-28 20:39:31] (step=0016030) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.136372529837605, LR: 0.0003 +[2026-02-28 20:39:39] (step=0016031) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.1365681862649186, LR: 0.0003 +[2026-02-28 20:39:47] (step=0016032) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.1367638426922326, LR: 0.0003 +[2026-02-28 20:39:55] (step=0016033) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 3.136959499119546, LR: 0.0003 +[2026-02-28 20:40:03] (step=0016034) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.1371551555468598, LR: 0.0003 +[2026-02-28 20:40:11] (step=0016035) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.1373508119741733, LR: 0.0003 +[2026-02-28 20:40:18] (step=0016036) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.137546468401487, LR: 0.0003 +[2026-02-28 20:40:26] (step=0016037) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.1377421248288004, LR: 0.0003 +[2026-02-28 20:40:34] (step=0016038) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.1379377812561144, LR: 0.0003 +[2026-02-28 20:40:42] (step=0016039) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 3.138133437683428, LR: 0.0003 +[2026-02-28 20:40:50] (step=0016040) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.1383290941107416, LR: 0.0003 +[2026-02-28 20:40:58] (step=0016041) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.138524750538055, LR: 0.0003 +[2026-02-28 20:41:05] (step=0016042) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.1387204069653687, LR: 0.0003 +[2026-02-28 20:41:13] (step=0016043) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.1389160633926823, LR: 0.0003 +[2026-02-28 20:41:21] (step=0016044) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.1391117198199963, LR: 0.0003 +[2026-02-28 20:41:29] (step=0016045) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.13930737624731, LR: 0.0003 +[2026-02-28 20:41:37] (step=0016046) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.1395030326746234, LR: 0.0003 +[2026-02-28 20:41:45] (step=0016047) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.139698689101937, LR: 0.0003 +[2026-02-28 20:41:52] (step=0016048) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.1398943455292505, LR: 0.0003 +[2026-02-28 20:42:00] (step=0016049) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 3.140090001956564, LR: 0.0003 +[2026-02-28 20:42:08] (step=0016050) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 3.140285658383878, LR: 0.0003 +[2026-02-28 20:42:16] (step=0016051) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.1404813148111916, LR: 0.0003 +[2026-02-28 20:42:24] (step=0016052) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.140676971238505, LR: 0.0003 +[2026-02-28 20:42:32] (step=0016053) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.1408726276658188, LR: 0.0003 +[2026-02-28 20:42:39] (step=0016054) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.1410682840931323, LR: 0.0003 +[2026-02-28 20:42:47] (step=0016055) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.141263940520446, LR: 0.0003 +[2026-02-28 20:42:55] (step=0016056) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.14145959694776, LR: 0.0003 +[2026-02-28 20:43:03] (step=0016057) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.1416552533750735, LR: 0.0003 +[2026-02-28 20:43:11] (step=0016058) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.141850909802387, LR: 0.0003 +[2026-02-28 20:43:19] (step=0016059) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.1420465662297006, LR: 0.0003 +[2026-02-28 20:43:26] (step=0016060) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.142242222657014, LR: 0.0003 +[2026-02-28 20:43:34] (step=0016061) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 3.1424378790843277, LR: 0.0003 +[2026-02-28 20:43:42] (step=0016062) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.1426335355116417, LR: 0.0003 +[2026-02-28 20:43:50] (step=0016063) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.1428291919389553, LR: 0.0003 +[2026-02-28 20:43:58] (step=0016064) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.143024848366269, LR: 0.0003 +[2026-02-28 20:44:06] (step=0016065) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.1432205047935824, LR: 0.0003 +[2026-02-28 20:44:13] (step=0016066) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.143416161220896, LR: 0.0003 +[2026-02-28 20:44:21] (step=0016067) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.14361181764821, LR: 0.0003 +[2026-02-28 20:44:29] (step=0016068) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.1438074740755235, LR: 0.0003 +[2026-02-28 20:44:37] (step=0016069) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.144003130502837, LR: 0.0003 +[2026-02-28 20:44:45] (step=0016070) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.1441987869301506, LR: 0.0003 +[2026-02-28 20:44:53] (step=0016071) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.144394443357464, LR: 0.0003 +[2026-02-28 20:45:00] (step=0016072) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.1445900997847778, LR: 0.0003 +[2026-02-28 20:45:08] (step=0016073) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.1447857562120918, LR: 0.0003 +[2026-02-28 20:45:16] (step=0016074) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.1449814126394053, LR: 0.0003 +[2026-02-28 20:45:24] (step=0016075) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.145177069066719, LR: 0.0003 +[2026-02-28 20:45:32] (step=0016076) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.1453727254940325, LR: 0.0003 +[2026-02-28 20:45:40] (step=0016077) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.145568381921346, LR: 0.0003 +[2026-02-28 20:45:48] (step=0016078) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.1457640383486596, LR: 0.0003 +[2026-02-28 20:45:56] (step=0016079) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.1459596947759736, LR: 0.0003 +[2026-02-28 20:46:03] (step=0016080) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.146155351203287, LR: 0.0003 +[2026-02-28 20:46:11] (step=0016081) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.1463510076306007, LR: 0.0003 +[2026-02-28 20:46:19] (step=0016082) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.1465466640579143, LR: 0.0003 +[2026-02-28 20:46:27] (step=0016083) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.146742320485228, LR: 0.0003 +[2026-02-28 20:46:35] (step=0016084) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.1469379769125414, LR: 0.0003 +[2026-02-28 20:46:43] (step=0016085) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 3.1471336333398554, LR: 0.0003 +[2026-02-28 20:46:50] (step=0016086) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.147329289767169, LR: 0.0003 +[2026-02-28 20:46:58] (step=0016087) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.1475249461944825, LR: 0.0003 +[2026-02-28 20:47:06] (step=0016088) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.147720602621796, LR: 0.0003 +[2026-02-28 20:47:14] (step=0016089) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.1479162590491097, LR: 0.0003 +[2026-02-28 20:47:22] (step=0016090) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.148111915476423, LR: 0.0003 +[2026-02-28 20:47:30] (step=0016091) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.1483075719037372, LR: 0.0003 +[2026-02-28 20:47:37] (step=0016092) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 3.148503228331051, LR: 0.0003 +[2026-02-28 20:47:45] (step=0016093) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.1486988847583643, LR: 0.0003 +[2026-02-28 20:47:53] (step=0016094) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.148894541185678, LR: 0.0003 +[2026-02-28 20:48:01] (step=0016095) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.1490901976129915, LR: 0.0003 +[2026-02-28 20:48:09] (step=0016096) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.149285854040305, LR: 0.0003 +[2026-02-28 20:48:17] (step=0016097) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.149481510467619, LR: 0.0003 +[2026-02-28 20:48:24] (step=0016098) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.1496771668949326, LR: 0.0003 +[2026-02-28 20:48:32] (step=0016099) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 3.149872823322246, LR: 0.0003 +[2026-02-28 20:48:40] (step=0016100) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.1500684797495597, LR: 0.0003 +[2026-02-28 20:48:48] (step=0016101) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.1502641361768733, LR: 0.0003 +[2026-02-28 20:48:56] (step=0016102) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.150459792604187, LR: 0.0003 +[2026-02-28 20:49:04] (step=0016103) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.150655449031501, LR: 0.0003 +[2026-02-28 20:49:11] (step=0016104) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.1508511054588144, LR: 0.0003 +[2026-02-28 20:49:19] (step=0016105) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.151046761886128, LR: 0.0003 +[2026-02-28 20:49:27] (step=0016106) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.1512424183134415, LR: 0.0003 +[2026-02-28 20:49:35] (step=0016107) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.151438074740755, LR: 0.0003 +[2026-02-28 20:49:43] (step=0016108) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.1516337311680687, LR: 0.0003 +[2026-02-28 20:49:51] (step=0016109) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.1518293875953827, LR: 0.0003 +[2026-02-28 20:49:58] (step=0016110) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.1520250440226962, LR: 0.0003 +[2026-02-28 20:50:06] (step=0016111) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.15222070045001, LR: 0.0003 +[2026-02-28 20:50:14] (step=0016112) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.1524163568773234, LR: 0.0003 +[2026-02-28 20:50:22] (step=0016113) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 3.152612013304637, LR: 0.0003 +[2026-02-28 20:50:30] (step=0016114) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.1528076697319505, LR: 0.0003 +[2026-02-28 20:50:38] (step=0016115) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 3.1530033261592645, LR: 0.0003 +[2026-02-28 20:50:46] (step=0016116) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.153198982586578, LR: 0.0003 +[2026-02-28 20:50:53] (step=0016117) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.1533946390138916, LR: 0.0003 +[2026-02-28 20:51:01] (step=0016118) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.153590295441205, LR: 0.0003 +[2026-02-28 20:51:09] (step=0016119) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.1537859518685187, LR: 0.0003 +[2026-02-28 20:51:17] (step=0016120) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.1539816082958327, LR: 0.0003 +[2026-02-28 20:51:25] (step=0016121) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.1541772647231463, LR: 0.0003 +[2026-02-28 20:51:33] (step=0016122) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 3.15437292115046, LR: 0.0003 +[2026-02-28 20:51:40] (step=0016123) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.1545685775777734, LR: 0.0003 +[2026-02-28 20:51:48] (step=0016124) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.154764234005087, LR: 0.0003 +[2026-02-28 20:51:56] (step=0016125) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.1549598904324005, LR: 0.0003 +[2026-02-28 20:52:04] (step=0016126) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 3.1551555468597146, LR: 0.0003 +[2026-02-28 20:52:12] (step=0016127) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.155351203287028, LR: 0.0003 +[2026-02-28 20:52:20] (step=0016128) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.1555468597143417, LR: 0.0003 +[2026-02-28 20:52:28] (step=0016129) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.1557425161416552, LR: 0.0003 +[2026-02-28 20:52:35] (step=0016130) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.155938172568969, LR: 0.0003 +[2026-02-28 20:52:43] (step=0016131) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.1561338289962824, LR: 0.0003 +[2026-02-28 20:52:51] (step=0016132) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.1563294854235964, LR: 0.0003 +[2026-02-28 20:52:59] (step=0016133) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.15652514185091, LR: 0.0003 +[2026-02-28 20:53:07] (step=0016134) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.1567207982782235, LR: 0.0003 +[2026-02-28 20:53:15] (step=0016135) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 3.156916454705537, LR: 0.0003 +[2026-02-28 20:53:22] (step=0016136) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.1571121111328506, LR: 0.0003 +[2026-02-28 20:53:30] (step=0016137) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.157307767560164, LR: 0.0003 +[2026-02-28 20:53:38] (step=0016138) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.157503423987478, LR: 0.0003 +[2026-02-28 20:53:46] (step=0016139) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.1576990804147917, LR: 0.0003 +[2026-02-28 20:53:54] (step=0016140) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.1578947368421053, LR: 0.0003 +[2026-02-28 20:54:02] (step=0016141) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.158090393269419, LR: 0.0003 +[2026-02-28 20:54:10] (step=0016142) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.1582860496967324, LR: 0.0003 +[2026-02-28 20:54:17] (step=0016143) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.158481706124046, LR: 0.0003 +[2026-02-28 20:54:25] (step=0016144) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 3.15867736255136, LR: 0.0003 +[2026-02-28 20:54:33] (step=0016145) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.1588730189786736, LR: 0.0003 +[2026-02-28 20:54:41] (step=0016146) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.159068675405987, LR: 0.0003 +[2026-02-28 20:54:49] (step=0016147) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 3.1592643318333007, LR: 0.0003 +[2026-02-28 20:54:57] (step=0016148) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 3.1594599882606142, LR: 0.0003 +[2026-02-28 20:55:04] (step=0016149) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 3.159655644687928, LR: 0.0003 +[2026-02-28 20:55:12] (step=0016150) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.159851301115242, LR: 0.0003 +[2026-02-28 20:55:20] (step=0016151) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.1600469575425554, LR: 0.0003 +[2026-02-28 20:55:28] (step=0016152) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.160242613969869, LR: 0.0003 +[2026-02-28 20:55:36] (step=0016153) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 3.1604382703971825, LR: 0.0003 +[2026-02-28 20:55:44] (step=0016154) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.160633926824496, LR: 0.0003 +[2026-02-28 20:55:51] (step=0016155) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.1608295832518096, LR: 0.0003 +[2026-02-28 20:55:59] (step=0016156) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.1610252396791236, LR: 0.0003 +[2026-02-28 20:56:07] (step=0016157) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.161220896106437, LR: 0.0003 +[2026-02-28 20:56:15] (step=0016158) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.1614165525337508, LR: 0.0003 +[2026-02-28 20:56:23] (step=0016159) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.1616122089610643, LR: 0.0003 +[2026-02-28 20:56:31] (step=0016160) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 3.161807865388378, LR: 0.0003 +[2026-02-28 20:56:39] (step=0016161) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.1620035218156914, LR: 0.0003 +[2026-02-28 20:56:46] (step=0016162) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.1621991782430054, LR: 0.0003 +[2026-02-28 20:56:54] (step=0016163) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.162394834670319, LR: 0.0003 +[2026-02-28 20:57:02] (step=0016164) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 3.1625904910976326, LR: 0.0003 +[2026-02-28 20:57:10] (step=0016165) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.162786147524946, LR: 0.0003 +[2026-02-28 20:57:18] (step=0016166) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.1629818039522597, LR: 0.0003 +[2026-02-28 20:57:26] (step=0016167) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.1631774603795733, LR: 0.0003 +[2026-02-28 20:57:33] (step=0016168) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.1633731168068873, LR: 0.0003 +[2026-02-28 20:57:41] (step=0016169) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 3.163568773234201, LR: 0.0003 +[2026-02-28 20:57:49] (step=0016170) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.1637644296615144, LR: 0.0003 +[2026-02-28 20:57:57] (step=0016171) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.163960086088828, LR: 0.0003 +[2026-02-28 20:58:05] (step=0016172) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 3.1641557425161415, LR: 0.0003 +[2026-02-28 20:58:13] (step=0016173) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.164351398943455, LR: 0.0003 +[2026-02-28 20:58:20] (step=0016174) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 3.164547055370769, LR: 0.0003 +[2026-02-28 20:58:28] (step=0016175) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.1647427117980826, LR: 0.0003 +[2026-02-28 20:58:36] (step=0016176) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.164938368225396, LR: 0.0003 +[2026-02-28 20:58:44] (step=0016177) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.1651340246527098, LR: 0.0003 +[2026-02-28 20:58:52] (step=0016178) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.1653296810800233, LR: 0.0003 +[2026-02-28 20:59:00] (step=0016179) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 3.1655253375073373, LR: 0.0003 +[2026-02-28 20:59:08] (step=0016180) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.165720993934651, LR: 0.0003 +[2026-02-28 20:59:15] (step=0016181) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.1659166503619645, LR: 0.0003 +[2026-02-28 20:59:23] (step=0016182) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.166112306789278, LR: 0.0003 +[2026-02-28 20:59:31] (step=0016183) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.1663079632165916, LR: 0.0003 +[2026-02-28 20:59:39] (step=0016184) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.166503619643905, LR: 0.0003 +[2026-02-28 20:59:47] (step=0016185) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 3.166699276071219, LR: 0.0003 +[2026-02-28 20:59:55] (step=0016186) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 3.1668949324985327, LR: 0.0003 +[2026-02-28 21:00:02] (step=0016187) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.1670905889258463, LR: 0.0003 +[2026-02-28 21:00:10] (step=0016188) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.16728624535316, LR: 0.0003 +[2026-02-28 21:00:18] (step=0016189) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 3.1674819017804734, LR: 0.0003 +[2026-02-28 21:00:26] (step=0016190) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.167677558207787, LR: 0.0003 +[2026-02-28 21:00:34] (step=0016191) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.167873214635101, LR: 0.0003 +[2026-02-28 21:00:42] (step=0016192) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.1680688710624145, LR: 0.0003 +[2026-02-28 21:00:50] (step=0016193) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.168264527489728, LR: 0.0003 +[2026-02-28 21:00:57] (step=0016194) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.1684601839170417, LR: 0.0003 +[2026-02-28 21:01:05] (step=0016195) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.168655840344355, LR: 0.0003 +[2026-02-28 21:01:13] (step=0016196) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.1688514967716688, LR: 0.0003 +[2026-02-28 21:01:21] (step=0016197) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.169047153198983, LR: 0.0003 +[2026-02-28 21:01:29] (step=0016198) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.1692428096262963, LR: 0.0003 +[2026-02-28 21:01:37] (step=0016199) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.16943846605361, LR: 0.0003 +[2026-02-28 21:01:44] (step=0016200) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.1696341224809235, LR: 0.0003 +[2026-02-28 21:01:52] (step=0016201) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.169829778908237, LR: 0.0003 +[2026-02-28 21:02:00] (step=0016202) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.1700254353355506, LR: 0.0003 +[2026-02-28 21:02:08] (step=0016203) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.1702210917628646, LR: 0.0003 +[2026-02-28 21:02:16] (step=0016204) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.170416748190178, LR: 0.0003 +[2026-02-28 21:02:24] (step=0016205) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.1706124046174917, LR: 0.0003 +[2026-02-28 21:02:31] (step=0016206) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.1708080610448053, LR: 0.0003 +[2026-02-28 21:02:39] (step=0016207) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 3.171003717472119, LR: 0.0003 +[2026-02-28 21:02:47] (step=0016208) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.1711993738994324, LR: 0.0003 +[2026-02-28 21:02:55] (step=0016209) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.1713950303267464, LR: 0.0003 +[2026-02-28 21:03:03] (step=0016210) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 3.17159068675406, LR: 0.0003 +[2026-02-28 21:03:11] (step=0016211) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.1717863431813735, LR: 0.0003 +[2026-02-28 21:03:18] (step=0016212) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.171981999608687, LR: 0.0003 +[2026-02-28 21:03:26] (step=0016213) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 3.1721776560360007, LR: 0.0003 +[2026-02-28 21:03:34] (step=0016214) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.172373312463314, LR: 0.0003 +[2026-02-28 21:03:42] (step=0016215) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.1725689688906282, LR: 0.0003 +[2026-02-28 21:03:50] (step=0016216) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.172764625317942, LR: 0.0003 +[2026-02-28 21:03:58] (step=0016217) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.1729602817452554, LR: 0.0003 +[2026-02-28 21:04:05] (step=0016218) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.173155938172569, LR: 0.0003 +[2026-02-28 21:04:13] (step=0016219) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.1733515945998825, LR: 0.0003 +[2026-02-28 21:04:21] (step=0016220) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.173547251027196, LR: 0.0003 +[2026-02-28 21:04:29] (step=0016221) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.17374290745451, LR: 0.0003 +[2026-02-28 21:04:37] (step=0016222) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.1739385638818236, LR: 0.0003 +[2026-02-28 21:04:45] (step=0016223) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.174134220309137, LR: 0.0003 +[2026-02-28 21:04:53] (step=0016224) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.1743298767364507, LR: 0.0003 +[2026-02-28 21:05:01] (step=0016225) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.1745255331637643, LR: 0.0003 +[2026-02-28 21:05:08] (step=0016226) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.174721189591078, LR: 0.0003 +[2026-02-28 21:05:16] (step=0016227) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.174916846018392, LR: 0.0003 +[2026-02-28 21:05:24] (step=0016228) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.1751125024457054, LR: 0.0003 +[2026-02-28 21:05:32] (step=0016229) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 3.175308158873019, LR: 0.0003 +[2026-02-28 21:05:40] (step=0016230) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.1755038153003325, LR: 0.0003 +[2026-02-28 21:05:48] (step=0016231) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.175699471727646, LR: 0.0003 +[2026-02-28 21:05:55] (step=0016232) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.17589512815496, LR: 0.0003 +[2026-02-28 21:06:03] (step=0016233) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.1760907845822737, LR: 0.0003 +[2026-02-28 21:06:11] (step=0016234) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.1762864410095872, LR: 0.0003 +[2026-02-28 21:06:19] (step=0016235) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.176482097436901, LR: 0.0003 +[2026-02-28 21:06:27] (step=0016236) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.1766777538642144, LR: 0.0003 +[2026-02-28 21:06:35] (step=0016237) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.176873410291528, LR: 0.0003 +[2026-02-28 21:06:43] (step=0016238) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.177069066718842, LR: 0.0003 +[2026-02-28 21:06:50] (step=0016239) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.1772647231461555, LR: 0.0003 +[2026-02-28 21:06:58] (step=0016240) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 3.177460379573469, LR: 0.0003 +[2026-02-28 21:07:06] (step=0016241) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.1776560360007826, LR: 0.0003 +[2026-02-28 21:07:14] (step=0016242) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.177851692428096, LR: 0.0003 +[2026-02-28 21:07:22] (step=0016243) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.1780473488554097, LR: 0.0003 +[2026-02-28 21:07:30] (step=0016244) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 3.1782430052827237, LR: 0.0003 +[2026-02-28 21:07:37] (step=0016245) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.1784386617100373, LR: 0.0003 +[2026-02-28 21:07:45] (step=0016246) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.178634318137351, LR: 0.0003 +[2026-02-28 21:07:53] (step=0016247) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.1788299745646644, LR: 0.0003 +[2026-02-28 21:08:01] (step=0016248) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.179025630991978, LR: 0.0003 +[2026-02-28 21:08:09] (step=0016249) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.1792212874192916, LR: 0.0003 +[2026-02-28 21:08:17] (step=0016250) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.1794169438466056, LR: 0.0003 +[2026-02-28 21:08:24] (step=0016251) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.179612600273919, LR: 0.0003 +[2026-02-28 21:08:32] (step=0016252) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.1798082567012327, LR: 0.0003 +[2026-02-28 21:08:40] (step=0016253) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.1800039131285462, LR: 0.0003 +[2026-02-28 21:08:48] (step=0016254) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.18019956955586, LR: 0.0003 +[2026-02-28 21:08:56] (step=0016255) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.1803952259831734, LR: 0.0003 +[2026-02-28 21:09:04] (step=0016256) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 3.1805908824104874, LR: 0.0003 +[2026-02-28 21:09:11] (step=0016257) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.180786538837801, LR: 0.0003 +[2026-02-28 21:09:19] (step=0016258) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 3.1809821952651145, LR: 0.0003 +[2026-02-28 21:09:27] (step=0016259) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.181177851692428, LR: 0.0003 +[2026-02-28 21:09:35] (step=0016260) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.1813735081197416, LR: 0.0003 +[2026-02-28 21:09:43] (step=0016261) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.181569164547055, LR: 0.0003 +[2026-02-28 21:09:51] (step=0016262) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.181764820974369, LR: 0.0003 +[2026-02-28 21:09:58] (step=0016263) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.1819604774016828, LR: 0.0003 +[2026-02-28 21:10:06] (step=0016264) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.1821561338289963, LR: 0.0003 +[2026-02-28 21:10:14] (step=0016265) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.18235179025631, LR: 0.0003 +[2026-02-28 21:10:22] (step=0016266) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.1825474466836234, LR: 0.0003 +[2026-02-28 21:10:30] (step=0016267) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.182743103110937, LR: 0.0003 +[2026-02-28 21:10:38] (step=0016268) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.182938759538251, LR: 0.0003 +[2026-02-28 21:10:45] (step=0016269) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.1831344159655646, LR: 0.0003 +[2026-02-28 21:10:53] (step=0016270) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.183330072392878, LR: 0.0003 +[2026-02-28 21:11:01] (step=0016271) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.1835257288201917, LR: 0.0003 +[2026-02-28 21:11:09] (step=0016272) Train Loss: 0.4614, Train Steps/Sec: 0.12, Epoch: 3.1837213852475053, LR: 0.0003 +[2026-02-28 21:11:17] (step=0016273) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.183917041674819, LR: 0.0003 +[2026-02-28 21:11:25] (step=0016274) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.184112698102133, LR: 0.0003 +[2026-02-28 21:11:33] (step=0016275) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.1843083545294464, LR: 0.0003 +[2026-02-28 21:11:41] (step=0016276) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.18450401095676, LR: 0.0003 +[2026-02-28 21:11:48] (step=0016277) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 3.1846996673840735, LR: 0.0003 +[2026-02-28 21:11:56] (step=0016278) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.184895323811387, LR: 0.0003 +[2026-02-28 21:12:04] (step=0016279) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.1850909802387006, LR: 0.0003 +[2026-02-28 21:12:12] (step=0016280) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.1852866366660146, LR: 0.0003 +[2026-02-28 21:12:20] (step=0016281) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 3.185482293093328, LR: 0.0003 +[2026-02-28 21:12:28] (step=0016282) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.1856779495206418, LR: 0.0003 +[2026-02-28 21:12:35] (step=0016283) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.1858736059479553, LR: 0.0003 +[2026-02-28 21:12:43] (step=0016284) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.186069262375269, LR: 0.0003 +[2026-02-28 21:12:51] (step=0016285) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.1862649188025824, LR: 0.0003 +[2026-02-28 21:12:59] (step=0016286) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.1864605752298965, LR: 0.0003 +[2026-02-28 21:13:07] (step=0016287) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.18665623165721, LR: 0.0003 +[2026-02-28 21:13:15] (step=0016288) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.1868518880845236, LR: 0.0003 +[2026-02-28 21:13:22] (step=0016289) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.187047544511837, LR: 0.0003 +[2026-02-28 21:13:30] (step=0016290) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.1872432009391507, LR: 0.0003 +[2026-02-28 21:13:38] (step=0016291) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.1874388573664647, LR: 0.0003 +[2026-02-28 21:13:46] (step=0016292) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.1876345137937783, LR: 0.0003 +[2026-02-28 21:13:54] (step=0016293) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.187830170221092, LR: 0.0003 +[2026-02-28 21:14:02] (step=0016294) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.1880258266484054, LR: 0.0003 +[2026-02-28 21:14:09] (step=0016295) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.188221483075719, LR: 0.0003 +[2026-02-28 21:14:17] (step=0016296) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.1884171395030325, LR: 0.0003 +[2026-02-28 21:14:25] (step=0016297) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 3.1886127959303465, LR: 0.0003 +[2026-02-28 21:14:33] (step=0016298) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.18880845235766, LR: 0.0003 +[2026-02-28 21:14:41] (step=0016299) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.1890041087849736, LR: 0.0003 +[2026-02-28 21:14:49] (step=0016300) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.189199765212287, LR: 0.0003 +[2026-02-28 21:14:57] (step=0016301) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.1893954216396008, LR: 0.0003 +[2026-02-28 21:15:04] (step=0016302) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.1895910780669143, LR: 0.0003 +[2026-02-28 21:15:12] (step=0016303) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.1897867344942283, LR: 0.0003 +[2026-02-28 21:15:20] (step=0016304) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.189982390921542, LR: 0.0003 +[2026-02-28 21:15:28] (step=0016305) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.1901780473488555, LR: 0.0003 +[2026-02-28 21:15:36] (step=0016306) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.190373703776169, LR: 0.0003 +[2026-02-28 21:15:44] (step=0016307) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.1905693602034826, LR: 0.0003 +[2026-02-28 21:15:51] (step=0016308) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.190765016630796, LR: 0.0003 +[2026-02-28 21:15:59] (step=0016309) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.19096067305811, LR: 0.0003 +[2026-02-28 21:16:07] (step=0016310) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.1911563294854237, LR: 0.0003 +[2026-02-28 21:16:15] (step=0016311) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 3.1913519859127373, LR: 0.0003 +[2026-02-28 21:16:23] (step=0016312) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 3.191547642340051, LR: 0.0003 +[2026-02-28 21:16:31] (step=0016313) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.1917432987673644, LR: 0.0003 +[2026-02-28 21:16:38] (step=0016314) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.191938955194678, LR: 0.0003 +[2026-02-28 21:16:46] (step=0016315) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.192134611621992, LR: 0.0003 +[2026-02-28 21:16:54] (step=0016316) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.1923302680493055, LR: 0.0003 +[2026-02-28 21:17:02] (step=0016317) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.192525924476619, LR: 0.0003 +[2026-02-28 21:17:10] (step=0016318) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 3.1927215809039327, LR: 0.0003 +[2026-02-28 21:17:18] (step=0016319) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 3.192917237331246, LR: 0.0003 +[2026-02-28 21:17:25] (step=0016320) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.19311289375856, LR: 0.0003 +[2026-02-28 21:17:33] (step=0016321) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.193308550185874, LR: 0.0003 +[2026-02-28 21:17:41] (step=0016322) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.1935042066131873, LR: 0.0003 +[2026-02-28 21:17:49] (step=0016323) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.193699863040501, LR: 0.0003 +[2026-02-28 21:17:57] (step=0016324) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.1938955194678145, LR: 0.0003 +[2026-02-28 21:18:05] (step=0016325) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.194091175895128, LR: 0.0003 +[2026-02-28 21:18:13] (step=0016326) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 3.1942868323224416, LR: 0.0003 +[2026-02-28 21:18:20] (step=0016327) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.1944824887497556, LR: 0.0003 +[2026-02-28 21:18:28] (step=0016328) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.194678145177069, LR: 0.0003 +[2026-02-28 21:18:36] (step=0016329) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.1948738016043827, LR: 0.0003 +[2026-02-28 21:18:44] (step=0016330) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.1950694580316963, LR: 0.0003 +[2026-02-28 21:18:52] (step=0016331) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.19526511445901, LR: 0.0003 +[2026-02-28 21:19:00] (step=0016332) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.1954607708863234, LR: 0.0003 +[2026-02-28 21:19:08] (step=0016333) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.1956564273136374, LR: 0.0003 +[2026-02-28 21:19:15] (step=0016334) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.195852083740951, LR: 0.0003 +[2026-02-28 21:19:23] (step=0016335) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.1960477401682645, LR: 0.0003 +[2026-02-28 21:19:31] (step=0016336) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.196243396595578, LR: 0.0003 +[2026-02-28 21:19:39] (step=0016337) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 3.1964390530228917, LR: 0.0003 +[2026-02-28 21:19:47] (step=0016338) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.1966347094502052, LR: 0.0003 +[2026-02-28 21:19:55] (step=0016339) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.1968303658775192, LR: 0.0003 +[2026-02-28 21:20:02] (step=0016340) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.197026022304833, LR: 0.0003 +[2026-02-28 21:20:10] (step=0016341) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.1972216787321464, LR: 0.0003 +[2026-02-28 21:20:18] (step=0016342) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.19741733515946, LR: 0.0003 +[2026-02-28 21:20:26] (step=0016343) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.1976129915867735, LR: 0.0003 +[2026-02-28 21:20:34] (step=0016344) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.1978086480140875, LR: 0.0003 +[2026-02-28 21:20:42] (step=0016345) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.198004304441401, LR: 0.0003 +[2026-02-28 21:20:49] (step=0016346) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.1981999608687146, LR: 0.0003 +[2026-02-28 21:20:57] (step=0016347) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.198395617296028, LR: 0.0003 +[2026-02-28 21:21:05] (step=0016348) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.1985912737233417, LR: 0.0003 +[2026-02-28 21:21:13] (step=0016349) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.1987869301506553, LR: 0.0003 +[2026-02-28 21:21:21] (step=0016350) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.1989825865779693, LR: 0.0003 +[2026-02-28 21:21:29] (step=0016351) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.199178243005283, LR: 0.0003 +[2026-02-28 21:21:36] (step=0016352) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.1993738994325964, LR: 0.0003 +[2026-02-28 21:21:44] (step=0016353) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.19956955585991, LR: 0.0003 +[2026-02-28 21:21:52] (step=0016354) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.1997652122872235, LR: 0.0003 +[2026-02-28 21:22:00] (step=0016355) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.199960868714537, LR: 0.0003 +[2026-02-28 21:22:08] (step=0016356) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.200156525141851, LR: 0.0003 +[2026-02-28 21:22:16] (step=0016357) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.2003521815691647, LR: 0.0003 +[2026-02-28 21:22:23] (step=0016358) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.2005478379964782, LR: 0.0003 +[2026-02-28 21:22:31] (step=0016359) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.200743494423792, LR: 0.0003 +[2026-02-28 21:22:39] (step=0016360) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 3.2009391508511054, LR: 0.0003 +[2026-02-28 21:22:47] (step=0016361) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.201134807278419, LR: 0.0003 +[2026-02-28 21:22:55] (step=0016362) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.201330463705733, LR: 0.0003 +[2026-02-28 21:23:03] (step=0016363) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.2015261201330465, LR: 0.0003 +[2026-02-28 21:23:10] (step=0016364) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.20172177656036, LR: 0.0003 +[2026-02-28 21:23:18] (step=0016365) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.2019174329876736, LR: 0.0003 +[2026-02-28 21:23:26] (step=0016366) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.202113089414987, LR: 0.0003 +[2026-02-28 21:23:34] (step=0016367) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.2023087458423007, LR: 0.0003 +[2026-02-28 21:23:42] (step=0016368) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.2025044022696147, LR: 0.0003 +[2026-02-28 21:23:50] (step=0016369) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.2027000586969283, LR: 0.0003 +[2026-02-28 21:23:58] (step=0016370) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.202895715124242, LR: 0.0003 +[2026-02-28 21:24:05] (step=0016371) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.2030913715515554, LR: 0.0003 +[2026-02-28 21:24:13] (step=0016372) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.203287027978869, LR: 0.0003 +[2026-02-28 21:24:21] (step=0016373) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.2034826844061826, LR: 0.0003 +[2026-02-28 21:24:29] (step=0016374) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.2036783408334966, LR: 0.0003 +[2026-02-28 21:24:37] (step=0016375) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.20387399726081, LR: 0.0003 +[2026-02-28 21:24:45] (step=0016376) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.2040696536881237, LR: 0.0003 +[2026-02-28 21:24:52] (step=0016377) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.2042653101154372, LR: 0.0003 +[2026-02-28 21:25:00] (step=0016378) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.204460966542751, LR: 0.0003 +[2026-02-28 21:25:08] (step=0016379) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 3.2046566229700644, LR: 0.0003 +[2026-02-28 21:25:16] (step=0016380) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.2048522793973784, LR: 0.0003 +[2026-02-28 21:25:24] (step=0016381) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.205047935824692, LR: 0.0003 +[2026-02-28 21:25:32] (step=0016382) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.2052435922520055, LR: 0.0003 +[2026-02-28 21:25:40] (step=0016383) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.205439248679319, LR: 0.0003 +[2026-02-28 21:25:47] (step=0016384) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.2056349051066326, LR: 0.0003 +[2026-02-28 21:25:55] (step=0016385) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.205830561533946, LR: 0.0003 +[2026-02-28 21:26:03] (step=0016386) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.20602621796126, LR: 0.0003 +[2026-02-28 21:26:11] (step=0016387) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.2062218743885738, LR: 0.0003 +[2026-02-28 21:26:19] (step=0016388) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.2064175308158873, LR: 0.0003 +[2026-02-28 21:26:27] (step=0016389) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.206613187243201, LR: 0.0003 +[2026-02-28 21:26:34] (step=0016390) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.2068088436705144, LR: 0.0003 +[2026-02-28 21:26:42] (step=0016391) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.207004500097828, LR: 0.0003 +[2026-02-28 21:26:50] (step=0016392) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.207200156525142, LR: 0.0003 +[2026-02-28 21:26:58] (step=0016393) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.2073958129524556, LR: 0.0003 +[2026-02-28 21:27:06] (step=0016394) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.207591469379769, LR: 0.0003 +[2026-02-28 21:27:14] (step=0016395) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.2077871258070827, LR: 0.0003 +[2026-02-28 21:27:21] (step=0016396) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.2079827822343963, LR: 0.0003 +[2026-02-28 21:27:29] (step=0016397) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.20817843866171, LR: 0.0003 +[2026-02-28 21:27:37] (step=0016398) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.208374095089024, LR: 0.0003 +[2026-02-28 21:27:45] (step=0016399) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.2085697515163374, LR: 0.0003 +[2026-02-28 21:27:53] (step=0016400) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.208765407943651, LR: 0.0003 +[2026-02-28 21:28:01] (step=0016401) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.2089610643709645, LR: 0.0003 +[2026-02-28 21:28:09] (step=0016402) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.209156720798278, LR: 0.0003 +[2026-02-28 21:28:16] (step=0016403) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.209352377225592, LR: 0.0003 +[2026-02-28 21:28:24] (step=0016404) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.2095480336529056, LR: 0.0003 +[2026-02-28 21:28:32] (step=0016405) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.209743690080219, LR: 0.0003 +[2026-02-28 21:28:40] (step=0016406) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.2099393465075328, LR: 0.0003 +[2026-02-28 21:28:48] (step=0016407) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.2101350029348463, LR: 0.0003 +[2026-02-28 21:28:56] (step=0016408) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.21033065936216, LR: 0.0003 +[2026-02-28 21:29:03] (step=0016409) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.210526315789474, LR: 0.0003 +[2026-02-28 21:29:11] (step=0016410) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.2107219722167875, LR: 0.0003 +[2026-02-28 21:29:19] (step=0016411) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.210917628644101, LR: 0.0003 +[2026-02-28 21:29:27] (step=0016412) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.2111132850714146, LR: 0.0003 +[2026-02-28 21:29:35] (step=0016413) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 3.211308941498728, LR: 0.0003 +[2026-02-28 21:29:43] (step=0016414) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.2115045979260417, LR: 0.0003 +[2026-02-28 21:29:50] (step=0016415) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.2117002543533557, LR: 0.0003 +[2026-02-28 21:29:58] (step=0016416) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.2118959107806693, LR: 0.0003 +[2026-02-28 21:30:06] (step=0016417) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.212091567207983, LR: 0.0003 +[2026-02-28 21:30:14] (step=0016418) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.2122872236352964, LR: 0.0003 +[2026-02-28 21:30:22] (step=0016419) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.21248288006261, LR: 0.0003 +[2026-02-28 21:30:30] (step=0016420) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.2126785364899235, LR: 0.0003 +[2026-02-28 21:30:38] (step=0016421) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.2128741929172375, LR: 0.0003 +[2026-02-28 21:30:45] (step=0016422) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.213069849344551, LR: 0.0003 +[2026-02-28 21:30:53] (step=0016423) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.2132655057718646, LR: 0.0003 +[2026-02-28 21:31:01] (step=0016424) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.213461162199178, LR: 0.0003 +[2026-02-28 21:31:09] (step=0016425) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.2136568186264918, LR: 0.0003 +[2026-02-28 21:31:17] (step=0016426) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.2138524750538053, LR: 0.0003 +[2026-02-28 21:31:25] (step=0016427) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.2140481314811193, LR: 0.0003 +[2026-02-28 21:31:33] (step=0016428) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.214243787908433, LR: 0.0003 +[2026-02-28 21:31:40] (step=0016429) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.2144394443357465, LR: 0.0003 +[2026-02-28 21:31:48] (step=0016430) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.21463510076306, LR: 0.0003 +[2026-02-28 21:31:56] (step=0016431) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 3.2148307571903736, LR: 0.0003 +[2026-02-28 21:32:04] (step=0016432) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.215026413617687, LR: 0.0003 +[2026-02-28 21:32:12] (step=0016433) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.215222070045001, LR: 0.0003 +[2026-02-28 21:32:20] (step=0016434) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.2154177264723147, LR: 0.0003 +[2026-02-28 21:32:27] (step=0016435) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.2156133828996283, LR: 0.0003 +[2026-02-28 21:32:35] (step=0016436) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.215809039326942, LR: 0.0003 +[2026-02-28 21:32:43] (step=0016437) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.2160046957542554, LR: 0.0003 +[2026-02-28 21:32:51] (step=0016438) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.216200352181569, LR: 0.0003 +[2026-02-28 21:32:59] (step=0016439) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.216396008608883, LR: 0.0003 +[2026-02-28 21:33:07] (step=0016440) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.2165916650361965, LR: 0.0003 +[2026-02-28 21:33:14] (step=0016441) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.21678732146351, LR: 0.0003 +[2026-02-28 21:33:22] (step=0016442) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.2169829778908237, LR: 0.0003 +[2026-02-28 21:33:30] (step=0016443) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.217178634318137, LR: 0.0003 +[2026-02-28 21:33:38] (step=0016444) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 3.217374290745451, LR: 0.0003 +[2026-02-28 21:33:46] (step=0016445) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.217569947172765, LR: 0.0003 +[2026-02-28 21:33:54] (step=0016446) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.2177656036000784, LR: 0.0003 +[2026-02-28 21:34:01] (step=0016447) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.217961260027392, LR: 0.0003 +[2026-02-28 21:34:09] (step=0016448) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.2181569164547055, LR: 0.0003 +[2026-02-28 21:34:17] (step=0016449) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.218352572882019, LR: 0.0003 +[2026-02-28 21:34:25] (step=0016450) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.2185482293093326, LR: 0.0003 +[2026-02-28 21:34:33] (step=0016451) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 3.2187438857366466, LR: 0.0003 +[2026-02-28 21:34:41] (step=0016452) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.21893954216396, LR: 0.0003 +[2026-02-28 21:34:48] (step=0016453) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.2191351985912737, LR: 0.0003 +[2026-02-28 21:34:56] (step=0016454) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.2193308550185873, LR: 0.0003 +[2026-02-28 21:35:04] (step=0016455) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.219526511445901, LR: 0.0003 +[2026-02-28 21:35:12] (step=0016456) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.2197221678732144, LR: 0.0003 +[2026-02-28 21:35:20] (step=0016457) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.2199178243005284, LR: 0.0003 +[2026-02-28 21:35:28] (step=0016458) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.220113480727842, LR: 0.0003 +[2026-02-28 21:35:35] (step=0016459) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 3.2203091371551555, LR: 0.0003 +[2026-02-28 21:35:43] (step=0016460) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.220504793582469, LR: 0.0003 +[2026-02-28 21:35:51] (step=0016461) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 3.2207004500097827, LR: 0.0003 +[2026-02-28 21:35:59] (step=0016462) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.2208961064370967, LR: 0.0003 +[2026-02-28 21:36:07] (step=0016463) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.2210917628644102, LR: 0.0003 +[2026-02-28 21:36:15] (step=0016464) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.221287419291724, LR: 0.0003 +[2026-02-28 21:36:23] (step=0016465) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.2214830757190374, LR: 0.0003 +[2026-02-28 21:36:30] (step=0016466) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.221678732146351, LR: 0.0003 +[2026-02-28 21:36:38] (step=0016467) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.2218743885736645, LR: 0.0003 +[2026-02-28 21:36:46] (step=0016468) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.2220700450009785, LR: 0.0003 +[2026-02-28 21:36:54] (step=0016469) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.222265701428292, LR: 0.0003 +[2026-02-28 21:37:02] (step=0016470) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.2224613578556056, LR: 0.0003 +[2026-02-28 21:37:10] (step=0016471) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.222657014282919, LR: 0.0003 +[2026-02-28 21:37:18] (step=0016472) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.2228526707102327, LR: 0.0003 +[2026-02-28 21:37:25] (step=0016473) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.2230483271375463, LR: 0.0003 +[2026-02-28 21:37:33] (step=0016474) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.2232439835648603, LR: 0.0003 +[2026-02-28 21:37:41] (step=0016475) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.223439639992174, LR: 0.0003 +[2026-02-28 21:37:49] (step=0016476) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.2236352964194874, LR: 0.0003 +[2026-02-28 21:37:57] (step=0016477) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.223830952846801, LR: 0.0003 +[2026-02-28 21:38:05] (step=0016478) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.2240266092741146, LR: 0.0003 +[2026-02-28 21:38:12] (step=0016479) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.224222265701428, LR: 0.0003 +[2026-02-28 21:38:20] (step=0016480) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.224417922128742, LR: 0.0003 +[2026-02-28 21:38:28] (step=0016481) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 3.2246135785560557, LR: 0.0003 +[2026-02-28 21:38:36] (step=0016482) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.2248092349833692, LR: 0.0003 +[2026-02-28 21:38:44] (step=0016483) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.225004891410683, LR: 0.0003 +[2026-02-28 21:38:52] (step=0016484) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.2252005478379964, LR: 0.0003 +[2026-02-28 21:38:59] (step=0016485) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.22539620426531, LR: 0.0003 +[2026-02-28 21:39:07] (step=0016486) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.225591860692624, LR: 0.0003 +[2026-02-28 21:39:15] (step=0016487) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.2257875171199375, LR: 0.0003 +[2026-02-28 21:39:23] (step=0016488) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.225983173547251, LR: 0.0003 +[2026-02-28 21:39:31] (step=0016489) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.2261788299745646, LR: 0.0003 +[2026-02-28 21:39:39] (step=0016490) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.226374486401878, LR: 0.0003 +[2026-02-28 21:39:46] (step=0016491) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.2265701428291917, LR: 0.0003 +[2026-02-28 21:39:54] (step=0016492) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.2267657992565058, LR: 0.0003 +[2026-02-28 21:40:02] (step=0016493) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.2269614556838193, LR: 0.0003 +[2026-02-28 21:40:10] (step=0016494) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.227157112111133, LR: 0.0003 +[2026-02-28 21:40:18] (step=0016495) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.2273527685384464, LR: 0.0003 +[2026-02-28 21:40:26] (step=0016496) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.22754842496576, LR: 0.0003 +[2026-02-28 21:40:34] (step=0016497) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.2277440813930736, LR: 0.0003 +[2026-02-28 21:40:41] (step=0016498) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.2279397378203876, LR: 0.0003 +[2026-02-28 21:40:49] (step=0016499) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.228135394247701, LR: 0.0003 +[2026-02-28 21:40:57] (step=0016500) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.2283310506750147, LR: 0.0003 +[2026-02-28 21:40:57] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0016500/ +[2026-02-28 21:41:05] (step=0016501) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.2285267071023283, LR: 0.0003 +[2026-02-28 21:41:13] (step=0016502) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 3.228722363529642, LR: 0.0003 +[2026-02-28 21:41:21] (step=0016503) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.2289180199569554, LR: 0.0003 +[2026-02-28 21:41:28] (step=0016504) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.2291136763842694, LR: 0.0003 +[2026-02-28 21:41:36] (step=0016505) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.229309332811583, LR: 0.0003 +[2026-02-28 21:41:44] (step=0016506) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.2295049892388965, LR: 0.0003 +[2026-02-28 21:41:52] (step=0016507) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.22970064566621, LR: 0.0003 +[2026-02-28 21:42:00] (step=0016508) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.2298963020935236, LR: 0.0003 +[2026-02-28 21:42:08] (step=0016509) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.230091958520837, LR: 0.0003 +[2026-02-28 21:42:15] (step=0016510) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.230287614948151, LR: 0.0003 +[2026-02-28 21:42:23] (step=0016511) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.2304832713754648, LR: 0.0003 +[2026-02-28 21:42:31] (step=0016512) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.2306789278027783, LR: 0.0003 +[2026-02-28 21:42:39] (step=0016513) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 3.230874584230092, LR: 0.0003 +[2026-02-28 21:42:47] (step=0016514) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.2310702406574054, LR: 0.0003 +[2026-02-28 21:42:55] (step=0016515) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 3.2312658970847195, LR: 0.0003 +[2026-02-28 21:43:03] (step=0016516) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.231461553512033, LR: 0.0003 +[2026-02-28 21:43:10] (step=0016517) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.2316572099393466, LR: 0.0003 +[2026-02-28 21:43:18] (step=0016518) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.23185286636666, LR: 0.0003 +[2026-02-28 21:43:26] (step=0016519) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.2320485227939737, LR: 0.0003 +[2026-02-28 21:43:34] (step=0016520) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.2322441792212873, LR: 0.0003 +[2026-02-28 21:43:42] (step=0016521) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 3.2324398356486013, LR: 0.0003 +[2026-02-28 21:43:50] (step=0016522) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.232635492075915, LR: 0.0003 +[2026-02-28 21:43:57] (step=0016523) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.2328311485032284, LR: 0.0003 +[2026-02-28 21:44:05] (step=0016524) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.233026804930542, LR: 0.0003 +[2026-02-28 21:44:13] (step=0016525) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.2332224613578555, LR: 0.0003 +[2026-02-28 21:44:21] (step=0016526) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.233418117785169, LR: 0.0003 +[2026-02-28 21:44:29] (step=0016527) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.233613774212483, LR: 0.0003 +[2026-02-28 21:44:37] (step=0016528) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.2338094306397966, LR: 0.0003 +[2026-02-28 21:44:45] (step=0016529) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.23400508706711, LR: 0.0003 +[2026-02-28 21:44:52] (step=0016530) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 3.2342007434944238, LR: 0.0003 +[2026-02-28 21:45:00] (step=0016531) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.2343963999217373, LR: 0.0003 +[2026-02-28 21:45:08] (step=0016532) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 3.234592056349051, LR: 0.0003 +[2026-02-28 21:45:16] (step=0016533) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.234787712776365, LR: 0.0003 +[2026-02-28 21:45:24] (step=0016534) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.2349833692036785, LR: 0.0003 +[2026-02-28 21:45:32] (step=0016535) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.235179025630992, LR: 0.0003 +[2026-02-28 21:45:39] (step=0016536) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.2353746820583056, LR: 0.0003 +[2026-02-28 21:45:47] (step=0016537) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.235570338485619, LR: 0.0003 +[2026-02-28 21:45:55] (step=0016538) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.2357659949129327, LR: 0.0003 +[2026-02-28 21:46:03] (step=0016539) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.2359616513402467, LR: 0.0003 +[2026-02-28 21:46:11] (step=0016540) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 3.2361573077675603, LR: 0.0003 +[2026-02-28 21:46:19] (step=0016541) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.236352964194874, LR: 0.0003 +[2026-02-28 21:46:27] (step=0016542) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.2365486206221874, LR: 0.0003 +[2026-02-28 21:46:34] (step=0016543) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.236744277049501, LR: 0.0003 +[2026-02-28 21:46:42] (step=0016544) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.2369399334768145, LR: 0.0003 +[2026-02-28 21:46:50] (step=0016545) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 3.2371355899041285, LR: 0.0003 +[2026-02-28 21:46:58] (step=0016546) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 3.237331246331442, LR: 0.0003 +[2026-02-28 21:47:06] (step=0016547) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.2375269027587557, LR: 0.0003 +[2026-02-28 21:47:14] (step=0016548) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.237722559186069, LR: 0.0003 +[2026-02-28 21:47:21] (step=0016549) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.2379182156133828, LR: 0.0003 +[2026-02-28 21:47:29] (step=0016550) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.2381138720406963, LR: 0.0003 +[2026-02-28 21:47:37] (step=0016551) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.2383095284680103, LR: 0.0003 +[2026-02-28 21:47:45] (step=0016552) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.238505184895324, LR: 0.0003 +[2026-02-28 21:47:53] (step=0016553) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.2387008413226375, LR: 0.0003 +[2026-02-28 21:48:01] (step=0016554) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.238896497749951, LR: 0.0003 +[2026-02-28 21:48:08] (step=0016555) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 3.2390921541772646, LR: 0.0003 +[2026-02-28 21:48:16] (step=0016556) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.239287810604578, LR: 0.0003 +[2026-02-28 21:48:24] (step=0016557) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.239483467031892, LR: 0.0003 +[2026-02-28 21:48:32] (step=0016558) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.2396791234592057, LR: 0.0003 +[2026-02-28 21:48:40] (step=0016559) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.2398747798865193, LR: 0.0003 +[2026-02-28 21:48:48] (step=0016560) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.240070436313833, LR: 0.0003 +[2026-02-28 21:48:55] (step=0016561) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.2402660927411464, LR: 0.0003 +[2026-02-28 21:49:03] (step=0016562) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.24046174916846, LR: 0.0003 +[2026-02-28 21:49:11] (step=0016563) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.240657405595774, LR: 0.0003 +[2026-02-28 21:49:19] (step=0016564) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.2408530620230875, LR: 0.0003 +[2026-02-28 21:49:27] (step=0016565) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.241048718450401, LR: 0.0003 +[2026-02-28 21:49:35] (step=0016566) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.2412443748777147, LR: 0.0003 +[2026-02-28 21:49:43] (step=0016567) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 3.2414400313050282, LR: 0.0003 +[2026-02-28 21:49:50] (step=0016568) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.241635687732342, LR: 0.0003 +[2026-02-28 21:49:58] (step=0016569) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.241831344159656, LR: 0.0003 +[2026-02-28 21:50:06] (step=0016570) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.2420270005869694, LR: 0.0003 +[2026-02-28 21:50:14] (step=0016571) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.242222657014283, LR: 0.0003 +[2026-02-28 21:50:22] (step=0016572) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.2424183134415965, LR: 0.0003 +[2026-02-28 21:50:30] (step=0016573) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.24261396986891, LR: 0.0003 +[2026-02-28 21:50:37] (step=0016574) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.242809626296224, LR: 0.0003 +[2026-02-28 21:50:45] (step=0016575) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.2430052827235376, LR: 0.0003 +[2026-02-28 21:50:53] (step=0016576) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.243200939150851, LR: 0.0003 +[2026-02-28 21:51:01] (step=0016577) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.2433965955781647, LR: 0.0003 +[2026-02-28 21:51:09] (step=0016578) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.2435922520054783, LR: 0.0003 +[2026-02-28 21:51:17] (step=0016579) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.243787908432792, LR: 0.0003 +[2026-02-28 21:51:25] (step=0016580) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.243983564860106, LR: 0.0003 +[2026-02-28 21:51:32] (step=0016581) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.2441792212874194, LR: 0.0003 +[2026-02-28 21:51:40] (step=0016582) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.244374877714733, LR: 0.0003 +[2026-02-28 21:51:48] (step=0016583) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.2445705341420465, LR: 0.0003 +[2026-02-28 21:51:56] (step=0016584) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.24476619056936, LR: 0.0003 +[2026-02-28 21:52:04] (step=0016585) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.2449618469966737, LR: 0.0003 +[2026-02-28 21:52:12] (step=0016586) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.2451575034239877, LR: 0.0003 +[2026-02-28 21:52:19] (step=0016587) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 3.2453531598513012, LR: 0.0003 +[2026-02-28 21:52:27] (step=0016588) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.245548816278615, LR: 0.0003 +[2026-02-28 21:52:35] (step=0016589) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.2457444727059284, LR: 0.0003 +[2026-02-28 21:52:43] (step=0016590) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.245940129133242, LR: 0.0003 +[2026-02-28 21:52:51] (step=0016591) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.2461357855605555, LR: 0.0003 +[2026-02-28 21:52:59] (step=0016592) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.2463314419878695, LR: 0.0003 +[2026-02-28 21:53:06] (step=0016593) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.246527098415183, LR: 0.0003 +[2026-02-28 21:53:14] (step=0016594) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.2467227548424966, LR: 0.0003 +[2026-02-28 21:53:22] (step=0016595) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.24691841126981, LR: 0.0003 +[2026-02-28 21:53:30] (step=0016596) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.2471140676971237, LR: 0.0003 +[2026-02-28 21:53:38] (step=0016597) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.2473097241244373, LR: 0.0003 +[2026-02-28 21:53:46] (step=0016598) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.2475053805517513, LR: 0.0003 +[2026-02-28 21:53:53] (step=0016599) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.247701036979065, LR: 0.0003 +[2026-02-28 21:54:01] (step=0016600) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.2478966934063784, LR: 0.0003 +[2026-02-28 21:54:09] (step=0016601) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.248092349833692, LR: 0.0003 +[2026-02-28 21:54:17] (step=0016602) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.2482880062610056, LR: 0.0003 +[2026-02-28 21:54:25] (step=0016603) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.248483662688319, LR: 0.0003 +[2026-02-28 21:54:33] (step=0016604) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 3.248679319115633, LR: 0.0003 +[2026-02-28 21:54:40] (step=0016605) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.2488749755429467, LR: 0.0003 +[2026-02-28 21:54:48] (step=0016606) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.2490706319702602, LR: 0.0003 +[2026-02-28 21:54:56] (step=0016607) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.249266288397574, LR: 0.0003 +[2026-02-28 21:55:04] (step=0016608) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.2494619448248874, LR: 0.0003 +[2026-02-28 21:55:12] (step=0016609) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.249657601252201, LR: 0.0003 +[2026-02-28 21:55:20] (step=0016610) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.249853257679515, LR: 0.0003 +[2026-02-28 21:55:28] (step=0016611) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.2500489141068285, LR: 0.0003 +[2026-02-28 21:55:35] (step=0016612) Train Loss: 0.4707, Train Steps/Sec: 0.13, Epoch: 3.250244570534142, LR: 0.0003 +[2026-02-28 21:55:43] (step=0016613) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.2504402269614556, LR: 0.0003 +[2026-02-28 21:55:51] (step=0016614) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.250635883388769, LR: 0.0003 +[2026-02-28 21:55:59] (step=0016615) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.2508315398160827, LR: 0.0003 +[2026-02-28 21:56:07] (step=0016616) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.2510271962433968, LR: 0.0003 +[2026-02-28 21:56:15] (step=0016617) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.2512228526707103, LR: 0.0003 +[2026-02-28 21:56:22] (step=0016618) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.251418509098024, LR: 0.0003 +[2026-02-28 21:56:30] (step=0016619) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.2516141655253374, LR: 0.0003 +[2026-02-28 21:56:38] (step=0016620) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.251809821952651, LR: 0.0003 +[2026-02-28 21:56:46] (step=0016621) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.2520054783799646, LR: 0.0003 +[2026-02-28 21:56:54] (step=0016622) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 3.2522011348072786, LR: 0.0003 +[2026-02-28 21:57:02] (step=0016623) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.252396791234592, LR: 0.0003 +[2026-02-28 21:57:10] (step=0016624) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.2525924476619057, LR: 0.0003 +[2026-02-28 21:57:17] (step=0016625) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.2527881040892193, LR: 0.0003 +[2026-02-28 21:57:25] (step=0016626) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 3.252983760516533, LR: 0.0003 +[2026-02-28 21:57:33] (step=0016627) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.253179416943847, LR: 0.0003 +[2026-02-28 21:57:41] (step=0016628) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.2533750733711604, LR: 0.0003 +[2026-02-28 21:57:49] (step=0016629) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.253570729798474, LR: 0.0003 +[2026-02-28 21:57:57] (step=0016630) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 3.2537663862257875, LR: 0.0003 +[2026-02-28 21:58:05] (step=0016631) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.253962042653101, LR: 0.0003 +[2026-02-28 21:58:12] (step=0016632) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.2541576990804146, LR: 0.0003 +[2026-02-28 21:58:20] (step=0016633) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.2543533555077286, LR: 0.0003 +[2026-02-28 21:58:28] (step=0016634) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.254549011935042, LR: 0.0003 +[2026-02-28 21:58:36] (step=0016635) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.2547446683623558, LR: 0.0003 +[2026-02-28 21:58:44] (step=0016636) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.2549403247896693, LR: 0.0003 +[2026-02-28 21:58:52] (step=0016637) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 3.255135981216983, LR: 0.0003 +[2026-02-28 21:58:59] (step=0016638) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.2553316376442964, LR: 0.0003 +[2026-02-28 21:59:07] (step=0016639) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.2555272940716105, LR: 0.0003 +[2026-02-28 21:59:15] (step=0016640) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.255722950498924, LR: 0.0003 +[2026-02-28 21:59:23] (step=0016641) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.2559186069262376, LR: 0.0003 +[2026-02-28 21:59:31] (step=0016642) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.256114263353551, LR: 0.0003 +[2026-02-28 21:59:39] (step=0016643) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.2563099197808647, LR: 0.0003 +[2026-02-28 21:59:46] (step=0016644) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.2565055762081783, LR: 0.0003 +[2026-02-28 21:59:54] (step=0016645) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.2567012326354923, LR: 0.0003 +[2026-02-28 22:00:02] (step=0016646) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 3.256896889062806, LR: 0.0003 +[2026-02-28 22:00:10] (step=0016647) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.2570925454901194, LR: 0.0003 +[2026-02-28 22:00:18] (step=0016648) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.257288201917433, LR: 0.0003 +[2026-02-28 22:00:26] (step=0016649) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.2574838583447465, LR: 0.0003 +[2026-02-28 22:00:33] (step=0016650) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.25767951477206, LR: 0.0003 +[2026-02-28 22:00:41] (step=0016651) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.257875171199374, LR: 0.0003 +[2026-02-28 22:00:49] (step=0016652) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.2580708276266876, LR: 0.0003 +[2026-02-28 22:00:57] (step=0016653) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.258266484054001, LR: 0.0003 +[2026-02-28 22:01:05] (step=0016654) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.2584621404813148, LR: 0.0003 +[2026-02-28 22:01:13] (step=0016655) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.2586577969086283, LR: 0.0003 +[2026-02-28 22:01:21] (step=0016656) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.258853453335942, LR: 0.0003 +[2026-02-28 22:01:28] (step=0016657) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.259049109763256, LR: 0.0003 +[2026-02-28 22:01:36] (step=0016658) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.2592447661905695, LR: 0.0003 +[2026-02-28 22:01:44] (step=0016659) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.259440422617883, LR: 0.0003 +[2026-02-28 22:01:52] (step=0016660) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.2596360790451966, LR: 0.0003 +[2026-02-28 22:02:00] (step=0016661) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.25983173547251, LR: 0.0003 +[2026-02-28 22:02:08] (step=0016662) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.2600273918998237, LR: 0.0003 +[2026-02-28 22:02:15] (step=0016663) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.2602230483271377, LR: 0.0003 +[2026-02-28 22:02:23] (step=0016664) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.2604187047544513, LR: 0.0003 +[2026-02-28 22:02:31] (step=0016665) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.260614361181765, LR: 0.0003 +[2026-02-28 22:02:39] (step=0016666) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.2608100176090784, LR: 0.0003 +[2026-02-28 22:02:47] (step=0016667) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.261005674036392, LR: 0.0003 +[2026-02-28 22:02:55] (step=0016668) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.2612013304637055, LR: 0.0003 +[2026-02-28 22:03:03] (step=0016669) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.2613969868910195, LR: 0.0003 +[2026-02-28 22:03:10] (step=0016670) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.261592643318333, LR: 0.0003 +[2026-02-28 22:03:18] (step=0016671) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.2617882997456467, LR: 0.0003 +[2026-02-28 22:03:26] (step=0016672) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.26198395617296, LR: 0.0003 +[2026-02-28 22:03:34] (step=0016673) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.262179612600274, LR: 0.0003 +[2026-02-28 22:03:42] (step=0016674) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.2623752690275873, LR: 0.0003 +[2026-02-28 22:03:50] (step=0016675) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.2625709254549013, LR: 0.0003 +[2026-02-28 22:03:58] (step=0016676) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.262766581882215, LR: 0.0003 +[2026-02-28 22:04:05] (step=0016677) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.2629622383095285, LR: 0.0003 +[2026-02-28 22:04:13] (step=0016678) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.263157894736842, LR: 0.0003 +[2026-02-28 22:04:21] (step=0016679) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.2633535511641556, LR: 0.0003 +[2026-02-28 22:04:29] (step=0016680) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.263549207591469, LR: 0.0003 +[2026-02-28 22:04:37] (step=0016681) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.263744864018783, LR: 0.0003 +[2026-02-28 22:04:45] (step=0016682) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.2639405204460967, LR: 0.0003 +[2026-02-28 22:04:52] (step=0016683) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.2641361768734103, LR: 0.0003 +[2026-02-28 22:05:00] (step=0016684) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.264331833300724, LR: 0.0003 +[2026-02-28 22:05:08] (step=0016685) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.2645274897280374, LR: 0.0003 +[2026-02-28 22:05:16] (step=0016686) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.2647231461553514, LR: 0.0003 +[2026-02-28 22:05:24] (step=0016687) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.264918802582665, LR: 0.0003 +[2026-02-28 22:05:32] (step=0016688) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.2651144590099785, LR: 0.0003 +[2026-02-28 22:05:40] (step=0016689) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.265310115437292, LR: 0.0003 +[2026-02-28 22:05:47] (step=0016690) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.2655057718646057, LR: 0.0003 +[2026-02-28 22:05:55] (step=0016691) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 3.2657014282919192, LR: 0.0003 +[2026-02-28 22:06:03] (step=0016692) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.2658970847192332, LR: 0.0003 +[2026-02-28 22:06:11] (step=0016693) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.266092741146547, LR: 0.0003 +[2026-02-28 22:06:19] (step=0016694) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.2662883975738604, LR: 0.0003 +[2026-02-28 22:06:27] (step=0016695) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.266484054001174, LR: 0.0003 +[2026-02-28 22:06:34] (step=0016696) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.2666797104284875, LR: 0.0003 +[2026-02-28 22:06:42] (step=0016697) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.266875366855801, LR: 0.0003 +[2026-02-28 22:06:50] (step=0016698) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.267071023283115, LR: 0.0003 +[2026-02-28 22:06:58] (step=0016699) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.2672666797104286, LR: 0.0003 +[2026-02-28 22:07:06] (step=0016700) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.267462336137742, LR: 0.0003 +[2026-02-28 22:07:14] (step=0016701) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.2676579925650557, LR: 0.0003 +[2026-02-28 22:07:21] (step=0016702) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.2678536489923693, LR: 0.0003 +[2026-02-28 22:07:29] (step=0016703) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.268049305419683, LR: 0.0003 +[2026-02-28 22:07:37] (step=0016704) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.268244961846997, LR: 0.0003 +[2026-02-28 22:07:45] (step=0016705) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.2684406182743104, LR: 0.0003 +[2026-02-28 22:07:53] (step=0016706) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.268636274701624, LR: 0.0003 +[2026-02-28 22:08:01] (step=0016707) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.2688319311289376, LR: 0.0003 +[2026-02-28 22:08:08] (step=0016708) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.269027587556251, LR: 0.0003 +[2026-02-28 22:08:16] (step=0016709) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.2692232439835647, LR: 0.0003 +[2026-02-28 22:08:24] (step=0016710) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.2694189004108787, LR: 0.0003 +[2026-02-28 22:08:32] (step=0016711) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 3.2696145568381922, LR: 0.0003 +[2026-02-28 22:08:40] (step=0016712) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.269810213265506, LR: 0.0003 +[2026-02-28 22:08:48] (step=0016713) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.2700058696928194, LR: 0.0003 +[2026-02-28 22:08:56] (step=0016714) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.270201526120133, LR: 0.0003 +[2026-02-28 22:09:04] (step=0016715) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.2703971825474465, LR: 0.0003 +[2026-02-28 22:09:11] (step=0016716) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.2705928389747605, LR: 0.0003 +[2026-02-28 22:09:19] (step=0016717) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.270788495402074, LR: 0.0003 +[2026-02-28 22:09:27] (step=0016718) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.2709841518293876, LR: 0.0003 +[2026-02-28 22:09:35] (step=0016719) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.271179808256701, LR: 0.0003 +[2026-02-28 22:09:43] (step=0016720) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.2713754646840147, LR: 0.0003 +[2026-02-28 22:09:51] (step=0016721) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.2715711211113283, LR: 0.0003 +[2026-02-28 22:09:58] (step=0016722) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.2717667775386423, LR: 0.0003 +[2026-02-28 22:10:06] (step=0016723) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.271962433965956, LR: 0.0003 +[2026-02-28 22:10:14] (step=0016724) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.2721580903932694, LR: 0.0003 +[2026-02-28 22:10:22] (step=0016725) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.272353746820583, LR: 0.0003 +[2026-02-28 22:10:30] (step=0016726) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.2725494032478966, LR: 0.0003 +[2026-02-28 22:10:38] (step=0016727) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.27274505967521, LR: 0.0003 +[2026-02-28 22:10:46] (step=0016728) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.272940716102524, LR: 0.0003 +[2026-02-28 22:10:53] (step=0016729) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 3.2731363725298377, LR: 0.0003 +[2026-02-28 22:11:01] (step=0016730) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.2733320289571513, LR: 0.0003 +[2026-02-28 22:11:09] (step=0016731) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.273527685384465, LR: 0.0003 +[2026-02-28 22:11:17] (step=0016732) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.2737233418117784, LR: 0.0003 +[2026-02-28 22:11:25] (step=0016733) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.273918998239092, LR: 0.0003 +[2026-02-28 22:11:33] (step=0016734) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.274114654666406, LR: 0.0003 +[2026-02-28 22:11:40] (step=0016735) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.2743103110937195, LR: 0.0003 +[2026-02-28 22:11:48] (step=0016736) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 3.274505967521033, LR: 0.0003 +[2026-02-28 22:11:56] (step=0016737) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.2747016239483466, LR: 0.0003 +[2026-02-28 22:12:04] (step=0016738) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.27489728037566, LR: 0.0003 +[2026-02-28 22:12:12] (step=0016739) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.275092936802974, LR: 0.0003 +[2026-02-28 22:12:20] (step=0016740) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.2752885932302878, LR: 0.0003 +[2026-02-28 22:12:27] (step=0016741) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.2754842496576013, LR: 0.0003 +[2026-02-28 22:12:35] (step=0016742) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.275679906084915, LR: 0.0003 +[2026-02-28 22:12:43] (step=0016743) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.2758755625122284, LR: 0.0003 +[2026-02-28 22:12:51] (step=0016744) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.276071218939542, LR: 0.0003 +[2026-02-28 22:12:59] (step=0016745) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.276266875366856, LR: 0.0003 +[2026-02-28 22:13:07] (step=0016746) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.2764625317941696, LR: 0.0003 +[2026-02-28 22:13:14] (step=0016747) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.276658188221483, LR: 0.0003 +[2026-02-28 22:13:22] (step=0016748) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.2768538446487967, LR: 0.0003 +[2026-02-28 22:13:30] (step=0016749) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.2770495010761103, LR: 0.0003 +[2026-02-28 22:13:38] (step=0016750) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.277245157503424, LR: 0.0003 +[2026-02-28 22:13:46] (step=0016751) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 3.277440813930738, LR: 0.0003 +[2026-02-28 22:13:54] (step=0016752) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.2776364703580514, LR: 0.0003 +[2026-02-28 22:14:02] (step=0016753) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.277832126785365, LR: 0.0003 +[2026-02-28 22:14:09] (step=0016754) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.2780277832126785, LR: 0.0003 +[2026-02-28 22:14:17] (step=0016755) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.278223439639992, LR: 0.0003 +[2026-02-28 22:14:25] (step=0016756) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.2784190960673056, LR: 0.0003 +[2026-02-28 22:14:33] (step=0016757) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.2786147524946196, LR: 0.0003 +[2026-02-28 22:14:41] (step=0016758) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.278810408921933, LR: 0.0003 +[2026-02-28 22:14:49] (step=0016759) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.2790060653492468, LR: 0.0003 +[2026-02-28 22:14:56] (step=0016760) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.2792017217765603, LR: 0.0003 +[2026-02-28 22:15:04] (step=0016761) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.279397378203874, LR: 0.0003 +[2026-02-28 22:15:12] (step=0016762) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.2795930346311875, LR: 0.0003 +[2026-02-28 22:15:20] (step=0016763) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.2797886910585015, LR: 0.0003 +[2026-02-28 22:15:28] (step=0016764) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.279984347485815, LR: 0.0003 +[2026-02-28 22:15:36] (step=0016765) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.2801800039131286, LR: 0.0003 +[2026-02-28 22:15:44] (step=0016766) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.280375660340442, LR: 0.0003 +[2026-02-28 22:15:51] (step=0016767) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 3.2805713167677557, LR: 0.0003 +[2026-02-28 22:15:59] (step=0016768) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.2807669731950693, LR: 0.0003 +[2026-02-28 22:16:07] (step=0016769) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.2809626296223833, LR: 0.0003 +[2026-02-28 22:16:15] (step=0016770) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.281158286049697, LR: 0.0003 +[2026-02-28 22:16:23] (step=0016771) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 3.2813539424770104, LR: 0.0003 +[2026-02-28 22:16:31] (step=0016772) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.281549598904324, LR: 0.0003 +[2026-02-28 22:16:38] (step=0016773) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.2817452553316375, LR: 0.0003 +[2026-02-28 22:16:46] (step=0016774) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.281940911758951, LR: 0.0003 +[2026-02-28 22:16:54] (step=0016775) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.282136568186265, LR: 0.0003 +[2026-02-28 22:17:02] (step=0016776) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 3.2823322246135787, LR: 0.0003 +[2026-02-28 22:17:10] (step=0016777) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.282527881040892, LR: 0.0003 +[2026-02-28 22:17:18] (step=0016778) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.2827235374682058, LR: 0.0003 +[2026-02-28 22:17:26] (step=0016779) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.2829191938955193, LR: 0.0003 +[2026-02-28 22:17:33] (step=0016780) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.283114850322833, LR: 0.0003 +[2026-02-28 22:17:41] (step=0016781) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 3.283310506750147, LR: 0.0003 +[2026-02-28 22:17:49] (step=0016782) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 3.2835061631774605, LR: 0.0003 +[2026-02-28 22:17:57] (step=0016783) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.283701819604774, LR: 0.0003 +[2026-02-28 22:18:05] (step=0016784) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.2838974760320876, LR: 0.0003 +[2026-02-28 22:18:13] (step=0016785) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.284093132459401, LR: 0.0003 +[2026-02-28 22:18:20] (step=0016786) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.2842887888867147, LR: 0.0003 +[2026-02-28 22:18:28] (step=0016787) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.2844844453140287, LR: 0.0003 +[2026-02-28 22:18:36] (step=0016788) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.2846801017413423, LR: 0.0003 +[2026-02-28 22:18:44] (step=0016789) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.284875758168656, LR: 0.0003 +[2026-02-28 22:18:52] (step=0016790) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.2850714145959694, LR: 0.0003 +[2026-02-28 22:19:00] (step=0016791) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.285267071023283, LR: 0.0003 +[2026-02-28 22:19:08] (step=0016792) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.2854627274505965, LR: 0.0003 +[2026-02-28 22:19:15] (step=0016793) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.2856583838779105, LR: 0.0003 +[2026-02-28 22:19:23] (step=0016794) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 3.285854040305224, LR: 0.0003 +[2026-02-28 22:19:31] (step=0016795) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 3.2860496967325377, LR: 0.0003 +[2026-02-28 22:19:39] (step=0016796) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 3.2862453531598512, LR: 0.0003 +[2026-02-28 22:19:47] (step=0016797) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.286441009587165, LR: 0.0003 +[2026-02-28 22:19:55] (step=0016798) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.286636666014479, LR: 0.0003 +[2026-02-28 22:20:02] (step=0016799) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 3.2868323224417924, LR: 0.0003 +[2026-02-28 22:20:10] (step=0016800) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.287027978869106, LR: 0.0003 +[2026-02-28 22:20:18] (step=0016801) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.2872236352964195, LR: 0.0003 +[2026-02-28 22:20:26] (step=0016802) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 3.287419291723733, LR: 0.0003 +[2026-02-28 22:20:34] (step=0016803) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.2876149481510466, LR: 0.0003 +[2026-02-28 22:20:42] (step=0016804) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.2878106045783606, LR: 0.0003 +[2026-02-28 22:20:49] (step=0016805) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.288006261005674, LR: 0.0003 +[2026-02-28 22:20:57] (step=0016806) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.2882019174329877, LR: 0.0003 +[2026-02-28 22:21:05] (step=0016807) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.2883975738603013, LR: 0.0003 +[2026-02-28 22:21:13] (step=0016808) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.288593230287615, LR: 0.0003 +[2026-02-28 22:21:21] (step=0016809) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.2887888867149284, LR: 0.0003 +[2026-02-28 22:21:29] (step=0016810) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 3.2889845431422424, LR: 0.0003 +[2026-02-28 22:21:36] (step=0016811) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.289180199569556, LR: 0.0003 +[2026-02-28 22:21:44] (step=0016812) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.2893758559968695, LR: 0.0003 +[2026-02-28 22:21:52] (step=0016813) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.289571512424183, LR: 0.0003 +[2026-02-28 22:22:00] (step=0016814) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.2897671688514967, LR: 0.0003 +[2026-02-28 22:22:08] (step=0016815) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.2899628252788102, LR: 0.0003 +[2026-02-28 22:22:16] (step=0016816) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.2901584817061242, LR: 0.0003 +[2026-02-28 22:22:24] (step=0016817) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.290354138133438, LR: 0.0003 +[2026-02-28 22:22:31] (step=0016818) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.2905497945607514, LR: 0.0003 +[2026-02-28 22:22:39] (step=0016819) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.290745450988065, LR: 0.0003 +[2026-02-28 22:22:47] (step=0016820) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 3.2909411074153785, LR: 0.0003 +[2026-02-28 22:22:55] (step=0016821) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.291136763842692, LR: 0.0003 +[2026-02-28 22:23:03] (step=0016822) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.291332420270006, LR: 0.0003 +[2026-02-28 22:23:11] (step=0016823) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.2915280766973196, LR: 0.0003 +[2026-02-28 22:23:19] (step=0016824) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.291723733124633, LR: 0.0003 +[2026-02-28 22:23:26] (step=0016825) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.2919193895519467, LR: 0.0003 +[2026-02-28 22:23:34] (step=0016826) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.2921150459792603, LR: 0.0003 +[2026-02-28 22:23:42] (step=0016827) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.292310702406574, LR: 0.0003 +[2026-02-28 22:23:50] (step=0016828) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.292506358833888, LR: 0.0003 +[2026-02-28 22:23:58] (step=0016829) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.2927020152612014, LR: 0.0003 +[2026-02-28 22:24:06] (step=0016830) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.292897671688515, LR: 0.0003 +[2026-02-28 22:24:14] (step=0016831) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.2930933281158286, LR: 0.0003 +[2026-02-28 22:24:21] (step=0016832) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.293288984543142, LR: 0.0003 +[2026-02-28 22:24:29] (step=0016833) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.2934846409704557, LR: 0.0003 +[2026-02-28 22:24:37] (step=0016834) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.2936802973977697, LR: 0.0003 +[2026-02-28 22:24:45] (step=0016835) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 3.2938759538250832, LR: 0.0003 +[2026-02-28 22:24:53] (step=0016836) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.294071610252397, LR: 0.0003 +[2026-02-28 22:25:01] (step=0016837) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.2942672666797104, LR: 0.0003 +[2026-02-28 22:25:08] (step=0016838) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.294462923107024, LR: 0.0003 +[2026-02-28 22:25:16] (step=0016839) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.2946585795343375, LR: 0.0003 +[2026-02-28 22:25:24] (step=0016840) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.2948542359616515, LR: 0.0003 +[2026-02-28 22:25:32] (step=0016841) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 3.295049892388965, LR: 0.0003 +[2026-02-28 22:25:40] (step=0016842) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.2952455488162786, LR: 0.0003 +[2026-02-28 22:25:48] (step=0016843) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.295441205243592, LR: 0.0003 +[2026-02-28 22:25:55] (step=0016844) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.2956368616709057, LR: 0.0003 +[2026-02-28 22:26:03] (step=0016845) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.2958325180982193, LR: 0.0003 +[2026-02-28 22:26:11] (step=0016846) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.2960281745255333, LR: 0.0003 +[2026-02-28 22:26:19] (step=0016847) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.296223830952847, LR: 0.0003 +[2026-02-28 22:26:27] (step=0016848) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.2964194873801604, LR: 0.0003 +[2026-02-28 22:26:35] (step=0016849) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.296615143807474, LR: 0.0003 +[2026-02-28 22:26:42] (step=0016850) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.2968108002347876, LR: 0.0003 +[2026-02-28 22:26:50] (step=0016851) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 3.2970064566621016, LR: 0.0003 +[2026-02-28 22:26:58] (step=0016852) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.297202113089415, LR: 0.0003 +[2026-02-28 22:27:06] (step=0016853) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.2973977695167287, LR: 0.0003 +[2026-02-28 22:27:14] (step=0016854) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.2975934259440423, LR: 0.0003 +[2026-02-28 22:27:22] (step=0016855) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.297789082371356, LR: 0.0003 +[2026-02-28 22:27:30] (step=0016856) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.2979847387986694, LR: 0.0003 +[2026-02-28 22:27:37] (step=0016857) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.2981803952259834, LR: 0.0003 +[2026-02-28 22:27:45] (step=0016858) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.298376051653297, LR: 0.0003 +[2026-02-28 22:27:53] (step=0016859) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.2985717080806105, LR: 0.0003 +[2026-02-28 22:28:01] (step=0016860) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.298767364507924, LR: 0.0003 +[2026-02-28 22:28:09] (step=0016861) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.2989630209352376, LR: 0.0003 +[2026-02-28 22:28:17] (step=0016862) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.299158677362551, LR: 0.0003 +[2026-02-28 22:28:25] (step=0016863) Train Loss: 0.4725, Train Steps/Sec: 0.13, Epoch: 3.299354333789865, LR: 0.0003 +[2026-02-28 22:28:32] (step=0016864) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.2995499902171788, LR: 0.0003 +[2026-02-28 22:28:40] (step=0016865) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.2997456466444923, LR: 0.0003 +[2026-02-28 22:28:48] (step=0016866) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 3.299941303071806, LR: 0.0003 +[2026-02-28 22:28:56] (step=0016867) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 3.3001369594991194, LR: 0.0003 +[2026-02-28 22:29:04] (step=0016868) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.300332615926433, LR: 0.0003 +[2026-02-28 22:29:12] (step=0016869) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.300528272353747, LR: 0.0003 +[2026-02-28 22:29:19] (step=0016870) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.3007239287810606, LR: 0.0003 +[2026-02-28 22:29:27] (step=0016871) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.300919585208374, LR: 0.0003 +[2026-02-28 22:29:35] (step=0016872) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.3011152416356877, LR: 0.0003 +[2026-02-28 22:29:43] (step=0016873) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.3013108980630013, LR: 0.0003 +[2026-02-28 22:29:51] (step=0016874) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.301506554490315, LR: 0.0003 +[2026-02-28 22:29:59] (step=0016875) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.301702210917629, LR: 0.0003 +[2026-02-28 22:30:07] (step=0016876) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.3018978673449424, LR: 0.0003 +[2026-02-28 22:30:14] (step=0016877) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.302093523772256, LR: 0.0003 +[2026-02-28 22:30:22] (step=0016878) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.3022891801995695, LR: 0.0003 +[2026-02-28 22:30:30] (step=0016879) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.302484836626883, LR: 0.0003 +[2026-02-28 22:30:38] (step=0016880) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.3026804930541966, LR: 0.0003 +[2026-02-28 22:30:46] (step=0016881) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 3.3028761494815106, LR: 0.0003 +[2026-02-28 22:30:54] (step=0016882) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.303071805908824, LR: 0.0003 +[2026-02-28 22:31:01] (step=0016883) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.3032674623361378, LR: 0.0003 +[2026-02-28 22:31:09] (step=0016884) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.3034631187634513, LR: 0.0003 +[2026-02-28 22:31:17] (step=0016885) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.303658775190765, LR: 0.0003 +[2026-02-28 22:31:25] (step=0016886) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.3038544316180785, LR: 0.0003 +[2026-02-28 22:31:33] (step=0016887) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.3040500880453925, LR: 0.0003 +[2026-02-28 22:31:41] (step=0016888) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.304245744472706, LR: 0.0003 +[2026-02-28 22:31:49] (step=0016889) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.3044414009000196, LR: 0.0003 +[2026-02-28 22:31:56] (step=0016890) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.304637057327333, LR: 0.0003 +[2026-02-28 22:32:04] (step=0016891) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.3048327137546467, LR: 0.0003 +[2026-02-28 22:32:12] (step=0016892) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.3050283701819603, LR: 0.0003 +[2026-02-28 22:32:20] (step=0016893) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 3.3052240266092743, LR: 0.0003 +[2026-02-28 22:32:28] (step=0016894) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.305419683036588, LR: 0.0003 +[2026-02-28 22:32:36] (step=0016895) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.3056153394639014, LR: 0.0003 +[2026-02-28 22:32:43] (step=0016896) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.305810995891215, LR: 0.0003 +[2026-02-28 22:32:51] (step=0016897) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.3060066523185285, LR: 0.0003 +[2026-02-28 22:32:59] (step=0016898) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.306202308745842, LR: 0.0003 +[2026-02-28 22:33:07] (step=0016899) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.306397965173156, LR: 0.0003 +[2026-02-28 22:33:15] (step=0016900) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.3065936216004697, LR: 0.0003 +[2026-02-28 22:33:23] (step=0016901) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.306789278027783, LR: 0.0003 +[2026-02-28 22:33:30] (step=0016902) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.306984934455097, LR: 0.0003 +[2026-02-28 22:33:38] (step=0016903) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.3071805908824103, LR: 0.0003 +[2026-02-28 22:33:46] (step=0016904) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.307376247309724, LR: 0.0003 +[2026-02-28 22:33:54] (step=0016905) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.307571903737038, LR: 0.0003 +[2026-02-28 22:34:02] (step=0016906) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.3077675601643515, LR: 0.0003 +[2026-02-28 22:34:10] (step=0016907) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 3.307963216591665, LR: 0.0003 +[2026-02-28 22:34:18] (step=0016908) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 3.3081588730189786, LR: 0.0003 +[2026-02-28 22:34:25] (step=0016909) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.308354529446292, LR: 0.0003 +[2026-02-28 22:34:33] (step=0016910) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.308550185873606, LR: 0.0003 +[2026-02-28 22:34:41] (step=0016911) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.3087458423009197, LR: 0.0003 +[2026-02-28 22:34:49] (step=0016912) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.3089414987282333, LR: 0.0003 +[2026-02-28 22:34:57] (step=0016913) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.309137155155547, LR: 0.0003 +[2026-02-28 22:35:05] (step=0016914) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.3093328115828604, LR: 0.0003 +[2026-02-28 22:35:12] (step=0016915) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.309528468010174, LR: 0.0003 +[2026-02-28 22:35:20] (step=0016916) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 3.309724124437488, LR: 0.0003 +[2026-02-28 22:35:28] (step=0016917) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.3099197808648015, LR: 0.0003 +[2026-02-28 22:35:36] (step=0016918) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.310115437292115, LR: 0.0003 +[2026-02-28 22:35:44] (step=0016919) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.3103110937194287, LR: 0.0003 +[2026-02-28 22:35:52] (step=0016920) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.3105067501467422, LR: 0.0003 +[2026-02-28 22:36:00] (step=0016921) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.310702406574056, LR: 0.0003 +[2026-02-28 22:36:07] (step=0016922) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.31089806300137, LR: 0.0003 +[2026-02-28 22:36:15] (step=0016923) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.3110937194286834, LR: 0.0003 +[2026-02-28 22:36:23] (step=0016924) Train Loss: 0.4483, Train Steps/Sec: 0.12, Epoch: 3.311289375855997, LR: 0.0003 +[2026-02-28 22:36:31] (step=0016925) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.3114850322833105, LR: 0.0003 +[2026-02-28 22:36:39] (step=0016926) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.311680688710624, LR: 0.0003 +[2026-02-28 22:36:47] (step=0016927) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.3118763451379376, LR: 0.0003 +[2026-02-28 22:36:55] (step=0016928) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 3.3120720015652516, LR: 0.0003 +[2026-02-28 22:37:02] (step=0016929) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.312267657992565, LR: 0.0003 +[2026-02-28 22:37:10] (step=0016930) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.3124633144198787, LR: 0.0003 +[2026-02-28 22:37:18] (step=0016931) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.3126589708471923, LR: 0.0003 +[2026-02-28 22:37:26] (step=0016932) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.312854627274506, LR: 0.0003 +[2026-02-28 22:37:34] (step=0016933) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.3130502837018194, LR: 0.0003 +[2026-02-28 22:37:42] (step=0016934) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.3132459401291334, LR: 0.0003 +[2026-02-28 22:37:49] (step=0016935) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.313441596556447, LR: 0.0003 +[2026-02-28 22:37:57] (step=0016936) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.3136372529837606, LR: 0.0003 +[2026-02-28 22:38:05] (step=0016937) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.313832909411074, LR: 0.0003 +[2026-02-28 22:38:13] (step=0016938) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.3140285658383877, LR: 0.0003 +[2026-02-28 22:38:21] (step=0016939) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.3142242222657012, LR: 0.0003 +[2026-02-28 22:38:29] (step=0016940) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.3144198786930152, LR: 0.0003 +[2026-02-28 22:38:36] (step=0016941) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.314615535120329, LR: 0.0003 +[2026-02-28 22:38:44] (step=0016942) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.3148111915476424, LR: 0.0003 +[2026-02-28 22:38:52] (step=0016943) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.315006847974956, LR: 0.0003 +[2026-02-28 22:39:00] (step=0016944) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.3152025044022695, LR: 0.0003 +[2026-02-28 22:39:08] (step=0016945) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.315398160829583, LR: 0.0003 +[2026-02-28 22:39:16] (step=0016946) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.315593817256897, LR: 0.0003 +[2026-02-28 22:39:23] (step=0016947) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.3157894736842106, LR: 0.0003 +[2026-02-28 22:39:31] (step=0016948) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.315985130111524, LR: 0.0003 +[2026-02-28 22:39:39] (step=0016949) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 3.3161807865388377, LR: 0.0003 +[2026-02-28 22:39:47] (step=0016950) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.3163764429661513, LR: 0.0003 +[2026-02-28 22:39:55] (step=0016951) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.316572099393465, LR: 0.0003 +[2026-02-28 22:40:03] (step=0016952) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.316767755820779, LR: 0.0003 +[2026-02-28 22:40:11] (step=0016953) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.3169634122480924, LR: 0.0003 +[2026-02-28 22:40:18] (step=0016954) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 3.317159068675406, LR: 0.0003 +[2026-02-28 22:40:26] (step=0016955) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 3.3173547251027196, LR: 0.0003 +[2026-02-28 22:40:34] (step=0016956) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.317550381530033, LR: 0.0003 +[2026-02-28 22:40:42] (step=0016957) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.3177460379573467, LR: 0.0003 +[2026-02-28 22:40:50] (step=0016958) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.3179416943846607, LR: 0.0003 +[2026-02-28 22:40:58] (step=0016959) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 3.3181373508119743, LR: 0.0003 +[2026-02-28 22:41:05] (step=0016960) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.318333007239288, LR: 0.0003 +[2026-02-28 22:41:13] (step=0016961) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.3185286636666014, LR: 0.0003 +[2026-02-28 22:41:21] (step=0016962) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.318724320093915, LR: 0.0003 +[2026-02-28 22:41:29] (step=0016963) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.318919976521229, LR: 0.0003 +[2026-02-28 22:41:37] (step=0016964) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.3191156329485425, LR: 0.0003 +[2026-02-28 22:41:45] (step=0016965) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.319311289375856, LR: 0.0003 +[2026-02-28 22:41:53] (step=0016966) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.3195069458031696, LR: 0.0003 +[2026-02-28 22:42:00] (step=0016967) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.319702602230483, LR: 0.0003 +[2026-02-28 22:42:08] (step=0016968) Train Loss: 0.4710, Train Steps/Sec: 0.13, Epoch: 3.3198982586577968, LR: 0.0003 +[2026-02-28 22:42:16] (step=0016969) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.3200939150851108, LR: 0.0003 +[2026-02-28 22:42:24] (step=0016970) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 3.3202895715124243, LR: 0.0003 +[2026-02-28 22:42:32] (step=0016971) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.320485227939738, LR: 0.0003 +[2026-02-28 22:42:40] (step=0016972) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.3206808843670514, LR: 0.0003 +[2026-02-28 22:42:48] (step=0016973) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.320876540794365, LR: 0.0003 +[2026-02-28 22:42:55] (step=0016974) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.3210721972216786, LR: 0.0003 +[2026-02-28 22:43:03] (step=0016975) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.3212678536489926, LR: 0.0003 +[2026-02-28 22:43:11] (step=0016976) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.321463510076306, LR: 0.0003 +[2026-02-28 22:43:19] (step=0016977) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 3.3216591665036197, LR: 0.0003 +[2026-02-28 22:43:27] (step=0016978) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.3218548229309333, LR: 0.0003 +[2026-02-28 22:43:35] (step=0016979) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.322050479358247, LR: 0.0003 +[2026-02-28 22:43:43] (step=0016980) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.3222461357855604, LR: 0.0003 +[2026-02-28 22:43:50] (step=0016981) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.3224417922128744, LR: 0.0003 +[2026-02-28 22:43:58] (step=0016982) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.322637448640188, LR: 0.0003 +[2026-02-28 22:44:06] (step=0016983) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.3228331050675015, LR: 0.0003 +[2026-02-28 22:44:14] (step=0016984) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.323028761494815, LR: 0.0003 +[2026-02-28 22:44:22] (step=0016985) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.3232244179221286, LR: 0.0003 +[2026-02-28 22:44:30] (step=0016986) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.323420074349442, LR: 0.0003 +[2026-02-28 22:44:37] (step=0016987) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.323615730776756, LR: 0.0003 +[2026-02-28 22:44:45] (step=0016988) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.3238113872040698, LR: 0.0003 +[2026-02-28 22:44:53] (step=0016989) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 3.3240070436313833, LR: 0.0003 +[2026-02-28 22:45:01] (step=0016990) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 3.324202700058697, LR: 0.0003 +[2026-02-28 22:45:09] (step=0016991) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.3243983564860105, LR: 0.0003 +[2026-02-28 22:45:17] (step=0016992) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.324594012913324, LR: 0.0003 +[2026-02-28 22:45:24] (step=0016993) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 3.324789669340638, LR: 0.0003 +[2026-02-28 22:45:32] (step=0016994) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.3249853257679516, LR: 0.0003 +[2026-02-28 22:45:40] (step=0016995) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.325180982195265, LR: 0.0003 +[2026-02-28 22:45:48] (step=0016996) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.3253766386225787, LR: 0.0003 +[2026-02-28 22:45:56] (step=0016997) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.3255722950498923, LR: 0.0003 +[2026-02-28 22:46:04] (step=0016998) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.325767951477206, LR: 0.0003 +[2026-02-28 22:46:12] (step=0016999) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.32596360790452, LR: 0.0003 +[2026-02-28 22:46:19] (step=0017000) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.3261592643318334, LR: 0.0003 +[2026-02-28 22:46:19] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0017000/ +[2026-02-28 22:46:27] (step=0017001) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.326354920759147, LR: 0.0003 +[2026-02-28 22:46:35] (step=0017002) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.3265505771864605, LR: 0.0003 +[2026-02-28 22:46:43] (step=0017003) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.326746233613774, LR: 0.0003 +[2026-02-28 22:46:51] (step=0017004) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.3269418900410876, LR: 0.0003 +[2026-02-28 22:46:59] (step=0017005) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.3271375464684017, LR: 0.0003 +[2026-02-28 22:47:06] (step=0017006) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.327333202895715, LR: 0.0003 +[2026-02-28 22:47:14] (step=0017007) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.3275288593230288, LR: 0.0003 +[2026-02-28 22:47:22] (step=0017008) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.3277245157503423, LR: 0.0003 +[2026-02-28 22:47:30] (step=0017009) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.327920172177656, LR: 0.0003 +[2026-02-28 22:47:38] (step=0017010) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.3281158286049695, LR: 0.0003 +[2026-02-28 22:47:46] (step=0017011) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.3283114850322835, LR: 0.0003 +[2026-02-28 22:47:54] (step=0017012) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.328507141459597, LR: 0.0003 +[2026-02-28 22:48:01] (step=0017013) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.3287027978869106, LR: 0.0003 +[2026-02-28 22:48:09] (step=0017014) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.328898454314224, LR: 0.0003 +[2026-02-28 22:48:17] (step=0017015) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 3.3290941107415377, LR: 0.0003 +[2026-02-28 22:48:25] (step=0017016) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.3292897671688513, LR: 0.0003 +[2026-02-28 22:48:33] (step=0017017) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.3294854235961653, LR: 0.0003 +[2026-02-28 22:48:41] (step=0017018) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.329681080023479, LR: 0.0003 +[2026-02-28 22:48:49] (step=0017019) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.3298767364507924, LR: 0.0003 +[2026-02-28 22:48:56] (step=0017020) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.330072392878106, LR: 0.0003 +[2026-02-28 22:49:04] (step=0017021) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 3.3302680493054195, LR: 0.0003 +[2026-02-28 22:49:12] (step=0017022) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.3304637057327335, LR: 0.0003 +[2026-02-28 22:49:20] (step=0017023) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.330659362160047, LR: 0.0003 +[2026-02-28 22:49:28] (step=0017024) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.3308550185873607, LR: 0.0003 +[2026-02-28 22:49:36] (step=0017025) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.3310506750146742, LR: 0.0003 +[2026-02-28 22:49:43] (step=0017026) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.331246331441988, LR: 0.0003 +[2026-02-28 22:49:51] (step=0017027) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.3314419878693013, LR: 0.0003 +[2026-02-28 22:49:59] (step=0017028) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 3.3316376442966154, LR: 0.0003 +[2026-02-28 22:50:07] (step=0017029) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.331833300723929, LR: 0.0003 +[2026-02-28 22:50:15] (step=0017030) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.3320289571512425, LR: 0.0003 +[2026-02-28 22:50:23] (step=0017031) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.332224613578556, LR: 0.0003 +[2026-02-28 22:50:31] (step=0017032) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 3.3324202700058696, LR: 0.0003 +[2026-02-28 22:50:38] (step=0017033) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.332615926433183, LR: 0.0003 +[2026-02-28 22:50:46] (step=0017034) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 3.332811582860497, LR: 0.0003 +[2026-02-28 22:50:54] (step=0017035) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.3330072392878107, LR: 0.0003 +[2026-02-28 22:51:02] (step=0017036) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.3332028957151243, LR: 0.0003 +[2026-02-28 22:51:10] (step=0017037) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.333398552142438, LR: 0.0003 +[2026-02-28 22:51:18] (step=0017038) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.3335942085697514, LR: 0.0003 +[2026-02-28 22:51:25] (step=0017039) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.333789864997065, LR: 0.0003 +[2026-02-28 22:51:33] (step=0017040) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.333985521424379, LR: 0.0003 +[2026-02-28 22:51:41] (step=0017041) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 3.3341811778516925, LR: 0.0003 +[2026-02-28 22:51:49] (step=0017042) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.334376834279006, LR: 0.0003 +[2026-02-28 22:51:57] (step=0017043) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 3.3345724907063197, LR: 0.0003 +[2026-02-28 22:52:05] (step=0017044) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.3347681471336332, LR: 0.0003 +[2026-02-28 22:52:13] (step=0017045) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.334963803560947, LR: 0.0003 +[2026-02-28 22:52:20] (step=0017046) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.335159459988261, LR: 0.0003 +[2026-02-28 22:52:28] (step=0017047) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.3353551164155744, LR: 0.0003 +[2026-02-28 22:52:36] (step=0017048) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.335550772842888, LR: 0.0003 +[2026-02-28 22:52:44] (step=0017049) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.3357464292702015, LR: 0.0003 +[2026-02-28 22:52:52] (step=0017050) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 3.335942085697515, LR: 0.0003 +[2026-02-28 22:53:00] (step=0017051) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.3361377421248286, LR: 0.0003 +[2026-02-28 22:53:07] (step=0017052) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.3363333985521426, LR: 0.0003 +[2026-02-28 22:53:15] (step=0017053) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.336529054979456, LR: 0.0003 +[2026-02-28 22:53:23] (step=0017054) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.3367247114067697, LR: 0.0003 +[2026-02-28 22:53:31] (step=0017055) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.3369203678340833, LR: 0.0003 +[2026-02-28 22:53:39] (step=0017056) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.337116024261397, LR: 0.0003 +[2026-02-28 22:53:47] (step=0017057) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.3373116806887104, LR: 0.0003 +[2026-02-28 22:53:54] (step=0017058) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.3375073371160244, LR: 0.0003 +[2026-02-28 22:54:02] (step=0017059) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.337702993543338, LR: 0.0003 +[2026-02-28 22:54:10] (step=0017060) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.3378986499706516, LR: 0.0003 +[2026-02-28 22:54:18] (step=0017061) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.338094306397965, LR: 0.0003 +[2026-02-28 22:54:26] (step=0017062) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.3382899628252787, LR: 0.0003 +[2026-02-28 22:54:34] (step=0017063) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.3384856192525922, LR: 0.0003 +[2026-02-28 22:54:42] (step=0017064) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.3386812756799062, LR: 0.0003 +[2026-02-28 22:54:49] (step=0017065) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.33887693210722, LR: 0.0003 +[2026-02-28 22:54:57] (step=0017066) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.3390725885345334, LR: 0.0003 +[2026-02-28 22:55:05] (step=0017067) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.339268244961847, LR: 0.0003 +[2026-02-28 22:55:13] (step=0017068) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.3394639013891605, LR: 0.0003 +[2026-02-28 22:55:21] (step=0017069) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.339659557816474, LR: 0.0003 +[2026-02-28 22:55:29] (step=0017070) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.339855214243788, LR: 0.0003 +[2026-02-28 22:55:36] (step=0017071) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.3400508706711016, LR: 0.0003 +[2026-02-28 22:55:44] (step=0017072) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.340246527098415, LR: 0.0003 +[2026-02-28 22:55:52] (step=0017073) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.3404421835257287, LR: 0.0003 +[2026-02-28 22:56:00] (step=0017074) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.3406378399530423, LR: 0.0003 +[2026-02-28 22:56:08] (step=0017075) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.3408334963803563, LR: 0.0003 +[2026-02-28 22:56:16] (step=0017076) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 3.34102915280767, LR: 0.0003 +[2026-02-28 22:56:24] (step=0017077) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.3412248092349834, LR: 0.0003 +[2026-02-28 22:56:31] (step=0017078) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.341420465662297, LR: 0.0003 +[2026-02-28 22:56:39] (step=0017079) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 3.3416161220896106, LR: 0.0003 +[2026-02-28 22:56:47] (step=0017080) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.341811778516924, LR: 0.0003 +[2026-02-28 22:56:55] (step=0017081) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.342007434944238, LR: 0.0003 +[2026-02-28 22:57:03] (step=0017082) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.3422030913715517, LR: 0.0003 +[2026-02-28 22:57:11] (step=0017083) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.3423987477988653, LR: 0.0003 +[2026-02-28 22:57:19] (step=0017084) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 3.342594404226179, LR: 0.0003 +[2026-02-28 22:57:26] (step=0017085) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.3427900606534924, LR: 0.0003 +[2026-02-28 22:57:34] (step=0017086) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.342985717080806, LR: 0.0003 +[2026-02-28 22:57:42] (step=0017087) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.34318137350812, LR: 0.0003 +[2026-02-28 22:57:50] (step=0017088) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.3433770299354335, LR: 0.0003 +[2026-02-28 22:57:58] (step=0017089) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.343572686362747, LR: 0.0003 +[2026-02-28 22:58:06] (step=0017090) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.3437683427900606, LR: 0.0003 +[2026-02-28 22:58:13] (step=0017091) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.343963999217374, LR: 0.0003 +[2026-02-28 22:58:21] (step=0017092) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.3441596556446878, LR: 0.0003 +[2026-02-28 22:58:29] (step=0017093) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.3443553120720018, LR: 0.0003 +[2026-02-28 22:58:37] (step=0017094) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.3445509684993153, LR: 0.0003 +[2026-02-28 22:58:45] (step=0017095) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.344746624926629, LR: 0.0003 +[2026-02-28 22:58:53] (step=0017096) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 3.3449422813539424, LR: 0.0003 +[2026-02-28 22:59:00] (step=0017097) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.345137937781256, LR: 0.0003 +[2026-02-28 22:59:08] (step=0017098) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.3453335942085696, LR: 0.0003 +[2026-02-28 22:59:16] (step=0017099) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.3455292506358836, LR: 0.0003 +[2026-02-28 22:59:24] (step=0017100) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.345724907063197, LR: 0.0003 +[2026-02-28 22:59:32] (step=0017101) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.3459205634905107, LR: 0.0003 +[2026-02-28 22:59:40] (step=0017102) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 3.3461162199178243, LR: 0.0003 +[2026-02-28 22:59:47] (step=0017103) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.346311876345138, LR: 0.0003 +[2026-02-28 22:59:55] (step=0017104) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 3.3465075327724514, LR: 0.0003 +[2026-02-28 23:00:03] (step=0017105) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.3467031891997654, LR: 0.0003 +[2026-02-28 23:00:11] (step=0017106) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.346898845627079, LR: 0.0003 +[2026-02-28 23:00:19] (step=0017107) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.3470945020543925, LR: 0.0003 +[2026-02-28 23:00:27] (step=0017108) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.347290158481706, LR: 0.0003 +[2026-02-28 23:00:34] (step=0017109) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.3474858149090196, LR: 0.0003 +[2026-02-28 23:00:42] (step=0017110) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.347681471336333, LR: 0.0003 +[2026-02-28 23:00:50] (step=0017111) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.347877127763647, LR: 0.0003 +[2026-02-28 23:00:58] (step=0017112) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.3480727841909608, LR: 0.0003 +[2026-02-28 23:01:06] (step=0017113) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.3482684406182743, LR: 0.0003 +[2026-02-28 23:01:14] (step=0017114) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 3.348464097045588, LR: 0.0003 +[2026-02-28 23:01:21] (step=0017115) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.3486597534729015, LR: 0.0003 +[2026-02-28 23:01:29] (step=0017116) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.348855409900215, LR: 0.0003 +[2026-02-28 23:01:37] (step=0017117) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.349051066327529, LR: 0.0003 +[2026-02-28 23:01:45] (step=0017118) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.3492467227548426, LR: 0.0003 +[2026-02-28 23:01:53] (step=0017119) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.349442379182156, LR: 0.0003 +[2026-02-28 23:02:01] (step=0017120) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.3496380356094697, LR: 0.0003 +[2026-02-28 23:02:08] (step=0017121) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.3498336920367833, LR: 0.0003 +[2026-02-28 23:02:16] (step=0017122) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.350029348464097, LR: 0.0003 +[2026-02-28 23:02:24] (step=0017123) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.350225004891411, LR: 0.0003 +[2026-02-28 23:02:32] (step=0017124) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 3.3504206613187244, LR: 0.0003 +[2026-02-28 23:02:40] (step=0017125) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.350616317746038, LR: 0.0003 +[2026-02-28 23:02:47] (step=0017126) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.3508119741733515, LR: 0.0003 +[2026-02-28 23:02:55] (step=0017127) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.351007630600665, LR: 0.0003 +[2026-02-28 23:03:03] (step=0017128) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.3512032870279787, LR: 0.0003 +[2026-02-28 23:03:11] (step=0017129) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.3513989434552927, LR: 0.0003 +[2026-02-28 23:03:19] (step=0017130) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.351594599882606, LR: 0.0003 +[2026-02-28 23:03:27] (step=0017131) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.35179025630992, LR: 0.0003 +[2026-02-28 23:03:34] (step=0017132) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.3519859127372333, LR: 0.0003 +[2026-02-28 23:03:42] (step=0017133) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.352181569164547, LR: 0.0003 +[2026-02-28 23:03:50] (step=0017134) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.352377225591861, LR: 0.0003 +[2026-02-28 23:03:58] (step=0017135) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.3525728820191745, LR: 0.0003 +[2026-02-28 23:04:06] (step=0017136) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.352768538446488, LR: 0.0003 +[2026-02-28 23:04:14] (step=0017137) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.3529641948738016, LR: 0.0003 +[2026-02-28 23:04:21] (step=0017138) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.353159851301115, LR: 0.0003 +[2026-02-28 23:04:29] (step=0017139) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.3533555077284287, LR: 0.0003 +[2026-02-28 23:04:37] (step=0017140) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.3535511641557427, LR: 0.0003 +[2026-02-28 23:04:45] (step=0017141) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.3537468205830563, LR: 0.0003 +[2026-02-28 23:04:53] (step=0017142) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.35394247701037, LR: 0.0003 +[2026-02-28 23:05:00] (step=0017143) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.3541381334376834, LR: 0.0003 +[2026-02-28 23:05:08] (step=0017144) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.354333789864997, LR: 0.0003 +[2026-02-28 23:05:16] (step=0017145) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.3545294462923105, LR: 0.0003 +[2026-02-28 23:05:24] (step=0017146) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.3547251027196245, LR: 0.0003 +[2026-02-28 23:05:32] (step=0017147) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 3.354920759146938, LR: 0.0003 +[2026-02-28 23:05:40] (step=0017148) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.3551164155742517, LR: 0.0003 +[2026-02-28 23:05:47] (step=0017149) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 3.3553120720015652, LR: 0.0003 +[2026-02-28 23:05:55] (step=0017150) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.355507728428879, LR: 0.0003 +[2026-02-28 23:06:03] (step=0017151) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.3557033848561924, LR: 0.0003 +[2026-02-28 23:06:11] (step=0017152) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.3558990412835064, LR: 0.0003 +[2026-02-28 23:06:19] (step=0017153) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.35609469771082, LR: 0.0003 +[2026-02-28 23:06:27] (step=0017154) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.3562903541381335, LR: 0.0003 +[2026-02-28 23:06:34] (step=0017155) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.356486010565447, LR: 0.0003 +[2026-02-28 23:06:42] (step=0017156) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.3566816669927606, LR: 0.0003 +[2026-02-28 23:06:50] (step=0017157) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.356877323420074, LR: 0.0003 +[2026-02-28 23:06:58] (step=0017158) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.357072979847388, LR: 0.0003 +[2026-02-28 23:07:06] (step=0017159) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.3572686362747017, LR: 0.0003 +[2026-02-28 23:07:14] (step=0017160) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.3574642927020153, LR: 0.0003 +[2026-02-28 23:07:22] (step=0017161) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.357659949129329, LR: 0.0003 +[2026-02-28 23:07:29] (step=0017162) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 3.3578556055566424, LR: 0.0003 +[2026-02-28 23:07:37] (step=0017163) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.358051261983956, LR: 0.0003 +[2026-02-28 23:07:45] (step=0017164) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.35824691841127, LR: 0.0003 +[2026-02-28 23:07:53] (step=0017165) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.3584425748385835, LR: 0.0003 +[2026-02-28 23:08:01] (step=0017166) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.358638231265897, LR: 0.0003 +[2026-02-28 23:08:09] (step=0017167) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.3588338876932107, LR: 0.0003 +[2026-02-28 23:08:16] (step=0017168) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 3.3590295441205242, LR: 0.0003 +[2026-02-28 23:08:24] (step=0017169) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.359225200547838, LR: 0.0003 +[2026-02-28 23:08:32] (step=0017170) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.359420856975152, LR: 0.0003 +[2026-02-28 23:08:40] (step=0017171) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.3596165134024654, LR: 0.0003 +[2026-02-28 23:08:48] (step=0017172) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.359812169829779, LR: 0.0003 +[2026-02-28 23:08:56] (step=0017173) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.3600078262570925, LR: 0.0003 +[2026-02-28 23:09:03] (step=0017174) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.360203482684406, LR: 0.0003 +[2026-02-28 23:09:11] (step=0017175) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.3603991391117196, LR: 0.0003 +[2026-02-28 23:09:19] (step=0017176) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.3605947955390336, LR: 0.0003 +[2026-02-28 23:09:27] (step=0017177) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.360790451966347, LR: 0.0003 +[2026-02-28 23:09:35] (step=0017178) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.3609861083936607, LR: 0.0003 +[2026-02-28 23:09:43] (step=0017179) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.3611817648209743, LR: 0.0003 +[2026-02-28 23:09:50] (step=0017180) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.361377421248288, LR: 0.0003 +[2026-02-28 23:09:58] (step=0017181) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.3615730776756014, LR: 0.0003 +[2026-02-28 23:10:06] (step=0017182) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.3617687341029154, LR: 0.0003 +[2026-02-28 23:10:14] (step=0017183) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.361964390530229, LR: 0.0003 +[2026-02-28 23:10:22] (step=0017184) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.3621600469575426, LR: 0.0003 +[2026-02-28 23:10:30] (step=0017185) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.362355703384856, LR: 0.0003 +[2026-02-28 23:10:38] (step=0017186) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.3625513598121697, LR: 0.0003 +[2026-02-28 23:10:45] (step=0017187) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 3.3627470162394837, LR: 0.0003 +[2026-02-28 23:10:53] (step=0017188) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.3629426726667973, LR: 0.0003 +[2026-02-28 23:11:01] (step=0017189) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.363138329094111, LR: 0.0003 +[2026-02-28 23:11:09] (step=0017190) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.3633339855214244, LR: 0.0003 +[2026-02-28 23:11:17] (step=0017191) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 3.363529641948738, LR: 0.0003 +[2026-02-28 23:11:25] (step=0017192) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.3637252983760515, LR: 0.0003 +[2026-02-28 23:11:32] (step=0017193) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.3639209548033655, LR: 0.0003 +[2026-02-28 23:11:40] (step=0017194) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.364116611230679, LR: 0.0003 +[2026-02-28 23:11:48] (step=0017195) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.3643122676579926, LR: 0.0003 +[2026-02-28 23:11:56] (step=0017196) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.364507924085306, LR: 0.0003 +[2026-02-28 23:12:04] (step=0017197) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 3.3647035805126198, LR: 0.0003 +[2026-02-28 23:12:12] (step=0017198) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.3648992369399333, LR: 0.0003 +[2026-02-28 23:12:20] (step=0017199) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.3650948933672473, LR: 0.0003 +[2026-02-28 23:12:27] (step=0017200) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.365290549794561, LR: 0.0003 +[2026-02-28 23:12:35] (step=0017201) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.3654862062218744, LR: 0.0003 +[2026-02-28 23:12:43] (step=0017202) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.365681862649188, LR: 0.0003 +[2026-02-28 23:12:51] (step=0017203) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.3658775190765016, LR: 0.0003 +[2026-02-28 23:12:59] (step=0017204) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.366073175503815, LR: 0.0003 +[2026-02-28 23:13:07] (step=0017205) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.366268831931129, LR: 0.0003 +[2026-02-28 23:13:15] (step=0017206) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.3664644883584427, LR: 0.0003 +[2026-02-28 23:13:22] (step=0017207) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.3666601447857563, LR: 0.0003 +[2026-02-28 23:13:30] (step=0017208) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.36685580121307, LR: 0.0003 +[2026-02-28 23:13:38] (step=0017209) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.3670514576403834, LR: 0.0003 +[2026-02-28 23:13:46] (step=0017210) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.367247114067697, LR: 0.0003 +[2026-02-28 23:13:54] (step=0017211) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.367442770495011, LR: 0.0003 +[2026-02-28 23:14:02] (step=0017212) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.3676384269223245, LR: 0.0003 +[2026-02-28 23:14:09] (step=0017213) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.367834083349638, LR: 0.0003 +[2026-02-28 23:14:17] (step=0017214) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.3680297397769516, LR: 0.0003 +[2026-02-28 23:14:25] (step=0017215) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.368225396204265, LR: 0.0003 +[2026-02-28 23:14:33] (step=0017216) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.3684210526315788, LR: 0.0003 +[2026-02-28 23:14:41] (step=0017217) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.3686167090588928, LR: 0.0003 +[2026-02-28 23:14:49] (step=0017218) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.3688123654862063, LR: 0.0003 +[2026-02-28 23:14:56] (step=0017219) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.36900802191352, LR: 0.0003 +[2026-02-28 23:15:04] (step=0017220) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.3692036783408335, LR: 0.0003 +[2026-02-28 23:15:12] (step=0017221) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.369399334768147, LR: 0.0003 +[2026-02-28 23:15:20] (step=0017222) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.3695949911954606, LR: 0.0003 +[2026-02-28 23:15:28] (step=0017223) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.3697906476227746, LR: 0.0003 +[2026-02-28 23:15:36] (step=0017224) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.369986304050088, LR: 0.0003 +[2026-02-28 23:15:44] (step=0017225) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.3701819604774017, LR: 0.0003 +[2026-02-28 23:15:51] (step=0017226) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.3703776169047153, LR: 0.0003 +[2026-02-28 23:15:59] (step=0017227) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.370573273332029, LR: 0.0003 +[2026-02-28 23:16:07] (step=0017228) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 3.3707689297593424, LR: 0.0003 +[2026-02-28 23:16:15] (step=0017229) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.3709645861866564, LR: 0.0003 +[2026-02-28 23:16:23] (step=0017230) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.37116024261397, LR: 0.0003 +[2026-02-28 23:16:31] (step=0017231) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.3713558990412835, LR: 0.0003 +[2026-02-28 23:16:38] (step=0017232) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.371551555468597, LR: 0.0003 +[2026-02-28 23:16:46] (step=0017233) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.3717472118959106, LR: 0.0003 +[2026-02-28 23:16:54] (step=0017234) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.371942868323224, LR: 0.0003 +[2026-02-28 23:17:02] (step=0017235) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.372138524750538, LR: 0.0003 +[2026-02-28 23:17:10] (step=0017236) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.3723341811778518, LR: 0.0003 +[2026-02-28 23:17:18] (step=0017237) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.3725298376051653, LR: 0.0003 +[2026-02-28 23:17:26] (step=0017238) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.372725494032479, LR: 0.0003 +[2026-02-28 23:17:33] (step=0017239) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.3729211504597925, LR: 0.0003 +[2026-02-28 23:17:41] (step=0017240) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.373116806887106, LR: 0.0003 +[2026-02-28 23:17:49] (step=0017241) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.37331246331442, LR: 0.0003 +[2026-02-28 23:17:57] (step=0017242) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.3735081197417336, LR: 0.0003 +[2026-02-28 23:18:05] (step=0017243) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.373703776169047, LR: 0.0003 +[2026-02-28 23:18:13] (step=0017244) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.3738994325963607, LR: 0.0003 +[2026-02-28 23:18:21] (step=0017245) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.3740950890236743, LR: 0.0003 +[2026-02-28 23:18:28] (step=0017246) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.3742907454509883, LR: 0.0003 +[2026-02-28 23:18:36] (step=0017247) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.374486401878302, LR: 0.0003 +[2026-02-28 23:18:44] (step=0017248) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.3746820583056154, LR: 0.0003 +[2026-02-28 23:18:52] (step=0017249) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.374877714732929, LR: 0.0003 +[2026-02-28 23:19:00] (step=0017250) Train Loss: 0.4694, Train Steps/Sec: 0.13, Epoch: 3.3750733711602425, LR: 0.0003 +[2026-02-28 23:19:08] (step=0017251) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.375269027587556, LR: 0.0003 +[2026-02-28 23:19:15] (step=0017252) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.37546468401487, LR: 0.0003 +[2026-02-28 23:19:23] (step=0017253) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.3756603404421837, LR: 0.0003 +[2026-02-28 23:19:31] (step=0017254) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.375855996869497, LR: 0.0003 +[2026-02-28 23:19:39] (step=0017255) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 3.376051653296811, LR: 0.0003 +[2026-02-28 23:19:47] (step=0017256) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.3762473097241243, LR: 0.0003 +[2026-02-28 23:19:55] (step=0017257) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.376442966151438, LR: 0.0003 +[2026-02-28 23:20:02] (step=0017258) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.376638622578752, LR: 0.0003 +[2026-02-28 23:20:10] (step=0017259) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.3768342790060655, LR: 0.0003 +[2026-02-28 23:20:18] (step=0017260) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.377029935433379, LR: 0.0003 +[2026-02-28 23:20:26] (step=0017261) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.3772255918606926, LR: 0.0003 +[2026-02-28 23:20:34] (step=0017262) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.377421248288006, LR: 0.0003 +[2026-02-28 23:20:42] (step=0017263) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.3776169047153197, LR: 0.0003 +[2026-02-28 23:20:50] (step=0017264) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.3778125611426337, LR: 0.0003 +[2026-02-28 23:20:57] (step=0017265) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.3780082175699473, LR: 0.0003 +[2026-02-28 23:21:05] (step=0017266) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.378203873997261, LR: 0.0003 +[2026-02-28 23:21:13] (step=0017267) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.3783995304245744, LR: 0.0003 +[2026-02-28 23:21:21] (step=0017268) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.378595186851888, LR: 0.0003 +[2026-02-28 23:21:29] (step=0017269) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.3787908432792015, LR: 0.0003 +[2026-02-28 23:21:37] (step=0017270) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.3789864997065155, LR: 0.0003 +[2026-02-28 23:21:44] (step=0017271) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.379182156133829, LR: 0.0003 +[2026-02-28 23:21:52] (step=0017272) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.3793778125611427, LR: 0.0003 +[2026-02-28 23:22:00] (step=0017273) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.3795734689884562, LR: 0.0003 +[2026-02-28 23:22:08] (step=0017274) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.37976912541577, LR: 0.0003 +[2026-02-28 23:22:16] (step=0017275) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.3799647818430834, LR: 0.0003 +[2026-02-28 23:22:24] (step=0017276) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.3801604382703974, LR: 0.0003 +[2026-02-28 23:22:31] (step=0017277) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.380356094697711, LR: 0.0003 +[2026-02-28 23:22:39] (step=0017278) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.3805517511250245, LR: 0.0003 +[2026-02-28 23:22:47] (step=0017279) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.380747407552338, LR: 0.0003 +[2026-02-28 23:22:55] (step=0017280) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.3809430639796516, LR: 0.0003 +[2026-02-28 23:23:03] (step=0017281) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.381138720406965, LR: 0.0003 +[2026-02-28 23:23:11] (step=0017282) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.381334376834279, LR: 0.0003 +[2026-02-28 23:23:18] (step=0017283) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.3815300332615927, LR: 0.0003 +[2026-02-28 23:23:26] (step=0017284) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.3817256896889063, LR: 0.0003 +[2026-02-28 23:23:34] (step=0017285) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.38192134611622, LR: 0.0003 +[2026-02-28 23:23:42] (step=0017286) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.3821170025435334, LR: 0.0003 +[2026-02-28 23:23:50] (step=0017287) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 3.382312658970847, LR: 0.0003 +[2026-02-28 23:23:58] (step=0017288) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.382508315398161, LR: 0.0003 +[2026-02-28 23:24:06] (step=0017289) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.3827039718254746, LR: 0.0003 +[2026-02-28 23:24:13] (step=0017290) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.382899628252788, LR: 0.0003 +[2026-02-28 23:24:21] (step=0017291) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.3830952846801017, LR: 0.0003 +[2026-02-28 23:24:29] (step=0017292) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.3832909411074152, LR: 0.0003 +[2026-02-28 23:24:37] (step=0017293) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.383486597534729, LR: 0.0003 +[2026-02-28 23:24:45] (step=0017294) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 3.383682253962043, LR: 0.0003 +[2026-02-28 23:24:53] (step=0017295) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.3838779103893564, LR: 0.0003 +[2026-02-28 23:25:00] (step=0017296) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.38407356681667, LR: 0.0003 +[2026-02-28 23:25:08] (step=0017297) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.3842692232439835, LR: 0.0003 +[2026-02-28 23:25:16] (step=0017298) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.384464879671297, LR: 0.0003 +[2026-02-28 23:25:24] (step=0017299) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.384660536098611, LR: 0.0003 +[2026-02-28 23:25:32] (step=0017300) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.3848561925259246, LR: 0.0003 +[2026-02-28 23:25:40] (step=0017301) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.385051848953238, LR: 0.0003 +[2026-02-28 23:25:48] (step=0017302) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.3852475053805517, LR: 0.0003 +[2026-02-28 23:25:55] (step=0017303) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.3854431618078653, LR: 0.0003 +[2026-02-28 23:26:03] (step=0017304) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.385638818235179, LR: 0.0003 +[2026-02-28 23:26:11] (step=0017305) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.385834474662493, LR: 0.0003 +[2026-02-28 23:26:19] (step=0017306) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.3860301310898064, LR: 0.0003 +[2026-02-28 23:26:27] (step=0017307) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 3.38622578751712, LR: 0.0003 +[2026-02-28 23:26:34] (step=0017308) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.3864214439444336, LR: 0.0003 +[2026-02-28 23:26:42] (step=0017309) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.386617100371747, LR: 0.0003 +[2026-02-28 23:26:50] (step=0017310) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.3868127567990607, LR: 0.0003 +[2026-02-28 23:26:58] (step=0017311) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.3870084132263747, LR: 0.0003 +[2026-02-28 23:27:06] (step=0017312) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.3872040696536883, LR: 0.0003 +[2026-02-28 23:27:14] (step=0017313) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.387399726081002, LR: 0.0003 +[2026-02-28 23:27:22] (step=0017314) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.3875953825083154, LR: 0.0003 +[2026-02-28 23:27:29] (step=0017315) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.387791038935629, LR: 0.0003 +[2026-02-28 23:27:37] (step=0017316) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 3.3879866953629425, LR: 0.0003 +[2026-02-28 23:27:45] (step=0017317) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 3.3881823517902565, LR: 0.0003 +[2026-02-28 23:27:53] (step=0017318) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.38837800821757, LR: 0.0003 +[2026-02-28 23:28:01] (step=0017319) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.3885736646448836, LR: 0.0003 +[2026-02-28 23:28:09] (step=0017320) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.388769321072197, LR: 0.0003 +[2026-02-28 23:28:16] (step=0017321) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.3889649774995108, LR: 0.0003 +[2026-02-28 23:28:24] (step=0017322) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 3.3891606339268243, LR: 0.0003 +[2026-02-28 23:28:32] (step=0017323) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.3893562903541383, LR: 0.0003 +[2026-02-28 23:28:40] (step=0017324) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 3.389551946781452, LR: 0.0003 +[2026-02-28 23:28:48] (step=0017325) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.3897476032087654, LR: 0.0003 +[2026-02-28 23:28:56] (step=0017326) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.389943259636079, LR: 0.0003 +[2026-02-28 23:29:03] (step=0017327) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 3.3901389160633926, LR: 0.0003 +[2026-02-28 23:29:11] (step=0017328) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.390334572490706, LR: 0.0003 +[2026-02-28 23:29:19] (step=0017329) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.39053022891802, LR: 0.0003 +[2026-02-28 23:29:27] (step=0017330) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.3907258853453337, LR: 0.0003 +[2026-02-28 23:29:35] (step=0017331) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.3909215417726473, LR: 0.0003 +[2026-02-28 23:29:43] (step=0017332) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.391117198199961, LR: 0.0003 +[2026-02-28 23:29:51] (step=0017333) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.3913128546272744, LR: 0.0003 +[2026-02-28 23:29:58] (step=0017334) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.391508511054588, LR: 0.0003 +[2026-02-28 23:30:06] (step=0017335) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 3.391704167481902, LR: 0.0003 +[2026-02-28 23:30:14] (step=0017336) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.3918998239092155, LR: 0.0003 +[2026-02-28 23:30:22] (step=0017337) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.392095480336529, LR: 0.0003 +[2026-02-28 23:30:30] (step=0017338) Train Loss: 0.4713, Train Steps/Sec: 0.13, Epoch: 3.3922911367638426, LR: 0.0003 +[2026-02-28 23:30:38] (step=0017339) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.392486793191156, LR: 0.0003 +[2026-02-28 23:30:45] (step=0017340) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.3926824496184698, LR: 0.0003 +[2026-02-28 23:30:53] (step=0017341) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.3928781060457838, LR: 0.0003 +[2026-02-28 23:31:01] (step=0017342) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.3930737624730973, LR: 0.0003 +[2026-02-28 23:31:09] (step=0017343) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 3.393269418900411, LR: 0.0003 +[2026-02-28 23:31:17] (step=0017344) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.3934650753277245, LR: 0.0003 +[2026-02-28 23:31:25] (step=0017345) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.393660731755038, LR: 0.0003 +[2026-02-28 23:31:33] (step=0017346) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.3938563881823516, LR: 0.0003 +[2026-02-28 23:31:40] (step=0017347) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.3940520446096656, LR: 0.0003 +[2026-02-28 23:31:48] (step=0017348) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.394247701036979, LR: 0.0003 +[2026-02-28 23:31:56] (step=0017349) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.3944433574642927, LR: 0.0003 +[2026-02-28 23:32:04] (step=0017350) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.3946390138916063, LR: 0.0003 +[2026-02-28 23:32:12] (step=0017351) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.39483467031892, LR: 0.0003 +[2026-02-28 23:32:20] (step=0017352) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.3950303267462334, LR: 0.0003 +[2026-02-28 23:32:28] (step=0017353) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 3.3952259831735474, LR: 0.0003 +[2026-02-28 23:32:35] (step=0017354) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.395421639600861, LR: 0.0003 +[2026-02-28 23:32:43] (step=0017355) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.3956172960281745, LR: 0.0003 +[2026-02-28 23:32:51] (step=0017356) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.395812952455488, LR: 0.0003 +[2026-02-28 23:32:59] (step=0017357) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.3960086088828016, LR: 0.0003 +[2026-02-28 23:33:07] (step=0017358) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.3962042653101157, LR: 0.0003 +[2026-02-28 23:33:15] (step=0017359) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.396399921737429, LR: 0.0003 +[2026-02-28 23:33:22] (step=0017360) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.396595578164743, LR: 0.0003 +[2026-02-28 23:33:30] (step=0017361) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.3967912345920563, LR: 0.0003 +[2026-02-28 23:33:38] (step=0017362) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.39698689101937, LR: 0.0003 +[2026-02-28 23:33:46] (step=0017363) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.3971825474466835, LR: 0.0003 +[2026-02-28 23:33:54] (step=0017364) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.3973782038739975, LR: 0.0003 +[2026-02-28 23:34:02] (step=0017365) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.397573860301311, LR: 0.0003 +[2026-02-28 23:34:09] (step=0017366) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 3.3977695167286246, LR: 0.0003 +[2026-02-28 23:34:17] (step=0017367) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.397965173155938, LR: 0.0003 +[2026-02-28 23:34:25] (step=0017368) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.3981608295832517, LR: 0.0003 +[2026-02-28 23:34:33] (step=0017369) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.3983564860105653, LR: 0.0003 +[2026-02-28 23:34:41] (step=0017370) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.3985521424378793, LR: 0.0003 +[2026-02-28 23:34:49] (step=0017371) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.398747798865193, LR: 0.0003 +[2026-02-28 23:34:56] (step=0017372) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.3989434552925064, LR: 0.0003 +[2026-02-28 23:35:04] (step=0017373) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.39913911171982, LR: 0.0003 +[2026-02-28 23:35:12] (step=0017374) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.3993347681471335, LR: 0.0003 +[2026-02-28 23:35:20] (step=0017375) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.399530424574447, LR: 0.0003 +[2026-02-28 23:35:28] (step=0017376) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 3.399726081001761, LR: 0.0003 +[2026-02-28 23:35:36] (step=0017377) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.3999217374290747, LR: 0.0003 +[2026-02-28 23:35:43] (step=0017378) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.4001173938563882, LR: 0.0003 +[2026-02-28 23:35:51] (step=0017379) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 3.400313050283702, LR: 0.0003 +[2026-02-28 23:35:59] (step=0017380) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.4005087067110153, LR: 0.0003 +[2026-02-28 23:36:07] (step=0017381) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.400704363138329, LR: 0.0003 +[2026-02-28 23:36:15] (step=0017382) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.400900019565643, LR: 0.0003 +[2026-02-28 23:36:23] (step=0017383) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.4010956759929565, LR: 0.0003 +[2026-02-28 23:36:31] (step=0017384) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.40129133242027, LR: 0.0003 +[2026-02-28 23:36:38] (step=0017385) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.4014869888475836, LR: 0.0003 +[2026-02-28 23:36:46] (step=0017386) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.401682645274897, LR: 0.0003 +[2026-02-28 23:36:54] (step=0017387) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.4018783017022107, LR: 0.0003 +[2026-02-28 23:37:02] (step=0017388) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 3.4020739581295247, LR: 0.0003 +[2026-02-28 23:37:10] (step=0017389) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.4022696145568383, LR: 0.0003 +[2026-02-28 23:37:18] (step=0017390) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 3.402465270984152, LR: 0.0003 +[2026-02-28 23:37:25] (step=0017391) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.4026609274114654, LR: 0.0003 +[2026-02-28 23:37:33] (step=0017392) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.402856583838779, LR: 0.0003 +[2026-02-28 23:37:41] (step=0017393) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.4030522402660925, LR: 0.0003 +[2026-02-28 23:37:49] (step=0017394) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.4032478966934065, LR: 0.0003 +[2026-02-28 23:37:57] (step=0017395) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.40344355312072, LR: 0.0003 +[2026-02-28 23:38:05] (step=0017396) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.4036392095480337, LR: 0.0003 +[2026-02-28 23:38:13] (step=0017397) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.4038348659753472, LR: 0.0003 +[2026-02-28 23:38:21] (step=0017398) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 3.404030522402661, LR: 0.0003 +[2026-02-28 23:38:28] (step=0017399) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.4042261788299744, LR: 0.0003 +[2026-02-28 23:38:36] (step=0017400) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.4044218352572884, LR: 0.0003 +[2026-02-28 23:38:44] (step=0017401) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.404617491684602, LR: 0.0003 +[2026-02-28 23:38:52] (step=0017402) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.4048131481119155, LR: 0.0003 +[2026-02-28 23:39:00] (step=0017403) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.405008804539229, LR: 0.0003 +[2026-02-28 23:39:08] (step=0017404) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.4052044609665426, LR: 0.0003 +[2026-02-28 23:39:15] (step=0017405) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.405400117393856, LR: 0.0003 +[2026-02-28 23:39:23] (step=0017406) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.40559577382117, LR: 0.0003 +[2026-02-28 23:39:31] (step=0017407) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.4057914302484837, LR: 0.0003 +[2026-02-28 23:39:39] (step=0017408) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.4059870866757973, LR: 0.0003 +[2026-02-28 23:39:47] (step=0017409) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 3.406182743103111, LR: 0.0003 +[2026-02-28 23:39:55] (step=0017410) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.4063783995304244, LR: 0.0003 +[2026-02-28 23:40:02] (step=0017411) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.406574055957738, LR: 0.0003 +[2026-02-28 23:40:10] (step=0017412) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.406769712385052, LR: 0.0003 +[2026-02-28 23:40:18] (step=0017413) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.4069653688123656, LR: 0.0003 +[2026-02-28 23:40:26] (step=0017414) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 3.407161025239679, LR: 0.0003 +[2026-02-28 23:40:34] (step=0017415) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.4073566816669927, LR: 0.0003 +[2026-02-28 23:40:42] (step=0017416) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.4075523380943062, LR: 0.0003 +[2026-02-28 23:40:49] (step=0017417) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.4077479945216202, LR: 0.0003 +[2026-02-28 23:40:57] (step=0017418) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.407943650948934, LR: 0.0003 +[2026-02-28 23:41:05] (step=0017419) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.4081393073762474, LR: 0.0003 +[2026-02-28 23:41:13] (step=0017420) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.408334963803561, LR: 0.0003 +[2026-02-28 23:41:21] (step=0017421) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.4085306202308745, LR: 0.0003 +[2026-02-28 23:41:29] (step=0017422) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.408726276658188, LR: 0.0003 +[2026-02-28 23:41:36] (step=0017423) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.408921933085502, LR: 0.0003 +[2026-02-28 23:41:44] (step=0017424) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.4091175895128156, LR: 0.0003 +[2026-02-28 23:41:52] (step=0017425) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.409313245940129, LR: 0.0003 +[2026-02-28 23:42:00] (step=0017426) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 3.4095089023674428, LR: 0.0003 +[2026-02-28 23:42:08] (step=0017427) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.4097045587947563, LR: 0.0003 +[2026-02-28 23:42:16] (step=0017428) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.40990021522207, LR: 0.0003 +[2026-02-28 23:42:23] (step=0017429) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.410095871649384, LR: 0.0003 +[2026-02-28 23:42:31] (step=0017430) Train Loss: 0.4744, Train Steps/Sec: 0.13, Epoch: 3.4102915280766974, LR: 0.0003 +[2026-02-28 23:42:39] (step=0017431) Train Loss: 0.4594, Train Steps/Sec: 0.12, Epoch: 3.410487184504011, LR: 0.0003 +[2026-02-28 23:42:47] (step=0017432) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.4106828409313246, LR: 0.0003 +[2026-02-28 23:42:55] (step=0017433) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.410878497358638, LR: 0.0003 +[2026-02-28 23:43:03] (step=0017434) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.4110741537859517, LR: 0.0003 +[2026-02-28 23:43:11] (step=0017435) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.4112698102132657, LR: 0.0003 +[2026-02-28 23:43:19] (step=0017436) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.4114654666405793, LR: 0.0003 +[2026-02-28 23:43:26] (step=0017437) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 3.411661123067893, LR: 0.0003 +[2026-02-28 23:43:34] (step=0017438) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.4118567794952064, LR: 0.0003 +[2026-02-28 23:43:42] (step=0017439) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.41205243592252, LR: 0.0003 +[2026-02-28 23:43:50] (step=0017440) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.4122480923498335, LR: 0.0003 +[2026-02-28 23:43:58] (step=0017441) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.4124437487771475, LR: 0.0003 +[2026-02-28 23:44:06] (step=0017442) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.412639405204461, LR: 0.0003 +[2026-02-28 23:44:13] (step=0017443) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.4128350616317746, LR: 0.0003 +[2026-02-28 23:44:21] (step=0017444) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.413030718059088, LR: 0.0003 +[2026-02-28 23:44:29] (step=0017445) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.4132263744864018, LR: 0.0003 +[2026-02-28 23:44:37] (step=0017446) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.4134220309137153, LR: 0.0003 +[2026-02-28 23:44:45] (step=0017447) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 3.4136176873410293, LR: 0.0003 +[2026-02-28 23:44:53] (step=0017448) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.413813343768343, LR: 0.0003 +[2026-02-28 23:45:01] (step=0017449) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 3.4140090001956565, LR: 0.0003 +[2026-02-28 23:45:08] (step=0017450) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.41420465662297, LR: 0.0003 +[2026-02-28 23:45:16] (step=0017451) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.4144003130502836, LR: 0.0003 +[2026-02-28 23:45:24] (step=0017452) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 3.414595969477597, LR: 0.0003 +[2026-02-28 23:45:32] (step=0017453) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.414791625904911, LR: 0.0003 +[2026-02-28 23:45:40] (step=0017454) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.4149872823322247, LR: 0.0003 +[2026-02-28 23:45:48] (step=0017455) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.4151829387595383, LR: 0.0003 +[2026-02-28 23:45:55] (step=0017456) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.415378595186852, LR: 0.0003 +[2026-02-28 23:46:03] (step=0017457) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.4155742516141654, LR: 0.0003 +[2026-02-28 23:46:11] (step=0017458) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.415769908041479, LR: 0.0003 +[2026-02-28 23:46:19] (step=0017459) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.415965564468793, LR: 0.0003 +[2026-02-28 23:46:27] (step=0017460) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.4161612208961065, LR: 0.0003 +[2026-02-28 23:46:35] (step=0017461) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.41635687732342, LR: 0.0003 +[2026-02-28 23:46:43] (step=0017462) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 3.4165525337507336, LR: 0.0003 +[2026-02-28 23:46:50] (step=0017463) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 3.416748190178047, LR: 0.0003 +[2026-02-28 23:46:58] (step=0017464) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.4169438466053608, LR: 0.0003 +[2026-02-28 23:47:06] (step=0017465) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.4171395030326748, LR: 0.0003 +[2026-02-28 23:47:14] (step=0017466) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.4173351594599883, LR: 0.0003 +[2026-02-28 23:47:22] (step=0017467) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.417530815887302, LR: 0.0003 +[2026-02-28 23:47:30] (step=0017468) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 3.4177264723146155, LR: 0.0003 +[2026-02-28 23:47:37] (step=0017469) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.417922128741929, LR: 0.0003 +[2026-02-28 23:47:45] (step=0017470) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.418117785169243, LR: 0.0003 +[2026-02-28 23:47:53] (step=0017471) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.4183134415965566, LR: 0.0003 +[2026-02-28 23:48:01] (step=0017472) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.41850909802387, LR: 0.0003 +[2026-02-28 23:48:09] (step=0017473) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.4187047544511837, LR: 0.0003 +[2026-02-28 23:48:17] (step=0017474) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.4189004108784973, LR: 0.0003 +[2026-02-28 23:48:24] (step=0017475) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.419096067305811, LR: 0.0003 +[2026-02-28 23:48:32] (step=0017476) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.419291723733125, LR: 0.0003 +[2026-02-28 23:48:40] (step=0017477) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.4194873801604384, LR: 0.0003 +[2026-02-28 23:48:48] (step=0017478) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.419683036587752, LR: 0.0003 +[2026-02-28 23:48:56] (step=0017479) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.4198786930150655, LR: 0.0003 +[2026-02-28 23:49:04] (step=0017480) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.420074349442379, LR: 0.0003 +[2026-02-28 23:49:12] (step=0017481) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.4202700058696927, LR: 0.0003 +[2026-02-28 23:49:19] (step=0017482) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.4204656622970067, LR: 0.0003 +[2026-02-28 23:49:27] (step=0017483) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.42066131872432, LR: 0.0003 +[2026-02-28 23:49:35] (step=0017484) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.420856975151634, LR: 0.0003 +[2026-02-28 23:49:43] (step=0017485) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.4210526315789473, LR: 0.0003 +[2026-02-28 23:49:51] (step=0017486) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.421248288006261, LR: 0.0003 +[2026-02-28 23:49:58] (step=0017487) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.4214439444335745, LR: 0.0003 +[2026-02-28 23:50:06] (step=0017488) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.4216396008608885, LR: 0.0003 +[2026-02-28 23:50:14] (step=0017489) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.421835257288202, LR: 0.0003 +[2026-02-28 23:50:22] (step=0017490) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.4220309137155156, LR: 0.0003 +[2026-02-28 23:50:30] (step=0017491) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.422226570142829, LR: 0.0003 +[2026-02-28 23:50:38] (step=0017492) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.4224222265701427, LR: 0.0003 +[2026-02-28 23:50:45] (step=0017493) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.4226178829974563, LR: 0.0003 +[2026-02-28 23:50:53] (step=0017494) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.4228135394247703, LR: 0.0003 +[2026-02-28 23:51:01] (step=0017495) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.423009195852084, LR: 0.0003 +[2026-02-28 23:51:09] (step=0017496) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.4232048522793974, LR: 0.0003 +[2026-02-28 23:51:17] (step=0017497) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.423400508706711, LR: 0.0003 +[2026-02-28 23:51:25] (step=0017498) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.4235961651340245, LR: 0.0003 +[2026-02-28 23:51:33] (step=0017499) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.423791821561338, LR: 0.0003 +[2026-02-28 23:51:40] (step=0017500) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.423987477988652, LR: 0.0003 +[2026-02-28 23:51:40] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0017500/ +[2026-02-28 23:51:48] (step=0017501) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.4241831344159657, LR: 0.0003 +[2026-02-28 23:51:56] (step=0017502) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.4243787908432792, LR: 0.0003 +[2026-02-28 23:52:04] (step=0017503) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 3.424574447270593, LR: 0.0003 +[2026-02-28 23:52:12] (step=0017504) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.4247701036979064, LR: 0.0003 +[2026-02-28 23:52:20] (step=0017505) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.42496576012522, LR: 0.0003 +[2026-02-28 23:52:28] (step=0017506) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.425161416552534, LR: 0.0003 +[2026-02-28 23:52:35] (step=0017507) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 3.4253570729798475, LR: 0.0003 +[2026-02-28 23:52:43] (step=0017508) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 3.425552729407161, LR: 0.0003 +[2026-02-28 23:52:51] (step=0017509) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 3.4257483858344746, LR: 0.0003 +[2026-02-28 23:52:59] (step=0017510) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.425944042261788, LR: 0.0003 +[2026-02-28 23:53:07] (step=0017511) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.4261396986891017, LR: 0.0003 +[2026-02-28 23:53:15] (step=0017512) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.4263353551164157, LR: 0.0003 +[2026-02-28 23:53:22] (step=0017513) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.4265310115437293, LR: 0.0003 +[2026-02-28 23:53:30] (step=0017514) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.426726667971043, LR: 0.0003 +[2026-02-28 23:53:38] (step=0017515) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.4269223243983564, LR: 0.0003 +[2026-02-28 23:53:46] (step=0017516) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.42711798082567, LR: 0.0003 +[2026-02-28 23:53:54] (step=0017517) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.4273136372529835, LR: 0.0003 +[2026-02-28 23:54:02] (step=0017518) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.4275092936802976, LR: 0.0003 +[2026-02-28 23:54:09] (step=0017519) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.427704950107611, LR: 0.0003 +[2026-02-28 23:54:17] (step=0017520) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.4279006065349247, LR: 0.0003 +[2026-02-28 23:54:25] (step=0017521) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.4280962629622382, LR: 0.0003 +[2026-02-28 23:54:33] (step=0017522) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.428291919389552, LR: 0.0003 +[2026-02-28 23:54:41] (step=0017523) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.4284875758168654, LR: 0.0003 +[2026-02-28 23:54:49] (step=0017524) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.4286832322441794, LR: 0.0003 +[2026-02-28 23:54:56] (step=0017525) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.428878888671493, LR: 0.0003 +[2026-02-28 23:55:04] (step=0017526) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 3.4290745450988065, LR: 0.0003 +[2026-02-28 23:55:12] (step=0017527) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.42927020152612, LR: 0.0003 +[2026-02-28 23:55:20] (step=0017528) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.4294658579534336, LR: 0.0003 +[2026-02-28 23:55:28] (step=0017529) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.4296615143807476, LR: 0.0003 +[2026-02-28 23:55:36] (step=0017530) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.429857170808061, LR: 0.0003 +[2026-02-28 23:55:44] (step=0017531) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.4300528272353747, LR: 0.0003 +[2026-02-28 23:55:51] (step=0017532) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.4302484836626883, LR: 0.0003 +[2026-02-28 23:55:59] (step=0017533) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.430444140090002, LR: 0.0003 +[2026-02-28 23:56:07] (step=0017534) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.4306397965173154, LR: 0.0003 +[2026-02-28 23:56:15] (step=0017535) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.4308354529446294, LR: 0.0003 +[2026-02-28 23:56:23] (step=0017536) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.431031109371943, LR: 0.0003 +[2026-02-28 23:56:31] (step=0017537) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.4312267657992566, LR: 0.0003 +[2026-02-28 23:56:38] (step=0017538) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.43142242222657, LR: 0.0003 +[2026-02-28 23:56:46] (step=0017539) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 3.4316180786538837, LR: 0.0003 +[2026-02-28 23:56:54] (step=0017540) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.4318137350811972, LR: 0.0003 +[2026-02-28 23:57:02] (step=0017541) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 3.4320093915085113, LR: 0.0003 +[2026-02-28 23:57:10] (step=0017542) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.432205047935825, LR: 0.0003 +[2026-02-28 23:57:18] (step=0017543) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.4324007043631384, LR: 0.0003 +[2026-02-28 23:57:25] (step=0017544) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 3.432596360790452, LR: 0.0003 +[2026-02-28 23:57:33] (step=0017545) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.4327920172177655, LR: 0.0003 +[2026-02-28 23:57:41] (step=0017546) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.432987673645079, LR: 0.0003 +[2026-02-28 23:57:49] (step=0017547) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.433183330072393, LR: 0.0003 +[2026-02-28 23:57:57] (step=0017548) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.4333789864997066, LR: 0.0003 +[2026-02-28 23:58:05] (step=0017549) Train Loss: 0.4499, Train Steps/Sec: 0.12, Epoch: 3.43357464292702, LR: 0.0003 +[2026-02-28 23:58:13] (step=0017550) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.4337702993543338, LR: 0.0003 +[2026-02-28 23:58:20] (step=0017551) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.4339659557816473, LR: 0.0003 +[2026-02-28 23:58:28] (step=0017552) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.434161612208961, LR: 0.0003 +[2026-02-28 23:58:36] (step=0017553) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.434357268636275, LR: 0.0003 +[2026-02-28 23:58:44] (step=0017554) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 3.4345529250635884, LR: 0.0003 +[2026-02-28 23:58:52] (step=0017555) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.434748581490902, LR: 0.0003 +[2026-02-28 23:59:00] (step=0017556) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.4349442379182156, LR: 0.0003 +[2026-02-28 23:59:08] (step=0017557) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.435139894345529, LR: 0.0003 +[2026-02-28 23:59:15] (step=0017558) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.4353355507728427, LR: 0.0003 +[2026-02-28 23:59:23] (step=0017559) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.4355312072001567, LR: 0.0003 +[2026-02-28 23:59:31] (step=0017560) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.4357268636274703, LR: 0.0003 +[2026-02-28 23:59:39] (step=0017561) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.435922520054784, LR: 0.0003 +[2026-02-28 23:59:47] (step=0017562) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.4361181764820974, LR: 0.0003 +[2026-02-28 23:59:55] (step=0017563) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.436313832909411, LR: 0.0003 +[2026-03-01 00:00:02] (step=0017564) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.4365094893367245, LR: 0.0003 +[2026-03-01 00:00:10] (step=0017565) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.4367051457640385, LR: 0.0003 +[2026-03-01 00:00:18] (step=0017566) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.436900802191352, LR: 0.0003 +[2026-03-01 00:00:26] (step=0017567) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.4370964586186656, LR: 0.0003 +[2026-03-01 00:00:34] (step=0017568) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.437292115045979, LR: 0.0003 +[2026-03-01 00:00:42] (step=0017569) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.4374877714732928, LR: 0.0003 +[2026-03-01 00:00:49] (step=0017570) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 3.4376834279006063, LR: 0.0003 +[2026-03-01 00:00:57] (step=0017571) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.4378790843279203, LR: 0.0003 +[2026-03-01 00:01:05] (step=0017572) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.438074740755234, LR: 0.0003 +[2026-03-01 00:01:13] (step=0017573) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.4382703971825475, LR: 0.0003 +[2026-03-01 00:01:21] (step=0017574) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.438466053609861, LR: 0.0003 +[2026-03-01 00:01:29] (step=0017575) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.4386617100371746, LR: 0.0003 +[2026-03-01 00:01:37] (step=0017576) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.438857366464488, LR: 0.0003 +[2026-03-01 00:01:44] (step=0017577) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 3.439053022891802, LR: 0.0003 +[2026-03-01 00:01:52] (step=0017578) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.4392486793191157, LR: 0.0003 +[2026-03-01 00:02:00] (step=0017579) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.4394443357464293, LR: 0.0003 +[2026-03-01 00:02:08] (step=0017580) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.439639992173743, LR: 0.0003 +[2026-03-01 00:02:16] (step=0017581) Train Loss: 0.4688, Train Steps/Sec: 0.13, Epoch: 3.4398356486010564, LR: 0.0003 +[2026-03-01 00:02:24] (step=0017582) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.4400313050283704, LR: 0.0003 +[2026-03-01 00:02:31] (step=0017583) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.440226961455684, LR: 0.0003 +[2026-03-01 00:02:39] (step=0017584) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.4404226178829975, LR: 0.0003 +[2026-03-01 00:02:47] (step=0017585) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.440618274310311, LR: 0.0003 +[2026-03-01 00:02:55] (step=0017586) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.4408139307376246, LR: 0.0003 +[2026-03-01 00:03:03] (step=0017587) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.441009587164938, LR: 0.0003 +[2026-03-01 00:03:11] (step=0017588) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.441205243592252, LR: 0.0003 +[2026-03-01 00:03:19] (step=0017589) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 3.441400900019566, LR: 0.0003 +[2026-03-01 00:03:26] (step=0017590) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 3.4415965564468793, LR: 0.0003 +[2026-03-01 00:03:34] (step=0017591) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.441792212874193, LR: 0.0003 +[2026-03-01 00:03:42] (step=0017592) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 3.4419878693015065, LR: 0.0003 +[2026-03-01 00:03:50] (step=0017593) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.44218352572882, LR: 0.0003 +[2026-03-01 00:03:58] (step=0017594) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.442379182156134, LR: 0.0003 +[2026-03-01 00:04:06] (step=0017595) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.4425748385834476, LR: 0.0003 +[2026-03-01 00:04:13] (step=0017596) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.442770495010761, LR: 0.0003 +[2026-03-01 00:04:21] (step=0017597) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.4429661514380747, LR: 0.0003 +[2026-03-01 00:04:29] (step=0017598) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.4431618078653883, LR: 0.0003 +[2026-03-01 00:04:37] (step=0017599) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.443357464292702, LR: 0.0003 +[2026-03-01 00:04:45] (step=0017600) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.443553120720016, LR: 0.0003 +[2026-03-01 00:04:53] (step=0017601) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.4437487771473294, LR: 0.0003 +[2026-03-01 00:05:01] (step=0017602) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.443944433574643, LR: 0.0003 +[2026-03-01 00:05:08] (step=0017603) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.4441400900019565, LR: 0.0003 +[2026-03-01 00:05:16] (step=0017604) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 3.44433574642927, LR: 0.0003 +[2026-03-01 00:05:24] (step=0017605) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.4445314028565837, LR: 0.0003 +[2026-03-01 00:05:32] (step=0017606) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.4447270592838977, LR: 0.0003 +[2026-03-01 00:05:40] (step=0017607) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.4449227157112112, LR: 0.0003 +[2026-03-01 00:05:48] (step=0017608) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.445118372138525, LR: 0.0003 +[2026-03-01 00:05:55] (step=0017609) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.4453140285658383, LR: 0.0003 +[2026-03-01 00:06:03] (step=0017610) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.445509684993152, LR: 0.0003 +[2026-03-01 00:06:11] (step=0017611) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 3.4457053414204655, LR: 0.0003 +[2026-03-01 00:06:19] (step=0017612) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.4459009978477795, LR: 0.0003 +[2026-03-01 00:06:27] (step=0017613) Train Loss: 0.4211, Train Steps/Sec: 0.13, Epoch: 3.446096654275093, LR: 0.0003 +[2026-03-01 00:06:35] (step=0017614) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.4462923107024066, LR: 0.0003 +[2026-03-01 00:06:42] (step=0017615) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 3.44648796712972, LR: 0.0003 +[2026-03-01 00:06:50] (step=0017616) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.4466836235570337, LR: 0.0003 +[2026-03-01 00:06:58] (step=0017617) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 3.4468792799843473, LR: 0.0003 +[2026-03-01 00:07:06] (step=0017618) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.4470749364116613, LR: 0.0003 +[2026-03-01 00:07:14] (step=0017619) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.447270592838975, LR: 0.0003 +[2026-03-01 00:07:22] (step=0017620) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.4474662492662884, LR: 0.0003 +[2026-03-01 00:07:30] (step=0017621) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.447661905693602, LR: 0.0003 +[2026-03-01 00:07:38] (step=0017622) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.4478575621209155, LR: 0.0003 +[2026-03-01 00:07:45] (step=0017623) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.448053218548229, LR: 0.0003 +[2026-03-01 00:07:53] (step=0017624) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.448248874975543, LR: 0.0003 +[2026-03-01 00:08:01] (step=0017625) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.4484445314028567, LR: 0.0003 +[2026-03-01 00:08:09] (step=0017626) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 3.4486401878301702, LR: 0.0003 +[2026-03-01 00:08:17] (step=0017627) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.448835844257484, LR: 0.0003 +[2026-03-01 00:08:25] (step=0017628) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.4490315006847974, LR: 0.0003 +[2026-03-01 00:08:32] (step=0017629) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.449227157112111, LR: 0.0003 +[2026-03-01 00:08:40] (step=0017630) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.449422813539425, LR: 0.0003 +[2026-03-01 00:08:48] (step=0017631) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.4496184699667385, LR: 0.0003 +[2026-03-01 00:08:56] (step=0017632) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.449814126394052, LR: 0.0003 +[2026-03-01 00:09:04] (step=0017633) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.4500097828213656, LR: 0.0003 +[2026-03-01 00:09:12] (step=0017634) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.450205439248679, LR: 0.0003 +[2026-03-01 00:09:19] (step=0017635) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.4504010956759927, LR: 0.0003 +[2026-03-01 00:09:27] (step=0017636) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 3.4505967521033067, LR: 0.0003 +[2026-03-01 00:09:35] (step=0017637) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.4507924085306203, LR: 0.0003 +[2026-03-01 00:09:43] (step=0017638) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.450988064957934, LR: 0.0003 +[2026-03-01 00:09:51] (step=0017639) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.4511837213852474, LR: 0.0003 +[2026-03-01 00:09:59] (step=0017640) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.451379377812561, LR: 0.0003 +[2026-03-01 00:10:06] (step=0017641) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 3.451575034239875, LR: 0.0003 +[2026-03-01 00:10:14] (step=0017642) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.4517706906671886, LR: 0.0003 +[2026-03-01 00:10:22] (step=0017643) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.451966347094502, LR: 0.0003 +[2026-03-01 00:10:30] (step=0017644) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.4521620035218157, LR: 0.0003 +[2026-03-01 00:10:38] (step=0017645) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.4523576599491292, LR: 0.0003 +[2026-03-01 00:10:46] (step=0017646) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.452553316376443, LR: 0.0003 +[2026-03-01 00:10:54] (step=0017647) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.452748972803757, LR: 0.0003 +[2026-03-01 00:11:01] (step=0017648) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.4529446292310704, LR: 0.0003 +[2026-03-01 00:11:09] (step=0017649) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.453140285658384, LR: 0.0003 +[2026-03-01 00:11:17] (step=0017650) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.4533359420856975, LR: 0.0003 +[2026-03-01 00:11:25] (step=0017651) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.453531598513011, LR: 0.0003 +[2026-03-01 00:11:33] (step=0017652) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.4537272549403246, LR: 0.0003 +[2026-03-01 00:11:41] (step=0017653) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.4539229113676386, LR: 0.0003 +[2026-03-01 00:11:49] (step=0017654) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.454118567794952, LR: 0.0003 +[2026-03-01 00:11:56] (step=0017655) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.4543142242222658, LR: 0.0003 +[2026-03-01 00:12:04] (step=0017656) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.4545098806495793, LR: 0.0003 +[2026-03-01 00:12:12] (step=0017657) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.454705537076893, LR: 0.0003 +[2026-03-01 00:12:20] (step=0017658) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.4549011935042064, LR: 0.0003 +[2026-03-01 00:12:28] (step=0017659) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.4550968499315204, LR: 0.0003 +[2026-03-01 00:12:36] (step=0017660) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.455292506358834, LR: 0.0003 +[2026-03-01 00:12:43] (step=0017661) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.4554881627861476, LR: 0.0003 +[2026-03-01 00:12:51] (step=0017662) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.455683819213461, LR: 0.0003 +[2026-03-01 00:12:59] (step=0017663) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.4558794756407747, LR: 0.0003 +[2026-03-01 00:13:07] (step=0017664) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.4560751320680883, LR: 0.0003 +[2026-03-01 00:13:15] (step=0017665) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.4562707884954023, LR: 0.0003 +[2026-03-01 00:13:23] (step=0017666) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.456466444922716, LR: 0.0003 +[2026-03-01 00:13:31] (step=0017667) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.4566621013500294, LR: 0.0003 +[2026-03-01 00:13:38] (step=0017668) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.456857757777343, LR: 0.0003 +[2026-03-01 00:13:46] (step=0017669) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.4570534142046565, LR: 0.0003 +[2026-03-01 00:13:54] (step=0017670) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 3.45724907063197, LR: 0.0003 +[2026-03-01 00:14:02] (step=0017671) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.457444727059284, LR: 0.0003 +[2026-03-01 00:14:10] (step=0017672) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.4576403834865976, LR: 0.0003 +[2026-03-01 00:14:18] (step=0017673) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.457836039913911, LR: 0.0003 +[2026-03-01 00:14:25] (step=0017674) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.4580316963412248, LR: 0.0003 +[2026-03-01 00:14:33] (step=0017675) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 3.4582273527685383, LR: 0.0003 +[2026-03-01 00:14:41] (step=0017676) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.458423009195852, LR: 0.0003 +[2026-03-01 00:14:49] (step=0017677) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.458618665623166, LR: 0.0003 +[2026-03-01 00:14:57] (step=0017678) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 3.4588143220504795, LR: 0.0003 +[2026-03-01 00:15:05] (step=0017679) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.459009978477793, LR: 0.0003 +[2026-03-01 00:15:12] (step=0017680) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.4592056349051066, LR: 0.0003 +[2026-03-01 00:15:20] (step=0017681) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.45940129133242, LR: 0.0003 +[2026-03-01 00:15:28] (step=0017682) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 3.4595969477597337, LR: 0.0003 +[2026-03-01 00:15:36] (step=0017683) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.4597926041870477, LR: 0.0003 +[2026-03-01 00:15:44] (step=0017684) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.4599882606143613, LR: 0.0003 +[2026-03-01 00:15:52] (step=0017685) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.460183917041675, LR: 0.0003 +[2026-03-01 00:16:00] (step=0017686) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 3.4603795734689884, LR: 0.0003 +[2026-03-01 00:16:07] (step=0017687) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.460575229896302, LR: 0.0003 +[2026-03-01 00:16:15] (step=0017688) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.4607708863236155, LR: 0.0003 +[2026-03-01 00:16:23] (step=0017689) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.4609665427509295, LR: 0.0003 +[2026-03-01 00:16:31] (step=0017690) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.461162199178243, LR: 0.0003 +[2026-03-01 00:16:39] (step=0017691) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.4613578556055566, LR: 0.0003 +[2026-03-01 00:16:47] (step=0017692) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.46155351203287, LR: 0.0003 +[2026-03-01 00:16:54] (step=0017693) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.4617491684601838, LR: 0.0003 +[2026-03-01 00:17:02] (step=0017694) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.4619448248874978, LR: 0.0003 +[2026-03-01 00:17:10] (step=0017695) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.4621404813148113, LR: 0.0003 +[2026-03-01 00:17:18] (step=0017696) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.462336137742125, LR: 0.0003 +[2026-03-01 00:17:26] (step=0017697) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.4625317941694385, LR: 0.0003 +[2026-03-01 00:17:34] (step=0017698) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.462727450596752, LR: 0.0003 +[2026-03-01 00:17:42] (step=0017699) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.4629231070240656, LR: 0.0003 +[2026-03-01 00:17:49] (step=0017700) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.4631187634513796, LR: 0.0003 +[2026-03-01 00:17:57] (step=0017701) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.463314419878693, LR: 0.0003 +[2026-03-01 00:18:05] (step=0017702) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.4635100763060067, LR: 0.0003 +[2026-03-01 00:18:13] (step=0017703) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.4637057327333203, LR: 0.0003 +[2026-03-01 00:18:21] (step=0017704) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.463901389160634, LR: 0.0003 +[2026-03-01 00:18:29] (step=0017705) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.4640970455879474, LR: 0.0003 +[2026-03-01 00:18:36] (step=0017706) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.4642927020152614, LR: 0.0003 +[2026-03-01 00:18:44] (step=0017707) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.464488358442575, LR: 0.0003 +[2026-03-01 00:18:52] (step=0017708) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.4646840148698885, LR: 0.0003 +[2026-03-01 00:19:00] (step=0017709) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.464879671297202, LR: 0.0003 +[2026-03-01 00:19:08] (step=0017710) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.4650753277245157, LR: 0.0003 +[2026-03-01 00:19:16] (step=0017711) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.465270984151829, LR: 0.0003 +[2026-03-01 00:19:24] (step=0017712) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.465466640579143, LR: 0.0003 +[2026-03-01 00:19:31] (step=0017713) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.465662297006457, LR: 0.0003 +[2026-03-01 00:19:39] (step=0017714) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.4658579534337703, LR: 0.0003 +[2026-03-01 00:19:47] (step=0017715) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.466053609861084, LR: 0.0003 +[2026-03-01 00:19:55] (step=0017716) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.4662492662883975, LR: 0.0003 +[2026-03-01 00:20:03] (step=0017717) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.466444922715711, LR: 0.0003 +[2026-03-01 00:20:11] (step=0017718) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.466640579143025, LR: 0.0003 +[2026-03-01 00:20:18] (step=0017719) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.4668362355703386, LR: 0.0003 +[2026-03-01 00:20:26] (step=0017720) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.467031891997652, LR: 0.0003 +[2026-03-01 00:20:34] (step=0017721) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.4672275484249657, LR: 0.0003 +[2026-03-01 00:20:42] (step=0017722) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.4674232048522793, LR: 0.0003 +[2026-03-01 00:20:50] (step=0017723) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.467618861279593, LR: 0.0003 +[2026-03-01 00:20:58] (step=0017724) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.467814517706907, LR: 0.0003 +[2026-03-01 00:21:06] (step=0017725) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.4680101741342204, LR: 0.0003 +[2026-03-01 00:21:13] (step=0017726) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.468205830561534, LR: 0.0003 +[2026-03-01 00:21:21] (step=0017727) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.4684014869888475, LR: 0.0003 +[2026-03-01 00:21:29] (step=0017728) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 3.468597143416161, LR: 0.0003 +[2026-03-01 00:21:37] (step=0017729) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.4687927998434747, LR: 0.0003 +[2026-03-01 00:21:45] (step=0017730) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.4689884562707887, LR: 0.0003 +[2026-03-01 00:21:53] (step=0017731) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.4691841126981022, LR: 0.0003 +[2026-03-01 00:22:00] (step=0017732) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.469379769125416, LR: 0.0003 +[2026-03-01 00:22:08] (step=0017733) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.4695754255527294, LR: 0.0003 +[2026-03-01 00:22:16] (step=0017734) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.469771081980043, LR: 0.0003 +[2026-03-01 00:22:24] (step=0017735) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.4699667384073565, LR: 0.0003 +[2026-03-01 00:22:32] (step=0017736) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.4701623948346705, LR: 0.0003 +[2026-03-01 00:22:40] (step=0017737) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.470358051261984, LR: 0.0003 +[2026-03-01 00:22:47] (step=0017738) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.4705537076892976, LR: 0.0003 +[2026-03-01 00:22:55] (step=0017739) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.470749364116611, LR: 0.0003 +[2026-03-01 00:23:03] (step=0017740) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.4709450205439247, LR: 0.0003 +[2026-03-01 00:23:11] (step=0017741) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.4711406769712383, LR: 0.0003 +[2026-03-01 00:23:19] (step=0017742) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.4713363333985523, LR: 0.0003 +[2026-03-01 00:23:27] (step=0017743) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.471531989825866, LR: 0.0003 +[2026-03-01 00:23:34] (step=0017744) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.4717276462531794, LR: 0.0003 +[2026-03-01 00:23:42] (step=0017745) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.471923302680493, LR: 0.0003 +[2026-03-01 00:23:50] (step=0017746) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.4721189591078065, LR: 0.0003 +[2026-03-01 00:23:58] (step=0017747) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.47231461553512, LR: 0.0003 +[2026-03-01 00:24:06] (step=0017748) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.472510271962434, LR: 0.0003 +[2026-03-01 00:24:14] (step=0017749) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.4727059283897477, LR: 0.0003 +[2026-03-01 00:24:22] (step=0017750) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.4729015848170612, LR: 0.0003 +[2026-03-01 00:24:30] (step=0017751) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.473097241244375, LR: 0.0003 +[2026-03-01 00:24:37] (step=0017752) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.4732928976716884, LR: 0.0003 +[2026-03-01 00:24:45] (step=0017753) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.4734885540990024, LR: 0.0003 +[2026-03-01 00:24:53] (step=0017754) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.473684210526316, LR: 0.0003 +[2026-03-01 00:25:01] (step=0017755) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.4738798669536295, LR: 0.0003 +[2026-03-01 00:25:09] (step=0017756) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.474075523380943, LR: 0.0003 +[2026-03-01 00:25:17] (step=0017757) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.4742711798082566, LR: 0.0003 +[2026-03-01 00:25:24] (step=0017758) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.47446683623557, LR: 0.0003 +[2026-03-01 00:25:32] (step=0017759) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.474662492662884, LR: 0.0003 +[2026-03-01 00:25:40] (step=0017760) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 3.4748581490901977, LR: 0.0003 +[2026-03-01 00:25:48] (step=0017761) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.4750538055175113, LR: 0.0003 +[2026-03-01 00:25:56] (step=0017762) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.475249461944825, LR: 0.0003 +[2026-03-01 00:26:04] (step=0017763) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.4754451183721384, LR: 0.0003 +[2026-03-01 00:26:11] (step=0017764) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.475640774799452, LR: 0.0003 +[2026-03-01 00:26:19] (step=0017765) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.475836431226766, LR: 0.0003 +[2026-03-01 00:26:27] (step=0017766) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.4760320876540796, LR: 0.0003 +[2026-03-01 00:26:35] (step=0017767) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.476227744081393, LR: 0.0003 +[2026-03-01 00:26:43] (step=0017768) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.4764234005087067, LR: 0.0003 +[2026-03-01 00:26:51] (step=0017769) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.4766190569360202, LR: 0.0003 +[2026-03-01 00:26:59] (step=0017770) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.476814713363334, LR: 0.0003 +[2026-03-01 00:27:06] (step=0017771) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.477010369790648, LR: 0.0003 +[2026-03-01 00:27:14] (step=0017772) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.4772060262179614, LR: 0.0003 +[2026-03-01 00:27:22] (step=0017773) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.477401682645275, LR: 0.0003 +[2026-03-01 00:27:30] (step=0017774) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.4775973390725885, LR: 0.0003 +[2026-03-01 00:27:38] (step=0017775) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.477792995499902, LR: 0.0003 +[2026-03-01 00:27:46] (step=0017776) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.4779886519272156, LR: 0.0003 +[2026-03-01 00:27:54] (step=0017777) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 3.4781843083545296, LR: 0.0003 +[2026-03-01 00:28:01] (step=0017778) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.478379964781843, LR: 0.0003 +[2026-03-01 00:28:09] (step=0017779) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 3.4785756212091568, LR: 0.0003 +[2026-03-01 00:28:17] (step=0017780) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 3.4787712776364703, LR: 0.0003 +[2026-03-01 00:28:25] (step=0017781) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.478966934063784, LR: 0.0003 +[2026-03-01 00:28:33] (step=0017782) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.4791625904910974, LR: 0.0003 +[2026-03-01 00:28:41] (step=0017783) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.4793582469184114, LR: 0.0003 +[2026-03-01 00:28:48] (step=0017784) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.479553903345725, LR: 0.0003 +[2026-03-01 00:28:56] (step=0017785) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.4797495597730386, LR: 0.0003 +[2026-03-01 00:29:04] (step=0017786) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.479945216200352, LR: 0.0003 +[2026-03-01 00:29:12] (step=0017787) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.4801408726276657, LR: 0.0003 +[2026-03-01 00:29:20] (step=0017788) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 3.4803365290549793, LR: 0.0003 +[2026-03-01 00:29:28] (step=0017789) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.4805321854822933, LR: 0.0003 +[2026-03-01 00:29:35] (step=0017790) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.480727841909607, LR: 0.0003 +[2026-03-01 00:29:43] (step=0017791) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.4809234983369204, LR: 0.0003 +[2026-03-01 00:29:51] (step=0017792) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.481119154764234, LR: 0.0003 +[2026-03-01 00:29:59] (step=0017793) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.4813148111915475, LR: 0.0003 +[2026-03-01 00:30:07] (step=0017794) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 3.481510467618861, LR: 0.0003 +[2026-03-01 00:30:15] (step=0017795) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.481706124046175, LR: 0.0003 +[2026-03-01 00:30:22] (step=0017796) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.4819017804734886, LR: 0.0003 +[2026-03-01 00:30:30] (step=0017797) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.482097436900802, LR: 0.0003 +[2026-03-01 00:30:38] (step=0017798) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.4822930933281158, LR: 0.0003 +[2026-03-01 00:30:46] (step=0017799) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.4824887497554293, LR: 0.0003 +[2026-03-01 00:30:54] (step=0017800) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.482684406182743, LR: 0.0003 +[2026-03-01 00:31:02] (step=0017801) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 3.482880062610057, LR: 0.0003 +[2026-03-01 00:31:10] (step=0017802) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.4830757190373705, LR: 0.0003 +[2026-03-01 00:31:18] (step=0017803) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.483271375464684, LR: 0.0003 +[2026-03-01 00:31:25] (step=0017804) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.4834670318919976, LR: 0.0003 +[2026-03-01 00:31:33] (step=0017805) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.483662688319311, LR: 0.0003 +[2026-03-01 00:31:41] (step=0017806) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.483858344746625, LR: 0.0003 +[2026-03-01 00:31:49] (step=0017807) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.4840540011739387, LR: 0.0003 +[2026-03-01 00:31:57] (step=0017808) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.4842496576012523, LR: 0.0003 +[2026-03-01 00:32:05] (step=0017809) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.484445314028566, LR: 0.0003 +[2026-03-01 00:32:12] (step=0017810) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.4846409704558794, LR: 0.0003 +[2026-03-01 00:32:20] (step=0017811) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.484836626883193, LR: 0.0003 +[2026-03-01 00:32:28] (step=0017812) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.485032283310507, LR: 0.0003 +[2026-03-01 00:32:36] (step=0017813) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.4852279397378205, LR: 0.0003 +[2026-03-01 00:32:44] (step=0017814) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.485423596165134, LR: 0.0003 +[2026-03-01 00:32:52] (step=0017815) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.4856192525924476, LR: 0.0003 +[2026-03-01 00:33:00] (step=0017816) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.485814909019761, LR: 0.0003 +[2026-03-01 00:33:07] (step=0017817) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.4860105654470748, LR: 0.0003 +[2026-03-01 00:33:15] (step=0017818) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.4862062218743888, LR: 0.0003 +[2026-03-01 00:33:23] (step=0017819) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.4864018783017023, LR: 0.0003 +[2026-03-01 00:33:31] (step=0017820) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.486597534729016, LR: 0.0003 +[2026-03-01 00:33:39] (step=0017821) Train Loss: 0.4505, Train Steps/Sec: 0.12, Epoch: 3.4867931911563295, LR: 0.0003 +[2026-03-01 00:33:47] (step=0017822) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.486988847583643, LR: 0.0003 +[2026-03-01 00:33:55] (step=0017823) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.4871845040109566, LR: 0.0003 +[2026-03-01 00:34:02] (step=0017824) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.4873801604382706, LR: 0.0003 +[2026-03-01 00:34:10] (step=0017825) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.487575816865584, LR: 0.0003 +[2026-03-01 00:34:18] (step=0017826) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.4877714732928977, LR: 0.0003 +[2026-03-01 00:34:26] (step=0017827) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 3.4879671297202113, LR: 0.0003 +[2026-03-01 00:34:34] (step=0017828) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.488162786147525, LR: 0.0003 +[2026-03-01 00:34:42] (step=0017829) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.4883584425748384, LR: 0.0003 +[2026-03-01 00:34:50] (step=0017830) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.4885540990021524, LR: 0.0003 +[2026-03-01 00:34:57] (step=0017831) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.488749755429466, LR: 0.0003 +[2026-03-01 00:35:05] (step=0017832) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.4889454118567795, LR: 0.0003 +[2026-03-01 00:35:13] (step=0017833) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.489141068284093, LR: 0.0003 +[2026-03-01 00:35:21] (step=0017834) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.4893367247114067, LR: 0.0003 +[2026-03-01 00:35:29] (step=0017835) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.48953238113872, LR: 0.0003 +[2026-03-01 00:35:37] (step=0017836) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.4897280375660342, LR: 0.0003 +[2026-03-01 00:35:44] (step=0017837) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.489923693993348, LR: 0.0003 +[2026-03-01 00:35:52] (step=0017838) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.4901193504206613, LR: 0.0003 +[2026-03-01 00:36:00] (step=0017839) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.490315006847975, LR: 0.0003 +[2026-03-01 00:36:08] (step=0017840) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.4905106632752885, LR: 0.0003 +[2026-03-01 00:36:16] (step=0017841) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.490706319702602, LR: 0.0003 +[2026-03-01 00:36:24] (step=0017842) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.490901976129916, LR: 0.0003 +[2026-03-01 00:36:31] (step=0017843) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.4910976325572296, LR: 0.0003 +[2026-03-01 00:36:39] (step=0017844) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.491293288984543, LR: 0.0003 +[2026-03-01 00:36:47] (step=0017845) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.4914889454118567, LR: 0.0003 +[2026-03-01 00:36:55] (step=0017846) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.4916846018391703, LR: 0.0003 +[2026-03-01 00:37:03] (step=0017847) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.491880258266484, LR: 0.0003 +[2026-03-01 00:37:11] (step=0017848) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.492075914693798, LR: 0.0003 +[2026-03-01 00:37:18] (step=0017849) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.4922715711211114, LR: 0.0003 +[2026-03-01 00:37:26] (step=0017850) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.492467227548425, LR: 0.0003 +[2026-03-01 00:37:34] (step=0017851) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.4926628839757385, LR: 0.0003 +[2026-03-01 00:37:42] (step=0017852) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.492858540403052, LR: 0.0003 +[2026-03-01 00:37:50] (step=0017853) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.4930541968303657, LR: 0.0003 +[2026-03-01 00:37:58] (step=0017854) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.4932498532576797, LR: 0.0003 +[2026-03-01 00:38:06] (step=0017855) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.4934455096849932, LR: 0.0003 +[2026-03-01 00:38:13] (step=0017856) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.493641166112307, LR: 0.0003 +[2026-03-01 00:38:21] (step=0017857) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.4938368225396204, LR: 0.0003 +[2026-03-01 00:38:29] (step=0017858) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.494032478966934, LR: 0.0003 +[2026-03-01 00:38:37] (step=0017859) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.4942281353942475, LR: 0.0003 +[2026-03-01 00:38:45] (step=0017860) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.4944237918215615, LR: 0.0003 +[2026-03-01 00:38:53] (step=0017861) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.494619448248875, LR: 0.0003 +[2026-03-01 00:39:01] (step=0017862) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.4948151046761886, LR: 0.0003 +[2026-03-01 00:39:08] (step=0017863) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.495010761103502, LR: 0.0003 +[2026-03-01 00:39:16] (step=0017864) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.4952064175308157, LR: 0.0003 +[2026-03-01 00:39:24] (step=0017865) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.4954020739581297, LR: 0.0003 +[2026-03-01 00:39:32] (step=0017866) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.4955977303854433, LR: 0.0003 +[2026-03-01 00:39:40] (step=0017867) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.495793386812757, LR: 0.0003 +[2026-03-01 00:39:48] (step=0017868) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.4959890432400704, LR: 0.0003 +[2026-03-01 00:39:55] (step=0017869) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.496184699667384, LR: 0.0003 +[2026-03-01 00:40:03] (step=0017870) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.4963803560946976, LR: 0.0003 +[2026-03-01 00:40:11] (step=0017871) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.4965760125220116, LR: 0.0003 +[2026-03-01 00:40:19] (step=0017872) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.496771668949325, LR: 0.0003 +[2026-03-01 00:40:27] (step=0017873) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.4969673253766387, LR: 0.0003 +[2026-03-01 00:40:35] (step=0017874) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.4971629818039522, LR: 0.0003 +[2026-03-01 00:40:43] (step=0017875) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.497358638231266, LR: 0.0003 +[2026-03-01 00:40:50] (step=0017876) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.4975542946585794, LR: 0.0003 +[2026-03-01 00:40:58] (step=0017877) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.4977499510858934, LR: 0.0003 +[2026-03-01 00:41:06] (step=0017878) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.497945607513207, LR: 0.0003 +[2026-03-01 00:41:14] (step=0017879) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.4981412639405205, LR: 0.0003 +[2026-03-01 00:41:22] (step=0017880) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 3.498336920367834, LR: 0.0003 +[2026-03-01 00:41:30] (step=0017881) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.4985325767951476, LR: 0.0003 +[2026-03-01 00:41:37] (step=0017882) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.498728233222461, LR: 0.0003 +[2026-03-01 00:41:45] (step=0017883) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 3.498923889649775, LR: 0.0003 +[2026-03-01 00:41:53] (step=0017884) Train Loss: 0.4711, Train Steps/Sec: 0.13, Epoch: 3.4991195460770887, LR: 0.0003 +[2026-03-01 00:42:01] (step=0017885) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.4993152025044023, LR: 0.0003 +[2026-03-01 00:42:09] (step=0017886) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.499510858931716, LR: 0.0003 +[2026-03-01 00:42:17] (step=0017887) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.4997065153590294, LR: 0.0003 +[2026-03-01 00:42:24] (step=0017888) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.499902171786343, LR: 0.0003 +[2026-03-01 00:42:32] (step=0017889) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.500097828213657, LR: 0.0003 +[2026-03-01 00:42:40] (step=0017890) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.5002934846409706, LR: 0.0003 +[2026-03-01 00:42:48] (step=0017891) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.500489141068284, LR: 0.0003 +[2026-03-01 00:42:56] (step=0017892) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.5006847974955977, LR: 0.0003 +[2026-03-01 00:43:04] (step=0017893) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.5008804539229113, LR: 0.0003 +[2026-03-01 00:43:11] (step=0017894) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.501076110350225, LR: 0.0003 +[2026-03-01 00:43:19] (step=0017895) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 3.501271766777539, LR: 0.0003 +[2026-03-01 00:43:27] (step=0017896) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.5014674232048524, LR: 0.0003 +[2026-03-01 00:43:35] (step=0017897) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.501663079632166, LR: 0.0003 +[2026-03-01 00:43:43] (step=0017898) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.5018587360594795, LR: 0.0003 +[2026-03-01 00:43:51] (step=0017899) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.502054392486793, LR: 0.0003 +[2026-03-01 00:43:59] (step=0017900) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.5022500489141066, LR: 0.0003 +[2026-03-01 00:44:07] (step=0017901) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.5024457053414206, LR: 0.0003 +[2026-03-01 00:44:14] (step=0017902) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 3.502641361768734, LR: 0.0003 +[2026-03-01 00:44:22] (step=0017903) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.5028370181960478, LR: 0.0003 +[2026-03-01 00:44:30] (step=0017904) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.5030326746233613, LR: 0.0003 +[2026-03-01 00:44:38] (step=0017905) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.503228331050675, LR: 0.0003 +[2026-03-01 00:44:46] (step=0017906) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.5034239874779884, LR: 0.0003 +[2026-03-01 00:44:54] (step=0017907) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.5036196439053024, LR: 0.0003 +[2026-03-01 00:45:01] (step=0017908) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.503815300332616, LR: 0.0003 +[2026-03-01 00:45:09] (step=0017909) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.5040109567599296, LR: 0.0003 +[2026-03-01 00:45:17] (step=0017910) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.504206613187243, LR: 0.0003 +[2026-03-01 00:45:25] (step=0017911) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.5044022696145567, LR: 0.0003 +[2026-03-01 00:45:33] (step=0017912) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.5045979260418703, LR: 0.0003 +[2026-03-01 00:45:41] (step=0017913) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.5047935824691843, LR: 0.0003 +[2026-03-01 00:45:48] (step=0017914) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.504989238896498, LR: 0.0003 +[2026-03-01 00:45:56] (step=0017915) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.5051848953238114, LR: 0.0003 +[2026-03-01 00:46:04] (step=0017916) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.505380551751125, LR: 0.0003 +[2026-03-01 00:46:12] (step=0017917) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.5055762081784385, LR: 0.0003 +[2026-03-01 00:46:20] (step=0017918) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.5057718646057525, LR: 0.0003 +[2026-03-01 00:46:28] (step=0017919) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.505967521033066, LR: 0.0003 +[2026-03-01 00:46:35] (step=0017920) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.5061631774603796, LR: 0.0003 +[2026-03-01 00:46:43] (step=0017921) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.506358833887693, LR: 0.0003 +[2026-03-01 00:46:51] (step=0017922) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.5065544903150068, LR: 0.0003 +[2026-03-01 00:46:59] (step=0017923) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.5067501467423203, LR: 0.0003 +[2026-03-01 00:47:07] (step=0017924) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.5069458031696343, LR: 0.0003 +[2026-03-01 00:47:15] (step=0017925) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.507141459596948, LR: 0.0003 +[2026-03-01 00:47:23] (step=0017926) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.5073371160242615, LR: 0.0003 +[2026-03-01 00:47:31] (step=0017927) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.507532772451575, LR: 0.0003 +[2026-03-01 00:47:38] (step=0017928) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.5077284288788886, LR: 0.0003 +[2026-03-01 00:47:46] (step=0017929) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.507924085306202, LR: 0.0003 +[2026-03-01 00:47:54] (step=0017930) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.508119741733516, LR: 0.0003 +[2026-03-01 00:48:02] (step=0017931) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 3.5083153981608297, LR: 0.0003 +[2026-03-01 00:48:10] (step=0017932) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.5085110545881433, LR: 0.0003 +[2026-03-01 00:48:18] (step=0017933) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.508706711015457, LR: 0.0003 +[2026-03-01 00:48:25] (step=0017934) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.5089023674427704, LR: 0.0003 +[2026-03-01 00:48:33] (step=0017935) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.509098023870084, LR: 0.0003 +[2026-03-01 00:48:41] (step=0017936) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.509293680297398, LR: 0.0003 +[2026-03-01 00:48:49] (step=0017937) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.5094893367247115, LR: 0.0003 +[2026-03-01 00:48:57] (step=0017938) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.509684993152025, LR: 0.0003 +[2026-03-01 00:49:05] (step=0017939) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.5098806495793387, LR: 0.0003 +[2026-03-01 00:49:12] (step=0017940) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.510076306006652, LR: 0.0003 +[2026-03-01 00:49:20] (step=0017941) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.5102719624339658, LR: 0.0003 +[2026-03-01 00:49:28] (step=0017942) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.51046761886128, LR: 0.0003 +[2026-03-01 00:49:36] (step=0017943) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 3.5106632752885933, LR: 0.0003 +[2026-03-01 00:49:44] (step=0017944) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 3.510858931715907, LR: 0.0003 +[2026-03-01 00:49:52] (step=0017945) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 3.5110545881432205, LR: 0.0003 +[2026-03-01 00:49:59] (step=0017946) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.511250244570534, LR: 0.0003 +[2026-03-01 00:50:07] (step=0017947) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.5114459009978476, LR: 0.0003 +[2026-03-01 00:50:15] (step=0017948) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.5116415574251616, LR: 0.0003 +[2026-03-01 00:50:23] (step=0017949) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.511837213852475, LR: 0.0003 +[2026-03-01 00:50:31] (step=0017950) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.5120328702797887, LR: 0.0003 +[2026-03-01 00:50:39] (step=0017951) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 3.5122285267071023, LR: 0.0003 +[2026-03-01 00:50:47] (step=0017952) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.512424183134416, LR: 0.0003 +[2026-03-01 00:50:54] (step=0017953) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.5126198395617294, LR: 0.0003 +[2026-03-01 00:51:02] (step=0017954) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.5128154959890434, LR: 0.0003 +[2026-03-01 00:51:10] (step=0017955) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.513011152416357, LR: 0.0003 +[2026-03-01 00:51:18] (step=0017956) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.5132068088436705, LR: 0.0003 +[2026-03-01 00:51:26] (step=0017957) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.513402465270984, LR: 0.0003 +[2026-03-01 00:51:34] (step=0017958) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.5135981216982977, LR: 0.0003 +[2026-03-01 00:51:42] (step=0017959) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.5137937781256112, LR: 0.0003 +[2026-03-01 00:51:49] (step=0017960) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.5139894345529252, LR: 0.0003 +[2026-03-01 00:51:57] (step=0017961) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.514185090980239, LR: 0.0003 +[2026-03-01 00:52:05] (step=0017962) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.5143807474075524, LR: 0.0003 +[2026-03-01 00:52:13] (step=0017963) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 3.514576403834866, LR: 0.0003 +[2026-03-01 00:52:21] (step=0017964) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 3.5147720602621795, LR: 0.0003 +[2026-03-01 00:52:29] (step=0017965) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.514967716689493, LR: 0.0003 +[2026-03-01 00:52:36] (step=0017966) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.515163373116807, LR: 0.0003 +[2026-03-01 00:52:44] (step=0017967) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.5153590295441206, LR: 0.0003 +[2026-03-01 00:52:52] (step=0017968) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.515554685971434, LR: 0.0003 +[2026-03-01 00:53:00] (step=0017969) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.5157503423987477, LR: 0.0003 +[2026-03-01 00:53:08] (step=0017970) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.5159459988260613, LR: 0.0003 +[2026-03-01 00:53:16] (step=0017971) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.516141655253375, LR: 0.0003 +[2026-03-01 00:53:23] (step=0017972) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.516337311680689, LR: 0.0003 +[2026-03-01 00:53:31] (step=0017973) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.5165329681080024, LR: 0.0003 +[2026-03-01 00:53:39] (step=0017974) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 3.516728624535316, LR: 0.0003 +[2026-03-01 00:53:47] (step=0017975) Train Loss: 0.4504, Train Steps/Sec: 0.12, Epoch: 3.5169242809626295, LR: 0.0003 +[2026-03-01 00:53:55] (step=0017976) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.517119937389943, LR: 0.0003 +[2026-03-01 00:54:03] (step=0017977) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.517315593817257, LR: 0.0003 +[2026-03-01 00:54:11] (step=0017978) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 3.5175112502445707, LR: 0.0003 +[2026-03-01 00:54:19] (step=0017979) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.5177069066718842, LR: 0.0003 +[2026-03-01 00:54:26] (step=0017980) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.517902563099198, LR: 0.0003 +[2026-03-01 00:54:34] (step=0017981) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.5180982195265114, LR: 0.0003 +[2026-03-01 00:54:42] (step=0017982) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.518293875953825, LR: 0.0003 +[2026-03-01 00:54:50] (step=0017983) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 3.518489532381139, LR: 0.0003 +[2026-03-01 00:54:58] (step=0017984) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.5186851888084525, LR: 0.0003 +[2026-03-01 00:55:06] (step=0017985) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.518880845235766, LR: 0.0003 +[2026-03-01 00:55:13] (step=0017986) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 3.5190765016630796, LR: 0.0003 +[2026-03-01 00:55:21] (step=0017987) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.519272158090393, LR: 0.0003 +[2026-03-01 00:55:29] (step=0017988) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.5194678145177067, LR: 0.0003 +[2026-03-01 00:55:37] (step=0017989) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.5196634709450207, LR: 0.0003 +[2026-03-01 00:55:45] (step=0017990) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.5198591273723343, LR: 0.0003 +[2026-03-01 00:55:53] (step=0017991) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.520054783799648, LR: 0.0003 +[2026-03-01 00:56:00] (step=0017992) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.5202504402269614, LR: 0.0003 +[2026-03-01 00:56:08] (step=0017993) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.520446096654275, LR: 0.0003 +[2026-03-01 00:56:16] (step=0017994) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.5206417530815886, LR: 0.0003 +[2026-03-01 00:56:24] (step=0017995) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.5208374095089026, LR: 0.0003 +[2026-03-01 00:56:32] (step=0017996) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 3.521033065936216, LR: 0.0003 +[2026-03-01 00:56:40] (step=0017997) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 3.5212287223635297, LR: 0.0003 +[2026-03-01 00:56:48] (step=0017998) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.5214243787908432, LR: 0.0003 +[2026-03-01 00:56:55] (step=0017999) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.521620035218157, LR: 0.0003 +[2026-03-01 00:57:03] (step=0018000) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.5218156916454704, LR: 0.0003 +[2026-03-01 00:57:03] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0018000/ +[2026-03-01 00:57:11] (step=0018001) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.5220113480727844, LR: 0.0003 +[2026-03-01 00:57:19] (step=0018002) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.522207004500098, LR: 0.0003 +[2026-03-01 00:57:27] (step=0018003) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.5224026609274115, LR: 0.0003 +[2026-03-01 00:57:35] (step=0018004) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.522598317354725, LR: 0.0003 +[2026-03-01 00:57:43] (step=0018005) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.5227939737820386, LR: 0.0003 +[2026-03-01 00:57:50] (step=0018006) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.522989630209352, LR: 0.0003 +[2026-03-01 00:57:58] (step=0018007) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.523185286636666, LR: 0.0003 +[2026-03-01 00:58:06] (step=0018008) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.5233809430639798, LR: 0.0003 +[2026-03-01 00:58:14] (step=0018009) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.5235765994912933, LR: 0.0003 +[2026-03-01 00:58:22] (step=0018010) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.523772255918607, LR: 0.0003 +[2026-03-01 00:58:30] (step=0018011) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.5239679123459204, LR: 0.0003 +[2026-03-01 00:58:37] (step=0018012) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.524163568773234, LR: 0.0003 +[2026-03-01 00:58:45] (step=0018013) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 3.524359225200548, LR: 0.0003 +[2026-03-01 00:58:53] (step=0018014) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.5245548816278616, LR: 0.0003 +[2026-03-01 00:59:01] (step=0018015) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.524750538055175, LR: 0.0003 +[2026-03-01 00:59:09] (step=0018016) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.5249461944824887, LR: 0.0003 +[2026-03-01 00:59:17] (step=0018017) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.5251418509098023, LR: 0.0003 +[2026-03-01 00:59:24] (step=0018018) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.525337507337116, LR: 0.0003 +[2026-03-01 00:59:32] (step=0018019) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.52553316376443, LR: 0.0003 +[2026-03-01 00:59:40] (step=0018020) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.5257288201917434, LR: 0.0003 +[2026-03-01 00:59:48] (step=0018021) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.525924476619057, LR: 0.0003 +[2026-03-01 00:59:56] (step=0018022) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.5261201330463705, LR: 0.0003 +[2026-03-01 01:00:04] (step=0018023) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.526315789473684, LR: 0.0003 +[2026-03-01 01:00:12] (step=0018024) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.5265114459009976, LR: 0.0003 +[2026-03-01 01:00:20] (step=0018025) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.5267071023283116, LR: 0.0003 +[2026-03-01 01:00:27] (step=0018026) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.526902758755625, LR: 0.0003 +[2026-03-01 01:00:35] (step=0018027) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.5270984151829388, LR: 0.0003 +[2026-03-01 01:00:43] (step=0018028) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.5272940716102523, LR: 0.0003 +[2026-03-01 01:00:51] (step=0018029) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.527489728037566, LR: 0.0003 +[2026-03-01 01:00:59] (step=0018030) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.52768538446488, LR: 0.0003 +[2026-03-01 01:01:07] (step=0018031) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.5278810408921935, LR: 0.0003 +[2026-03-01 01:01:14] (step=0018032) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.528076697319507, LR: 0.0003 +[2026-03-01 01:01:22] (step=0018033) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 3.5282723537468206, LR: 0.0003 +[2026-03-01 01:01:30] (step=0018034) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.528468010174134, LR: 0.0003 +[2026-03-01 01:01:38] (step=0018035) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.5286636666014477, LR: 0.0003 +[2026-03-01 01:01:46] (step=0018036) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.5288593230287617, LR: 0.0003 +[2026-03-01 01:01:54] (step=0018037) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.5290549794560753, LR: 0.0003 +[2026-03-01 01:02:02] (step=0018038) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.529250635883389, LR: 0.0003 +[2026-03-01 01:02:09] (step=0018039) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.5294462923107024, LR: 0.0003 +[2026-03-01 01:02:17] (step=0018040) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.529641948738016, LR: 0.0003 +[2026-03-01 01:02:25] (step=0018041) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.5298376051653295, LR: 0.0003 +[2026-03-01 01:02:33] (step=0018042) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.5300332615926435, LR: 0.0003 +[2026-03-01 01:02:41] (step=0018043) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 3.530228918019957, LR: 0.0003 +[2026-03-01 01:02:49] (step=0018044) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.5304245744472706, LR: 0.0003 +[2026-03-01 01:02:57] (step=0018045) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 3.530620230874584, LR: 0.0003 +[2026-03-01 01:03:04] (step=0018046) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.5308158873018978, LR: 0.0003 +[2026-03-01 01:03:12] (step=0018047) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.5310115437292113, LR: 0.0003 +[2026-03-01 01:03:20] (step=0018048) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.5312072001565253, LR: 0.0003 +[2026-03-01 01:03:28] (step=0018049) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.531402856583839, LR: 0.0003 +[2026-03-01 01:03:36] (step=0018050) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.5315985130111525, LR: 0.0003 +[2026-03-01 01:03:44] (step=0018051) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.531794169438466, LR: 0.0003 +[2026-03-01 01:03:51] (step=0018052) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.5319898258657796, LR: 0.0003 +[2026-03-01 01:03:59] (step=0018053) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.532185482293093, LR: 0.0003 +[2026-03-01 01:04:07] (step=0018054) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 3.532381138720407, LR: 0.0003 +[2026-03-01 01:04:15] (step=0018055) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.5325767951477207, LR: 0.0003 +[2026-03-01 01:04:23] (step=0018056) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.5327724515750343, LR: 0.0003 +[2026-03-01 01:04:31] (step=0018057) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.532968108002348, LR: 0.0003 +[2026-03-01 01:04:39] (step=0018058) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 3.5331637644296614, LR: 0.0003 +[2026-03-01 01:04:46] (step=0018059) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.533359420856975, LR: 0.0003 +[2026-03-01 01:04:54] (step=0018060) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.533555077284289, LR: 0.0003 +[2026-03-01 01:05:02] (step=0018061) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.5337507337116025, LR: 0.0003 +[2026-03-01 01:05:10] (step=0018062) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.533946390138916, LR: 0.0003 +[2026-03-01 01:05:18] (step=0018063) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.5341420465662297, LR: 0.0003 +[2026-03-01 01:05:26] (step=0018064) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.534337702993543, LR: 0.0003 +[2026-03-01 01:05:33] (step=0018065) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 3.534533359420857, LR: 0.0003 +[2026-03-01 01:05:41] (step=0018066) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.534729015848171, LR: 0.0003 +[2026-03-01 01:05:49] (step=0018067) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.5349246722754843, LR: 0.0003 +[2026-03-01 01:05:57] (step=0018068) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.535120328702798, LR: 0.0003 +[2026-03-01 01:06:05] (step=0018069) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.5353159851301115, LR: 0.0003 +[2026-03-01 01:06:13] (step=0018070) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.535511641557425, LR: 0.0003 +[2026-03-01 01:06:21] (step=0018071) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 3.5357072979847386, LR: 0.0003 +[2026-03-01 01:06:28] (step=0018072) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.5359029544120526, LR: 0.0003 +[2026-03-01 01:06:36] (step=0018073) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.536098610839366, LR: 0.0003 +[2026-03-01 01:06:44] (step=0018074) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.5362942672666797, LR: 0.0003 +[2026-03-01 01:06:52] (step=0018075) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.5364899236939933, LR: 0.0003 +[2026-03-01 01:07:00] (step=0018076) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.536685580121307, LR: 0.0003 +[2026-03-01 01:07:08] (step=0018077) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.5368812365486204, LR: 0.0003 +[2026-03-01 01:07:16] (step=0018078) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.5370768929759344, LR: 0.0003 +[2026-03-01 01:07:23] (step=0018079) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.537272549403248, LR: 0.0003 +[2026-03-01 01:07:31] (step=0018080) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 3.5374682058305615, LR: 0.0003 +[2026-03-01 01:07:39] (step=0018081) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.537663862257875, LR: 0.0003 +[2026-03-01 01:07:47] (step=0018082) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 3.5378595186851887, LR: 0.0003 +[2026-03-01 01:07:55] (step=0018083) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.5380551751125022, LR: 0.0003 +[2026-03-01 01:08:03] (step=0018084) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.5382508315398162, LR: 0.0003 +[2026-03-01 01:08:10] (step=0018085) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.53844648796713, LR: 0.0003 +[2026-03-01 01:08:18] (step=0018086) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.5386421443944434, LR: 0.0003 +[2026-03-01 01:08:26] (step=0018087) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.538837800821757, LR: 0.0003 +[2026-03-01 01:08:34] (step=0018088) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.5390334572490705, LR: 0.0003 +[2026-03-01 01:08:42] (step=0018089) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.5392291136763845, LR: 0.0003 +[2026-03-01 01:08:50] (step=0018090) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.539424770103698, LR: 0.0003 +[2026-03-01 01:08:58] (step=0018091) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.5396204265310116, LR: 0.0003 +[2026-03-01 01:09:05] (step=0018092) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.539816082958325, LR: 0.0003 +[2026-03-01 01:09:13] (step=0018093) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.5400117393856387, LR: 0.0003 +[2026-03-01 01:09:21] (step=0018094) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 3.5402073958129523, LR: 0.0003 +[2026-03-01 01:09:29] (step=0018095) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.5404030522402663, LR: 0.0003 +[2026-03-01 01:09:37] (step=0018096) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.54059870866758, LR: 0.0003 +[2026-03-01 01:09:45] (step=0018097) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 3.5407943650948934, LR: 0.0003 +[2026-03-01 01:09:53] (step=0018098) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 3.540990021522207, LR: 0.0003 +[2026-03-01 01:10:00] (step=0018099) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.5411856779495205, LR: 0.0003 +[2026-03-01 01:10:08] (step=0018100) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.541381334376834, LR: 0.0003 +[2026-03-01 01:10:16] (step=0018101) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.541576990804148, LR: 0.0003 +[2026-03-01 01:10:24] (step=0018102) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 3.5417726472314617, LR: 0.0003 +[2026-03-01 01:10:32] (step=0018103) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.5419683036587752, LR: 0.0003 +[2026-03-01 01:10:40] (step=0018104) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 3.542163960086089, LR: 0.0003 +[2026-03-01 01:10:48] (step=0018105) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.5423596165134024, LR: 0.0003 +[2026-03-01 01:10:55] (step=0018106) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.542555272940716, LR: 0.0003 +[2026-03-01 01:11:03] (step=0018107) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.54275092936803, LR: 0.0003 +[2026-03-01 01:11:11] (step=0018108) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.5429465857953435, LR: 0.0003 +[2026-03-01 01:11:19] (step=0018109) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.543142242222657, LR: 0.0003 +[2026-03-01 01:11:27] (step=0018110) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 3.5433378986499706, LR: 0.0003 +[2026-03-01 01:11:35] (step=0018111) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.543533555077284, LR: 0.0003 +[2026-03-01 01:11:42] (step=0018112) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.5437292115045977, LR: 0.0003 +[2026-03-01 01:11:50] (step=0018113) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.5439248679319117, LR: 0.0003 +[2026-03-01 01:11:58] (step=0018114) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.5441205243592253, LR: 0.0003 +[2026-03-01 01:12:06] (step=0018115) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.544316180786539, LR: 0.0003 +[2026-03-01 01:12:14] (step=0018116) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 3.5445118372138524, LR: 0.0003 +[2026-03-01 01:12:22] (step=0018117) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.544707493641166, LR: 0.0003 +[2026-03-01 01:12:29] (step=0018118) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.5449031500684796, LR: 0.0003 +[2026-03-01 01:12:37] (step=0018119) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.5450988064957936, LR: 0.0003 +[2026-03-01 01:12:45] (step=0018120) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.545294462923107, LR: 0.0003 +[2026-03-01 01:12:53] (step=0018121) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.5454901193504207, LR: 0.0003 +[2026-03-01 01:13:01] (step=0018122) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.5456857757777342, LR: 0.0003 +[2026-03-01 01:13:09] (step=0018123) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.545881432205048, LR: 0.0003 +[2026-03-01 01:13:17] (step=0018124) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.5460770886323614, LR: 0.0003 +[2026-03-01 01:13:25] (step=0018125) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.5462727450596754, LR: 0.0003 +[2026-03-01 01:13:32] (step=0018126) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.546468401486989, LR: 0.0003 +[2026-03-01 01:13:40] (step=0018127) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.5466640579143025, LR: 0.0003 +[2026-03-01 01:13:48] (step=0018128) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.546859714341616, LR: 0.0003 +[2026-03-01 01:13:56] (step=0018129) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 3.5470553707689296, LR: 0.0003 +[2026-03-01 01:14:04] (step=0018130) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.547251027196243, LR: 0.0003 +[2026-03-01 01:14:12] (step=0018131) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.547446683623557, LR: 0.0003 +[2026-03-01 01:14:19] (step=0018132) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.5476423400508708, LR: 0.0003 +[2026-03-01 01:14:27] (step=0018133) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.5478379964781843, LR: 0.0003 +[2026-03-01 01:14:35] (step=0018134) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.548033652905498, LR: 0.0003 +[2026-03-01 01:14:43] (step=0018135) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.5482293093328114, LR: 0.0003 +[2026-03-01 01:14:51] (step=0018136) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 3.548424965760125, LR: 0.0003 +[2026-03-01 01:14:59] (step=0018137) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.548620622187439, LR: 0.0003 +[2026-03-01 01:15:06] (step=0018138) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 3.5488162786147526, LR: 0.0003 +[2026-03-01 01:15:14] (step=0018139) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 3.549011935042066, LR: 0.0003 +[2026-03-01 01:15:22] (step=0018140) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 3.5492075914693797, LR: 0.0003 +[2026-03-01 01:15:30] (step=0018141) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.5494032478966933, LR: 0.0003 +[2026-03-01 01:15:38] (step=0018142) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.5495989043240073, LR: 0.0003 +[2026-03-01 01:15:46] (step=0018143) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.549794560751321, LR: 0.0003 +[2026-03-01 01:15:54] (step=0018144) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.5499902171786344, LR: 0.0003 +[2026-03-01 01:16:01] (step=0018145) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.550185873605948, LR: 0.0003 +[2026-03-01 01:16:09] (step=0018146) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.5503815300332615, LR: 0.0003 +[2026-03-01 01:16:17] (step=0018147) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.550577186460575, LR: 0.0003 +[2026-03-01 01:16:25] (step=0018148) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.550772842887889, LR: 0.0003 +[2026-03-01 01:16:33] (step=0018149) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.5509684993152026, LR: 0.0003 +[2026-03-01 01:16:41] (step=0018150) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.551164155742516, LR: 0.0003 +[2026-03-01 01:16:49] (step=0018151) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.5513598121698298, LR: 0.0003 +[2026-03-01 01:16:56] (step=0018152) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.5515554685971433, LR: 0.0003 +[2026-03-01 01:17:04] (step=0018153) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.551751125024457, LR: 0.0003 +[2026-03-01 01:17:12] (step=0018154) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.551946781451771, LR: 0.0003 +[2026-03-01 01:17:20] (step=0018155) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.5521424378790845, LR: 0.0003 +[2026-03-01 01:17:28] (step=0018156) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.552338094306398, LR: 0.0003 +[2026-03-01 01:17:36] (step=0018157) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.5525337507337116, LR: 0.0003 +[2026-03-01 01:17:43] (step=0018158) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.552729407161025, LR: 0.0003 +[2026-03-01 01:17:51] (step=0018159) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.5529250635883387, LR: 0.0003 +[2026-03-01 01:17:59] (step=0018160) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.5531207200156527, LR: 0.0003 +[2026-03-01 01:18:07] (step=0018161) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 3.5533163764429663, LR: 0.0003 +[2026-03-01 01:18:15] (step=0018162) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.55351203287028, LR: 0.0003 +[2026-03-01 01:18:23] (step=0018163) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.5537076892975934, LR: 0.0003 +[2026-03-01 01:18:30] (step=0018164) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.553903345724907, LR: 0.0003 +[2026-03-01 01:18:38] (step=0018165) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.5540990021522205, LR: 0.0003 +[2026-03-01 01:18:46] (step=0018166) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.5542946585795345, LR: 0.0003 +[2026-03-01 01:18:54] (step=0018167) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.554490315006848, LR: 0.0003 +[2026-03-01 01:19:02] (step=0018168) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.5546859714341617, LR: 0.0003 +[2026-03-01 01:19:10] (step=0018169) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.554881627861475, LR: 0.0003 +[2026-03-01 01:19:18] (step=0018170) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 3.5550772842887888, LR: 0.0003 +[2026-03-01 01:19:25] (step=0018171) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 3.5552729407161023, LR: 0.0003 +[2026-03-01 01:19:33] (step=0018172) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.5554685971434163, LR: 0.0003 +[2026-03-01 01:19:41] (step=0018173) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.55566425357073, LR: 0.0003 +[2026-03-01 01:19:49] (step=0018174) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 3.5558599099980435, LR: 0.0003 +[2026-03-01 01:19:57] (step=0018175) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.556055566425357, LR: 0.0003 +[2026-03-01 01:20:05] (step=0018176) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.5562512228526706, LR: 0.0003 +[2026-03-01 01:20:13] (step=0018177) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 3.556446879279984, LR: 0.0003 +[2026-03-01 01:20:20] (step=0018178) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.556642535707298, LR: 0.0003 +[2026-03-01 01:20:28] (step=0018179) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.5568381921346117, LR: 0.0003 +[2026-03-01 01:20:36] (step=0018180) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.5570338485619253, LR: 0.0003 +[2026-03-01 01:20:44] (step=0018181) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.557229504989239, LR: 0.0003 +[2026-03-01 01:20:52] (step=0018182) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.5574251614165524, LR: 0.0003 +[2026-03-01 01:21:00] (step=0018183) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.557620817843866, LR: 0.0003 +[2026-03-01 01:21:07] (step=0018184) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.55781647427118, LR: 0.0003 +[2026-03-01 01:21:15] (step=0018185) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.5580121306984935, LR: 0.0003 +[2026-03-01 01:21:23] (step=0018186) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.558207787125807, LR: 0.0003 +[2026-03-01 01:21:31] (step=0018187) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.5584034435531207, LR: 0.0003 +[2026-03-01 01:21:39] (step=0018188) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.558599099980434, LR: 0.0003 +[2026-03-01 01:21:47] (step=0018189) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.558794756407748, LR: 0.0003 +[2026-03-01 01:21:54] (step=0018190) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.558990412835062, LR: 0.0003 +[2026-03-01 01:22:02] (step=0018191) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.5591860692623754, LR: 0.0003 +[2026-03-01 01:22:10] (step=0018192) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 3.559381725689689, LR: 0.0003 +[2026-03-01 01:22:18] (step=0018193) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.5595773821170025, LR: 0.0003 +[2026-03-01 01:22:26] (step=0018194) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.559773038544316, LR: 0.0003 +[2026-03-01 01:22:34] (step=0018195) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.5599686949716296, LR: 0.0003 +[2026-03-01 01:22:41] (step=0018196) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 3.5601643513989436, LR: 0.0003 +[2026-03-01 01:22:49] (step=0018197) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.560360007826257, LR: 0.0003 +[2026-03-01 01:22:57] (step=0018198) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.5605556642535707, LR: 0.0003 +[2026-03-01 01:23:05] (step=0018199) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.5607513206808843, LR: 0.0003 +[2026-03-01 01:23:13] (step=0018200) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.560946977108198, LR: 0.0003 +[2026-03-01 01:23:21] (step=0018201) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.561142633535512, LR: 0.0003 +[2026-03-01 01:23:29] (step=0018202) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.5613382899628254, LR: 0.0003 +[2026-03-01 01:23:36] (step=0018203) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.561533946390139, LR: 0.0003 +[2026-03-01 01:23:44] (step=0018204) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.5617296028174525, LR: 0.0003 +[2026-03-01 01:23:52] (step=0018205) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.561925259244766, LR: 0.0003 +[2026-03-01 01:24:00] (step=0018206) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.5621209156720797, LR: 0.0003 +[2026-03-01 01:24:08] (step=0018207) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 3.5623165720993937, LR: 0.0003 +[2026-03-01 01:24:16] (step=0018208) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.5625122285267072, LR: 0.0003 +[2026-03-01 01:24:24] (step=0018209) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.562707884954021, LR: 0.0003 +[2026-03-01 01:24:31] (step=0018210) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.5629035413813344, LR: 0.0003 +[2026-03-01 01:24:39] (step=0018211) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 3.563099197808648, LR: 0.0003 +[2026-03-01 01:24:47] (step=0018212) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.5632948542359615, LR: 0.0003 +[2026-03-01 01:24:55] (step=0018213) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.5634905106632755, LR: 0.0003 +[2026-03-01 01:25:03] (step=0018214) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.563686167090589, LR: 0.0003 +[2026-03-01 01:25:11] (step=0018215) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 3.5638818235179026, LR: 0.0003 +[2026-03-01 01:25:18] (step=0018216) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.564077479945216, LR: 0.0003 +[2026-03-01 01:25:26] (step=0018217) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.5642731363725297, LR: 0.0003 +[2026-03-01 01:25:34] (step=0018218) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.5644687927998433, LR: 0.0003 +[2026-03-01 01:25:42] (step=0018219) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.5646644492271573, LR: 0.0003 +[2026-03-01 01:25:50] (step=0018220) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.564860105654471, LR: 0.0003 +[2026-03-01 01:25:58] (step=0018221) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.5650557620817844, LR: 0.0003 +[2026-03-01 01:26:05] (step=0018222) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.565251418509098, LR: 0.0003 +[2026-03-01 01:26:13] (step=0018223) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.5654470749364116, LR: 0.0003 +[2026-03-01 01:26:21] (step=0018224) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.565642731363725, LR: 0.0003 +[2026-03-01 01:26:29] (step=0018225) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.565838387791039, LR: 0.0003 +[2026-03-01 01:26:37] (step=0018226) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.5660340442183527, LR: 0.0003 +[2026-03-01 01:26:45] (step=0018227) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.5662297006456662, LR: 0.0003 +[2026-03-01 01:26:53] (step=0018228) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.56642535707298, LR: 0.0003 +[2026-03-01 01:27:00] (step=0018229) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.5666210135002934, LR: 0.0003 +[2026-03-01 01:27:08] (step=0018230) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.566816669927607, LR: 0.0003 +[2026-03-01 01:27:16] (step=0018231) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.567012326354921, LR: 0.0003 +[2026-03-01 01:27:24] (step=0018232) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.5672079827822345, LR: 0.0003 +[2026-03-01 01:27:32] (step=0018233) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 3.567403639209548, LR: 0.0003 +[2026-03-01 01:27:40] (step=0018234) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.5675992956368616, LR: 0.0003 +[2026-03-01 01:27:48] (step=0018235) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.567794952064175, LR: 0.0003 +[2026-03-01 01:27:55] (step=0018236) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.5679906084914887, LR: 0.0003 +[2026-03-01 01:28:03] (step=0018237) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.5681862649188028, LR: 0.0003 +[2026-03-01 01:28:11] (step=0018238) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.5683819213461163, LR: 0.0003 +[2026-03-01 01:28:19] (step=0018239) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.56857757777343, LR: 0.0003 +[2026-03-01 01:28:27] (step=0018240) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.5687732342007434, LR: 0.0003 +[2026-03-01 01:28:35] (step=0018241) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.568968890628057, LR: 0.0003 +[2026-03-01 01:28:42] (step=0018242) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.5691645470553706, LR: 0.0003 +[2026-03-01 01:28:50] (step=0018243) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.5693602034826846, LR: 0.0003 +[2026-03-01 01:28:58] (step=0018244) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 3.569555859909998, LR: 0.0003 +[2026-03-01 01:29:06] (step=0018245) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 3.5697515163373117, LR: 0.0003 +[2026-03-01 01:29:14] (step=0018246) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.5699471727646253, LR: 0.0003 +[2026-03-01 01:29:22] (step=0018247) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.570142829191939, LR: 0.0003 +[2026-03-01 01:29:29] (step=0018248) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.5703384856192524, LR: 0.0003 +[2026-03-01 01:29:37] (step=0018249) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.5705341420465664, LR: 0.0003 +[2026-03-01 01:29:45] (step=0018250) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.57072979847388, LR: 0.0003 +[2026-03-01 01:29:53] (step=0018251) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.5709254549011935, LR: 0.0003 +[2026-03-01 01:30:01] (step=0018252) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.571121111328507, LR: 0.0003 +[2026-03-01 01:30:09] (step=0018253) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.5713167677558206, LR: 0.0003 +[2026-03-01 01:30:17] (step=0018254) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.5715124241831346, LR: 0.0003 +[2026-03-01 01:30:24] (step=0018255) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.571708080610448, LR: 0.0003 +[2026-03-01 01:30:32] (step=0018256) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 3.5719037370377618, LR: 0.0003 +[2026-03-01 01:30:40] (step=0018257) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 3.5720993934650753, LR: 0.0003 +[2026-03-01 01:30:48] (step=0018258) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.572295049892389, LR: 0.0003 +[2026-03-01 01:30:56] (step=0018259) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.5724907063197024, LR: 0.0003 +[2026-03-01 01:31:04] (step=0018260) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.5726863627470165, LR: 0.0003 +[2026-03-01 01:31:11] (step=0018261) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.57288201917433, LR: 0.0003 +[2026-03-01 01:31:19] (step=0018262) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.5730776756016436, LR: 0.0003 +[2026-03-01 01:31:27] (step=0018263) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.573273332028957, LR: 0.0003 +[2026-03-01 01:31:35] (step=0018264) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.5734689884562707, LR: 0.0003 +[2026-03-01 01:31:43] (step=0018265) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.5736646448835843, LR: 0.0003 +[2026-03-01 01:31:51] (step=0018266) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.5738603013108983, LR: 0.0003 +[2026-03-01 01:31:59] (step=0018267) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.574055957738212, LR: 0.0003 +[2026-03-01 01:32:06] (step=0018268) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.5742516141655254, LR: 0.0003 +[2026-03-01 01:32:14] (step=0018269) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.574447270592839, LR: 0.0003 +[2026-03-01 01:32:22] (step=0018270) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.5746429270201525, LR: 0.0003 +[2026-03-01 01:32:30] (step=0018271) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.574838583447466, LR: 0.0003 +[2026-03-01 01:32:38] (step=0018272) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.57503423987478, LR: 0.0003 +[2026-03-01 01:32:46] (step=0018273) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.5752298963020936, LR: 0.0003 +[2026-03-01 01:32:53] (step=0018274) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 3.575425552729407, LR: 0.0003 +[2026-03-01 01:33:01] (step=0018275) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 3.5756212091567208, LR: 0.0003 +[2026-03-01 01:33:09] (step=0018276) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.5758168655840343, LR: 0.0003 +[2026-03-01 01:33:17] (step=0018277) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.576012522011348, LR: 0.0003 +[2026-03-01 01:33:25] (step=0018278) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.576208178438662, LR: 0.0003 +[2026-03-01 01:33:33] (step=0018279) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.5764038348659755, LR: 0.0003 +[2026-03-01 01:33:41] (step=0018280) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 3.576599491293289, LR: 0.0003 +[2026-03-01 01:33:48] (step=0018281) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.5767951477206026, LR: 0.0003 +[2026-03-01 01:33:56] (step=0018282) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.576990804147916, LR: 0.0003 +[2026-03-01 01:34:04] (step=0018283) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.5771864605752297, LR: 0.0003 +[2026-03-01 01:34:12] (step=0018284) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.5773821170025437, LR: 0.0003 +[2026-03-01 01:34:20] (step=0018285) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.5775777734298573, LR: 0.0003 +[2026-03-01 01:34:28] (step=0018286) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.577773429857171, LR: 0.0003 +[2026-03-01 01:34:35] (step=0018287) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.5779690862844844, LR: 0.0003 +[2026-03-01 01:34:43] (step=0018288) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.578164742711798, LR: 0.0003 +[2026-03-01 01:34:51] (step=0018289) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.5783603991391115, LR: 0.0003 +[2026-03-01 01:34:59] (step=0018290) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.5785560555664255, LR: 0.0003 +[2026-03-01 01:35:07] (step=0018291) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.578751711993739, LR: 0.0003 +[2026-03-01 01:35:15] (step=0018292) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.5789473684210527, LR: 0.0003 +[2026-03-01 01:35:23] (step=0018293) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.579143024848366, LR: 0.0003 +[2026-03-01 01:35:30] (step=0018294) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.57933868127568, LR: 0.0003 +[2026-03-01 01:35:38] (step=0018295) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 3.5795343377029933, LR: 0.0003 +[2026-03-01 01:35:46] (step=0018296) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.5797299941303073, LR: 0.0003 +[2026-03-01 01:35:54] (step=0018297) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.579925650557621, LR: 0.0003 +[2026-03-01 01:36:02] (step=0018298) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.5801213069849345, LR: 0.0003 +[2026-03-01 01:36:10] (step=0018299) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.580316963412248, LR: 0.0003 +[2026-03-01 01:36:18] (step=0018300) Train Loss: 0.4617, Train Steps/Sec: 0.12, Epoch: 3.5805126198395616, LR: 0.0003 +[2026-03-01 01:36:25] (step=0018301) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.580708276266875, LR: 0.0003 +[2026-03-01 01:36:33] (step=0018302) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.580903932694189, LR: 0.0003 +[2026-03-01 01:36:41] (step=0018303) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.5810995891215027, LR: 0.0003 +[2026-03-01 01:36:49] (step=0018304) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.5812952455488163, LR: 0.0003 +[2026-03-01 01:36:57] (step=0018305) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.58149090197613, LR: 0.0003 +[2026-03-01 01:37:05] (step=0018306) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.5816865584034434, LR: 0.0003 +[2026-03-01 01:37:13] (step=0018307) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.581882214830757, LR: 0.0003 +[2026-03-01 01:37:20] (step=0018308) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.582077871258071, LR: 0.0003 +[2026-03-01 01:37:28] (step=0018309) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.5822735276853845, LR: 0.0003 +[2026-03-01 01:37:36] (step=0018310) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.582469184112698, LR: 0.0003 +[2026-03-01 01:37:44] (step=0018311) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.5826648405400117, LR: 0.0003 +[2026-03-01 01:37:52] (step=0018312) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.5828604969673252, LR: 0.0003 +[2026-03-01 01:38:00] (step=0018313) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.5830561533946392, LR: 0.0003 +[2026-03-01 01:38:07] (step=0018314) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.583251809821953, LR: 0.0003 +[2026-03-01 01:38:15] (step=0018315) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.5834474662492664, LR: 0.0003 +[2026-03-01 01:38:23] (step=0018316) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.58364312267658, LR: 0.0003 +[2026-03-01 01:38:31] (step=0018317) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.5838387791038935, LR: 0.0003 +[2026-03-01 01:38:39] (step=0018318) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.584034435531207, LR: 0.0003 +[2026-03-01 01:38:47] (step=0018319) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.584230091958521, LR: 0.0003 +[2026-03-01 01:38:54] (step=0018320) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.5844257483858346, LR: 0.0003 +[2026-03-01 01:39:02] (step=0018321) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.584621404813148, LR: 0.0003 +[2026-03-01 01:39:10] (step=0018322) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.5848170612404617, LR: 0.0003 +[2026-03-01 01:39:18] (step=0018323) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.5850127176677753, LR: 0.0003 +[2026-03-01 01:39:26] (step=0018324) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.585208374095089, LR: 0.0003 +[2026-03-01 01:39:34] (step=0018325) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.585404030522403, LR: 0.0003 +[2026-03-01 01:39:42] (step=0018326) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.5855996869497164, LR: 0.0003 +[2026-03-01 01:39:50] (step=0018327) Train Loss: 0.4531, Train Steps/Sec: 0.12, Epoch: 3.58579534337703, LR: 0.0003 +[2026-03-01 01:39:57] (step=0018328) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.5859909998043435, LR: 0.0003 +[2026-03-01 01:40:05] (step=0018329) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.586186656231657, LR: 0.0003 +[2026-03-01 01:40:13] (step=0018330) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.5863823126589707, LR: 0.0003 +[2026-03-01 01:40:21] (step=0018331) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.5865779690862847, LR: 0.0003 +[2026-03-01 01:40:29] (step=0018332) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.5867736255135982, LR: 0.0003 +[2026-03-01 01:40:37] (step=0018333) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.586969281940912, LR: 0.0003 +[2026-03-01 01:40:44] (step=0018334) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.5871649383682254, LR: 0.0003 +[2026-03-01 01:40:52] (step=0018335) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.587360594795539, LR: 0.0003 +[2026-03-01 01:41:00] (step=0018336) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.5875562512228525, LR: 0.0003 +[2026-03-01 01:41:08] (step=0018337) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.5877519076501665, LR: 0.0003 +[2026-03-01 01:41:16] (step=0018338) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.58794756407748, LR: 0.0003 +[2026-03-01 01:41:24] (step=0018339) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.5881432205047936, LR: 0.0003 +[2026-03-01 01:41:31] (step=0018340) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.588338876932107, LR: 0.0003 +[2026-03-01 01:41:39] (step=0018341) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.5885345333594207, LR: 0.0003 +[2026-03-01 01:41:47] (step=0018342) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.5887301897867343, LR: 0.0003 +[2026-03-01 01:41:55] (step=0018343) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.5889258462140483, LR: 0.0003 +[2026-03-01 01:42:03] (step=0018344) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.589121502641362, LR: 0.0003 +[2026-03-01 01:42:11] (step=0018345) Train Loss: 0.4684, Train Steps/Sec: 0.13, Epoch: 3.5893171590686754, LR: 0.0003 +[2026-03-01 01:42:18] (step=0018346) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 3.589512815495989, LR: 0.0003 +[2026-03-01 01:42:26] (step=0018347) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.5897084719233026, LR: 0.0003 +[2026-03-01 01:42:34] (step=0018348) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.589904128350616, LR: 0.0003 +[2026-03-01 01:42:42] (step=0018349) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.59009978477793, LR: 0.0003 +[2026-03-01 01:42:50] (step=0018350) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.5902954412052437, LR: 0.0003 +[2026-03-01 01:42:58] (step=0018351) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.5904910976325572, LR: 0.0003 +[2026-03-01 01:43:06] (step=0018352) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.590686754059871, LR: 0.0003 +[2026-03-01 01:43:13] (step=0018353) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.5908824104871844, LR: 0.0003 +[2026-03-01 01:43:21] (step=0018354) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.591078066914498, LR: 0.0003 +[2026-03-01 01:43:29] (step=0018355) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.591273723341812, LR: 0.0003 +[2026-03-01 01:43:37] (step=0018356) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.5914693797691255, LR: 0.0003 +[2026-03-01 01:43:45] (step=0018357) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 3.591665036196439, LR: 0.0003 +[2026-03-01 01:43:53] (step=0018358) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.5918606926237526, LR: 0.0003 +[2026-03-01 01:44:01] (step=0018359) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 3.592056349051066, LR: 0.0003 +[2026-03-01 01:44:08] (step=0018360) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.5922520054783798, LR: 0.0003 +[2026-03-01 01:44:16] (step=0018361) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.5924476619056938, LR: 0.0003 +[2026-03-01 01:44:24] (step=0018362) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.5926433183330073, LR: 0.0003 +[2026-03-01 01:44:32] (step=0018363) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.592838974760321, LR: 0.0003 +[2026-03-01 01:44:40] (step=0018364) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.5930346311876344, LR: 0.0003 +[2026-03-01 01:44:48] (step=0018365) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 3.593230287614948, LR: 0.0003 +[2026-03-01 01:44:55] (step=0018366) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.593425944042262, LR: 0.0003 +[2026-03-01 01:45:03] (step=0018367) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.5936216004695756, LR: 0.0003 +[2026-03-01 01:45:11] (step=0018368) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 3.593817256896889, LR: 0.0003 +[2026-03-01 01:45:19] (step=0018369) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.5940129133242027, LR: 0.0003 +[2026-03-01 01:45:27] (step=0018370) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.5942085697515163, LR: 0.0003 +[2026-03-01 01:45:35] (step=0018371) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.59440422617883, LR: 0.0003 +[2026-03-01 01:45:43] (step=0018372) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 3.594599882606144, LR: 0.0003 +[2026-03-01 01:45:50] (step=0018373) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.5947955390334574, LR: 0.0003 +[2026-03-01 01:45:58] (step=0018374) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.594991195460771, LR: 0.0003 +[2026-03-01 01:46:06] (step=0018375) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.5951868518880845, LR: 0.0003 +[2026-03-01 01:46:14] (step=0018376) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.595382508315398, LR: 0.0003 +[2026-03-01 01:46:22] (step=0018377) Train Loss: 0.4451, Train Steps/Sec: 0.12, Epoch: 3.5955781647427116, LR: 0.0003 +[2026-03-01 01:46:30] (step=0018378) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.5957738211700256, LR: 0.0003 +[2026-03-01 01:46:38] (step=0018379) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 3.595969477597339, LR: 0.0003 +[2026-03-01 01:46:45] (step=0018380) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.5961651340246528, LR: 0.0003 +[2026-03-01 01:46:53] (step=0018381) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.5963607904519663, LR: 0.0003 +[2026-03-01 01:47:01] (step=0018382) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.59655644687928, LR: 0.0003 +[2026-03-01 01:47:09] (step=0018383) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 3.5967521033065935, LR: 0.0003 +[2026-03-01 01:47:17] (step=0018384) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.5969477597339075, LR: 0.0003 +[2026-03-01 01:47:25] (step=0018385) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.597143416161221, LR: 0.0003 +[2026-03-01 01:47:32] (step=0018386) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.5973390725885346, LR: 0.0003 +[2026-03-01 01:47:40] (step=0018387) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.597534729015848, LR: 0.0003 +[2026-03-01 01:47:48] (step=0018388) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.5977303854431617, LR: 0.0003 +[2026-03-01 01:47:56] (step=0018389) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.5979260418704753, LR: 0.0003 +[2026-03-01 01:48:04] (step=0018390) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.5981216982977893, LR: 0.0003 +[2026-03-01 01:48:12] (step=0018391) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.598317354725103, LR: 0.0003 +[2026-03-01 01:48:20] (step=0018392) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.5985130111524164, LR: 0.0003 +[2026-03-01 01:48:27] (step=0018393) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.59870866757973, LR: 0.0003 +[2026-03-01 01:48:35] (step=0018394) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.5989043240070435, LR: 0.0003 +[2026-03-01 01:48:43] (step=0018395) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 3.599099980434357, LR: 0.0003 +[2026-03-01 01:48:51] (step=0018396) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 3.599295636861671, LR: 0.0003 +[2026-03-01 01:48:59] (step=0018397) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.5994912932889847, LR: 0.0003 +[2026-03-01 01:49:07] (step=0018398) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.599686949716298, LR: 0.0003 +[2026-03-01 01:49:15] (step=0018399) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.5998826061436118, LR: 0.0003 +[2026-03-01 01:49:22] (step=0018400) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.6000782625709253, LR: 0.0003 +[2026-03-01 01:49:30] (step=0018401) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 3.600273918998239, LR: 0.0003 +[2026-03-01 01:49:38] (step=0018402) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.600469575425553, LR: 0.0003 +[2026-03-01 01:49:46] (step=0018403) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 3.6006652318528665, LR: 0.0003 +[2026-03-01 01:49:54] (step=0018404) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.60086088828018, LR: 0.0003 +[2026-03-01 01:50:02] (step=0018405) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.6010565447074936, LR: 0.0003 +[2026-03-01 01:50:09] (step=0018406) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.601252201134807, LR: 0.0003 +[2026-03-01 01:50:17] (step=0018407) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.6014478575621207, LR: 0.0003 +[2026-03-01 01:50:25] (step=0018408) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.6016435139894347, LR: 0.0003 +[2026-03-01 01:50:33] (step=0018409) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 3.6018391704167483, LR: 0.0003 +[2026-03-01 01:50:41] (step=0018410) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.602034826844062, LR: 0.0003 +[2026-03-01 01:50:49] (step=0018411) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.6022304832713754, LR: 0.0003 +[2026-03-01 01:50:56] (step=0018412) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.602426139698689, LR: 0.0003 +[2026-03-01 01:51:04] (step=0018413) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 3.6026217961260025, LR: 0.0003 +[2026-03-01 01:51:12] (step=0018414) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.6028174525533165, LR: 0.0003 +[2026-03-01 01:51:20] (step=0018415) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 3.60301310898063, LR: 0.0003 +[2026-03-01 01:51:28] (step=0018416) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.6032087654079437, LR: 0.0003 +[2026-03-01 01:51:36] (step=0018417) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.603404421835257, LR: 0.0003 +[2026-03-01 01:51:43] (step=0018418) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.603600078262571, LR: 0.0003 +[2026-03-01 01:51:51] (step=0018419) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.6037957346898843, LR: 0.0003 +[2026-03-01 01:51:59] (step=0018420) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 3.6039913911171984, LR: 0.0003 +[2026-03-01 01:52:07] (step=0018421) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.604187047544512, LR: 0.0003 +[2026-03-01 01:52:15] (step=0018422) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.6043827039718255, LR: 0.0003 +[2026-03-01 01:52:23] (step=0018423) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.604578360399139, LR: 0.0003 +[2026-03-01 01:52:31] (step=0018424) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.6047740168264526, LR: 0.0003 +[2026-03-01 01:52:38] (step=0018425) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.6049696732537666, LR: 0.0003 +[2026-03-01 01:52:46] (step=0018426) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.60516532968108, LR: 0.0003 +[2026-03-01 01:52:54] (step=0018427) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.6053609861083937, LR: 0.0003 +[2026-03-01 01:53:02] (step=0018428) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.6055566425357073, LR: 0.0003 +[2026-03-01 01:53:10] (step=0018429) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.605752298963021, LR: 0.0003 +[2026-03-01 01:53:18] (step=0018430) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 3.6059479553903344, LR: 0.0003 +[2026-03-01 01:53:25] (step=0018431) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.6061436118176484, LR: 0.0003 +[2026-03-01 01:53:33] (step=0018432) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.606339268244962, LR: 0.0003 +[2026-03-01 01:53:41] (step=0018433) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.6065349246722755, LR: 0.0003 +[2026-03-01 01:53:49] (step=0018434) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.606730581099589, LR: 0.0003 +[2026-03-01 01:53:57] (step=0018435) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.6069262375269027, LR: 0.0003 +[2026-03-01 01:54:05] (step=0018436) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.6071218939542162, LR: 0.0003 +[2026-03-01 01:54:13] (step=0018437) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.6073175503815302, LR: 0.0003 +[2026-03-01 01:54:20] (step=0018438) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 3.607513206808844, LR: 0.0003 +[2026-03-01 01:54:28] (step=0018439) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.6077088632361574, LR: 0.0003 +[2026-03-01 01:54:36] (step=0018440) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.607904519663471, LR: 0.0003 +[2026-03-01 01:54:44] (step=0018441) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.6081001760907845, LR: 0.0003 +[2026-03-01 01:54:52] (step=0018442) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.608295832518098, LR: 0.0003 +[2026-03-01 01:55:00] (step=0018443) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.608491488945412, LR: 0.0003 +[2026-03-01 01:55:07] (step=0018444) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.6086871453727256, LR: 0.0003 +[2026-03-01 01:55:15] (step=0018445) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.608882801800039, LR: 0.0003 +[2026-03-01 01:55:23] (step=0018446) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.6090784582273527, LR: 0.0003 +[2026-03-01 01:55:31] (step=0018447) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 3.6092741146546663, LR: 0.0003 +[2026-03-01 01:55:39] (step=0018448) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.60946977108198, LR: 0.0003 +[2026-03-01 01:55:47] (step=0018449) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.609665427509294, LR: 0.0003 +[2026-03-01 01:55:55] (step=0018450) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.6098610839366074, LR: 0.0003 +[2026-03-01 01:56:02] (step=0018451) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.610056740363921, LR: 0.0003 +[2026-03-01 01:56:10] (step=0018452) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.6102523967912346, LR: 0.0003 +[2026-03-01 01:56:18] (step=0018453) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.610448053218548, LR: 0.0003 +[2026-03-01 01:56:26] (step=0018454) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.6106437096458617, LR: 0.0003 +[2026-03-01 01:56:34] (step=0018455) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.6108393660731757, LR: 0.0003 +[2026-03-01 01:56:42] (step=0018456) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.6110350225004892, LR: 0.0003 +[2026-03-01 01:56:49] (step=0018457) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.611230678927803, LR: 0.0003 +[2026-03-01 01:56:57] (step=0018458) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.6114263353551164, LR: 0.0003 +[2026-03-01 01:57:05] (step=0018459) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.61162199178243, LR: 0.0003 +[2026-03-01 01:57:13] (step=0018460) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.6118176482097435, LR: 0.0003 +[2026-03-01 01:57:21] (step=0018461) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.6120133046370575, LR: 0.0003 +[2026-03-01 01:57:29] (step=0018462) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.612208961064371, LR: 0.0003 +[2026-03-01 01:57:37] (step=0018463) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.6124046174916846, LR: 0.0003 +[2026-03-01 01:57:44] (step=0018464) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.612600273918998, LR: 0.0003 +[2026-03-01 01:57:52] (step=0018465) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.6127959303463117, LR: 0.0003 +[2026-03-01 01:58:00] (step=0018466) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.6129915867736253, LR: 0.0003 +[2026-03-01 01:58:08] (step=0018467) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.6131872432009393, LR: 0.0003 +[2026-03-01 01:58:16] (step=0018468) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.613382899628253, LR: 0.0003 +[2026-03-01 01:58:24] (step=0018469) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 3.6135785560555664, LR: 0.0003 +[2026-03-01 01:58:32] (step=0018470) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.61377421248288, LR: 0.0003 +[2026-03-01 01:58:39] (step=0018471) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.6139698689101936, LR: 0.0003 +[2026-03-01 01:58:47] (step=0018472) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 3.614165525337507, LR: 0.0003 +[2026-03-01 01:58:55] (step=0018473) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.614361181764821, LR: 0.0003 +[2026-03-01 01:59:03] (step=0018474) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.6145568381921347, LR: 0.0003 +[2026-03-01 01:59:11] (step=0018475) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 3.6147524946194483, LR: 0.0003 +[2026-03-01 01:59:19] (step=0018476) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.614948151046762, LR: 0.0003 +[2026-03-01 01:59:27] (step=0018477) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.6151438074740754, LR: 0.0003 +[2026-03-01 01:59:34] (step=0018478) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.615339463901389, LR: 0.0003 +[2026-03-01 01:59:42] (step=0018479) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.615535120328703, LR: 0.0003 +[2026-03-01 01:59:50] (step=0018480) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.6157307767560165, LR: 0.0003 +[2026-03-01 01:59:58] (step=0018481) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.61592643318333, LR: 0.0003 +[2026-03-01 02:00:06] (step=0018482) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.6161220896106436, LR: 0.0003 +[2026-03-01 02:00:14] (step=0018483) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.616317746037957, LR: 0.0003 +[2026-03-01 02:00:21] (step=0018484) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.616513402465271, LR: 0.0003 +[2026-03-01 02:00:29] (step=0018485) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.6167090588925848, LR: 0.0003 +[2026-03-01 02:00:37] (step=0018486) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.6169047153198983, LR: 0.0003 +[2026-03-01 02:00:45] (step=0018487) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.617100371747212, LR: 0.0003 +[2026-03-01 02:00:53] (step=0018488) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.6172960281745254, LR: 0.0003 +[2026-03-01 02:01:01] (step=0018489) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.617491684601839, LR: 0.0003 +[2026-03-01 02:01:08] (step=0018490) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.617687341029153, LR: 0.0003 +[2026-03-01 02:01:16] (step=0018491) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.6178829974564666, LR: 0.0003 +[2026-03-01 02:01:24] (step=0018492) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.61807865388378, LR: 0.0003 +[2026-03-01 02:01:32] (step=0018493) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.6182743103110937, LR: 0.0003 +[2026-03-01 02:01:40] (step=0018494) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.6184699667384073, LR: 0.0003 +[2026-03-01 02:01:48] (step=0018495) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.618665623165721, LR: 0.0003 +[2026-03-01 02:01:56] (step=0018496) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.618861279593035, LR: 0.0003 +[2026-03-01 02:02:03] (step=0018497) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.6190569360203484, LR: 0.0003 +[2026-03-01 02:02:11] (step=0018498) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.619252592447662, LR: 0.0003 +[2026-03-01 02:02:19] (step=0018499) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.6194482488749755, LR: 0.0003 +[2026-03-01 02:02:27] (step=0018500) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.619643905302289, LR: 0.0003 +[2026-03-01 02:02:27] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0018500/ +[2026-03-01 02:02:35] (step=0018501) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.6198395617296026, LR: 0.0003 +[2026-03-01 02:02:43] (step=0018502) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.6200352181569166, LR: 0.0003 +[2026-03-01 02:02:50] (step=0018503) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.62023087458423, LR: 0.0003 +[2026-03-01 02:02:58] (step=0018504) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.6204265310115438, LR: 0.0003 +[2026-03-01 02:03:06] (step=0018505) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.6206221874388573, LR: 0.0003 +[2026-03-01 02:03:14] (step=0018506) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.620817843866171, LR: 0.0003 +[2026-03-01 02:03:22] (step=0018507) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.6210135002934845, LR: 0.0003 +[2026-03-01 02:03:30] (step=0018508) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.6212091567207985, LR: 0.0003 +[2026-03-01 02:03:38] (step=0018509) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 3.621404813148112, LR: 0.0003 +[2026-03-01 02:03:45] (step=0018510) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.6216004695754256, LR: 0.0003 +[2026-03-01 02:03:53] (step=0018511) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.621796126002739, LR: 0.0003 +[2026-03-01 02:04:01] (step=0018512) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.6219917824300527, LR: 0.0003 +[2026-03-01 02:04:09] (step=0018513) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.6221874388573663, LR: 0.0003 +[2026-03-01 02:04:17] (step=0018514) Train Loss: 0.4729, Train Steps/Sec: 0.13, Epoch: 3.6223830952846803, LR: 0.0003 +[2026-03-01 02:04:25] (step=0018515) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.622578751711994, LR: 0.0003 +[2026-03-01 02:04:32] (step=0018516) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.6227744081393074, LR: 0.0003 +[2026-03-01 02:04:40] (step=0018517) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.622970064566621, LR: 0.0003 +[2026-03-01 02:04:48] (step=0018518) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.6231657209939345, LR: 0.0003 +[2026-03-01 02:04:56] (step=0018519) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.623361377421248, LR: 0.0003 +[2026-03-01 02:05:04] (step=0018520) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.623557033848562, LR: 0.0003 +[2026-03-01 02:05:12] (step=0018521) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.6237526902758757, LR: 0.0003 +[2026-03-01 02:05:20] (step=0018522) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.623948346703189, LR: 0.0003 +[2026-03-01 02:05:28] (step=0018523) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.624144003130503, LR: 0.0003 +[2026-03-01 02:05:35] (step=0018524) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.6243396595578163, LR: 0.0003 +[2026-03-01 02:05:43] (step=0018525) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 3.62453531598513, LR: 0.0003 +[2026-03-01 02:05:51] (step=0018526) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.624730972412444, LR: 0.0003 +[2026-03-01 02:05:59] (step=0018527) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 3.6249266288397575, LR: 0.0003 +[2026-03-01 02:06:07] (step=0018528) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.625122285267071, LR: 0.0003 +[2026-03-01 02:06:15] (step=0018529) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.6253179416943846, LR: 0.0003 +[2026-03-01 02:06:22] (step=0018530) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.625513598121698, LR: 0.0003 +[2026-03-01 02:06:30] (step=0018531) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.6257092545490117, LR: 0.0003 +[2026-03-01 02:06:38] (step=0018532) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.6259049109763257, LR: 0.0003 +[2026-03-01 02:06:46] (step=0018533) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.6261005674036393, LR: 0.0003 +[2026-03-01 02:06:54] (step=0018534) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 3.626296223830953, LR: 0.0003 +[2026-03-01 02:07:02] (step=0018535) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.6264918802582664, LR: 0.0003 +[2026-03-01 02:07:09] (step=0018536) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.62668753668558, LR: 0.0003 +[2026-03-01 02:07:17] (step=0018537) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.626883193112894, LR: 0.0003 +[2026-03-01 02:07:25] (step=0018538) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.6270788495402075, LR: 0.0003 +[2026-03-01 02:07:33] (step=0018539) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.627274505967521, LR: 0.0003 +[2026-03-01 02:07:41] (step=0018540) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 3.6274701623948347, LR: 0.0003 +[2026-03-01 02:07:49] (step=0018541) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.6276658188221482, LR: 0.0003 +[2026-03-01 02:07:57] (step=0018542) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 3.627861475249462, LR: 0.0003 +[2026-03-01 02:08:04] (step=0018543) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.628057131676776, LR: 0.0003 +[2026-03-01 02:08:12] (step=0018544) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.6282527881040894, LR: 0.0003 +[2026-03-01 02:08:20] (step=0018545) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 3.628448444531403, LR: 0.0003 +[2026-03-01 02:08:28] (step=0018546) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.6286441009587165, LR: 0.0003 +[2026-03-01 02:08:36] (step=0018547) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.62883975738603, LR: 0.0003 +[2026-03-01 02:08:44] (step=0018548) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.6290354138133436, LR: 0.0003 +[2026-03-01 02:08:51] (step=0018549) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.6292310702406576, LR: 0.0003 +[2026-03-01 02:08:59] (step=0018550) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.629426726667971, LR: 0.0003 +[2026-03-01 02:09:07] (step=0018551) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.6296223830952847, LR: 0.0003 +[2026-03-01 02:09:15] (step=0018552) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.6298180395225983, LR: 0.0003 +[2026-03-01 02:09:23] (step=0018553) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.630013695949912, LR: 0.0003 +[2026-03-01 02:09:31] (step=0018554) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.6302093523772254, LR: 0.0003 +[2026-03-01 02:09:39] (step=0018555) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.6304050088045394, LR: 0.0003 +[2026-03-01 02:09:46] (step=0018556) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.630600665231853, LR: 0.0003 +[2026-03-01 02:09:54] (step=0018557) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.6307963216591665, LR: 0.0003 +[2026-03-01 02:10:02] (step=0018558) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.63099197808648, LR: 0.0003 +[2026-03-01 02:10:10] (step=0018559) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.6311876345137937, LR: 0.0003 +[2026-03-01 02:10:18] (step=0018560) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 3.6313832909411072, LR: 0.0003 +[2026-03-01 02:10:26] (step=0018561) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.6315789473684212, LR: 0.0003 +[2026-03-01 02:10:34] (step=0018562) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.631774603795735, LR: 0.0003 +[2026-03-01 02:10:41] (step=0018563) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 3.6319702602230484, LR: 0.0003 +[2026-03-01 02:10:49] (step=0018564) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.632165916650362, LR: 0.0003 +[2026-03-01 02:10:57] (step=0018565) Train Loss: 0.4485, Train Steps/Sec: 0.12, Epoch: 3.6323615730776755, LR: 0.0003 +[2026-03-01 02:11:05] (step=0018566) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.632557229504989, LR: 0.0003 +[2026-03-01 02:11:13] (step=0018567) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.632752885932303, LR: 0.0003 +[2026-03-01 02:11:21] (step=0018568) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.6329485423596166, LR: 0.0003 +[2026-03-01 02:11:29] (step=0018569) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.63314419878693, LR: 0.0003 +[2026-03-01 02:11:36] (step=0018570) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.6333398552142437, LR: 0.0003 +[2026-03-01 02:11:44] (step=0018571) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.6335355116415573, LR: 0.0003 +[2026-03-01 02:11:52] (step=0018572) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.633731168068871, LR: 0.0003 +[2026-03-01 02:12:00] (step=0018573) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.633926824496185, LR: 0.0003 +[2026-03-01 02:12:08] (step=0018574) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.6341224809234984, LR: 0.0003 +[2026-03-01 02:12:16] (step=0018575) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.634318137350812, LR: 0.0003 +[2026-03-01 02:12:23] (step=0018576) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.6345137937781256, LR: 0.0003 +[2026-03-01 02:12:31] (step=0018577) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.634709450205439, LR: 0.0003 +[2026-03-01 02:12:39] (step=0018578) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.6349051066327527, LR: 0.0003 +[2026-03-01 02:12:47] (step=0018579) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.6351007630600667, LR: 0.0003 +[2026-03-01 02:12:55] (step=0018580) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.6352964194873802, LR: 0.0003 +[2026-03-01 02:13:03] (step=0018581) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.635492075914694, LR: 0.0003 +[2026-03-01 02:13:11] (step=0018582) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 3.6356877323420074, LR: 0.0003 +[2026-03-01 02:13:18] (step=0018583) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.635883388769321, LR: 0.0003 +[2026-03-01 02:13:26] (step=0018584) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.6360790451966345, LR: 0.0003 +[2026-03-01 02:13:34] (step=0018585) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 3.6362747016239485, LR: 0.0003 +[2026-03-01 02:13:42] (step=0018586) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.636470358051262, LR: 0.0003 +[2026-03-01 02:13:50] (step=0018587) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.6366660144785756, LR: 0.0003 +[2026-03-01 02:13:58] (step=0018588) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 3.636861670905889, LR: 0.0003 +[2026-03-01 02:14:05] (step=0018589) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.6370573273332027, LR: 0.0003 +[2026-03-01 02:14:13] (step=0018590) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.6372529837605163, LR: 0.0003 +[2026-03-01 02:14:21] (step=0018591) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.6374486401878303, LR: 0.0003 +[2026-03-01 02:14:29] (step=0018592) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 3.637644296615144, LR: 0.0003 +[2026-03-01 02:14:37] (step=0018593) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.6378399530424574, LR: 0.0003 +[2026-03-01 02:14:45] (step=0018594) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.638035609469771, LR: 0.0003 +[2026-03-01 02:14:53] (step=0018595) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.6382312658970846, LR: 0.0003 +[2026-03-01 02:15:00] (step=0018596) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 3.6384269223243986, LR: 0.0003 +[2026-03-01 02:15:08] (step=0018597) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.638622578751712, LR: 0.0003 +[2026-03-01 02:15:16] (step=0018598) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.6388182351790257, LR: 0.0003 +[2026-03-01 02:15:24] (step=0018599) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.6390138916063393, LR: 0.0003 +[2026-03-01 02:15:32] (step=0018600) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.639209548033653, LR: 0.0003 +[2026-03-01 02:15:40] (step=0018601) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.6394052044609664, LR: 0.0003 +[2026-03-01 02:15:47] (step=0018602) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.6396008608882804, LR: 0.0003 +[2026-03-01 02:15:55] (step=0018603) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.639796517315594, LR: 0.0003 +[2026-03-01 02:16:03] (step=0018604) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.6399921737429075, LR: 0.0003 +[2026-03-01 02:16:11] (step=0018605) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.640187830170221, LR: 0.0003 +[2026-03-01 02:16:19] (step=0018606) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.6403834865975346, LR: 0.0003 +[2026-03-01 02:16:27] (step=0018607) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.640579143024848, LR: 0.0003 +[2026-03-01 02:16:35] (step=0018608) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.640774799452162, LR: 0.0003 +[2026-03-01 02:16:42] (step=0018609) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.6409704558794758, LR: 0.0003 +[2026-03-01 02:16:50] (step=0018610) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.6411661123067893, LR: 0.0003 +[2026-03-01 02:16:58] (step=0018611) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.641361768734103, LR: 0.0003 +[2026-03-01 02:17:06] (step=0018612) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.6415574251614165, LR: 0.0003 +[2026-03-01 02:17:14] (step=0018613) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.64175308158873, LR: 0.0003 +[2026-03-01 02:17:22] (step=0018614) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.641948738016044, LR: 0.0003 +[2026-03-01 02:17:30] (step=0018615) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.6421443944433576, LR: 0.0003 +[2026-03-01 02:17:37] (step=0018616) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 3.642340050870671, LR: 0.0003 +[2026-03-01 02:17:45] (step=0018617) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.6425357072979847, LR: 0.0003 +[2026-03-01 02:17:53] (step=0018618) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.6427313637252983, LR: 0.0003 +[2026-03-01 02:18:01] (step=0018619) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.642927020152612, LR: 0.0003 +[2026-03-01 02:18:09] (step=0018620) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.643122676579926, LR: 0.0003 +[2026-03-01 02:18:17] (step=0018621) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.6433183330072394, LR: 0.0003 +[2026-03-01 02:18:24] (step=0018622) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.643513989434553, LR: 0.0003 +[2026-03-01 02:18:32] (step=0018623) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.6437096458618665, LR: 0.0003 +[2026-03-01 02:18:40] (step=0018624) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.64390530228918, LR: 0.0003 +[2026-03-01 02:18:48] (step=0018625) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 3.6441009587164936, LR: 0.0003 +[2026-03-01 02:18:56] (step=0018626) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.6442966151438076, LR: 0.0003 +[2026-03-01 02:19:04] (step=0018627) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.644492271571121, LR: 0.0003 +[2026-03-01 02:19:12] (step=0018628) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 3.6446879279984348, LR: 0.0003 +[2026-03-01 02:19:19] (step=0018629) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.6448835844257483, LR: 0.0003 +[2026-03-01 02:19:27] (step=0018630) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.645079240853062, LR: 0.0003 +[2026-03-01 02:19:35] (step=0018631) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.6452748972803755, LR: 0.0003 +[2026-03-01 02:19:43] (step=0018632) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.6454705537076895, LR: 0.0003 +[2026-03-01 02:19:51] (step=0018633) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.645666210135003, LR: 0.0003 +[2026-03-01 02:19:59] (step=0018634) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.6458618665623166, LR: 0.0003 +[2026-03-01 02:20:06] (step=0018635) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.64605752298963, LR: 0.0003 +[2026-03-01 02:20:14] (step=0018636) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.6462531794169437, LR: 0.0003 +[2026-03-01 02:20:22] (step=0018637) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.6464488358442573, LR: 0.0003 +[2026-03-01 02:20:30] (step=0018638) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.6466444922715713, LR: 0.0003 +[2026-03-01 02:20:38] (step=0018639) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.646840148698885, LR: 0.0003 +[2026-03-01 02:20:46] (step=0018640) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 3.6470358051261984, LR: 0.0003 +[2026-03-01 02:20:53] (step=0018641) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 3.647231461553512, LR: 0.0003 +[2026-03-01 02:21:01] (step=0018642) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.6474271179808255, LR: 0.0003 +[2026-03-01 02:21:09] (step=0018643) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.647622774408139, LR: 0.0003 +[2026-03-01 02:21:17] (step=0018644) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.647818430835453, LR: 0.0003 +[2026-03-01 02:21:25] (step=0018645) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.6480140872627667, LR: 0.0003 +[2026-03-01 02:21:33] (step=0018646) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.64820974369008, LR: 0.0003 +[2026-03-01 02:21:41] (step=0018647) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.648405400117394, LR: 0.0003 +[2026-03-01 02:21:48] (step=0018648) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 3.6486010565447073, LR: 0.0003 +[2026-03-01 02:21:56] (step=0018649) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.6487967129720213, LR: 0.0003 +[2026-03-01 02:22:04] (step=0018650) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.648992369399335, LR: 0.0003 +[2026-03-01 02:22:12] (step=0018651) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.6491880258266485, LR: 0.0003 +[2026-03-01 02:22:20] (step=0018652) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.649383682253962, LR: 0.0003 +[2026-03-01 02:22:28] (step=0018653) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.6495793386812756, LR: 0.0003 +[2026-03-01 02:22:36] (step=0018654) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.649774995108589, LR: 0.0003 +[2026-03-01 02:22:43] (step=0018655) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.649970651535903, LR: 0.0003 +[2026-03-01 02:22:51] (step=0018656) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 3.6501663079632167, LR: 0.0003 +[2026-03-01 02:22:59] (step=0018657) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.6503619643905303, LR: 0.0003 +[2026-03-01 02:23:07] (step=0018658) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.650557620817844, LR: 0.0003 +[2026-03-01 02:23:15] (step=0018659) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.6507532772451574, LR: 0.0003 +[2026-03-01 02:23:23] (step=0018660) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.650948933672471, LR: 0.0003 +[2026-03-01 02:23:30] (step=0018661) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.651144590099785, LR: 0.0003 +[2026-03-01 02:23:38] (step=0018662) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.6513402465270985, LR: 0.0003 +[2026-03-01 02:23:46] (step=0018663) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.651535902954412, LR: 0.0003 +[2026-03-01 02:23:54] (step=0018664) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.6517315593817257, LR: 0.0003 +[2026-03-01 02:24:02] (step=0018665) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.6519272158090392, LR: 0.0003 +[2026-03-01 02:24:10] (step=0018666) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.652122872236353, LR: 0.0003 +[2026-03-01 02:24:17] (step=0018667) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.652318528663667, LR: 0.0003 +[2026-03-01 02:24:25] (step=0018668) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.6525141850909804, LR: 0.0003 +[2026-03-01 02:24:33] (step=0018669) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 3.652709841518294, LR: 0.0003 +[2026-03-01 02:24:41] (step=0018670) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.6529054979456075, LR: 0.0003 +[2026-03-01 02:24:49] (step=0018671) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 3.653101154372921, LR: 0.0003 +[2026-03-01 02:24:57] (step=0018672) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 3.6532968108002346, LR: 0.0003 +[2026-03-01 02:25:05] (step=0018673) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.6534924672275486, LR: 0.0003 +[2026-03-01 02:25:12] (step=0018674) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.653688123654862, LR: 0.0003 +[2026-03-01 02:25:20] (step=0018675) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.6538837800821757, LR: 0.0003 +[2026-03-01 02:25:28] (step=0018676) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.6540794365094893, LR: 0.0003 +[2026-03-01 02:25:36] (step=0018677) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.654275092936803, LR: 0.0003 +[2026-03-01 02:25:44] (step=0018678) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.6544707493641164, LR: 0.0003 +[2026-03-01 02:25:52] (step=0018679) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.6546664057914304, LR: 0.0003 +[2026-03-01 02:26:00] (step=0018680) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.654862062218744, LR: 0.0003 +[2026-03-01 02:26:07] (step=0018681) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.6550577186460576, LR: 0.0003 +[2026-03-01 02:26:15] (step=0018682) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.655253375073371, LR: 0.0003 +[2026-03-01 02:26:23] (step=0018683) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.6554490315006847, LR: 0.0003 +[2026-03-01 02:26:31] (step=0018684) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.6556446879279982, LR: 0.0003 +[2026-03-01 02:26:39] (step=0018685) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.6558403443553122, LR: 0.0003 +[2026-03-01 02:26:47] (step=0018686) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.656036000782626, LR: 0.0003 +[2026-03-01 02:26:54] (step=0018687) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.6562316572099394, LR: 0.0003 +[2026-03-01 02:27:02] (step=0018688) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.656427313637253, LR: 0.0003 +[2026-03-01 02:27:10] (step=0018689) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.6566229700645665, LR: 0.0003 +[2026-03-01 02:27:18] (step=0018690) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.65681862649188, LR: 0.0003 +[2026-03-01 02:27:26] (step=0018691) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.657014282919194, LR: 0.0003 +[2026-03-01 02:27:34] (step=0018692) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.6572099393465076, LR: 0.0003 +[2026-03-01 02:27:42] (step=0018693) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.657405595773821, LR: 0.0003 +[2026-03-01 02:27:49] (step=0018694) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.6576012522011347, LR: 0.0003 +[2026-03-01 02:27:57] (step=0018695) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 3.6577969086284483, LR: 0.0003 +[2026-03-01 02:28:05] (step=0018696) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.657992565055762, LR: 0.0003 +[2026-03-01 02:28:13] (step=0018697) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.658188221483076, LR: 0.0003 +[2026-03-01 02:28:21] (step=0018698) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.6583838779103894, LR: 0.0003 +[2026-03-01 02:28:29] (step=0018699) Train Loss: 0.4541, Train Steps/Sec: 0.12, Epoch: 3.658579534337703, LR: 0.0003 +[2026-03-01 02:28:37] (step=0018700) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 3.6587751907650166, LR: 0.0003 +[2026-03-01 02:28:44] (step=0018701) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.65897084719233, LR: 0.0003 +[2026-03-01 02:28:52] (step=0018702) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.6591665036196437, LR: 0.0003 +[2026-03-01 02:29:00] (step=0018703) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.6593621600469577, LR: 0.0003 +[2026-03-01 02:29:08] (step=0018704) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.6595578164742713, LR: 0.0003 +[2026-03-01 02:29:16] (step=0018705) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.659753472901585, LR: 0.0003 +[2026-03-01 02:29:24] (step=0018706) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.6599491293288984, LR: 0.0003 +[2026-03-01 02:29:31] (step=0018707) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.660144785756212, LR: 0.0003 +[2026-03-01 02:29:39] (step=0018708) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.660340442183526, LR: 0.0003 +[2026-03-01 02:29:47] (step=0018709) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.6605360986108395, LR: 0.0003 +[2026-03-01 02:29:55] (step=0018710) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.660731755038153, LR: 0.0003 +[2026-03-01 02:30:03] (step=0018711) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.6609274114654666, LR: 0.0003 +[2026-03-01 02:30:11] (step=0018712) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.66112306789278, LR: 0.0003 +[2026-03-01 02:30:18] (step=0018713) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.6613187243200938, LR: 0.0003 +[2026-03-01 02:30:26] (step=0018714) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.6615143807474078, LR: 0.0003 +[2026-03-01 02:30:34] (step=0018715) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 3.6617100371747213, LR: 0.0003 +[2026-03-01 02:30:42] (step=0018716) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.661905693602035, LR: 0.0003 +[2026-03-01 02:30:50] (step=0018717) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.6621013500293484, LR: 0.0003 +[2026-03-01 02:30:58] (step=0018718) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.662297006456662, LR: 0.0003 +[2026-03-01 02:31:06] (step=0018719) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.6624926628839756, LR: 0.0003 +[2026-03-01 02:31:14] (step=0018720) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 3.6626883193112896, LR: 0.0003 +[2026-03-01 02:31:21] (step=0018721) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.662883975738603, LR: 0.0003 +[2026-03-01 02:31:29] (step=0018722) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.6630796321659167, LR: 0.0003 +[2026-03-01 02:31:37] (step=0018723) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.6632752885932303, LR: 0.0003 +[2026-03-01 02:31:45] (step=0018724) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.663470945020544, LR: 0.0003 +[2026-03-01 02:31:53] (step=0018725) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.6636666014478574, LR: 0.0003 +[2026-03-01 02:32:01] (step=0018726) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.6638622578751714, LR: 0.0003 +[2026-03-01 02:32:08] (step=0018727) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.664057914302485, LR: 0.0003 +[2026-03-01 02:32:16] (step=0018728) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.6642535707297985, LR: 0.0003 +[2026-03-01 02:32:24] (step=0018729) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.664449227157112, LR: 0.0003 +[2026-03-01 02:32:32] (step=0018730) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.6646448835844256, LR: 0.0003 +[2026-03-01 02:32:40] (step=0018731) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.664840540011739, LR: 0.0003 +[2026-03-01 02:32:48] (step=0018732) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.665036196439053, LR: 0.0003 +[2026-03-01 02:32:55] (step=0018733) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.6652318528663668, LR: 0.0003 +[2026-03-01 02:33:03] (step=0018734) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.6654275092936803, LR: 0.0003 +[2026-03-01 02:33:11] (step=0018735) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.665623165720994, LR: 0.0003 +[2026-03-01 02:33:19] (step=0018736) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.6658188221483075, LR: 0.0003 +[2026-03-01 02:33:27] (step=0018737) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.666014478575621, LR: 0.0003 +[2026-03-01 02:33:35] (step=0018738) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.666210135002935, LR: 0.0003 +[2026-03-01 02:33:42] (step=0018739) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.6664057914302486, LR: 0.0003 +[2026-03-01 02:33:50] (step=0018740) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.666601447857562, LR: 0.0003 +[2026-03-01 02:33:58] (step=0018741) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.6667971042848757, LR: 0.0003 +[2026-03-01 02:34:06] (step=0018742) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.6669927607121893, LR: 0.0003 +[2026-03-01 02:34:14] (step=0018743) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.667188417139503, LR: 0.0003 +[2026-03-01 02:34:22] (step=0018744) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.667384073566817, LR: 0.0003 +[2026-03-01 02:34:29] (step=0018745) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.6675797299941304, LR: 0.0003 +[2026-03-01 02:34:37] (step=0018746) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.667775386421444, LR: 0.0003 +[2026-03-01 02:34:45] (step=0018747) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.6679710428487575, LR: 0.0003 +[2026-03-01 02:34:53] (step=0018748) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.668166699276071, LR: 0.0003 +[2026-03-01 02:35:01] (step=0018749) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 3.6683623557033846, LR: 0.0003 +[2026-03-01 02:35:09] (step=0018750) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.6685580121306987, LR: 0.0003 +[2026-03-01 02:35:17] (step=0018751) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.668753668558012, LR: 0.0003 +[2026-03-01 02:35:24] (step=0018752) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.6689493249853258, LR: 0.0003 +[2026-03-01 02:35:32] (step=0018753) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.6691449814126393, LR: 0.0003 +[2026-03-01 02:35:40] (step=0018754) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.669340637839953, LR: 0.0003 +[2026-03-01 02:35:48] (step=0018755) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 3.6695362942672665, LR: 0.0003 +[2026-03-01 02:35:56] (step=0018756) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.6697319506945805, LR: 0.0003 +[2026-03-01 02:36:04] (step=0018757) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.669927607121894, LR: 0.0003 +[2026-03-01 02:36:11] (step=0018758) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.6701232635492076, LR: 0.0003 +[2026-03-01 02:36:19] (step=0018759) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.670318919976521, LR: 0.0003 +[2026-03-01 02:36:27] (step=0018760) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.6705145764038347, LR: 0.0003 +[2026-03-01 02:36:35] (step=0018761) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.6707102328311487, LR: 0.0003 +[2026-03-01 02:36:43] (step=0018762) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 3.6709058892584623, LR: 0.0003 +[2026-03-01 02:36:51] (step=0018763) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.671101545685776, LR: 0.0003 +[2026-03-01 02:36:58] (step=0018764) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.6712972021130894, LR: 0.0003 +[2026-03-01 02:37:06] (step=0018765) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.671492858540403, LR: 0.0003 +[2026-03-01 02:37:14] (step=0018766) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.6716885149677165, LR: 0.0003 +[2026-03-01 02:37:22] (step=0018767) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.6718841713950305, LR: 0.0003 +[2026-03-01 02:37:30] (step=0018768) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.672079827822344, LR: 0.0003 +[2026-03-01 02:37:38] (step=0018769) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.6722754842496577, LR: 0.0003 +[2026-03-01 02:37:46] (step=0018770) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.6724711406769712, LR: 0.0003 +[2026-03-01 02:37:53] (step=0018771) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.672666797104285, LR: 0.0003 +[2026-03-01 02:38:01] (step=0018772) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.6728624535315983, LR: 0.0003 +[2026-03-01 02:38:09] (step=0018773) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.6730581099589124, LR: 0.0003 +[2026-03-01 02:38:17] (step=0018774) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.673253766386226, LR: 0.0003 +[2026-03-01 02:38:25] (step=0018775) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.6734494228135395, LR: 0.0003 +[2026-03-01 02:38:33] (step=0018776) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.673645079240853, LR: 0.0003 +[2026-03-01 02:38:40] (step=0018777) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.6738407356681666, LR: 0.0003 +[2026-03-01 02:38:48] (step=0018778) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.67403639209548, LR: 0.0003 +[2026-03-01 02:38:56] (step=0018779) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.674232048522794, LR: 0.0003 +[2026-03-01 02:39:04] (step=0018780) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.6744277049501077, LR: 0.0003 +[2026-03-01 02:39:12] (step=0018781) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.6746233613774213, LR: 0.0003 +[2026-03-01 02:39:20] (step=0018782) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.674819017804735, LR: 0.0003 +[2026-03-01 02:39:27] (step=0018783) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.6750146742320484, LR: 0.0003 +[2026-03-01 02:39:35] (step=0018784) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.675210330659362, LR: 0.0003 +[2026-03-01 02:39:43] (step=0018785) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 3.675405987086676, LR: 0.0003 +[2026-03-01 02:39:51] (step=0018786) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.6756016435139895, LR: 0.0003 +[2026-03-01 02:39:59] (step=0018787) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.675797299941303, LR: 0.0003 +[2026-03-01 02:40:07] (step=0018788) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.6759929563686167, LR: 0.0003 +[2026-03-01 02:40:14] (step=0018789) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.6761886127959302, LR: 0.0003 +[2026-03-01 02:40:22] (step=0018790) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.676384269223244, LR: 0.0003 +[2026-03-01 02:40:30] (step=0018791) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.676579925650558, LR: 0.0003 +[2026-03-01 02:40:38] (step=0018792) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.6767755820778714, LR: 0.0003 +[2026-03-01 02:40:46] (step=0018793) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.676971238505185, LR: 0.0003 +[2026-03-01 02:40:54] (step=0018794) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.6771668949324985, LR: 0.0003 +[2026-03-01 02:41:02] (step=0018795) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.677362551359812, LR: 0.0003 +[2026-03-01 02:41:09] (step=0018796) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 3.6775582077871256, LR: 0.0003 +[2026-03-01 02:41:17] (step=0018797) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.6777538642144396, LR: 0.0003 +[2026-03-01 02:41:25] (step=0018798) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.677949520641753, LR: 0.0003 +[2026-03-01 02:41:33] (step=0018799) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.6781451770690667, LR: 0.0003 +[2026-03-01 02:41:41] (step=0018800) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.6783408334963803, LR: 0.0003 +[2026-03-01 02:41:49] (step=0018801) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.678536489923694, LR: 0.0003 +[2026-03-01 02:41:56] (step=0018802) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.6787321463510074, LR: 0.0003 +[2026-03-01 02:42:04] (step=0018803) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.6789278027783214, LR: 0.0003 +[2026-03-01 02:42:12] (step=0018804) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.679123459205635, LR: 0.0003 +[2026-03-01 02:42:20] (step=0018805) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.6793191156329486, LR: 0.0003 +[2026-03-01 02:42:28] (step=0018806) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.679514772060262, LR: 0.0003 +[2026-03-01 02:42:36] (step=0018807) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.6797104284875757, LR: 0.0003 +[2026-03-01 02:42:43] (step=0018808) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.6799060849148892, LR: 0.0003 +[2026-03-01 02:42:51] (step=0018809) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.6801017413422032, LR: 0.0003 +[2026-03-01 02:42:59] (step=0018810) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.680297397769517, LR: 0.0003 +[2026-03-01 02:43:07] (step=0018811) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.6804930541968304, LR: 0.0003 +[2026-03-01 02:43:15] (step=0018812) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.680688710624144, LR: 0.0003 +[2026-03-01 02:43:23] (step=0018813) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.6808843670514575, LR: 0.0003 +[2026-03-01 02:43:30] (step=0018814) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.681080023478771, LR: 0.0003 +[2026-03-01 02:43:38] (step=0018815) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.681275679906085, LR: 0.0003 +[2026-03-01 02:43:46] (step=0018816) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.6814713363333986, LR: 0.0003 +[2026-03-01 02:43:54] (step=0018817) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.681666992760712, LR: 0.0003 +[2026-03-01 02:44:02] (step=0018818) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.6818626491880257, LR: 0.0003 +[2026-03-01 02:44:10] (step=0018819) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.6820583056153393, LR: 0.0003 +[2026-03-01 02:44:18] (step=0018820) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.6822539620426533, LR: 0.0003 +[2026-03-01 02:44:26] (step=0018821) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.682449618469967, LR: 0.0003 +[2026-03-01 02:44:33] (step=0018822) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.6826452748972804, LR: 0.0003 +[2026-03-01 02:44:41] (step=0018823) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.682840931324594, LR: 0.0003 +[2026-03-01 02:44:49] (step=0018824) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.6830365877519076, LR: 0.0003 +[2026-03-01 02:44:57] (step=0018825) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.683232244179221, LR: 0.0003 +[2026-03-01 02:45:05] (step=0018826) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.683427900606535, LR: 0.0003 +[2026-03-01 02:45:13] (step=0018827) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.6836235570338487, LR: 0.0003 +[2026-03-01 02:45:20] (step=0018828) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.6838192134611623, LR: 0.0003 +[2026-03-01 02:45:28] (step=0018829) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.684014869888476, LR: 0.0003 +[2026-03-01 02:45:36] (step=0018830) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.6842105263157894, LR: 0.0003 +[2026-03-01 02:45:44] (step=0018831) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.684406182743103, LR: 0.0003 +[2026-03-01 02:45:52] (step=0018832) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.684601839170417, LR: 0.0003 +[2026-03-01 02:46:00] (step=0018833) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.6847974955977305, LR: 0.0003 +[2026-03-01 02:46:07] (step=0018834) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.684993152025044, LR: 0.0003 +[2026-03-01 02:46:15] (step=0018835) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.6851888084523576, LR: 0.0003 +[2026-03-01 02:46:23] (step=0018836) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.685384464879671, LR: 0.0003 +[2026-03-01 02:46:31] (step=0018837) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.6855801213069848, LR: 0.0003 +[2026-03-01 02:46:39] (step=0018838) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.6857757777342988, LR: 0.0003 +[2026-03-01 02:46:47] (step=0018839) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.6859714341616123, LR: 0.0003 +[2026-03-01 02:46:54] (step=0018840) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.686167090588926, LR: 0.0003 +[2026-03-01 02:47:02] (step=0018841) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.6863627470162394, LR: 0.0003 +[2026-03-01 02:47:10] (step=0018842) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.686558403443553, LR: 0.0003 +[2026-03-01 02:47:18] (step=0018843) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.6867540598708666, LR: 0.0003 +[2026-03-01 02:47:26] (step=0018844) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.6869497162981806, LR: 0.0003 +[2026-03-01 02:47:34] (step=0018845) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.687145372725494, LR: 0.0003 +[2026-03-01 02:47:42] (step=0018846) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.6873410291528077, LR: 0.0003 +[2026-03-01 02:47:49] (step=0018847) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.6875366855801213, LR: 0.0003 +[2026-03-01 02:47:57] (step=0018848) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 3.687732342007435, LR: 0.0003 +[2026-03-01 02:48:05] (step=0018849) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.6879279984347484, LR: 0.0003 +[2026-03-01 02:48:13] (step=0018850) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.6881236548620624, LR: 0.0003 +[2026-03-01 02:48:21] (step=0018851) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.688319311289376, LR: 0.0003 +[2026-03-01 02:48:29] (step=0018852) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.6885149677166895, LR: 0.0003 +[2026-03-01 02:48:36] (step=0018853) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.688710624144003, LR: 0.0003 +[2026-03-01 02:48:44] (step=0018854) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.6889062805713166, LR: 0.0003 +[2026-03-01 02:48:52] (step=0018855) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.68910193699863, LR: 0.0003 +[2026-03-01 02:49:00] (step=0018856) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.689297593425944, LR: 0.0003 +[2026-03-01 02:49:08] (step=0018857) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 3.6894932498532578, LR: 0.0003 +[2026-03-01 02:49:16] (step=0018858) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.6896889062805713, LR: 0.0003 +[2026-03-01 02:49:23] (step=0018859) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.689884562707885, LR: 0.0003 +[2026-03-01 02:49:31] (step=0018860) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.6900802191351985, LR: 0.0003 +[2026-03-01 02:49:39] (step=0018861) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 3.690275875562512, LR: 0.0003 +[2026-03-01 02:49:47] (step=0018862) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.690471531989826, LR: 0.0003 +[2026-03-01 02:49:55] (step=0018863) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.6906671884171396, LR: 0.0003 +[2026-03-01 02:50:03] (step=0018864) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.690862844844453, LR: 0.0003 +[2026-03-01 02:50:10] (step=0018865) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.6910585012717667, LR: 0.0003 +[2026-03-01 02:50:18] (step=0018866) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.6912541576990803, LR: 0.0003 +[2026-03-01 02:50:26] (step=0018867) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.691449814126394, LR: 0.0003 +[2026-03-01 02:50:34] (step=0018868) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.691645470553708, LR: 0.0003 +[2026-03-01 02:50:42] (step=0018869) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.6918411269810214, LR: 0.0003 +[2026-03-01 02:50:50] (step=0018870) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.692036783408335, LR: 0.0003 +[2026-03-01 02:50:58] (step=0018871) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.6922324398356485, LR: 0.0003 +[2026-03-01 02:51:05] (step=0018872) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.692428096262962, LR: 0.0003 +[2026-03-01 02:51:13] (step=0018873) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.692623752690276, LR: 0.0003 +[2026-03-01 02:51:21] (step=0018874) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.6928194091175897, LR: 0.0003 +[2026-03-01 02:51:29] (step=0018875) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.693015065544903, LR: 0.0003 +[2026-03-01 02:51:37] (step=0018876) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.693210721972217, LR: 0.0003 +[2026-03-01 02:51:45] (step=0018877) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.6934063783995303, LR: 0.0003 +[2026-03-01 02:51:52] (step=0018878) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.693602034826844, LR: 0.0003 +[2026-03-01 02:52:00] (step=0018879) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.693797691254158, LR: 0.0003 +[2026-03-01 02:52:08] (step=0018880) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.6939933476814715, LR: 0.0003 +[2026-03-01 02:52:16] (step=0018881) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.694189004108785, LR: 0.0003 +[2026-03-01 02:52:24] (step=0018882) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.6943846605360986, LR: 0.0003 +[2026-03-01 02:52:32] (step=0018883) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.694580316963412, LR: 0.0003 +[2026-03-01 02:52:39] (step=0018884) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 3.6947759733907257, LR: 0.0003 +[2026-03-01 02:52:47] (step=0018885) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.6949716298180397, LR: 0.0003 +[2026-03-01 02:52:55] (step=0018886) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.6951672862453533, LR: 0.0003 +[2026-03-01 02:53:03] (step=0018887) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.695362942672667, LR: 0.0003 +[2026-03-01 02:53:11] (step=0018888) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.6955585990999804, LR: 0.0003 +[2026-03-01 02:53:19] (step=0018889) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.695754255527294, LR: 0.0003 +[2026-03-01 02:53:27] (step=0018890) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.6959499119546075, LR: 0.0003 +[2026-03-01 02:53:34] (step=0018891) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.6961455683819215, LR: 0.0003 +[2026-03-01 02:53:42] (step=0018892) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.696341224809235, LR: 0.0003 +[2026-03-01 02:53:50] (step=0018893) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.6965368812365487, LR: 0.0003 +[2026-03-01 02:53:58] (step=0018894) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 3.6967325376638622, LR: 0.0003 +[2026-03-01 02:54:06] (step=0018895) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.696928194091176, LR: 0.0003 +[2026-03-01 02:54:14] (step=0018896) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 3.6971238505184894, LR: 0.0003 +[2026-03-01 02:54:22] (step=0018897) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.6973195069458034, LR: 0.0003 +[2026-03-01 02:54:29] (step=0018898) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.697515163373117, LR: 0.0003 +[2026-03-01 02:54:37] (step=0018899) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.6977108198004305, LR: 0.0003 +[2026-03-01 02:54:45] (step=0018900) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.697906476227744, LR: 0.0003 +[2026-03-01 02:54:53] (step=0018901) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 3.6981021326550576, LR: 0.0003 +[2026-03-01 02:55:01] (step=0018902) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 3.698297789082371, LR: 0.0003 +[2026-03-01 02:55:09] (step=0018903) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.698493445509685, LR: 0.0003 +[2026-03-01 02:55:16] (step=0018904) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.6986891019369987, LR: 0.0003 +[2026-03-01 02:55:24] (step=0018905) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.6988847583643123, LR: 0.0003 +[2026-03-01 02:55:32] (step=0018906) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.699080414791626, LR: 0.0003 +[2026-03-01 02:55:40] (step=0018907) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.6992760712189394, LR: 0.0003 +[2026-03-01 02:55:48] (step=0018908) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.699471727646253, LR: 0.0003 +[2026-03-01 02:55:56] (step=0018909) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.699667384073567, LR: 0.0003 +[2026-03-01 02:56:04] (step=0018910) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.6998630405008806, LR: 0.0003 +[2026-03-01 02:56:11] (step=0018911) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.700058696928194, LR: 0.0003 +[2026-03-01 02:56:19] (step=0018912) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.7002543533555077, LR: 0.0003 +[2026-03-01 02:56:27] (step=0018913) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.7004500097828212, LR: 0.0003 +[2026-03-01 02:56:35] (step=0018914) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.700645666210135, LR: 0.0003 +[2026-03-01 02:56:43] (step=0018915) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.700841322637449, LR: 0.0003 +[2026-03-01 02:56:51] (step=0018916) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 3.7010369790647624, LR: 0.0003 +[2026-03-01 02:56:59] (step=0018917) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.701232635492076, LR: 0.0003 +[2026-03-01 02:57:06] (step=0018918) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.7014282919193895, LR: 0.0003 +[2026-03-01 02:57:14] (step=0018919) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.701623948346703, LR: 0.0003 +[2026-03-01 02:57:22] (step=0018920) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.7018196047740166, LR: 0.0003 +[2026-03-01 02:57:30] (step=0018921) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.7020152612013306, LR: 0.0003 +[2026-03-01 02:57:38] (step=0018922) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.702210917628644, LR: 0.0003 +[2026-03-01 02:57:46] (step=0018923) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.7024065740559577, LR: 0.0003 +[2026-03-01 02:57:53] (step=0018924) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.7026022304832713, LR: 0.0003 +[2026-03-01 02:58:01] (step=0018925) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 3.702797886910585, LR: 0.0003 +[2026-03-01 02:58:09] (step=0018926) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 3.7029935433378984, LR: 0.0003 +[2026-03-01 02:58:17] (step=0018927) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 3.7031891997652124, LR: 0.0003 +[2026-03-01 02:58:25] (step=0018928) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.703384856192526, LR: 0.0003 +[2026-03-01 02:58:33] (step=0018929) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.7035805126198396, LR: 0.0003 +[2026-03-01 02:58:40] (step=0018930) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.703776169047153, LR: 0.0003 +[2026-03-01 02:58:48] (step=0018931) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.7039718254744667, LR: 0.0003 +[2026-03-01 02:58:56] (step=0018932) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.7041674819017807, LR: 0.0003 +[2026-03-01 02:59:04] (step=0018933) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.7043631383290943, LR: 0.0003 +[2026-03-01 02:59:12] (step=0018934) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.704558794756408, LR: 0.0003 +[2026-03-01 02:59:20] (step=0018935) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 3.7047544511837214, LR: 0.0003 +[2026-03-01 02:59:27] (step=0018936) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.704950107611035, LR: 0.0003 +[2026-03-01 02:59:35] (step=0018937) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.7051457640383485, LR: 0.0003 +[2026-03-01 02:59:43] (step=0018938) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.7053414204656625, LR: 0.0003 +[2026-03-01 02:59:51] (step=0018939) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.705537076892976, LR: 0.0003 +[2026-03-01 02:59:59] (step=0018940) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.7057327333202896, LR: 0.0003 +[2026-03-01 03:00:07] (step=0018941) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.705928389747603, LR: 0.0003 +[2026-03-01 03:00:14] (step=0018942) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.7061240461749168, LR: 0.0003 +[2026-03-01 03:00:22] (step=0018943) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.7063197026022303, LR: 0.0003 +[2026-03-01 03:00:30] (step=0018944) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.7065153590295443, LR: 0.0003 +[2026-03-01 03:00:38] (step=0018945) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.706711015456858, LR: 0.0003 +[2026-03-01 03:00:46] (step=0018946) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.7069066718841714, LR: 0.0003 +[2026-03-01 03:00:54] (step=0018947) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.707102328311485, LR: 0.0003 +[2026-03-01 03:01:01] (step=0018948) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.7072979847387986, LR: 0.0003 +[2026-03-01 03:01:09] (step=0018949) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.707493641166112, LR: 0.0003 +[2026-03-01 03:01:17] (step=0018950) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.707689297593426, LR: 0.0003 +[2026-03-01 03:01:25] (step=0018951) Train Loss: 0.4579, Train Steps/Sec: 0.12, Epoch: 3.7078849540207397, LR: 0.0003 +[2026-03-01 03:01:33] (step=0018952) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.7080806104480533, LR: 0.0003 +[2026-03-01 03:01:41] (step=0018953) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.708276266875367, LR: 0.0003 +[2026-03-01 03:01:49] (step=0018954) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.7084719233026804, LR: 0.0003 +[2026-03-01 03:01:57] (step=0018955) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.708667579729994, LR: 0.0003 +[2026-03-01 03:02:05] (step=0018956) Train Loss: 0.4538, Train Steps/Sec: 0.12, Epoch: 3.708863236157308, LR: 0.0003 +[2026-03-01 03:02:12] (step=0018957) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.7090588925846215, LR: 0.0003 +[2026-03-01 03:02:20] (step=0018958) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.709254549011935, LR: 0.0003 +[2026-03-01 03:02:28] (step=0018959) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.7094502054392486, LR: 0.0003 +[2026-03-01 03:02:36] (step=0018960) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.709645861866562, LR: 0.0003 +[2026-03-01 03:02:44] (step=0018961) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.7098415182938758, LR: 0.0003 +[2026-03-01 03:02:52] (step=0018962) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.7100371747211898, LR: 0.0003 +[2026-03-01 03:02:59] (step=0018963) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.7102328311485033, LR: 0.0003 +[2026-03-01 03:03:07] (step=0018964) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 3.710428487575817, LR: 0.0003 +[2026-03-01 03:03:15] (step=0018965) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.7106241440031305, LR: 0.0003 +[2026-03-01 03:03:23] (step=0018966) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.710819800430444, LR: 0.0003 +[2026-03-01 03:03:31] (step=0018967) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 3.7110154568577576, LR: 0.0003 +[2026-03-01 03:03:39] (step=0018968) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.7112111132850716, LR: 0.0003 +[2026-03-01 03:03:46] (step=0018969) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.711406769712385, LR: 0.0003 +[2026-03-01 03:03:54] (step=0018970) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.7116024261396987, LR: 0.0003 +[2026-03-01 03:04:02] (step=0018971) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.7117980825670123, LR: 0.0003 +[2026-03-01 03:04:10] (step=0018972) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.711993738994326, LR: 0.0003 +[2026-03-01 03:04:18] (step=0018973) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.7121893954216394, LR: 0.0003 +[2026-03-01 03:04:26] (step=0018974) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.7123850518489534, LR: 0.0003 +[2026-03-01 03:04:33] (step=0018975) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 3.712580708276267, LR: 0.0003 +[2026-03-01 03:04:41] (step=0018976) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.7127763647035805, LR: 0.0003 +[2026-03-01 03:04:49] (step=0018977) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 3.712972021130894, LR: 0.0003 +[2026-03-01 03:04:57] (step=0018978) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.7131676775582076, LR: 0.0003 +[2026-03-01 03:05:05] (step=0018979) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.713363333985521, LR: 0.0003 +[2026-03-01 03:05:13] (step=0018980) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.713558990412835, LR: 0.0003 +[2026-03-01 03:05:20] (step=0018981) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.7137546468401488, LR: 0.0003 +[2026-03-01 03:05:28] (step=0018982) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.7139503032674623, LR: 0.0003 +[2026-03-01 03:05:36] (step=0018983) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.714145959694776, LR: 0.0003 +[2026-03-01 03:05:44] (step=0018984) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.7143416161220895, LR: 0.0003 +[2026-03-01 03:05:52] (step=0018985) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.7145372725494035, LR: 0.0003 +[2026-03-01 03:06:00] (step=0018986) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.714732928976717, LR: 0.0003 +[2026-03-01 03:06:08] (step=0018987) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 3.7149285854040306, LR: 0.0003 +[2026-03-01 03:06:15] (step=0018988) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.715124241831344, LR: 0.0003 +[2026-03-01 03:06:23] (step=0018989) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.7153198982586577, LR: 0.0003 +[2026-03-01 03:06:31] (step=0018990) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.7155155546859713, LR: 0.0003 +[2026-03-01 03:06:39] (step=0018991) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.7157112111132853, LR: 0.0003 +[2026-03-01 03:06:47] (step=0018992) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 3.715906867540599, LR: 0.0003 +[2026-03-01 03:06:55] (step=0018993) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.7161025239679124, LR: 0.0003 +[2026-03-01 03:07:02] (step=0018994) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.716298180395226, LR: 0.0003 +[2026-03-01 03:07:10] (step=0018995) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.7164938368225395, LR: 0.0003 +[2026-03-01 03:07:18] (step=0018996) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.716689493249853, LR: 0.0003 +[2026-03-01 03:07:26] (step=0018997) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 3.716885149677167, LR: 0.0003 +[2026-03-01 03:07:34] (step=0018998) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.7170808061044807, LR: 0.0003 +[2026-03-01 03:07:42] (step=0018999) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.7172764625317942, LR: 0.0003 +[2026-03-01 03:07:50] (step=0019000) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.717472118959108, LR: 0.0003 +[2026-03-01 03:07:50] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0019000/ +[2026-03-01 03:07:58] (step=0019001) Train Loss: 0.4419, Train Steps/Sec: 0.12, Epoch: 3.7176677753864213, LR: 0.0003 +[2026-03-01 03:08:05] (step=0019002) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 3.717863431813735, LR: 0.0003 +[2026-03-01 03:08:13] (step=0019003) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.718059088241049, LR: 0.0003 +[2026-03-01 03:08:21] (step=0019004) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.7182547446683625, LR: 0.0003 +[2026-03-01 03:08:29] (step=0019005) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.718450401095676, LR: 0.0003 +[2026-03-01 03:08:37] (step=0019006) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 3.7186460575229896, LR: 0.0003 +[2026-03-01 03:08:45] (step=0019007) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 3.718841713950303, LR: 0.0003 +[2026-03-01 03:08:52] (step=0019008) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 3.7190373703776167, LR: 0.0003 +[2026-03-01 03:09:00] (step=0019009) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 3.7192330268049307, LR: 0.0003 +[2026-03-01 03:09:08] (step=0019010) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.7194286832322443, LR: 0.0003 +[2026-03-01 03:09:16] (step=0019011) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.719624339659558, LR: 0.0003 +[2026-03-01 03:09:24] (step=0019012) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.7198199960868714, LR: 0.0003 +[2026-03-01 03:09:32] (step=0019013) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.720015652514185, LR: 0.0003 +[2026-03-01 03:09:39] (step=0019014) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.7202113089414985, LR: 0.0003 +[2026-03-01 03:09:47] (step=0019015) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.7204069653688125, LR: 0.0003 +[2026-03-01 03:09:55] (step=0019016) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 3.720602621796126, LR: 0.0003 +[2026-03-01 03:10:03] (step=0019017) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.7207982782234397, LR: 0.0003 +[2026-03-01 03:10:11] (step=0019018) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.7209939346507532, LR: 0.0003 +[2026-03-01 03:10:19] (step=0019019) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 3.721189591078067, LR: 0.0003 +[2026-03-01 03:10:26] (step=0019020) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.7213852475053804, LR: 0.0003 +[2026-03-01 03:10:34] (step=0019021) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.7215809039326944, LR: 0.0003 +[2026-03-01 03:10:42] (step=0019022) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.721776560360008, LR: 0.0003 +[2026-03-01 03:10:50] (step=0019023) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.7219722167873215, LR: 0.0003 +[2026-03-01 03:10:58] (step=0019024) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.722167873214635, LR: 0.0003 +[2026-03-01 03:11:06] (step=0019025) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.7223635296419486, LR: 0.0003 +[2026-03-01 03:11:13] (step=0019026) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.722559186069262, LR: 0.0003 +[2026-03-01 03:11:21] (step=0019027) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.722754842496576, LR: 0.0003 +[2026-03-01 03:11:29] (step=0019028) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.7229504989238897, LR: 0.0003 +[2026-03-01 03:11:37] (step=0019029) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.7231461553512033, LR: 0.0003 +[2026-03-01 03:11:45] (step=0019030) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.723341811778517, LR: 0.0003 +[2026-03-01 03:11:53] (step=0019031) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.7235374682058304, LR: 0.0003 +[2026-03-01 03:12:00] (step=0019032) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.723733124633144, LR: 0.0003 +[2026-03-01 03:12:08] (step=0019033) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.723928781060458, LR: 0.0003 +[2026-03-01 03:12:16] (step=0019034) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.7241244374877716, LR: 0.0003 +[2026-03-01 03:12:24] (step=0019035) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.724320093915085, LR: 0.0003 +[2026-03-01 03:12:32] (step=0019036) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.7245157503423987, LR: 0.0003 +[2026-03-01 03:12:40] (step=0019037) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.7247114067697122, LR: 0.0003 +[2026-03-01 03:12:47] (step=0019038) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.724907063197026, LR: 0.0003 +[2026-03-01 03:12:55] (step=0019039) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 3.72510271962434, LR: 0.0003 +[2026-03-01 03:13:03] (step=0019040) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.7252983760516534, LR: 0.0003 +[2026-03-01 03:13:11] (step=0019041) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.725494032478967, LR: 0.0003 +[2026-03-01 03:13:19] (step=0019042) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.7256896889062805, LR: 0.0003 +[2026-03-01 03:13:27] (step=0019043) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.725885345333594, LR: 0.0003 +[2026-03-01 03:13:35] (step=0019044) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.726081001760908, LR: 0.0003 +[2026-03-01 03:13:43] (step=0019045) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.7262766581882216, LR: 0.0003 +[2026-03-01 03:13:50] (step=0019046) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.726472314615535, LR: 0.0003 +[2026-03-01 03:13:58] (step=0019047) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.7266679710428487, LR: 0.0003 +[2026-03-01 03:14:06] (step=0019048) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.7268636274701623, LR: 0.0003 +[2026-03-01 03:14:14] (step=0019049) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.727059283897476, LR: 0.0003 +[2026-03-01 03:14:22] (step=0019050) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.72725494032479, LR: 0.0003 +[2026-03-01 03:14:30] (step=0019051) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.7274505967521034, LR: 0.0003 +[2026-03-01 03:14:37] (step=0019052) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.727646253179417, LR: 0.0003 +[2026-03-01 03:14:45] (step=0019053) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.7278419096067306, LR: 0.0003 +[2026-03-01 03:14:53] (step=0019054) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.728037566034044, LR: 0.0003 +[2026-03-01 03:15:01] (step=0019055) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.7282332224613577, LR: 0.0003 +[2026-03-01 03:15:09] (step=0019056) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.7284288788886717, LR: 0.0003 +[2026-03-01 03:15:17] (step=0019057) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.7286245353159853, LR: 0.0003 +[2026-03-01 03:15:25] (step=0019058) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.728820191743299, LR: 0.0003 +[2026-03-01 03:15:32] (step=0019059) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 3.7290158481706124, LR: 0.0003 +[2026-03-01 03:15:40] (step=0019060) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.729211504597926, LR: 0.0003 +[2026-03-01 03:15:48] (step=0019061) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.7294071610252395, LR: 0.0003 +[2026-03-01 03:15:56] (step=0019062) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.7296028174525535, LR: 0.0003 +[2026-03-01 03:16:04] (step=0019063) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.729798473879867, LR: 0.0003 +[2026-03-01 03:16:12] (step=0019064) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.7299941303071806, LR: 0.0003 +[2026-03-01 03:16:19] (step=0019065) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 3.730189786734494, LR: 0.0003 +[2026-03-01 03:16:27] (step=0019066) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.7303854431618078, LR: 0.0003 +[2026-03-01 03:16:35] (step=0019067) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.7305810995891213, LR: 0.0003 +[2026-03-01 03:16:43] (step=0019068) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.7307767560164353, LR: 0.0003 +[2026-03-01 03:16:51] (step=0019069) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.730972412443749, LR: 0.0003 +[2026-03-01 03:16:59] (step=0019070) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.7311680688710624, LR: 0.0003 +[2026-03-01 03:17:06] (step=0019071) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.731363725298376, LR: 0.0003 +[2026-03-01 03:17:14] (step=0019072) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.7315593817256896, LR: 0.0003 +[2026-03-01 03:17:22] (step=0019073) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.731755038153003, LR: 0.0003 +[2026-03-01 03:17:30] (step=0019074) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.731950694580317, LR: 0.0003 +[2026-03-01 03:17:38] (step=0019075) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.7321463510076307, LR: 0.0003 +[2026-03-01 03:17:46] (step=0019076) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.7323420074349443, LR: 0.0003 +[2026-03-01 03:17:54] (step=0019077) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.732537663862258, LR: 0.0003 +[2026-03-01 03:18:01] (step=0019078) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.7327333202895714, LR: 0.0003 +[2026-03-01 03:18:09] (step=0019079) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.732928976716885, LR: 0.0003 +[2026-03-01 03:18:17] (step=0019080) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.733124633144199, LR: 0.0003 +[2026-03-01 03:18:25] (step=0019081) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.7333202895715125, LR: 0.0003 +[2026-03-01 03:18:33] (step=0019082) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.733515945998826, LR: 0.0003 +[2026-03-01 03:18:41] (step=0019083) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.7337116024261396, LR: 0.0003 +[2026-03-01 03:18:48] (step=0019084) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.733907258853453, LR: 0.0003 +[2026-03-01 03:18:56] (step=0019085) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.7341029152807668, LR: 0.0003 +[2026-03-01 03:19:04] (step=0019086) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.7342985717080808, LR: 0.0003 +[2026-03-01 03:19:12] (step=0019087) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.7344942281353943, LR: 0.0003 +[2026-03-01 03:19:20] (step=0019088) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.734689884562708, LR: 0.0003 +[2026-03-01 03:19:28] (step=0019089) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.7348855409900215, LR: 0.0003 +[2026-03-01 03:19:36] (step=0019090) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.735081197417335, LR: 0.0003 +[2026-03-01 03:19:43] (step=0019091) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.7352768538446486, LR: 0.0003 +[2026-03-01 03:19:51] (step=0019092) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.7354725102719626, LR: 0.0003 +[2026-03-01 03:19:59] (step=0019093) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.735668166699276, LR: 0.0003 +[2026-03-01 03:20:07] (step=0019094) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.7358638231265897, LR: 0.0003 +[2026-03-01 03:20:15] (step=0019095) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.7360594795539033, LR: 0.0003 +[2026-03-01 03:20:23] (step=0019096) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.736255135981217, LR: 0.0003 +[2026-03-01 03:20:31] (step=0019097) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.736450792408531, LR: 0.0003 +[2026-03-01 03:20:38] (step=0019098) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 3.7366464488358444, LR: 0.0003 +[2026-03-01 03:20:46] (step=0019099) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.736842105263158, LR: 0.0003 +[2026-03-01 03:20:54] (step=0019100) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.7370377616904715, LR: 0.0003 +[2026-03-01 03:21:02] (step=0019101) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.737233418117785, LR: 0.0003 +[2026-03-01 03:21:10] (step=0019102) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.7374290745450987, LR: 0.0003 +[2026-03-01 03:21:18] (step=0019103) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.7376247309724127, LR: 0.0003 +[2026-03-01 03:21:25] (step=0019104) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.737820387399726, LR: 0.0003 +[2026-03-01 03:21:33] (step=0019105) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.73801604382704, LR: 0.0003 +[2026-03-01 03:21:41] (step=0019106) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 3.7382117002543533, LR: 0.0003 +[2026-03-01 03:21:49] (step=0019107) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.738407356681667, LR: 0.0003 +[2026-03-01 03:21:57] (step=0019108) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.7386030131089805, LR: 0.0003 +[2026-03-01 03:22:05] (step=0019109) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 3.7387986695362945, LR: 0.0003 +[2026-03-01 03:22:12] (step=0019110) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.738994325963608, LR: 0.0003 +[2026-03-01 03:22:20] (step=0019111) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.7391899823909216, LR: 0.0003 +[2026-03-01 03:22:28] (step=0019112) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.739385638818235, LR: 0.0003 +[2026-03-01 03:22:36] (step=0019113) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.7395812952455487, LR: 0.0003 +[2026-03-01 03:22:44] (step=0019114) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.7397769516728623, LR: 0.0003 +[2026-03-01 03:22:52] (step=0019115) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.7399726081001763, LR: 0.0003 +[2026-03-01 03:22:59] (step=0019116) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.74016826452749, LR: 0.0003 +[2026-03-01 03:23:07] (step=0019117) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.7403639209548034, LR: 0.0003 +[2026-03-01 03:23:15] (step=0019118) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.740559577382117, LR: 0.0003 +[2026-03-01 03:23:23] (step=0019119) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.7407552338094305, LR: 0.0003 +[2026-03-01 03:23:31] (step=0019120) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.740950890236744, LR: 0.0003 +[2026-03-01 03:23:39] (step=0019121) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.741146546664058, LR: 0.0003 +[2026-03-01 03:23:47] (step=0019122) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.7413422030913717, LR: 0.0003 +[2026-03-01 03:23:54] (step=0019123) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.7415378595186852, LR: 0.0003 +[2026-03-01 03:24:02] (step=0019124) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.741733515945999, LR: 0.0003 +[2026-03-01 03:24:10] (step=0019125) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.7419291723733124, LR: 0.0003 +[2026-03-01 03:24:18] (step=0019126) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.742124828800626, LR: 0.0003 +[2026-03-01 03:24:26] (step=0019127) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.74232048522794, LR: 0.0003 +[2026-03-01 03:24:34] (step=0019128) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.7425161416552535, LR: 0.0003 +[2026-03-01 03:24:41] (step=0019129) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.742711798082567, LR: 0.0003 +[2026-03-01 03:24:49] (step=0019130) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.7429074545098806, LR: 0.0003 +[2026-03-01 03:24:57] (step=0019131) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.743103110937194, LR: 0.0003 +[2026-03-01 03:25:05] (step=0019132) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.7432987673645077, LR: 0.0003 +[2026-03-01 03:25:13] (step=0019133) Train Loss: 0.4481, Train Steps/Sec: 0.12, Epoch: 3.7434944237918217, LR: 0.0003 +[2026-03-01 03:25:21] (step=0019134) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.7436900802191353, LR: 0.0003 +[2026-03-01 03:25:29] (step=0019135) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.743885736646449, LR: 0.0003 +[2026-03-01 03:25:37] (step=0019136) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 3.7440813930737624, LR: 0.0003 +[2026-03-01 03:25:44] (step=0019137) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.744277049501076, LR: 0.0003 +[2026-03-01 03:25:52] (step=0019138) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.7444727059283895, LR: 0.0003 +[2026-03-01 03:26:00] (step=0019139) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.7446683623557036, LR: 0.0003 +[2026-03-01 03:26:08] (step=0019140) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.744864018783017, LR: 0.0003 +[2026-03-01 03:26:16] (step=0019141) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.7450596752103307, LR: 0.0003 +[2026-03-01 03:26:24] (step=0019142) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.7452553316376442, LR: 0.0003 +[2026-03-01 03:26:32] (step=0019143) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 3.745450988064958, LR: 0.0003 +[2026-03-01 03:26:39] (step=0019144) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.7456466444922714, LR: 0.0003 +[2026-03-01 03:26:47] (step=0019145) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.7458423009195854, LR: 0.0003 +[2026-03-01 03:26:55] (step=0019146) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.746037957346899, LR: 0.0003 +[2026-03-01 03:27:03] (step=0019147) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.7462336137742125, LR: 0.0003 +[2026-03-01 03:27:11] (step=0019148) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.746429270201526, LR: 0.0003 +[2026-03-01 03:27:19] (step=0019149) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.7466249266288396, LR: 0.0003 +[2026-03-01 03:27:26] (step=0019150) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.746820583056153, LR: 0.0003 +[2026-03-01 03:27:34] (step=0019151) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.747016239483467, LR: 0.0003 +[2026-03-01 03:27:42] (step=0019152) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.7472118959107807, LR: 0.0003 +[2026-03-01 03:27:50] (step=0019153) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.7474075523380943, LR: 0.0003 +[2026-03-01 03:27:58] (step=0019154) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 3.747603208765408, LR: 0.0003 +[2026-03-01 03:28:06] (step=0019155) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.7477988651927214, LR: 0.0003 +[2026-03-01 03:28:13] (step=0019156) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.7479945216200354, LR: 0.0003 +[2026-03-01 03:28:21] (step=0019157) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.748190178047349, LR: 0.0003 +[2026-03-01 03:28:29] (step=0019158) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.7483858344746626, LR: 0.0003 +[2026-03-01 03:28:37] (step=0019159) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.748581490901976, LR: 0.0003 +[2026-03-01 03:28:45] (step=0019160) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.7487771473292897, LR: 0.0003 +[2026-03-01 03:28:53] (step=0019161) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 3.7489728037566032, LR: 0.0003 +[2026-03-01 03:29:01] (step=0019162) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.7491684601839173, LR: 0.0003 +[2026-03-01 03:29:08] (step=0019163) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.749364116611231, LR: 0.0003 +[2026-03-01 03:29:16] (step=0019164) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.7495597730385444, LR: 0.0003 +[2026-03-01 03:29:24] (step=0019165) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.749755429465858, LR: 0.0003 +[2026-03-01 03:29:32] (step=0019166) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.7499510858931715, LR: 0.0003 +[2026-03-01 03:29:40] (step=0019167) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.750146742320485, LR: 0.0003 +[2026-03-01 03:29:48] (step=0019168) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.750342398747799, LR: 0.0003 +[2026-03-01 03:29:55] (step=0019169) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.7505380551751126, LR: 0.0003 +[2026-03-01 03:30:03] (step=0019170) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.750733711602426, LR: 0.0003 +[2026-03-01 03:30:11] (step=0019171) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.7509293680297398, LR: 0.0003 +[2026-03-01 03:30:19] (step=0019172) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.7511250244570533, LR: 0.0003 +[2026-03-01 03:30:27] (step=0019173) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.751320680884367, LR: 0.0003 +[2026-03-01 03:30:35] (step=0019174) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.751516337311681, LR: 0.0003 +[2026-03-01 03:30:42] (step=0019175) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.7517119937389944, LR: 0.0003 +[2026-03-01 03:30:50] (step=0019176) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.751907650166308, LR: 0.0003 +[2026-03-01 03:30:58] (step=0019177) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.7521033065936216, LR: 0.0003 +[2026-03-01 03:31:06] (step=0019178) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 3.752298963020935, LR: 0.0003 +[2026-03-01 03:31:14] (step=0019179) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.7524946194482487, LR: 0.0003 +[2026-03-01 03:31:22] (step=0019180) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.7526902758755627, LR: 0.0003 +[2026-03-01 03:31:29] (step=0019181) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 3.7528859323028763, LR: 0.0003 +[2026-03-01 03:31:37] (step=0019182) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.75308158873019, LR: 0.0003 +[2026-03-01 03:31:45] (step=0019183) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.7532772451575034, LR: 0.0003 +[2026-03-01 03:31:53] (step=0019184) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.753472901584817, LR: 0.0003 +[2026-03-01 03:32:01] (step=0019185) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.7536685580121305, LR: 0.0003 +[2026-03-01 03:32:09] (step=0019186) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.7538642144394445, LR: 0.0003 +[2026-03-01 03:32:17] (step=0019187) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.754059870866758, LR: 0.0003 +[2026-03-01 03:32:24] (step=0019188) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.7542555272940716, LR: 0.0003 +[2026-03-01 03:32:32] (step=0019189) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.754451183721385, LR: 0.0003 +[2026-03-01 03:32:40] (step=0019190) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.7546468401486988, LR: 0.0003 +[2026-03-01 03:32:48] (step=0019191) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.7548424965760123, LR: 0.0003 +[2026-03-01 03:32:56] (step=0019192) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.7550381530033263, LR: 0.0003 +[2026-03-01 03:33:04] (step=0019193) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.75523380943064, LR: 0.0003 +[2026-03-01 03:33:12] (step=0019194) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.7554294658579535, LR: 0.0003 +[2026-03-01 03:33:20] (step=0019195) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.755625122285267, LR: 0.0003 +[2026-03-01 03:33:27] (step=0019196) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.7558207787125806, LR: 0.0003 +[2026-03-01 03:33:35] (step=0019197) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.756016435139894, LR: 0.0003 +[2026-03-01 03:33:43] (step=0019198) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.756212091567208, LR: 0.0003 +[2026-03-01 03:33:51] (step=0019199) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.7564077479945217, LR: 0.0003 +[2026-03-01 03:33:59] (step=0019200) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.7566034044218353, LR: 0.0003 +[2026-03-01 03:34:07] (step=0019201) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.756799060849149, LR: 0.0003 +[2026-03-01 03:34:14] (step=0019202) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.7569947172764624, LR: 0.0003 +[2026-03-01 03:34:22] (step=0019203) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.757190373703776, LR: 0.0003 +[2026-03-01 03:34:30] (step=0019204) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.75738603013109, LR: 0.0003 +[2026-03-01 03:34:38] (step=0019205) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.7575816865584035, LR: 0.0003 +[2026-03-01 03:34:46] (step=0019206) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.757777342985717, LR: 0.0003 +[2026-03-01 03:34:54] (step=0019207) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.7579729994130306, LR: 0.0003 +[2026-03-01 03:35:01] (step=0019208) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.758168655840344, LR: 0.0003 +[2026-03-01 03:35:09] (step=0019209) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.758364312267658, LR: 0.0003 +[2026-03-01 03:35:17] (step=0019210) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.7585599686949718, LR: 0.0003 +[2026-03-01 03:35:25] (step=0019211) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 3.7587556251222853, LR: 0.0003 +[2026-03-01 03:35:33] (step=0019212) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.758951281549599, LR: 0.0003 +[2026-03-01 03:35:41] (step=0019213) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.7591469379769125, LR: 0.0003 +[2026-03-01 03:35:48] (step=0019214) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 3.759342594404226, LR: 0.0003 +[2026-03-01 03:35:56] (step=0019215) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.75953825083154, LR: 0.0003 +[2026-03-01 03:36:04] (step=0019216) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.7597339072588536, LR: 0.0003 +[2026-03-01 03:36:12] (step=0019217) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.759929563686167, LR: 0.0003 +[2026-03-01 03:36:20] (step=0019218) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 3.7601252201134807, LR: 0.0003 +[2026-03-01 03:36:28] (step=0019219) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.7603208765407943, LR: 0.0003 +[2026-03-01 03:36:35] (step=0019220) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.760516532968108, LR: 0.0003 +[2026-03-01 03:36:43] (step=0019221) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.760712189395422, LR: 0.0003 +[2026-03-01 03:36:51] (step=0019222) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.7609078458227354, LR: 0.0003 +[2026-03-01 03:36:59] (step=0019223) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.761103502250049, LR: 0.0003 +[2026-03-01 03:37:07] (step=0019224) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 3.7612991586773625, LR: 0.0003 +[2026-03-01 03:37:15] (step=0019225) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.761494815104676, LR: 0.0003 +[2026-03-01 03:37:23] (step=0019226) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.7616904715319897, LR: 0.0003 +[2026-03-01 03:37:31] (step=0019227) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.7618861279593037, LR: 0.0003 +[2026-03-01 03:37:38] (step=0019228) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.7620817843866172, LR: 0.0003 +[2026-03-01 03:37:46] (step=0019229) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 3.762277440813931, LR: 0.0003 +[2026-03-01 03:37:54] (step=0019230) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.7624730972412443, LR: 0.0003 +[2026-03-01 03:38:02] (step=0019231) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.762668753668558, LR: 0.0003 +[2026-03-01 03:38:10] (step=0019232) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.7628644100958715, LR: 0.0003 +[2026-03-01 03:38:18] (step=0019233) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.7630600665231855, LR: 0.0003 +[2026-03-01 03:38:25] (step=0019234) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.763255722950499, LR: 0.0003 +[2026-03-01 03:38:33] (step=0019235) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.7634513793778126, LR: 0.0003 +[2026-03-01 03:38:41] (step=0019236) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.763647035805126, LR: 0.0003 +[2026-03-01 03:38:49] (step=0019237) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.7638426922324397, LR: 0.0003 +[2026-03-01 03:38:57] (step=0019238) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.7640383486597533, LR: 0.0003 +[2026-03-01 03:39:05] (step=0019239) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 3.7642340050870673, LR: 0.0003 +[2026-03-01 03:39:13] (step=0019240) Train Loss: 0.4546, Train Steps/Sec: 0.12, Epoch: 3.764429661514381, LR: 0.0003 +[2026-03-01 03:39:20] (step=0019241) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.7646253179416944, LR: 0.0003 +[2026-03-01 03:39:28] (step=0019242) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.764820974369008, LR: 0.0003 +[2026-03-01 03:39:36] (step=0019243) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.7650166307963215, LR: 0.0003 +[2026-03-01 03:39:44] (step=0019244) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.765212287223635, LR: 0.0003 +[2026-03-01 03:39:52] (step=0019245) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.765407943650949, LR: 0.0003 +[2026-03-01 03:40:00] (step=0019246) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.7656036000782627, LR: 0.0003 +[2026-03-01 03:40:08] (step=0019247) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 3.7657992565055762, LR: 0.0003 +[2026-03-01 03:40:15] (step=0019248) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.76599491293289, LR: 0.0003 +[2026-03-01 03:40:23] (step=0019249) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 3.7661905693602034, LR: 0.0003 +[2026-03-01 03:40:31] (step=0019250) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.766386225787517, LR: 0.0003 +[2026-03-01 03:40:39] (step=0019251) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.766581882214831, LR: 0.0003 +[2026-03-01 03:40:47] (step=0019252) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.7667775386421445, LR: 0.0003 +[2026-03-01 03:40:55] (step=0019253) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.766973195069458, LR: 0.0003 +[2026-03-01 03:41:02] (step=0019254) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.7671688514967716, LR: 0.0003 +[2026-03-01 03:41:10] (step=0019255) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.767364507924085, LR: 0.0003 +[2026-03-01 03:41:18] (step=0019256) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.7675601643513987, LR: 0.0003 +[2026-03-01 03:41:26] (step=0019257) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.7677558207787127, LR: 0.0003 +[2026-03-01 03:41:34] (step=0019258) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.7679514772060263, LR: 0.0003 +[2026-03-01 03:41:42] (step=0019259) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 3.76814713363334, LR: 0.0003 +[2026-03-01 03:41:49] (step=0019260) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.7683427900606534, LR: 0.0003 +[2026-03-01 03:41:57] (step=0019261) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.768538446487967, LR: 0.0003 +[2026-03-01 03:42:05] (step=0019262) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 3.7687341029152805, LR: 0.0003 +[2026-03-01 03:42:13] (step=0019263) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.7689297593425946, LR: 0.0003 +[2026-03-01 03:42:21] (step=0019264) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.769125415769908, LR: 0.0003 +[2026-03-01 03:42:29] (step=0019265) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.7693210721972217, LR: 0.0003 +[2026-03-01 03:42:36] (step=0019266) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.7695167286245352, LR: 0.0003 +[2026-03-01 03:42:44] (step=0019267) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.769712385051849, LR: 0.0003 +[2026-03-01 03:42:52] (step=0019268) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.769908041479163, LR: 0.0003 +[2026-03-01 03:43:00] (step=0019269) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.7701036979064764, LR: 0.0003 +[2026-03-01 03:43:08] (step=0019270) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.77029935433379, LR: 0.0003 +[2026-03-01 03:43:16] (step=0019271) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.7704950107611035, LR: 0.0003 +[2026-03-01 03:43:24] (step=0019272) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.770690667188417, LR: 0.0003 +[2026-03-01 03:43:31] (step=0019273) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.7708863236157306, LR: 0.0003 +[2026-03-01 03:43:39] (step=0019274) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.7710819800430446, LR: 0.0003 +[2026-03-01 03:43:47] (step=0019275) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.771277636470358, LR: 0.0003 +[2026-03-01 03:43:55] (step=0019276) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.7714732928976717, LR: 0.0003 +[2026-03-01 03:44:03] (step=0019277) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.7716689493249853, LR: 0.0003 +[2026-03-01 03:44:11] (step=0019278) Train Loss: 0.4490, Train Steps/Sec: 0.12, Epoch: 3.771864605752299, LR: 0.0003 +[2026-03-01 03:44:19] (step=0019279) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.7720602621796124, LR: 0.0003 +[2026-03-01 03:44:26] (step=0019280) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 3.7722559186069264, LR: 0.0003 +[2026-03-01 03:44:34] (step=0019281) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.77245157503424, LR: 0.0003 +[2026-03-01 03:44:42] (step=0019282) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.7726472314615536, LR: 0.0003 +[2026-03-01 03:44:50] (step=0019283) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 3.772842887888867, LR: 0.0003 +[2026-03-01 03:44:58] (step=0019284) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.7730385443161807, LR: 0.0003 +[2026-03-01 03:45:06] (step=0019285) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.7732342007434942, LR: 0.0003 +[2026-03-01 03:45:14] (step=0019286) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.7734298571708083, LR: 0.0003 +[2026-03-01 03:45:21] (step=0019287) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.773625513598122, LR: 0.0003 +[2026-03-01 03:45:29] (step=0019288) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.7738211700254354, LR: 0.0003 +[2026-03-01 03:45:37] (step=0019289) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.774016826452749, LR: 0.0003 +[2026-03-01 03:45:45] (step=0019290) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.7742124828800625, LR: 0.0003 +[2026-03-01 03:45:53] (step=0019291) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.774408139307376, LR: 0.0003 +[2026-03-01 03:46:01] (step=0019292) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.77460379573469, LR: 0.0003 +[2026-03-01 03:46:08] (step=0019293) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.7747994521620036, LR: 0.0003 +[2026-03-01 03:46:16] (step=0019294) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.774995108589317, LR: 0.0003 +[2026-03-01 03:46:24] (step=0019295) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.7751907650166308, LR: 0.0003 +[2026-03-01 03:46:32] (step=0019296) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.7753864214439443, LR: 0.0003 +[2026-03-01 03:46:40] (step=0019297) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.775582077871258, LR: 0.0003 +[2026-03-01 03:46:48] (step=0019298) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 3.775777734298572, LR: 0.0003 +[2026-03-01 03:46:56] (step=0019299) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.7759733907258854, LR: 0.0003 +[2026-03-01 03:47:03] (step=0019300) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.776169047153199, LR: 0.0003 +[2026-03-01 03:47:11] (step=0019301) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.7763647035805126, LR: 0.0003 +[2026-03-01 03:47:19] (step=0019302) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.776560360007826, LR: 0.0003 +[2026-03-01 03:47:27] (step=0019303) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.7767560164351397, LR: 0.0003 +[2026-03-01 03:47:35] (step=0019304) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 3.7769516728624537, LR: 0.0003 +[2026-03-01 03:47:43] (step=0019305) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.7771473292897673, LR: 0.0003 +[2026-03-01 03:47:50] (step=0019306) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.777342985717081, LR: 0.0003 +[2026-03-01 03:47:58] (step=0019307) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.7775386421443944, LR: 0.0003 +[2026-03-01 03:48:06] (step=0019308) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.777734298571708, LR: 0.0003 +[2026-03-01 03:48:14] (step=0019309) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 3.7779299549990215, LR: 0.0003 +[2026-03-01 03:48:22] (step=0019310) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.7781256114263355, LR: 0.0003 +[2026-03-01 03:48:30] (step=0019311) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 3.778321267853649, LR: 0.0003 +[2026-03-01 03:48:37] (step=0019312) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.7785169242809626, LR: 0.0003 +[2026-03-01 03:48:45] (step=0019313) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.778712580708276, LR: 0.0003 +[2026-03-01 03:48:53] (step=0019314) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.7789082371355898, LR: 0.0003 +[2026-03-01 03:49:01] (step=0019315) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.7791038935629033, LR: 0.0003 +[2026-03-01 03:49:09] (step=0019316) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.7792995499902173, LR: 0.0003 +[2026-03-01 03:49:17] (step=0019317) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.779495206417531, LR: 0.0003 +[2026-03-01 03:49:24] (step=0019318) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 3.7796908628448445, LR: 0.0003 +[2026-03-01 03:49:32] (step=0019319) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.779886519272158, LR: 0.0003 +[2026-03-01 03:49:40] (step=0019320) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.7800821756994716, LR: 0.0003 +[2026-03-01 03:49:48] (step=0019321) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 3.7802778321267856, LR: 0.0003 +[2026-03-01 03:49:56] (step=0019322) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.780473488554099, LR: 0.0003 +[2026-03-01 03:50:04] (step=0019323) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.7806691449814127, LR: 0.0003 +[2026-03-01 03:50:12] (step=0019324) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.7808648014087263, LR: 0.0003 +[2026-03-01 03:50:20] (step=0019325) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.78106045783604, LR: 0.0003 +[2026-03-01 03:50:27] (step=0019326) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.7812561142633534, LR: 0.0003 +[2026-03-01 03:50:35] (step=0019327) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.7814517706906674, LR: 0.0003 +[2026-03-01 03:50:43] (step=0019328) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.781647427117981, LR: 0.0003 +[2026-03-01 03:50:51] (step=0019329) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.7818430835452945, LR: 0.0003 +[2026-03-01 03:50:59] (step=0019330) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.782038739972608, LR: 0.0003 +[2026-03-01 03:51:07] (step=0019331) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.7822343963999216, LR: 0.0003 +[2026-03-01 03:51:14] (step=0019332) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.782430052827235, LR: 0.0003 +[2026-03-01 03:51:22] (step=0019333) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 3.782625709254549, LR: 0.0003 +[2026-03-01 03:51:30] (step=0019334) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.782821365681863, LR: 0.0003 +[2026-03-01 03:51:38] (step=0019335) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.7830170221091763, LR: 0.0003 +[2026-03-01 03:51:46] (step=0019336) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.78321267853649, LR: 0.0003 +[2026-03-01 03:51:54] (step=0019337) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 3.7834083349638035, LR: 0.0003 +[2026-03-01 03:52:02] (step=0019338) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.783603991391117, LR: 0.0003 +[2026-03-01 03:52:09] (step=0019339) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.783799647818431, LR: 0.0003 +[2026-03-01 03:52:17] (step=0019340) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.7839953042457446, LR: 0.0003 +[2026-03-01 03:52:25] (step=0019341) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.784190960673058, LR: 0.0003 +[2026-03-01 03:52:33] (step=0019342) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.7843866171003717, LR: 0.0003 +[2026-03-01 03:52:41] (step=0019343) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.7845822735276853, LR: 0.0003 +[2026-03-01 03:52:49] (step=0019344) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.784777929954999, LR: 0.0003 +[2026-03-01 03:52:57] (step=0019345) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.784973586382313, LR: 0.0003 +[2026-03-01 03:53:04] (step=0019346) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.7851692428096264, LR: 0.0003 +[2026-03-01 03:53:12] (step=0019347) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.78536489923694, LR: 0.0003 +[2026-03-01 03:53:20] (step=0019348) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 3.7855605556642535, LR: 0.0003 +[2026-03-01 03:53:28] (step=0019349) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.785756212091567, LR: 0.0003 +[2026-03-01 03:53:36] (step=0019350) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 3.7859518685188807, LR: 0.0003 +[2026-03-01 03:53:44] (step=0019351) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.7861475249461947, LR: 0.0003 +[2026-03-01 03:53:51] (step=0019352) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 3.7863431813735082, LR: 0.0003 +[2026-03-01 03:53:59] (step=0019353) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 3.786538837800822, LR: 0.0003 +[2026-03-01 03:54:07] (step=0019354) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 3.7867344942281354, LR: 0.0003 +[2026-03-01 03:54:15] (step=0019355) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 3.786930150655449, LR: 0.0003 +[2026-03-01 03:54:23] (step=0019356) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 3.7871258070827625, LR: 0.0003 +[2026-03-01 03:54:31] (step=0019357) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.7873214635100765, LR: 0.0003 +[2026-03-01 03:54:38] (step=0019358) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.78751711993739, LR: 0.0003 +[2026-03-01 03:54:46] (step=0019359) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.7877127763647036, LR: 0.0003 +[2026-03-01 03:54:54] (step=0019360) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.787908432792017, LR: 0.0003 +[2026-03-01 03:55:02] (step=0019361) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.7881040892193307, LR: 0.0003 +[2026-03-01 03:55:10] (step=0019362) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.7882997456466443, LR: 0.0003 +[2026-03-01 03:55:18] (step=0019363) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.7884954020739583, LR: 0.0003 +[2026-03-01 03:55:25] (step=0019364) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.788691058501272, LR: 0.0003 +[2026-03-01 03:55:33] (step=0019365) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 3.7888867149285854, LR: 0.0003 +[2026-03-01 03:55:41] (step=0019366) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.789082371355899, LR: 0.0003 +[2026-03-01 03:55:49] (step=0019367) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.7892780277832125, LR: 0.0003 +[2026-03-01 03:55:57] (step=0019368) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.789473684210526, LR: 0.0003 +[2026-03-01 03:56:05] (step=0019369) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.78966934063784, LR: 0.0003 +[2026-03-01 03:56:13] (step=0019370) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.7898649970651537, LR: 0.0003 +[2026-03-01 03:56:20] (step=0019371) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 3.7900606534924672, LR: 0.0003 +[2026-03-01 03:56:28] (step=0019372) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.790256309919781, LR: 0.0003 +[2026-03-01 03:56:36] (step=0019373) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.7904519663470944, LR: 0.0003 +[2026-03-01 03:56:44] (step=0019374) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.790647622774408, LR: 0.0003 +[2026-03-01 03:56:52] (step=0019375) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.790843279201722, LR: 0.0003 +[2026-03-01 03:57:00] (step=0019376) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.7910389356290355, LR: 0.0003 +[2026-03-01 03:57:08] (step=0019377) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.791234592056349, LR: 0.0003 +[2026-03-01 03:57:15] (step=0019378) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 3.7914302484836626, LR: 0.0003 +[2026-03-01 03:57:23] (step=0019379) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.791625904910976, LR: 0.0003 +[2026-03-01 03:57:31] (step=0019380) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.79182156133829, LR: 0.0003 +[2026-03-01 03:57:39] (step=0019381) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.7920172177656037, LR: 0.0003 +[2026-03-01 03:57:47] (step=0019382) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.7922128741929173, LR: 0.0003 +[2026-03-01 03:57:55] (step=0019383) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.792408530620231, LR: 0.0003 +[2026-03-01 03:58:02] (step=0019384) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.7926041870475444, LR: 0.0003 +[2026-03-01 03:58:10] (step=0019385) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.792799843474858, LR: 0.0003 +[2026-03-01 03:58:18] (step=0019386) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.792995499902172, LR: 0.0003 +[2026-03-01 03:58:26] (step=0019387) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.7931911563294856, LR: 0.0003 +[2026-03-01 03:58:34] (step=0019388) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.793386812756799, LR: 0.0003 +[2026-03-01 03:58:42] (step=0019389) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.7935824691841127, LR: 0.0003 +[2026-03-01 03:58:50] (step=0019390) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.7937781256114262, LR: 0.0003 +[2026-03-01 03:58:57] (step=0019391) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.79397378203874, LR: 0.0003 +[2026-03-01 03:59:05] (step=0019392) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 3.794169438466054, LR: 0.0003 +[2026-03-01 03:59:13] (step=0019393) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.7943650948933674, LR: 0.0003 +[2026-03-01 03:59:21] (step=0019394) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.794560751320681, LR: 0.0003 +[2026-03-01 03:59:29] (step=0019395) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.7947564077479945, LR: 0.0003 +[2026-03-01 03:59:37] (step=0019396) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.794952064175308, LR: 0.0003 +[2026-03-01 03:59:44] (step=0019397) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.7951477206026216, LR: 0.0003 +[2026-03-01 03:59:52] (step=0019398) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.7953433770299356, LR: 0.0003 +[2026-03-01 04:00:00] (step=0019399) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.795539033457249, LR: 0.0003 +[2026-03-01 04:00:08] (step=0019400) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.7957346898845628, LR: 0.0003 +[2026-03-01 04:00:16] (step=0019401) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.7959303463118763, LR: 0.0003 +[2026-03-01 04:00:24] (step=0019402) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.79612600273919, LR: 0.0003 +[2026-03-01 04:00:31] (step=0019403) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.7963216591665034, LR: 0.0003 +[2026-03-01 04:00:39] (step=0019404) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.7965173155938174, LR: 0.0003 +[2026-03-01 04:00:47] (step=0019405) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.796712972021131, LR: 0.0003 +[2026-03-01 04:00:55] (step=0019406) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 3.7969086284484446, LR: 0.0003 +[2026-03-01 04:01:03] (step=0019407) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.797104284875758, LR: 0.0003 +[2026-03-01 04:01:10] (step=0019408) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 3.7972999413030717, LR: 0.0003 +[2026-03-01 04:01:18] (step=0019409) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.7974955977303853, LR: 0.0003 +[2026-03-01 04:01:26] (step=0019410) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.7976912541576993, LR: 0.0003 +[2026-03-01 04:01:34] (step=0019411) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.797886910585013, LR: 0.0003 +[2026-03-01 04:01:42] (step=0019412) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.7980825670123264, LR: 0.0003 +[2026-03-01 04:01:50] (step=0019413) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.79827822343964, LR: 0.0003 +[2026-03-01 04:01:57] (step=0019414) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.7984738798669535, LR: 0.0003 +[2026-03-01 04:02:05] (step=0019415) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.798669536294267, LR: 0.0003 +[2026-03-01 04:02:13] (step=0019416) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 3.798865192721581, LR: 0.0003 +[2026-03-01 04:02:21] (step=0019417) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.7990608491488946, LR: 0.0003 +[2026-03-01 04:02:29] (step=0019418) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.799256505576208, LR: 0.0003 +[2026-03-01 04:02:36] (step=0019419) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.7994521620035218, LR: 0.0003 +[2026-03-01 04:02:44] (step=0019420) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.7996478184308353, LR: 0.0003 +[2026-03-01 04:02:52] (step=0019421) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.799843474858149, LR: 0.0003 +[2026-03-01 04:03:00] (step=0019422) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.800039131285463, LR: 0.0003 +[2026-03-01 04:03:08] (step=0019423) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.8002347877127765, LR: 0.0003 +[2026-03-01 04:03:16] (step=0019424) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.80043044414009, LR: 0.0003 +[2026-03-01 04:03:23] (step=0019425) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.8006261005674036, LR: 0.0003 +[2026-03-01 04:03:31] (step=0019426) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.800821756994717, LR: 0.0003 +[2026-03-01 04:03:39] (step=0019427) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.8010174134220307, LR: 0.0003 +[2026-03-01 04:03:47] (step=0019428) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.8012130698493447, LR: 0.0003 +[2026-03-01 04:03:55] (step=0019429) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 3.8014087262766583, LR: 0.0003 +[2026-03-01 04:04:03] (step=0019430) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.801604382703972, LR: 0.0003 +[2026-03-01 04:04:10] (step=0019431) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.8018000391312854, LR: 0.0003 +[2026-03-01 04:04:18] (step=0019432) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.801995695558599, LR: 0.0003 +[2026-03-01 04:04:26] (step=0019433) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.8021913519859125, LR: 0.0003 +[2026-03-01 04:04:34] (step=0019434) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.8023870084132265, LR: 0.0003 +[2026-03-01 04:04:42] (step=0019435) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.80258266484054, LR: 0.0003 +[2026-03-01 04:04:50] (step=0019436) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.8027783212678536, LR: 0.0003 +[2026-03-01 04:04:57] (step=0019437) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.802973977695167, LR: 0.0003 +[2026-03-01 04:05:05] (step=0019438) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.8031696341224808, LR: 0.0003 +[2026-03-01 04:05:13] (step=0019439) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.8033652905497948, LR: 0.0003 +[2026-03-01 04:05:21] (step=0019440) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.8035609469771083, LR: 0.0003 +[2026-03-01 04:05:29] (step=0019441) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.803756603404422, LR: 0.0003 +[2026-03-01 04:05:37] (step=0019442) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 3.8039522598317355, LR: 0.0003 +[2026-03-01 04:05:45] (step=0019443) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.804147916259049, LR: 0.0003 +[2026-03-01 04:05:52] (step=0019444) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.8043435726863626, LR: 0.0003 +[2026-03-01 04:06:00] (step=0019445) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.8045392291136766, LR: 0.0003 +[2026-03-01 04:06:08] (step=0019446) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.80473488554099, LR: 0.0003 +[2026-03-01 04:06:16] (step=0019447) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.8049305419683037, LR: 0.0003 +[2026-03-01 04:06:24] (step=0019448) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.8051261983956173, LR: 0.0003 +[2026-03-01 04:06:32] (step=0019449) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 3.805321854822931, LR: 0.0003 +[2026-03-01 04:06:39] (step=0019450) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.8055175112502444, LR: 0.0003 +[2026-03-01 04:06:47] (step=0019451) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.8057131676775584, LR: 0.0003 +[2026-03-01 04:06:55] (step=0019452) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.805908824104872, LR: 0.0003 +[2026-03-01 04:07:03] (step=0019453) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.8061044805321855, LR: 0.0003 +[2026-03-01 04:07:11] (step=0019454) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.806300136959499, LR: 0.0003 +[2026-03-01 04:07:19] (step=0019455) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.8064957933868127, LR: 0.0003 +[2026-03-01 04:07:27] (step=0019456) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.806691449814126, LR: 0.0003 +[2026-03-01 04:07:34] (step=0019457) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.80688710624144, LR: 0.0003 +[2026-03-01 04:07:42] (step=0019458) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.807082762668754, LR: 0.0003 +[2026-03-01 04:07:50] (step=0019459) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.8072784190960673, LR: 0.0003 +[2026-03-01 04:07:58] (step=0019460) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.807474075523381, LR: 0.0003 +[2026-03-01 04:08:06] (step=0019461) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.8076697319506945, LR: 0.0003 +[2026-03-01 04:08:14] (step=0019462) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.807865388378008, LR: 0.0003 +[2026-03-01 04:08:21] (step=0019463) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.808061044805322, LR: 0.0003 +[2026-03-01 04:08:29] (step=0019464) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.8082567012326356, LR: 0.0003 +[2026-03-01 04:08:37] (step=0019465) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.808452357659949, LR: 0.0003 +[2026-03-01 04:08:45] (step=0019466) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.8086480140872627, LR: 0.0003 +[2026-03-01 04:08:53] (step=0019467) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.8088436705145763, LR: 0.0003 +[2026-03-01 04:09:01] (step=0019468) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.80903932694189, LR: 0.0003 +[2026-03-01 04:09:08] (step=0019469) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 3.809234983369204, LR: 0.0003 +[2026-03-01 04:09:16] (step=0019470) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.8094306397965174, LR: 0.0003 +[2026-03-01 04:09:24] (step=0019471) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.809626296223831, LR: 0.0003 +[2026-03-01 04:09:32] (step=0019472) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.8098219526511445, LR: 0.0003 +[2026-03-01 04:09:40] (step=0019473) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.810017609078458, LR: 0.0003 +[2026-03-01 04:09:48] (step=0019474) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 3.8102132655057717, LR: 0.0003 +[2026-03-01 04:09:56] (step=0019475) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.8104089219330857, LR: 0.0003 +[2026-03-01 04:10:04] (step=0019476) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.8106045783603992, LR: 0.0003 +[2026-03-01 04:10:11] (step=0019477) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 3.810800234787713, LR: 0.0003 +[2026-03-01 04:10:19] (step=0019478) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.8109958912150264, LR: 0.0003 +[2026-03-01 04:10:27] (step=0019479) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.81119154764234, LR: 0.0003 +[2026-03-01 04:10:35] (step=0019480) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.8113872040696535, LR: 0.0003 +[2026-03-01 04:10:43] (step=0019481) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.8115828604969675, LR: 0.0003 +[2026-03-01 04:10:51] (step=0019482) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.811778516924281, LR: 0.0003 +[2026-03-01 04:10:58] (step=0019483) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.8119741733515946, LR: 0.0003 +[2026-03-01 04:11:06] (step=0019484) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.812169829778908, LR: 0.0003 +[2026-03-01 04:11:14] (step=0019485) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.8123654862062217, LR: 0.0003 +[2026-03-01 04:11:22] (step=0019486) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.8125611426335353, LR: 0.0003 +[2026-03-01 04:11:30] (step=0019487) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.8127567990608493, LR: 0.0003 +[2026-03-01 04:11:38] (step=0019488) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.812952455488163, LR: 0.0003 +[2026-03-01 04:11:46] (step=0019489) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.8131481119154764, LR: 0.0003 +[2026-03-01 04:11:53] (step=0019490) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.81334376834279, LR: 0.0003 +[2026-03-01 04:12:01] (step=0019491) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.8135394247701035, LR: 0.0003 +[2026-03-01 04:12:09] (step=0019492) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.8137350811974176, LR: 0.0003 +[2026-03-01 04:12:17] (step=0019493) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.813930737624731, LR: 0.0003 +[2026-03-01 04:12:25] (step=0019494) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.8141263940520447, LR: 0.0003 +[2026-03-01 04:12:33] (step=0019495) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.8143220504793582, LR: 0.0003 +[2026-03-01 04:12:40] (step=0019496) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.814517706906672, LR: 0.0003 +[2026-03-01 04:12:48] (step=0019497) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 3.8147133633339854, LR: 0.0003 +[2026-03-01 04:12:56] (step=0019498) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.8149090197612994, LR: 0.0003 +[2026-03-01 04:13:04] (step=0019499) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.815104676188613, LR: 0.0003 +[2026-03-01 04:13:12] (step=0019500) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.8153003326159265, LR: 0.0003 +[2026-03-01 04:13:12] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0019500/ +[2026-03-01 04:13:20] (step=0019501) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.81549598904324, LR: 0.0003 +[2026-03-01 04:13:28] (step=0019502) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.8156916454705536, LR: 0.0003 +[2026-03-01 04:13:35] (step=0019503) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.815887301897867, LR: 0.0003 +[2026-03-01 04:13:43] (step=0019504) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.816082958325181, LR: 0.0003 +[2026-03-01 04:13:51] (step=0019505) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.8162786147524947, LR: 0.0003 +[2026-03-01 04:13:59] (step=0019506) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.8164742711798083, LR: 0.0003 +[2026-03-01 04:14:07] (step=0019507) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.816669927607122, LR: 0.0003 +[2026-03-01 04:14:15] (step=0019508) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.8168655840344354, LR: 0.0003 +[2026-03-01 04:14:22] (step=0019509) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 3.817061240461749, LR: 0.0003 +[2026-03-01 04:14:30] (step=0019510) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 3.817256896889063, LR: 0.0003 +[2026-03-01 04:14:38] (step=0019511) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.8174525533163766, LR: 0.0003 +[2026-03-01 04:14:46] (step=0019512) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 3.81764820974369, LR: 0.0003 +[2026-03-01 04:14:54] (step=0019513) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.8178438661710037, LR: 0.0003 +[2026-03-01 04:15:02] (step=0019514) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.8180395225983172, LR: 0.0003 +[2026-03-01 04:15:10] (step=0019515) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.818235179025631, LR: 0.0003 +[2026-03-01 04:15:17] (step=0019516) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.818430835452945, LR: 0.0003 +[2026-03-01 04:15:25] (step=0019517) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.8186264918802584, LR: 0.0003 +[2026-03-01 04:15:33] (step=0019518) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 3.818822148307572, LR: 0.0003 +[2026-03-01 04:15:41] (step=0019519) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.8190178047348855, LR: 0.0003 +[2026-03-01 04:15:49] (step=0019520) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.819213461162199, LR: 0.0003 +[2026-03-01 04:15:57] (step=0019521) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.8194091175895126, LR: 0.0003 +[2026-03-01 04:16:05] (step=0019522) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.8196047740168266, LR: 0.0003 +[2026-03-01 04:16:12] (step=0019523) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.81980043044414, LR: 0.0003 +[2026-03-01 04:16:20] (step=0019524) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.8199960868714538, LR: 0.0003 +[2026-03-01 04:16:28] (step=0019525) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.8201917432987673, LR: 0.0003 +[2026-03-01 04:16:36] (step=0019526) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.820387399726081, LR: 0.0003 +[2026-03-01 04:16:44] (step=0019527) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.8205830561533944, LR: 0.0003 +[2026-03-01 04:16:52] (step=0019528) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.8207787125807084, LR: 0.0003 +[2026-03-01 04:16:59] (step=0019529) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.820974369008022, LR: 0.0003 +[2026-03-01 04:17:07] (step=0019530) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.8211700254353356, LR: 0.0003 +[2026-03-01 04:17:15] (step=0019531) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.821365681862649, LR: 0.0003 +[2026-03-01 04:17:23] (step=0019532) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.8215613382899627, LR: 0.0003 +[2026-03-01 04:17:31] (step=0019533) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 3.8217569947172763, LR: 0.0003 +[2026-03-01 04:17:39] (step=0019534) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.8219526511445903, LR: 0.0003 +[2026-03-01 04:17:47] (step=0019535) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.822148307571904, LR: 0.0003 +[2026-03-01 04:17:54] (step=0019536) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.8223439639992174, LR: 0.0003 +[2026-03-01 04:18:02] (step=0019537) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 3.822539620426531, LR: 0.0003 +[2026-03-01 04:18:10] (step=0019538) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.8227352768538445, LR: 0.0003 +[2026-03-01 04:18:18] (step=0019539) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.822930933281158, LR: 0.0003 +[2026-03-01 04:18:26] (step=0019540) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.823126589708472, LR: 0.0003 +[2026-03-01 04:18:34] (step=0019541) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.8233222461357856, LR: 0.0003 +[2026-03-01 04:18:41] (step=0019542) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.823517902563099, LR: 0.0003 +[2026-03-01 04:18:49] (step=0019543) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.8237135589904128, LR: 0.0003 +[2026-03-01 04:18:57] (step=0019544) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.8239092154177263, LR: 0.0003 +[2026-03-01 04:19:05] (step=0019545) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.82410487184504, LR: 0.0003 +[2026-03-01 04:19:13] (step=0019546) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.824300528272354, LR: 0.0003 +[2026-03-01 04:19:21] (step=0019547) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.8244961846996675, LR: 0.0003 +[2026-03-01 04:19:29] (step=0019548) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.824691841126981, LR: 0.0003 +[2026-03-01 04:19:36] (step=0019549) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.8248874975542946, LR: 0.0003 +[2026-03-01 04:19:44] (step=0019550) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.825083153981608, LR: 0.0003 +[2026-03-01 04:19:52] (step=0019551) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.825278810408922, LR: 0.0003 +[2026-03-01 04:20:00] (step=0019552) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.8254744668362357, LR: 0.0003 +[2026-03-01 04:20:08] (step=0019553) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.8256701232635493, LR: 0.0003 +[2026-03-01 04:20:16] (step=0019554) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.825865779690863, LR: 0.0003 +[2026-03-01 04:20:23] (step=0019555) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.8260614361181764, LR: 0.0003 +[2026-03-01 04:20:31] (step=0019556) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 3.82625709254549, LR: 0.0003 +[2026-03-01 04:20:39] (step=0019557) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.826452748972804, LR: 0.0003 +[2026-03-01 04:20:47] (step=0019558) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.8266484054001175, LR: 0.0003 +[2026-03-01 04:20:55] (step=0019559) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.826844061827431, LR: 0.0003 +[2026-03-01 04:21:03] (step=0019560) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.8270397182547446, LR: 0.0003 +[2026-03-01 04:21:10] (step=0019561) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.827235374682058, LR: 0.0003 +[2026-03-01 04:21:18] (step=0019562) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.8274310311093718, LR: 0.0003 +[2026-03-01 04:21:26] (step=0019563) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.827626687536686, LR: 0.0003 +[2026-03-01 04:21:34] (step=0019564) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 3.8278223439639993, LR: 0.0003 +[2026-03-01 04:21:42] (step=0019565) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.828018000391313, LR: 0.0003 +[2026-03-01 04:21:50] (step=0019566) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.8282136568186265, LR: 0.0003 +[2026-03-01 04:21:57] (step=0019567) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.82840931324594, LR: 0.0003 +[2026-03-01 04:22:05] (step=0019568) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.8286049696732536, LR: 0.0003 +[2026-03-01 04:22:13] (step=0019569) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.8288006261005676, LR: 0.0003 +[2026-03-01 04:22:21] (step=0019570) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.828996282527881, LR: 0.0003 +[2026-03-01 04:22:29] (step=0019571) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.8291919389551947, LR: 0.0003 +[2026-03-01 04:22:37] (step=0019572) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.8293875953825083, LR: 0.0003 +[2026-03-01 04:22:45] (step=0019573) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.829583251809822, LR: 0.0003 +[2026-03-01 04:22:52] (step=0019574) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.8297789082371354, LR: 0.0003 +[2026-03-01 04:23:00] (step=0019575) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.8299745646644494, LR: 0.0003 +[2026-03-01 04:23:08] (step=0019576) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.830170221091763, LR: 0.0003 +[2026-03-01 04:23:16] (step=0019577) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.8303658775190765, LR: 0.0003 +[2026-03-01 04:23:24] (step=0019578) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 3.83056153394639, LR: 0.0003 +[2026-03-01 04:23:32] (step=0019579) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 3.8307571903737037, LR: 0.0003 +[2026-03-01 04:23:40] (step=0019580) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.830952846801017, LR: 0.0003 +[2026-03-01 04:23:47] (step=0019581) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.8311485032283312, LR: 0.0003 +[2026-03-01 04:23:55] (step=0019582) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.831344159655645, LR: 0.0003 +[2026-03-01 04:24:03] (step=0019583) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 3.8315398160829583, LR: 0.0003 +[2026-03-01 04:24:11] (step=0019584) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 3.831735472510272, LR: 0.0003 +[2026-03-01 04:24:19] (step=0019585) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.8319311289375855, LR: 0.0003 +[2026-03-01 04:24:27] (step=0019586) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.832126785364899, LR: 0.0003 +[2026-03-01 04:24:35] (step=0019587) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.832322441792213, LR: 0.0003 +[2026-03-01 04:24:42] (step=0019588) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.8325180982195266, LR: 0.0003 +[2026-03-01 04:24:50] (step=0019589) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 3.83271375464684, LR: 0.0003 +[2026-03-01 04:24:58] (step=0019590) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.8329094110741537, LR: 0.0003 +[2026-03-01 04:25:06] (step=0019591) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.8331050675014673, LR: 0.0003 +[2026-03-01 04:25:14] (step=0019592) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.833300723928781, LR: 0.0003 +[2026-03-01 04:25:22] (step=0019593) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.833496380356095, LR: 0.0003 +[2026-03-01 04:25:29] (step=0019594) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.8336920367834084, LR: 0.0003 +[2026-03-01 04:25:37] (step=0019595) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.833887693210722, LR: 0.0003 +[2026-03-01 04:25:45] (step=0019596) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.8340833496380355, LR: 0.0003 +[2026-03-01 04:25:53] (step=0019597) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.834279006065349, LR: 0.0003 +[2026-03-01 04:26:01] (step=0019598) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.8344746624926627, LR: 0.0003 +[2026-03-01 04:26:09] (step=0019599) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 3.8346703189199767, LR: 0.0003 +[2026-03-01 04:26:17] (step=0019600) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.8348659753472902, LR: 0.0003 +[2026-03-01 04:26:24] (step=0019601) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.835061631774604, LR: 0.0003 +[2026-03-01 04:26:32] (step=0019602) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.8352572882019174, LR: 0.0003 +[2026-03-01 04:26:40] (step=0019603) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.835452944629231, LR: 0.0003 +[2026-03-01 04:26:48] (step=0019604) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 3.835648601056545, LR: 0.0003 +[2026-03-01 04:26:56] (step=0019605) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.8358442574838585, LR: 0.0003 +[2026-03-01 04:27:04] (step=0019606) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.836039913911172, LR: 0.0003 +[2026-03-01 04:27:11] (step=0019607) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 3.8362355703384856, LR: 0.0003 +[2026-03-01 04:27:19] (step=0019608) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 3.836431226765799, LR: 0.0003 +[2026-03-01 04:27:27] (step=0019609) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.8366268831931127, LR: 0.0003 +[2026-03-01 04:27:35] (step=0019610) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.8368225396204267, LR: 0.0003 +[2026-03-01 04:27:43] (step=0019611) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.8370181960477403, LR: 0.0003 +[2026-03-01 04:27:51] (step=0019612) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.837213852475054, LR: 0.0003 +[2026-03-01 04:27:58] (step=0019613) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.8374095089023674, LR: 0.0003 +[2026-03-01 04:28:06] (step=0019614) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.837605165329681, LR: 0.0003 +[2026-03-01 04:28:14] (step=0019615) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.8378008217569946, LR: 0.0003 +[2026-03-01 04:28:22] (step=0019616) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.8379964781843086, LR: 0.0003 +[2026-03-01 04:28:30] (step=0019617) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.838192134611622, LR: 0.0003 +[2026-03-01 04:28:38] (step=0019618) Train Loss: 0.4720, Train Steps/Sec: 0.13, Epoch: 3.8383877910389357, LR: 0.0003 +[2026-03-01 04:28:45] (step=0019619) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.8385834474662492, LR: 0.0003 +[2026-03-01 04:28:53] (step=0019620) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 3.838779103893563, LR: 0.0003 +[2026-03-01 04:29:01] (step=0019621) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 3.8389747603208764, LR: 0.0003 +[2026-03-01 04:29:09] (step=0019622) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.8391704167481904, LR: 0.0003 +[2026-03-01 04:29:17] (step=0019623) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.839366073175504, LR: 0.0003 +[2026-03-01 04:29:25] (step=0019624) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.8395617296028175, LR: 0.0003 +[2026-03-01 04:29:33] (step=0019625) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.839757386030131, LR: 0.0003 +[2026-03-01 04:29:41] (step=0019626) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 3.8399530424574446, LR: 0.0003 +[2026-03-01 04:29:48] (step=0019627) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.840148698884758, LR: 0.0003 +[2026-03-01 04:29:56] (step=0019628) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.840344355312072, LR: 0.0003 +[2026-03-01 04:30:04] (step=0019629) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.8405400117393858, LR: 0.0003 +[2026-03-01 04:30:12] (step=0019630) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.8407356681666993, LR: 0.0003 +[2026-03-01 04:30:20] (step=0019631) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.840931324594013, LR: 0.0003 +[2026-03-01 04:30:28] (step=0019632) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.8411269810213264, LR: 0.0003 +[2026-03-01 04:30:36] (step=0019633) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.84132263744864, LR: 0.0003 +[2026-03-01 04:30:43] (step=0019634) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.841518293875954, LR: 0.0003 +[2026-03-01 04:30:51] (step=0019635) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.8417139503032676, LR: 0.0003 +[2026-03-01 04:30:59] (step=0019636) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.841909606730581, LR: 0.0003 +[2026-03-01 04:31:07] (step=0019637) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 3.8421052631578947, LR: 0.0003 +[2026-03-01 04:31:15] (step=0019638) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.8423009195852083, LR: 0.0003 +[2026-03-01 04:31:23] (step=0019639) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.842496576012522, LR: 0.0003 +[2026-03-01 04:31:30] (step=0019640) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.842692232439836, LR: 0.0003 +[2026-03-01 04:31:38] (step=0019641) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.8428878888671494, LR: 0.0003 +[2026-03-01 04:31:46] (step=0019642) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.843083545294463, LR: 0.0003 +[2026-03-01 04:31:54] (step=0019643) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 3.8432792017217765, LR: 0.0003 +[2026-03-01 04:32:02] (step=0019644) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 3.84347485814909, LR: 0.0003 +[2026-03-01 04:32:10] (step=0019645) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.8436705145764036, LR: 0.0003 +[2026-03-01 04:32:18] (step=0019646) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.8438661710037176, LR: 0.0003 +[2026-03-01 04:32:25] (step=0019647) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.844061827431031, LR: 0.0003 +[2026-03-01 04:32:33] (step=0019648) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.8442574838583448, LR: 0.0003 +[2026-03-01 04:32:41] (step=0019649) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.8444531402856583, LR: 0.0003 +[2026-03-01 04:32:49] (step=0019650) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.844648796712972, LR: 0.0003 +[2026-03-01 04:32:57] (step=0019651) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 3.8448444531402854, LR: 0.0003 +[2026-03-01 04:33:05] (step=0019652) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.8450401095675995, LR: 0.0003 +[2026-03-01 04:33:12] (step=0019653) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 3.845235765994913, LR: 0.0003 +[2026-03-01 04:33:20] (step=0019654) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.8454314224222266, LR: 0.0003 +[2026-03-01 04:33:28] (step=0019655) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 3.84562707884954, LR: 0.0003 +[2026-03-01 04:33:36] (step=0019656) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.8458227352768537, LR: 0.0003 +[2026-03-01 04:33:44] (step=0019657) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.8460183917041673, LR: 0.0003 +[2026-03-01 04:33:52] (step=0019658) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 3.8462140481314813, LR: 0.0003 +[2026-03-01 04:33:59] (step=0019659) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.846409704558795, LR: 0.0003 +[2026-03-01 04:34:07] (step=0019660) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.8466053609861084, LR: 0.0003 +[2026-03-01 04:34:15] (step=0019661) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.846801017413422, LR: 0.0003 +[2026-03-01 04:34:23] (step=0019662) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.8469966738407355, LR: 0.0003 +[2026-03-01 04:34:31] (step=0019663) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.8471923302680495, LR: 0.0003 +[2026-03-01 04:34:39] (step=0019664) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.847387986695363, LR: 0.0003 +[2026-03-01 04:34:47] (step=0019665) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 3.8475836431226766, LR: 0.0003 +[2026-03-01 04:34:54] (step=0019666) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.84777929954999, LR: 0.0003 +[2026-03-01 04:35:02] (step=0019667) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.8479749559773038, LR: 0.0003 +[2026-03-01 04:35:10] (step=0019668) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.8481706124046173, LR: 0.0003 +[2026-03-01 04:35:18] (step=0019669) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.8483662688319313, LR: 0.0003 +[2026-03-01 04:35:26] (step=0019670) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 3.848561925259245, LR: 0.0003 +[2026-03-01 04:35:34] (step=0019671) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.8487575816865585, LR: 0.0003 +[2026-03-01 04:35:41] (step=0019672) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 3.848953238113872, LR: 0.0003 +[2026-03-01 04:35:49] (step=0019673) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.8491488945411856, LR: 0.0003 +[2026-03-01 04:35:57] (step=0019674) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 3.849344550968499, LR: 0.0003 +[2026-03-01 04:36:05] (step=0019675) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.849540207395813, LR: 0.0003 +[2026-03-01 04:36:13] (step=0019676) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.8497358638231267, LR: 0.0003 +[2026-03-01 04:36:21] (step=0019677) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.8499315202504403, LR: 0.0003 +[2026-03-01 04:36:29] (step=0019678) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 3.850127176677754, LR: 0.0003 +[2026-03-01 04:36:36] (step=0019679) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.8503228331050674, LR: 0.0003 +[2026-03-01 04:36:44] (step=0019680) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.850518489532381, LR: 0.0003 +[2026-03-01 04:36:52] (step=0019681) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 3.850714145959695, LR: 0.0003 +[2026-03-01 04:37:00] (step=0019682) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.8509098023870085, LR: 0.0003 +[2026-03-01 04:37:08] (step=0019683) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.851105458814322, LR: 0.0003 +[2026-03-01 04:37:16] (step=0019684) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.8513011152416357, LR: 0.0003 +[2026-03-01 04:37:24] (step=0019685) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.851496771668949, LR: 0.0003 +[2026-03-01 04:37:32] (step=0019686) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.8516924280962628, LR: 0.0003 +[2026-03-01 04:37:39] (step=0019687) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 3.851888084523577, LR: 0.0003 +[2026-03-01 04:37:47] (step=0019688) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.8520837409508903, LR: 0.0003 +[2026-03-01 04:37:55] (step=0019689) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.852279397378204, LR: 0.0003 +[2026-03-01 04:38:03] (step=0019690) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.8524750538055175, LR: 0.0003 +[2026-03-01 04:38:11] (step=0019691) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 3.852670710232831, LR: 0.0003 +[2026-03-01 04:38:19] (step=0019692) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 3.8528663666601446, LR: 0.0003 +[2026-03-01 04:38:26] (step=0019693) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.8530620230874586, LR: 0.0003 +[2026-03-01 04:38:34] (step=0019694) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.853257679514772, LR: 0.0003 +[2026-03-01 04:38:42] (step=0019695) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.8534533359420857, LR: 0.0003 +[2026-03-01 04:38:50] (step=0019696) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.8536489923693993, LR: 0.0003 +[2026-03-01 04:38:58] (step=0019697) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 3.853844648796713, LR: 0.0003 +[2026-03-01 04:39:06] (step=0019698) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.8540403052240264, LR: 0.0003 +[2026-03-01 04:39:13] (step=0019699) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.8542359616513404, LR: 0.0003 +[2026-03-01 04:39:21] (step=0019700) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 3.854431618078654, LR: 0.0003 +[2026-03-01 04:39:29] (step=0019701) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.8546272745059675, LR: 0.0003 +[2026-03-01 04:39:37] (step=0019702) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.854822930933281, LR: 0.0003 +[2026-03-01 04:39:45] (step=0019703) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 3.8550185873605947, LR: 0.0003 +[2026-03-01 04:39:53] (step=0019704) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 3.8552142437879082, LR: 0.0003 +[2026-03-01 04:40:01] (step=0019705) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 3.8554099002152222, LR: 0.0003 +[2026-03-01 04:40:08] (step=0019706) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.855605556642536, LR: 0.0003 +[2026-03-01 04:40:16] (step=0019707) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.8558012130698494, LR: 0.0003 +[2026-03-01 04:40:24] (step=0019708) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.855996869497163, LR: 0.0003 +[2026-03-01 04:40:32] (step=0019709) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.8561925259244765, LR: 0.0003 +[2026-03-01 04:40:40] (step=0019710) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.85638818235179, LR: 0.0003 +[2026-03-01 04:40:48] (step=0019711) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.856583838779104, LR: 0.0003 +[2026-03-01 04:40:55] (step=0019712) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.8567794952064176, LR: 0.0003 +[2026-03-01 04:41:03] (step=0019713) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.856975151633731, LR: 0.0003 +[2026-03-01 04:41:11] (step=0019714) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.8571708080610447, LR: 0.0003 +[2026-03-01 04:41:19] (step=0019715) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 3.8573664644883583, LR: 0.0003 +[2026-03-01 04:41:27] (step=0019716) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.8575621209156723, LR: 0.0003 +[2026-03-01 04:41:35] (step=0019717) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.857757777342986, LR: 0.0003 +[2026-03-01 04:41:42] (step=0019718) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 3.8579534337702994, LR: 0.0003 +[2026-03-01 04:41:50] (step=0019719) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 3.858149090197613, LR: 0.0003 +[2026-03-01 04:41:58] (step=0019720) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.8583447466249265, LR: 0.0003 +[2026-03-01 04:42:06] (step=0019721) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.85854040305224, LR: 0.0003 +[2026-03-01 04:42:14] (step=0019722) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.858736059479554, LR: 0.0003 +[2026-03-01 04:42:22] (step=0019723) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.8589317159068677, LR: 0.0003 +[2026-03-01 04:42:29] (step=0019724) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.8591273723341812, LR: 0.0003 +[2026-03-01 04:42:37] (step=0019725) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.859323028761495, LR: 0.0003 +[2026-03-01 04:42:45] (step=0019726) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.8595186851888084, LR: 0.0003 +[2026-03-01 04:42:53] (step=0019727) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.859714341616122, LR: 0.0003 +[2026-03-01 04:43:01] (step=0019728) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 3.859909998043436, LR: 0.0003 +[2026-03-01 04:43:09] (step=0019729) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.8601056544707495, LR: 0.0003 +[2026-03-01 04:43:17] (step=0019730) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.860301310898063, LR: 0.0003 +[2026-03-01 04:43:24] (step=0019731) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.8604969673253766, LR: 0.0003 +[2026-03-01 04:43:32] (step=0019732) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.86069262375269, LR: 0.0003 +[2026-03-01 04:43:40] (step=0019733) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.8608882801800037, LR: 0.0003 +[2026-03-01 04:43:48] (step=0019734) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.8610839366073177, LR: 0.0003 +[2026-03-01 04:43:56] (step=0019735) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 3.8612795930346313, LR: 0.0003 +[2026-03-01 04:44:04] (step=0019736) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.861475249461945, LR: 0.0003 +[2026-03-01 04:44:12] (step=0019737) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.8616709058892584, LR: 0.0003 +[2026-03-01 04:44:19] (step=0019738) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.861866562316572, LR: 0.0003 +[2026-03-01 04:44:27] (step=0019739) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.8620622187438856, LR: 0.0003 +[2026-03-01 04:44:35] (step=0019740) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.8622578751711996, LR: 0.0003 +[2026-03-01 04:44:43] (step=0019741) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.862453531598513, LR: 0.0003 +[2026-03-01 04:44:51] (step=0019742) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 3.8626491880258267, LR: 0.0003 +[2026-03-01 04:44:59] (step=0019743) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.8628448444531402, LR: 0.0003 +[2026-03-01 04:45:06] (step=0019744) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 3.863040500880454, LR: 0.0003 +[2026-03-01 04:45:14] (step=0019745) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.8632361573077674, LR: 0.0003 +[2026-03-01 04:45:22] (step=0019746) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 3.8634318137350814, LR: 0.0003 +[2026-03-01 04:45:30] (step=0019747) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 3.863627470162395, LR: 0.0003 +[2026-03-01 04:45:38] (step=0019748) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.8638231265897085, LR: 0.0003 +[2026-03-01 04:45:46] (step=0019749) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.864018783017022, LR: 0.0003 +[2026-03-01 04:45:53] (step=0019750) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 3.8642144394443356, LR: 0.0003 +[2026-03-01 04:46:01] (step=0019751) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 3.864410095871649, LR: 0.0003 +[2026-03-01 04:46:09] (step=0019752) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 3.864605752298963, LR: 0.0003 +[2026-03-01 04:46:17] (step=0019753) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.8648014087262768, LR: 0.0003 +[2026-03-01 04:46:25] (step=0019754) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.8649970651535903, LR: 0.0003 +[2026-03-01 04:46:33] (step=0019755) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.865192721580904, LR: 0.0003 +[2026-03-01 04:46:41] (step=0019756) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.8653883780082174, LR: 0.0003 +[2026-03-01 04:46:48] (step=0019757) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.865584034435531, LR: 0.0003 +[2026-03-01 04:46:56] (step=0019758) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.865779690862845, LR: 0.0003 +[2026-03-01 04:47:04] (step=0019759) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.8659753472901586, LR: 0.0003 +[2026-03-01 04:47:12] (step=0019760) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.866171003717472, LR: 0.0003 +[2026-03-01 04:47:20] (step=0019761) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.8663666601447857, LR: 0.0003 +[2026-03-01 04:47:28] (step=0019762) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.8665623165720993, LR: 0.0003 +[2026-03-01 04:47:35] (step=0019763) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.866757972999413, LR: 0.0003 +[2026-03-01 04:47:43] (step=0019764) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.866953629426727, LR: 0.0003 +[2026-03-01 04:47:51] (step=0019765) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.8671492858540404, LR: 0.0003 +[2026-03-01 04:47:59] (step=0019766) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.867344942281354, LR: 0.0003 +[2026-03-01 04:48:07] (step=0019767) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.8675405987086675, LR: 0.0003 +[2026-03-01 04:48:15] (step=0019768) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.867736255135981, LR: 0.0003 +[2026-03-01 04:48:22] (step=0019769) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.8679319115632946, LR: 0.0003 +[2026-03-01 04:48:30] (step=0019770) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.8681275679906086, LR: 0.0003 +[2026-03-01 04:48:38] (step=0019771) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.868323224417922, LR: 0.0003 +[2026-03-01 04:48:46] (step=0019772) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 3.8685188808452358, LR: 0.0003 +[2026-03-01 04:48:54] (step=0019773) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.8687145372725493, LR: 0.0003 +[2026-03-01 04:49:02] (step=0019774) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 3.868910193699863, LR: 0.0003 +[2026-03-01 04:49:10] (step=0019775) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.869105850127177, LR: 0.0003 +[2026-03-01 04:49:17] (step=0019776) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.8693015065544905, LR: 0.0003 +[2026-03-01 04:49:25] (step=0019777) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 3.869497162981804, LR: 0.0003 +[2026-03-01 04:49:33] (step=0019778) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.8696928194091176, LR: 0.0003 +[2026-03-01 04:49:41] (step=0019779) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.869888475836431, LR: 0.0003 +[2026-03-01 04:49:49] (step=0019780) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.8700841322637447, LR: 0.0003 +[2026-03-01 04:49:57] (step=0019781) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.8702797886910587, LR: 0.0003 +[2026-03-01 04:50:05] (step=0019782) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.8704754451183723, LR: 0.0003 +[2026-03-01 04:50:12] (step=0019783) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 3.870671101545686, LR: 0.0003 +[2026-03-01 04:50:20] (step=0019784) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 3.8708667579729994, LR: 0.0003 +[2026-03-01 04:50:28] (step=0019785) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 3.871062414400313, LR: 0.0003 +[2026-03-01 04:50:36] (step=0019786) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.8712580708276265, LR: 0.0003 +[2026-03-01 04:50:44] (step=0019787) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 3.8714537272549405, LR: 0.0003 +[2026-03-01 04:50:52] (step=0019788) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.871649383682254, LR: 0.0003 +[2026-03-01 04:51:00] (step=0019789) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.8718450401095676, LR: 0.0003 +[2026-03-01 04:51:07] (step=0019790) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.872040696536881, LR: 0.0003 +[2026-03-01 04:51:15] (step=0019791) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.8722363529641948, LR: 0.0003 +[2026-03-01 04:51:23] (step=0019792) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.8724320093915083, LR: 0.0003 +[2026-03-01 04:51:31] (step=0019793) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.8726276658188223, LR: 0.0003 +[2026-03-01 04:51:39] (step=0019794) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.872823322246136, LR: 0.0003 +[2026-03-01 04:51:47] (step=0019795) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.8730189786734495, LR: 0.0003 +[2026-03-01 04:51:54] (step=0019796) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 3.873214635100763, LR: 0.0003 +[2026-03-01 04:52:02] (step=0019797) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.8734102915280766, LR: 0.0003 +[2026-03-01 04:52:10] (step=0019798) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 3.87360594795539, LR: 0.0003 +[2026-03-01 04:52:18] (step=0019799) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.873801604382704, LR: 0.0003 +[2026-03-01 04:52:26] (step=0019800) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.8739972608100177, LR: 0.0003 +[2026-03-01 04:52:34] (step=0019801) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.8741929172373313, LR: 0.0003 +[2026-03-01 04:52:41] (step=0019802) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.874388573664645, LR: 0.0003 +[2026-03-01 04:52:49] (step=0019803) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.8745842300919584, LR: 0.0003 +[2026-03-01 04:52:57] (step=0019804) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.874779886519272, LR: 0.0003 +[2026-03-01 04:53:05] (step=0019805) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.874975542946586, LR: 0.0003 +[2026-03-01 04:53:13] (step=0019806) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 3.8751711993738995, LR: 0.0003 +[2026-03-01 04:53:21] (step=0019807) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 3.875366855801213, LR: 0.0003 +[2026-03-01 04:53:28] (step=0019808) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.8755625122285267, LR: 0.0003 +[2026-03-01 04:53:36] (step=0019809) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.87575816865584, LR: 0.0003 +[2026-03-01 04:53:44] (step=0019810) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 3.875953825083154, LR: 0.0003 +[2026-03-01 04:53:52] (step=0019811) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 3.876149481510468, LR: 0.0003 +[2026-03-01 04:54:00] (step=0019812) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.8763451379377813, LR: 0.0003 +[2026-03-01 04:54:08] (step=0019813) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 3.876540794365095, LR: 0.0003 +[2026-03-01 04:54:16] (step=0019814) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.8767364507924085, LR: 0.0003 +[2026-03-01 04:54:23] (step=0019815) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.876932107219722, LR: 0.0003 +[2026-03-01 04:54:31] (step=0019816) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.8771277636470356, LR: 0.0003 +[2026-03-01 04:54:39] (step=0019817) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.8773234200743496, LR: 0.0003 +[2026-03-01 04:54:47] (step=0019818) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.877519076501663, LR: 0.0003 +[2026-03-01 04:54:55] (step=0019819) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 3.8777147329289767, LR: 0.0003 +[2026-03-01 04:55:03] (step=0019820) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.8779103893562903, LR: 0.0003 +[2026-03-01 04:55:10] (step=0019821) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.878106045783604, LR: 0.0003 +[2026-03-01 04:55:18] (step=0019822) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.8783017022109174, LR: 0.0003 +[2026-03-01 04:55:26] (step=0019823) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.8784973586382314, LR: 0.0003 +[2026-03-01 04:55:34] (step=0019824) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.878693015065545, LR: 0.0003 +[2026-03-01 04:55:42] (step=0019825) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.8788886714928585, LR: 0.0003 +[2026-03-01 04:55:50] (step=0019826) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.879084327920172, LR: 0.0003 +[2026-03-01 04:55:58] (step=0019827) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.8792799843474857, LR: 0.0003 +[2026-03-01 04:56:05] (step=0019828) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.8794756407747997, LR: 0.0003 +[2026-03-01 04:56:13] (step=0019829) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.8796712972021132, LR: 0.0003 +[2026-03-01 04:56:21] (step=0019830) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.879866953629427, LR: 0.0003 +[2026-03-01 04:56:29] (step=0019831) Train Loss: 0.4573, Train Steps/Sec: 0.12, Epoch: 3.8800626100567404, LR: 0.0003 +[2026-03-01 04:56:37] (step=0019832) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.880258266484054, LR: 0.0003 +[2026-03-01 04:56:45] (step=0019833) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.8804539229113675, LR: 0.0003 +[2026-03-01 04:56:53] (step=0019834) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 3.8806495793386815, LR: 0.0003 +[2026-03-01 04:57:00] (step=0019835) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 3.880845235765995, LR: 0.0003 +[2026-03-01 04:57:08] (step=0019836) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 3.8810408921933086, LR: 0.0003 +[2026-03-01 04:57:16] (step=0019837) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.881236548620622, LR: 0.0003 +[2026-03-01 04:57:24] (step=0019838) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.8814322050479357, LR: 0.0003 +[2026-03-01 04:57:32] (step=0019839) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.8816278614752493, LR: 0.0003 +[2026-03-01 04:57:40] (step=0019840) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.8818235179025633, LR: 0.0003 +[2026-03-01 04:57:48] (step=0019841) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.882019174329877, LR: 0.0003 +[2026-03-01 04:57:55] (step=0019842) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.8822148307571904, LR: 0.0003 +[2026-03-01 04:58:03] (step=0019843) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.882410487184504, LR: 0.0003 +[2026-03-01 04:58:11] (step=0019844) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 3.8826061436118176, LR: 0.0003 +[2026-03-01 04:58:19] (step=0019845) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.882801800039131, LR: 0.0003 +[2026-03-01 04:58:27] (step=0019846) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.882997456466445, LR: 0.0003 +[2026-03-01 04:58:35] (step=0019847) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.8831931128937587, LR: 0.0003 +[2026-03-01 04:58:42] (step=0019848) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.8833887693210722, LR: 0.0003 +[2026-03-01 04:58:50] (step=0019849) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 3.883584425748386, LR: 0.0003 +[2026-03-01 04:58:58] (step=0019850) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 3.8837800821756994, LR: 0.0003 +[2026-03-01 04:59:06] (step=0019851) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.883975738603013, LR: 0.0003 +[2026-03-01 04:59:14] (step=0019852) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.884171395030327, LR: 0.0003 +[2026-03-01 04:59:22] (step=0019853) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 3.8843670514576405, LR: 0.0003 +[2026-03-01 04:59:29] (step=0019854) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.884562707884954, LR: 0.0003 +[2026-03-01 04:59:37] (step=0019855) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.8847583643122676, LR: 0.0003 +[2026-03-01 04:59:45] (step=0019856) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.884954020739581, LR: 0.0003 +[2026-03-01 04:59:53] (step=0019857) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.8851496771668947, LR: 0.0003 +[2026-03-01 05:00:01] (step=0019858) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.8853453335942087, LR: 0.0003 +[2026-03-01 05:00:09] (step=0019859) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.8855409900215223, LR: 0.0003 +[2026-03-01 05:00:16] (step=0019860) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.885736646448836, LR: 0.0003 +[2026-03-01 05:00:24] (step=0019861) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.8859323028761494, LR: 0.0003 +[2026-03-01 05:00:32] (step=0019862) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 3.886127959303463, LR: 0.0003 +[2026-03-01 05:00:40] (step=0019863) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.8863236157307766, LR: 0.0003 +[2026-03-01 05:00:48] (step=0019864) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.8865192721580906, LR: 0.0003 +[2026-03-01 05:00:56] (step=0019865) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.886714928585404, LR: 0.0003 +[2026-03-01 05:01:04] (step=0019866) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.8869105850127177, LR: 0.0003 +[2026-03-01 05:01:11] (step=0019867) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 3.8871062414400313, LR: 0.0003 +[2026-03-01 05:01:19] (step=0019868) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.887301897867345, LR: 0.0003 +[2026-03-01 05:01:27] (step=0019869) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.8874975542946584, LR: 0.0003 +[2026-03-01 05:01:35] (step=0019870) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.8876932107219724, LR: 0.0003 +[2026-03-01 05:01:43] (step=0019871) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.887888867149286, LR: 0.0003 +[2026-03-01 05:01:51] (step=0019872) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 3.8880845235765995, LR: 0.0003 +[2026-03-01 05:01:58] (step=0019873) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.888280180003913, LR: 0.0003 +[2026-03-01 05:02:06] (step=0019874) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.8884758364312266, LR: 0.0003 +[2026-03-01 05:02:14] (step=0019875) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.88867149285854, LR: 0.0003 +[2026-03-01 05:02:22] (step=0019876) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.888867149285854, LR: 0.0003 +[2026-03-01 05:02:30] (step=0019877) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 3.8890628057131678, LR: 0.0003 +[2026-03-01 05:02:38] (step=0019878) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.8892584621404813, LR: 0.0003 +[2026-03-01 05:02:46] (step=0019879) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 3.889454118567795, LR: 0.0003 +[2026-03-01 05:02:54] (step=0019880) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.8896497749951084, LR: 0.0003 +[2026-03-01 05:03:01] (step=0019881) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.889845431422422, LR: 0.0003 +[2026-03-01 05:03:09] (step=0019882) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.890041087849736, LR: 0.0003 +[2026-03-01 05:03:17] (step=0019883) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.8902367442770496, LR: 0.0003 +[2026-03-01 05:03:25] (step=0019884) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.890432400704363, LR: 0.0003 +[2026-03-01 05:03:33] (step=0019885) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.8906280571316767, LR: 0.0003 +[2026-03-01 05:03:41] (step=0019886) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.8908237135589903, LR: 0.0003 +[2026-03-01 05:03:49] (step=0019887) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.8910193699863043, LR: 0.0003 +[2026-03-01 05:03:56] (step=0019888) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 3.891215026413618, LR: 0.0003 +[2026-03-01 05:04:04] (step=0019889) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.8914106828409314, LR: 0.0003 +[2026-03-01 05:04:12] (step=0019890) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 3.891606339268245, LR: 0.0003 +[2026-03-01 05:04:20] (step=0019891) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 3.8918019956955585, LR: 0.0003 +[2026-03-01 05:04:28] (step=0019892) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.891997652122872, LR: 0.0003 +[2026-03-01 05:04:36] (step=0019893) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.892193308550186, LR: 0.0003 +[2026-03-01 05:04:43] (step=0019894) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.8923889649774996, LR: 0.0003 +[2026-03-01 05:04:51] (step=0019895) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.892584621404813, LR: 0.0003 +[2026-03-01 05:04:59] (step=0019896) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.8927802778321268, LR: 0.0003 +[2026-03-01 05:05:07] (step=0019897) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 3.8929759342594403, LR: 0.0003 +[2026-03-01 05:05:15] (step=0019898) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.893171590686754, LR: 0.0003 +[2026-03-01 05:05:23] (step=0019899) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.893367247114068, LR: 0.0003 +[2026-03-01 05:05:30] (step=0019900) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.8935629035413815, LR: 0.0003 +[2026-03-01 05:05:38] (step=0019901) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.893758559968695, LR: 0.0003 +[2026-03-01 05:05:46] (step=0019902) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.8939542163960086, LR: 0.0003 +[2026-03-01 05:05:54] (step=0019903) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.894149872823322, LR: 0.0003 +[2026-03-01 05:06:02] (step=0019904) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.8943455292506357, LR: 0.0003 +[2026-03-01 05:06:10] (step=0019905) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.8945411856779497, LR: 0.0003 +[2026-03-01 05:06:17] (step=0019906) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 3.8947368421052633, LR: 0.0003 +[2026-03-01 05:06:25] (step=0019907) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 3.894932498532577, LR: 0.0003 +[2026-03-01 05:06:33] (step=0019908) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 3.8951281549598904, LR: 0.0003 +[2026-03-01 05:06:41] (step=0019909) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.895323811387204, LR: 0.0003 +[2026-03-01 05:06:49] (step=0019910) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.8955194678145175, LR: 0.0003 +[2026-03-01 05:06:57] (step=0019911) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 3.8957151242418315, LR: 0.0003 +[2026-03-01 05:07:05] (step=0019912) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.895910780669145, LR: 0.0003 +[2026-03-01 05:07:12] (step=0019913) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.8961064370964587, LR: 0.0003 +[2026-03-01 05:07:20] (step=0019914) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.896302093523772, LR: 0.0003 +[2026-03-01 05:07:28] (step=0019915) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.8964977499510858, LR: 0.0003 +[2026-03-01 05:07:36] (step=0019916) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.8966934063783993, LR: 0.0003 +[2026-03-01 05:07:44] (step=0019917) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 3.8968890628057133, LR: 0.0003 +[2026-03-01 05:07:52] (step=0019918) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.897084719233027, LR: 0.0003 +[2026-03-01 05:07:59] (step=0019919) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.8972803756603405, LR: 0.0003 +[2026-03-01 05:08:07] (step=0019920) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.897476032087654, LR: 0.0003 +[2026-03-01 05:08:15] (step=0019921) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.8976716885149676, LR: 0.0003 +[2026-03-01 05:08:23] (step=0019922) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.897867344942281, LR: 0.0003 +[2026-03-01 05:08:31] (step=0019923) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 3.898063001369595, LR: 0.0003 +[2026-03-01 05:08:39] (step=0019924) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.8982586577969087, LR: 0.0003 +[2026-03-01 05:08:47] (step=0019925) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 3.8984543142242223, LR: 0.0003 +[2026-03-01 05:08:55] (step=0019926) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.898649970651536, LR: 0.0003 +[2026-03-01 05:09:03] (step=0019927) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.8988456270788494, LR: 0.0003 +[2026-03-01 05:09:10] (step=0019928) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 3.899041283506163, LR: 0.0003 +[2026-03-01 05:09:18] (step=0019929) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 3.899236939933477, LR: 0.0003 +[2026-03-01 05:09:26] (step=0019930) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.8994325963607905, LR: 0.0003 +[2026-03-01 05:09:34] (step=0019931) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.899628252788104, LR: 0.0003 +[2026-03-01 05:09:42] (step=0019932) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.8998239092154177, LR: 0.0003 +[2026-03-01 05:09:50] (step=0019933) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.9000195656427312, LR: 0.0003 +[2026-03-01 05:09:57] (step=0019934) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.900215222070045, LR: 0.0003 +[2026-03-01 05:10:05] (step=0019935) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.900410878497359, LR: 0.0003 +[2026-03-01 05:10:13] (step=0019936) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.9006065349246724, LR: 0.0003 +[2026-03-01 05:10:21] (step=0019937) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.900802191351986, LR: 0.0003 +[2026-03-01 05:10:29] (step=0019938) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 3.9009978477792995, LR: 0.0003 +[2026-03-01 05:10:37] (step=0019939) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.901193504206613, LR: 0.0003 +[2026-03-01 05:10:44] (step=0019940) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.901389160633927, LR: 0.0003 +[2026-03-01 05:10:52] (step=0019941) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.9015848170612406, LR: 0.0003 +[2026-03-01 05:11:00] (step=0019942) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.901780473488554, LR: 0.0003 +[2026-03-01 05:11:08] (step=0019943) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.9019761299158677, LR: 0.0003 +[2026-03-01 05:11:16] (step=0019944) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.9021717863431813, LR: 0.0003 +[2026-03-01 05:11:24] (step=0019945) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.902367442770495, LR: 0.0003 +[2026-03-01 05:11:31] (step=0019946) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.902563099197809, LR: 0.0003 +[2026-03-01 05:11:39] (step=0019947) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.9027587556251224, LR: 0.0003 +[2026-03-01 05:11:47] (step=0019948) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.902954412052436, LR: 0.0003 +[2026-03-01 05:11:55] (step=0019949) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 3.9031500684797495, LR: 0.0003 +[2026-03-01 05:12:03] (step=0019950) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.903345724907063, LR: 0.0003 +[2026-03-01 05:12:11] (step=0019951) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.9035413813343767, LR: 0.0003 +[2026-03-01 05:12:18] (step=0019952) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.9037370377616907, LR: 0.0003 +[2026-03-01 05:12:26] (step=0019953) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 3.9039326941890042, LR: 0.0003 +[2026-03-01 05:12:34] (step=0019954) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.904128350616318, LR: 0.0003 +[2026-03-01 05:12:42] (step=0019955) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.9043240070436314, LR: 0.0003 +[2026-03-01 05:12:50] (step=0019956) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.904519663470945, LR: 0.0003 +[2026-03-01 05:12:58] (step=0019957) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.9047153198982585, LR: 0.0003 +[2026-03-01 05:13:06] (step=0019958) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.9049109763255725, LR: 0.0003 +[2026-03-01 05:13:13] (step=0019959) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.905106632752886, LR: 0.0003 +[2026-03-01 05:13:21] (step=0019960) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.9053022891801996, LR: 0.0003 +[2026-03-01 05:13:29] (step=0019961) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.905497945607513, LR: 0.0003 +[2026-03-01 05:13:37] (step=0019962) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.9056936020348267, LR: 0.0003 +[2026-03-01 05:13:45] (step=0019963) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 3.9058892584621403, LR: 0.0003 +[2026-03-01 05:13:53] (step=0019964) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.9060849148894543, LR: 0.0003 +[2026-03-01 05:14:00] (step=0019965) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.906280571316768, LR: 0.0003 +[2026-03-01 05:14:08] (step=0019966) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.9064762277440814, LR: 0.0003 +[2026-03-01 05:14:16] (step=0019967) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.906671884171395, LR: 0.0003 +[2026-03-01 05:14:24] (step=0019968) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.9068675405987086, LR: 0.0003 +[2026-03-01 05:14:32] (step=0019969) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.907063197026022, LR: 0.0003 +[2026-03-01 05:14:40] (step=0019970) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.907258853453336, LR: 0.0003 +[2026-03-01 05:14:47] (step=0019971) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 3.9074545098806497, LR: 0.0003 +[2026-03-01 05:14:55] (step=0019972) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.9076501663079632, LR: 0.0003 +[2026-03-01 05:15:03] (step=0019973) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.907845822735277, LR: 0.0003 +[2026-03-01 05:15:11] (step=0019974) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.9080414791625904, LR: 0.0003 +[2026-03-01 05:15:19] (step=0019975) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 3.908237135589904, LR: 0.0003 +[2026-03-01 05:15:27] (step=0019976) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 3.908432792017218, LR: 0.0003 +[2026-03-01 05:15:35] (step=0019977) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.9086284484445315, LR: 0.0003 +[2026-03-01 05:15:43] (step=0019978) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.908824104871845, LR: 0.0003 +[2026-03-01 05:15:50] (step=0019979) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.9090197612991586, LR: 0.0003 +[2026-03-01 05:15:58] (step=0019980) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.909215417726472, LR: 0.0003 +[2026-03-01 05:16:06] (step=0019981) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.9094110741537857, LR: 0.0003 +[2026-03-01 05:16:14] (step=0019982) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.9096067305810998, LR: 0.0003 +[2026-03-01 05:16:22] (step=0019983) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 3.9098023870084133, LR: 0.0003 +[2026-03-01 05:16:30] (step=0019984) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 3.909998043435727, LR: 0.0003 +[2026-03-01 05:16:38] (step=0019985) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.9101936998630404, LR: 0.0003 +[2026-03-01 05:16:45] (step=0019986) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 3.910389356290354, LR: 0.0003 +[2026-03-01 05:16:53] (step=0019987) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.9105850127176676, LR: 0.0003 +[2026-03-01 05:17:01] (step=0019988) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 3.9107806691449816, LR: 0.0003 +[2026-03-01 05:17:09] (step=0019989) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.910976325572295, LR: 0.0003 +[2026-03-01 05:17:17] (step=0019990) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.9111719819996087, LR: 0.0003 +[2026-03-01 05:17:25] (step=0019991) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.9113676384269223, LR: 0.0003 +[2026-03-01 05:17:33] (step=0019992) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.911563294854236, LR: 0.0003 +[2026-03-01 05:17:40] (step=0019993) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 3.9117589512815494, LR: 0.0003 +[2026-03-01 05:17:48] (step=0019994) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.9119546077088634, LR: 0.0003 +[2026-03-01 05:17:56] (step=0019995) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 3.912150264136177, LR: 0.0003 +[2026-03-01 05:18:04] (step=0019996) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.9123459205634905, LR: 0.0003 +[2026-03-01 05:18:12] (step=0019997) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 3.912541576990804, LR: 0.0003 +[2026-03-01 05:18:20] (step=0019998) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 3.9127372334181176, LR: 0.0003 +[2026-03-01 05:18:27] (step=0019999) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.9129328898454316, LR: 0.0003 +[2026-03-01 05:18:35] (step=0020000) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 3.913128546272745, LR: 0.0003 +[2026-03-01 05:18:35] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0020000/ +[2026-03-01 05:18:43] (step=0020001) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 3.9133242027000588, LR: 0.0003 +[2026-03-01 05:18:51] (step=0020002) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.9135198591273723, LR: 0.0003 +[2026-03-01 05:18:59] (step=0020003) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 3.913715515554686, LR: 0.0003 +[2026-03-01 05:19:07] (step=0020004) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.9139111719819994, LR: 0.0003 +[2026-03-01 05:19:15] (step=0020005) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.9141068284093135, LR: 0.0003 +[2026-03-01 05:19:22] (step=0020006) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.914302484836627, LR: 0.0003 +[2026-03-01 05:19:30] (step=0020007) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.9144981412639406, LR: 0.0003 +[2026-03-01 05:19:38] (step=0020008) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 3.914693797691254, LR: 0.0003 +[2026-03-01 05:19:46] (step=0020009) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.9148894541185677, LR: 0.0003 +[2026-03-01 05:19:54] (step=0020010) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 3.9150851105458813, LR: 0.0003 +[2026-03-01 05:20:02] (step=0020011) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.9152807669731953, LR: 0.0003 +[2026-03-01 05:20:09] (step=0020012) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.915476423400509, LR: 0.0003 +[2026-03-01 05:20:17] (step=0020013) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 3.9156720798278224, LR: 0.0003 +[2026-03-01 05:20:25] (step=0020014) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.915867736255136, LR: 0.0003 +[2026-03-01 05:20:33] (step=0020015) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.9160633926824495, LR: 0.0003 +[2026-03-01 05:20:41] (step=0020016) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 3.916259049109763, LR: 0.0003 +[2026-03-01 05:20:49] (step=0020017) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.916454705537077, LR: 0.0003 +[2026-03-01 05:20:56] (step=0020018) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.9166503619643906, LR: 0.0003 +[2026-03-01 05:21:04] (step=0020019) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.916846018391704, LR: 0.0003 +[2026-03-01 05:21:12] (step=0020020) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 3.9170416748190178, LR: 0.0003 +[2026-03-01 05:21:20] (step=0020021) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.9172373312463313, LR: 0.0003 +[2026-03-01 05:21:28] (step=0020022) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.917432987673645, LR: 0.0003 +[2026-03-01 05:21:36] (step=0020023) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.917628644100959, LR: 0.0003 +[2026-03-01 05:21:44] (step=0020024) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 3.9178243005282725, LR: 0.0003 +[2026-03-01 05:21:51] (step=0020025) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 3.918019956955586, LR: 0.0003 +[2026-03-01 05:21:59] (step=0020026) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.9182156133828996, LR: 0.0003 +[2026-03-01 05:22:07] (step=0020027) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.918411269810213, LR: 0.0003 +[2026-03-01 05:22:15] (step=0020028) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.9186069262375267, LR: 0.0003 +[2026-03-01 05:22:23] (step=0020029) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 3.9188025826648407, LR: 0.0003 +[2026-03-01 05:22:31] (step=0020030) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.9189982390921543, LR: 0.0003 +[2026-03-01 05:22:39] (step=0020031) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.919193895519468, LR: 0.0003 +[2026-03-01 05:22:46] (step=0020032) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.9193895519467814, LR: 0.0003 +[2026-03-01 05:22:54] (step=0020033) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.919585208374095, LR: 0.0003 +[2026-03-01 05:23:02] (step=0020034) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.9197808648014085, LR: 0.0003 +[2026-03-01 05:23:10] (step=0020035) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.9199765212287225, LR: 0.0003 +[2026-03-01 05:23:18] (step=0020036) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.920172177656036, LR: 0.0003 +[2026-03-01 05:23:26] (step=0020037) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.9203678340833497, LR: 0.0003 +[2026-03-01 05:23:34] (step=0020038) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.920563490510663, LR: 0.0003 +[2026-03-01 05:23:41] (step=0020039) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.920759146937977, LR: 0.0003 +[2026-03-01 05:23:49] (step=0020040) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.9209548033652903, LR: 0.0003 +[2026-03-01 05:23:57] (step=0020041) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.9211504597926043, LR: 0.0003 +[2026-03-01 05:24:05] (step=0020042) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 3.921346116219918, LR: 0.0003 +[2026-03-01 05:24:13] (step=0020043) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.9215417726472315, LR: 0.0003 +[2026-03-01 05:24:21] (step=0020044) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.921737429074545, LR: 0.0003 +[2026-03-01 05:24:28] (step=0020045) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.9219330855018586, LR: 0.0003 +[2026-03-01 05:24:36] (step=0020046) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 3.922128741929172, LR: 0.0003 +[2026-03-01 05:24:44] (step=0020047) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 3.922324398356486, LR: 0.0003 +[2026-03-01 05:24:52] (step=0020048) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 3.9225200547837997, LR: 0.0003 +[2026-03-01 05:25:00] (step=0020049) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.9227157112111133, LR: 0.0003 +[2026-03-01 05:25:08] (step=0020050) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.922911367638427, LR: 0.0003 +[2026-03-01 05:25:15] (step=0020051) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 3.9231070240657404, LR: 0.0003 +[2026-03-01 05:25:23] (step=0020052) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.9233026804930544, LR: 0.0003 +[2026-03-01 05:25:31] (step=0020053) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.923498336920368, LR: 0.0003 +[2026-03-01 05:25:39] (step=0020054) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.9236939933476815, LR: 0.0003 +[2026-03-01 05:25:47] (step=0020055) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.923889649774995, LR: 0.0003 +[2026-03-01 05:25:55] (step=0020056) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 3.9240853062023087, LR: 0.0003 +[2026-03-01 05:26:03] (step=0020057) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.9242809626296222, LR: 0.0003 +[2026-03-01 05:26:10] (step=0020058) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.9244766190569362, LR: 0.0003 +[2026-03-01 05:26:18] (step=0020059) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 3.92467227548425, LR: 0.0003 +[2026-03-01 05:26:26] (step=0020060) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.9248679319115634, LR: 0.0003 +[2026-03-01 05:26:34] (step=0020061) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.925063588338877, LR: 0.0003 +[2026-03-01 05:26:42] (step=0020062) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.9252592447661905, LR: 0.0003 +[2026-03-01 05:26:50] (step=0020063) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.925454901193504, LR: 0.0003 +[2026-03-01 05:26:57] (step=0020064) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.925650557620818, LR: 0.0003 +[2026-03-01 05:27:05] (step=0020065) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 3.9258462140481316, LR: 0.0003 +[2026-03-01 05:27:13] (step=0020066) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.926041870475445, LR: 0.0003 +[2026-03-01 05:27:21] (step=0020067) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.9262375269027587, LR: 0.0003 +[2026-03-01 05:27:29] (step=0020068) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.9264331833300723, LR: 0.0003 +[2026-03-01 05:27:37] (step=0020069) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 3.926628839757386, LR: 0.0003 +[2026-03-01 05:27:44] (step=0020070) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 3.9268244961847, LR: 0.0003 +[2026-03-01 05:27:52] (step=0020071) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.9270201526120134, LR: 0.0003 +[2026-03-01 05:28:00] (step=0020072) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.927215809039327, LR: 0.0003 +[2026-03-01 05:28:08] (step=0020073) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.9274114654666405, LR: 0.0003 +[2026-03-01 05:28:16] (step=0020074) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 3.927607121893954, LR: 0.0003 +[2026-03-01 05:28:24] (step=0020075) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.9278027783212677, LR: 0.0003 +[2026-03-01 05:28:32] (step=0020076) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 3.9279984347485817, LR: 0.0003 +[2026-03-01 05:28:40] (step=0020077) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.9281940911758952, LR: 0.0003 +[2026-03-01 05:28:47] (step=0020078) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.928389747603209, LR: 0.0003 +[2026-03-01 05:28:55] (step=0020079) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 3.9285854040305224, LR: 0.0003 +[2026-03-01 05:29:03] (step=0020080) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 3.928781060457836, LR: 0.0003 +[2026-03-01 05:29:11] (step=0020081) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 3.9289767168851495, LR: 0.0003 +[2026-03-01 05:29:19] (step=0020082) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.9291723733124635, LR: 0.0003 +[2026-03-01 05:29:27] (step=0020083) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.929368029739777, LR: 0.0003 +[2026-03-01 05:29:34] (step=0020084) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 3.9295636861670906, LR: 0.0003 +[2026-03-01 05:29:42] (step=0020085) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 3.929759342594404, LR: 0.0003 +[2026-03-01 05:29:50] (step=0020086) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.9299549990217177, LR: 0.0003 +[2026-03-01 05:29:58] (step=0020087) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.9301506554490313, LR: 0.0003 +[2026-03-01 05:30:06] (step=0020088) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.9303463118763453, LR: 0.0003 +[2026-03-01 05:30:14] (step=0020089) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.930541968303659, LR: 0.0003 +[2026-03-01 05:30:21] (step=0020090) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.9307376247309724, LR: 0.0003 +[2026-03-01 05:30:29] (step=0020091) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 3.930933281158286, LR: 0.0003 +[2026-03-01 05:30:37] (step=0020092) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 3.9311289375855996, LR: 0.0003 +[2026-03-01 05:30:45] (step=0020093) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.931324594012913, LR: 0.0003 +[2026-03-01 05:30:53] (step=0020094) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.931520250440227, LR: 0.0003 +[2026-03-01 05:31:01] (step=0020095) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.9317159068675407, LR: 0.0003 +[2026-03-01 05:31:09] (step=0020096) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.9319115632948543, LR: 0.0003 +[2026-03-01 05:31:16] (step=0020097) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.932107219722168, LR: 0.0003 +[2026-03-01 05:31:24] (step=0020098) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.9323028761494814, LR: 0.0003 +[2026-03-01 05:31:32] (step=0020099) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 3.932498532576795, LR: 0.0003 +[2026-03-01 05:31:40] (step=0020100) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 3.932694189004109, LR: 0.0003 +[2026-03-01 05:31:48] (step=0020101) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 3.9328898454314225, LR: 0.0003 +[2026-03-01 05:31:56] (step=0020102) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.933085501858736, LR: 0.0003 +[2026-03-01 05:32:03] (step=0020103) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.9332811582860496, LR: 0.0003 +[2026-03-01 05:32:11] (step=0020104) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 3.933476814713363, LR: 0.0003 +[2026-03-01 05:32:19] (step=0020105) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 3.9336724711406768, LR: 0.0003 +[2026-03-01 05:32:27] (step=0020106) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.9338681275679908, LR: 0.0003 +[2026-03-01 05:32:35] (step=0020107) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.9340637839953043, LR: 0.0003 +[2026-03-01 05:32:43] (step=0020108) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.934259440422618, LR: 0.0003 +[2026-03-01 05:32:51] (step=0020109) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.9344550968499314, LR: 0.0003 +[2026-03-01 05:32:58] (step=0020110) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.934650753277245, LR: 0.0003 +[2026-03-01 05:33:06] (step=0020111) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.934846409704559, LR: 0.0003 +[2026-03-01 05:33:14] (step=0020112) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 3.9350420661318726, LR: 0.0003 +[2026-03-01 05:33:22] (step=0020113) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 3.935237722559186, LR: 0.0003 +[2026-03-01 05:33:30] (step=0020114) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.9354333789864997, LR: 0.0003 +[2026-03-01 05:33:38] (step=0020115) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 3.9356290354138133, LR: 0.0003 +[2026-03-01 05:33:45] (step=0020116) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.935824691841127, LR: 0.0003 +[2026-03-01 05:33:53] (step=0020117) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.936020348268441, LR: 0.0003 +[2026-03-01 05:34:01] (step=0020118) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.9362160046957544, LR: 0.0003 +[2026-03-01 05:34:09] (step=0020119) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.936411661123068, LR: 0.0003 +[2026-03-01 05:34:17] (step=0020120) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 3.9366073175503815, LR: 0.0003 +[2026-03-01 05:34:25] (step=0020121) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 3.936802973977695, LR: 0.0003 +[2026-03-01 05:34:32] (step=0020122) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.9369986304050086, LR: 0.0003 +[2026-03-01 05:34:40] (step=0020123) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.9371942868323226, LR: 0.0003 +[2026-03-01 05:34:48] (step=0020124) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 3.937389943259636, LR: 0.0003 +[2026-03-01 05:34:56] (step=0020125) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.9375855996869498, LR: 0.0003 +[2026-03-01 05:35:04] (step=0020126) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 3.9377812561142633, LR: 0.0003 +[2026-03-01 05:35:12] (step=0020127) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.937976912541577, LR: 0.0003 +[2026-03-01 05:35:20] (step=0020128) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.9381725689688905, LR: 0.0003 +[2026-03-01 05:35:28] (step=0020129) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.9383682253962045, LR: 0.0003 +[2026-03-01 05:35:35] (step=0020130) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 3.938563881823518, LR: 0.0003 +[2026-03-01 05:35:43] (step=0020131) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.9387595382508316, LR: 0.0003 +[2026-03-01 05:35:51] (step=0020132) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 3.938955194678145, LR: 0.0003 +[2026-03-01 05:35:59] (step=0020133) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 3.9391508511054587, LR: 0.0003 +[2026-03-01 05:36:07] (step=0020134) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.9393465075327723, LR: 0.0003 +[2026-03-01 05:36:15] (step=0020135) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 3.9395421639600863, LR: 0.0003 +[2026-03-01 05:36:22] (step=0020136) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.9397378203874, LR: 0.0003 +[2026-03-01 05:36:30] (step=0020137) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.9399334768147134, LR: 0.0003 +[2026-03-01 05:36:38] (step=0020138) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.940129133242027, LR: 0.0003 +[2026-03-01 05:36:46] (step=0020139) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.9403247896693405, LR: 0.0003 +[2026-03-01 05:36:54] (step=0020140) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 3.940520446096654, LR: 0.0003 +[2026-03-01 05:37:02] (step=0020141) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.940716102523968, LR: 0.0003 +[2026-03-01 05:37:09] (step=0020142) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.9409117589512817, LR: 0.0003 +[2026-03-01 05:37:17] (step=0020143) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.941107415378595, LR: 0.0003 +[2026-03-01 05:37:25] (step=0020144) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 3.9413030718059088, LR: 0.0003 +[2026-03-01 05:37:33] (step=0020145) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.9414987282332223, LR: 0.0003 +[2026-03-01 05:37:41] (step=0020146) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 3.941694384660536, LR: 0.0003 +[2026-03-01 05:37:49] (step=0020147) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 3.94189004108785, LR: 0.0003 +[2026-03-01 05:37:56] (step=0020148) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 3.9420856975151635, LR: 0.0003 +[2026-03-01 05:38:04] (step=0020149) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.942281353942477, LR: 0.0003 +[2026-03-01 05:38:12] (step=0020150) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 3.9424770103697906, LR: 0.0003 +[2026-03-01 05:38:20] (step=0020151) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 3.942672666797104, LR: 0.0003 +[2026-03-01 05:38:28] (step=0020152) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 3.9428683232244177, LR: 0.0003 +[2026-03-01 05:38:36] (step=0020153) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 3.9430639796517317, LR: 0.0003 +[2026-03-01 05:38:44] (step=0020154) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.9432596360790453, LR: 0.0003 +[2026-03-01 05:38:51] (step=0020155) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 3.943455292506359, LR: 0.0003 +[2026-03-01 05:38:59] (step=0020156) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.9436509489336724, LR: 0.0003 +[2026-03-01 05:39:07] (step=0020157) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.943846605360986, LR: 0.0003 +[2026-03-01 05:39:15] (step=0020158) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.9440422617882995, LR: 0.0003 +[2026-03-01 05:39:23] (step=0020159) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.9442379182156135, LR: 0.0003 +[2026-03-01 05:39:31] (step=0020160) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 3.944433574642927, LR: 0.0003 +[2026-03-01 05:39:38] (step=0020161) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.9446292310702407, LR: 0.0003 +[2026-03-01 05:39:46] (step=0020162) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 3.9448248874975542, LR: 0.0003 +[2026-03-01 05:39:54] (step=0020163) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 3.945020543924868, LR: 0.0003 +[2026-03-01 05:40:02] (step=0020164) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 3.945216200352182, LR: 0.0003 +[2026-03-01 05:40:10] (step=0020165) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.9454118567794954, LR: 0.0003 +[2026-03-01 05:40:18] (step=0020166) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 3.945607513206809, LR: 0.0003 +[2026-03-01 05:40:26] (step=0020167) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.9458031696341225, LR: 0.0003 +[2026-03-01 05:40:33] (step=0020168) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.945998826061436, LR: 0.0003 +[2026-03-01 05:40:41] (step=0020169) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.9461944824887496, LR: 0.0003 +[2026-03-01 05:40:49] (step=0020170) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 3.9463901389160636, LR: 0.0003 +[2026-03-01 05:40:57] (step=0020171) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 3.946585795343377, LR: 0.0003 +[2026-03-01 05:41:05] (step=0020172) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.9467814517706907, LR: 0.0003 +[2026-03-01 05:41:13] (step=0020173) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.9469771081980043, LR: 0.0003 +[2026-03-01 05:41:21] (step=0020174) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.947172764625318, LR: 0.0003 +[2026-03-01 05:41:28] (step=0020175) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.9473684210526314, LR: 0.0003 +[2026-03-01 05:41:36] (step=0020176) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 3.9475640774799454, LR: 0.0003 +[2026-03-01 05:41:44] (step=0020177) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.947759733907259, LR: 0.0003 +[2026-03-01 05:41:52] (step=0020178) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.9479553903345725, LR: 0.0003 +[2026-03-01 05:42:00] (step=0020179) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.948151046761886, LR: 0.0003 +[2026-03-01 05:42:08] (step=0020180) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 3.9483467031891997, LR: 0.0003 +[2026-03-01 05:42:16] (step=0020181) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.9485423596165132, LR: 0.0003 +[2026-03-01 05:42:23] (step=0020182) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 3.9487380160438272, LR: 0.0003 +[2026-03-01 05:42:31] (step=0020183) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 3.948933672471141, LR: 0.0003 +[2026-03-01 05:42:39] (step=0020184) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.9491293288984544, LR: 0.0003 +[2026-03-01 05:42:47] (step=0020185) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.949324985325768, LR: 0.0003 +[2026-03-01 05:42:55] (step=0020186) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.9495206417530815, LR: 0.0003 +[2026-03-01 05:43:03] (step=0020187) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 3.949716298180395, LR: 0.0003 +[2026-03-01 05:43:10] (step=0020188) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.949911954607709, LR: 0.0003 +[2026-03-01 05:43:18] (step=0020189) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 3.9501076110350226, LR: 0.0003 +[2026-03-01 05:43:26] (step=0020190) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 3.950303267462336, LR: 0.0003 +[2026-03-01 05:43:34] (step=0020191) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.9504989238896497, LR: 0.0003 +[2026-03-01 05:43:42] (step=0020192) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.9506945803169633, LR: 0.0003 +[2026-03-01 05:43:50] (step=0020193) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 3.950890236744277, LR: 0.0003 +[2026-03-01 05:43:58] (step=0020194) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.951085893171591, LR: 0.0003 +[2026-03-01 05:44:05] (step=0020195) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 3.9512815495989044, LR: 0.0003 +[2026-03-01 05:44:13] (step=0020196) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 3.951477206026218, LR: 0.0003 +[2026-03-01 05:44:21] (step=0020197) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 3.9516728624535316, LR: 0.0003 +[2026-03-01 05:44:29] (step=0020198) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 3.951868518880845, LR: 0.0003 +[2026-03-01 05:44:37] (step=0020199) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.9520641753081587, LR: 0.0003 +[2026-03-01 05:44:45] (step=0020200) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.9522598317354727, LR: 0.0003 +[2026-03-01 05:44:52] (step=0020201) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 3.9524554881627862, LR: 0.0003 +[2026-03-01 05:45:00] (step=0020202) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.9526511445901, LR: 0.0003 +[2026-03-01 05:45:08] (step=0020203) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.9528468010174134, LR: 0.0003 +[2026-03-01 05:45:16] (step=0020204) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.953042457444727, LR: 0.0003 +[2026-03-01 05:45:24] (step=0020205) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 3.9532381138720405, LR: 0.0003 +[2026-03-01 05:45:32] (step=0020206) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.9534337702993545, LR: 0.0003 +[2026-03-01 05:45:39] (step=0020207) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 3.953629426726668, LR: 0.0003 +[2026-03-01 05:45:47] (step=0020208) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.9538250831539816, LR: 0.0003 +[2026-03-01 05:45:55] (step=0020209) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 3.954020739581295, LR: 0.0003 +[2026-03-01 05:46:03] (step=0020210) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.9542163960086087, LR: 0.0003 +[2026-03-01 05:46:11] (step=0020211) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.9544120524359223, LR: 0.0003 +[2026-03-01 05:46:19] (step=0020212) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 3.9546077088632363, LR: 0.0003 +[2026-03-01 05:46:27] (step=0020213) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 3.95480336529055, LR: 0.0003 +[2026-03-01 05:46:34] (step=0020214) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.9549990217178634, LR: 0.0003 +[2026-03-01 05:46:42] (step=0020215) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 3.955194678145177, LR: 0.0003 +[2026-03-01 05:46:50] (step=0020216) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.9553903345724906, LR: 0.0003 +[2026-03-01 05:46:58] (step=0020217) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.955585990999804, LR: 0.0003 +[2026-03-01 05:47:06] (step=0020218) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.955781647427118, LR: 0.0003 +[2026-03-01 05:47:14] (step=0020219) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 3.9559773038544317, LR: 0.0003 +[2026-03-01 05:47:22] (step=0020220) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.9561729602817453, LR: 0.0003 +[2026-03-01 05:47:29] (step=0020221) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.956368616709059, LR: 0.0003 +[2026-03-01 05:47:37] (step=0020222) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.9565642731363724, LR: 0.0003 +[2026-03-01 05:47:45] (step=0020223) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 3.9567599295636864, LR: 0.0003 +[2026-03-01 05:47:53] (step=0020224) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.956955585991, LR: 0.0003 +[2026-03-01 05:48:01] (step=0020225) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 3.9571512424183135, LR: 0.0003 +[2026-03-01 05:48:09] (step=0020226) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 3.957346898845627, LR: 0.0003 +[2026-03-01 05:48:16] (step=0020227) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.9575425552729406, LR: 0.0003 +[2026-03-01 05:48:24] (step=0020228) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 3.957738211700254, LR: 0.0003 +[2026-03-01 05:48:32] (step=0020229) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.957933868127568, LR: 0.0003 +[2026-03-01 05:48:40] (step=0020230) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 3.9581295245548818, LR: 0.0003 +[2026-03-01 05:48:48] (step=0020231) Train Loss: 0.4554, Train Steps/Sec: 0.12, Epoch: 3.9583251809821953, LR: 0.0003 +[2026-03-01 05:48:56] (step=0020232) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.958520837409509, LR: 0.0003 +[2026-03-01 05:49:04] (step=0020233) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 3.9587164938368224, LR: 0.0003 +[2026-03-01 05:49:11] (step=0020234) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 3.958912150264136, LR: 0.0003 +[2026-03-01 05:49:19] (step=0020235) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 3.95910780669145, LR: 0.0003 +[2026-03-01 05:49:27] (step=0020236) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.9593034631187636, LR: 0.0003 +[2026-03-01 05:49:35] (step=0020237) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.959499119546077, LR: 0.0003 +[2026-03-01 05:49:43] (step=0020238) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.9596947759733907, LR: 0.0003 +[2026-03-01 05:49:51] (step=0020239) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 3.9598904324007043, LR: 0.0003 +[2026-03-01 05:49:59] (step=0020240) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.960086088828018, LR: 0.0003 +[2026-03-01 05:50:06] (step=0020241) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 3.960281745255332, LR: 0.0003 +[2026-03-01 05:50:14] (step=0020242) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 3.9604774016826454, LR: 0.0003 +[2026-03-01 05:50:22] (step=0020243) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 3.960673058109959, LR: 0.0003 +[2026-03-01 05:50:30] (step=0020244) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.9608687145372725, LR: 0.0003 +[2026-03-01 05:50:38] (step=0020245) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 3.961064370964586, LR: 0.0003 +[2026-03-01 05:50:46] (step=0020246) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 3.9612600273918996, LR: 0.0003 +[2026-03-01 05:50:53] (step=0020247) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 3.9614556838192136, LR: 0.0003 +[2026-03-01 05:51:01] (step=0020248) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.961651340246527, LR: 0.0003 +[2026-03-01 05:51:09] (step=0020249) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 3.9618469966738408, LR: 0.0003 +[2026-03-01 05:51:17] (step=0020250) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.9620426531011543, LR: 0.0003 +[2026-03-01 05:51:25] (step=0020251) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.962238309528468, LR: 0.0003 +[2026-03-01 05:51:33] (step=0020252) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.9624339659557815, LR: 0.0003 +[2026-03-01 05:51:41] (step=0020253) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 3.9626296223830955, LR: 0.0003 +[2026-03-01 05:51:48] (step=0020254) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.962825278810409, LR: 0.0003 +[2026-03-01 05:51:56] (step=0020255) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.9630209352377226, LR: 0.0003 +[2026-03-01 05:52:04] (step=0020256) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 3.963216591665036, LR: 0.0003 +[2026-03-01 05:52:12] (step=0020257) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 3.9634122480923497, LR: 0.0003 +[2026-03-01 05:52:20] (step=0020258) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 3.9636079045196633, LR: 0.0003 +[2026-03-01 05:52:28] (step=0020259) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.9638035609469773, LR: 0.0003 +[2026-03-01 05:52:35] (step=0020260) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.963999217374291, LR: 0.0003 +[2026-03-01 05:52:43] (step=0020261) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.9641948738016044, LR: 0.0003 +[2026-03-01 05:52:51] (step=0020262) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 3.964390530228918, LR: 0.0003 +[2026-03-01 05:52:59] (step=0020263) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.9645861866562315, LR: 0.0003 +[2026-03-01 05:53:07] (step=0020264) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.964781843083545, LR: 0.0003 +[2026-03-01 05:53:15] (step=0020265) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.964977499510859, LR: 0.0003 +[2026-03-01 05:53:22] (step=0020266) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.9651731559381727, LR: 0.0003 +[2026-03-01 05:53:30] (step=0020267) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.965368812365486, LR: 0.0003 +[2026-03-01 05:53:38] (step=0020268) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 3.9655644687928, LR: 0.0003 +[2026-03-01 05:53:46] (step=0020269) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.9657601252201133, LR: 0.0003 +[2026-03-01 05:53:54] (step=0020270) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 3.965955781647427, LR: 0.0003 +[2026-03-01 05:54:02] (step=0020271) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.966151438074741, LR: 0.0003 +[2026-03-01 05:54:10] (step=0020272) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 3.9663470945020545, LR: 0.0003 +[2026-03-01 05:54:17] (step=0020273) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.966542750929368, LR: 0.0003 +[2026-03-01 05:54:25] (step=0020274) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.9667384073566816, LR: 0.0003 +[2026-03-01 05:54:33] (step=0020275) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 3.966934063783995, LR: 0.0003 +[2026-03-01 05:54:41] (step=0020276) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 3.967129720211309, LR: 0.0003 +[2026-03-01 05:54:49] (step=0020277) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.9673253766386227, LR: 0.0003 +[2026-03-01 05:54:57] (step=0020278) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 3.9675210330659363, LR: 0.0003 +[2026-03-01 05:55:05] (step=0020279) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.96771668949325, LR: 0.0003 +[2026-03-01 05:55:12] (step=0020280) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.9679123459205634, LR: 0.0003 +[2026-03-01 05:55:20] (step=0020281) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.968108002347877, LR: 0.0003 +[2026-03-01 05:55:28] (step=0020282) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.968303658775191, LR: 0.0003 +[2026-03-01 05:55:36] (step=0020283) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 3.9684993152025045, LR: 0.0003 +[2026-03-01 05:55:44] (step=0020284) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 3.968694971629818, LR: 0.0003 +[2026-03-01 05:55:52] (step=0020285) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 3.9688906280571317, LR: 0.0003 +[2026-03-01 05:56:00] (step=0020286) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 3.9690862844844452, LR: 0.0003 +[2026-03-01 05:56:07] (step=0020287) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.969281940911759, LR: 0.0003 +[2026-03-01 05:56:15] (step=0020288) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.969477597339073, LR: 0.0003 +[2026-03-01 05:56:23] (step=0020289) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.9696732537663864, LR: 0.0003 +[2026-03-01 05:56:31] (step=0020290) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.9698689101937, LR: 0.0003 +[2026-03-01 05:56:39] (step=0020291) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.9700645666210135, LR: 0.0003 +[2026-03-01 05:56:47] (step=0020292) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.970260223048327, LR: 0.0003 +[2026-03-01 05:56:54] (step=0020293) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 3.9704558794756406, LR: 0.0003 +[2026-03-01 05:57:02] (step=0020294) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 3.9706515359029546, LR: 0.0003 +[2026-03-01 05:57:10] (step=0020295) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 3.970847192330268, LR: 0.0003 +[2026-03-01 05:57:18] (step=0020296) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 3.9710428487575817, LR: 0.0003 +[2026-03-01 05:57:26] (step=0020297) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 3.9712385051848953, LR: 0.0003 +[2026-03-01 05:57:34] (step=0020298) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 3.971434161612209, LR: 0.0003 +[2026-03-01 05:57:41] (step=0020299) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.9716298180395224, LR: 0.0003 +[2026-03-01 05:57:49] (step=0020300) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.9718254744668364, LR: 0.0003 +[2026-03-01 05:57:57] (step=0020301) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 3.97202113089415, LR: 0.0003 +[2026-03-01 05:58:05] (step=0020302) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 3.9722167873214635, LR: 0.0003 +[2026-03-01 05:58:13] (step=0020303) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 3.972412443748777, LR: 0.0003 +[2026-03-01 05:58:21] (step=0020304) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 3.9726081001760907, LR: 0.0003 +[2026-03-01 05:58:29] (step=0020305) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.9728037566034042, LR: 0.0003 +[2026-03-01 05:58:36] (step=0020306) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 3.9729994130307182, LR: 0.0003 +[2026-03-01 05:58:44] (step=0020307) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.973195069458032, LR: 0.0003 +[2026-03-01 05:58:52] (step=0020308) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 3.9733907258853454, LR: 0.0003 +[2026-03-01 05:59:00] (step=0020309) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.973586382312659, LR: 0.0003 +[2026-03-01 05:59:08] (step=0020310) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.9737820387399725, LR: 0.0003 +[2026-03-01 05:59:16] (step=0020311) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.973977695167286, LR: 0.0003 +[2026-03-01 05:59:23] (step=0020312) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 3.9741733515946, LR: 0.0003 +[2026-03-01 05:59:31] (step=0020313) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 3.9743690080219136, LR: 0.0003 +[2026-03-01 05:59:39] (step=0020314) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 3.974564664449227, LR: 0.0003 +[2026-03-01 05:59:47] (step=0020315) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 3.9747603208765407, LR: 0.0003 +[2026-03-01 05:59:55] (step=0020316) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 3.9749559773038543, LR: 0.0003 +[2026-03-01 06:00:03] (step=0020317) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 3.975151633731168, LR: 0.0003 +[2026-03-01 06:00:10] (step=0020318) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 3.975347290158482, LR: 0.0003 +[2026-03-01 06:00:18] (step=0020319) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.9755429465857954, LR: 0.0003 +[2026-03-01 06:00:26] (step=0020320) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 3.975738603013109, LR: 0.0003 +[2026-03-01 06:00:34] (step=0020321) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 3.9759342594404226, LR: 0.0003 +[2026-03-01 06:00:42] (step=0020322) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.976129915867736, LR: 0.0003 +[2026-03-01 06:00:50] (step=0020323) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 3.9763255722950497, LR: 0.0003 +[2026-03-01 06:00:58] (step=0020324) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 3.9765212287223637, LR: 0.0003 +[2026-03-01 06:01:05] (step=0020325) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 3.9767168851496772, LR: 0.0003 +[2026-03-01 06:01:13] (step=0020326) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.976912541576991, LR: 0.0003 +[2026-03-01 06:01:21] (step=0020327) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.9771081980043044, LR: 0.0003 +[2026-03-01 06:01:29] (step=0020328) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 3.977303854431618, LR: 0.0003 +[2026-03-01 06:01:37] (step=0020329) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 3.9774995108589315, LR: 0.0003 +[2026-03-01 06:01:45] (step=0020330) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.9776951672862455, LR: 0.0003 +[2026-03-01 06:01:53] (step=0020331) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 3.977890823713559, LR: 0.0003 +[2026-03-01 06:02:01] (step=0020332) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 3.9780864801408726, LR: 0.0003 +[2026-03-01 06:02:08] (step=0020333) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.978282136568186, LR: 0.0003 +[2026-03-01 06:02:16] (step=0020334) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 3.9784777929954998, LR: 0.0003 +[2026-03-01 06:02:24] (step=0020335) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 3.9786734494228138, LR: 0.0003 +[2026-03-01 06:02:32] (step=0020336) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.9788691058501273, LR: 0.0003 +[2026-03-01 06:02:40] (step=0020337) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 3.979064762277441, LR: 0.0003 +[2026-03-01 06:02:48] (step=0020338) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 3.9792604187047544, LR: 0.0003 +[2026-03-01 06:02:55] (step=0020339) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 3.979456075132068, LR: 0.0003 +[2026-03-01 06:03:03] (step=0020340) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.9796517315593816, LR: 0.0003 +[2026-03-01 06:03:11] (step=0020341) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.9798473879866956, LR: 0.0003 +[2026-03-01 06:03:19] (step=0020342) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.980043044414009, LR: 0.0003 +[2026-03-01 06:03:27] (step=0020343) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.9802387008413227, LR: 0.0003 +[2026-03-01 06:03:35] (step=0020344) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.9804343572686363, LR: 0.0003 +[2026-03-01 06:03:43] (step=0020345) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.98063001369595, LR: 0.0003 +[2026-03-01 06:03:50] (step=0020346) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.9808256701232634, LR: 0.0003 +[2026-03-01 06:03:58] (step=0020347) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.9810213265505774, LR: 0.0003 +[2026-03-01 06:04:06] (step=0020348) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 3.981216982977891, LR: 0.0003 +[2026-03-01 06:04:14] (step=0020349) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 3.9814126394052045, LR: 0.0003 +[2026-03-01 06:04:22] (step=0020350) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 3.981608295832518, LR: 0.0003 +[2026-03-01 06:04:30] (step=0020351) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 3.9818039522598316, LR: 0.0003 +[2026-03-01 06:04:37] (step=0020352) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 3.981999608687145, LR: 0.0003 +[2026-03-01 06:04:45] (step=0020353) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 3.982195265114459, LR: 0.0003 +[2026-03-01 06:04:53] (step=0020354) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.9823909215417728, LR: 0.0003 +[2026-03-01 06:05:01] (step=0020355) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 3.9825865779690863, LR: 0.0003 +[2026-03-01 06:05:09] (step=0020356) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 3.9827822343964, LR: 0.0003 +[2026-03-01 06:05:17] (step=0020357) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 3.9829778908237135, LR: 0.0003 +[2026-03-01 06:05:24] (step=0020358) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 3.983173547251027, LR: 0.0003 +[2026-03-01 06:05:32] (step=0020359) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 3.983369203678341, LR: 0.0003 +[2026-03-01 06:05:40] (step=0020360) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.9835648601056546, LR: 0.0003 +[2026-03-01 06:05:48] (step=0020361) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 3.983760516532968, LR: 0.0003 +[2026-03-01 06:05:56] (step=0020362) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.9839561729602817, LR: 0.0003 +[2026-03-01 06:06:04] (step=0020363) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 3.9841518293875953, LR: 0.0003 +[2026-03-01 06:06:11] (step=0020364) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 3.984347485814909, LR: 0.0003 +[2026-03-01 06:06:19] (step=0020365) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 3.984543142242223, LR: 0.0003 +[2026-03-01 06:06:27] (step=0020366) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.9847387986695364, LR: 0.0003 +[2026-03-01 06:06:35] (step=0020367) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 3.98493445509685, LR: 0.0003 +[2026-03-01 06:06:43] (step=0020368) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 3.9851301115241635, LR: 0.0003 +[2026-03-01 06:06:51] (step=0020369) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.985325767951477, LR: 0.0003 +[2026-03-01 06:06:59] (step=0020370) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 3.9855214243787906, LR: 0.0003 +[2026-03-01 06:07:07] (step=0020371) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 3.9857170808061047, LR: 0.0003 +[2026-03-01 06:07:14] (step=0020372) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 3.985912737233418, LR: 0.0003 +[2026-03-01 06:07:22] (step=0020373) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 3.9861083936607318, LR: 0.0003 +[2026-03-01 06:07:30] (step=0020374) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 3.9863040500880453, LR: 0.0003 +[2026-03-01 06:07:38] (step=0020375) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 3.986499706515359, LR: 0.0003 +[2026-03-01 06:07:46] (step=0020376) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 3.9866953629426725, LR: 0.0003 +[2026-03-01 06:07:54] (step=0020377) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 3.9868910193699865, LR: 0.0003 +[2026-03-01 06:08:01] (step=0020378) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 3.9870866757973, LR: 0.0003 +[2026-03-01 06:08:09] (step=0020379) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 3.9872823322246136, LR: 0.0003 +[2026-03-01 06:08:17] (step=0020380) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 3.987477988651927, LR: 0.0003 +[2026-03-01 06:08:25] (step=0020381) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 3.9876736450792407, LR: 0.0003 +[2026-03-01 06:08:33] (step=0020382) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 3.9878693015065543, LR: 0.0003 +[2026-03-01 06:08:41] (step=0020383) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 3.9880649579338683, LR: 0.0003 +[2026-03-01 06:08:49] (step=0020384) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 3.988260614361182, LR: 0.0003 +[2026-03-01 06:08:56] (step=0020385) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.9884562707884954, LR: 0.0003 +[2026-03-01 06:09:04] (step=0020386) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 3.988651927215809, LR: 0.0003 +[2026-03-01 06:09:12] (step=0020387) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 3.9888475836431225, LR: 0.0003 +[2026-03-01 06:09:20] (step=0020388) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 3.9890432400704365, LR: 0.0003 +[2026-03-01 06:09:28] (step=0020389) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 3.98923889649775, LR: 0.0003 +[2026-03-01 06:09:36] (step=0020390) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 3.9894345529250637, LR: 0.0003 +[2026-03-01 06:09:43] (step=0020391) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 3.989630209352377, LR: 0.0003 +[2026-03-01 06:09:51] (step=0020392) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 3.989825865779691, LR: 0.0003 +[2026-03-01 06:09:59] (step=0020393) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 3.9900215222070043, LR: 0.0003 +[2026-03-01 06:10:07] (step=0020394) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 3.9902171786343184, LR: 0.0003 +[2026-03-01 06:10:15] (step=0020395) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 3.990412835061632, LR: 0.0003 +[2026-03-01 06:10:23] (step=0020396) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.9906084914889455, LR: 0.0003 +[2026-03-01 06:10:30] (step=0020397) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 3.990804147916259, LR: 0.0003 +[2026-03-01 06:10:38] (step=0020398) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 3.9909998043435726, LR: 0.0003 +[2026-03-01 06:10:46] (step=0020399) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 3.991195460770886, LR: 0.0003 +[2026-03-01 06:10:54] (step=0020400) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 3.9913911171982, LR: 0.0003 +[2026-03-01 06:11:02] (step=0020401) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 3.9915867736255137, LR: 0.0003 +[2026-03-01 06:11:10] (step=0020402) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.9917824300528273, LR: 0.0003 +[2026-03-01 06:11:17] (step=0020403) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 3.991978086480141, LR: 0.0003 +[2026-03-01 06:11:25] (step=0020404) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 3.9921737429074544, LR: 0.0003 +[2026-03-01 06:11:33] (step=0020405) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 3.992369399334768, LR: 0.0003 +[2026-03-01 06:11:41] (step=0020406) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 3.992565055762082, LR: 0.0003 +[2026-03-01 06:11:49] (step=0020407) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 3.9927607121893955, LR: 0.0003 +[2026-03-01 06:11:57] (step=0020408) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 3.992956368616709, LR: 0.0003 +[2026-03-01 06:12:05] (step=0020409) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 3.9931520250440227, LR: 0.0003 +[2026-03-01 06:12:12] (step=0020410) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 3.9933476814713362, LR: 0.0003 +[2026-03-01 06:12:20] (step=0020411) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.99354333789865, LR: 0.0003 +[2026-03-01 06:12:28] (step=0020412) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.993738994325964, LR: 0.0003 +[2026-03-01 06:12:36] (step=0020413) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 3.9939346507532774, LR: 0.0003 +[2026-03-01 06:12:44] (step=0020414) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 3.994130307180591, LR: 0.0003 +[2026-03-01 06:12:52] (step=0020415) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 3.9943259636079045, LR: 0.0003 +[2026-03-01 06:12:59] (step=0020416) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 3.994521620035218, LR: 0.0003 +[2026-03-01 06:13:07] (step=0020417) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 3.9947172764625316, LR: 0.0003 +[2026-03-01 06:13:15] (step=0020418) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 3.9949129328898456, LR: 0.0003 +[2026-03-01 06:13:23] (step=0020419) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 3.995108589317159, LR: 0.0003 +[2026-03-01 06:13:31] (step=0020420) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 3.9953042457444727, LR: 0.0003 +[2026-03-01 06:13:39] (step=0020421) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 3.9954999021717863, LR: 0.0003 +[2026-03-01 06:13:47] (step=0020422) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 3.9956955585991, LR: 0.0003 +[2026-03-01 06:13:54] (step=0020423) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 3.9958912150264134, LR: 0.0003 +[2026-03-01 06:14:02] (step=0020424) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 3.9960868714537274, LR: 0.0003 +[2026-03-01 06:14:10] (step=0020425) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 3.996282527881041, LR: 0.0003 +[2026-03-01 06:14:18] (step=0020426) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 3.9964781843083546, LR: 0.0003 +[2026-03-01 06:14:26] (step=0020427) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 3.996673840735668, LR: 0.0003 +[2026-03-01 06:14:34] (step=0020428) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 3.9968694971629817, LR: 0.0003 +[2026-03-01 06:14:42] (step=0020429) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 3.9970651535902952, LR: 0.0003 +[2026-03-01 06:14:49] (step=0020430) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 3.9972608100176092, LR: 0.0003 +[2026-03-01 06:14:57] (step=0020431) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 3.997456466444923, LR: 0.0003 +[2026-03-01 06:15:05] (step=0020432) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 3.9976521228722364, LR: 0.0003 +[2026-03-01 06:15:13] (step=0020433) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 3.99784777929955, LR: 0.0003 +[2026-03-01 06:15:21] (step=0020434) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 3.9980434357268635, LR: 0.0003 +[2026-03-01 06:15:29] (step=0020435) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 3.998239092154177, LR: 0.0003 +[2026-03-01 06:15:36] (step=0020436) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 3.998434748581491, LR: 0.0003 +[2026-03-01 06:15:44] (step=0020437) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 3.9986304050088046, LR: 0.0003 +[2026-03-01 06:15:52] (step=0020438) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 3.998826061436118, LR: 0.0003 +[2026-03-01 06:16:00] (step=0020439) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 3.9990217178634317, LR: 0.0003 +[2026-03-01 06:16:08] (step=0020440) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 3.9992173742907453, LR: 0.0003 +[2026-03-01 06:16:16] (step=0020441) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 3.999413030718059, LR: 0.0003 +[2026-03-01 06:16:24] (step=0020442) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 3.999608687145373, LR: 0.0003 +[2026-03-01 06:16:32] (step=0020443) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 3.9998043435726864, LR: 0.0003 +[2026-03-01 06:16:40] (step=0020444) Train Loss: 0.4478, Train Steps/Sec: 0.12, Epoch: 4.0, LR: 0.0003 +[2026-03-01 06:16:40] Beginning epoch 4... +[2026-03-01 06:16:49] (step=0020445) Train Loss: 0.4393, Train Steps/Sec: 0.11, Epoch: 4.000195656427314, LR: 0.0003 +[2026-03-01 06:16:57] (step=0020446) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.000391312854627, LR: 0.0003 +[2026-03-01 06:17:05] (step=0020447) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.000586969281941, LR: 0.0003 +[2026-03-01 06:17:13] (step=0020448) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.000782625709254, LR: 0.0003 +[2026-03-01 06:17:20] (step=0020449) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.000978282136568, LR: 0.0003 +[2026-03-01 06:17:28] (step=0020450) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.001173938563882, LR: 0.0003 +[2026-03-01 06:17:36] (step=0020451) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.001369594991195, LR: 0.0003 +[2026-03-01 06:17:44] (step=0020452) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.001565251418509, LR: 0.0003 +[2026-03-01 06:17:52] (step=0020453) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.0017609078458225, LR: 0.0003 +[2026-03-01 06:18:00] (step=0020454) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.0019565642731365, LR: 0.0003 +[2026-03-01 06:18:07] (step=0020455) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.00215222070045, LR: 0.0003 +[2026-03-01 06:18:15] (step=0020456) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.002347877127764, LR: 0.0003 +[2026-03-01 06:18:23] (step=0020457) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.002543533555078, LR: 0.0003 +[2026-03-01 06:18:31] (step=0020458) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.002739189982391, LR: 0.0003 +[2026-03-01 06:18:39] (step=0020459) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.002934846409705, LR: 0.0003 +[2026-03-01 06:18:47] (step=0020460) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.003130502837018, LR: 0.0003 +[2026-03-01 06:18:54] (step=0020461) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.003326159264332, LR: 0.0003 +[2026-03-01 06:19:02] (step=0020462) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.003521815691646, LR: 0.0003 +[2026-03-01 06:19:10] (step=0020463) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.003717472118959, LR: 0.0003 +[2026-03-01 06:19:18] (step=0020464) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.003913128546273, LR: 0.0003 +[2026-03-01 06:19:26] (step=0020465) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.004108784973586, LR: 0.0003 +[2026-03-01 06:19:34] (step=0020466) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.0043044414009, LR: 0.0003 +[2026-03-01 06:19:41] (step=0020467) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 4.004500097828213, LR: 0.0003 +[2026-03-01 06:19:49] (step=0020468) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.004695754255527, LR: 0.0003 +[2026-03-01 06:19:57] (step=0020469) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.004891410682841, LR: 0.0003 +[2026-03-01 06:20:05] (step=0020470) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.005087067110154, LR: 0.0003 +[2026-03-01 06:20:13] (step=0020471) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 4.005282723537468, LR: 0.0003 +[2026-03-01 06:20:21] (step=0020472) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.0054783799647815, LR: 0.0003 +[2026-03-01 06:20:29] (step=0020473) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.0056740363920955, LR: 0.0003 +[2026-03-01 06:20:37] (step=0020474) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.0058696928194095, LR: 0.0003 +[2026-03-01 06:20:44] (step=0020475) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.006065349246723, LR: 0.0003 +[2026-03-01 06:20:52] (step=0020476) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.006261005674037, LR: 0.0003 +[2026-03-01 06:21:00] (step=0020477) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.00645666210135, LR: 0.0003 +[2026-03-01 06:21:08] (step=0020478) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.006652318528664, LR: 0.0003 +[2026-03-01 06:21:16] (step=0020479) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.006847974955977, LR: 0.0003 +[2026-03-01 06:21:24] (step=0020480) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.007043631383291, LR: 0.0003 +[2026-03-01 06:21:31] (step=0020481) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 4.007239287810605, LR: 0.0003 +[2026-03-01 06:21:39] (step=0020482) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.007434944237918, LR: 0.0003 +[2026-03-01 06:21:47] (step=0020483) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.007630600665232, LR: 0.0003 +[2026-03-01 06:21:55] (step=0020484) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.007826257092545, LR: 0.0003 +[2026-03-01 06:22:03] (step=0020485) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.008021913519859, LR: 0.0003 +[2026-03-01 06:22:11] (step=0020486) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.008217569947173, LR: 0.0003 +[2026-03-01 06:22:18] (step=0020487) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.008413226374486, LR: 0.0003 +[2026-03-01 06:22:26] (step=0020488) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 4.0086088828018, LR: 0.0003 +[2026-03-01 06:22:34] (step=0020489) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.008804539229113, LR: 0.0003 +[2026-03-01 06:22:42] (step=0020490) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.009000195656427, LR: 0.0003 +[2026-03-01 06:22:50] (step=0020491) Train Loss: 0.4541, Train Steps/Sec: 0.12, Epoch: 4.0091958520837405, LR: 0.0003 +[2026-03-01 06:22:58] (step=0020492) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.0093915085110545, LR: 0.0003 +[2026-03-01 06:23:06] (step=0020493) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.0095871649383685, LR: 0.0003 +[2026-03-01 06:23:14] (step=0020494) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.009782821365682, LR: 0.0003 +[2026-03-01 06:23:21] (step=0020495) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.009978477792996, LR: 0.0003 +[2026-03-01 06:23:29] (step=0020496) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.010174134220309, LR: 0.0003 +[2026-03-01 06:23:37] (step=0020497) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.010369790647623, LR: 0.0003 +[2026-03-01 06:23:45] (step=0020498) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.010565447074937, LR: 0.0003 +[2026-03-01 06:23:53] (step=0020499) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.01076110350225, LR: 0.0003 +[2026-03-01 06:24:01] (step=0020500) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.010956759929564, LR: 0.0003 +[2026-03-01 06:24:01] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0020500/ +[2026-03-01 06:24:08] (step=0020501) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.011152416356877, LR: 0.0003 +[2026-03-01 06:24:16] (step=0020502) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 4.011348072784191, LR: 0.0003 +[2026-03-01 06:24:24] (step=0020503) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 4.011543729211505, LR: 0.0003 +[2026-03-01 06:24:32] (step=0020504) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.011739385638818, LR: 0.0003 +[2026-03-01 06:24:40] (step=0020505) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.011935042066132, LR: 0.0003 +[2026-03-01 06:24:48] (step=0020506) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.012130698493445, LR: 0.0003 +[2026-03-01 06:24:56] (step=0020507) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.012326354920759, LR: 0.0003 +[2026-03-01 06:25:03] (step=0020508) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.012522011348072, LR: 0.0003 +[2026-03-01 06:25:11] (step=0020509) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.012717667775386, LR: 0.0003 +[2026-03-01 06:25:19] (step=0020510) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 4.0129133242027, LR: 0.0003 +[2026-03-01 06:25:27] (step=0020511) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.0131089806300135, LR: 0.0003 +[2026-03-01 06:25:35] (step=0020512) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.0133046370573275, LR: 0.0003 +[2026-03-01 06:25:43] (step=0020513) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.013500293484641, LR: 0.0003 +[2026-03-01 06:25:51] (step=0020514) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.013695949911955, LR: 0.0003 +[2026-03-01 06:25:58] (step=0020515) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.013891606339269, LR: 0.0003 +[2026-03-01 06:26:06] (step=0020516) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.014087262766582, LR: 0.0003 +[2026-03-01 06:26:14] (step=0020517) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.014282919193896, LR: 0.0003 +[2026-03-01 06:26:22] (step=0020518) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.014478575621209, LR: 0.0003 +[2026-03-01 06:26:30] (step=0020519) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.014674232048523, LR: 0.0003 +[2026-03-01 06:26:38] (step=0020520) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.014869888475836, LR: 0.0003 +[2026-03-01 06:26:45] (step=0020521) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.01506554490315, LR: 0.0003 +[2026-03-01 06:26:53] (step=0020522) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.015261201330464, LR: 0.0003 +[2026-03-01 06:27:01] (step=0020523) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.015456857757777, LR: 0.0003 +[2026-03-01 06:27:09] (step=0020524) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.015652514185091, LR: 0.0003 +[2026-03-01 06:27:17] (step=0020525) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.015848170612404, LR: 0.0003 +[2026-03-01 06:27:25] (step=0020526) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.016043827039718, LR: 0.0003 +[2026-03-01 06:27:33] (step=0020527) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.016239483467032, LR: 0.0003 +[2026-03-01 06:27:40] (step=0020528) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.016435139894345, LR: 0.0003 +[2026-03-01 06:27:48] (step=0020529) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.016630796321659, LR: 0.0003 +[2026-03-01 06:27:56] (step=0020530) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.0168264527489725, LR: 0.0003 +[2026-03-01 06:28:04] (step=0020531) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.0170221091762865, LR: 0.0003 +[2026-03-01 06:28:12] (step=0020532) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.0172177656036, LR: 0.0003 +[2026-03-01 06:28:20] (step=0020533) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.017413422030914, LR: 0.0003 +[2026-03-01 06:28:27] (step=0020534) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 4.017609078458228, LR: 0.0003 +[2026-03-01 06:28:35] (step=0020535) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.017804734885541, LR: 0.0003 +[2026-03-01 06:28:43] (step=0020536) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.018000391312855, LR: 0.0003 +[2026-03-01 06:28:51] (step=0020537) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.018196047740168, LR: 0.0003 +[2026-03-01 06:28:59] (step=0020538) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.018391704167482, LR: 0.0003 +[2026-03-01 06:29:07] (step=0020539) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.018587360594796, LR: 0.0003 +[2026-03-01 06:29:15] (step=0020540) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 4.018783017022109, LR: 0.0003 +[2026-03-01 06:29:22] (step=0020541) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.018978673449423, LR: 0.0003 +[2026-03-01 06:29:30] (step=0020542) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.019174329876736, LR: 0.0003 +[2026-03-01 06:29:38] (step=0020543) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.01936998630405, LR: 0.0003 +[2026-03-01 06:29:46] (step=0020544) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.019565642731363, LR: 0.0003 +[2026-03-01 06:29:54] (step=0020545) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.019761299158677, LR: 0.0003 +[2026-03-01 06:30:02] (step=0020546) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.019956955585991, LR: 0.0003 +[2026-03-01 06:30:10] (step=0020547) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.020152612013304, LR: 0.0003 +[2026-03-01 06:30:17] (step=0020548) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.020348268440618, LR: 0.0003 +[2026-03-01 06:30:25] (step=0020549) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.0205439248679316, LR: 0.0003 +[2026-03-01 06:30:33] (step=0020550) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.020739581295246, LR: 0.0003 +[2026-03-01 06:30:41] (step=0020551) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.02093523772256, LR: 0.0003 +[2026-03-01 06:30:49] (step=0020552) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.021130894149873, LR: 0.0003 +[2026-03-01 06:30:57] (step=0020553) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 4.021326550577187, LR: 0.0003 +[2026-03-01 06:31:04] (step=0020554) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.0215222070045, LR: 0.0003 +[2026-03-01 06:31:12] (step=0020555) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.021717863431814, LR: 0.0003 +[2026-03-01 06:31:20] (step=0020556) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.021913519859127, LR: 0.0003 +[2026-03-01 06:31:28] (step=0020557) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.022109176286441, LR: 0.0003 +[2026-03-01 06:31:36] (step=0020558) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.022304832713755, LR: 0.0003 +[2026-03-01 06:31:44] (step=0020559) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.022500489141068, LR: 0.0003 +[2026-03-01 06:31:52] (step=0020560) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.022696145568382, LR: 0.0003 +[2026-03-01 06:31:59] (step=0020561) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.022891801995695, LR: 0.0003 +[2026-03-01 06:32:07] (step=0020562) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.023087458423009, LR: 0.0003 +[2026-03-01 06:32:15] (step=0020563) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.023283114850323, LR: 0.0003 +[2026-03-01 06:32:23] (step=0020564) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.023478771277636, LR: 0.0003 +[2026-03-01 06:32:31] (step=0020565) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.02367442770495, LR: 0.0003 +[2026-03-01 06:32:39] (step=0020566) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 4.023870084132263, LR: 0.0003 +[2026-03-01 06:32:47] (step=0020567) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 4.024065740559577, LR: 0.0003 +[2026-03-01 06:32:54] (step=0020568) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.0242613969868914, LR: 0.0003 +[2026-03-01 06:33:02] (step=0020569) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.024457053414205, LR: 0.0003 +[2026-03-01 06:33:10] (step=0020570) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.024652709841519, LR: 0.0003 +[2026-03-01 06:33:18] (step=0020571) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.024848366268832, LR: 0.0003 +[2026-03-01 06:33:26] (step=0020572) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.025044022696146, LR: 0.0003 +[2026-03-01 06:33:34] (step=0020573) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.025239679123459, LR: 0.0003 +[2026-03-01 06:33:41] (step=0020574) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.025435335550773, LR: 0.0003 +[2026-03-01 06:33:49] (step=0020575) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.025630991978087, LR: 0.0003 +[2026-03-01 06:33:57] (step=0020576) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.0258266484054, LR: 0.0003 +[2026-03-01 06:34:05] (step=0020577) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.026022304832714, LR: 0.0003 +[2026-03-01 06:34:13] (step=0020578) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 4.026217961260027, LR: 0.0003 +[2026-03-01 06:34:21] (step=0020579) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.026413617687341, LR: 0.0003 +[2026-03-01 06:34:28] (step=0020580) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.026609274114655, LR: 0.0003 +[2026-03-01 06:34:36] (step=0020581) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.026804930541968, LR: 0.0003 +[2026-03-01 06:34:44] (step=0020582) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.027000586969282, LR: 0.0003 +[2026-03-01 06:34:52] (step=0020583) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.027196243396595, LR: 0.0003 +[2026-03-01 06:35:00] (step=0020584) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.027391899823909, LR: 0.0003 +[2026-03-01 06:35:08] (step=0020585) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.0275875562512224, LR: 0.0003 +[2026-03-01 06:35:16] (step=0020586) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.0277832126785365, LR: 0.0003 +[2026-03-01 06:35:23] (step=0020587) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.0279788691058505, LR: 0.0003 +[2026-03-01 06:35:31] (step=0020588) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.028174525533164, LR: 0.0003 +[2026-03-01 06:35:39] (step=0020589) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.028370181960478, LR: 0.0003 +[2026-03-01 06:35:47] (step=0020590) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.028565838387791, LR: 0.0003 +[2026-03-01 06:35:55] (step=0020591) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.028761494815105, LR: 0.0003 +[2026-03-01 06:36:03] (step=0020592) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.028957151242419, LR: 0.0003 +[2026-03-01 06:36:11] (step=0020593) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.029152807669732, LR: 0.0003 +[2026-03-01 06:36:18] (step=0020594) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.029348464097046, LR: 0.0003 +[2026-03-01 06:36:26] (step=0020595) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.029544120524359, LR: 0.0003 +[2026-03-01 06:36:34] (step=0020596) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.029739776951673, LR: 0.0003 +[2026-03-01 06:36:42] (step=0020597) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.029935433378986, LR: 0.0003 +[2026-03-01 06:36:50] (step=0020598) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.0301310898063, LR: 0.0003 +[2026-03-01 06:36:58] (step=0020599) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.030326746233614, LR: 0.0003 +[2026-03-01 06:37:05] (step=0020600) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.030522402660927, LR: 0.0003 +[2026-03-01 06:37:13] (step=0020601) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.030718059088241, LR: 0.0003 +[2026-03-01 06:37:21] (step=0020602) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.030913715515554, LR: 0.0003 +[2026-03-01 06:37:29] (step=0020603) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.031109371942868, LR: 0.0003 +[2026-03-01 06:37:37] (step=0020604) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.031305028370182, LR: 0.0003 +[2026-03-01 06:37:45] (step=0020605) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.0315006847974955, LR: 0.0003 +[2026-03-01 06:37:52] (step=0020606) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 4.0316963412248095, LR: 0.0003 +[2026-03-01 06:38:00] (step=0020607) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.031891997652123, LR: 0.0003 +[2026-03-01 06:38:08] (step=0020608) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.032087654079437, LR: 0.0003 +[2026-03-01 06:38:16] (step=0020609) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.03228331050675, LR: 0.0003 +[2026-03-01 06:38:24] (step=0020610) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.032478966934064, LR: 0.0003 +[2026-03-01 06:38:32] (step=0020611) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.032674623361378, LR: 0.0003 +[2026-03-01 06:38:40] (step=0020612) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.032870279788691, LR: 0.0003 +[2026-03-01 06:38:48] (step=0020613) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.033065936216005, LR: 0.0003 +[2026-03-01 06:38:55] (step=0020614) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.033261592643318, LR: 0.0003 +[2026-03-01 06:39:03] (step=0020615) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.033457249070632, LR: 0.0003 +[2026-03-01 06:39:11] (step=0020616) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.033652905497946, LR: 0.0003 +[2026-03-01 06:39:19] (step=0020617) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.033848561925259, LR: 0.0003 +[2026-03-01 06:39:27] (step=0020618) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.034044218352573, LR: 0.0003 +[2026-03-01 06:39:35] (step=0020619) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.034239874779886, LR: 0.0003 +[2026-03-01 06:39:42] (step=0020620) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.0344355312072, LR: 0.0003 +[2026-03-01 06:39:50] (step=0020621) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 4.034631187634514, LR: 0.0003 +[2026-03-01 06:39:58] (step=0020622) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.034826844061827, LR: 0.0003 +[2026-03-01 06:40:06] (step=0020623) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.035022500489141, LR: 0.0003 +[2026-03-01 06:40:14] (step=0020624) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.0352181569164545, LR: 0.0003 +[2026-03-01 06:40:22] (step=0020625) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.0354138133437685, LR: 0.0003 +[2026-03-01 06:40:30] (step=0020626) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.035609469771082, LR: 0.0003 +[2026-03-01 06:40:37] (step=0020627) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 4.035805126198396, LR: 0.0003 +[2026-03-01 06:40:45] (step=0020628) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.03600078262571, LR: 0.0003 +[2026-03-01 06:40:53] (step=0020629) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.036196439053023, LR: 0.0003 +[2026-03-01 06:41:01] (step=0020630) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.036392095480337, LR: 0.0003 +[2026-03-01 06:41:09] (step=0020631) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.03658775190765, LR: 0.0003 +[2026-03-01 06:41:17] (step=0020632) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.036783408334964, LR: 0.0003 +[2026-03-01 06:41:24] (step=0020633) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 4.036979064762278, LR: 0.0003 +[2026-03-01 06:41:32] (step=0020634) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 4.037174721189591, LR: 0.0003 +[2026-03-01 06:41:40] (step=0020635) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.037370377616905, LR: 0.0003 +[2026-03-01 06:41:48] (step=0020636) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 4.037566034044218, LR: 0.0003 +[2026-03-01 06:41:56] (step=0020637) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.037761690471532, LR: 0.0003 +[2026-03-01 06:42:04] (step=0020638) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.037957346898845, LR: 0.0003 +[2026-03-01 06:42:11] (step=0020639) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.038153003326159, LR: 0.0003 +[2026-03-01 06:42:19] (step=0020640) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.038348659753473, LR: 0.0003 +[2026-03-01 06:42:27] (step=0020641) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.038544316180786, LR: 0.0003 +[2026-03-01 06:42:35] (step=0020642) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.0387399726081, LR: 0.0003 +[2026-03-01 06:42:43] (step=0020643) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.0389356290354135, LR: 0.0003 +[2026-03-01 06:42:51] (step=0020644) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.0391312854627275, LR: 0.0003 +[2026-03-01 06:42:59] (step=0020645) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.0393269418900415, LR: 0.0003 +[2026-03-01 06:43:06] (step=0020646) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.039522598317355, LR: 0.0003 +[2026-03-01 06:43:14] (step=0020647) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.039718254744669, LR: 0.0003 +[2026-03-01 06:43:22] (step=0020648) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.039913911171982, LR: 0.0003 +[2026-03-01 06:43:30] (step=0020649) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.040109567599296, LR: 0.0003 +[2026-03-01 06:43:38] (step=0020650) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.040305224026609, LR: 0.0003 +[2026-03-01 06:43:46] (step=0020651) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.040500880453923, LR: 0.0003 +[2026-03-01 06:43:54] (step=0020652) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.040696536881237, LR: 0.0003 +[2026-03-01 06:44:01] (step=0020653) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.04089219330855, LR: 0.0003 +[2026-03-01 06:44:09] (step=0020654) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.041087849735864, LR: 0.0003 +[2026-03-01 06:44:17] (step=0020655) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.041283506163177, LR: 0.0003 +[2026-03-01 06:44:25] (step=0020656) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.041479162590491, LR: 0.0003 +[2026-03-01 06:44:33] (step=0020657) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.041674819017805, LR: 0.0003 +[2026-03-01 06:44:41] (step=0020658) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.041870475445118, LR: 0.0003 +[2026-03-01 06:44:48] (step=0020659) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.042066131872432, LR: 0.0003 +[2026-03-01 06:44:56] (step=0020660) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.042261788299745, LR: 0.0003 +[2026-03-01 06:45:04] (step=0020661) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.042457444727059, LR: 0.0003 +[2026-03-01 06:45:12] (step=0020662) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.0426531011543725, LR: 0.0003 +[2026-03-01 06:45:20] (step=0020663) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.0428487575816865, LR: 0.0003 +[2026-03-01 06:45:28] (step=0020664) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.0430444140090005, LR: 0.0003 +[2026-03-01 06:45:36] (step=0020665) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.043240070436314, LR: 0.0003 +[2026-03-01 06:45:43] (step=0020666) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.043435726863628, LR: 0.0003 +[2026-03-01 06:45:51] (step=0020667) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.043631383290941, LR: 0.0003 +[2026-03-01 06:45:59] (step=0020668) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.043827039718255, LR: 0.0003 +[2026-03-01 06:46:07] (step=0020669) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.044022696145569, LR: 0.0003 +[2026-03-01 06:46:15] (step=0020670) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.044218352572882, LR: 0.0003 +[2026-03-01 06:46:23] (step=0020671) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.044414009000196, LR: 0.0003 +[2026-03-01 06:46:31] (step=0020672) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.044609665427509, LR: 0.0003 +[2026-03-01 06:46:38] (step=0020673) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.044805321854823, LR: 0.0003 +[2026-03-01 06:46:46] (step=0020674) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.045000978282137, LR: 0.0003 +[2026-03-01 06:46:54] (step=0020675) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.04519663470945, LR: 0.0003 +[2026-03-01 06:47:02] (step=0020676) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.045392291136764, LR: 0.0003 +[2026-03-01 06:47:10] (step=0020677) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 4.045587947564077, LR: 0.0003 +[2026-03-01 06:47:18] (step=0020678) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.045783603991391, LR: 0.0003 +[2026-03-01 06:47:25] (step=0020679) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.045979260418704, LR: 0.0003 +[2026-03-01 06:47:33] (step=0020680) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.046174916846018, LR: 0.0003 +[2026-03-01 06:47:41] (step=0020681) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.046370573273332, LR: 0.0003 +[2026-03-01 06:47:49] (step=0020682) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.0465662297006455, LR: 0.0003 +[2026-03-01 06:47:57] (step=0020683) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.0467618861279595, LR: 0.0003 +[2026-03-01 06:48:05] (step=0020684) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.046957542555273, LR: 0.0003 +[2026-03-01 06:48:13] (step=0020685) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.047153198982587, LR: 0.0003 +[2026-03-01 06:48:20] (step=0020686) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.047348855409901, LR: 0.0003 +[2026-03-01 06:48:28] (step=0020687) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.047544511837214, LR: 0.0003 +[2026-03-01 06:48:36] (step=0020688) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 4.047740168264528, LR: 0.0003 +[2026-03-01 06:48:44] (step=0020689) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.047935824691841, LR: 0.0003 +[2026-03-01 06:48:52] (step=0020690) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.048131481119155, LR: 0.0003 +[2026-03-01 06:49:00] (step=0020691) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.048327137546468, LR: 0.0003 +[2026-03-01 06:49:07] (step=0020692) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.048522793973782, LR: 0.0003 +[2026-03-01 06:49:15] (step=0020693) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.048718450401096, LR: 0.0003 +[2026-03-01 06:49:23] (step=0020694) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.048914106828409, LR: 0.0003 +[2026-03-01 06:49:31] (step=0020695) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.049109763255723, LR: 0.0003 +[2026-03-01 06:49:39] (step=0020696) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.049305419683036, LR: 0.0003 +[2026-03-01 06:49:47] (step=0020697) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.04950107611035, LR: 0.0003 +[2026-03-01 06:49:55] (step=0020698) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.049696732537664, LR: 0.0003 +[2026-03-01 06:50:02] (step=0020699) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.049892388964977, LR: 0.0003 +[2026-03-01 06:50:10] (step=0020700) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.050088045392291, LR: 0.0003 +[2026-03-01 06:50:18] (step=0020701) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.0502837018196045, LR: 0.0003 +[2026-03-01 06:50:26] (step=0020702) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.0504793582469185, LR: 0.0003 +[2026-03-01 06:50:34] (step=0020703) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.050675014674232, LR: 0.0003 +[2026-03-01 06:50:42] (step=0020704) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.050870671101546, LR: 0.0003 +[2026-03-01 06:50:49] (step=0020705) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.05106632752886, LR: 0.0003 +[2026-03-01 06:50:57] (step=0020706) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.051261983956173, LR: 0.0003 +[2026-03-01 06:51:05] (step=0020707) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.051457640383487, LR: 0.0003 +[2026-03-01 06:51:13] (step=0020708) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.0516532968108, LR: 0.0003 +[2026-03-01 06:51:21] (step=0020709) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.051848953238114, LR: 0.0003 +[2026-03-01 06:51:29] (step=0020710) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 4.052044609665428, LR: 0.0003 +[2026-03-01 06:51:37] (step=0020711) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.052240266092741, LR: 0.0003 +[2026-03-01 06:51:44] (step=0020712) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.052435922520055, LR: 0.0003 +[2026-03-01 06:51:52] (step=0020713) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 4.052631578947368, LR: 0.0003 +[2026-03-01 06:52:00] (step=0020714) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.052827235374682, LR: 0.0003 +[2026-03-01 06:52:08] (step=0020715) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.053022891801995, LR: 0.0003 +[2026-03-01 06:52:16] (step=0020716) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.053218548229309, LR: 0.0003 +[2026-03-01 06:52:24] (step=0020717) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.053414204656623, LR: 0.0003 +[2026-03-01 06:52:32] (step=0020718) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.053609861083936, LR: 0.0003 +[2026-03-01 06:52:39] (step=0020719) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.05380551751125, LR: 0.0003 +[2026-03-01 06:52:47] (step=0020720) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.0540011739385635, LR: 0.0003 +[2026-03-01 06:52:55] (step=0020721) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.0541968303658775, LR: 0.0003 +[2026-03-01 06:53:03] (step=0020722) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.0543924867931915, LR: 0.0003 +[2026-03-01 06:53:11] (step=0020723) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.054588143220505, LR: 0.0003 +[2026-03-01 06:53:19] (step=0020724) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.054783799647819, LR: 0.0003 +[2026-03-01 06:53:26] (step=0020725) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.054979456075132, LR: 0.0003 +[2026-03-01 06:53:34] (step=0020726) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.055175112502446, LR: 0.0003 +[2026-03-01 06:53:42] (step=0020727) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.05537076892976, LR: 0.0003 +[2026-03-01 06:53:50] (step=0020728) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.055566425357073, LR: 0.0003 +[2026-03-01 06:53:58] (step=0020729) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.055762081784387, LR: 0.0003 +[2026-03-01 06:54:06] (step=0020730) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.0559577382117, LR: 0.0003 +[2026-03-01 06:54:13] (step=0020731) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.056153394639014, LR: 0.0003 +[2026-03-01 06:54:21] (step=0020732) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.056349051066327, LR: 0.0003 +[2026-03-01 06:54:29] (step=0020733) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.056544707493641, LR: 0.0003 +[2026-03-01 06:54:37] (step=0020734) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.056740363920955, LR: 0.0003 +[2026-03-01 06:54:45] (step=0020735) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.056936020348268, LR: 0.0003 +[2026-03-01 06:54:53] (step=0020736) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.057131676775582, LR: 0.0003 +[2026-03-01 06:55:00] (step=0020737) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.057327333202895, LR: 0.0003 +[2026-03-01 06:55:08] (step=0020738) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.057522989630209, LR: 0.0003 +[2026-03-01 06:55:16] (step=0020739) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.057718646057523, LR: 0.0003 +[2026-03-01 06:55:24] (step=0020740) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 4.0579143024848365, LR: 0.0003 +[2026-03-01 06:55:32] (step=0020741) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.0581099589121505, LR: 0.0003 +[2026-03-01 06:55:40] (step=0020742) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 4.058305615339464, LR: 0.0003 +[2026-03-01 06:55:48] (step=0020743) Train Loss: 0.4253, Train Steps/Sec: 0.13, Epoch: 4.058501271766778, LR: 0.0003 +[2026-03-01 06:55:55] (step=0020744) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.058696928194091, LR: 0.0003 +[2026-03-01 06:56:03] (step=0020745) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.058892584621405, LR: 0.0003 +[2026-03-01 06:56:11] (step=0020746) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.059088241048719, LR: 0.0003 +[2026-03-01 06:56:19] (step=0020747) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 4.059283897476032, LR: 0.0003 +[2026-03-01 06:56:27] (step=0020748) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.059479553903346, LR: 0.0003 +[2026-03-01 06:56:35] (step=0020749) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.059675210330659, LR: 0.0003 +[2026-03-01 06:56:43] (step=0020750) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.059870866757973, LR: 0.0003 +[2026-03-01 06:56:50] (step=0020751) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.060066523185287, LR: 0.0003 +[2026-03-01 06:56:58] (step=0020752) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 4.0602621796126, LR: 0.0003 +[2026-03-01 06:57:06] (step=0020753) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.060457836039914, LR: 0.0003 +[2026-03-01 06:57:14] (step=0020754) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.060653492467227, LR: 0.0003 +[2026-03-01 06:57:22] (step=0020755) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.060849148894541, LR: 0.0003 +[2026-03-01 06:57:30] (step=0020756) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 4.061044805321854, LR: 0.0003 +[2026-03-01 06:57:37] (step=0020757) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.061240461749168, LR: 0.0003 +[2026-03-01 06:57:45] (step=0020758) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.061436118176482, LR: 0.0003 +[2026-03-01 06:57:53] (step=0020759) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.0616317746037955, LR: 0.0003 +[2026-03-01 06:58:01] (step=0020760) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.0618274310311095, LR: 0.0003 +[2026-03-01 06:58:09] (step=0020761) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.062023087458423, LR: 0.0003 +[2026-03-01 06:58:17] (step=0020762) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.062218743885737, LR: 0.0003 +[2026-03-01 06:58:25] (step=0020763) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.062414400313051, LR: 0.0003 +[2026-03-01 06:58:32] (step=0020764) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.062610056740364, LR: 0.0003 +[2026-03-01 06:58:40] (step=0020765) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 4.062805713167678, LR: 0.0003 +[2026-03-01 06:58:48] (step=0020766) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.063001369594991, LR: 0.0003 +[2026-03-01 06:58:56] (step=0020767) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.063197026022305, LR: 0.0003 +[2026-03-01 06:59:04] (step=0020768) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.063392682449618, LR: 0.0003 +[2026-03-01 06:59:12] (step=0020769) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.063588338876932, LR: 0.0003 +[2026-03-01 06:59:19] (step=0020770) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.063783995304246, LR: 0.0003 +[2026-03-01 06:59:27] (step=0020771) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.063979651731559, LR: 0.0003 +[2026-03-01 06:59:35] (step=0020772) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.064175308158873, LR: 0.0003 +[2026-03-01 06:59:43] (step=0020773) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.064370964586186, LR: 0.0003 +[2026-03-01 06:59:51] (step=0020774) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.0645666210135, LR: 0.0003 +[2026-03-01 06:59:59] (step=0020775) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.064762277440814, LR: 0.0003 +[2026-03-01 07:00:06] (step=0020776) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.064957933868127, LR: 0.0003 +[2026-03-01 07:00:14] (step=0020777) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.065153590295441, LR: 0.0003 +[2026-03-01 07:00:22] (step=0020778) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.0653492467227546, LR: 0.0003 +[2026-03-01 07:00:30] (step=0020779) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.065544903150069, LR: 0.0003 +[2026-03-01 07:00:38] (step=0020780) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.065740559577382, LR: 0.0003 +[2026-03-01 07:00:46] (step=0020781) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.065936216004696, LR: 0.0003 +[2026-03-01 07:00:54] (step=0020782) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.06613187243201, LR: 0.0003 +[2026-03-01 07:01:01] (step=0020783) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.066327528859323, LR: 0.0003 +[2026-03-01 07:01:09] (step=0020784) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.066523185286637, LR: 0.0003 +[2026-03-01 07:01:17] (step=0020785) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 4.06671884171395, LR: 0.0003 +[2026-03-01 07:01:25] (step=0020786) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.066914498141264, LR: 0.0003 +[2026-03-01 07:01:33] (step=0020787) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.067110154568578, LR: 0.0003 +[2026-03-01 07:01:41] (step=0020788) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.067305810995891, LR: 0.0003 +[2026-03-01 07:01:48] (step=0020789) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.067501467423205, LR: 0.0003 +[2026-03-01 07:01:56] (step=0020790) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.067697123850518, LR: 0.0003 +[2026-03-01 07:02:04] (step=0020791) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 4.067892780277832, LR: 0.0003 +[2026-03-01 07:02:12] (step=0020792) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.068088436705146, LR: 0.0003 +[2026-03-01 07:02:20] (step=0020793) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.068284093132459, LR: 0.0003 +[2026-03-01 07:02:28] (step=0020794) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.068479749559773, LR: 0.0003 +[2026-03-01 07:02:36] (step=0020795) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.068675405987086, LR: 0.0003 +[2026-03-01 07:02:43] (step=0020796) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.0688710624144, LR: 0.0003 +[2026-03-01 07:02:51] (step=0020797) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 4.069066718841714, LR: 0.0003 +[2026-03-01 07:02:59] (step=0020798) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.069262375269028, LR: 0.0003 +[2026-03-01 07:03:07] (step=0020799) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.069458031696342, LR: 0.0003 +[2026-03-01 07:03:15] (step=0020800) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.069653688123655, LR: 0.0003 +[2026-03-01 07:03:23] (step=0020801) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.069849344550969, LR: 0.0003 +[2026-03-01 07:03:30] (step=0020802) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 4.070045000978282, LR: 0.0003 +[2026-03-01 07:03:38] (step=0020803) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.070240657405596, LR: 0.0003 +[2026-03-01 07:03:46] (step=0020804) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.07043631383291, LR: 0.0003 +[2026-03-01 07:03:54] (step=0020805) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.070631970260223, LR: 0.0003 +[2026-03-01 07:04:02] (step=0020806) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.070827626687537, LR: 0.0003 +[2026-03-01 07:04:10] (step=0020807) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.07102328311485, LR: 0.0003 +[2026-03-01 07:04:18] (step=0020808) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.071218939542164, LR: 0.0003 +[2026-03-01 07:04:25] (step=0020809) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.071414595969477, LR: 0.0003 +[2026-03-01 07:04:33] (step=0020810) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 4.071610252396791, LR: 0.0003 +[2026-03-01 07:04:41] (step=0020811) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.071805908824105, LR: 0.0003 +[2026-03-01 07:04:49] (step=0020812) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.072001565251418, LR: 0.0003 +[2026-03-01 07:04:57] (step=0020813) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.072197221678732, LR: 0.0003 +[2026-03-01 07:05:05] (step=0020814) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.072392878106045, LR: 0.0003 +[2026-03-01 07:05:13] (step=0020815) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.0725885345333595, LR: 0.0003 +[2026-03-01 07:05:20] (step=0020816) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.0727841909606735, LR: 0.0003 +[2026-03-01 07:05:28] (step=0020817) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.072979847387987, LR: 0.0003 +[2026-03-01 07:05:36] (step=0020818) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.073175503815301, LR: 0.0003 +[2026-03-01 07:05:44] (step=0020819) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.073371160242614, LR: 0.0003 +[2026-03-01 07:05:52] (step=0020820) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.073566816669928, LR: 0.0003 +[2026-03-01 07:06:00] (step=0020821) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.073762473097241, LR: 0.0003 +[2026-03-01 07:06:07] (step=0020822) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.073958129524555, LR: 0.0003 +[2026-03-01 07:06:15] (step=0020823) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.074153785951869, LR: 0.0003 +[2026-03-01 07:06:23] (step=0020824) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 4.074349442379182, LR: 0.0003 +[2026-03-01 07:06:31] (step=0020825) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.074545098806496, LR: 0.0003 +[2026-03-01 07:06:39] (step=0020826) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.074740755233809, LR: 0.0003 +[2026-03-01 07:06:47] (step=0020827) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.074936411661123, LR: 0.0003 +[2026-03-01 07:06:54] (step=0020828) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.075132068088437, LR: 0.0003 +[2026-03-01 07:07:02] (step=0020829) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.07532772451575, LR: 0.0003 +[2026-03-01 07:07:10] (step=0020830) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.075523380943064, LR: 0.0003 +[2026-03-01 07:07:18] (step=0020831) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.075719037370377, LR: 0.0003 +[2026-03-01 07:07:26] (step=0020832) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.075914693797691, LR: 0.0003 +[2026-03-01 07:07:34] (step=0020833) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.0761103502250045, LR: 0.0003 +[2026-03-01 07:07:41] (step=0020834) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.0763060066523185, LR: 0.0003 +[2026-03-01 07:07:49] (step=0020835) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.0765016630796325, LR: 0.0003 +[2026-03-01 07:07:57] (step=0020836) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.076697319506946, LR: 0.0003 +[2026-03-01 07:08:05] (step=0020837) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.07689297593426, LR: 0.0003 +[2026-03-01 07:08:13] (step=0020838) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.077088632361573, LR: 0.0003 +[2026-03-01 07:08:21] (step=0020839) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.077284288788887, LR: 0.0003 +[2026-03-01 07:08:28] (step=0020840) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.077479945216201, LR: 0.0003 +[2026-03-01 07:08:36] (step=0020841) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.077675601643514, LR: 0.0003 +[2026-03-01 07:08:44] (step=0020842) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.077871258070828, LR: 0.0003 +[2026-03-01 07:08:52] (step=0020843) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.078066914498141, LR: 0.0003 +[2026-03-01 07:09:00] (step=0020844) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.078262570925455, LR: 0.0003 +[2026-03-01 07:09:08] (step=0020845) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.078458227352769, LR: 0.0003 +[2026-03-01 07:09:16] (step=0020846) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.078653883780082, LR: 0.0003 +[2026-03-01 07:09:24] (step=0020847) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.078849540207396, LR: 0.0003 +[2026-03-01 07:09:31] (step=0020848) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.079045196634709, LR: 0.0003 +[2026-03-01 07:09:39] (step=0020849) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.079240853062023, LR: 0.0003 +[2026-03-01 07:09:47] (step=0020850) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.079436509489336, LR: 0.0003 +[2026-03-01 07:09:55] (step=0020851) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.07963216591665, LR: 0.0003 +[2026-03-01 07:10:03] (step=0020852) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.079827822343964, LR: 0.0003 +[2026-03-01 07:10:11] (step=0020853) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.0800234787712775, LR: 0.0003 +[2026-03-01 07:10:18] (step=0020854) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.0802191351985915, LR: 0.0003 +[2026-03-01 07:10:26] (step=0020855) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 4.080414791625905, LR: 0.0003 +[2026-03-01 07:10:34] (step=0020856) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.080610448053219, LR: 0.0003 +[2026-03-01 07:10:42] (step=0020857) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.080806104480533, LR: 0.0003 +[2026-03-01 07:10:50] (step=0020858) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.081001760907846, LR: 0.0003 +[2026-03-01 07:10:58] (step=0020859) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.08119741733516, LR: 0.0003 +[2026-03-01 07:11:06] (step=0020860) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.081393073762473, LR: 0.0003 +[2026-03-01 07:11:13] (step=0020861) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.081588730189787, LR: 0.0003 +[2026-03-01 07:11:21] (step=0020862) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.0817843866171, LR: 0.0003 +[2026-03-01 07:11:29] (step=0020863) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.081980043044414, LR: 0.0003 +[2026-03-01 07:11:37] (step=0020864) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.082175699471728, LR: 0.0003 +[2026-03-01 07:11:45] (step=0020865) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.082371355899041, LR: 0.0003 +[2026-03-01 07:11:53] (step=0020866) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.082567012326355, LR: 0.0003 +[2026-03-01 07:12:00] (step=0020867) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.082762668753668, LR: 0.0003 +[2026-03-01 07:12:08] (step=0020868) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.082958325180982, LR: 0.0003 +[2026-03-01 07:12:16] (step=0020869) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.083153981608296, LR: 0.0003 +[2026-03-01 07:12:24] (step=0020870) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.083349638035609, LR: 0.0003 +[2026-03-01 07:12:32] (step=0020871) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.083545294462923, LR: 0.0003 +[2026-03-01 07:12:40] (step=0020872) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.0837409508902365, LR: 0.0003 +[2026-03-01 07:12:48] (step=0020873) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.0839366073175505, LR: 0.0003 +[2026-03-01 07:12:55] (step=0020874) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.084132263744864, LR: 0.0003 +[2026-03-01 07:13:03] (step=0020875) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.084327920172178, LR: 0.0003 +[2026-03-01 07:13:11] (step=0020876) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.084523576599492, LR: 0.0003 +[2026-03-01 07:13:19] (step=0020877) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.084719233026805, LR: 0.0003 +[2026-03-01 07:13:27] (step=0020878) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.084914889454119, LR: 0.0003 +[2026-03-01 07:13:35] (step=0020879) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.085110545881432, LR: 0.0003 +[2026-03-01 07:13:42] (step=0020880) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.085306202308746, LR: 0.0003 +[2026-03-01 07:13:50] (step=0020881) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.08550185873606, LR: 0.0003 +[2026-03-01 07:13:58] (step=0020882) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.085697515163373, LR: 0.0003 +[2026-03-01 07:14:06] (step=0020883) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.085893171590687, LR: 0.0003 +[2026-03-01 07:14:14] (step=0020884) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.086088828018, LR: 0.0003 +[2026-03-01 07:14:22] (step=0020885) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.086284484445314, LR: 0.0003 +[2026-03-01 07:14:30] (step=0020886) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.086480140872627, LR: 0.0003 +[2026-03-01 07:14:37] (step=0020887) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.086675797299941, LR: 0.0003 +[2026-03-01 07:14:45] (step=0020888) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 4.086871453727255, LR: 0.0003 +[2026-03-01 07:14:53] (step=0020889) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.087067110154568, LR: 0.0003 +[2026-03-01 07:15:01] (step=0020890) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 4.087262766581882, LR: 0.0003 +[2026-03-01 07:15:09] (step=0020891) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.0874584230091955, LR: 0.0003 +[2026-03-01 07:15:17] (step=0020892) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.0876540794365095, LR: 0.0003 +[2026-03-01 07:15:25] (step=0020893) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.0878497358638235, LR: 0.0003 +[2026-03-01 07:15:32] (step=0020894) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.088045392291137, LR: 0.0003 +[2026-03-01 07:15:40] (step=0020895) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.088241048718451, LR: 0.0003 +[2026-03-01 07:15:48] (step=0020896) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.088436705145764, LR: 0.0003 +[2026-03-01 07:15:56] (step=0020897) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.088632361573078, LR: 0.0003 +[2026-03-01 07:16:04] (step=0020898) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.088828018000392, LR: 0.0003 +[2026-03-01 07:16:12] (step=0020899) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.089023674427705, LR: 0.0003 +[2026-03-01 07:16:19] (step=0020900) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.089219330855019, LR: 0.0003 +[2026-03-01 07:16:27] (step=0020901) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.089414987282332, LR: 0.0003 +[2026-03-01 07:16:35] (step=0020902) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.089610643709646, LR: 0.0003 +[2026-03-01 07:16:43] (step=0020903) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.089806300136959, LR: 0.0003 +[2026-03-01 07:16:51] (step=0020904) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.090001956564273, LR: 0.0003 +[2026-03-01 07:16:59] (step=0020905) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.090197612991587, LR: 0.0003 +[2026-03-01 07:17:06] (step=0020906) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.0903932694189, LR: 0.0003 +[2026-03-01 07:17:14] (step=0020907) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.090588925846214, LR: 0.0003 +[2026-03-01 07:17:22] (step=0020908) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.090784582273527, LR: 0.0003 +[2026-03-01 07:17:30] (step=0020909) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 4.090980238700841, LR: 0.0003 +[2026-03-01 07:17:38] (step=0020910) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.091175895128155, LR: 0.0003 +[2026-03-01 07:17:46] (step=0020911) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 4.0913715515554685, LR: 0.0003 +[2026-03-01 07:17:54] (step=0020912) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.0915672079827825, LR: 0.0003 +[2026-03-01 07:18:01] (step=0020913) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.091762864410096, LR: 0.0003 +[2026-03-01 07:18:09] (step=0020914) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.09195852083741, LR: 0.0003 +[2026-03-01 07:18:17] (step=0020915) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.092154177264723, LR: 0.0003 +[2026-03-01 07:18:25] (step=0020916) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.092349833692037, LR: 0.0003 +[2026-03-01 07:18:33] (step=0020917) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.092545490119351, LR: 0.0003 +[2026-03-01 07:18:41] (step=0020918) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.092741146546664, LR: 0.0003 +[2026-03-01 07:18:48] (step=0020919) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.092936802973978, LR: 0.0003 +[2026-03-01 07:18:56] (step=0020920) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.093132459401291, LR: 0.0003 +[2026-03-01 07:19:04] (step=0020921) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.093328115828605, LR: 0.0003 +[2026-03-01 07:19:12] (step=0020922) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.093523772255919, LR: 0.0003 +[2026-03-01 07:19:20] (step=0020923) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.093719428683232, LR: 0.0003 +[2026-03-01 07:19:28] (step=0020924) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.093915085110546, LR: 0.0003 +[2026-03-01 07:19:35] (step=0020925) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.094110741537859, LR: 0.0003 +[2026-03-01 07:19:43] (step=0020926) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 4.094306397965173, LR: 0.0003 +[2026-03-01 07:19:51] (step=0020927) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.094502054392486, LR: 0.0003 +[2026-03-01 07:19:59] (step=0020928) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.0946977108198, LR: 0.0003 +[2026-03-01 07:20:07] (step=0020929) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.094893367247114, LR: 0.0003 +[2026-03-01 07:20:15] (step=0020930) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.0950890236744275, LR: 0.0003 +[2026-03-01 07:20:23] (step=0020931) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.0952846801017415, LR: 0.0003 +[2026-03-01 07:20:30] (step=0020932) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.095480336529055, LR: 0.0003 +[2026-03-01 07:20:38] (step=0020933) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.095675992956369, LR: 0.0003 +[2026-03-01 07:20:46] (step=0020934) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.095871649383683, LR: 0.0003 +[2026-03-01 07:20:54] (step=0020935) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.096067305810996, LR: 0.0003 +[2026-03-01 07:21:02] (step=0020936) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.09626296223831, LR: 0.0003 +[2026-03-01 07:21:10] (step=0020937) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.096458618665623, LR: 0.0003 +[2026-03-01 07:21:17] (step=0020938) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 4.096654275092937, LR: 0.0003 +[2026-03-01 07:21:25] (step=0020939) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.09684993152025, LR: 0.0003 +[2026-03-01 07:21:33] (step=0020940) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.097045587947564, LR: 0.0003 +[2026-03-01 07:21:41] (step=0020941) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.097241244374878, LR: 0.0003 +[2026-03-01 07:21:49] (step=0020942) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.097436900802191, LR: 0.0003 +[2026-03-01 07:21:57] (step=0020943) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.097632557229505, LR: 0.0003 +[2026-03-01 07:22:05] (step=0020944) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.097828213656818, LR: 0.0003 +[2026-03-01 07:22:12] (step=0020945) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.098023870084132, LR: 0.0003 +[2026-03-01 07:22:20] (step=0020946) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.098219526511446, LR: 0.0003 +[2026-03-01 07:22:28] (step=0020947) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.098415182938759, LR: 0.0003 +[2026-03-01 07:22:36] (step=0020948) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.098610839366073, LR: 0.0003 +[2026-03-01 07:22:44] (step=0020949) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.0988064957933865, LR: 0.0003 +[2026-03-01 07:22:52] (step=0020950) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.0990021522207005, LR: 0.0003 +[2026-03-01 07:22:59] (step=0020951) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.0991978086480145, LR: 0.0003 +[2026-03-01 07:23:07] (step=0020952) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.099393465075328, LR: 0.0003 +[2026-03-01 07:23:15] (step=0020953) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.099589121502642, LR: 0.0003 +[2026-03-01 07:23:23] (step=0020954) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.099784777929955, LR: 0.0003 +[2026-03-01 07:23:31] (step=0020955) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.099980434357269, LR: 0.0003 +[2026-03-01 07:23:39] (step=0020956) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.100176090784582, LR: 0.0003 +[2026-03-01 07:23:46] (step=0020957) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.100371747211896, LR: 0.0003 +[2026-03-01 07:23:54] (step=0020958) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.10056740363921, LR: 0.0003 +[2026-03-01 07:24:02] (step=0020959) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 4.100763060066523, LR: 0.0003 +[2026-03-01 07:24:10] (step=0020960) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.100958716493837, LR: 0.0003 +[2026-03-01 07:24:18] (step=0020961) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.10115437292115, LR: 0.0003 +[2026-03-01 07:24:26] (step=0020962) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.101350029348464, LR: 0.0003 +[2026-03-01 07:24:34] (step=0020963) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.101545685775778, LR: 0.0003 +[2026-03-01 07:24:41] (step=0020964) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.101741342203091, LR: 0.0003 +[2026-03-01 07:24:49] (step=0020965) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.101936998630405, LR: 0.0003 +[2026-03-01 07:24:57] (step=0020966) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.102132655057718, LR: 0.0003 +[2026-03-01 07:25:05] (step=0020967) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 4.102328311485032, LR: 0.0003 +[2026-03-01 07:25:13] (step=0020968) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.1025239679123455, LR: 0.0003 +[2026-03-01 07:25:21] (step=0020969) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.1027196243396595, LR: 0.0003 +[2026-03-01 07:25:28] (step=0020970) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.1029152807669735, LR: 0.0003 +[2026-03-01 07:25:36] (step=0020971) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.103110937194287, LR: 0.0003 +[2026-03-01 07:25:44] (step=0020972) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.103306593621601, LR: 0.0003 +[2026-03-01 07:25:52] (step=0020973) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.103502250048914, LR: 0.0003 +[2026-03-01 07:26:00] (step=0020974) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.103697906476228, LR: 0.0003 +[2026-03-01 07:26:08] (step=0020975) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.103893562903542, LR: 0.0003 +[2026-03-01 07:26:16] (step=0020976) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 4.104089219330855, LR: 0.0003 +[2026-03-01 07:26:23] (step=0020977) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.104284875758169, LR: 0.0003 +[2026-03-01 07:26:31] (step=0020978) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.104480532185482, LR: 0.0003 +[2026-03-01 07:26:39] (step=0020979) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.104676188612796, LR: 0.0003 +[2026-03-01 07:26:47] (step=0020980) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.104871845040109, LR: 0.0003 +[2026-03-01 07:26:55] (step=0020981) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.105067501467423, LR: 0.0003 +[2026-03-01 07:27:03] (step=0020982) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.105263157894737, LR: 0.0003 +[2026-03-01 07:27:10] (step=0020983) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 4.10545881432205, LR: 0.0003 +[2026-03-01 07:27:18] (step=0020984) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.105654470749364, LR: 0.0003 +[2026-03-01 07:27:26] (step=0020985) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.105850127176677, LR: 0.0003 +[2026-03-01 07:27:34] (step=0020986) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.106045783603991, LR: 0.0003 +[2026-03-01 07:27:42] (step=0020987) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.106241440031305, LR: 0.0003 +[2026-03-01 07:27:50] (step=0020988) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.1064370964586185, LR: 0.0003 +[2026-03-01 07:27:57] (step=0020989) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.1066327528859325, LR: 0.0003 +[2026-03-01 07:28:05] (step=0020990) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.106828409313246, LR: 0.0003 +[2026-03-01 07:28:13] (step=0020991) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.10702406574056, LR: 0.0003 +[2026-03-01 07:28:21] (step=0020992) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.107219722167873, LR: 0.0003 +[2026-03-01 07:28:29] (step=0020993) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.107415378595187, LR: 0.0003 +[2026-03-01 07:28:37] (step=0020994) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.107611035022501, LR: 0.0003 +[2026-03-01 07:28:45] (step=0020995) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 4.107806691449814, LR: 0.0003 +[2026-03-01 07:28:53] (step=0020996) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.108002347877128, LR: 0.0003 +[2026-03-01 07:29:00] (step=0020997) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.108198004304441, LR: 0.0003 +[2026-03-01 07:29:08] (step=0020998) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 4.108393660731755, LR: 0.0003 +[2026-03-01 07:29:16] (step=0020999) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.108589317159069, LR: 0.0003 +[2026-03-01 07:29:24] (step=0021000) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 4.108784973586382, LR: 0.0003 +[2026-03-01 07:29:24] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0021000/ +[2026-03-01 07:29:32] (step=0021001) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.108980630013696, LR: 0.0003 +[2026-03-01 07:29:40] (step=0021002) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.109176286441009, LR: 0.0003 +[2026-03-01 07:29:47] (step=0021003) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.109371942868323, LR: 0.0003 +[2026-03-01 07:29:55] (step=0021004) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.109567599295636, LR: 0.0003 +[2026-03-01 07:30:03] (step=0021005) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.10976325572295, LR: 0.0003 +[2026-03-01 07:30:11] (step=0021006) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.109958912150264, LR: 0.0003 +[2026-03-01 07:30:19] (step=0021007) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 4.1101545685775775, LR: 0.0003 +[2026-03-01 07:30:27] (step=0021008) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.1103502250048916, LR: 0.0003 +[2026-03-01 07:30:35] (step=0021009) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.110545881432205, LR: 0.0003 +[2026-03-01 07:30:42] (step=0021010) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.110741537859519, LR: 0.0003 +[2026-03-01 07:30:50] (step=0021011) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.110937194286833, LR: 0.0003 +[2026-03-01 07:30:58] (step=0021012) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.111132850714146, LR: 0.0003 +[2026-03-01 07:31:06] (step=0021013) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 4.11132850714146, LR: 0.0003 +[2026-03-01 07:31:14] (step=0021014) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.111524163568773, LR: 0.0003 +[2026-03-01 07:31:22] (step=0021015) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.111719819996087, LR: 0.0003 +[2026-03-01 07:31:30] (step=0021016) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.111915476423401, LR: 0.0003 +[2026-03-01 07:31:37] (step=0021017) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.112111132850714, LR: 0.0003 +[2026-03-01 07:31:45] (step=0021018) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.112306789278028, LR: 0.0003 +[2026-03-01 07:31:53] (step=0021019) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 4.112502445705341, LR: 0.0003 +[2026-03-01 07:32:01] (step=0021020) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 4.112698102132655, LR: 0.0003 +[2026-03-01 07:32:09] (step=0021021) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.112893758559968, LR: 0.0003 +[2026-03-01 07:32:17] (step=0021022) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.113089414987282, LR: 0.0003 +[2026-03-01 07:32:24] (step=0021023) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.113285071414596, LR: 0.0003 +[2026-03-01 07:32:32] (step=0021024) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.113480727841909, LR: 0.0003 +[2026-03-01 07:32:40] (step=0021025) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.113676384269223, LR: 0.0003 +[2026-03-01 07:32:48] (step=0021026) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.113872040696537, LR: 0.0003 +[2026-03-01 07:32:56] (step=0021027) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.114067697123851, LR: 0.0003 +[2026-03-01 07:33:04] (step=0021028) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.114263353551165, LR: 0.0003 +[2026-03-01 07:33:11] (step=0021029) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.114459009978478, LR: 0.0003 +[2026-03-01 07:33:19] (step=0021030) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.114654666405792, LR: 0.0003 +[2026-03-01 07:33:27] (step=0021031) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.114850322833105, LR: 0.0003 +[2026-03-01 07:33:35] (step=0021032) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.115045979260419, LR: 0.0003 +[2026-03-01 07:33:43] (step=0021033) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.115241635687732, LR: 0.0003 +[2026-03-01 07:33:51] (step=0021034) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 4.115437292115046, LR: 0.0003 +[2026-03-01 07:33:58] (step=0021035) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.11563294854236, LR: 0.0003 +[2026-03-01 07:34:06] (step=0021036) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.115828604969673, LR: 0.0003 +[2026-03-01 07:34:14] (step=0021037) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.116024261396987, LR: 0.0003 +[2026-03-01 07:34:22] (step=0021038) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.1162199178243, LR: 0.0003 +[2026-03-01 07:34:30] (step=0021039) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.116415574251614, LR: 0.0003 +[2026-03-01 07:34:38] (step=0021040) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 4.116611230678928, LR: 0.0003 +[2026-03-01 07:34:46] (step=0021041) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.116806887106241, LR: 0.0003 +[2026-03-01 07:34:53] (step=0021042) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.117002543533555, LR: 0.0003 +[2026-03-01 07:35:01] (step=0021043) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.117198199960868, LR: 0.0003 +[2026-03-01 07:35:09] (step=0021044) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.1173938563881824, LR: 0.0003 +[2026-03-01 07:35:17] (step=0021045) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.117589512815496, LR: 0.0003 +[2026-03-01 07:35:25] (step=0021046) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.11778516924281, LR: 0.0003 +[2026-03-01 07:35:33] (step=0021047) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.117980825670124, LR: 0.0003 +[2026-03-01 07:35:41] (step=0021048) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.118176482097437, LR: 0.0003 +[2026-03-01 07:35:48] (step=0021049) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.118372138524751, LR: 0.0003 +[2026-03-01 07:35:56] (step=0021050) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.118567794952064, LR: 0.0003 +[2026-03-01 07:36:04] (step=0021051) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.118763451379378, LR: 0.0003 +[2026-03-01 07:36:12] (step=0021052) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.118959107806692, LR: 0.0003 +[2026-03-01 07:36:20] (step=0021053) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.119154764234005, LR: 0.0003 +[2026-03-01 07:36:28] (step=0021054) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.119350420661319, LR: 0.0003 +[2026-03-01 07:36:35] (step=0021055) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 4.119546077088632, LR: 0.0003 +[2026-03-01 07:36:43] (step=0021056) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.119741733515946, LR: 0.0003 +[2026-03-01 07:36:51] (step=0021057) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.119937389943259, LR: 0.0003 +[2026-03-01 07:36:59] (step=0021058) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.120133046370573, LR: 0.0003 +[2026-03-01 07:37:07] (step=0021059) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.120328702797887, LR: 0.0003 +[2026-03-01 07:37:15] (step=0021060) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.1205243592252, LR: 0.0003 +[2026-03-01 07:37:23] (step=0021061) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.120720015652514, LR: 0.0003 +[2026-03-01 07:37:30] (step=0021062) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.1209156720798275, LR: 0.0003 +[2026-03-01 07:37:38] (step=0021063) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.1211113285071415, LR: 0.0003 +[2026-03-01 07:37:46] (step=0021064) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.1213069849344555, LR: 0.0003 +[2026-03-01 07:37:54] (step=0021065) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.121502641361769, LR: 0.0003 +[2026-03-01 07:38:02] (step=0021066) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.121698297789083, LR: 0.0003 +[2026-03-01 07:38:10] (step=0021067) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 4.121893954216396, LR: 0.0003 +[2026-03-01 07:38:18] (step=0021068) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 4.12208961064371, LR: 0.0003 +[2026-03-01 07:38:25] (step=0021069) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 4.122285267071024, LR: 0.0003 +[2026-03-01 07:38:33] (step=0021070) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.122480923498337, LR: 0.0003 +[2026-03-01 07:38:41] (step=0021071) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 4.122676579925651, LR: 0.0003 +[2026-03-01 07:38:49] (step=0021072) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.122872236352964, LR: 0.0003 +[2026-03-01 07:38:57] (step=0021073) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.123067892780278, LR: 0.0003 +[2026-03-01 07:39:05] (step=0021074) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.123263549207591, LR: 0.0003 +[2026-03-01 07:39:12] (step=0021075) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.123459205634905, LR: 0.0003 +[2026-03-01 07:39:20] (step=0021076) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.123654862062219, LR: 0.0003 +[2026-03-01 07:39:28] (step=0021077) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.123850518489532, LR: 0.0003 +[2026-03-01 07:39:36] (step=0021078) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.124046174916846, LR: 0.0003 +[2026-03-01 07:39:44] (step=0021079) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.124241831344159, LR: 0.0003 +[2026-03-01 07:39:52] (step=0021080) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.124437487771473, LR: 0.0003 +[2026-03-01 07:39:59] (step=0021081) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.124633144198787, LR: 0.0003 +[2026-03-01 07:40:07] (step=0021082) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.1248288006261005, LR: 0.0003 +[2026-03-01 07:40:15] (step=0021083) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.1250244570534145, LR: 0.0003 +[2026-03-01 07:40:23] (step=0021084) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.125220113480728, LR: 0.0003 +[2026-03-01 07:40:31] (step=0021085) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.125415769908042, LR: 0.0003 +[2026-03-01 07:40:39] (step=0021086) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.125611426335355, LR: 0.0003 +[2026-03-01 07:40:47] (step=0021087) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.125807082762669, LR: 0.0003 +[2026-03-01 07:40:54] (step=0021088) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.126002739189983, LR: 0.0003 +[2026-03-01 07:41:02] (step=0021089) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.126198395617296, LR: 0.0003 +[2026-03-01 07:41:10] (step=0021090) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 4.12639405204461, LR: 0.0003 +[2026-03-01 07:41:18] (step=0021091) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.126589708471923, LR: 0.0003 +[2026-03-01 07:41:26] (step=0021092) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.126785364899237, LR: 0.0003 +[2026-03-01 07:41:34] (step=0021093) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.126981021326551, LR: 0.0003 +[2026-03-01 07:41:41] (step=0021094) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.127176677753864, LR: 0.0003 +[2026-03-01 07:41:49] (step=0021095) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.127372334181178, LR: 0.0003 +[2026-03-01 07:41:57] (step=0021096) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.127567990608491, LR: 0.0003 +[2026-03-01 07:42:05] (step=0021097) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.127763647035805, LR: 0.0003 +[2026-03-01 07:42:13] (step=0021098) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.127959303463118, LR: 0.0003 +[2026-03-01 07:42:21] (step=0021099) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.128154959890432, LR: 0.0003 +[2026-03-01 07:42:29] (step=0021100) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 4.128350616317746, LR: 0.0003 +[2026-03-01 07:42:36] (step=0021101) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 4.1285462727450595, LR: 0.0003 +[2026-03-01 07:42:44] (step=0021102) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.1287419291723735, LR: 0.0003 +[2026-03-01 07:42:52] (step=0021103) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.128937585599687, LR: 0.0003 +[2026-03-01 07:43:00] (step=0021104) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.129133242027001, LR: 0.0003 +[2026-03-01 07:43:08] (step=0021105) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 4.129328898454315, LR: 0.0003 +[2026-03-01 07:43:16] (step=0021106) Train Loss: 0.4464, Train Steps/Sec: 0.12, Epoch: 4.129524554881628, LR: 0.0003 +[2026-03-01 07:43:24] (step=0021107) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.129720211308942, LR: 0.0003 +[2026-03-01 07:43:31] (step=0021108) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.129915867736255, LR: 0.0003 +[2026-03-01 07:43:39] (step=0021109) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.130111524163569, LR: 0.0003 +[2026-03-01 07:43:47] (step=0021110) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.130307180590882, LR: 0.0003 +[2026-03-01 07:43:55] (step=0021111) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 4.130502837018196, LR: 0.0003 +[2026-03-01 07:44:03] (step=0021112) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.13069849344551, LR: 0.0003 +[2026-03-01 07:44:11] (step=0021113) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.130894149872823, LR: 0.0003 +[2026-03-01 07:44:19] (step=0021114) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.131089806300137, LR: 0.0003 +[2026-03-01 07:44:26] (step=0021115) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 4.13128546272745, LR: 0.0003 +[2026-03-01 07:44:34] (step=0021116) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.131481119154764, LR: 0.0003 +[2026-03-01 07:44:42] (step=0021117) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.131676775582078, LR: 0.0003 +[2026-03-01 07:44:50] (step=0021118) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.131872432009391, LR: 0.0003 +[2026-03-01 07:44:58] (step=0021119) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.132068088436705, LR: 0.0003 +[2026-03-01 07:45:06] (step=0021120) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.1322637448640185, LR: 0.0003 +[2026-03-01 07:45:13] (step=0021121) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.1324594012913325, LR: 0.0003 +[2026-03-01 07:45:21] (step=0021122) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.1326550577186465, LR: 0.0003 +[2026-03-01 07:45:29] (step=0021123) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.13285071414596, LR: 0.0003 +[2026-03-01 07:45:37] (step=0021124) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.133046370573274, LR: 0.0003 +[2026-03-01 07:45:45] (step=0021125) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.133242027000587, LR: 0.0003 +[2026-03-01 07:45:53] (step=0021126) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.133437683427901, LR: 0.0003 +[2026-03-01 07:46:00] (step=0021127) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.133633339855214, LR: 0.0003 +[2026-03-01 07:46:08] (step=0021128) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.133828996282528, LR: 0.0003 +[2026-03-01 07:46:16] (step=0021129) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.134024652709842, LR: 0.0003 +[2026-03-01 07:46:24] (step=0021130) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 4.134220309137155, LR: 0.0003 +[2026-03-01 07:46:32] (step=0021131) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.134415965564469, LR: 0.0003 +[2026-03-01 07:46:40] (step=0021132) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.134611621991782, LR: 0.0003 +[2026-03-01 07:46:47] (step=0021133) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.134807278419096, LR: 0.0003 +[2026-03-01 07:46:55] (step=0021134) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.13500293484641, LR: 0.0003 +[2026-03-01 07:47:03] (step=0021135) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.135198591273723, LR: 0.0003 +[2026-03-01 07:47:11] (step=0021136) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.135394247701037, LR: 0.0003 +[2026-03-01 07:47:19] (step=0021137) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.13558990412835, LR: 0.0003 +[2026-03-01 07:47:27] (step=0021138) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.135785560555664, LR: 0.0003 +[2026-03-01 07:47:35] (step=0021139) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.1359812169829775, LR: 0.0003 +[2026-03-01 07:47:42] (step=0021140) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.1361768734102915, LR: 0.0003 +[2026-03-01 07:47:50] (step=0021141) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.1363725298376055, LR: 0.0003 +[2026-03-01 07:47:58] (step=0021142) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.136568186264919, LR: 0.0003 +[2026-03-01 07:48:06] (step=0021143) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.136763842692233, LR: 0.0003 +[2026-03-01 07:48:14] (step=0021144) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.136959499119546, LR: 0.0003 +[2026-03-01 07:48:22] (step=0021145) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 4.13715515554686, LR: 0.0003 +[2026-03-01 07:48:29] (step=0021146) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.137350811974174, LR: 0.0003 +[2026-03-01 07:48:37] (step=0021147) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.137546468401487, LR: 0.0003 +[2026-03-01 07:48:45] (step=0021148) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.137742124828801, LR: 0.0003 +[2026-03-01 07:48:53] (step=0021149) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.137937781256114, LR: 0.0003 +[2026-03-01 07:49:01] (step=0021150) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.138133437683428, LR: 0.0003 +[2026-03-01 07:49:09] (step=0021151) Train Loss: 0.4665, Train Steps/Sec: 0.13, Epoch: 4.138329094110741, LR: 0.0003 +[2026-03-01 07:49:17] (step=0021152) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.138524750538055, LR: 0.0003 +[2026-03-01 07:49:24] (step=0021153) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.138720406965369, LR: 0.0003 +[2026-03-01 07:49:32] (step=0021154) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.138916063392682, LR: 0.0003 +[2026-03-01 07:49:40] (step=0021155) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.139111719819996, LR: 0.0003 +[2026-03-01 07:49:48] (step=0021156) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.139307376247309, LR: 0.0003 +[2026-03-01 07:49:56] (step=0021157) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.139503032674623, LR: 0.0003 +[2026-03-01 07:50:04] (step=0021158) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.139698689101937, LR: 0.0003 +[2026-03-01 07:50:11] (step=0021159) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.1398943455292505, LR: 0.0003 +[2026-03-01 07:50:19] (step=0021160) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.1400900019565645, LR: 0.0003 +[2026-03-01 07:50:27] (step=0021161) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.140285658383878, LR: 0.0003 +[2026-03-01 07:50:35] (step=0021162) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.140481314811192, LR: 0.0003 +[2026-03-01 07:50:43] (step=0021163) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.140676971238505, LR: 0.0003 +[2026-03-01 07:50:51] (step=0021164) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.140872627665819, LR: 0.0003 +[2026-03-01 07:50:58] (step=0021165) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.141068284093133, LR: 0.0003 +[2026-03-01 07:51:06] (step=0021166) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.141263940520446, LR: 0.0003 +[2026-03-01 07:51:14] (step=0021167) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.14145959694776, LR: 0.0003 +[2026-03-01 07:51:22] (step=0021168) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 4.141655253375073, LR: 0.0003 +[2026-03-01 07:51:30] (step=0021169) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.141850909802387, LR: 0.0003 +[2026-03-01 07:51:38] (step=0021170) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.142046566229701, LR: 0.0003 +[2026-03-01 07:51:46] (step=0021171) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.142242222657014, LR: 0.0003 +[2026-03-01 07:51:53] (step=0021172) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.142437879084328, LR: 0.0003 +[2026-03-01 07:52:01] (step=0021173) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.142633535511641, LR: 0.0003 +[2026-03-01 07:52:09] (step=0021174) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.142829191938955, LR: 0.0003 +[2026-03-01 07:52:17] (step=0021175) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.143024848366269, LR: 0.0003 +[2026-03-01 07:52:25] (step=0021176) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.143220504793582, LR: 0.0003 +[2026-03-01 07:52:33] (step=0021177) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.143416161220896, LR: 0.0003 +[2026-03-01 07:52:40] (step=0021178) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.1436118176482095, LR: 0.0003 +[2026-03-01 07:52:48] (step=0021179) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.1438074740755235, LR: 0.0003 +[2026-03-01 07:52:56] (step=0021180) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.144003130502837, LR: 0.0003 +[2026-03-01 07:53:04] (step=0021181) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.144198786930151, LR: 0.0003 +[2026-03-01 07:53:12] (step=0021182) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.144394443357465, LR: 0.0003 +[2026-03-01 07:53:20] (step=0021183) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.144590099784778, LR: 0.0003 +[2026-03-01 07:53:27] (step=0021184) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.144785756212092, LR: 0.0003 +[2026-03-01 07:53:35] (step=0021185) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 4.144981412639405, LR: 0.0003 +[2026-03-01 07:53:43] (step=0021186) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.145177069066719, LR: 0.0003 +[2026-03-01 07:53:51] (step=0021187) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 4.145372725494033, LR: 0.0003 +[2026-03-01 07:53:59] (step=0021188) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.145568381921346, LR: 0.0003 +[2026-03-01 07:54:07] (step=0021189) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.14576403834866, LR: 0.0003 +[2026-03-01 07:54:15] (step=0021190) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.145959694775973, LR: 0.0003 +[2026-03-01 07:54:22] (step=0021191) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.146155351203287, LR: 0.0003 +[2026-03-01 07:54:30] (step=0021192) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.1463510076306, LR: 0.0003 +[2026-03-01 07:54:38] (step=0021193) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 4.146546664057914, LR: 0.0003 +[2026-03-01 07:54:46] (step=0021194) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.146742320485228, LR: 0.0003 +[2026-03-01 07:54:54] (step=0021195) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.146937976912541, LR: 0.0003 +[2026-03-01 07:55:02] (step=0021196) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 4.147133633339855, LR: 0.0003 +[2026-03-01 07:55:09] (step=0021197) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.1473292897671685, LR: 0.0003 +[2026-03-01 07:55:17] (step=0021198) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.1475249461944825, LR: 0.0003 +[2026-03-01 07:55:25] (step=0021199) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 4.1477206026217965, LR: 0.0003 +[2026-03-01 07:55:33] (step=0021200) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.14791625904911, LR: 0.0003 +[2026-03-01 07:55:41] (step=0021201) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.148111915476424, LR: 0.0003 +[2026-03-01 07:55:49] (step=0021202) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.148307571903737, LR: 0.0003 +[2026-03-01 07:55:57] (step=0021203) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.148503228331051, LR: 0.0003 +[2026-03-01 07:56:05] (step=0021204) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.148698884758364, LR: 0.0003 +[2026-03-01 07:56:12] (step=0021205) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 4.148894541185678, LR: 0.0003 +[2026-03-01 07:56:20] (step=0021206) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.149090197612992, LR: 0.0003 +[2026-03-01 07:56:28] (step=0021207) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 4.149285854040305, LR: 0.0003 +[2026-03-01 07:56:36] (step=0021208) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.149481510467619, LR: 0.0003 +[2026-03-01 07:56:44] (step=0021209) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.149677166894932, LR: 0.0003 +[2026-03-01 07:56:52] (step=0021210) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.149872823322246, LR: 0.0003 +[2026-03-01 07:56:59] (step=0021211) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.15006847974956, LR: 0.0003 +[2026-03-01 07:57:07] (step=0021212) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.150264136176873, LR: 0.0003 +[2026-03-01 07:57:15] (step=0021213) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.150459792604187, LR: 0.0003 +[2026-03-01 07:57:23] (step=0021214) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.1506554490315, LR: 0.0003 +[2026-03-01 07:57:31] (step=0021215) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.150851105458814, LR: 0.0003 +[2026-03-01 07:57:39] (step=0021216) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.1510467618861275, LR: 0.0003 +[2026-03-01 07:57:46] (step=0021217) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.1512424183134415, LR: 0.0003 +[2026-03-01 07:57:54] (step=0021218) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.1514380747407555, LR: 0.0003 +[2026-03-01 07:58:02] (step=0021219) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.151633731168069, LR: 0.0003 +[2026-03-01 07:58:10] (step=0021220) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.151829387595383, LR: 0.0003 +[2026-03-01 07:58:18] (step=0021221) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 4.152025044022696, LR: 0.0003 +[2026-03-01 07:58:26] (step=0021222) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 4.15222070045001, LR: 0.0003 +[2026-03-01 07:58:34] (step=0021223) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.152416356877324, LR: 0.0003 +[2026-03-01 07:58:41] (step=0021224) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.152612013304637, LR: 0.0003 +[2026-03-01 07:58:49] (step=0021225) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.152807669731951, LR: 0.0003 +[2026-03-01 07:58:57] (step=0021226) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.153003326159264, LR: 0.0003 +[2026-03-01 07:59:05] (step=0021227) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.153198982586578, LR: 0.0003 +[2026-03-01 07:59:13] (step=0021228) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.153394639013891, LR: 0.0003 +[2026-03-01 07:59:21] (step=0021229) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 4.153590295441205, LR: 0.0003 +[2026-03-01 07:59:28] (step=0021230) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.153785951868519, LR: 0.0003 +[2026-03-01 07:59:36] (step=0021231) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.153981608295832, LR: 0.0003 +[2026-03-01 07:59:44] (step=0021232) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.154177264723146, LR: 0.0003 +[2026-03-01 07:59:52] (step=0021233) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.154372921150459, LR: 0.0003 +[2026-03-01 08:00:00] (step=0021234) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.154568577577773, LR: 0.0003 +[2026-03-01 08:00:08] (step=0021235) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.154764234005087, LR: 0.0003 +[2026-03-01 08:00:15] (step=0021236) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.1549598904324005, LR: 0.0003 +[2026-03-01 08:00:23] (step=0021237) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.1551555468597146, LR: 0.0003 +[2026-03-01 08:00:31] (step=0021238) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.155351203287028, LR: 0.0003 +[2026-03-01 08:00:39] (step=0021239) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.155546859714342, LR: 0.0003 +[2026-03-01 08:00:47] (step=0021240) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 4.155742516141656, LR: 0.0003 +[2026-03-01 08:00:55] (step=0021241) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.155938172568969, LR: 0.0003 +[2026-03-01 08:01:03] (step=0021242) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.156133828996283, LR: 0.0003 +[2026-03-01 08:01:10] (step=0021243) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.156329485423596, LR: 0.0003 +[2026-03-01 08:01:18] (step=0021244) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.15652514185091, LR: 0.0003 +[2026-03-01 08:01:26] (step=0021245) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.156720798278223, LR: 0.0003 +[2026-03-01 08:01:34] (step=0021246) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.156916454705537, LR: 0.0003 +[2026-03-01 08:01:42] (step=0021247) Train Loss: 0.4408, Train Steps/Sec: 0.12, Epoch: 4.157112111132851, LR: 0.0003 +[2026-03-01 08:01:50] (step=0021248) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.157307767560164, LR: 0.0003 +[2026-03-01 08:01:58] (step=0021249) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.157503423987478, LR: 0.0003 +[2026-03-01 08:02:06] (step=0021250) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.157699080414791, LR: 0.0003 +[2026-03-01 08:02:13] (step=0021251) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.157894736842105, LR: 0.0003 +[2026-03-01 08:02:21] (step=0021252) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.158090393269419, LR: 0.0003 +[2026-03-01 08:02:29] (step=0021253) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 4.158286049696732, LR: 0.0003 +[2026-03-01 08:02:37] (step=0021254) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 4.158481706124046, LR: 0.0003 +[2026-03-01 08:02:45] (step=0021255) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.15867736255136, LR: 0.0003 +[2026-03-01 08:02:53] (step=0021256) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.158873018978674, LR: 0.0003 +[2026-03-01 08:03:00] (step=0021257) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.159068675405987, LR: 0.0003 +[2026-03-01 08:03:08] (step=0021258) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.159264331833301, LR: 0.0003 +[2026-03-01 08:03:16] (step=0021259) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.159459988260615, LR: 0.0003 +[2026-03-01 08:03:24] (step=0021260) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.159655644687928, LR: 0.0003 +[2026-03-01 08:03:32] (step=0021261) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.159851301115242, LR: 0.0003 +[2026-03-01 08:03:40] (step=0021262) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.160046957542555, LR: 0.0003 +[2026-03-01 08:03:48] (step=0021263) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.160242613969869, LR: 0.0003 +[2026-03-01 08:03:55] (step=0021264) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.160438270397183, LR: 0.0003 +[2026-03-01 08:04:03] (step=0021265) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.160633926824496, LR: 0.0003 +[2026-03-01 08:04:11] (step=0021266) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.16082958325181, LR: 0.0003 +[2026-03-01 08:04:19] (step=0021267) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.161025239679123, LR: 0.0003 +[2026-03-01 08:04:27] (step=0021268) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.161220896106437, LR: 0.0003 +[2026-03-01 08:04:35] (step=0021269) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.16141655253375, LR: 0.0003 +[2026-03-01 08:04:42] (step=0021270) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.161612208961064, LR: 0.0003 +[2026-03-01 08:04:50] (step=0021271) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.161807865388378, LR: 0.0003 +[2026-03-01 08:04:58] (step=0021272) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.162003521815691, LR: 0.0003 +[2026-03-01 08:05:06] (step=0021273) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.1621991782430054, LR: 0.0003 +[2026-03-01 08:05:14] (step=0021274) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 4.162394834670319, LR: 0.0003 +[2026-03-01 08:05:22] (step=0021275) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 4.162590491097633, LR: 0.0003 +[2026-03-01 08:05:29] (step=0021276) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.162786147524947, LR: 0.0003 +[2026-03-01 08:05:37] (step=0021277) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.16298180395226, LR: 0.0003 +[2026-03-01 08:05:45] (step=0021278) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.163177460379574, LR: 0.0003 +[2026-03-01 08:05:53] (step=0021279) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.163373116806887, LR: 0.0003 +[2026-03-01 08:06:01] (step=0021280) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.163568773234201, LR: 0.0003 +[2026-03-01 08:06:09] (step=0021281) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.163764429661514, LR: 0.0003 +[2026-03-01 08:06:17] (step=0021282) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.163960086088828, LR: 0.0003 +[2026-03-01 08:06:24] (step=0021283) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.164155742516142, LR: 0.0003 +[2026-03-01 08:06:32] (step=0021284) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.164351398943455, LR: 0.0003 +[2026-03-01 08:06:40] (step=0021285) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.164547055370769, LR: 0.0003 +[2026-03-01 08:06:48] (step=0021286) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 4.164742711798082, LR: 0.0003 +[2026-03-01 08:06:56] (step=0021287) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.164938368225396, LR: 0.0003 +[2026-03-01 08:07:04] (step=0021288) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.16513402465271, LR: 0.0003 +[2026-03-01 08:07:11] (step=0021289) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.165329681080023, LR: 0.0003 +[2026-03-01 08:07:19] (step=0021290) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.165525337507337, LR: 0.0003 +[2026-03-01 08:07:27] (step=0021291) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.1657209939346505, LR: 0.0003 +[2026-03-01 08:07:35] (step=0021292) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 4.1659166503619645, LR: 0.0003 +[2026-03-01 08:07:43] (step=0021293) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.1661123067892785, LR: 0.0003 +[2026-03-01 08:07:51] (step=0021294) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.166307963216592, LR: 0.0003 +[2026-03-01 08:07:59] (step=0021295) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.166503619643906, LR: 0.0003 +[2026-03-01 08:08:07] (step=0021296) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.166699276071219, LR: 0.0003 +[2026-03-01 08:08:14] (step=0021297) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.166894932498533, LR: 0.0003 +[2026-03-01 08:08:22] (step=0021298) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.167090588925846, LR: 0.0003 +[2026-03-01 08:08:30] (step=0021299) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.16728624535316, LR: 0.0003 +[2026-03-01 08:08:38] (step=0021300) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 4.167481901780474, LR: 0.0003 +[2026-03-01 08:08:46] (step=0021301) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.167677558207787, LR: 0.0003 +[2026-03-01 08:08:54] (step=0021302) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.167873214635101, LR: 0.0003 +[2026-03-01 08:09:02] (step=0021303) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 4.168068871062414, LR: 0.0003 +[2026-03-01 08:09:09] (step=0021304) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 4.168264527489728, LR: 0.0003 +[2026-03-01 08:09:17] (step=0021305) Train Loss: 0.4698, Train Steps/Sec: 0.13, Epoch: 4.168460183917042, LR: 0.0003 +[2026-03-01 08:09:25] (step=0021306) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 4.168655840344355, LR: 0.0003 +[2026-03-01 08:09:33] (step=0021307) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.168851496771669, LR: 0.0003 +[2026-03-01 08:09:41] (step=0021308) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.169047153198982, LR: 0.0003 +[2026-03-01 08:09:49] (step=0021309) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.169242809626296, LR: 0.0003 +[2026-03-01 08:09:56] (step=0021310) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.1694384660536095, LR: 0.0003 +[2026-03-01 08:10:04] (step=0021311) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.1696341224809235, LR: 0.0003 +[2026-03-01 08:10:12] (step=0021312) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 4.1698297789082375, LR: 0.0003 +[2026-03-01 08:10:20] (step=0021313) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.170025435335551, LR: 0.0003 +[2026-03-01 08:10:28] (step=0021314) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.170221091762865, LR: 0.0003 +[2026-03-01 08:10:36] (step=0021315) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.170416748190178, LR: 0.0003 +[2026-03-01 08:10:43] (step=0021316) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.170612404617492, LR: 0.0003 +[2026-03-01 08:10:51] (step=0021317) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.170808061044806, LR: 0.0003 +[2026-03-01 08:10:59] (step=0021318) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.171003717472119, LR: 0.0003 +[2026-03-01 08:11:07] (step=0021319) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.171199373899433, LR: 0.0003 +[2026-03-01 08:11:15] (step=0021320) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.171395030326746, LR: 0.0003 +[2026-03-01 08:11:23] (step=0021321) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.17159068675406, LR: 0.0003 +[2026-03-01 08:11:30] (step=0021322) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.171786343181373, LR: 0.0003 +[2026-03-01 08:11:38] (step=0021323) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.171981999608687, LR: 0.0003 +[2026-03-01 08:11:46] (step=0021324) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.172177656036001, LR: 0.0003 +[2026-03-01 08:11:54] (step=0021325) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.172373312463314, LR: 0.0003 +[2026-03-01 08:12:02] (step=0021326) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.172568968890628, LR: 0.0003 +[2026-03-01 08:12:10] (step=0021327) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.172764625317941, LR: 0.0003 +[2026-03-01 08:12:18] (step=0021328) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 4.172960281745255, LR: 0.0003 +[2026-03-01 08:12:25] (step=0021329) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.173155938172569, LR: 0.0003 +[2026-03-01 08:12:33] (step=0021330) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.1733515945998825, LR: 0.0003 +[2026-03-01 08:12:41] (step=0021331) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 4.1735472510271965, LR: 0.0003 +[2026-03-01 08:12:49] (step=0021332) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.17374290745451, LR: 0.0003 +[2026-03-01 08:12:57] (step=0021333) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.173938563881824, LR: 0.0003 +[2026-03-01 08:13:05] (step=0021334) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.174134220309137, LR: 0.0003 +[2026-03-01 08:13:12] (step=0021335) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.174329876736451, LR: 0.0003 +[2026-03-01 08:13:20] (step=0021336) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.174525533163765, LR: 0.0003 +[2026-03-01 08:13:28] (step=0021337) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.174721189591078, LR: 0.0003 +[2026-03-01 08:13:36] (step=0021338) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.174916846018392, LR: 0.0003 +[2026-03-01 08:13:44] (step=0021339) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.175112502445705, LR: 0.0003 +[2026-03-01 08:13:52] (step=0021340) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.175308158873019, LR: 0.0003 +[2026-03-01 08:13:59] (step=0021341) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.175503815300333, LR: 0.0003 +[2026-03-01 08:14:07] (step=0021342) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.175699471727646, LR: 0.0003 +[2026-03-01 08:14:15] (step=0021343) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 4.17589512815496, LR: 0.0003 +[2026-03-01 08:14:23] (step=0021344) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.176090784582273, LR: 0.0003 +[2026-03-01 08:14:31] (step=0021345) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.176286441009587, LR: 0.0003 +[2026-03-01 08:14:39] (step=0021346) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.176482097436901, LR: 0.0003 +[2026-03-01 08:14:47] (step=0021347) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 4.176677753864214, LR: 0.0003 +[2026-03-01 08:14:55] (step=0021348) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.176873410291528, LR: 0.0003 +[2026-03-01 08:15:02] (step=0021349) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.1770690667188415, LR: 0.0003 +[2026-03-01 08:15:10] (step=0021350) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.1772647231461555, LR: 0.0003 +[2026-03-01 08:15:18] (step=0021351) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.177460379573469, LR: 0.0003 +[2026-03-01 08:15:26] (step=0021352) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.177656036000783, LR: 0.0003 +[2026-03-01 08:15:34] (step=0021353) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.177851692428097, LR: 0.0003 +[2026-03-01 08:15:42] (step=0021354) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.17804734885541, LR: 0.0003 +[2026-03-01 08:15:50] (step=0021355) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.178243005282724, LR: 0.0003 +[2026-03-01 08:15:57] (step=0021356) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.178438661710037, LR: 0.0003 +[2026-03-01 08:16:05] (step=0021357) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.178634318137351, LR: 0.0003 +[2026-03-01 08:16:13] (step=0021358) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 4.178829974564665, LR: 0.0003 +[2026-03-01 08:16:21] (step=0021359) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.179025630991978, LR: 0.0003 +[2026-03-01 08:16:29] (step=0021360) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.179221287419292, LR: 0.0003 +[2026-03-01 08:16:37] (step=0021361) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.179416943846605, LR: 0.0003 +[2026-03-01 08:16:44] (step=0021362) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.179612600273919, LR: 0.0003 +[2026-03-01 08:16:52] (step=0021363) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.179808256701232, LR: 0.0003 +[2026-03-01 08:17:00] (step=0021364) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.180003913128546, LR: 0.0003 +[2026-03-01 08:17:08] (step=0021365) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.18019956955586, LR: 0.0003 +[2026-03-01 08:17:16] (step=0021366) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.180395225983173, LR: 0.0003 +[2026-03-01 08:17:24] (step=0021367) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.180590882410487, LR: 0.0003 +[2026-03-01 08:17:31] (step=0021368) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.1807865388378005, LR: 0.0003 +[2026-03-01 08:17:39] (step=0021369) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.1809821952651145, LR: 0.0003 +[2026-03-01 08:17:47] (step=0021370) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.1811778516924285, LR: 0.0003 +[2026-03-01 08:17:55] (step=0021371) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 4.181373508119742, LR: 0.0003 +[2026-03-01 08:18:03] (step=0021372) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.181569164547056, LR: 0.0003 +[2026-03-01 08:18:11] (step=0021373) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.181764820974369, LR: 0.0003 +[2026-03-01 08:18:18] (step=0021374) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.181960477401683, LR: 0.0003 +[2026-03-01 08:18:26] (step=0021375) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.182156133828996, LR: 0.0003 +[2026-03-01 08:18:34] (step=0021376) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.18235179025631, LR: 0.0003 +[2026-03-01 08:18:42] (step=0021377) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.182547446683624, LR: 0.0003 +[2026-03-01 08:18:50] (step=0021378) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.182743103110937, LR: 0.0003 +[2026-03-01 08:18:58] (step=0021379) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.182938759538251, LR: 0.0003 +[2026-03-01 08:19:05] (step=0021380) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.183134415965564, LR: 0.0003 +[2026-03-01 08:19:13] (step=0021381) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.183330072392878, LR: 0.0003 +[2026-03-01 08:19:21] (step=0021382) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.183525728820192, LR: 0.0003 +[2026-03-01 08:19:29] (step=0021383) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 4.183721385247505, LR: 0.0003 +[2026-03-01 08:19:37] (step=0021384) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.183917041674819, LR: 0.0003 +[2026-03-01 08:19:45] (step=0021385) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.184112698102132, LR: 0.0003 +[2026-03-01 08:19:53] (step=0021386) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.184308354529446, LR: 0.0003 +[2026-03-01 08:20:00] (step=0021387) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.1845040109567595, LR: 0.0003 +[2026-03-01 08:20:08] (step=0021388) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.1846996673840735, LR: 0.0003 +[2026-03-01 08:20:16] (step=0021389) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.1848953238113875, LR: 0.0003 +[2026-03-01 08:20:24] (step=0021390) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.185090980238701, LR: 0.0003 +[2026-03-01 08:20:32] (step=0021391) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.185286636666015, LR: 0.0003 +[2026-03-01 08:20:40] (step=0021392) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.185482293093328, LR: 0.0003 +[2026-03-01 08:20:48] (step=0021393) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.185677949520642, LR: 0.0003 +[2026-03-01 08:20:55] (step=0021394) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.185873605947956, LR: 0.0003 +[2026-03-01 08:21:03] (step=0021395) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.186069262375269, LR: 0.0003 +[2026-03-01 08:21:11] (step=0021396) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.186264918802583, LR: 0.0003 +[2026-03-01 08:21:19] (step=0021397) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 4.186460575229896, LR: 0.0003 +[2026-03-01 08:21:27] (step=0021398) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.18665623165721, LR: 0.0003 +[2026-03-01 08:21:35] (step=0021399) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.186851888084524, LR: 0.0003 +[2026-03-01 08:21:43] (step=0021400) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.187047544511837, LR: 0.0003 +[2026-03-01 08:21:50] (step=0021401) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.187243200939151, LR: 0.0003 +[2026-03-01 08:21:58] (step=0021402) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.187438857366464, LR: 0.0003 +[2026-03-01 08:22:06] (step=0021403) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.187634513793778, LR: 0.0003 +[2026-03-01 08:22:14] (step=0021404) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.187830170221091, LR: 0.0003 +[2026-03-01 08:22:22] (step=0021405) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.188025826648405, LR: 0.0003 +[2026-03-01 08:22:30] (step=0021406) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.188221483075719, LR: 0.0003 +[2026-03-01 08:22:37] (step=0021407) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.1884171395030325, LR: 0.0003 +[2026-03-01 08:22:45] (step=0021408) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.1886127959303465, LR: 0.0003 +[2026-03-01 08:22:53] (step=0021409) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.18880845235766, LR: 0.0003 +[2026-03-01 08:23:01] (step=0021410) Train Loss: 0.4700, Train Steps/Sec: 0.13, Epoch: 4.189004108784974, LR: 0.0003 +[2026-03-01 08:23:09] (step=0021411) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.189199765212288, LR: 0.0003 +[2026-03-01 08:23:17] (step=0021412) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 4.189395421639601, LR: 0.0003 +[2026-03-01 08:23:24] (step=0021413) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.189591078066915, LR: 0.0003 +[2026-03-01 08:23:32] (step=0021414) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.189786734494228, LR: 0.0003 +[2026-03-01 08:23:40] (step=0021415) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 4.189982390921542, LR: 0.0003 +[2026-03-01 08:23:48] (step=0021416) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.190178047348855, LR: 0.0003 +[2026-03-01 08:23:56] (step=0021417) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.190373703776169, LR: 0.0003 +[2026-03-01 08:24:04] (step=0021418) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.190569360203483, LR: 0.0003 +[2026-03-01 08:24:12] (step=0021419) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.190765016630796, LR: 0.0003 +[2026-03-01 08:24:19] (step=0021420) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.19096067305811, LR: 0.0003 +[2026-03-01 08:24:27] (step=0021421) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.191156329485423, LR: 0.0003 +[2026-03-01 08:24:35] (step=0021422) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.191351985912737, LR: 0.0003 +[2026-03-01 08:24:43] (step=0021423) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.191547642340051, LR: 0.0003 +[2026-03-01 08:24:51] (step=0021424) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.191743298767364, LR: 0.0003 +[2026-03-01 08:24:59] (step=0021425) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.191938955194678, LR: 0.0003 +[2026-03-01 08:25:06] (step=0021426) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.1921346116219915, LR: 0.0003 +[2026-03-01 08:25:14] (step=0021427) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 4.1923302680493055, LR: 0.0003 +[2026-03-01 08:25:22] (step=0021428) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.192525924476619, LR: 0.0003 +[2026-03-01 08:25:30] (step=0021429) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.192721580903933, LR: 0.0003 +[2026-03-01 08:25:38] (step=0021430) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.192917237331247, LR: 0.0003 +[2026-03-01 08:25:46] (step=0021431) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.19311289375856, LR: 0.0003 +[2026-03-01 08:25:53] (step=0021432) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.193308550185874, LR: 0.0003 +[2026-03-01 08:26:01] (step=0021433) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.193504206613187, LR: 0.0003 +[2026-03-01 08:26:09] (step=0021434) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.193699863040501, LR: 0.0003 +[2026-03-01 08:26:17] (step=0021435) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.193895519467815, LR: 0.0003 +[2026-03-01 08:26:25] (step=0021436) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.194091175895128, LR: 0.0003 +[2026-03-01 08:26:33] (step=0021437) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.194286832322442, LR: 0.0003 +[2026-03-01 08:26:41] (step=0021438) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.194482488749755, LR: 0.0003 +[2026-03-01 08:26:48] (step=0021439) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.194678145177069, LR: 0.0003 +[2026-03-01 08:26:56] (step=0021440) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.194873801604382, LR: 0.0003 +[2026-03-01 08:27:04] (step=0021441) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.195069458031696, LR: 0.0003 +[2026-03-01 08:27:12] (step=0021442) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.19526511445901, LR: 0.0003 +[2026-03-01 08:27:20] (step=0021443) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.195460770886323, LR: 0.0003 +[2026-03-01 08:27:28] (step=0021444) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.195656427313637, LR: 0.0003 +[2026-03-01 08:27:36] (step=0021445) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 4.1958520837409505, LR: 0.0003 +[2026-03-01 08:27:43] (step=0021446) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.1960477401682645, LR: 0.0003 +[2026-03-01 08:27:51] (step=0021447) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.1962433965955785, LR: 0.0003 +[2026-03-01 08:27:59] (step=0021448) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.196439053022892, LR: 0.0003 +[2026-03-01 08:28:07] (step=0021449) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.196634709450206, LR: 0.0003 +[2026-03-01 08:28:15] (step=0021450) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.196830365877519, LR: 0.0003 +[2026-03-01 08:28:23] (step=0021451) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.197026022304833, LR: 0.0003 +[2026-03-01 08:28:31] (step=0021452) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 4.197221678732146, LR: 0.0003 +[2026-03-01 08:28:38] (step=0021453) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.19741733515946, LR: 0.0003 +[2026-03-01 08:28:46] (step=0021454) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.197612991586774, LR: 0.0003 +[2026-03-01 08:28:54] (step=0021455) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.197808648014087, LR: 0.0003 +[2026-03-01 08:29:02] (step=0021456) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.198004304441401, LR: 0.0003 +[2026-03-01 08:29:10] (step=0021457) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.198199960868714, LR: 0.0003 +[2026-03-01 08:29:18] (step=0021458) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.198395617296028, LR: 0.0003 +[2026-03-01 08:29:26] (step=0021459) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.198591273723342, LR: 0.0003 +[2026-03-01 08:29:33] (step=0021460) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.198786930150655, LR: 0.0003 +[2026-03-01 08:29:41] (step=0021461) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.198982586577969, LR: 0.0003 +[2026-03-01 08:29:49] (step=0021462) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.199178243005282, LR: 0.0003 +[2026-03-01 08:29:57] (step=0021463) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.199373899432596, LR: 0.0003 +[2026-03-01 08:30:05] (step=0021464) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.19956955585991, LR: 0.0003 +[2026-03-01 08:30:13] (step=0021465) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.1997652122872235, LR: 0.0003 +[2026-03-01 08:30:20] (step=0021466) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.1999608687145376, LR: 0.0003 +[2026-03-01 08:30:28] (step=0021467) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.200156525141851, LR: 0.0003 +[2026-03-01 08:30:36] (step=0021468) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.200352181569165, LR: 0.0003 +[2026-03-01 08:30:44] (step=0021469) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.200547837996478, LR: 0.0003 +[2026-03-01 08:30:52] (step=0021470) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.200743494423792, LR: 0.0003 +[2026-03-01 08:31:00] (step=0021471) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.200939150851106, LR: 0.0003 +[2026-03-01 08:31:07] (step=0021472) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 4.201134807278419, LR: 0.0003 +[2026-03-01 08:31:15] (step=0021473) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.201330463705733, LR: 0.0003 +[2026-03-01 08:31:23] (step=0021474) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 4.201526120133046, LR: 0.0003 +[2026-03-01 08:31:31] (step=0021475) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.20172177656036, LR: 0.0003 +[2026-03-01 08:31:39] (step=0021476) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.201917432987674, LR: 0.0003 +[2026-03-01 08:31:47] (step=0021477) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.202113089414987, LR: 0.0003 +[2026-03-01 08:31:55] (step=0021478) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.202308745842301, LR: 0.0003 +[2026-03-01 08:32:02] (step=0021479) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.202504402269614, LR: 0.0003 +[2026-03-01 08:32:10] (step=0021480) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.202700058696928, LR: 0.0003 +[2026-03-01 08:32:18] (step=0021481) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.202895715124241, LR: 0.0003 +[2026-03-01 08:32:26] (step=0021482) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 4.203091371551555, LR: 0.0003 +[2026-03-01 08:32:34] (step=0021483) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.203287027978869, LR: 0.0003 +[2026-03-01 08:32:42] (step=0021484) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 4.203482684406183, LR: 0.0003 +[2026-03-01 08:32:49] (step=0021485) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.203678340833497, LR: 0.0003 +[2026-03-01 08:32:57] (step=0021486) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.20387399726081, LR: 0.0003 +[2026-03-01 08:33:05] (step=0021487) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.204069653688124, LR: 0.0003 +[2026-03-01 08:33:13] (step=0021488) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 4.204265310115438, LR: 0.0003 +[2026-03-01 08:33:21] (step=0021489) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 4.204460966542751, LR: 0.0003 +[2026-03-01 08:33:29] (step=0021490) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.204656622970065, LR: 0.0003 +[2026-03-01 08:33:37] (step=0021491) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.204852279397378, LR: 0.0003 +[2026-03-01 08:33:44] (step=0021492) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.205047935824692, LR: 0.0003 +[2026-03-01 08:33:52] (step=0021493) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.205243592252005, LR: 0.0003 +[2026-03-01 08:34:00] (step=0021494) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.205439248679319, LR: 0.0003 +[2026-03-01 08:34:08] (step=0021495) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.205634905106633, LR: 0.0003 +[2026-03-01 08:34:16] (step=0021496) Train Loss: 0.4583, Train Steps/Sec: 0.12, Epoch: 4.205830561533946, LR: 0.0003 +[2026-03-01 08:34:24] (step=0021497) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.20602621796126, LR: 0.0003 +[2026-03-01 08:34:32] (step=0021498) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.206221874388573, LR: 0.0003 +[2026-03-01 08:34:39] (step=0021499) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.206417530815887, LR: 0.0003 +[2026-03-01 08:34:47] (step=0021500) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 4.206613187243201, LR: 0.0003 +[2026-03-01 08:34:47] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0021500/ +[2026-03-01 08:34:55] (step=0021501) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.206808843670514, LR: 0.0003 +[2026-03-01 08:35:03] (step=0021502) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.2070045000978284, LR: 0.0003 +[2026-03-01 08:35:11] (step=0021503) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 4.207200156525142, LR: 0.0003 +[2026-03-01 08:35:19] (step=0021504) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.207395812952456, LR: 0.0003 +[2026-03-01 08:35:27] (step=0021505) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.207591469379769, LR: 0.0003 +[2026-03-01 08:35:34] (step=0021506) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.207787125807083, LR: 0.0003 +[2026-03-01 08:35:42] (step=0021507) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.207982782234397, LR: 0.0003 +[2026-03-01 08:35:50] (step=0021508) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.20817843866171, LR: 0.0003 +[2026-03-01 08:35:58] (step=0021509) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.208374095089024, LR: 0.0003 +[2026-03-01 08:36:06] (step=0021510) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 4.208569751516337, LR: 0.0003 +[2026-03-01 08:36:14] (step=0021511) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.208765407943651, LR: 0.0003 +[2026-03-01 08:36:21] (step=0021512) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.208961064370965, LR: 0.0003 +[2026-03-01 08:36:29] (step=0021513) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.209156720798278, LR: 0.0003 +[2026-03-01 08:36:37] (step=0021514) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.209352377225592, LR: 0.0003 +[2026-03-01 08:36:45] (step=0021515) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.209548033652905, LR: 0.0003 +[2026-03-01 08:36:53] (step=0021516) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.209743690080219, LR: 0.0003 +[2026-03-01 08:37:01] (step=0021517) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 4.209939346507533, LR: 0.0003 +[2026-03-01 08:37:09] (step=0021518) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.210135002934846, LR: 0.0003 +[2026-03-01 08:37:16] (step=0021519) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.21033065936216, LR: 0.0003 +[2026-03-01 08:37:24] (step=0021520) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.2105263157894735, LR: 0.0003 +[2026-03-01 08:37:32] (step=0021521) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.2107219722167875, LR: 0.0003 +[2026-03-01 08:37:40] (step=0021522) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.210917628644101, LR: 0.0003 +[2026-03-01 08:37:48] (step=0021523) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.211113285071415, LR: 0.0003 +[2026-03-01 08:37:56] (step=0021524) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.211308941498729, LR: 0.0003 +[2026-03-01 08:38:03] (step=0021525) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.211504597926042, LR: 0.0003 +[2026-03-01 08:38:11] (step=0021526) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.211700254353356, LR: 0.0003 +[2026-03-01 08:38:19] (step=0021527) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.211895910780669, LR: 0.0003 +[2026-03-01 08:38:27] (step=0021528) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.212091567207983, LR: 0.0003 +[2026-03-01 08:38:35] (step=0021529) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.212287223635297, LR: 0.0003 +[2026-03-01 08:38:43] (step=0021530) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.21248288006261, LR: 0.0003 +[2026-03-01 08:38:50] (step=0021531) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.212678536489924, LR: 0.0003 +[2026-03-01 08:38:58] (step=0021532) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.212874192917237, LR: 0.0003 +[2026-03-01 08:39:06] (step=0021533) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.213069849344551, LR: 0.0003 +[2026-03-01 08:39:14] (step=0021534) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.213265505771864, LR: 0.0003 +[2026-03-01 08:39:22] (step=0021535) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.213461162199178, LR: 0.0003 +[2026-03-01 08:39:30] (step=0021536) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 4.213656818626492, LR: 0.0003 +[2026-03-01 08:39:37] (step=0021537) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.213852475053805, LR: 0.0003 +[2026-03-01 08:39:45] (step=0021538) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.214048131481119, LR: 0.0003 +[2026-03-01 08:39:53] (step=0021539) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.2142437879084325, LR: 0.0003 +[2026-03-01 08:40:01] (step=0021540) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.2144394443357465, LR: 0.0003 +[2026-03-01 08:40:09] (step=0021541) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 4.2146351007630605, LR: 0.0003 +[2026-03-01 08:40:17] (step=0021542) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.214830757190374, LR: 0.0003 +[2026-03-01 08:40:25] (step=0021543) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.215026413617688, LR: 0.0003 +[2026-03-01 08:40:33] (step=0021544) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.215222070045001, LR: 0.0003 +[2026-03-01 08:40:40] (step=0021545) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.215417726472315, LR: 0.0003 +[2026-03-01 08:40:48] (step=0021546) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.215613382899628, LR: 0.0003 +[2026-03-01 08:40:56] (step=0021547) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.215809039326942, LR: 0.0003 +[2026-03-01 08:41:04] (step=0021548) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.216004695754256, LR: 0.0003 +[2026-03-01 08:41:12] (step=0021549) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 4.216200352181569, LR: 0.0003 +[2026-03-01 08:41:20] (step=0021550) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.216396008608883, LR: 0.0003 +[2026-03-01 08:41:28] (step=0021551) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 4.216591665036196, LR: 0.0003 +[2026-03-01 08:41:35] (step=0021552) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.21678732146351, LR: 0.0003 +[2026-03-01 08:41:43] (step=0021553) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.216982977890824, LR: 0.0003 +[2026-03-01 08:41:51] (step=0021554) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.217178634318137, LR: 0.0003 +[2026-03-01 08:41:59] (step=0021555) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.217374290745451, LR: 0.0003 +[2026-03-01 08:42:07] (step=0021556) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.217569947172764, LR: 0.0003 +[2026-03-01 08:42:15] (step=0021557) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 4.217765603600078, LR: 0.0003 +[2026-03-01 08:42:22] (step=0021558) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 4.2179612600273915, LR: 0.0003 +[2026-03-01 08:42:30] (step=0021559) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.2181569164547055, LR: 0.0003 +[2026-03-01 08:42:38] (step=0021560) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 4.2183525728820195, LR: 0.0003 +[2026-03-01 08:42:46] (step=0021561) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.218548229309333, LR: 0.0003 +[2026-03-01 08:42:54] (step=0021562) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.218743885736647, LR: 0.0003 +[2026-03-01 08:43:02] (step=0021563) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.21893954216396, LR: 0.0003 +[2026-03-01 08:43:09] (step=0021564) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.219135198591274, LR: 0.0003 +[2026-03-01 08:43:17] (step=0021565) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.219330855018588, LR: 0.0003 +[2026-03-01 08:43:25] (step=0021566) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.219526511445901, LR: 0.0003 +[2026-03-01 08:43:33] (step=0021567) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.219722167873215, LR: 0.0003 +[2026-03-01 08:43:41] (step=0021568) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.219917824300528, LR: 0.0003 +[2026-03-01 08:43:49] (step=0021569) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 4.220113480727842, LR: 0.0003 +[2026-03-01 08:43:56] (step=0021570) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.220309137155156, LR: 0.0003 +[2026-03-01 08:44:04] (step=0021571) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.220504793582469, LR: 0.0003 +[2026-03-01 08:44:12] (step=0021572) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.220700450009783, LR: 0.0003 +[2026-03-01 08:44:20] (step=0021573) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.220896106437096, LR: 0.0003 +[2026-03-01 08:44:28] (step=0021574) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 4.22109176286441, LR: 0.0003 +[2026-03-01 08:44:36] (step=0021575) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.221287419291723, LR: 0.0003 +[2026-03-01 08:44:43] (step=0021576) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 4.221483075719037, LR: 0.0003 +[2026-03-01 08:44:51] (step=0021577) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 4.221678732146351, LR: 0.0003 +[2026-03-01 08:44:59] (step=0021578) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 4.2218743885736645, LR: 0.0003 +[2026-03-01 08:45:07] (step=0021579) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.2220700450009785, LR: 0.0003 +[2026-03-01 08:45:15] (step=0021580) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.222265701428292, LR: 0.0003 +[2026-03-01 08:45:23] (step=0021581) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.222461357855606, LR: 0.0003 +[2026-03-01 08:45:31] (step=0021582) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 4.22265701428292, LR: 0.0003 +[2026-03-01 08:45:38] (step=0021583) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.222852670710233, LR: 0.0003 +[2026-03-01 08:45:46] (step=0021584) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.223048327137547, LR: 0.0003 +[2026-03-01 08:45:54] (step=0021585) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.22324398356486, LR: 0.0003 +[2026-03-01 08:46:02] (step=0021586) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.223439639992174, LR: 0.0003 +[2026-03-01 08:46:10] (step=0021587) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.223635296419487, LR: 0.0003 +[2026-03-01 08:46:18] (step=0021588) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 4.223830952846801, LR: 0.0003 +[2026-03-01 08:46:25] (step=0021589) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.224026609274115, LR: 0.0003 +[2026-03-01 08:46:33] (step=0021590) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.224222265701428, LR: 0.0003 +[2026-03-01 08:46:41] (step=0021591) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.224417922128742, LR: 0.0003 +[2026-03-01 08:46:49] (step=0021592) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 4.224613578556055, LR: 0.0003 +[2026-03-01 08:46:57] (step=0021593) Train Loss: 0.4247, Train Steps/Sec: 0.13, Epoch: 4.224809234983369, LR: 0.0003 +[2026-03-01 08:47:05] (step=0021594) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.225004891410683, LR: 0.0003 +[2026-03-01 08:47:13] (step=0021595) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.225200547837996, LR: 0.0003 +[2026-03-01 08:47:21] (step=0021596) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 4.22539620426531, LR: 0.0003 +[2026-03-01 08:47:28] (step=0021597) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.2255918606926235, LR: 0.0003 +[2026-03-01 08:47:36] (step=0021598) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.2257875171199375, LR: 0.0003 +[2026-03-01 08:47:44] (step=0021599) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.225983173547251, LR: 0.0003 +[2026-03-01 08:47:52] (step=0021600) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.226178829974565, LR: 0.0003 +[2026-03-01 08:48:00] (step=0021601) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.226374486401879, LR: 0.0003 +[2026-03-01 08:48:08] (step=0021602) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.226570142829192, LR: 0.0003 +[2026-03-01 08:48:15] (step=0021603) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.226765799256506, LR: 0.0003 +[2026-03-01 08:48:23] (step=0021604) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.226961455683819, LR: 0.0003 +[2026-03-01 08:48:31] (step=0021605) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.227157112111133, LR: 0.0003 +[2026-03-01 08:48:39] (step=0021606) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.227352768538447, LR: 0.0003 +[2026-03-01 08:48:47] (step=0021607) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.22754842496576, LR: 0.0003 +[2026-03-01 08:48:55] (step=0021608) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.227744081393074, LR: 0.0003 +[2026-03-01 08:49:03] (step=0021609) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.227939737820387, LR: 0.0003 +[2026-03-01 08:49:10] (step=0021610) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.228135394247701, LR: 0.0003 +[2026-03-01 08:49:18] (step=0021611) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.228331050675014, LR: 0.0003 +[2026-03-01 08:49:26] (step=0021612) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.228526707102328, LR: 0.0003 +[2026-03-01 08:49:34] (step=0021613) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.228722363529642, LR: 0.0003 +[2026-03-01 08:49:42] (step=0021614) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.228918019956955, LR: 0.0003 +[2026-03-01 08:49:50] (step=0021615) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.229113676384269, LR: 0.0003 +[2026-03-01 08:49:57] (step=0021616) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.2293093328115825, LR: 0.0003 +[2026-03-01 08:50:05] (step=0021617) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.2295049892388965, LR: 0.0003 +[2026-03-01 08:50:13] (step=0021618) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.2297006456662105, LR: 0.0003 +[2026-03-01 08:50:21] (step=0021619) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.229896302093524, LR: 0.0003 +[2026-03-01 08:50:29] (step=0021620) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.230091958520838, LR: 0.0003 +[2026-03-01 08:50:37] (step=0021621) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 4.230287614948151, LR: 0.0003 +[2026-03-01 08:50:44] (step=0021622) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.230483271375465, LR: 0.0003 +[2026-03-01 08:50:52] (step=0021623) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.230678927802778, LR: 0.0003 +[2026-03-01 08:51:00] (step=0021624) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 4.230874584230092, LR: 0.0003 +[2026-03-01 08:51:08] (step=0021625) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.231070240657406, LR: 0.0003 +[2026-03-01 08:51:16] (step=0021626) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.231265897084719, LR: 0.0003 +[2026-03-01 08:51:24] (step=0021627) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.231461553512033, LR: 0.0003 +[2026-03-01 08:51:32] (step=0021628) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.231657209939346, LR: 0.0003 +[2026-03-01 08:51:39] (step=0021629) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.23185286636666, LR: 0.0003 +[2026-03-01 08:51:47] (step=0021630) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.232048522793974, LR: 0.0003 +[2026-03-01 08:51:55] (step=0021631) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.232244179221287, LR: 0.0003 +[2026-03-01 08:52:03] (step=0021632) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.232439835648601, LR: 0.0003 +[2026-03-01 08:52:11] (step=0021633) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.232635492075914, LR: 0.0003 +[2026-03-01 08:52:19] (step=0021634) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.232831148503228, LR: 0.0003 +[2026-03-01 08:52:26] (step=0021635) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.233026804930542, LR: 0.0003 +[2026-03-01 08:52:34] (step=0021636) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 4.2332224613578555, LR: 0.0003 +[2026-03-01 08:52:42] (step=0021637) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.2334181177851695, LR: 0.0003 +[2026-03-01 08:52:50] (step=0021638) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.233613774212483, LR: 0.0003 +[2026-03-01 08:52:58] (step=0021639) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.233809430639797, LR: 0.0003 +[2026-03-01 08:53:06] (step=0021640) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.23400508706711, LR: 0.0003 +[2026-03-01 08:53:14] (step=0021641) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.234200743494424, LR: 0.0003 +[2026-03-01 08:53:21] (step=0021642) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.234396399921738, LR: 0.0003 +[2026-03-01 08:53:29] (step=0021643) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.234592056349051, LR: 0.0003 +[2026-03-01 08:53:37] (step=0021644) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.234787712776365, LR: 0.0003 +[2026-03-01 08:53:45] (step=0021645) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.234983369203678, LR: 0.0003 +[2026-03-01 08:53:53] (step=0021646) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.235179025630992, LR: 0.0003 +[2026-03-01 08:54:01] (step=0021647) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.235374682058306, LR: 0.0003 +[2026-03-01 08:54:09] (step=0021648) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.235570338485619, LR: 0.0003 +[2026-03-01 08:54:16] (step=0021649) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.235765994912933, LR: 0.0003 +[2026-03-01 08:54:24] (step=0021650) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.235961651340246, LR: 0.0003 +[2026-03-01 08:54:32] (step=0021651) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.23615730776756, LR: 0.0003 +[2026-03-01 08:54:40] (step=0021652) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.236352964194873, LR: 0.0003 +[2026-03-01 08:54:48] (step=0021653) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.236548620622187, LR: 0.0003 +[2026-03-01 08:54:56] (step=0021654) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 4.236744277049501, LR: 0.0003 +[2026-03-01 08:55:04] (step=0021655) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.2369399334768145, LR: 0.0003 +[2026-03-01 08:55:11] (step=0021656) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.2371355899041285, LR: 0.0003 +[2026-03-01 08:55:19] (step=0021657) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.237331246331442, LR: 0.0003 +[2026-03-01 08:55:27] (step=0021658) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.237526902758756, LR: 0.0003 +[2026-03-01 08:55:35] (step=0021659) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.23772255918607, LR: 0.0003 +[2026-03-01 08:55:43] (step=0021660) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.237918215613383, LR: 0.0003 +[2026-03-01 08:55:51] (step=0021661) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.238113872040697, LR: 0.0003 +[2026-03-01 08:55:58] (step=0021662) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.23830952846801, LR: 0.0003 +[2026-03-01 08:56:06] (step=0021663) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.238505184895324, LR: 0.0003 +[2026-03-01 08:56:14] (step=0021664) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.238700841322637, LR: 0.0003 +[2026-03-01 08:56:22] (step=0021665) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.238896497749951, LR: 0.0003 +[2026-03-01 08:56:30] (step=0021666) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 4.239092154177265, LR: 0.0003 +[2026-03-01 08:56:38] (step=0021667) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 4.239287810604578, LR: 0.0003 +[2026-03-01 08:56:45] (step=0021668) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.239483467031892, LR: 0.0003 +[2026-03-01 08:56:53] (step=0021669) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.239679123459205, LR: 0.0003 +[2026-03-01 08:57:01] (step=0021670) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.239874779886519, LR: 0.0003 +[2026-03-01 08:57:09] (step=0021671) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.240070436313833, LR: 0.0003 +[2026-03-01 08:57:17] (step=0021672) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 4.240266092741146, LR: 0.0003 +[2026-03-01 08:57:25] (step=0021673) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.24046174916846, LR: 0.0003 +[2026-03-01 08:57:33] (step=0021674) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.2406574055957735, LR: 0.0003 +[2026-03-01 08:57:40] (step=0021675) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.2408530620230875, LR: 0.0003 +[2026-03-01 08:57:48] (step=0021676) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.241048718450401, LR: 0.0003 +[2026-03-01 08:57:56] (step=0021677) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.241244374877715, LR: 0.0003 +[2026-03-01 08:58:04] (step=0021678) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 4.241440031305029, LR: 0.0003 +[2026-03-01 08:58:12] (step=0021679) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.241635687732342, LR: 0.0003 +[2026-03-01 08:58:20] (step=0021680) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.241831344159656, LR: 0.0003 +[2026-03-01 08:58:27] (step=0021681) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.242027000586969, LR: 0.0003 +[2026-03-01 08:58:35] (step=0021682) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.242222657014283, LR: 0.0003 +[2026-03-01 08:58:43] (step=0021683) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.242418313441597, LR: 0.0003 +[2026-03-01 08:58:51] (step=0021684) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.24261396986891, LR: 0.0003 +[2026-03-01 08:58:59] (step=0021685) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.242809626296224, LR: 0.0003 +[2026-03-01 08:59:07] (step=0021686) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.243005282723537, LR: 0.0003 +[2026-03-01 08:59:14] (step=0021687) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.243200939150851, LR: 0.0003 +[2026-03-01 08:59:22] (step=0021688) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.243396595578165, LR: 0.0003 +[2026-03-01 08:59:30] (step=0021689) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.243592252005478, LR: 0.0003 +[2026-03-01 08:59:38] (step=0021690) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.243787908432792, LR: 0.0003 +[2026-03-01 08:59:46] (step=0021691) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.243983564860105, LR: 0.0003 +[2026-03-01 08:59:54] (step=0021692) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.244179221287419, LR: 0.0003 +[2026-03-01 09:00:02] (step=0021693) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.2443748777147325, LR: 0.0003 +[2026-03-01 09:00:09] (step=0021694) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 4.2445705341420465, LR: 0.0003 +[2026-03-01 09:00:17] (step=0021695) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.2447661905693606, LR: 0.0003 +[2026-03-01 09:00:25] (step=0021696) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.244961846996674, LR: 0.0003 +[2026-03-01 09:00:33] (step=0021697) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.245157503423988, LR: 0.0003 +[2026-03-01 09:00:41] (step=0021698) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.245353159851301, LR: 0.0003 +[2026-03-01 09:00:49] (step=0021699) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 4.245548816278615, LR: 0.0003 +[2026-03-01 09:00:57] (step=0021700) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.245744472705929, LR: 0.0003 +[2026-03-01 09:01:04] (step=0021701) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.245940129133242, LR: 0.0003 +[2026-03-01 09:01:12] (step=0021702) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.246135785560556, LR: 0.0003 +[2026-03-01 09:01:20] (step=0021703) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.246331441987869, LR: 0.0003 +[2026-03-01 09:01:28] (step=0021704) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.246527098415183, LR: 0.0003 +[2026-03-01 09:01:36] (step=0021705) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.246722754842496, LR: 0.0003 +[2026-03-01 09:01:44] (step=0021706) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.24691841126981, LR: 0.0003 +[2026-03-01 09:01:52] (step=0021707) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.247114067697124, LR: 0.0003 +[2026-03-01 09:01:59] (step=0021708) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.247309724124437, LR: 0.0003 +[2026-03-01 09:02:07] (step=0021709) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 4.247505380551751, LR: 0.0003 +[2026-03-01 09:02:15] (step=0021710) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.247701036979064, LR: 0.0003 +[2026-03-01 09:02:23] (step=0021711) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.247896693406378, LR: 0.0003 +[2026-03-01 09:02:31] (step=0021712) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 4.248092349833692, LR: 0.0003 +[2026-03-01 09:02:39] (step=0021713) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 4.248288006261006, LR: 0.0003 +[2026-03-01 09:02:46] (step=0021714) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 4.24848366268832, LR: 0.0003 +[2026-03-01 09:02:54] (step=0021715) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.248679319115633, LR: 0.0003 +[2026-03-01 09:03:02] (step=0021716) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.248874975542947, LR: 0.0003 +[2026-03-01 09:03:10] (step=0021717) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.24907063197026, LR: 0.0003 +[2026-03-01 09:03:18] (step=0021718) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.249266288397574, LR: 0.0003 +[2026-03-01 09:03:26] (step=0021719) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 4.249461944824888, LR: 0.0003 +[2026-03-01 09:03:33] (step=0021720) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 4.249657601252201, LR: 0.0003 +[2026-03-01 09:03:41] (step=0021721) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.249853257679515, LR: 0.0003 +[2026-03-01 09:03:49] (step=0021722) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.250048914106828, LR: 0.0003 +[2026-03-01 09:03:57] (step=0021723) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.250244570534142, LR: 0.0003 +[2026-03-01 09:04:05] (step=0021724) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.250440226961456, LR: 0.0003 +[2026-03-01 09:04:13] (step=0021725) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.250635883388769, LR: 0.0003 +[2026-03-01 09:04:21] (step=0021726) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.250831539816083, LR: 0.0003 +[2026-03-01 09:04:28] (step=0021727) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.251027196243396, LR: 0.0003 +[2026-03-01 09:04:36] (step=0021728) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.25122285267071, LR: 0.0003 +[2026-03-01 09:04:44] (step=0021729) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.251418509098023, LR: 0.0003 +[2026-03-01 09:04:52] (step=0021730) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.251614165525337, LR: 0.0003 +[2026-03-01 09:05:00] (step=0021731) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.2518098219526514, LR: 0.0003 +[2026-03-01 09:05:08] (step=0021732) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.252005478379965, LR: 0.0003 +[2026-03-01 09:05:15] (step=0021733) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.252201134807279, LR: 0.0003 +[2026-03-01 09:05:23] (step=0021734) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.252396791234592, LR: 0.0003 +[2026-03-01 09:05:31] (step=0021735) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.252592447661906, LR: 0.0003 +[2026-03-01 09:05:39] (step=0021736) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.25278810408922, LR: 0.0003 +[2026-03-01 09:05:47] (step=0021737) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.252983760516533, LR: 0.0003 +[2026-03-01 09:05:55] (step=0021738) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.253179416943847, LR: 0.0003 +[2026-03-01 09:06:02] (step=0021739) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.25337507337116, LR: 0.0003 +[2026-03-01 09:06:10] (step=0021740) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.253570729798474, LR: 0.0003 +[2026-03-01 09:06:18] (step=0021741) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.253766386225788, LR: 0.0003 +[2026-03-01 09:06:26] (step=0021742) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.253962042653101, LR: 0.0003 +[2026-03-01 09:06:34] (step=0021743) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.254157699080415, LR: 0.0003 +[2026-03-01 09:06:42] (step=0021744) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 4.254353355507728, LR: 0.0003 +[2026-03-01 09:06:50] (step=0021745) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.254549011935042, LR: 0.0003 +[2026-03-01 09:06:58] (step=0021746) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.254744668362355, LR: 0.0003 +[2026-03-01 09:07:05] (step=0021747) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.254940324789669, LR: 0.0003 +[2026-03-01 09:07:13] (step=0021748) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.255135981216983, LR: 0.0003 +[2026-03-01 09:07:21] (step=0021749) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.2553316376442964, LR: 0.0003 +[2026-03-01 09:07:29] (step=0021750) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.2555272940716105, LR: 0.0003 +[2026-03-01 09:07:37] (step=0021751) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.255722950498924, LR: 0.0003 +[2026-03-01 09:07:45] (step=0021752) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.255918606926238, LR: 0.0003 +[2026-03-01 09:07:52] (step=0021753) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.256114263353552, LR: 0.0003 +[2026-03-01 09:08:00] (step=0021754) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.256309919780865, LR: 0.0003 +[2026-03-01 09:08:08] (step=0021755) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.256505576208179, LR: 0.0003 +[2026-03-01 09:08:16] (step=0021756) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.256701232635492, LR: 0.0003 +[2026-03-01 09:08:24] (step=0021757) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.256896889062806, LR: 0.0003 +[2026-03-01 09:08:32] (step=0021758) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.257092545490119, LR: 0.0003 +[2026-03-01 09:08:39] (step=0021759) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.257288201917433, LR: 0.0003 +[2026-03-01 09:08:47] (step=0021760) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.257483858344747, LR: 0.0003 +[2026-03-01 09:08:55] (step=0021761) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.25767951477206, LR: 0.0003 +[2026-03-01 09:09:03] (step=0021762) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.257875171199374, LR: 0.0003 +[2026-03-01 09:09:11] (step=0021763) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.258070827626687, LR: 0.0003 +[2026-03-01 09:09:19] (step=0021764) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.258266484054001, LR: 0.0003 +[2026-03-01 09:09:27] (step=0021765) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.258462140481315, LR: 0.0003 +[2026-03-01 09:09:34] (step=0021766) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.258657796908628, LR: 0.0003 +[2026-03-01 09:09:42] (step=0021767) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.258853453335942, LR: 0.0003 +[2026-03-01 09:09:50] (step=0021768) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.2590491097632555, LR: 0.0003 +[2026-03-01 09:09:58] (step=0021769) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.2592447661905695, LR: 0.0003 +[2026-03-01 09:10:06] (step=0021770) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.259440422617883, LR: 0.0003 +[2026-03-01 09:10:14] (step=0021771) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 4.259636079045197, LR: 0.0003 +[2026-03-01 09:10:21] (step=0021772) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 4.259831735472511, LR: 0.0003 +[2026-03-01 09:10:29] (step=0021773) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 4.260027391899824, LR: 0.0003 +[2026-03-01 09:10:37] (step=0021774) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.260223048327138, LR: 0.0003 +[2026-03-01 09:10:45] (step=0021775) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 4.260418704754451, LR: 0.0003 +[2026-03-01 09:10:53] (step=0021776) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.260614361181765, LR: 0.0003 +[2026-03-01 09:11:01] (step=0021777) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.260810017609079, LR: 0.0003 +[2026-03-01 09:11:09] (step=0021778) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.261005674036392, LR: 0.0003 +[2026-03-01 09:11:16] (step=0021779) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.261201330463706, LR: 0.0003 +[2026-03-01 09:11:24] (step=0021780) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.261396986891019, LR: 0.0003 +[2026-03-01 09:11:32] (step=0021781) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 4.261592643318333, LR: 0.0003 +[2026-03-01 09:11:40] (step=0021782) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.261788299745646, LR: 0.0003 +[2026-03-01 09:11:48] (step=0021783) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.26198395617296, LR: 0.0003 +[2026-03-01 09:11:56] (step=0021784) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.262179612600274, LR: 0.0003 +[2026-03-01 09:12:03] (step=0021785) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.262375269027587, LR: 0.0003 +[2026-03-01 09:12:11] (step=0021786) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.262570925454901, LR: 0.0003 +[2026-03-01 09:12:19] (step=0021787) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.2627665818822145, LR: 0.0003 +[2026-03-01 09:12:27] (step=0021788) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 4.2629622383095285, LR: 0.0003 +[2026-03-01 09:12:35] (step=0021789) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.2631578947368425, LR: 0.0003 +[2026-03-01 09:12:43] (step=0021790) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.263353551164156, LR: 0.0003 +[2026-03-01 09:12:51] (step=0021791) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.26354920759147, LR: 0.0003 +[2026-03-01 09:12:58] (step=0021792) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.263744864018783, LR: 0.0003 +[2026-03-01 09:13:06] (step=0021793) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.263940520446097, LR: 0.0003 +[2026-03-01 09:13:14] (step=0021794) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.264136176873411, LR: 0.0003 +[2026-03-01 09:13:22] (step=0021795) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.264331833300724, LR: 0.0003 +[2026-03-01 09:13:30] (step=0021796) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 4.264527489728038, LR: 0.0003 +[2026-03-01 09:13:38] (step=0021797) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.264723146155351, LR: 0.0003 +[2026-03-01 09:13:45] (step=0021798) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.264918802582665, LR: 0.0003 +[2026-03-01 09:13:53] (step=0021799) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.265114459009978, LR: 0.0003 +[2026-03-01 09:14:01] (step=0021800) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.265310115437292, LR: 0.0003 +[2026-03-01 09:14:09] (step=0021801) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.265505771864606, LR: 0.0003 +[2026-03-01 09:14:17] (step=0021802) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.265701428291919, LR: 0.0003 +[2026-03-01 09:14:25] (step=0021803) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.265897084719233, LR: 0.0003 +[2026-03-01 09:14:33] (step=0021804) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.266092741146546, LR: 0.0003 +[2026-03-01 09:14:40] (step=0021805) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.26628839757386, LR: 0.0003 +[2026-03-01 09:14:48] (step=0021806) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.266484054001174, LR: 0.0003 +[2026-03-01 09:14:56] (step=0021807) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.2666797104284875, LR: 0.0003 +[2026-03-01 09:15:04] (step=0021808) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.2668753668558015, LR: 0.0003 +[2026-03-01 09:15:12] (step=0021809) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.267071023283115, LR: 0.0003 +[2026-03-01 09:15:20] (step=0021810) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.267266679710429, LR: 0.0003 +[2026-03-01 09:15:28] (step=0021811) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.267462336137742, LR: 0.0003 +[2026-03-01 09:15:35] (step=0021812) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.267657992565056, LR: 0.0003 +[2026-03-01 09:15:43] (step=0021813) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.26785364899237, LR: 0.0003 +[2026-03-01 09:15:51] (step=0021814) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.268049305419683, LR: 0.0003 +[2026-03-01 09:15:59] (step=0021815) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.268244961846997, LR: 0.0003 +[2026-03-01 09:16:07] (step=0021816) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.26844061827431, LR: 0.0003 +[2026-03-01 09:16:15] (step=0021817) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.268636274701624, LR: 0.0003 +[2026-03-01 09:16:22] (step=0021818) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.268831931128938, LR: 0.0003 +[2026-03-01 09:16:30] (step=0021819) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.269027587556251, LR: 0.0003 +[2026-03-01 09:16:38] (step=0021820) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.269223243983565, LR: 0.0003 +[2026-03-01 09:16:46] (step=0021821) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.269418900410878, LR: 0.0003 +[2026-03-01 09:16:54] (step=0021822) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.269614556838192, LR: 0.0003 +[2026-03-01 09:17:02] (step=0021823) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.269810213265505, LR: 0.0003 +[2026-03-01 09:17:09] (step=0021824) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.270005869692819, LR: 0.0003 +[2026-03-01 09:17:17] (step=0021825) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.270201526120133, LR: 0.0003 +[2026-03-01 09:17:25] (step=0021826) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.2703971825474465, LR: 0.0003 +[2026-03-01 09:17:33] (step=0021827) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.2705928389747605, LR: 0.0003 +[2026-03-01 09:17:41] (step=0021828) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 4.270788495402074, LR: 0.0003 +[2026-03-01 09:17:49] (step=0021829) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.270984151829388, LR: 0.0003 +[2026-03-01 09:17:57] (step=0021830) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.271179808256702, LR: 0.0003 +[2026-03-01 09:18:04] (step=0021831) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.271375464684015, LR: 0.0003 +[2026-03-01 09:18:12] (step=0021832) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 4.271571121111329, LR: 0.0003 +[2026-03-01 09:18:20] (step=0021833) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.271766777538642, LR: 0.0003 +[2026-03-01 09:18:28] (step=0021834) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 4.271962433965956, LR: 0.0003 +[2026-03-01 09:18:36] (step=0021835) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.272158090393269, LR: 0.0003 +[2026-03-01 09:18:44] (step=0021836) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.272353746820583, LR: 0.0003 +[2026-03-01 09:18:51] (step=0021837) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.272549403247897, LR: 0.0003 +[2026-03-01 09:18:59] (step=0021838) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 4.27274505967521, LR: 0.0003 +[2026-03-01 09:19:07] (step=0021839) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 4.272940716102524, LR: 0.0003 +[2026-03-01 09:19:15] (step=0021840) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.273136372529837, LR: 0.0003 +[2026-03-01 09:19:23] (step=0021841) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 4.273332028957151, LR: 0.0003 +[2026-03-01 09:19:31] (step=0021842) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.273527685384465, LR: 0.0003 +[2026-03-01 09:19:39] (step=0021843) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 4.273723341811778, LR: 0.0003 +[2026-03-01 09:19:46] (step=0021844) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.273918998239092, LR: 0.0003 +[2026-03-01 09:19:54] (step=0021845) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 4.2741146546664055, LR: 0.0003 +[2026-03-01 09:20:02] (step=0021846) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.2743103110937195, LR: 0.0003 +[2026-03-01 09:20:10] (step=0021847) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 4.274505967521033, LR: 0.0003 +[2026-03-01 09:20:18] (step=0021848) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.274701623948347, LR: 0.0003 +[2026-03-01 09:20:26] (step=0021849) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.274897280375661, LR: 0.0003 +[2026-03-01 09:20:34] (step=0021850) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.275092936802974, LR: 0.0003 +[2026-03-01 09:20:41] (step=0021851) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.275288593230288, LR: 0.0003 +[2026-03-01 09:20:49] (step=0021852) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.275484249657601, LR: 0.0003 +[2026-03-01 09:20:57] (step=0021853) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.275679906084915, LR: 0.0003 +[2026-03-01 09:21:05] (step=0021854) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.275875562512229, LR: 0.0003 +[2026-03-01 09:21:13] (step=0021855) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.276071218939542, LR: 0.0003 +[2026-03-01 09:21:21] (step=0021856) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.276266875366856, LR: 0.0003 +[2026-03-01 09:21:29] (step=0021857) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.276462531794169, LR: 0.0003 +[2026-03-01 09:21:36] (step=0021858) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.276658188221483, LR: 0.0003 +[2026-03-01 09:21:44] (step=0021859) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.276853844648797, LR: 0.0003 +[2026-03-01 09:21:52] (step=0021860) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.27704950107611, LR: 0.0003 +[2026-03-01 09:22:00] (step=0021861) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 4.277245157503424, LR: 0.0003 +[2026-03-01 09:22:08] (step=0021862) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.277440813930737, LR: 0.0003 +[2026-03-01 09:22:16] (step=0021863) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 4.277636470358051, LR: 0.0003 +[2026-03-01 09:22:23] (step=0021864) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.2778321267853645, LR: 0.0003 +[2026-03-01 09:22:31] (step=0021865) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.2780277832126785, LR: 0.0003 +[2026-03-01 09:22:39] (step=0021866) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.2782234396399925, LR: 0.0003 +[2026-03-01 09:22:47] (step=0021867) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 4.278419096067306, LR: 0.0003 +[2026-03-01 09:22:55] (step=0021868) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.27861475249462, LR: 0.0003 +[2026-03-01 09:23:03] (step=0021869) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.278810408921933, LR: 0.0003 +[2026-03-01 09:23:10] (step=0021870) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.279006065349247, LR: 0.0003 +[2026-03-01 09:23:18] (step=0021871) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.279201721776561, LR: 0.0003 +[2026-03-01 09:23:26] (step=0021872) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.279397378203874, LR: 0.0003 +[2026-03-01 09:23:34] (step=0021873) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.279593034631188, LR: 0.0003 +[2026-03-01 09:23:42] (step=0021874) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 4.279788691058501, LR: 0.0003 +[2026-03-01 09:23:50] (step=0021875) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.279984347485815, LR: 0.0003 +[2026-03-01 09:23:58] (step=0021876) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.280180003913128, LR: 0.0003 +[2026-03-01 09:24:05] (step=0021877) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.280375660340442, LR: 0.0003 +[2026-03-01 09:24:13] (step=0021878) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.280571316767756, LR: 0.0003 +[2026-03-01 09:24:21] (step=0021879) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.280766973195069, LR: 0.0003 +[2026-03-01 09:24:29] (step=0021880) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.280962629622383, LR: 0.0003 +[2026-03-01 09:24:37] (step=0021881) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.281158286049696, LR: 0.0003 +[2026-03-01 09:24:45] (step=0021882) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.28135394247701, LR: 0.0003 +[2026-03-01 09:24:52] (step=0021883) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.281549598904324, LR: 0.0003 +[2026-03-01 09:25:00] (step=0021884) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.2817452553316375, LR: 0.0003 +[2026-03-01 09:25:08] (step=0021885) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.2819409117589515, LR: 0.0003 +[2026-03-01 09:25:16] (step=0021886) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.282136568186265, LR: 0.0003 +[2026-03-01 09:25:24] (step=0021887) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.282332224613579, LR: 0.0003 +[2026-03-01 09:25:32] (step=0021888) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.282527881040892, LR: 0.0003 +[2026-03-01 09:25:40] (step=0021889) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.282723537468206, LR: 0.0003 +[2026-03-01 09:25:47] (step=0021890) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.28291919389552, LR: 0.0003 +[2026-03-01 09:25:55] (step=0021891) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.283114850322833, LR: 0.0003 +[2026-03-01 09:26:03] (step=0021892) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.283310506750147, LR: 0.0003 +[2026-03-01 09:26:11] (step=0021893) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.28350616317746, LR: 0.0003 +[2026-03-01 09:26:19] (step=0021894) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.283701819604774, LR: 0.0003 +[2026-03-01 09:26:27] (step=0021895) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.283897476032088, LR: 0.0003 +[2026-03-01 09:26:34] (step=0021896) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 4.284093132459401, LR: 0.0003 +[2026-03-01 09:26:42] (step=0021897) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.284288788886715, LR: 0.0003 +[2026-03-01 09:26:50] (step=0021898) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.284484445314028, LR: 0.0003 +[2026-03-01 09:26:58] (step=0021899) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 4.284680101741342, LR: 0.0003 +[2026-03-01 09:27:06] (step=0021900) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.284875758168655, LR: 0.0003 +[2026-03-01 09:27:14] (step=0021901) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.285071414595969, LR: 0.0003 +[2026-03-01 09:27:22] (step=0021902) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.285267071023283, LR: 0.0003 +[2026-03-01 09:27:29] (step=0021903) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.2854627274505965, LR: 0.0003 +[2026-03-01 09:27:37] (step=0021904) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.2856583838779105, LR: 0.0003 +[2026-03-01 09:27:45] (step=0021905) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.285854040305224, LR: 0.0003 +[2026-03-01 09:27:53] (step=0021906) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.286049696732538, LR: 0.0003 +[2026-03-01 09:28:01] (step=0021907) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.286245353159852, LR: 0.0003 +[2026-03-01 09:28:09] (step=0021908) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 4.286441009587165, LR: 0.0003 +[2026-03-01 09:28:17] (step=0021909) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 4.286636666014479, LR: 0.0003 +[2026-03-01 09:28:24] (step=0021910) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.286832322441792, LR: 0.0003 +[2026-03-01 09:28:32] (step=0021911) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.287027978869106, LR: 0.0003 +[2026-03-01 09:28:40] (step=0021912) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.28722363529642, LR: 0.0003 +[2026-03-01 09:28:48] (step=0021913) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.287419291723733, LR: 0.0003 +[2026-03-01 09:28:56] (step=0021914) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.287614948151047, LR: 0.0003 +[2026-03-01 09:29:04] (step=0021915) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.28781060457836, LR: 0.0003 +[2026-03-01 09:29:11] (step=0021916) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 4.288006261005674, LR: 0.0003 +[2026-03-01 09:29:19] (step=0021917) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.288201917432987, LR: 0.0003 +[2026-03-01 09:29:27] (step=0021918) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.288397573860301, LR: 0.0003 +[2026-03-01 09:29:35] (step=0021919) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 4.288593230287615, LR: 0.0003 +[2026-03-01 09:29:43] (step=0021920) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.288788886714928, LR: 0.0003 +[2026-03-01 09:29:51] (step=0021921) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.288984543142242, LR: 0.0003 +[2026-03-01 09:29:58] (step=0021922) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.2891801995695555, LR: 0.0003 +[2026-03-01 09:30:06] (step=0021923) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.2893758559968695, LR: 0.0003 +[2026-03-01 09:30:14] (step=0021924) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.2895715124241836, LR: 0.0003 +[2026-03-01 09:30:22] (step=0021925) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.289767168851497, LR: 0.0003 +[2026-03-01 09:30:30] (step=0021926) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.289962825278811, LR: 0.0003 +[2026-03-01 09:30:38] (step=0021927) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.290158481706124, LR: 0.0003 +[2026-03-01 09:30:46] (step=0021928) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.290354138133438, LR: 0.0003 +[2026-03-01 09:30:53] (step=0021929) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.290549794560751, LR: 0.0003 +[2026-03-01 09:31:01] (step=0021930) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.290745450988065, LR: 0.0003 +[2026-03-01 09:31:09] (step=0021931) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.290941107415379, LR: 0.0003 +[2026-03-01 09:31:17] (step=0021932) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 4.291136763842692, LR: 0.0003 +[2026-03-01 09:31:25] (step=0021933) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.291332420270006, LR: 0.0003 +[2026-03-01 09:31:33] (step=0021934) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.291528076697319, LR: 0.0003 +[2026-03-01 09:31:40] (step=0021935) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 4.291723733124633, LR: 0.0003 +[2026-03-01 09:31:48] (step=0021936) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 4.291919389551947, LR: 0.0003 +[2026-03-01 09:31:56] (step=0021937) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.29211504597926, LR: 0.0003 +[2026-03-01 09:32:04] (step=0021938) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.292310702406574, LR: 0.0003 +[2026-03-01 09:32:12] (step=0021939) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.292506358833887, LR: 0.0003 +[2026-03-01 09:32:20] (step=0021940) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.292702015261201, LR: 0.0003 +[2026-03-01 09:32:28] (step=0021941) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.2928976716885145, LR: 0.0003 +[2026-03-01 09:32:35] (step=0021942) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.2930933281158286, LR: 0.0003 +[2026-03-01 09:32:43] (step=0021943) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.293288984543143, LR: 0.0003 +[2026-03-01 09:32:51] (step=0021944) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.293484640970456, LR: 0.0003 +[2026-03-01 09:32:59] (step=0021945) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.29368029739777, LR: 0.0003 +[2026-03-01 09:33:07] (step=0021946) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.293875953825083, LR: 0.0003 +[2026-03-01 09:33:15] (step=0021947) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 4.294071610252397, LR: 0.0003 +[2026-03-01 09:33:22] (step=0021948) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.294267266679711, LR: 0.0003 +[2026-03-01 09:33:30] (step=0021949) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.294462923107024, LR: 0.0003 +[2026-03-01 09:33:38] (step=0021950) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.294658579534338, LR: 0.0003 +[2026-03-01 09:33:46] (step=0021951) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.294854235961651, LR: 0.0003 +[2026-03-01 09:33:54] (step=0021952) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.295049892388965, LR: 0.0003 +[2026-03-01 09:34:02] (step=0021953) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.295245548816278, LR: 0.0003 +[2026-03-01 09:34:10] (step=0021954) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.295441205243592, LR: 0.0003 +[2026-03-01 09:34:18] (step=0021955) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.295636861670906, LR: 0.0003 +[2026-03-01 09:34:25] (step=0021956) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.295832518098219, LR: 0.0003 +[2026-03-01 09:34:33] (step=0021957) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.296028174525533, LR: 0.0003 +[2026-03-01 09:34:41] (step=0021958) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.296223830952846, LR: 0.0003 +[2026-03-01 09:34:49] (step=0021959) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.29641948738016, LR: 0.0003 +[2026-03-01 09:34:57] (step=0021960) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 4.2966151438074744, LR: 0.0003 +[2026-03-01 09:35:05] (step=0021961) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.296810800234788, LR: 0.0003 +[2026-03-01 09:35:12] (step=0021962) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.297006456662102, LR: 0.0003 +[2026-03-01 09:35:20] (step=0021963) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.297202113089415, LR: 0.0003 +[2026-03-01 09:35:28] (step=0021964) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.297397769516729, LR: 0.0003 +[2026-03-01 09:35:36] (step=0021965) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.297593425944043, LR: 0.0003 +[2026-03-01 09:35:44] (step=0021966) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.297789082371356, LR: 0.0003 +[2026-03-01 09:35:52] (step=0021967) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.29798473879867, LR: 0.0003 +[2026-03-01 09:35:59] (step=0021968) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 4.298180395225983, LR: 0.0003 +[2026-03-01 09:36:07] (step=0021969) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.298376051653297, LR: 0.0003 +[2026-03-01 09:36:15] (step=0021970) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.29857170808061, LR: 0.0003 +[2026-03-01 09:36:23] (step=0021971) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.298767364507924, LR: 0.0003 +[2026-03-01 09:36:31] (step=0021972) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 4.298963020935238, LR: 0.0003 +[2026-03-01 09:36:39] (step=0021973) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.299158677362551, LR: 0.0003 +[2026-03-01 09:36:47] (step=0021974) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.299354333789865, LR: 0.0003 +[2026-03-01 09:36:54] (step=0021975) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.299549990217178, LR: 0.0003 +[2026-03-01 09:37:02] (step=0021976) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.299745646644492, LR: 0.0003 +[2026-03-01 09:37:10] (step=0021977) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.299941303071806, LR: 0.0003 +[2026-03-01 09:37:18] (step=0021978) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 4.3001369594991194, LR: 0.0003 +[2026-03-01 09:37:26] (step=0021979) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 4.3003326159264335, LR: 0.0003 +[2026-03-01 09:37:34] (step=0021980) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.300528272353747, LR: 0.0003 +[2026-03-01 09:37:41] (step=0021981) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 4.300723928781061, LR: 0.0003 +[2026-03-01 09:37:49] (step=0021982) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.300919585208374, LR: 0.0003 +[2026-03-01 09:37:57] (step=0021983) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.301115241635688, LR: 0.0003 +[2026-03-01 09:38:05] (step=0021984) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.301310898063002, LR: 0.0003 +[2026-03-01 09:38:13] (step=0021985) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.301506554490315, LR: 0.0003 +[2026-03-01 09:38:21] (step=0021986) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.301702210917629, LR: 0.0003 +[2026-03-01 09:38:28] (step=0021987) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.301897867344942, LR: 0.0003 +[2026-03-01 09:38:36] (step=0021988) Train Loss: 0.4478, Train Steps/Sec: 0.12, Epoch: 4.302093523772256, LR: 0.0003 +[2026-03-01 09:38:44] (step=0021989) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.30228918019957, LR: 0.0003 +[2026-03-01 09:38:52] (step=0021990) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.302484836626883, LR: 0.0003 +[2026-03-01 09:39:00] (step=0021991) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.302680493054197, LR: 0.0003 +[2026-03-01 09:39:08] (step=0021992) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 4.30287614948151, LR: 0.0003 +[2026-03-01 09:39:16] (step=0021993) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.303071805908824, LR: 0.0003 +[2026-03-01 09:39:24] (step=0021994) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.303267462336137, LR: 0.0003 +[2026-03-01 09:39:31] (step=0021995) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 4.303463118763451, LR: 0.0003 +[2026-03-01 09:39:39] (step=0021996) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.303658775190765, LR: 0.0003 +[2026-03-01 09:39:47] (step=0021997) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.3038544316180785, LR: 0.0003 +[2026-03-01 09:39:55] (step=0021998) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.3040500880453925, LR: 0.0003 +[2026-03-01 09:40:03] (step=0021999) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.304245744472706, LR: 0.0003 +[2026-03-01 09:40:11] (step=0022000) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.30444140090002, LR: 0.0003 +[2026-03-01 09:40:11] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0022000/ +[2026-03-01 09:40:19] (step=0022001) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.304637057327334, LR: 0.0003 +[2026-03-01 09:40:26] (step=0022002) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.304832713754647, LR: 0.0003 +[2026-03-01 09:40:34] (step=0022003) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.305028370181961, LR: 0.0003 +[2026-03-01 09:40:42] (step=0022004) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.305224026609274, LR: 0.0003 +[2026-03-01 09:40:50] (step=0022005) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.305419683036588, LR: 0.0003 +[2026-03-01 09:40:58] (step=0022006) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 4.305615339463901, LR: 0.0003 +[2026-03-01 09:41:06] (step=0022007) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 4.305810995891215, LR: 0.0003 +[2026-03-01 09:41:14] (step=0022008) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.306006652318529, LR: 0.0003 +[2026-03-01 09:41:21] (step=0022009) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.306202308745842, LR: 0.0003 +[2026-03-01 09:41:29] (step=0022010) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.306397965173156, LR: 0.0003 +[2026-03-01 09:41:37] (step=0022011) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.306593621600469, LR: 0.0003 +[2026-03-01 09:41:45] (step=0022012) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.306789278027783, LR: 0.0003 +[2026-03-01 09:41:53] (step=0022013) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 4.306984934455097, LR: 0.0003 +[2026-03-01 09:42:01] (step=0022014) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.30718059088241, LR: 0.0003 +[2026-03-01 09:42:08] (step=0022015) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.307376247309724, LR: 0.0003 +[2026-03-01 09:42:16] (step=0022016) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.3075719037370375, LR: 0.0003 +[2026-03-01 09:42:24] (step=0022017) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.3077675601643515, LR: 0.0003 +[2026-03-01 09:42:32] (step=0022018) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.3079632165916655, LR: 0.0003 +[2026-03-01 09:42:40] (step=0022019) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.308158873018979, LR: 0.0003 +[2026-03-01 09:42:48] (step=0022020) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.308354529446293, LR: 0.0003 +[2026-03-01 09:42:56] (step=0022021) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.308550185873606, LR: 0.0003 +[2026-03-01 09:43:03] (step=0022022) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.30874584230092, LR: 0.0003 +[2026-03-01 09:43:11] (step=0022023) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 4.308941498728233, LR: 0.0003 +[2026-03-01 09:43:19] (step=0022024) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.309137155155547, LR: 0.0003 +[2026-03-01 09:43:27] (step=0022025) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.309332811582861, LR: 0.0003 +[2026-03-01 09:43:35] (step=0022026) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.309528468010174, LR: 0.0003 +[2026-03-01 09:43:43] (step=0022027) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.309724124437488, LR: 0.0003 +[2026-03-01 09:43:50] (step=0022028) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.309919780864801, LR: 0.0003 +[2026-03-01 09:43:58] (step=0022029) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.310115437292115, LR: 0.0003 +[2026-03-01 09:44:06] (step=0022030) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.310311093719429, LR: 0.0003 +[2026-03-01 09:44:14] (step=0022031) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.310506750146742, LR: 0.0003 +[2026-03-01 09:44:22] (step=0022032) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.310702406574056, LR: 0.0003 +[2026-03-01 09:44:30] (step=0022033) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.310898063001369, LR: 0.0003 +[2026-03-01 09:44:37] (step=0022034) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.311093719428683, LR: 0.0003 +[2026-03-01 09:44:45] (step=0022035) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 4.3112893758559965, LR: 0.0003 +[2026-03-01 09:44:53] (step=0022036) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.3114850322833105, LR: 0.0003 +[2026-03-01 09:45:01] (step=0022037) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.3116806887106245, LR: 0.0003 +[2026-03-01 09:45:09] (step=0022038) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.311876345137938, LR: 0.0003 +[2026-03-01 09:45:17] (step=0022039) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.312072001565252, LR: 0.0003 +[2026-03-01 09:45:25] (step=0022040) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.312267657992565, LR: 0.0003 +[2026-03-01 09:45:32] (step=0022041) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.312463314419879, LR: 0.0003 +[2026-03-01 09:45:40] (step=0022042) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.312658970847193, LR: 0.0003 +[2026-03-01 09:45:48] (step=0022043) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.312854627274506, LR: 0.0003 +[2026-03-01 09:45:56] (step=0022044) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.31305028370182, LR: 0.0003 +[2026-03-01 09:46:04] (step=0022045) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.313245940129133, LR: 0.0003 +[2026-03-01 09:46:12] (step=0022046) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.313441596556447, LR: 0.0003 +[2026-03-01 09:46:20] (step=0022047) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.31363725298376, LR: 0.0003 +[2026-03-01 09:46:27] (step=0022048) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.313832909411074, LR: 0.0003 +[2026-03-01 09:46:35] (step=0022049) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.314028565838388, LR: 0.0003 +[2026-03-01 09:46:43] (step=0022050) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.314224222265701, LR: 0.0003 +[2026-03-01 09:46:51] (step=0022051) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.314419878693015, LR: 0.0003 +[2026-03-01 09:46:59] (step=0022052) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.314615535120328, LR: 0.0003 +[2026-03-01 09:47:07] (step=0022053) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.314811191547642, LR: 0.0003 +[2026-03-01 09:47:15] (step=0022054) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.315006847974956, LR: 0.0003 +[2026-03-01 09:47:22] (step=0022055) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.3152025044022695, LR: 0.0003 +[2026-03-01 09:47:30] (step=0022056) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.3153981608295835, LR: 0.0003 +[2026-03-01 09:47:38] (step=0022057) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.315593817256897, LR: 0.0003 +[2026-03-01 09:47:46] (step=0022058) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.315789473684211, LR: 0.0003 +[2026-03-01 09:47:54] (step=0022059) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.315985130111524, LR: 0.0003 +[2026-03-01 09:48:02] (step=0022060) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 4.316180786538838, LR: 0.0003 +[2026-03-01 09:48:09] (step=0022061) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.316376442966152, LR: 0.0003 +[2026-03-01 09:48:17] (step=0022062) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.316572099393465, LR: 0.0003 +[2026-03-01 09:48:25] (step=0022063) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.316767755820779, LR: 0.0003 +[2026-03-01 09:48:33] (step=0022064) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 4.316963412248092, LR: 0.0003 +[2026-03-01 09:48:41] (step=0022065) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.317159068675406, LR: 0.0003 +[2026-03-01 09:48:49] (step=0022066) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.31735472510272, LR: 0.0003 +[2026-03-01 09:48:56] (step=0022067) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.317550381530033, LR: 0.0003 +[2026-03-01 09:49:04] (step=0022068) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.317746037957347, LR: 0.0003 +[2026-03-01 09:49:12] (step=0022069) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.31794169438466, LR: 0.0003 +[2026-03-01 09:49:20] (step=0022070) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.318137350811974, LR: 0.0003 +[2026-03-01 09:49:28] (step=0022071) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.318333007239287, LR: 0.0003 +[2026-03-01 09:49:36] (step=0022072) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.318528663666601, LR: 0.0003 +[2026-03-01 09:49:43] (step=0022073) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.318724320093915, LR: 0.0003 +[2026-03-01 09:49:51] (step=0022074) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.3189199765212285, LR: 0.0003 +[2026-03-01 09:49:59] (step=0022075) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.3191156329485425, LR: 0.0003 +[2026-03-01 09:50:07] (step=0022076) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.319311289375856, LR: 0.0003 +[2026-03-01 09:50:15] (step=0022077) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.31950694580317, LR: 0.0003 +[2026-03-01 09:50:23] (step=0022078) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.319702602230484, LR: 0.0003 +[2026-03-01 09:50:30] (step=0022079) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.319898258657797, LR: 0.0003 +[2026-03-01 09:50:38] (step=0022080) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.320093915085111, LR: 0.0003 +[2026-03-01 09:50:46] (step=0022081) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.320289571512424, LR: 0.0003 +[2026-03-01 09:50:54] (step=0022082) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.320485227939738, LR: 0.0003 +[2026-03-01 09:51:02] (step=0022083) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.320680884367052, LR: 0.0003 +[2026-03-01 09:51:10] (step=0022084) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.320876540794365, LR: 0.0003 +[2026-03-01 09:51:18] (step=0022085) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.321072197221679, LR: 0.0003 +[2026-03-01 09:51:25] (step=0022086) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.321267853648992, LR: 0.0003 +[2026-03-01 09:51:33] (step=0022087) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.321463510076306, LR: 0.0003 +[2026-03-01 09:51:41] (step=0022088) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.321659166503619, LR: 0.0003 +[2026-03-01 09:51:49] (step=0022089) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.321854822930933, LR: 0.0003 +[2026-03-01 09:51:57] (step=0022090) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.322050479358247, LR: 0.0003 +[2026-03-01 09:52:05] (step=0022091) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.32224613578556, LR: 0.0003 +[2026-03-01 09:52:13] (step=0022092) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.322441792212874, LR: 0.0003 +[2026-03-01 09:52:20] (step=0022093) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.3226374486401875, LR: 0.0003 +[2026-03-01 09:52:28] (step=0022094) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.3228331050675015, LR: 0.0003 +[2026-03-01 09:52:36] (step=0022095) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.3230287614948155, LR: 0.0003 +[2026-03-01 09:52:44] (step=0022096) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.323224417922129, LR: 0.0003 +[2026-03-01 09:52:52] (step=0022097) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.323420074349443, LR: 0.0003 +[2026-03-01 09:53:00] (step=0022098) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.323615730776756, LR: 0.0003 +[2026-03-01 09:53:07] (step=0022099) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.32381138720407, LR: 0.0003 +[2026-03-01 09:53:15] (step=0022100) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 4.324007043631383, LR: 0.0003 +[2026-03-01 09:53:23] (step=0022101) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.324202700058697, LR: 0.0003 +[2026-03-01 09:53:31] (step=0022102) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 4.324398356486011, LR: 0.0003 +[2026-03-01 09:53:39] (step=0022103) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 4.324594012913324, LR: 0.0003 +[2026-03-01 09:53:47] (step=0022104) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.324789669340638, LR: 0.0003 +[2026-03-01 09:53:54] (step=0022105) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.324985325767951, LR: 0.0003 +[2026-03-01 09:54:02] (step=0022106) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.325180982195265, LR: 0.0003 +[2026-03-01 09:54:10] (step=0022107) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 4.325376638622579, LR: 0.0003 +[2026-03-01 09:54:18] (step=0022108) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.325572295049892, LR: 0.0003 +[2026-03-01 09:54:26] (step=0022109) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.325767951477206, LR: 0.0003 +[2026-03-01 09:54:34] (step=0022110) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.325963607904519, LR: 0.0003 +[2026-03-01 09:54:41] (step=0022111) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.326159264331833, LR: 0.0003 +[2026-03-01 09:54:49] (step=0022112) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.3263549207591465, LR: 0.0003 +[2026-03-01 09:54:57] (step=0022113) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.3265505771864605, LR: 0.0003 +[2026-03-01 09:55:05] (step=0022114) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.3267462336137745, LR: 0.0003 +[2026-03-01 09:55:13] (step=0022115) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.326941890041088, LR: 0.0003 +[2026-03-01 09:55:21] (step=0022116) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 4.327137546468402, LR: 0.0003 +[2026-03-01 09:55:28] (step=0022117) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.327333202895715, LR: 0.0003 +[2026-03-01 09:55:36] (step=0022118) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.327528859323029, LR: 0.0003 +[2026-03-01 09:55:44] (step=0022119) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.327724515750343, LR: 0.0003 +[2026-03-01 09:55:52] (step=0022120) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.327920172177656, LR: 0.0003 +[2026-03-01 09:56:00] (step=0022121) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.32811582860497, LR: 0.0003 +[2026-03-01 09:56:08] (step=0022122) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 4.328311485032283, LR: 0.0003 +[2026-03-01 09:56:15] (step=0022123) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.328507141459597, LR: 0.0003 +[2026-03-01 09:56:23] (step=0022124) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.32870279788691, LR: 0.0003 +[2026-03-01 09:56:31] (step=0022125) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.328898454314224, LR: 0.0003 +[2026-03-01 09:56:39] (step=0022126) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.329094110741538, LR: 0.0003 +[2026-03-01 09:56:47] (step=0022127) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 4.329289767168851, LR: 0.0003 +[2026-03-01 09:56:55] (step=0022128) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.329485423596165, LR: 0.0003 +[2026-03-01 09:57:02] (step=0022129) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.329681080023478, LR: 0.0003 +[2026-03-01 09:57:10] (step=0022130) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.329876736450792, LR: 0.0003 +[2026-03-01 09:57:18] (step=0022131) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.330072392878106, LR: 0.0003 +[2026-03-01 09:57:26] (step=0022132) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.3302680493054195, LR: 0.0003 +[2026-03-01 09:57:34] (step=0022133) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.3304637057327335, LR: 0.0003 +[2026-03-01 09:57:42] (step=0022134) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.330659362160047, LR: 0.0003 +[2026-03-01 09:57:49] (step=0022135) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.330855018587361, LR: 0.0003 +[2026-03-01 09:57:57] (step=0022136) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 4.331050675014675, LR: 0.0003 +[2026-03-01 09:58:05] (step=0022137) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.331246331441988, LR: 0.0003 +[2026-03-01 09:58:13] (step=0022138) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.331441987869302, LR: 0.0003 +[2026-03-01 09:58:21] (step=0022139) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.331637644296615, LR: 0.0003 +[2026-03-01 09:58:29] (step=0022140) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.331833300723929, LR: 0.0003 +[2026-03-01 09:58:36] (step=0022141) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.332028957151242, LR: 0.0003 +[2026-03-01 09:58:44] (step=0022142) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.332224613578556, LR: 0.0003 +[2026-03-01 09:58:52] (step=0022143) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.33242027000587, LR: 0.0003 +[2026-03-01 09:59:00] (step=0022144) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.332615926433183, LR: 0.0003 +[2026-03-01 09:59:08] (step=0022145) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 4.332811582860497, LR: 0.0003 +[2026-03-01 09:59:16] (step=0022146) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 4.33300723928781, LR: 0.0003 +[2026-03-01 09:59:24] (step=0022147) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.333202895715124, LR: 0.0003 +[2026-03-01 09:59:31] (step=0022148) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.333398552142438, LR: 0.0003 +[2026-03-01 09:59:39] (step=0022149) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.333594208569751, LR: 0.0003 +[2026-03-01 09:59:47] (step=0022150) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.333789864997065, LR: 0.0003 +[2026-03-01 09:59:55] (step=0022151) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.3339855214243785, LR: 0.0003 +[2026-03-01 10:00:03] (step=0022152) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 4.3341811778516925, LR: 0.0003 +[2026-03-01 10:00:11] (step=0022153) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.334376834279006, LR: 0.0003 +[2026-03-01 10:00:19] (step=0022154) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.33457249070632, LR: 0.0003 +[2026-03-01 10:00:26] (step=0022155) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.334768147133634, LR: 0.0003 +[2026-03-01 10:00:34] (step=0022156) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.334963803560947, LR: 0.0003 +[2026-03-01 10:00:42] (step=0022157) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.335159459988261, LR: 0.0003 +[2026-03-01 10:00:50] (step=0022158) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.335355116415574, LR: 0.0003 +[2026-03-01 10:00:58] (step=0022159) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.335550772842888, LR: 0.0003 +[2026-03-01 10:01:06] (step=0022160) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 4.335746429270202, LR: 0.0003 +[2026-03-01 10:01:13] (step=0022161) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.335942085697515, LR: 0.0003 +[2026-03-01 10:01:21] (step=0022162) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.336137742124829, LR: 0.0003 +[2026-03-01 10:01:29] (step=0022163) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.336333398552142, LR: 0.0003 +[2026-03-01 10:01:37] (step=0022164) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.336529054979456, LR: 0.0003 +[2026-03-01 10:01:45] (step=0022165) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.336724711406769, LR: 0.0003 +[2026-03-01 10:01:53] (step=0022166) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.336920367834083, LR: 0.0003 +[2026-03-01 10:02:00] (step=0022167) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.337116024261397, LR: 0.0003 +[2026-03-01 10:02:08] (step=0022168) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.33731168068871, LR: 0.0003 +[2026-03-01 10:02:16] (step=0022169) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.337507337116024, LR: 0.0003 +[2026-03-01 10:02:24] (step=0022170) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.3377029935433375, LR: 0.0003 +[2026-03-01 10:02:32] (step=0022171) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.3378986499706516, LR: 0.0003 +[2026-03-01 10:02:40] (step=0022172) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.338094306397966, LR: 0.0003 +[2026-03-01 10:02:47] (step=0022173) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.338289962825279, LR: 0.0003 +[2026-03-01 10:02:55] (step=0022174) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.338485619252593, LR: 0.0003 +[2026-03-01 10:03:03] (step=0022175) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.338681275679906, LR: 0.0003 +[2026-03-01 10:03:11] (step=0022176) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.33887693210722, LR: 0.0003 +[2026-03-01 10:03:19] (step=0022177) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.339072588534533, LR: 0.0003 +[2026-03-01 10:03:27] (step=0022178) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.339268244961847, LR: 0.0003 +[2026-03-01 10:03:34] (step=0022179) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.339463901389161, LR: 0.0003 +[2026-03-01 10:03:42] (step=0022180) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.339659557816474, LR: 0.0003 +[2026-03-01 10:03:50] (step=0022181) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.339855214243788, LR: 0.0003 +[2026-03-01 10:03:58] (step=0022182) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.340050870671101, LR: 0.0003 +[2026-03-01 10:04:06] (step=0022183) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.340246527098415, LR: 0.0003 +[2026-03-01 10:04:14] (step=0022184) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.340442183525729, LR: 0.0003 +[2026-03-01 10:04:21] (step=0022185) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.340637839953042, LR: 0.0003 +[2026-03-01 10:04:29] (step=0022186) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 4.340833496380356, LR: 0.0003 +[2026-03-01 10:04:37] (step=0022187) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.341029152807669, LR: 0.0003 +[2026-03-01 10:04:45] (step=0022188) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.341224809234983, LR: 0.0003 +[2026-03-01 10:04:53] (step=0022189) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.3414204656622974, LR: 0.0003 +[2026-03-01 10:05:01] (step=0022190) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.341616122089611, LR: 0.0003 +[2026-03-01 10:05:08] (step=0022191) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.341811778516925, LR: 0.0003 +[2026-03-01 10:05:16] (step=0022192) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.342007434944238, LR: 0.0003 +[2026-03-01 10:05:24] (step=0022193) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 4.342203091371552, LR: 0.0003 +[2026-03-01 10:05:32] (step=0022194) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.342398747798865, LR: 0.0003 +[2026-03-01 10:05:40] (step=0022195) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 4.342594404226179, LR: 0.0003 +[2026-03-01 10:05:48] (step=0022196) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.342790060653493, LR: 0.0003 +[2026-03-01 10:05:55] (step=0022197) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.342985717080806, LR: 0.0003 +[2026-03-01 10:06:03] (step=0022198) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.34318137350812, LR: 0.0003 +[2026-03-01 10:06:11] (step=0022199) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.343377029935433, LR: 0.0003 +[2026-03-01 10:06:19] (step=0022200) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.343572686362747, LR: 0.0003 +[2026-03-01 10:06:27] (step=0022201) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.343768342790061, LR: 0.0003 +[2026-03-01 10:06:35] (step=0022202) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.343963999217374, LR: 0.0003 +[2026-03-01 10:06:43] (step=0022203) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.344159655644688, LR: 0.0003 +[2026-03-01 10:06:50] (step=0022204) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.344355312072001, LR: 0.0003 +[2026-03-01 10:06:58] (step=0022205) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.344550968499315, LR: 0.0003 +[2026-03-01 10:07:06] (step=0022206) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 4.344746624926628, LR: 0.0003 +[2026-03-01 10:07:14] (step=0022207) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.3449422813539424, LR: 0.0003 +[2026-03-01 10:07:22] (step=0022208) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 4.3451379377812565, LR: 0.0003 +[2026-03-01 10:07:30] (step=0022209) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.34533359420857, LR: 0.0003 +[2026-03-01 10:07:38] (step=0022210) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.345529250635884, LR: 0.0003 +[2026-03-01 10:07:45] (step=0022211) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.345724907063197, LR: 0.0003 +[2026-03-01 10:07:53] (step=0022212) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.345920563490511, LR: 0.0003 +[2026-03-01 10:08:01] (step=0022213) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.346116219917825, LR: 0.0003 +[2026-03-01 10:08:09] (step=0022214) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.346311876345138, LR: 0.0003 +[2026-03-01 10:08:17] (step=0022215) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.346507532772452, LR: 0.0003 +[2026-03-01 10:08:25] (step=0022216) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.346703189199765, LR: 0.0003 +[2026-03-01 10:08:32] (step=0022217) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.346898845627079, LR: 0.0003 +[2026-03-01 10:08:40] (step=0022218) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.347094502054392, LR: 0.0003 +[2026-03-01 10:08:48] (step=0022219) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.347290158481706, LR: 0.0003 +[2026-03-01 10:08:56] (step=0022220) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.34748581490902, LR: 0.0003 +[2026-03-01 10:09:04] (step=0022221) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 4.347681471336333, LR: 0.0003 +[2026-03-01 10:09:12] (step=0022222) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.347877127763647, LR: 0.0003 +[2026-03-01 10:09:19] (step=0022223) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.34807278419096, LR: 0.0003 +[2026-03-01 10:09:27] (step=0022224) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.348268440618274, LR: 0.0003 +[2026-03-01 10:09:35] (step=0022225) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.348464097045588, LR: 0.0003 +[2026-03-01 10:09:43] (step=0022226) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.3486597534729015, LR: 0.0003 +[2026-03-01 10:09:51] (step=0022227) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.3488554099002155, LR: 0.0003 +[2026-03-01 10:09:59] (step=0022228) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.349051066327529, LR: 0.0003 +[2026-03-01 10:10:06] (step=0022229) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.349246722754843, LR: 0.0003 +[2026-03-01 10:10:14] (step=0022230) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.349442379182156, LR: 0.0003 +[2026-03-01 10:10:22] (step=0022231) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.34963803560947, LR: 0.0003 +[2026-03-01 10:10:30] (step=0022232) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.349833692036784, LR: 0.0003 +[2026-03-01 10:10:38] (step=0022233) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.350029348464097, LR: 0.0003 +[2026-03-01 10:10:46] (step=0022234) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.350225004891411, LR: 0.0003 +[2026-03-01 10:10:53] (step=0022235) Train Loss: 0.4704, Train Steps/Sec: 0.13, Epoch: 4.350420661318724, LR: 0.0003 +[2026-03-01 10:11:01] (step=0022236) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.350616317746038, LR: 0.0003 +[2026-03-01 10:11:09] (step=0022237) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.350811974173352, LR: 0.0003 +[2026-03-01 10:11:17] (step=0022238) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 4.351007630600665, LR: 0.0003 +[2026-03-01 10:11:25] (step=0022239) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.351203287027979, LR: 0.0003 +[2026-03-01 10:11:33] (step=0022240) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.351398943455292, LR: 0.0003 +[2026-03-01 10:11:41] (step=0022241) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.351594599882606, LR: 0.0003 +[2026-03-01 10:11:48] (step=0022242) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.35179025630992, LR: 0.0003 +[2026-03-01 10:11:56] (step=0022243) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.351985912737233, LR: 0.0003 +[2026-03-01 10:12:04] (step=0022244) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.352181569164547, LR: 0.0003 +[2026-03-01 10:12:12] (step=0022245) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.3523772255918605, LR: 0.0003 +[2026-03-01 10:12:20] (step=0022246) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.3525728820191745, LR: 0.0003 +[2026-03-01 10:12:28] (step=0022247) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.352768538446488, LR: 0.0003 +[2026-03-01 10:12:35] (step=0022248) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.352964194873802, LR: 0.0003 +[2026-03-01 10:12:43] (step=0022249) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.353159851301116, LR: 0.0003 +[2026-03-01 10:12:51] (step=0022250) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.353355507728429, LR: 0.0003 +[2026-03-01 10:12:59] (step=0022251) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.353551164155743, LR: 0.0003 +[2026-03-01 10:13:07] (step=0022252) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.353746820583056, LR: 0.0003 +[2026-03-01 10:13:15] (step=0022253) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.35394247701037, LR: 0.0003 +[2026-03-01 10:13:22] (step=0022254) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.354138133437684, LR: 0.0003 +[2026-03-01 10:13:30] (step=0022255) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.354333789864997, LR: 0.0003 +[2026-03-01 10:13:38] (step=0022256) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.354529446292311, LR: 0.0003 +[2026-03-01 10:13:46] (step=0022257) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.354725102719624, LR: 0.0003 +[2026-03-01 10:13:54] (step=0022258) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.354920759146938, LR: 0.0003 +[2026-03-01 10:14:02] (step=0022259) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.355116415574251, LR: 0.0003 +[2026-03-01 10:14:09] (step=0022260) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.355312072001565, LR: 0.0003 +[2026-03-01 10:14:17] (step=0022261) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.355507728428879, LR: 0.0003 +[2026-03-01 10:14:25] (step=0022262) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.355703384856192, LR: 0.0003 +[2026-03-01 10:14:33] (step=0022263) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.355899041283506, LR: 0.0003 +[2026-03-01 10:14:41] (step=0022264) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.3560946977108195, LR: 0.0003 +[2026-03-01 10:14:49] (step=0022265) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.3562903541381335, LR: 0.0003 +[2026-03-01 10:14:56] (step=0022266) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.3564860105654475, LR: 0.0003 +[2026-03-01 10:15:04] (step=0022267) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.356681666992761, LR: 0.0003 +[2026-03-01 10:15:12] (step=0022268) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.356877323420075, LR: 0.0003 +[2026-03-01 10:15:20] (step=0022269) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.357072979847388, LR: 0.0003 +[2026-03-01 10:15:28] (step=0022270) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.357268636274702, LR: 0.0003 +[2026-03-01 10:15:36] (step=0022271) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.357464292702015, LR: 0.0003 +[2026-03-01 10:15:43] (step=0022272) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.357659949129329, LR: 0.0003 +[2026-03-01 10:15:51] (step=0022273) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.357855605556643, LR: 0.0003 +[2026-03-01 10:15:59] (step=0022274) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.358051261983956, LR: 0.0003 +[2026-03-01 10:16:07] (step=0022275) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.35824691841127, LR: 0.0003 +[2026-03-01 10:16:15] (step=0022276) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.358442574838583, LR: 0.0003 +[2026-03-01 10:16:23] (step=0022277) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.358638231265897, LR: 0.0003 +[2026-03-01 10:16:30] (step=0022278) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.358833887693211, LR: 0.0003 +[2026-03-01 10:16:38] (step=0022279) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.359029544120524, LR: 0.0003 +[2026-03-01 10:16:46] (step=0022280) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.359225200547838, LR: 0.0003 +[2026-03-01 10:16:54] (step=0022281) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.359420856975151, LR: 0.0003 +[2026-03-01 10:17:02] (step=0022282) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.359616513402465, LR: 0.0003 +[2026-03-01 10:17:10] (step=0022283) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.3598121698297785, LR: 0.0003 +[2026-03-01 10:17:17] (step=0022284) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.3600078262570925, LR: 0.0003 +[2026-03-01 10:17:25] (step=0022285) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 4.3602034826844065, LR: 0.0003 +[2026-03-01 10:17:33] (step=0022286) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.36039913911172, LR: 0.0003 +[2026-03-01 10:17:41] (step=0022287) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.360594795539034, LR: 0.0003 +[2026-03-01 10:17:49] (step=0022288) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.360790451966347, LR: 0.0003 +[2026-03-01 10:17:57] (step=0022289) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 4.360986108393661, LR: 0.0003 +[2026-03-01 10:18:05] (step=0022290) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.361181764820975, LR: 0.0003 +[2026-03-01 10:18:12] (step=0022291) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.361377421248288, LR: 0.0003 +[2026-03-01 10:18:20] (step=0022292) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.361573077675602, LR: 0.0003 +[2026-03-01 10:18:28] (step=0022293) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.361768734102915, LR: 0.0003 +[2026-03-01 10:18:36] (step=0022294) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 4.361964390530229, LR: 0.0003 +[2026-03-01 10:18:44] (step=0022295) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.362160046957542, LR: 0.0003 +[2026-03-01 10:18:52] (step=0022296) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.362355703384856, LR: 0.0003 +[2026-03-01 10:18:59] (step=0022297) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.36255135981217, LR: 0.0003 +[2026-03-01 10:19:07] (step=0022298) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.362747016239483, LR: 0.0003 +[2026-03-01 10:19:15] (step=0022299) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 4.362942672666797, LR: 0.0003 +[2026-03-01 10:19:23] (step=0022300) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.36313832909411, LR: 0.0003 +[2026-03-01 10:19:31] (step=0022301) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.363333985521424, LR: 0.0003 +[2026-03-01 10:19:39] (step=0022302) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.363529641948738, LR: 0.0003 +[2026-03-01 10:19:47] (step=0022303) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.3637252983760515, LR: 0.0003 +[2026-03-01 10:19:54] (step=0022304) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 4.3639209548033655, LR: 0.0003 +[2026-03-01 10:20:02] (step=0022305) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.364116611230679, LR: 0.0003 +[2026-03-01 10:20:10] (step=0022306) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 4.364312267657993, LR: 0.0003 +[2026-03-01 10:20:18] (step=0022307) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.364507924085307, LR: 0.0003 +[2026-03-01 10:20:26] (step=0022308) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.36470358051262, LR: 0.0003 +[2026-03-01 10:20:34] (step=0022309) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.364899236939934, LR: 0.0003 +[2026-03-01 10:20:41] (step=0022310) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.365094893367247, LR: 0.0003 +[2026-03-01 10:20:49] (step=0022311) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.365290549794561, LR: 0.0003 +[2026-03-01 10:20:57] (step=0022312) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.365486206221874, LR: 0.0003 +[2026-03-01 10:21:05] (step=0022313) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.365681862649188, LR: 0.0003 +[2026-03-01 10:21:13] (step=0022314) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.365877519076502, LR: 0.0003 +[2026-03-01 10:21:21] (step=0022315) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.366073175503815, LR: 0.0003 +[2026-03-01 10:21:28] (step=0022316) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.366268831931129, LR: 0.0003 +[2026-03-01 10:21:36] (step=0022317) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 4.366464488358442, LR: 0.0003 +[2026-03-01 10:21:44] (step=0022318) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.366660144785756, LR: 0.0003 +[2026-03-01 10:21:52] (step=0022319) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 4.36685580121307, LR: 0.0003 +[2026-03-01 10:22:00] (step=0022320) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.367051457640383, LR: 0.0003 +[2026-03-01 10:22:08] (step=0022321) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.367247114067697, LR: 0.0003 +[2026-03-01 10:22:15] (step=0022322) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 4.3674427704950105, LR: 0.0003 +[2026-03-01 10:22:23] (step=0022323) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.3676384269223245, LR: 0.0003 +[2026-03-01 10:22:31] (step=0022324) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.367834083349638, LR: 0.0003 +[2026-03-01 10:22:39] (step=0022325) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.368029739776952, LR: 0.0003 +[2026-03-01 10:22:47] (step=0022326) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.368225396204266, LR: 0.0003 +[2026-03-01 10:22:55] (step=0022327) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 4.368421052631579, LR: 0.0003 +[2026-03-01 10:23:02] (step=0022328) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.368616709058893, LR: 0.0003 +[2026-03-01 10:23:10] (step=0022329) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.368812365486206, LR: 0.0003 +[2026-03-01 10:23:18] (step=0022330) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.36900802191352, LR: 0.0003 +[2026-03-01 10:23:26] (step=0022331) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.369203678340834, LR: 0.0003 +[2026-03-01 10:23:34] (step=0022332) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.369399334768147, LR: 0.0003 +[2026-03-01 10:23:42] (step=0022333) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.369594991195461, LR: 0.0003 +[2026-03-01 10:23:50] (step=0022334) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.369790647622774, LR: 0.0003 +[2026-03-01 10:23:57] (step=0022335) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.369986304050088, LR: 0.0003 +[2026-03-01 10:24:05] (step=0022336) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.370181960477401, LR: 0.0003 +[2026-03-01 10:24:13] (step=0022337) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.370377616904715, LR: 0.0003 +[2026-03-01 10:24:21] (step=0022338) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.370573273332029, LR: 0.0003 +[2026-03-01 10:24:29] (step=0022339) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.370768929759342, LR: 0.0003 +[2026-03-01 10:24:37] (step=0022340) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.370964586186656, LR: 0.0003 +[2026-03-01 10:24:44] (step=0022341) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.3711602426139695, LR: 0.0003 +[2026-03-01 10:24:52] (step=0022342) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 4.3713558990412835, LR: 0.0003 +[2026-03-01 10:25:00] (step=0022343) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.3715515554685975, LR: 0.0003 +[2026-03-01 10:25:08] (step=0022344) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.371747211895911, LR: 0.0003 +[2026-03-01 10:25:16] (step=0022345) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.371942868323225, LR: 0.0003 +[2026-03-01 10:25:24] (step=0022346) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.372138524750538, LR: 0.0003 +[2026-03-01 10:25:31] (step=0022347) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 4.372334181177852, LR: 0.0003 +[2026-03-01 10:25:39] (step=0022348) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.372529837605165, LR: 0.0003 +[2026-03-01 10:25:47] (step=0022349) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.372725494032479, LR: 0.0003 +[2026-03-01 10:25:55] (step=0022350) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.372921150459793, LR: 0.0003 +[2026-03-01 10:26:03] (step=0022351) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.373116806887106, LR: 0.0003 +[2026-03-01 10:26:11] (step=0022352) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.37331246331442, LR: 0.0003 +[2026-03-01 10:26:19] (step=0022353) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.373508119741733, LR: 0.0003 +[2026-03-01 10:26:26] (step=0022354) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.373703776169047, LR: 0.0003 +[2026-03-01 10:26:34] (step=0022355) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.373899432596361, LR: 0.0003 +[2026-03-01 10:26:42] (step=0022356) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.374095089023674, LR: 0.0003 +[2026-03-01 10:26:50] (step=0022357) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.374290745450988, LR: 0.0003 +[2026-03-01 10:26:58] (step=0022358) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.374486401878301, LR: 0.0003 +[2026-03-01 10:27:06] (step=0022359) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.374682058305615, LR: 0.0003 +[2026-03-01 10:27:13] (step=0022360) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.374877714732929, LR: 0.0003 +[2026-03-01 10:27:21] (step=0022361) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.3750733711602425, LR: 0.0003 +[2026-03-01 10:27:29] (step=0022362) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 4.3752690275875565, LR: 0.0003 +[2026-03-01 10:27:37] (step=0022363) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.37546468401487, LR: 0.0003 +[2026-03-01 10:27:45] (step=0022364) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.375660340442184, LR: 0.0003 +[2026-03-01 10:27:53] (step=0022365) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.375855996869497, LR: 0.0003 +[2026-03-01 10:28:00] (step=0022366) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.376051653296811, LR: 0.0003 +[2026-03-01 10:28:08] (step=0022367) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.376247309724125, LR: 0.0003 +[2026-03-01 10:28:16] (step=0022368) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.376442966151438, LR: 0.0003 +[2026-03-01 10:28:24] (step=0022369) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 4.376638622578752, LR: 0.0003 +[2026-03-01 10:28:32] (step=0022370) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 4.376834279006065, LR: 0.0003 +[2026-03-01 10:28:40] (step=0022371) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.377029935433379, LR: 0.0003 +[2026-03-01 10:28:47] (step=0022372) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 4.377225591860693, LR: 0.0003 +[2026-03-01 10:28:55] (step=0022373) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.377421248288006, LR: 0.0003 +[2026-03-01 10:29:03] (step=0022374) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.37761690471532, LR: 0.0003 +[2026-03-01 10:29:11] (step=0022375) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.377812561142633, LR: 0.0003 +[2026-03-01 10:29:19] (step=0022376) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.378008217569947, LR: 0.0003 +[2026-03-01 10:29:27] (step=0022377) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.37820387399726, LR: 0.0003 +[2026-03-01 10:29:34] (step=0022378) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 4.378399530424574, LR: 0.0003 +[2026-03-01 10:29:42] (step=0022379) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.378595186851888, LR: 0.0003 +[2026-03-01 10:29:50] (step=0022380) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.3787908432792015, LR: 0.0003 +[2026-03-01 10:29:58] (step=0022381) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.3789864997065155, LR: 0.0003 +[2026-03-01 10:30:06] (step=0022382) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 4.379182156133829, LR: 0.0003 +[2026-03-01 10:30:14] (step=0022383) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.379377812561143, LR: 0.0003 +[2026-03-01 10:30:22] (step=0022384) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.379573468988457, LR: 0.0003 +[2026-03-01 10:30:29] (step=0022385) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.37976912541577, LR: 0.0003 +[2026-03-01 10:30:37] (step=0022386) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.379964781843084, LR: 0.0003 +[2026-03-01 10:30:45] (step=0022387) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.380160438270397, LR: 0.0003 +[2026-03-01 10:30:53] (step=0022388) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.380356094697711, LR: 0.0003 +[2026-03-01 10:31:01] (step=0022389) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.380551751125024, LR: 0.0003 +[2026-03-01 10:31:09] (step=0022390) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.380747407552338, LR: 0.0003 +[2026-03-01 10:31:16] (step=0022391) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.380943063979652, LR: 0.0003 +[2026-03-01 10:31:24] (step=0022392) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.381138720406965, LR: 0.0003 +[2026-03-01 10:31:32] (step=0022393) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 4.381334376834279, LR: 0.0003 +[2026-03-01 10:31:40] (step=0022394) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.381530033261592, LR: 0.0003 +[2026-03-01 10:31:48] (step=0022395) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.381725689688906, LR: 0.0003 +[2026-03-01 10:31:56] (step=0022396) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.38192134611622, LR: 0.0003 +[2026-03-01 10:32:03] (step=0022397) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.382117002543533, LR: 0.0003 +[2026-03-01 10:32:11] (step=0022398) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.382312658970847, LR: 0.0003 +[2026-03-01 10:32:19] (step=0022399) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.3825083153981605, LR: 0.0003 +[2026-03-01 10:32:27] (step=0022400) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.3827039718254746, LR: 0.0003 +[2026-03-01 10:32:35] (step=0022401) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.382899628252788, LR: 0.0003 +[2026-03-01 10:32:43] (step=0022402) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.383095284680102, LR: 0.0003 +[2026-03-01 10:32:51] (step=0022403) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.383290941107416, LR: 0.0003 +[2026-03-01 10:32:58] (step=0022404) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.383486597534729, LR: 0.0003 +[2026-03-01 10:33:06] (step=0022405) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.383682253962043, LR: 0.0003 +[2026-03-01 10:33:14] (step=0022406) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.383877910389356, LR: 0.0003 +[2026-03-01 10:33:22] (step=0022407) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.38407356681667, LR: 0.0003 +[2026-03-01 10:33:30] (step=0022408) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.384269223243984, LR: 0.0003 +[2026-03-01 10:33:38] (step=0022409) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.384464879671297, LR: 0.0003 +[2026-03-01 10:33:45] (step=0022410) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.384660536098611, LR: 0.0003 +[2026-03-01 10:33:53] (step=0022411) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.384856192525924, LR: 0.0003 +[2026-03-01 10:34:01] (step=0022412) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.385051848953238, LR: 0.0003 +[2026-03-01 10:34:09] (step=0022413) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.385247505380552, LR: 0.0003 +[2026-03-01 10:34:17] (step=0022414) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.385443161807865, LR: 0.0003 +[2026-03-01 10:34:24] (step=0022415) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 4.385638818235179, LR: 0.0003 +[2026-03-01 10:34:32] (step=0022416) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.385834474662492, LR: 0.0003 +[2026-03-01 10:34:40] (step=0022417) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.386030131089806, LR: 0.0003 +[2026-03-01 10:34:48] (step=0022418) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.38622578751712, LR: 0.0003 +[2026-03-01 10:34:56] (step=0022419) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.386421443944434, LR: 0.0003 +[2026-03-01 10:35:04] (step=0022420) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.386617100371748, LR: 0.0003 +[2026-03-01 10:35:11] (step=0022421) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.386812756799061, LR: 0.0003 +[2026-03-01 10:35:19] (step=0022422) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.387008413226375, LR: 0.0003 +[2026-03-01 10:35:27] (step=0022423) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.387204069653688, LR: 0.0003 +[2026-03-01 10:35:35] (step=0022424) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.387399726081002, LR: 0.0003 +[2026-03-01 10:35:43] (step=0022425) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.387595382508316, LR: 0.0003 +[2026-03-01 10:35:51] (step=0022426) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.387791038935629, LR: 0.0003 +[2026-03-01 10:35:58] (step=0022427) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.387986695362943, LR: 0.0003 +[2026-03-01 10:36:06] (step=0022428) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 4.388182351790256, LR: 0.0003 +[2026-03-01 10:36:14] (step=0022429) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 4.38837800821757, LR: 0.0003 +[2026-03-01 10:36:22] (step=0022430) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.388573664644883, LR: 0.0003 +[2026-03-01 10:36:30] (step=0022431) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.388769321072197, LR: 0.0003 +[2026-03-01 10:36:38] (step=0022432) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.388964977499511, LR: 0.0003 +[2026-03-01 10:36:45] (step=0022433) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 4.389160633926824, LR: 0.0003 +[2026-03-01 10:36:53] (step=0022434) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.389356290354138, LR: 0.0003 +[2026-03-01 10:37:01] (step=0022435) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.389551946781451, LR: 0.0003 +[2026-03-01 10:37:09] (step=0022436) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.3897476032087654, LR: 0.0003 +[2026-03-01 10:37:17] (step=0022437) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.3899432596360795, LR: 0.0003 +[2026-03-01 10:37:25] (step=0022438) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.390138916063393, LR: 0.0003 +[2026-03-01 10:37:33] (step=0022439) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.390334572490707, LR: 0.0003 +[2026-03-01 10:37:40] (step=0022440) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.39053022891802, LR: 0.0003 +[2026-03-01 10:37:48] (step=0022441) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.390725885345334, LR: 0.0003 +[2026-03-01 10:37:56] (step=0022442) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.390921541772647, LR: 0.0003 +[2026-03-01 10:38:04] (step=0022443) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.391117198199961, LR: 0.0003 +[2026-03-01 10:38:12] (step=0022444) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.391312854627275, LR: 0.0003 +[2026-03-01 10:38:20] (step=0022445) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.391508511054588, LR: 0.0003 +[2026-03-01 10:38:27] (step=0022446) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.391704167481902, LR: 0.0003 +[2026-03-01 10:38:35] (step=0022447) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.391899823909215, LR: 0.0003 +[2026-03-01 10:38:43] (step=0022448) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.392095480336529, LR: 0.0003 +[2026-03-01 10:38:51] (step=0022449) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.392291136763843, LR: 0.0003 +[2026-03-01 10:38:59] (step=0022450) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.392486793191156, LR: 0.0003 +[2026-03-01 10:39:07] (step=0022451) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.39268244961847, LR: 0.0003 +[2026-03-01 10:39:14] (step=0022452) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 4.392878106045783, LR: 0.0003 +[2026-03-01 10:39:22] (step=0022453) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.393073762473097, LR: 0.0003 +[2026-03-01 10:39:30] (step=0022454) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.3932694189004105, LR: 0.0003 +[2026-03-01 10:39:38] (step=0022455) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.3934650753277245, LR: 0.0003 +[2026-03-01 10:39:46] (step=0022456) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.3936607317550385, LR: 0.0003 +[2026-03-01 10:39:54] (step=0022457) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.393856388182352, LR: 0.0003 +[2026-03-01 10:40:02] (step=0022458) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 4.394052044609666, LR: 0.0003 +[2026-03-01 10:40:10] (step=0022459) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.394247701036979, LR: 0.0003 +[2026-03-01 10:40:17] (step=0022460) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.394443357464293, LR: 0.0003 +[2026-03-01 10:40:25] (step=0022461) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 4.394639013891607, LR: 0.0003 +[2026-03-01 10:40:33] (step=0022462) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.39483467031892, LR: 0.0003 +[2026-03-01 10:40:41] (step=0022463) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.395030326746234, LR: 0.0003 +[2026-03-01 10:40:49] (step=0022464) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.395225983173547, LR: 0.0003 +[2026-03-01 10:40:57] (step=0022465) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.395421639600861, LR: 0.0003 +[2026-03-01 10:41:04] (step=0022466) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.395617296028175, LR: 0.0003 +[2026-03-01 10:41:12] (step=0022467) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.395812952455488, LR: 0.0003 +[2026-03-01 10:41:20] (step=0022468) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.396008608882802, LR: 0.0003 +[2026-03-01 10:41:28] (step=0022469) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.396204265310115, LR: 0.0003 +[2026-03-01 10:41:36] (step=0022470) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.396399921737429, LR: 0.0003 +[2026-03-01 10:41:44] (step=0022471) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.396595578164742, LR: 0.0003 +[2026-03-01 10:41:51] (step=0022472) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.396791234592056, LR: 0.0003 +[2026-03-01 10:41:59] (step=0022473) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 4.39698689101937, LR: 0.0003 +[2026-03-01 10:42:07] (step=0022474) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.3971825474466835, LR: 0.0003 +[2026-03-01 10:42:15] (step=0022475) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.3973782038739975, LR: 0.0003 +[2026-03-01 10:42:23] (step=0022476) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.397573860301311, LR: 0.0003 +[2026-03-01 10:42:31] (step=0022477) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.397769516728625, LR: 0.0003 +[2026-03-01 10:42:39] (step=0022478) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 4.397965173155939, LR: 0.0003 +[2026-03-01 10:42:46] (step=0022479) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.398160829583252, LR: 0.0003 +[2026-03-01 10:42:54] (step=0022480) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.398356486010566, LR: 0.0003 +[2026-03-01 10:43:02] (step=0022481) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.398552142437879, LR: 0.0003 +[2026-03-01 10:43:10] (step=0022482) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.398747798865193, LR: 0.0003 +[2026-03-01 10:43:18] (step=0022483) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.398943455292506, LR: 0.0003 +[2026-03-01 10:43:26] (step=0022484) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.39913911171982, LR: 0.0003 +[2026-03-01 10:43:34] (step=0022485) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 4.399334768147134, LR: 0.0003 +[2026-03-01 10:43:41] (step=0022486) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.399530424574447, LR: 0.0003 +[2026-03-01 10:43:49] (step=0022487) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.399726081001761, LR: 0.0003 +[2026-03-01 10:43:57] (step=0022488) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 4.399921737429074, LR: 0.0003 +[2026-03-01 10:44:05] (step=0022489) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 4.400117393856388, LR: 0.0003 +[2026-03-01 10:44:13] (step=0022490) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.400313050283702, LR: 0.0003 +[2026-03-01 10:44:21] (step=0022491) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.400508706711015, LR: 0.0003 +[2026-03-01 10:44:28] (step=0022492) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.400704363138329, LR: 0.0003 +[2026-03-01 10:44:36] (step=0022493) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.4009000195656425, LR: 0.0003 +[2026-03-01 10:44:44] (step=0022494) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.4010956759929565, LR: 0.0003 +[2026-03-01 10:44:52] (step=0022495) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.40129133242027, LR: 0.0003 +[2026-03-01 10:45:00] (step=0022496) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.401486988847584, LR: 0.0003 +[2026-03-01 10:45:08] (step=0022497) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.401682645274898, LR: 0.0003 +[2026-03-01 10:45:15] (step=0022498) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.401878301702211, LR: 0.0003 +[2026-03-01 10:45:23] (step=0022499) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.402073958129525, LR: 0.0003 +[2026-03-01 10:45:31] (step=0022500) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.402269614556838, LR: 0.0003 +[2026-03-01 10:45:31] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0022500/ +[2026-03-01 10:45:39] (step=0022501) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.402465270984152, LR: 0.0003 +[2026-03-01 10:45:47] (step=0022502) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.402660927411466, LR: 0.0003 +[2026-03-01 10:45:55] (step=0022503) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.402856583838779, LR: 0.0003 +[2026-03-01 10:46:03] (step=0022504) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.403052240266093, LR: 0.0003 +[2026-03-01 10:46:10] (step=0022505) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.403247896693406, LR: 0.0003 +[2026-03-01 10:46:18] (step=0022506) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.40344355312072, LR: 0.0003 +[2026-03-01 10:46:26] (step=0022507) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 4.403639209548033, LR: 0.0003 +[2026-03-01 10:46:34] (step=0022508) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.403834865975347, LR: 0.0003 +[2026-03-01 10:46:42] (step=0022509) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.404030522402661, LR: 0.0003 +[2026-03-01 10:46:50] (step=0022510) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.404226178829974, LR: 0.0003 +[2026-03-01 10:46:58] (step=0022511) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.404421835257288, LR: 0.0003 +[2026-03-01 10:47:05] (step=0022512) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.4046174916846015, LR: 0.0003 +[2026-03-01 10:47:13] (step=0022513) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.4048131481119155, LR: 0.0003 +[2026-03-01 10:47:21] (step=0022514) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.4050088045392295, LR: 0.0003 +[2026-03-01 10:47:29] (step=0022515) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.405204460966543, LR: 0.0003 +[2026-03-01 10:47:37] (step=0022516) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.405400117393857, LR: 0.0003 +[2026-03-01 10:47:45] (step=0022517) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.40559577382117, LR: 0.0003 +[2026-03-01 10:47:52] (step=0022518) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.405791430248484, LR: 0.0003 +[2026-03-01 10:48:00] (step=0022519) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.405987086675797, LR: 0.0003 +[2026-03-01 10:48:08] (step=0022520) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 4.406182743103111, LR: 0.0003 +[2026-03-01 10:48:16] (step=0022521) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.406378399530425, LR: 0.0003 +[2026-03-01 10:48:24] (step=0022522) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.406574055957738, LR: 0.0003 +[2026-03-01 10:48:32] (step=0022523) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.406769712385052, LR: 0.0003 +[2026-03-01 10:48:39] (step=0022524) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.406965368812365, LR: 0.0003 +[2026-03-01 10:48:47] (step=0022525) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.407161025239679, LR: 0.0003 +[2026-03-01 10:48:55] (step=0022526) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.407356681666993, LR: 0.0003 +[2026-03-01 10:49:03] (step=0022527) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.407552338094306, LR: 0.0003 +[2026-03-01 10:49:11] (step=0022528) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.40774799452162, LR: 0.0003 +[2026-03-01 10:49:19] (step=0022529) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.407943650948933, LR: 0.0003 +[2026-03-01 10:49:27] (step=0022530) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.408139307376247, LR: 0.0003 +[2026-03-01 10:49:35] (step=0022531) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.408334963803561, LR: 0.0003 +[2026-03-01 10:49:42] (step=0022532) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.4085306202308745, LR: 0.0003 +[2026-03-01 10:49:50] (step=0022533) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.4087262766581885, LR: 0.0003 +[2026-03-01 10:49:58] (step=0022534) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.408921933085502, LR: 0.0003 +[2026-03-01 10:50:06] (step=0022535) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.409117589512816, LR: 0.0003 +[2026-03-01 10:50:14] (step=0022536) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.409313245940129, LR: 0.0003 +[2026-03-01 10:50:22] (step=0022537) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.409508902367443, LR: 0.0003 +[2026-03-01 10:50:29] (step=0022538) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.409704558794757, LR: 0.0003 +[2026-03-01 10:50:37] (step=0022539) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.40990021522207, LR: 0.0003 +[2026-03-01 10:50:45] (step=0022540) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.410095871649384, LR: 0.0003 +[2026-03-01 10:50:53] (step=0022541) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.410291528076697, LR: 0.0003 +[2026-03-01 10:51:01] (step=0022542) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.410487184504011, LR: 0.0003 +[2026-03-01 10:51:09] (step=0022543) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.410682840931325, LR: 0.0003 +[2026-03-01 10:51:16] (step=0022544) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.410878497358638, LR: 0.0003 +[2026-03-01 10:51:24] (step=0022545) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 4.411074153785952, LR: 0.0003 +[2026-03-01 10:51:32] (step=0022546) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.411269810213265, LR: 0.0003 +[2026-03-01 10:51:40] (step=0022547) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.411465466640579, LR: 0.0003 +[2026-03-01 10:51:48] (step=0022548) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.411661123067892, LR: 0.0003 +[2026-03-01 10:51:56] (step=0022549) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.411856779495206, LR: 0.0003 +[2026-03-01 10:52:03] (step=0022550) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 4.41205243592252, LR: 0.0003 +[2026-03-01 10:52:11] (step=0022551) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.4122480923498335, LR: 0.0003 +[2026-03-01 10:52:19] (step=0022552) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 4.4124437487771475, LR: 0.0003 +[2026-03-01 10:52:27] (step=0022553) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.412639405204461, LR: 0.0003 +[2026-03-01 10:52:35] (step=0022554) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.412835061631775, LR: 0.0003 +[2026-03-01 10:52:43] (step=0022555) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.413030718059089, LR: 0.0003 +[2026-03-01 10:52:51] (step=0022556) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.413226374486402, LR: 0.0003 +[2026-03-01 10:52:58] (step=0022557) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.413422030913716, LR: 0.0003 +[2026-03-01 10:53:06] (step=0022558) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.413617687341029, LR: 0.0003 +[2026-03-01 10:53:14] (step=0022559) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.413813343768343, LR: 0.0003 +[2026-03-01 10:53:22] (step=0022560) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.414009000195656, LR: 0.0003 +[2026-03-01 10:53:30] (step=0022561) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.41420465662297, LR: 0.0003 +[2026-03-01 10:53:38] (step=0022562) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.414400313050284, LR: 0.0003 +[2026-03-01 10:53:45] (step=0022563) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.414595969477597, LR: 0.0003 +[2026-03-01 10:53:53] (step=0022564) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.414791625904911, LR: 0.0003 +[2026-03-01 10:54:01] (step=0022565) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.414987282332224, LR: 0.0003 +[2026-03-01 10:54:09] (step=0022566) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.415182938759538, LR: 0.0003 +[2026-03-01 10:54:17] (step=0022567) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.415378595186852, LR: 0.0003 +[2026-03-01 10:54:25] (step=0022568) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 4.415574251614165, LR: 0.0003 +[2026-03-01 10:54:33] (step=0022569) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.415769908041479, LR: 0.0003 +[2026-03-01 10:54:40] (step=0022570) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.4159655644687925, LR: 0.0003 +[2026-03-01 10:54:48] (step=0022571) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 4.4161612208961065, LR: 0.0003 +[2026-03-01 10:54:56] (step=0022572) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.41635687732342, LR: 0.0003 +[2026-03-01 10:55:04] (step=0022573) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.416552533750734, LR: 0.0003 +[2026-03-01 10:55:12] (step=0022574) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 4.416748190178048, LR: 0.0003 +[2026-03-01 10:55:20] (step=0022575) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.416943846605361, LR: 0.0003 +[2026-03-01 10:55:27] (step=0022576) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 4.417139503032675, LR: 0.0003 +[2026-03-01 10:55:35] (step=0022577) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.417335159459988, LR: 0.0003 +[2026-03-01 10:55:43] (step=0022578) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.417530815887302, LR: 0.0003 +[2026-03-01 10:55:51] (step=0022579) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.417726472314616, LR: 0.0003 +[2026-03-01 10:55:59] (step=0022580) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.417922128741929, LR: 0.0003 +[2026-03-01 10:56:07] (step=0022581) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 4.418117785169243, LR: 0.0003 +[2026-03-01 10:56:15] (step=0022582) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.418313441596556, LR: 0.0003 +[2026-03-01 10:56:22] (step=0022583) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.41850909802387, LR: 0.0003 +[2026-03-01 10:56:30] (step=0022584) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.418704754451184, LR: 0.0003 +[2026-03-01 10:56:38] (step=0022585) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.418900410878497, LR: 0.0003 +[2026-03-01 10:56:46] (step=0022586) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.419096067305811, LR: 0.0003 +[2026-03-01 10:56:54] (step=0022587) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.419291723733124, LR: 0.0003 +[2026-03-01 10:57:02] (step=0022588) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.419487380160438, LR: 0.0003 +[2026-03-01 10:57:10] (step=0022589) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.4196830365877515, LR: 0.0003 +[2026-03-01 10:57:17] (step=0022590) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.4198786930150655, LR: 0.0003 +[2026-03-01 10:57:25] (step=0022591) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 4.4200743494423795, LR: 0.0003 +[2026-03-01 10:57:33] (step=0022592) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.420270005869693, LR: 0.0003 +[2026-03-01 10:57:41] (step=0022593) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 4.420465662297007, LR: 0.0003 +[2026-03-01 10:57:49] (step=0022594) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 4.42066131872432, LR: 0.0003 +[2026-03-01 10:57:57] (step=0022595) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.420856975151634, LR: 0.0003 +[2026-03-01 10:58:04] (step=0022596) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.421052631578948, LR: 0.0003 +[2026-03-01 10:58:12] (step=0022597) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.421248288006261, LR: 0.0003 +[2026-03-01 10:58:20] (step=0022598) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.421443944433575, LR: 0.0003 +[2026-03-01 10:58:28] (step=0022599) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.421639600860888, LR: 0.0003 +[2026-03-01 10:58:36] (step=0022600) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 4.421835257288202, LR: 0.0003 +[2026-03-01 10:58:44] (step=0022601) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.422030913715515, LR: 0.0003 +[2026-03-01 10:58:51] (step=0022602) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.422226570142829, LR: 0.0003 +[2026-03-01 10:58:59] (step=0022603) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.422422226570143, LR: 0.0003 +[2026-03-01 10:59:07] (step=0022604) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.422617882997456, LR: 0.0003 +[2026-03-01 10:59:15] (step=0022605) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.42281353942477, LR: 0.0003 +[2026-03-01 10:59:23] (step=0022606) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.423009195852083, LR: 0.0003 +[2026-03-01 10:59:31] (step=0022607) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.423204852279397, LR: 0.0003 +[2026-03-01 10:59:39] (step=0022608) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.423400508706711, LR: 0.0003 +[2026-03-01 10:59:46] (step=0022609) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.4235961651340245, LR: 0.0003 +[2026-03-01 10:59:54] (step=0022610) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.4237918215613385, LR: 0.0003 +[2026-03-01 11:00:02] (step=0022611) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.423987477988652, LR: 0.0003 +[2026-03-01 11:00:10] (step=0022612) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.424183134415966, LR: 0.0003 +[2026-03-01 11:00:18] (step=0022613) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.424378790843279, LR: 0.0003 +[2026-03-01 11:00:26] (step=0022614) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.424574447270593, LR: 0.0003 +[2026-03-01 11:00:34] (step=0022615) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.424770103697907, LR: 0.0003 +[2026-03-01 11:00:41] (step=0022616) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.42496576012522, LR: 0.0003 +[2026-03-01 11:00:49] (step=0022617) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.425161416552534, LR: 0.0003 +[2026-03-01 11:00:57] (step=0022618) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.425357072979847, LR: 0.0003 +[2026-03-01 11:01:05] (step=0022619) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.425552729407161, LR: 0.0003 +[2026-03-01 11:01:13] (step=0022620) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.425748385834475, LR: 0.0003 +[2026-03-01 11:01:21] (step=0022621) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.425944042261788, LR: 0.0003 +[2026-03-01 11:01:28] (step=0022622) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.426139698689102, LR: 0.0003 +[2026-03-01 11:01:36] (step=0022623) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.426335355116415, LR: 0.0003 +[2026-03-01 11:01:44] (step=0022624) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.426531011543729, LR: 0.0003 +[2026-03-01 11:01:52] (step=0022625) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.426726667971042, LR: 0.0003 +[2026-03-01 11:02:00] (step=0022626) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.426922324398356, LR: 0.0003 +[2026-03-01 11:02:08] (step=0022627) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.42711798082567, LR: 0.0003 +[2026-03-01 11:02:15] (step=0022628) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.4273136372529835, LR: 0.0003 +[2026-03-01 11:02:23] (step=0022629) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.4275092936802976, LR: 0.0003 +[2026-03-01 11:02:31] (step=0022630) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.427704950107611, LR: 0.0003 +[2026-03-01 11:02:39] (step=0022631) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.427900606534925, LR: 0.0003 +[2026-03-01 11:02:47] (step=0022632) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.428096262962239, LR: 0.0003 +[2026-03-01 11:02:55] (step=0022633) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.428291919389552, LR: 0.0003 +[2026-03-01 11:03:03] (step=0022634) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.428487575816866, LR: 0.0003 +[2026-03-01 11:03:10] (step=0022635) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.428683232244179, LR: 0.0003 +[2026-03-01 11:03:18] (step=0022636) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.428878888671493, LR: 0.0003 +[2026-03-01 11:03:26] (step=0022637) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 4.429074545098807, LR: 0.0003 +[2026-03-01 11:03:34] (step=0022638) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 4.42927020152612, LR: 0.0003 +[2026-03-01 11:03:42] (step=0022639) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 4.429465857953434, LR: 0.0003 +[2026-03-01 11:03:50] (step=0022640) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.429661514380747, LR: 0.0003 +[2026-03-01 11:03:57] (step=0022641) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.429857170808061, LR: 0.0003 +[2026-03-01 11:04:05] (step=0022642) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.430052827235374, LR: 0.0003 +[2026-03-01 11:04:13] (step=0022643) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.430248483662688, LR: 0.0003 +[2026-03-01 11:04:21] (step=0022644) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.430444140090002, LR: 0.0003 +[2026-03-01 11:04:29] (step=0022645) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.430639796517315, LR: 0.0003 +[2026-03-01 11:04:37] (step=0022646) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.430835452944629, LR: 0.0003 +[2026-03-01 11:04:45] (step=0022647) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.4310311093719426, LR: 0.0003 +[2026-03-01 11:04:52] (step=0022648) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 4.431226765799257, LR: 0.0003 +[2026-03-01 11:05:00] (step=0022649) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.431422422226571, LR: 0.0003 +[2026-03-01 11:05:08] (step=0022650) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.431618078653884, LR: 0.0003 +[2026-03-01 11:05:16] (step=0022651) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.431813735081198, LR: 0.0003 +[2026-03-01 11:05:24] (step=0022652) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.432009391508511, LR: 0.0003 +[2026-03-01 11:05:32] (step=0022653) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.432205047935825, LR: 0.0003 +[2026-03-01 11:05:40] (step=0022654) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.432400704363138, LR: 0.0003 +[2026-03-01 11:05:47] (step=0022655) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 4.432596360790452, LR: 0.0003 +[2026-03-01 11:05:55] (step=0022656) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.432792017217766, LR: 0.0003 +[2026-03-01 11:06:03] (step=0022657) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.432987673645079, LR: 0.0003 +[2026-03-01 11:06:11] (step=0022658) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.433183330072393, LR: 0.0003 +[2026-03-01 11:06:19] (step=0022659) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.433378986499706, LR: 0.0003 +[2026-03-01 11:06:27] (step=0022660) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.43357464292702, LR: 0.0003 +[2026-03-01 11:06:34] (step=0022661) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.433770299354334, LR: 0.0003 +[2026-03-01 11:06:42] (step=0022662) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.433965955781647, LR: 0.0003 +[2026-03-01 11:06:50] (step=0022663) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.434161612208961, LR: 0.0003 +[2026-03-01 11:06:58] (step=0022664) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.434357268636274, LR: 0.0003 +[2026-03-01 11:07:06] (step=0022665) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.4345529250635884, LR: 0.0003 +[2026-03-01 11:07:14] (step=0022666) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.434748581490902, LR: 0.0003 +[2026-03-01 11:07:21] (step=0022667) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.434944237918216, LR: 0.0003 +[2026-03-01 11:07:29] (step=0022668) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.43513989434553, LR: 0.0003 +[2026-03-01 11:07:37] (step=0022669) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.435335550772843, LR: 0.0003 +[2026-03-01 11:07:45] (step=0022670) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.435531207200157, LR: 0.0003 +[2026-03-01 11:07:53] (step=0022671) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.43572686362747, LR: 0.0003 +[2026-03-01 11:08:01] (step=0022672) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.435922520054784, LR: 0.0003 +[2026-03-01 11:08:09] (step=0022673) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 4.436118176482098, LR: 0.0003 +[2026-03-01 11:08:16] (step=0022674) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.436313832909411, LR: 0.0003 +[2026-03-01 11:08:24] (step=0022675) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.436509489336725, LR: 0.0003 +[2026-03-01 11:08:32] (step=0022676) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.436705145764038, LR: 0.0003 +[2026-03-01 11:08:40] (step=0022677) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.436900802191352, LR: 0.0003 +[2026-03-01 11:08:48] (step=0022678) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.437096458618665, LR: 0.0003 +[2026-03-01 11:08:56] (step=0022679) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.437292115045979, LR: 0.0003 +[2026-03-01 11:09:04] (step=0022680) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 4.437487771473293, LR: 0.0003 +[2026-03-01 11:09:11] (step=0022681) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.437683427900606, LR: 0.0003 +[2026-03-01 11:09:19] (step=0022682) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.43787908432792, LR: 0.0003 +[2026-03-01 11:09:27] (step=0022683) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 4.4380747407552334, LR: 0.0003 +[2026-03-01 11:09:35] (step=0022684) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.4382703971825475, LR: 0.0003 +[2026-03-01 11:09:43] (step=0022685) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.4384660536098615, LR: 0.0003 +[2026-03-01 11:09:51] (step=0022686) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 4.438661710037175, LR: 0.0003 +[2026-03-01 11:09:58] (step=0022687) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.438857366464489, LR: 0.0003 +[2026-03-01 11:10:06] (step=0022688) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.439053022891802, LR: 0.0003 +[2026-03-01 11:10:14] (step=0022689) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.439248679319116, LR: 0.0003 +[2026-03-01 11:10:22] (step=0022690) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.439444335746429, LR: 0.0003 +[2026-03-01 11:10:30] (step=0022691) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.439639992173743, LR: 0.0003 +[2026-03-01 11:10:38] (step=0022692) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.439835648601057, LR: 0.0003 +[2026-03-01 11:10:46] (step=0022693) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.44003130502837, LR: 0.0003 +[2026-03-01 11:10:53] (step=0022694) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.440226961455684, LR: 0.0003 +[2026-03-01 11:11:01] (step=0022695) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.440422617882997, LR: 0.0003 +[2026-03-01 11:11:09] (step=0022696) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.440618274310311, LR: 0.0003 +[2026-03-01 11:11:17] (step=0022697) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.440813930737625, LR: 0.0003 +[2026-03-01 11:11:25] (step=0022698) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.441009587164938, LR: 0.0003 +[2026-03-01 11:11:33] (step=0022699) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.441205243592252, LR: 0.0003 +[2026-03-01 11:11:40] (step=0022700) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.441400900019565, LR: 0.0003 +[2026-03-01 11:11:48] (step=0022701) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.441596556446879, LR: 0.0003 +[2026-03-01 11:11:56] (step=0022702) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 4.441792212874193, LR: 0.0003 +[2026-03-01 11:12:04] (step=0022703) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.4419878693015065, LR: 0.0003 +[2026-03-01 11:12:12] (step=0022704) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.4421835257288205, LR: 0.0003 +[2026-03-01 11:12:20] (step=0022705) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.442379182156134, LR: 0.0003 +[2026-03-01 11:12:27] (step=0022706) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.442574838583448, LR: 0.0003 +[2026-03-01 11:12:35] (step=0022707) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.442770495010761, LR: 0.0003 +[2026-03-01 11:12:43] (step=0022708) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.442966151438075, LR: 0.0003 +[2026-03-01 11:12:51] (step=0022709) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.443161807865389, LR: 0.0003 +[2026-03-01 11:12:59] (step=0022710) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.443357464292702, LR: 0.0003 +[2026-03-01 11:13:07] (step=0022711) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.443553120720016, LR: 0.0003 +[2026-03-01 11:13:15] (step=0022712) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.443748777147329, LR: 0.0003 +[2026-03-01 11:13:23] (step=0022713) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.443944433574643, LR: 0.0003 +[2026-03-01 11:13:30] (step=0022714) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.444140090001957, LR: 0.0003 +[2026-03-01 11:13:38] (step=0022715) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.44433574642927, LR: 0.0003 +[2026-03-01 11:13:46] (step=0022716) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.444531402856584, LR: 0.0003 +[2026-03-01 11:13:54] (step=0022717) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.444727059283897, LR: 0.0003 +[2026-03-01 11:14:02] (step=0022718) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 4.444922715711211, LR: 0.0003 +[2026-03-01 11:14:10] (step=0022719) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.445118372138524, LR: 0.0003 +[2026-03-01 11:14:17] (step=0022720) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.445314028565838, LR: 0.0003 +[2026-03-01 11:14:25] (step=0022721) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.445509684993152, LR: 0.0003 +[2026-03-01 11:14:33] (step=0022722) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.4457053414204655, LR: 0.0003 +[2026-03-01 11:14:41] (step=0022723) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.4459009978477795, LR: 0.0003 +[2026-03-01 11:14:49] (step=0022724) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.446096654275093, LR: 0.0003 +[2026-03-01 11:14:57] (step=0022725) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.446292310702407, LR: 0.0003 +[2026-03-01 11:15:04] (step=0022726) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.446487967129721, LR: 0.0003 +[2026-03-01 11:15:12] (step=0022727) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.446683623557034, LR: 0.0003 +[2026-03-01 11:15:20] (step=0022728) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.446879279984348, LR: 0.0003 +[2026-03-01 11:15:28] (step=0022729) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.447074936411661, LR: 0.0003 +[2026-03-01 11:15:36] (step=0022730) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.447270592838975, LR: 0.0003 +[2026-03-01 11:15:44] (step=0022731) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.447466249266288, LR: 0.0003 +[2026-03-01 11:15:52] (step=0022732) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.447661905693602, LR: 0.0003 +[2026-03-01 11:15:59] (step=0022733) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 4.447857562120916, LR: 0.0003 +[2026-03-01 11:16:07] (step=0022734) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.448053218548229, LR: 0.0003 +[2026-03-01 11:16:15] (step=0022735) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.448248874975543, LR: 0.0003 +[2026-03-01 11:16:23] (step=0022736) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.448444531402856, LR: 0.0003 +[2026-03-01 11:16:31] (step=0022737) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.44864018783017, LR: 0.0003 +[2026-03-01 11:16:39] (step=0022738) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.448835844257484, LR: 0.0003 +[2026-03-01 11:16:46] (step=0022739) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.449031500684797, LR: 0.0003 +[2026-03-01 11:16:54] (step=0022740) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.449227157112111, LR: 0.0003 +[2026-03-01 11:17:02] (step=0022741) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.4494228135394245, LR: 0.0003 +[2026-03-01 11:17:10] (step=0022742) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.4496184699667385, LR: 0.0003 +[2026-03-01 11:17:18] (step=0022743) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.449814126394052, LR: 0.0003 +[2026-03-01 11:17:26] (step=0022744) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.450009782821366, LR: 0.0003 +[2026-03-01 11:17:34] (step=0022745) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.45020543924868, LR: 0.0003 +[2026-03-01 11:17:41] (step=0022746) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.450401095675993, LR: 0.0003 +[2026-03-01 11:17:49] (step=0022747) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 4.450596752103307, LR: 0.0003 +[2026-03-01 11:17:57] (step=0022748) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.45079240853062, LR: 0.0003 +[2026-03-01 11:18:05] (step=0022749) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 4.450988064957934, LR: 0.0003 +[2026-03-01 11:18:13] (step=0022750) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.451183721385248, LR: 0.0003 +[2026-03-01 11:18:21] (step=0022751) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.451379377812561, LR: 0.0003 +[2026-03-01 11:18:28] (step=0022752) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 4.451575034239875, LR: 0.0003 +[2026-03-01 11:18:36] (step=0022753) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.451770690667188, LR: 0.0003 +[2026-03-01 11:18:44] (step=0022754) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.451966347094502, LR: 0.0003 +[2026-03-01 11:18:52] (step=0022755) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 4.452162003521816, LR: 0.0003 +[2026-03-01 11:19:00] (step=0022756) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.452357659949129, LR: 0.0003 +[2026-03-01 11:19:08] (step=0022757) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.452553316376443, LR: 0.0003 +[2026-03-01 11:19:15] (step=0022758) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.452748972803756, LR: 0.0003 +[2026-03-01 11:19:23] (step=0022759) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.45294462923107, LR: 0.0003 +[2026-03-01 11:19:31] (step=0022760) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.4531402856583835, LR: 0.0003 +[2026-03-01 11:19:39] (step=0022761) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.4533359420856975, LR: 0.0003 +[2026-03-01 11:19:47] (step=0022762) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.4535315985130115, LR: 0.0003 +[2026-03-01 11:19:55] (step=0022763) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.453727254940325, LR: 0.0003 +[2026-03-01 11:20:03] (step=0022764) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.453922911367639, LR: 0.0003 +[2026-03-01 11:20:10] (step=0022765) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.454118567794952, LR: 0.0003 +[2026-03-01 11:20:18] (step=0022766) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.454314224222266, LR: 0.0003 +[2026-03-01 11:20:26] (step=0022767) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.45450988064958, LR: 0.0003 +[2026-03-01 11:20:34] (step=0022768) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.454705537076893, LR: 0.0003 +[2026-03-01 11:20:42] (step=0022769) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.454901193504207, LR: 0.0003 +[2026-03-01 11:20:50] (step=0022770) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.45509684993152, LR: 0.0003 +[2026-03-01 11:20:57] (step=0022771) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.455292506358834, LR: 0.0003 +[2026-03-01 11:21:05] (step=0022772) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 4.455488162786147, LR: 0.0003 +[2026-03-01 11:21:13] (step=0022773) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.455683819213461, LR: 0.0003 +[2026-03-01 11:21:21] (step=0022774) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.455879475640775, LR: 0.0003 +[2026-03-01 11:21:29] (step=0022775) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.456075132068088, LR: 0.0003 +[2026-03-01 11:21:37] (step=0022776) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.456270788495402, LR: 0.0003 +[2026-03-01 11:21:45] (step=0022777) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.456466444922715, LR: 0.0003 +[2026-03-01 11:21:53] (step=0022778) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.456662101350029, LR: 0.0003 +[2026-03-01 11:22:00] (step=0022779) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.456857757777343, LR: 0.0003 +[2026-03-01 11:22:08] (step=0022780) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.4570534142046565, LR: 0.0003 +[2026-03-01 11:22:16] (step=0022781) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.4572490706319705, LR: 0.0003 +[2026-03-01 11:22:24] (step=0022782) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.457444727059284, LR: 0.0003 +[2026-03-01 11:22:32] (step=0022783) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.457640383486598, LR: 0.0003 +[2026-03-01 11:22:40] (step=0022784) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.457836039913911, LR: 0.0003 +[2026-03-01 11:22:47] (step=0022785) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.458031696341225, LR: 0.0003 +[2026-03-01 11:22:55] (step=0022786) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.458227352768539, LR: 0.0003 +[2026-03-01 11:23:03] (step=0022787) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.458423009195852, LR: 0.0003 +[2026-03-01 11:23:11] (step=0022788) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 4.458618665623166, LR: 0.0003 +[2026-03-01 11:23:19] (step=0022789) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.458814322050479, LR: 0.0003 +[2026-03-01 11:23:27] (step=0022790) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.459009978477793, LR: 0.0003 +[2026-03-01 11:23:34] (step=0022791) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.459205634905107, LR: 0.0003 +[2026-03-01 11:23:42] (step=0022792) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.45940129133242, LR: 0.0003 +[2026-03-01 11:23:50] (step=0022793) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.459596947759734, LR: 0.0003 +[2026-03-01 11:23:58] (step=0022794) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.459792604187047, LR: 0.0003 +[2026-03-01 11:24:06] (step=0022795) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.459988260614361, LR: 0.0003 +[2026-03-01 11:24:14] (step=0022796) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.460183917041674, LR: 0.0003 +[2026-03-01 11:24:22] (step=0022797) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.460379573468988, LR: 0.0003 +[2026-03-01 11:24:29] (step=0022798) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.460575229896302, LR: 0.0003 +[2026-03-01 11:24:37] (step=0022799) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.4607708863236155, LR: 0.0003 +[2026-03-01 11:24:45] (step=0022800) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.4609665427509295, LR: 0.0003 +[2026-03-01 11:24:53] (step=0022801) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.461162199178243, LR: 0.0003 +[2026-03-01 11:25:01] (step=0022802) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 4.461357855605557, LR: 0.0003 +[2026-03-01 11:25:09] (step=0022803) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.461553512032871, LR: 0.0003 +[2026-03-01 11:25:16] (step=0022804) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.461749168460184, LR: 0.0003 +[2026-03-01 11:25:24] (step=0022805) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.461944824887498, LR: 0.0003 +[2026-03-01 11:25:32] (step=0022806) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.462140481314811, LR: 0.0003 +[2026-03-01 11:25:40] (step=0022807) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.462336137742125, LR: 0.0003 +[2026-03-01 11:25:48] (step=0022808) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.462531794169439, LR: 0.0003 +[2026-03-01 11:25:56] (step=0022809) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.462727450596752, LR: 0.0003 +[2026-03-01 11:26:04] (step=0022810) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.462923107024066, LR: 0.0003 +[2026-03-01 11:26:11] (step=0022811) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.463118763451379, LR: 0.0003 +[2026-03-01 11:26:19] (step=0022812) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 4.463314419878693, LR: 0.0003 +[2026-03-01 11:26:27] (step=0022813) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.463510076306006, LR: 0.0003 +[2026-03-01 11:26:35] (step=0022814) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.46370573273332, LR: 0.0003 +[2026-03-01 11:26:43] (step=0022815) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.463901389160634, LR: 0.0003 +[2026-03-01 11:26:51] (step=0022816) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.464097045587947, LR: 0.0003 +[2026-03-01 11:26:59] (step=0022817) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.464292702015261, LR: 0.0003 +[2026-03-01 11:27:07] (step=0022818) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.4644883584425745, LR: 0.0003 +[2026-03-01 11:27:14] (step=0022819) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.4646840148698885, LR: 0.0003 +[2026-03-01 11:27:22] (step=0022820) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 4.4648796712972025, LR: 0.0003 +[2026-03-01 11:27:30] (step=0022821) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.465075327724516, LR: 0.0003 +[2026-03-01 11:27:38] (step=0022822) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.46527098415183, LR: 0.0003 +[2026-03-01 11:27:46] (step=0022823) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.465466640579143, LR: 0.0003 +[2026-03-01 11:27:54] (step=0022824) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.465662297006457, LR: 0.0003 +[2026-03-01 11:28:01] (step=0022825) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.46585795343377, LR: 0.0003 +[2026-03-01 11:28:09] (step=0022826) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.466053609861084, LR: 0.0003 +[2026-03-01 11:28:17] (step=0022827) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.466249266288398, LR: 0.0003 +[2026-03-01 11:28:25] (step=0022828) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.466444922715711, LR: 0.0003 +[2026-03-01 11:28:33] (step=0022829) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.466640579143025, LR: 0.0003 +[2026-03-01 11:28:41] (step=0022830) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.466836235570338, LR: 0.0003 +[2026-03-01 11:28:48] (step=0022831) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.467031891997652, LR: 0.0003 +[2026-03-01 11:28:56] (step=0022832) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.467227548424966, LR: 0.0003 +[2026-03-01 11:29:04] (step=0022833) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.467423204852279, LR: 0.0003 +[2026-03-01 11:29:12] (step=0022834) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.467618861279593, LR: 0.0003 +[2026-03-01 11:29:20] (step=0022835) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.467814517706906, LR: 0.0003 +[2026-03-01 11:29:28] (step=0022836) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.46801017413422, LR: 0.0003 +[2026-03-01 11:29:35] (step=0022837) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.4682058305615335, LR: 0.0003 +[2026-03-01 11:29:43] (step=0022838) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.4684014869888475, LR: 0.0003 +[2026-03-01 11:29:51] (step=0022839) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.4685971434161615, LR: 0.0003 +[2026-03-01 11:29:59] (step=0022840) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.468792799843475, LR: 0.0003 +[2026-03-01 11:30:07] (step=0022841) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 4.468988456270789, LR: 0.0003 +[2026-03-01 11:30:15] (step=0022842) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.469184112698102, LR: 0.0003 +[2026-03-01 11:30:23] (step=0022843) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.469379769125416, LR: 0.0003 +[2026-03-01 11:30:30] (step=0022844) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.46957542555273, LR: 0.0003 +[2026-03-01 11:30:38] (step=0022845) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.469771081980043, LR: 0.0003 +[2026-03-01 11:30:46] (step=0022846) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.469966738407357, LR: 0.0003 +[2026-03-01 11:30:54] (step=0022847) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.47016239483467, LR: 0.0003 +[2026-03-01 11:31:02] (step=0022848) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.470358051261984, LR: 0.0003 +[2026-03-01 11:31:10] (step=0022849) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.470553707689297, LR: 0.0003 +[2026-03-01 11:31:17] (step=0022850) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.470749364116611, LR: 0.0003 +[2026-03-01 11:31:25] (step=0022851) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 4.470945020543925, LR: 0.0003 +[2026-03-01 11:31:33] (step=0022852) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.471140676971238, LR: 0.0003 +[2026-03-01 11:31:41] (step=0022853) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.471336333398552, LR: 0.0003 +[2026-03-01 11:31:49] (step=0022854) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.471531989825865, LR: 0.0003 +[2026-03-01 11:31:57] (step=0022855) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.471727646253179, LR: 0.0003 +[2026-03-01 11:32:04] (step=0022856) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.471923302680493, LR: 0.0003 +[2026-03-01 11:32:12] (step=0022857) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.4721189591078065, LR: 0.0003 +[2026-03-01 11:32:20] (step=0022858) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.4723146155351206, LR: 0.0003 +[2026-03-01 11:32:28] (step=0022859) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.472510271962434, LR: 0.0003 +[2026-03-01 11:32:36] (step=0022860) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 4.472705928389748, LR: 0.0003 +[2026-03-01 11:32:44] (step=0022861) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.472901584817062, LR: 0.0003 +[2026-03-01 11:32:52] (step=0022862) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.473097241244375, LR: 0.0003 +[2026-03-01 11:32:59] (step=0022863) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.473292897671689, LR: 0.0003 +[2026-03-01 11:33:07] (step=0022864) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.473488554099002, LR: 0.0003 +[2026-03-01 11:33:15] (step=0022865) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.473684210526316, LR: 0.0003 +[2026-03-01 11:33:23] (step=0022866) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.473879866953629, LR: 0.0003 +[2026-03-01 11:33:31] (step=0022867) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.474075523380943, LR: 0.0003 +[2026-03-01 11:33:39] (step=0022868) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.474271179808257, LR: 0.0003 +[2026-03-01 11:33:47] (step=0022869) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.47446683623557, LR: 0.0003 +[2026-03-01 11:33:54] (step=0022870) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.474662492662884, LR: 0.0003 +[2026-03-01 11:34:02] (step=0022871) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.474858149090197, LR: 0.0003 +[2026-03-01 11:34:10] (step=0022872) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.475053805517511, LR: 0.0003 +[2026-03-01 11:34:18] (step=0022873) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.475249461944825, LR: 0.0003 +[2026-03-01 11:34:26] (step=0022874) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.475445118372138, LR: 0.0003 +[2026-03-01 11:34:34] (step=0022875) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.475640774799452, LR: 0.0003 +[2026-03-01 11:34:42] (step=0022876) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.4758364312267656, LR: 0.0003 +[2026-03-01 11:34:49] (step=0022877) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.47603208765408, LR: 0.0003 +[2026-03-01 11:34:57] (step=0022878) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.476227744081393, LR: 0.0003 +[2026-03-01 11:35:05] (step=0022879) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 4.476423400508707, LR: 0.0003 +[2026-03-01 11:35:13] (step=0022880) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.476619056936021, LR: 0.0003 +[2026-03-01 11:35:21] (step=0022881) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.476814713363334, LR: 0.0003 +[2026-03-01 11:35:29] (step=0022882) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.477010369790648, LR: 0.0003 +[2026-03-01 11:35:36] (step=0022883) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.477206026217961, LR: 0.0003 +[2026-03-01 11:35:44] (step=0022884) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.477401682645275, LR: 0.0003 +[2026-03-01 11:35:52] (step=0022885) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.477597339072589, LR: 0.0003 +[2026-03-01 11:36:00] (step=0022886) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.477792995499902, LR: 0.0003 +[2026-03-01 11:36:08] (step=0022887) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.477988651927216, LR: 0.0003 +[2026-03-01 11:36:16] (step=0022888) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 4.478184308354529, LR: 0.0003 +[2026-03-01 11:36:23] (step=0022889) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.478379964781843, LR: 0.0003 +[2026-03-01 11:36:31] (step=0022890) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 4.478575621209156, LR: 0.0003 +[2026-03-01 11:36:39] (step=0022891) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.47877127763647, LR: 0.0003 +[2026-03-01 11:36:47] (step=0022892) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.478966934063784, LR: 0.0003 +[2026-03-01 11:36:55] (step=0022893) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.479162590491097, LR: 0.0003 +[2026-03-01 11:37:03] (step=0022894) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.4793582469184114, LR: 0.0003 +[2026-03-01 11:37:11] (step=0022895) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.479553903345725, LR: 0.0003 +[2026-03-01 11:37:18] (step=0022896) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.479749559773039, LR: 0.0003 +[2026-03-01 11:37:26] (step=0022897) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.479945216200353, LR: 0.0003 +[2026-03-01 11:37:34] (step=0022898) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.480140872627666, LR: 0.0003 +[2026-03-01 11:37:42] (step=0022899) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.48033652905498, LR: 0.0003 +[2026-03-01 11:37:50] (step=0022900) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.480532185482293, LR: 0.0003 +[2026-03-01 11:37:58] (step=0022901) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 4.480727841909607, LR: 0.0003 +[2026-03-01 11:38:05] (step=0022902) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 4.48092349833692, LR: 0.0003 +[2026-03-01 11:38:13] (step=0022903) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.481119154764234, LR: 0.0003 +[2026-03-01 11:38:21] (step=0022904) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.481314811191548, LR: 0.0003 +[2026-03-01 11:38:29] (step=0022905) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.481510467618861, LR: 0.0003 +[2026-03-01 11:38:37] (step=0022906) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.481706124046175, LR: 0.0003 +[2026-03-01 11:38:45] (step=0022907) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.481901780473488, LR: 0.0003 +[2026-03-01 11:38:52] (step=0022908) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.482097436900802, LR: 0.0003 +[2026-03-01 11:39:00] (step=0022909) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.482293093328116, LR: 0.0003 +[2026-03-01 11:39:08] (step=0022910) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.482488749755429, LR: 0.0003 +[2026-03-01 11:39:16] (step=0022911) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.482684406182743, LR: 0.0003 +[2026-03-01 11:39:24] (step=0022912) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.4828800626100564, LR: 0.0003 +[2026-03-01 11:39:32] (step=0022913) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.4830757190373705, LR: 0.0003 +[2026-03-01 11:39:40] (step=0022914) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.483271375464684, LR: 0.0003 +[2026-03-01 11:39:47] (step=0022915) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.483467031891998, LR: 0.0003 +[2026-03-01 11:39:55] (step=0022916) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.483662688319312, LR: 0.0003 +[2026-03-01 11:40:03] (step=0022917) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.483858344746625, LR: 0.0003 +[2026-03-01 11:40:11] (step=0022918) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.484054001173939, LR: 0.0003 +[2026-03-01 11:40:19] (step=0022919) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.484249657601252, LR: 0.0003 +[2026-03-01 11:40:27] (step=0022920) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.484445314028566, LR: 0.0003 +[2026-03-01 11:40:35] (step=0022921) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 4.48464097045588, LR: 0.0003 +[2026-03-01 11:40:42] (step=0022922) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.484836626883193, LR: 0.0003 +[2026-03-01 11:40:50] (step=0022923) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.485032283310507, LR: 0.0003 +[2026-03-01 11:40:58] (step=0022924) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.48522793973782, LR: 0.0003 +[2026-03-01 11:41:06] (step=0022925) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.485423596165134, LR: 0.0003 +[2026-03-01 11:41:14] (step=0022926) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 4.485619252592448, LR: 0.0003 +[2026-03-01 11:41:22] (step=0022927) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.485814909019761, LR: 0.0003 +[2026-03-01 11:41:30] (step=0022928) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.486010565447075, LR: 0.0003 +[2026-03-01 11:41:37] (step=0022929) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.486206221874388, LR: 0.0003 +[2026-03-01 11:41:45] (step=0022930) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.486401878301702, LR: 0.0003 +[2026-03-01 11:41:53] (step=0022931) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.4865975347290155, LR: 0.0003 +[2026-03-01 11:42:01] (step=0022932) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.4867931911563295, LR: 0.0003 +[2026-03-01 11:42:09] (step=0022933) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.4869888475836435, LR: 0.0003 +[2026-03-01 11:42:17] (step=0022934) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.487184504010957, LR: 0.0003 +[2026-03-01 11:42:24] (step=0022935) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.487380160438271, LR: 0.0003 +[2026-03-01 11:42:32] (step=0022936) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.487575816865584, LR: 0.0003 +[2026-03-01 11:42:40] (step=0022937) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.487771473292898, LR: 0.0003 +[2026-03-01 11:42:48] (step=0022938) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.487967129720212, LR: 0.0003 +[2026-03-01 11:42:56] (step=0022939) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.488162786147525, LR: 0.0003 +[2026-03-01 11:43:04] (step=0022940) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.488358442574839, LR: 0.0003 +[2026-03-01 11:43:11] (step=0022941) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.488554099002152, LR: 0.0003 +[2026-03-01 11:43:19] (step=0022942) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.488749755429466, LR: 0.0003 +[2026-03-01 11:43:27] (step=0022943) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.488945411856779, LR: 0.0003 +[2026-03-01 11:43:35] (step=0022944) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.489141068284093, LR: 0.0003 +[2026-03-01 11:43:43] (step=0022945) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.489336724711407, LR: 0.0003 +[2026-03-01 11:43:51] (step=0022946) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.48953238113872, LR: 0.0003 +[2026-03-01 11:43:58] (step=0022947) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 4.489728037566034, LR: 0.0003 +[2026-03-01 11:44:06] (step=0022948) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.489923693993347, LR: 0.0003 +[2026-03-01 11:44:14] (step=0022949) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.490119350420661, LR: 0.0003 +[2026-03-01 11:44:22] (step=0022950) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.490315006847975, LR: 0.0003 +[2026-03-01 11:44:30] (step=0022951) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.4905106632752885, LR: 0.0003 +[2026-03-01 11:44:38] (step=0022952) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.4907063197026025, LR: 0.0003 +[2026-03-01 11:44:45] (step=0022953) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.490901976129916, LR: 0.0003 +[2026-03-01 11:44:53] (step=0022954) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.49109763255723, LR: 0.0003 +[2026-03-01 11:45:01] (step=0022955) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.491293288984543, LR: 0.0003 +[2026-03-01 11:45:09] (step=0022956) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 4.491488945411857, LR: 0.0003 +[2026-03-01 11:45:17] (step=0022957) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.491684601839171, LR: 0.0003 +[2026-03-01 11:45:25] (step=0022958) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.491880258266484, LR: 0.0003 +[2026-03-01 11:45:33] (step=0022959) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.492075914693798, LR: 0.0003 +[2026-03-01 11:45:40] (step=0022960) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.492271571121111, LR: 0.0003 +[2026-03-01 11:45:48] (step=0022961) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.492467227548425, LR: 0.0003 +[2026-03-01 11:45:56] (step=0022962) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.492662883975739, LR: 0.0003 +[2026-03-01 11:46:04] (step=0022963) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.492858540403052, LR: 0.0003 +[2026-03-01 11:46:12] (step=0022964) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.493054196830366, LR: 0.0003 +[2026-03-01 11:46:20] (step=0022965) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.493249853257679, LR: 0.0003 +[2026-03-01 11:46:27] (step=0022966) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.493445509684993, LR: 0.0003 +[2026-03-01 11:46:35] (step=0022967) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.493641166112306, LR: 0.0003 +[2026-03-01 11:46:43] (step=0022968) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.49383682253962, LR: 0.0003 +[2026-03-01 11:46:51] (step=0022969) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.494032478966934, LR: 0.0003 +[2026-03-01 11:46:59] (step=0022970) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.4942281353942475, LR: 0.0003 +[2026-03-01 11:47:07] (step=0022971) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.4944237918215615, LR: 0.0003 +[2026-03-01 11:47:15] (step=0022972) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.494619448248875, LR: 0.0003 +[2026-03-01 11:47:23] (step=0022973) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.494815104676189, LR: 0.0003 +[2026-03-01 11:47:30] (step=0022974) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.495010761103503, LR: 0.0003 +[2026-03-01 11:47:38] (step=0022975) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.495206417530816, LR: 0.0003 +[2026-03-01 11:47:46] (step=0022976) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.49540207395813, LR: 0.0003 +[2026-03-01 11:47:54] (step=0022977) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.495597730385443, LR: 0.0003 +[2026-03-01 11:48:02] (step=0022978) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.495793386812757, LR: 0.0003 +[2026-03-01 11:48:10] (step=0022979) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.495989043240071, LR: 0.0003 +[2026-03-01 11:48:17] (step=0022980) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.496184699667384, LR: 0.0003 +[2026-03-01 11:48:25] (step=0022981) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.496380356094698, LR: 0.0003 +[2026-03-01 11:48:33] (step=0022982) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.496576012522011, LR: 0.0003 +[2026-03-01 11:48:41] (step=0022983) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 4.496771668949325, LR: 0.0003 +[2026-03-01 11:48:49] (step=0022984) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.496967325376638, LR: 0.0003 +[2026-03-01 11:48:57] (step=0022985) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.497162981803952, LR: 0.0003 +[2026-03-01 11:49:05] (step=0022986) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.497358638231266, LR: 0.0003 +[2026-03-01 11:49:12] (step=0022987) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.497554294658579, LR: 0.0003 +[2026-03-01 11:49:20] (step=0022988) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.497749951085893, LR: 0.0003 +[2026-03-01 11:49:28] (step=0022989) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.4979456075132065, LR: 0.0003 +[2026-03-01 11:49:36] (step=0022990) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.4981412639405205, LR: 0.0003 +[2026-03-01 11:49:44] (step=0022991) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.4983369203678345, LR: 0.0003 +[2026-03-01 11:49:52] (step=0022992) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 4.498532576795148, LR: 0.0003 +[2026-03-01 11:49:59] (step=0022993) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.498728233222462, LR: 0.0003 +[2026-03-01 11:50:07] (step=0022994) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.498923889649775, LR: 0.0003 +[2026-03-01 11:50:15] (step=0022995) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.499119546077089, LR: 0.0003 +[2026-03-01 11:50:23] (step=0022996) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 4.499315202504402, LR: 0.0003 +[2026-03-01 11:50:31] (step=0022997) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.499510858931716, LR: 0.0003 +[2026-03-01 11:50:39] (step=0022998) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.49970651535903, LR: 0.0003 +[2026-03-01 11:50:46] (step=0022999) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 4.499902171786343, LR: 0.0003 +[2026-03-01 11:50:54] (step=0023000) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.500097828213657, LR: 0.0003 +[2026-03-01 11:50:54] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0023000/ +[2026-03-01 11:51:02] (step=0023001) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.50029348464097, LR: 0.0003 +[2026-03-01 11:51:10] (step=0023002) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.500489141068284, LR: 0.0003 +[2026-03-01 11:51:18] (step=0023003) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 4.500684797495598, LR: 0.0003 +[2026-03-01 11:51:26] (step=0023004) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.500880453922911, LR: 0.0003 +[2026-03-01 11:51:34] (step=0023005) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.501076110350225, LR: 0.0003 +[2026-03-01 11:51:41] (step=0023006) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.501271766777538, LR: 0.0003 +[2026-03-01 11:51:49] (step=0023007) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.501467423204852, LR: 0.0003 +[2026-03-01 11:51:57] (step=0023008) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 4.5016630796321655, LR: 0.0003 +[2026-03-01 11:52:05] (step=0023009) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.5018587360594795, LR: 0.0003 +[2026-03-01 11:52:13] (step=0023010) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.5020543924867935, LR: 0.0003 +[2026-03-01 11:52:21] (step=0023011) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 4.502250048914107, LR: 0.0003 +[2026-03-01 11:52:28] (step=0023012) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.502445705341421, LR: 0.0003 +[2026-03-01 11:52:36] (step=0023013) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 4.502641361768734, LR: 0.0003 +[2026-03-01 11:52:44] (step=0023014) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.502837018196048, LR: 0.0003 +[2026-03-01 11:52:52] (step=0023015) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.503032674623362, LR: 0.0003 +[2026-03-01 11:53:00] (step=0023016) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.503228331050675, LR: 0.0003 +[2026-03-01 11:53:08] (step=0023017) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.503423987477989, LR: 0.0003 +[2026-03-01 11:53:16] (step=0023018) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.503619643905302, LR: 0.0003 +[2026-03-01 11:53:23] (step=0023019) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.503815300332616, LR: 0.0003 +[2026-03-01 11:53:31] (step=0023020) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.504010956759929, LR: 0.0003 +[2026-03-01 11:53:39] (step=0023021) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.504206613187243, LR: 0.0003 +[2026-03-01 11:53:47] (step=0023022) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.504402269614557, LR: 0.0003 +[2026-03-01 11:53:55] (step=0023023) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.50459792604187, LR: 0.0003 +[2026-03-01 11:54:03] (step=0023024) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.504793582469184, LR: 0.0003 +[2026-03-01 11:54:11] (step=0023025) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 4.504989238896497, LR: 0.0003 +[2026-03-01 11:54:18] (step=0023026) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.505184895323811, LR: 0.0003 +[2026-03-01 11:54:26] (step=0023027) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.505380551751125, LR: 0.0003 +[2026-03-01 11:54:34] (step=0023028) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.5055762081784385, LR: 0.0003 +[2026-03-01 11:54:42] (step=0023029) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.5057718646057525, LR: 0.0003 +[2026-03-01 11:54:50] (step=0023030) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 4.505967521033066, LR: 0.0003 +[2026-03-01 11:54:58] (step=0023031) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.50616317746038, LR: 0.0003 +[2026-03-01 11:55:05] (step=0023032) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.506358833887694, LR: 0.0003 +[2026-03-01 11:55:13] (step=0023033) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.506554490315007, LR: 0.0003 +[2026-03-01 11:55:21] (step=0023034) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.506750146742321, LR: 0.0003 +[2026-03-01 11:55:29] (step=0023035) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.506945803169634, LR: 0.0003 +[2026-03-01 11:55:37] (step=0023036) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.507141459596948, LR: 0.0003 +[2026-03-01 11:55:45] (step=0023037) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.507337116024261, LR: 0.0003 +[2026-03-01 11:55:52] (step=0023038) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.507532772451575, LR: 0.0003 +[2026-03-01 11:56:00] (step=0023039) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.507728428878889, LR: 0.0003 +[2026-03-01 11:56:08] (step=0023040) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.507924085306202, LR: 0.0003 +[2026-03-01 11:56:16] (step=0023041) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.508119741733516, LR: 0.0003 +[2026-03-01 11:56:24] (step=0023042) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 4.508315398160829, LR: 0.0003 +[2026-03-01 11:56:32] (step=0023043) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.508511054588143, LR: 0.0003 +[2026-03-01 11:56:40] (step=0023044) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.508706711015457, LR: 0.0003 +[2026-03-01 11:56:47] (step=0023045) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.50890236744277, LR: 0.0003 +[2026-03-01 11:56:55] (step=0023046) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.509098023870084, LR: 0.0003 +[2026-03-01 11:57:03] (step=0023047) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.5092936802973975, LR: 0.0003 +[2026-03-01 11:57:11] (step=0023048) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.5094893367247115, LR: 0.0003 +[2026-03-01 11:57:19] (step=0023049) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.509684993152025, LR: 0.0003 +[2026-03-01 11:57:27] (step=0023050) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.509880649579339, LR: 0.0003 +[2026-03-01 11:57:34] (step=0023051) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.510076306006653, LR: 0.0003 +[2026-03-01 11:57:42] (step=0023052) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.510271962433966, LR: 0.0003 +[2026-03-01 11:57:50] (step=0023053) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.51046761886128, LR: 0.0003 +[2026-03-01 11:57:58] (step=0023054) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.510663275288593, LR: 0.0003 +[2026-03-01 11:58:06] (step=0023055) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.510858931715907, LR: 0.0003 +[2026-03-01 11:58:14] (step=0023056) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.511054588143221, LR: 0.0003 +[2026-03-01 11:58:21] (step=0023057) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.511250244570534, LR: 0.0003 +[2026-03-01 11:58:29] (step=0023058) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.511445900997848, LR: 0.0003 +[2026-03-01 11:58:37] (step=0023059) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.511641557425161, LR: 0.0003 +[2026-03-01 11:58:45] (step=0023060) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.511837213852475, LR: 0.0003 +[2026-03-01 11:58:53] (step=0023061) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.512032870279788, LR: 0.0003 +[2026-03-01 11:59:01] (step=0023062) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 4.512228526707102, LR: 0.0003 +[2026-03-01 11:59:09] (step=0023063) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 4.512424183134416, LR: 0.0003 +[2026-03-01 11:59:16] (step=0023064) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.512619839561729, LR: 0.0003 +[2026-03-01 11:59:24] (step=0023065) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.512815495989043, LR: 0.0003 +[2026-03-01 11:59:32] (step=0023066) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.5130111524163565, LR: 0.0003 +[2026-03-01 11:59:40] (step=0023067) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 4.5132068088436705, LR: 0.0003 +[2026-03-01 11:59:48] (step=0023068) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.5134024652709845, LR: 0.0003 +[2026-03-01 11:59:56] (step=0023069) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.513598121698298, LR: 0.0003 +[2026-03-01 12:00:04] (step=0023070) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.513793778125612, LR: 0.0003 +[2026-03-01 12:00:11] (step=0023071) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.513989434552925, LR: 0.0003 +[2026-03-01 12:00:19] (step=0023072) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.514185090980239, LR: 0.0003 +[2026-03-01 12:00:27] (step=0023073) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.514380747407552, LR: 0.0003 +[2026-03-01 12:00:35] (step=0023074) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.514576403834866, LR: 0.0003 +[2026-03-01 12:00:43] (step=0023075) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.51477206026218, LR: 0.0003 +[2026-03-01 12:00:51] (step=0023076) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.514967716689493, LR: 0.0003 +[2026-03-01 12:00:59] (step=0023077) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.515163373116807, LR: 0.0003 +[2026-03-01 12:01:06] (step=0023078) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.51535902954412, LR: 0.0003 +[2026-03-01 12:01:14] (step=0023079) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.515554685971434, LR: 0.0003 +[2026-03-01 12:01:22] (step=0023080) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.515750342398748, LR: 0.0003 +[2026-03-01 12:01:30] (step=0023081) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.515945998826061, LR: 0.0003 +[2026-03-01 12:01:38] (step=0023082) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 4.516141655253375, LR: 0.0003 +[2026-03-01 12:01:46] (step=0023083) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 4.516337311680688, LR: 0.0003 +[2026-03-01 12:01:53] (step=0023084) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.516532968108002, LR: 0.0003 +[2026-03-01 12:02:01] (step=0023085) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.516728624535316, LR: 0.0003 +[2026-03-01 12:02:09] (step=0023086) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.5169242809626295, LR: 0.0003 +[2026-03-01 12:02:17] (step=0023087) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.5171199373899436, LR: 0.0003 +[2026-03-01 12:02:25] (step=0023088) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.517315593817257, LR: 0.0003 +[2026-03-01 12:02:33] (step=0023089) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 4.517511250244571, LR: 0.0003 +[2026-03-01 12:02:40] (step=0023090) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.517706906671884, LR: 0.0003 +[2026-03-01 12:02:48] (step=0023091) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.517902563099198, LR: 0.0003 +[2026-03-01 12:02:56] (step=0023092) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.518098219526512, LR: 0.0003 +[2026-03-01 12:03:04] (step=0023093) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.518293875953825, LR: 0.0003 +[2026-03-01 12:03:12] (step=0023094) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.518489532381139, LR: 0.0003 +[2026-03-01 12:03:20] (step=0023095) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.518685188808452, LR: 0.0003 +[2026-03-01 12:03:27] (step=0023096) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.518880845235766, LR: 0.0003 +[2026-03-01 12:03:35] (step=0023097) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.51907650166308, LR: 0.0003 +[2026-03-01 12:03:43] (step=0023098) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.519272158090393, LR: 0.0003 +[2026-03-01 12:03:51] (step=0023099) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.519467814517707, LR: 0.0003 +[2026-03-01 12:03:59] (step=0023100) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.51966347094502, LR: 0.0003 +[2026-03-01 12:04:07] (step=0023101) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.519859127372334, LR: 0.0003 +[2026-03-01 12:04:15] (step=0023102) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.520054783799647, LR: 0.0003 +[2026-03-01 12:04:22] (step=0023103) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.520250440226961, LR: 0.0003 +[2026-03-01 12:04:30] (step=0023104) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.520446096654275, LR: 0.0003 +[2026-03-01 12:04:38] (step=0023105) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.5206417530815886, LR: 0.0003 +[2026-03-01 12:04:46] (step=0023106) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.520837409508903, LR: 0.0003 +[2026-03-01 12:04:54] (step=0023107) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.521033065936216, LR: 0.0003 +[2026-03-01 12:05:02] (step=0023108) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.52122872236353, LR: 0.0003 +[2026-03-01 12:05:09] (step=0023109) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.521424378790844, LR: 0.0003 +[2026-03-01 12:05:17] (step=0023110) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.521620035218157, LR: 0.0003 +[2026-03-01 12:05:25] (step=0023111) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.521815691645471, LR: 0.0003 +[2026-03-01 12:05:33] (step=0023112) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.522011348072784, LR: 0.0003 +[2026-03-01 12:05:41] (step=0023113) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.522207004500098, LR: 0.0003 +[2026-03-01 12:05:49] (step=0023114) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.522402660927411, LR: 0.0003 +[2026-03-01 12:05:57] (step=0023115) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.522598317354725, LR: 0.0003 +[2026-03-01 12:06:04] (step=0023116) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.522793973782039, LR: 0.0003 +[2026-03-01 12:06:12] (step=0023117) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.522989630209352, LR: 0.0003 +[2026-03-01 12:06:20] (step=0023118) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.523185286636666, LR: 0.0003 +[2026-03-01 12:06:28] (step=0023119) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.523380943063979, LR: 0.0003 +[2026-03-01 12:06:36] (step=0023120) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.523576599491293, LR: 0.0003 +[2026-03-01 12:06:44] (step=0023121) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.523772255918607, LR: 0.0003 +[2026-03-01 12:06:52] (step=0023122) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.52396791234592, LR: 0.0003 +[2026-03-01 12:06:59] (step=0023123) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.5241635687732344, LR: 0.0003 +[2026-03-01 12:07:07] (step=0023124) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.524359225200548, LR: 0.0003 +[2026-03-01 12:07:15] (step=0023125) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.524554881627862, LR: 0.0003 +[2026-03-01 12:07:23] (step=0023126) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.524750538055175, LR: 0.0003 +[2026-03-01 12:07:31] (step=0023127) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.524946194482489, LR: 0.0003 +[2026-03-01 12:07:39] (step=0023128) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.525141850909803, LR: 0.0003 +[2026-03-01 12:07:46] (step=0023129) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.525337507337116, LR: 0.0003 +[2026-03-01 12:07:54] (step=0023130) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.52553316376443, LR: 0.0003 +[2026-03-01 12:08:02] (step=0023131) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.525728820191743, LR: 0.0003 +[2026-03-01 12:08:10] (step=0023132) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.525924476619057, LR: 0.0003 +[2026-03-01 12:08:18] (step=0023133) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.526120133046371, LR: 0.0003 +[2026-03-01 12:08:26] (step=0023134) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.526315789473684, LR: 0.0003 +[2026-03-01 12:08:34] (step=0023135) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.526511445900998, LR: 0.0003 +[2026-03-01 12:08:41] (step=0023136) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.526707102328311, LR: 0.0003 +[2026-03-01 12:08:49] (step=0023137) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.526902758755625, LR: 0.0003 +[2026-03-01 12:08:57] (step=0023138) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.527098415182938, LR: 0.0003 +[2026-03-01 12:09:05] (step=0023139) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.527294071610252, LR: 0.0003 +[2026-03-01 12:09:13] (step=0023140) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.527489728037566, LR: 0.0003 +[2026-03-01 12:09:21] (step=0023141) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.5276853844648794, LR: 0.0003 +[2026-03-01 12:09:28] (step=0023142) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.5278810408921935, LR: 0.0003 +[2026-03-01 12:09:36] (step=0023143) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.528076697319507, LR: 0.0003 +[2026-03-01 12:09:44] (step=0023144) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.528272353746821, LR: 0.0003 +[2026-03-01 12:09:52] (step=0023145) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 4.528468010174135, LR: 0.0003 +[2026-03-01 12:10:00] (step=0023146) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.528663666601448, LR: 0.0003 +[2026-03-01 12:10:08] (step=0023147) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.528859323028762, LR: 0.0003 +[2026-03-01 12:10:15] (step=0023148) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.529054979456075, LR: 0.0003 +[2026-03-01 12:10:23] (step=0023149) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.529250635883389, LR: 0.0003 +[2026-03-01 12:10:31] (step=0023150) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.529446292310703, LR: 0.0003 +[2026-03-01 12:10:39] (step=0023151) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.529641948738016, LR: 0.0003 +[2026-03-01 12:10:47] (step=0023152) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.52983760516533, LR: 0.0003 +[2026-03-01 12:10:55] (step=0023153) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.530033261592643, LR: 0.0003 +[2026-03-01 12:11:02] (step=0023154) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 4.530228918019957, LR: 0.0003 +[2026-03-01 12:11:10] (step=0023155) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.53042457444727, LR: 0.0003 +[2026-03-01 12:11:18] (step=0023156) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.530620230874584, LR: 0.0003 +[2026-03-01 12:11:26] (step=0023157) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.530815887301898, LR: 0.0003 +[2026-03-01 12:11:34] (step=0023158) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.531011543729211, LR: 0.0003 +[2026-03-01 12:11:42] (step=0023159) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 4.531207200156525, LR: 0.0003 +[2026-03-01 12:11:49] (step=0023160) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.5314028565838385, LR: 0.0003 +[2026-03-01 12:11:57] (step=0023161) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.5315985130111525, LR: 0.0003 +[2026-03-01 12:12:05] (step=0023162) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.5317941694384665, LR: 0.0003 +[2026-03-01 12:12:13] (step=0023163) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.53198982586578, LR: 0.0003 +[2026-03-01 12:12:21] (step=0023164) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.532185482293094, LR: 0.0003 +[2026-03-01 12:12:29] (step=0023165) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.532381138720407, LR: 0.0003 +[2026-03-01 12:12:37] (step=0023166) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 4.532576795147721, LR: 0.0003 +[2026-03-01 12:12:44] (step=0023167) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.532772451575034, LR: 0.0003 +[2026-03-01 12:12:52] (step=0023168) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.532968108002348, LR: 0.0003 +[2026-03-01 12:13:00] (step=0023169) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.533163764429662, LR: 0.0003 +[2026-03-01 12:13:08] (step=0023170) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.533359420856975, LR: 0.0003 +[2026-03-01 12:13:16] (step=0023171) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.533555077284289, LR: 0.0003 +[2026-03-01 12:13:24] (step=0023172) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.533750733711602, LR: 0.0003 +[2026-03-01 12:13:32] (step=0023173) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.533946390138916, LR: 0.0003 +[2026-03-01 12:13:39] (step=0023174) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.53414204656623, LR: 0.0003 +[2026-03-01 12:13:47] (step=0023175) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.534337702993543, LR: 0.0003 +[2026-03-01 12:13:55] (step=0023176) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.534533359420857, LR: 0.0003 +[2026-03-01 12:14:03] (step=0023177) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.53472901584817, LR: 0.0003 +[2026-03-01 12:14:11] (step=0023178) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.534924672275484, LR: 0.0003 +[2026-03-01 12:14:19] (step=0023179) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.5351203287027975, LR: 0.0003 +[2026-03-01 12:14:26] (step=0023180) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.5353159851301115, LR: 0.0003 +[2026-03-01 12:14:34] (step=0023181) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.5355116415574255, LR: 0.0003 +[2026-03-01 12:14:42] (step=0023182) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.535707297984739, LR: 0.0003 +[2026-03-01 12:14:50] (step=0023183) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.535902954412053, LR: 0.0003 +[2026-03-01 12:14:58] (step=0023184) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.536098610839366, LR: 0.0003 +[2026-03-01 12:15:06] (step=0023185) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.53629426726668, LR: 0.0003 +[2026-03-01 12:15:13] (step=0023186) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.536489923693994, LR: 0.0003 +[2026-03-01 12:15:21] (step=0023187) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.536685580121307, LR: 0.0003 +[2026-03-01 12:15:29] (step=0023188) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.536881236548621, LR: 0.0003 +[2026-03-01 12:15:37] (step=0023189) Train Loss: 0.4250, Train Steps/Sec: 0.13, Epoch: 4.537076892975934, LR: 0.0003 +[2026-03-01 12:15:45] (step=0023190) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.537272549403248, LR: 0.0003 +[2026-03-01 12:15:53] (step=0023191) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.537468205830561, LR: 0.0003 +[2026-03-01 12:16:01] (step=0023192) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.537663862257875, LR: 0.0003 +[2026-03-01 12:16:08] (step=0023193) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.537859518685189, LR: 0.0003 +[2026-03-01 12:16:16] (step=0023194) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.538055175112502, LR: 0.0003 +[2026-03-01 12:16:24] (step=0023195) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.538250831539816, LR: 0.0003 +[2026-03-01 12:16:32] (step=0023196) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 4.538446487967129, LR: 0.0003 +[2026-03-01 12:16:40] (step=0023197) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.538642144394443, LR: 0.0003 +[2026-03-01 12:16:48] (step=0023198) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.538837800821757, LR: 0.0003 +[2026-03-01 12:16:55] (step=0023199) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.5390334572490705, LR: 0.0003 +[2026-03-01 12:17:03] (step=0023200) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.5392291136763845, LR: 0.0003 +[2026-03-01 12:17:11] (step=0023201) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 4.539424770103698, LR: 0.0003 +[2026-03-01 12:17:19] (step=0023202) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.539620426531012, LR: 0.0003 +[2026-03-01 12:17:27] (step=0023203) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.539816082958326, LR: 0.0003 +[2026-03-01 12:17:35] (step=0023204) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.540011739385639, LR: 0.0003 +[2026-03-01 12:17:42] (step=0023205) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.540207395812953, LR: 0.0003 +[2026-03-01 12:17:50] (step=0023206) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.540403052240266, LR: 0.0003 +[2026-03-01 12:17:58] (step=0023207) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.54059870866758, LR: 0.0003 +[2026-03-01 12:18:06] (step=0023208) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.540794365094893, LR: 0.0003 +[2026-03-01 12:18:14] (step=0023209) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.540990021522207, LR: 0.0003 +[2026-03-01 12:18:22] (step=0023210) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.541185677949521, LR: 0.0003 +[2026-03-01 12:18:30] (step=0023211) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.541381334376834, LR: 0.0003 +[2026-03-01 12:18:37] (step=0023212) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 4.541576990804148, LR: 0.0003 +[2026-03-01 12:18:45] (step=0023213) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.541772647231461, LR: 0.0003 +[2026-03-01 12:18:53] (step=0023214) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 4.541968303658775, LR: 0.0003 +[2026-03-01 12:19:01] (step=0023215) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.542163960086089, LR: 0.0003 +[2026-03-01 12:19:09] (step=0023216) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.542359616513402, LR: 0.0003 +[2026-03-01 12:19:17] (step=0023217) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.542555272940716, LR: 0.0003 +[2026-03-01 12:19:25] (step=0023218) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.5427509293680295, LR: 0.0003 +[2026-03-01 12:19:32] (step=0023219) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.5429465857953435, LR: 0.0003 +[2026-03-01 12:19:40] (step=0023220) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.543142242222657, LR: 0.0003 +[2026-03-01 12:19:48] (step=0023221) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.543337898649971, LR: 0.0003 +[2026-03-01 12:19:56] (step=0023222) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.543533555077285, LR: 0.0003 +[2026-03-01 12:20:04] (step=0023223) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.543729211504598, LR: 0.0003 +[2026-03-01 12:20:12] (step=0023224) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.543924867931912, LR: 0.0003 +[2026-03-01 12:20:20] (step=0023225) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.544120524359225, LR: 0.0003 +[2026-03-01 12:20:27] (step=0023226) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.544316180786539, LR: 0.0003 +[2026-03-01 12:20:35] (step=0023227) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.544511837213853, LR: 0.0003 +[2026-03-01 12:20:43] (step=0023228) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.544707493641166, LR: 0.0003 +[2026-03-01 12:20:51] (step=0023229) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.54490315006848, LR: 0.0003 +[2026-03-01 12:20:59] (step=0023230) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.545098806495793, LR: 0.0003 +[2026-03-01 12:21:06] (step=0023231) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 4.545294462923107, LR: 0.0003 +[2026-03-01 12:21:14] (step=0023232) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.54549011935042, LR: 0.0003 +[2026-03-01 12:21:22] (step=0023233) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.545685775777734, LR: 0.0003 +[2026-03-01 12:21:30] (step=0023234) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.545881432205048, LR: 0.0003 +[2026-03-01 12:21:38] (step=0023235) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 4.546077088632361, LR: 0.0003 +[2026-03-01 12:21:46] (step=0023236) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.546272745059675, LR: 0.0003 +[2026-03-01 12:21:54] (step=0023237) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.5464684014869885, LR: 0.0003 +[2026-03-01 12:22:01] (step=0023238) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.5466640579143025, LR: 0.0003 +[2026-03-01 12:22:09] (step=0023239) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.5468597143416165, LR: 0.0003 +[2026-03-01 12:22:17] (step=0023240) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.54705537076893, LR: 0.0003 +[2026-03-01 12:22:25] (step=0023241) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.547251027196244, LR: 0.0003 +[2026-03-01 12:22:33] (step=0023242) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.547446683623557, LR: 0.0003 +[2026-03-01 12:22:41] (step=0023243) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 4.547642340050871, LR: 0.0003 +[2026-03-01 12:22:48] (step=0023244) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.547837996478184, LR: 0.0003 +[2026-03-01 12:22:56] (step=0023245) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.548033652905498, LR: 0.0003 +[2026-03-01 12:23:04] (step=0023246) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.548229309332812, LR: 0.0003 +[2026-03-01 12:23:12] (step=0023247) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.548424965760125, LR: 0.0003 +[2026-03-01 12:23:20] (step=0023248) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.548620622187439, LR: 0.0003 +[2026-03-01 12:23:28] (step=0023249) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.548816278614752, LR: 0.0003 +[2026-03-01 12:23:35] (step=0023250) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.549011935042066, LR: 0.0003 +[2026-03-01 12:23:43] (step=0023251) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.54920759146938, LR: 0.0003 +[2026-03-01 12:23:51] (step=0023252) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.549403247896693, LR: 0.0003 +[2026-03-01 12:23:59] (step=0023253) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 4.549598904324007, LR: 0.0003 +[2026-03-01 12:24:07] (step=0023254) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.54979456075132, LR: 0.0003 +[2026-03-01 12:24:15] (step=0023255) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.549990217178634, LR: 0.0003 +[2026-03-01 12:24:23] (step=0023256) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.550185873605948, LR: 0.0003 +[2026-03-01 12:24:30] (step=0023257) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.5503815300332615, LR: 0.0003 +[2026-03-01 12:24:38] (step=0023258) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.5505771864605755, LR: 0.0003 +[2026-03-01 12:24:46] (step=0023259) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.550772842887889, LR: 0.0003 +[2026-03-01 12:24:54] (step=0023260) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.550968499315203, LR: 0.0003 +[2026-03-01 12:25:02] (step=0023261) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.551164155742516, LR: 0.0003 +[2026-03-01 12:25:10] (step=0023262) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.55135981216983, LR: 0.0003 +[2026-03-01 12:25:18] (step=0023263) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.551555468597144, LR: 0.0003 +[2026-03-01 12:25:25] (step=0023264) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 4.551751125024457, LR: 0.0003 +[2026-03-01 12:25:33] (step=0023265) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.551946781451771, LR: 0.0003 +[2026-03-01 12:25:41] (step=0023266) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.552142437879084, LR: 0.0003 +[2026-03-01 12:25:49] (step=0023267) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.552338094306398, LR: 0.0003 +[2026-03-01 12:25:57] (step=0023268) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.552533750733712, LR: 0.0003 +[2026-03-01 12:26:05] (step=0023269) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.552729407161025, LR: 0.0003 +[2026-03-01 12:26:13] (step=0023270) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.552925063588339, LR: 0.0003 +[2026-03-01 12:26:20] (step=0023271) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.553120720015652, LR: 0.0003 +[2026-03-01 12:26:28] (step=0023272) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.553316376442966, LR: 0.0003 +[2026-03-01 12:26:36] (step=0023273) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.553512032870279, LR: 0.0003 +[2026-03-01 12:26:44] (step=0023274) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.553707689297593, LR: 0.0003 +[2026-03-01 12:26:52] (step=0023275) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.553903345724907, LR: 0.0003 +[2026-03-01 12:27:00] (step=0023276) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.5540990021522205, LR: 0.0003 +[2026-03-01 12:27:07] (step=0023277) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 4.5542946585795345, LR: 0.0003 +[2026-03-01 12:27:15] (step=0023278) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 4.554490315006848, LR: 0.0003 +[2026-03-01 12:27:23] (step=0023279) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.554685971434162, LR: 0.0003 +[2026-03-01 12:27:31] (step=0023280) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.554881627861476, LR: 0.0003 +[2026-03-01 12:27:39] (step=0023281) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 4.555077284288789, LR: 0.0003 +[2026-03-01 12:27:47] (step=0023282) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.555272940716103, LR: 0.0003 +[2026-03-01 12:27:55] (step=0023283) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.555468597143416, LR: 0.0003 +[2026-03-01 12:28:02] (step=0023284) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.55566425357073, LR: 0.0003 +[2026-03-01 12:28:10] (step=0023285) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.555859909998043, LR: 0.0003 +[2026-03-01 12:28:18] (step=0023286) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.556055566425357, LR: 0.0003 +[2026-03-01 12:28:26] (step=0023287) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.556251222852671, LR: 0.0003 +[2026-03-01 12:28:34] (step=0023288) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.556446879279984, LR: 0.0003 +[2026-03-01 12:28:42] (step=0023289) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.556642535707298, LR: 0.0003 +[2026-03-01 12:28:49] (step=0023290) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.556838192134611, LR: 0.0003 +[2026-03-01 12:28:57] (step=0023291) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.557033848561925, LR: 0.0003 +[2026-03-01 12:29:05] (step=0023292) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.557229504989239, LR: 0.0003 +[2026-03-01 12:29:13] (step=0023293) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.557425161416552, LR: 0.0003 +[2026-03-01 12:29:21] (step=0023294) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 4.557620817843866, LR: 0.0003 +[2026-03-01 12:29:29] (step=0023295) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.5578164742711795, LR: 0.0003 +[2026-03-01 12:29:36] (step=0023296) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.5580121306984935, LR: 0.0003 +[2026-03-01 12:29:44] (step=0023297) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 4.558207787125807, LR: 0.0003 +[2026-03-01 12:29:52] (step=0023298) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.558403443553121, LR: 0.0003 +[2026-03-01 12:30:00] (step=0023299) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.558599099980435, LR: 0.0003 +[2026-03-01 12:30:08] (step=0023300) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.558794756407748, LR: 0.0003 +[2026-03-01 12:30:16] (step=0023301) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.558990412835062, LR: 0.0003 +[2026-03-01 12:30:23] (step=0023302) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 4.559186069262375, LR: 0.0003 +[2026-03-01 12:30:31] (step=0023303) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.559381725689689, LR: 0.0003 +[2026-03-01 12:30:39] (step=0023304) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.559577382117003, LR: 0.0003 +[2026-03-01 12:30:47] (step=0023305) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.559773038544316, LR: 0.0003 +[2026-03-01 12:30:55] (step=0023306) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.55996869497163, LR: 0.0003 +[2026-03-01 12:31:03] (step=0023307) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.560164351398943, LR: 0.0003 +[2026-03-01 12:31:11] (step=0023308) Train Loss: 0.4503, Train Steps/Sec: 0.12, Epoch: 4.560360007826257, LR: 0.0003 +[2026-03-01 12:31:19] (step=0023309) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.560555664253571, LR: 0.0003 +[2026-03-01 12:31:26] (step=0023310) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.560751320680884, LR: 0.0003 +[2026-03-01 12:31:34] (step=0023311) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.560946977108198, LR: 0.0003 +[2026-03-01 12:31:42] (step=0023312) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.561142633535511, LR: 0.0003 +[2026-03-01 12:31:50] (step=0023313) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 4.561338289962825, LR: 0.0003 +[2026-03-01 12:31:58] (step=0023314) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.5615339463901385, LR: 0.0003 +[2026-03-01 12:32:06] (step=0023315) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.5617296028174525, LR: 0.0003 +[2026-03-01 12:32:13] (step=0023316) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.5619252592447666, LR: 0.0003 +[2026-03-01 12:32:21] (step=0023317) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.56212091567208, LR: 0.0003 +[2026-03-01 12:32:29] (step=0023318) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.562316572099394, LR: 0.0003 +[2026-03-01 12:32:37] (step=0023319) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.562512228526707, LR: 0.0003 +[2026-03-01 12:32:45] (step=0023320) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.562707884954021, LR: 0.0003 +[2026-03-01 12:32:53] (step=0023321) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.562903541381335, LR: 0.0003 +[2026-03-01 12:33:01] (step=0023322) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.563099197808648, LR: 0.0003 +[2026-03-01 12:33:08] (step=0023323) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.563294854235962, LR: 0.0003 +[2026-03-01 12:33:16] (step=0023324) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.563490510663275, LR: 0.0003 +[2026-03-01 12:33:24] (step=0023325) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.563686167090589, LR: 0.0003 +[2026-03-01 12:33:32] (step=0023326) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.563881823517902, LR: 0.0003 +[2026-03-01 12:33:40] (step=0023327) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.564077479945216, LR: 0.0003 +[2026-03-01 12:33:48] (step=0023328) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.56427313637253, LR: 0.0003 +[2026-03-01 12:33:55] (step=0023329) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 4.564468792799843, LR: 0.0003 +[2026-03-01 12:34:03] (step=0023330) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.564664449227157, LR: 0.0003 +[2026-03-01 12:34:11] (step=0023331) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.56486010565447, LR: 0.0003 +[2026-03-01 12:34:19] (step=0023332) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.565055762081784, LR: 0.0003 +[2026-03-01 12:34:27] (step=0023333) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.565251418509098, LR: 0.0003 +[2026-03-01 12:34:35] (step=0023334) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.5654470749364116, LR: 0.0003 +[2026-03-01 12:34:42] (step=0023335) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 4.565642731363726, LR: 0.0003 +[2026-03-01 12:34:50] (step=0023336) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.565838387791039, LR: 0.0003 +[2026-03-01 12:34:58] (step=0023337) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 4.566034044218353, LR: 0.0003 +[2026-03-01 12:35:06] (step=0023338) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.566229700645666, LR: 0.0003 +[2026-03-01 12:35:14] (step=0023339) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.56642535707298, LR: 0.0003 +[2026-03-01 12:35:22] (step=0023340) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.566621013500294, LR: 0.0003 +[2026-03-01 12:35:30] (step=0023341) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.566816669927607, LR: 0.0003 +[2026-03-01 12:35:37] (step=0023342) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 4.567012326354921, LR: 0.0003 +[2026-03-01 12:35:45] (step=0023343) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.567207982782234, LR: 0.0003 +[2026-03-01 12:35:53] (step=0023344) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.567403639209548, LR: 0.0003 +[2026-03-01 12:36:01] (step=0023345) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.567599295636862, LR: 0.0003 +[2026-03-01 12:36:09] (step=0023346) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.567794952064175, LR: 0.0003 +[2026-03-01 12:36:17] (step=0023347) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.567990608491489, LR: 0.0003 +[2026-03-01 12:36:24] (step=0023348) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.568186264918802, LR: 0.0003 +[2026-03-01 12:36:32] (step=0023349) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.568381921346116, LR: 0.0003 +[2026-03-01 12:36:40] (step=0023350) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.568577577773429, LR: 0.0003 +[2026-03-01 12:36:48] (step=0023351) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.568773234200743, LR: 0.0003 +[2026-03-01 12:36:56] (step=0023352) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.5689688906280574, LR: 0.0003 +[2026-03-01 12:37:04] (step=0023353) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.569164547055371, LR: 0.0003 +[2026-03-01 12:37:11] (step=0023354) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.569360203482685, LR: 0.0003 +[2026-03-01 12:37:19] (step=0023355) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.569555859909998, LR: 0.0003 +[2026-03-01 12:37:27] (step=0023356) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.569751516337312, LR: 0.0003 +[2026-03-01 12:37:35] (step=0023357) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.569947172764626, LR: 0.0003 +[2026-03-01 12:37:43] (step=0023358) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.570142829191939, LR: 0.0003 +[2026-03-01 12:37:51] (step=0023359) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 4.570338485619253, LR: 0.0003 +[2026-03-01 12:37:59] (step=0023360) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.570534142046566, LR: 0.0003 +[2026-03-01 12:38:07] (step=0023361) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.57072979847388, LR: 0.0003 +[2026-03-01 12:38:14] (step=0023362) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.570925454901193, LR: 0.0003 +[2026-03-01 12:38:22] (step=0023363) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.571121111328507, LR: 0.0003 +[2026-03-01 12:38:30] (step=0023364) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.571316767755821, LR: 0.0003 +[2026-03-01 12:38:38] (step=0023365) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 4.571512424183134, LR: 0.0003 +[2026-03-01 12:38:46] (step=0023366) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.571708080610448, LR: 0.0003 +[2026-03-01 12:38:54] (step=0023367) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.571903737037761, LR: 0.0003 +[2026-03-01 12:39:01] (step=0023368) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.572099393465075, LR: 0.0003 +[2026-03-01 12:39:09] (step=0023369) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.572295049892389, LR: 0.0003 +[2026-03-01 12:39:17] (step=0023370) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.5724907063197024, LR: 0.0003 +[2026-03-01 12:39:25] (step=0023371) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.5726863627470165, LR: 0.0003 +[2026-03-01 12:39:33] (step=0023372) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.57288201917433, LR: 0.0003 +[2026-03-01 12:39:41] (step=0023373) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.573077675601644, LR: 0.0003 +[2026-03-01 12:39:48] (step=0023374) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.573273332028958, LR: 0.0003 +[2026-03-01 12:39:56] (step=0023375) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.573468988456271, LR: 0.0003 +[2026-03-01 12:40:04] (step=0023376) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.573664644883585, LR: 0.0003 +[2026-03-01 12:40:12] (step=0023377) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.573860301310898, LR: 0.0003 +[2026-03-01 12:40:20] (step=0023378) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.574055957738212, LR: 0.0003 +[2026-03-01 12:40:28] (step=0023379) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.574251614165525, LR: 0.0003 +[2026-03-01 12:40:36] (step=0023380) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.574447270592839, LR: 0.0003 +[2026-03-01 12:40:43] (step=0023381) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.574642927020153, LR: 0.0003 +[2026-03-01 12:40:51] (step=0023382) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.574838583447466, LR: 0.0003 +[2026-03-01 12:40:59] (step=0023383) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.57503423987478, LR: 0.0003 +[2026-03-01 12:41:07] (step=0023384) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.575229896302093, LR: 0.0003 +[2026-03-01 12:41:15] (step=0023385) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.575425552729407, LR: 0.0003 +[2026-03-01 12:41:23] (step=0023386) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 4.575621209156721, LR: 0.0003 +[2026-03-01 12:41:30] (step=0023387) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.575816865584034, LR: 0.0003 +[2026-03-01 12:41:38] (step=0023388) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.576012522011348, LR: 0.0003 +[2026-03-01 12:41:46] (step=0023389) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.5762081784386615, LR: 0.0003 +[2026-03-01 12:41:54] (step=0023390) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.5764038348659755, LR: 0.0003 +[2026-03-01 12:42:02] (step=0023391) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.576599491293289, LR: 0.0003 +[2026-03-01 12:42:10] (step=0023392) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.576795147720603, LR: 0.0003 +[2026-03-01 12:42:17] (step=0023393) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 4.576990804147917, LR: 0.0003 +[2026-03-01 12:42:25] (step=0023394) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.57718646057523, LR: 0.0003 +[2026-03-01 12:42:33] (step=0023395) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.577382117002544, LR: 0.0003 +[2026-03-01 12:42:41] (step=0023396) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.577577773429857, LR: 0.0003 +[2026-03-01 12:42:49] (step=0023397) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 4.577773429857171, LR: 0.0003 +[2026-03-01 12:42:57] (step=0023398) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.577969086284485, LR: 0.0003 +[2026-03-01 12:43:05] (step=0023399) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.578164742711798, LR: 0.0003 +[2026-03-01 12:43:12] (step=0023400) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.578360399139112, LR: 0.0003 +[2026-03-01 12:43:20] (step=0023401) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.578556055566425, LR: 0.0003 +[2026-03-01 12:43:28] (step=0023402) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.578751711993739, LR: 0.0003 +[2026-03-01 12:43:36] (step=0023403) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.578947368421052, LR: 0.0003 +[2026-03-01 12:43:44] (step=0023404) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.579143024848366, LR: 0.0003 +[2026-03-01 12:43:52] (step=0023405) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.57933868127568, LR: 0.0003 +[2026-03-01 12:44:00] (step=0023406) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.579534337702993, LR: 0.0003 +[2026-03-01 12:44:07] (step=0023407) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.579729994130307, LR: 0.0003 +[2026-03-01 12:44:15] (step=0023408) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.5799256505576205, LR: 0.0003 +[2026-03-01 12:44:23] (step=0023409) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.5801213069849345, LR: 0.0003 +[2026-03-01 12:44:31] (step=0023410) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.5803169634122485, LR: 0.0003 +[2026-03-01 12:44:39] (step=0023411) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.580512619839562, LR: 0.0003 +[2026-03-01 12:44:47] (step=0023412) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.580708276266876, LR: 0.0003 +[2026-03-01 12:44:54] (step=0023413) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.580903932694189, LR: 0.0003 +[2026-03-01 12:45:02] (step=0023414) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 4.581099589121503, LR: 0.0003 +[2026-03-01 12:45:10] (step=0023415) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.581295245548816, LR: 0.0003 +[2026-03-01 12:45:18] (step=0023416) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.58149090197613, LR: 0.0003 +[2026-03-01 12:45:26] (step=0023417) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.581686558403444, LR: 0.0003 +[2026-03-01 12:45:34] (step=0023418) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.581882214830757, LR: 0.0003 +[2026-03-01 12:45:42] (step=0023419) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 4.582077871258071, LR: 0.0003 +[2026-03-01 12:45:49] (step=0023420) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.582273527685384, LR: 0.0003 +[2026-03-01 12:45:57] (step=0023421) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 4.582469184112698, LR: 0.0003 +[2026-03-01 12:46:05] (step=0023422) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.582664840540012, LR: 0.0003 +[2026-03-01 12:46:13] (step=0023423) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.582860496967325, LR: 0.0003 +[2026-03-01 12:46:21] (step=0023424) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.583056153394639, LR: 0.0003 +[2026-03-01 12:46:29] (step=0023425) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.583251809821952, LR: 0.0003 +[2026-03-01 12:46:37] (step=0023426) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.583447466249266, LR: 0.0003 +[2026-03-01 12:46:44] (step=0023427) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.58364312267658, LR: 0.0003 +[2026-03-01 12:46:52] (step=0023428) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.5838387791038935, LR: 0.0003 +[2026-03-01 12:47:00] (step=0023429) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.5840344355312075, LR: 0.0003 +[2026-03-01 12:47:08] (step=0023430) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.584230091958521, LR: 0.0003 +[2026-03-01 12:47:16] (step=0023431) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 4.584425748385835, LR: 0.0003 +[2026-03-01 12:47:24] (step=0023432) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 4.584621404813148, LR: 0.0003 +[2026-03-01 12:47:31] (step=0023433) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.584817061240462, LR: 0.0003 +[2026-03-01 12:47:39] (step=0023434) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.585012717667776, LR: 0.0003 +[2026-03-01 12:47:47] (step=0023435) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.585208374095089, LR: 0.0003 +[2026-03-01 12:47:55] (step=0023436) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.585404030522403, LR: 0.0003 +[2026-03-01 12:48:03] (step=0023437) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.585599686949716, LR: 0.0003 +[2026-03-01 12:48:11] (step=0023438) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.58579534337703, LR: 0.0003 +[2026-03-01 12:48:19] (step=0023439) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.585990999804344, LR: 0.0003 +[2026-03-01 12:48:26] (step=0023440) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.586186656231657, LR: 0.0003 +[2026-03-01 12:48:34] (step=0023441) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.586382312658971, LR: 0.0003 +[2026-03-01 12:48:42] (step=0023442) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.586577969086284, LR: 0.0003 +[2026-03-01 12:48:50] (step=0023443) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.586773625513598, LR: 0.0003 +[2026-03-01 12:48:58] (step=0023444) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.586969281940911, LR: 0.0003 +[2026-03-01 12:49:06] (step=0023445) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.587164938368225, LR: 0.0003 +[2026-03-01 12:49:13] (step=0023446) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.587360594795539, LR: 0.0003 +[2026-03-01 12:49:21] (step=0023447) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.5875562512228525, LR: 0.0003 +[2026-03-01 12:49:29] (step=0023448) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 4.5877519076501665, LR: 0.0003 +[2026-03-01 12:49:37] (step=0023449) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.58794756407748, LR: 0.0003 +[2026-03-01 12:49:45] (step=0023450) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.588143220504794, LR: 0.0003 +[2026-03-01 12:49:53] (step=0023451) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.588338876932108, LR: 0.0003 +[2026-03-01 12:50:01] (step=0023452) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.588534533359421, LR: 0.0003 +[2026-03-01 12:50:08] (step=0023453) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.588730189786735, LR: 0.0003 +[2026-03-01 12:50:16] (step=0023454) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.588925846214048, LR: 0.0003 +[2026-03-01 12:50:24] (step=0023455) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.589121502641362, LR: 0.0003 +[2026-03-01 12:50:32] (step=0023456) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.589317159068675, LR: 0.0003 +[2026-03-01 12:50:40] (step=0023457) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.589512815495989, LR: 0.0003 +[2026-03-01 12:50:48] (step=0023458) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.589708471923303, LR: 0.0003 +[2026-03-01 12:50:56] (step=0023459) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.589904128350616, LR: 0.0003 +[2026-03-01 12:51:03] (step=0023460) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.59009978477793, LR: 0.0003 +[2026-03-01 12:51:11] (step=0023461) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 4.590295441205243, LR: 0.0003 +[2026-03-01 12:51:19] (step=0023462) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.590491097632557, LR: 0.0003 +[2026-03-01 12:51:27] (step=0023463) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.590686754059871, LR: 0.0003 +[2026-03-01 12:51:35] (step=0023464) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.590882410487184, LR: 0.0003 +[2026-03-01 12:51:43] (step=0023465) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.591078066914498, LR: 0.0003 +[2026-03-01 12:51:50] (step=0023466) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.5912737233418115, LR: 0.0003 +[2026-03-01 12:51:58] (step=0023467) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 4.5914693797691255, LR: 0.0003 +[2026-03-01 12:52:06] (step=0023468) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.591665036196439, LR: 0.0003 +[2026-03-01 12:52:14] (step=0023469) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.591860692623753, LR: 0.0003 +[2026-03-01 12:52:22] (step=0023470) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.592056349051067, LR: 0.0003 +[2026-03-01 12:52:30] (step=0023471) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.59225200547838, LR: 0.0003 +[2026-03-01 12:52:38] (step=0023472) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.592447661905694, LR: 0.0003 +[2026-03-01 12:52:45] (step=0023473) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.592643318333007, LR: 0.0003 +[2026-03-01 12:52:53] (step=0023474) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.592838974760321, LR: 0.0003 +[2026-03-01 12:53:01] (step=0023475) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.593034631187635, LR: 0.0003 +[2026-03-01 12:53:09] (step=0023476) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.593230287614948, LR: 0.0003 +[2026-03-01 12:53:17] (step=0023477) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.593425944042262, LR: 0.0003 +[2026-03-01 12:53:25] (step=0023478) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 4.593621600469575, LR: 0.0003 +[2026-03-01 12:53:32] (step=0023479) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.593817256896889, LR: 0.0003 +[2026-03-01 12:53:40] (step=0023480) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.594012913324203, LR: 0.0003 +[2026-03-01 12:53:48] (step=0023481) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.594208569751516, LR: 0.0003 +[2026-03-01 12:53:56] (step=0023482) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.59440422617883, LR: 0.0003 +[2026-03-01 12:54:04] (step=0023483) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.594599882606143, LR: 0.0003 +[2026-03-01 12:54:12] (step=0023484) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.594795539033457, LR: 0.0003 +[2026-03-01 12:54:20] (step=0023485) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.5949911954607705, LR: 0.0003 +[2026-03-01 12:54:27] (step=0023486) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.5951868518880845, LR: 0.0003 +[2026-03-01 12:54:35] (step=0023487) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.5953825083153985, LR: 0.0003 +[2026-03-01 12:54:43] (step=0023488) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 4.595578164742712, LR: 0.0003 +[2026-03-01 12:54:51] (step=0023489) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.595773821170026, LR: 0.0003 +[2026-03-01 12:54:59] (step=0023490) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 4.595969477597339, LR: 0.0003 +[2026-03-01 12:55:07] (step=0023491) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.596165134024653, LR: 0.0003 +[2026-03-01 12:55:14] (step=0023492) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 4.596360790451967, LR: 0.0003 +[2026-03-01 12:55:22] (step=0023493) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.59655644687928, LR: 0.0003 +[2026-03-01 12:55:30] (step=0023494) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.596752103306594, LR: 0.0003 +[2026-03-01 12:55:38] (step=0023495) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.596947759733907, LR: 0.0003 +[2026-03-01 12:55:46] (step=0023496) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 4.597143416161221, LR: 0.0003 +[2026-03-01 12:55:54] (step=0023497) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.597339072588534, LR: 0.0003 +[2026-03-01 12:56:01] (step=0023498) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.597534729015848, LR: 0.0003 +[2026-03-01 12:56:09] (step=0023499) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.597730385443162, LR: 0.0003 +[2026-03-01 12:56:17] (step=0023500) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.597926041870475, LR: 0.0003 +[2026-03-01 12:56:17] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0023500/ +[2026-03-01 12:56:25] (step=0023501) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.598121698297789, LR: 0.0003 +[2026-03-01 12:56:33] (step=0023502) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.598317354725102, LR: 0.0003 +[2026-03-01 12:56:41] (step=0023503) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.598513011152416, LR: 0.0003 +[2026-03-01 12:56:49] (step=0023504) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.59870866757973, LR: 0.0003 +[2026-03-01 12:56:56] (step=0023505) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.5989043240070435, LR: 0.0003 +[2026-03-01 12:57:04] (step=0023506) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.5990999804343575, LR: 0.0003 +[2026-03-01 12:57:12] (step=0023507) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.599295636861671, LR: 0.0003 +[2026-03-01 12:57:20] (step=0023508) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.599491293288985, LR: 0.0003 +[2026-03-01 12:57:28] (step=0023509) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.599686949716298, LR: 0.0003 +[2026-03-01 12:57:36] (step=0023510) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.599882606143612, LR: 0.0003 +[2026-03-01 12:57:43] (step=0023511) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.600078262570926, LR: 0.0003 +[2026-03-01 12:57:51] (step=0023512) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 4.600273918998239, LR: 0.0003 +[2026-03-01 12:57:59] (step=0023513) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.600469575425553, LR: 0.0003 +[2026-03-01 12:58:07] (step=0023514) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.600665231852866, LR: 0.0003 +[2026-03-01 12:58:15] (step=0023515) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.60086088828018, LR: 0.0003 +[2026-03-01 12:58:23] (step=0023516) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.601056544707494, LR: 0.0003 +[2026-03-01 12:58:31] (step=0023517) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.601252201134807, LR: 0.0003 +[2026-03-01 12:58:38] (step=0023518) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.601447857562121, LR: 0.0003 +[2026-03-01 12:58:46] (step=0023519) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.601643513989434, LR: 0.0003 +[2026-03-01 12:58:54] (step=0023520) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.601839170416748, LR: 0.0003 +[2026-03-01 12:59:02] (step=0023521) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.602034826844061, LR: 0.0003 +[2026-03-01 12:59:10] (step=0023522) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.602230483271375, LR: 0.0003 +[2026-03-01 12:59:18] (step=0023523) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.602426139698689, LR: 0.0003 +[2026-03-01 12:59:26] (step=0023524) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.6026217961260025, LR: 0.0003 +[2026-03-01 12:59:33] (step=0023525) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.6028174525533165, LR: 0.0003 +[2026-03-01 12:59:41] (step=0023526) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.60301310898063, LR: 0.0003 +[2026-03-01 12:59:49] (step=0023527) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 4.603208765407944, LR: 0.0003 +[2026-03-01 12:59:57] (step=0023528) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.603404421835258, LR: 0.0003 +[2026-03-01 13:00:05] (step=0023529) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 4.603600078262571, LR: 0.0003 +[2026-03-01 13:00:13] (step=0023530) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.603795734689885, LR: 0.0003 +[2026-03-01 13:00:20] (step=0023531) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.603991391117198, LR: 0.0003 +[2026-03-01 13:00:28] (step=0023532) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.604187047544512, LR: 0.0003 +[2026-03-01 13:00:36] (step=0023533) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.604382703971825, LR: 0.0003 +[2026-03-01 13:00:44] (step=0023534) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.604578360399139, LR: 0.0003 +[2026-03-01 13:00:52] (step=0023535) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 4.604774016826453, LR: 0.0003 +[2026-03-01 13:01:00] (step=0023536) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.604969673253766, LR: 0.0003 +[2026-03-01 13:01:07] (step=0023537) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.60516532968108, LR: 0.0003 +[2026-03-01 13:01:15] (step=0023538) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.605360986108393, LR: 0.0003 +[2026-03-01 13:01:23] (step=0023539) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.605556642535707, LR: 0.0003 +[2026-03-01 13:01:31] (step=0023540) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.605752298963021, LR: 0.0003 +[2026-03-01 13:01:39] (step=0023541) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.605947955390334, LR: 0.0003 +[2026-03-01 13:01:47] (step=0023542) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.606143611817648, LR: 0.0003 +[2026-03-01 13:01:55] (step=0023543) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.6063392682449615, LR: 0.0003 +[2026-03-01 13:02:02] (step=0023544) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.6065349246722755, LR: 0.0003 +[2026-03-01 13:02:10] (step=0023545) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.6067305810995895, LR: 0.0003 +[2026-03-01 13:02:18] (step=0023546) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.606926237526903, LR: 0.0003 +[2026-03-01 13:02:26] (step=0023547) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.607121893954217, LR: 0.0003 +[2026-03-01 13:02:34] (step=0023548) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.60731755038153, LR: 0.0003 +[2026-03-01 13:02:42] (step=0023549) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.607513206808844, LR: 0.0003 +[2026-03-01 13:02:50] (step=0023550) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.607708863236157, LR: 0.0003 +[2026-03-01 13:02:57] (step=0023551) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.607904519663471, LR: 0.0003 +[2026-03-01 13:03:05] (step=0023552) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.608100176090785, LR: 0.0003 +[2026-03-01 13:03:13] (step=0023553) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 4.608295832518098, LR: 0.0003 +[2026-03-01 13:03:21] (step=0023554) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.608491488945412, LR: 0.0003 +[2026-03-01 13:03:29] (step=0023555) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.608687145372725, LR: 0.0003 +[2026-03-01 13:03:37] (step=0023556) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.608882801800039, LR: 0.0003 +[2026-03-01 13:03:44] (step=0023557) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.609078458227353, LR: 0.0003 +[2026-03-01 13:03:52] (step=0023558) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.609274114654666, LR: 0.0003 +[2026-03-01 13:04:00] (step=0023559) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.60946977108198, LR: 0.0003 +[2026-03-01 13:04:08] (step=0023560) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.609665427509293, LR: 0.0003 +[2026-03-01 13:04:16] (step=0023561) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.609861083936607, LR: 0.0003 +[2026-03-01 13:04:24] (step=0023562) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.6100567403639205, LR: 0.0003 +[2026-03-01 13:04:32] (step=0023563) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.6102523967912346, LR: 0.0003 +[2026-03-01 13:04:39] (step=0023564) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.610448053218549, LR: 0.0003 +[2026-03-01 13:04:47] (step=0023565) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.610643709645862, LR: 0.0003 +[2026-03-01 13:04:55] (step=0023566) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.610839366073176, LR: 0.0003 +[2026-03-01 13:05:03] (step=0023567) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.611035022500489, LR: 0.0003 +[2026-03-01 13:05:11] (step=0023568) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.611230678927803, LR: 0.0003 +[2026-03-01 13:05:19] (step=0023569) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.611426335355117, LR: 0.0003 +[2026-03-01 13:05:27] (step=0023570) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.61162199178243, LR: 0.0003 +[2026-03-01 13:05:34] (step=0023571) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.611817648209744, LR: 0.0003 +[2026-03-01 13:05:42] (step=0023572) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 4.612013304637057, LR: 0.0003 +[2026-03-01 13:05:50] (step=0023573) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.612208961064371, LR: 0.0003 +[2026-03-01 13:05:58] (step=0023574) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.612404617491684, LR: 0.0003 +[2026-03-01 13:06:06] (step=0023575) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.612600273918998, LR: 0.0003 +[2026-03-01 13:06:14] (step=0023576) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.612795930346312, LR: 0.0003 +[2026-03-01 13:06:21] (step=0023577) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.612991586773625, LR: 0.0003 +[2026-03-01 13:06:29] (step=0023578) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.613187243200939, LR: 0.0003 +[2026-03-01 13:06:37] (step=0023579) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.613382899628252, LR: 0.0003 +[2026-03-01 13:06:45] (step=0023580) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.613578556055566, LR: 0.0003 +[2026-03-01 13:06:53] (step=0023581) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.61377421248288, LR: 0.0003 +[2026-03-01 13:07:01] (step=0023582) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.613969868910194, LR: 0.0003 +[2026-03-01 13:07:08] (step=0023583) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.614165525337508, LR: 0.0003 +[2026-03-01 13:07:16] (step=0023584) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.614361181764821, LR: 0.0003 +[2026-03-01 13:07:24] (step=0023585) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.614556838192135, LR: 0.0003 +[2026-03-01 13:07:32] (step=0023586) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.614752494619448, LR: 0.0003 +[2026-03-01 13:07:40] (step=0023587) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.614948151046762, LR: 0.0003 +[2026-03-01 13:07:48] (step=0023588) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.615143807474076, LR: 0.0003 +[2026-03-01 13:07:56] (step=0023589) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.615339463901389, LR: 0.0003 +[2026-03-01 13:08:03] (step=0023590) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.615535120328703, LR: 0.0003 +[2026-03-01 13:08:11] (step=0023591) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.615730776756016, LR: 0.0003 +[2026-03-01 13:08:19] (step=0023592) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.61592643318333, LR: 0.0003 +[2026-03-01 13:08:27] (step=0023593) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.616122089610644, LR: 0.0003 +[2026-03-01 13:08:35] (step=0023594) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.616317746037957, LR: 0.0003 +[2026-03-01 13:08:43] (step=0023595) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.616513402465271, LR: 0.0003 +[2026-03-01 13:08:50] (step=0023596) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 4.616709058892584, LR: 0.0003 +[2026-03-01 13:08:58] (step=0023597) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.616904715319898, LR: 0.0003 +[2026-03-01 13:09:06] (step=0023598) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.617100371747212, LR: 0.0003 +[2026-03-01 13:09:14] (step=0023599) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.6172960281745254, LR: 0.0003 +[2026-03-01 13:09:22] (step=0023600) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.6174916846018395, LR: 0.0003 +[2026-03-01 13:09:30] (step=0023601) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.617687341029153, LR: 0.0003 +[2026-03-01 13:09:38] (step=0023602) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.617882997456467, LR: 0.0003 +[2026-03-01 13:09:45] (step=0023603) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.61807865388378, LR: 0.0003 +[2026-03-01 13:09:53] (step=0023604) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.618274310311094, LR: 0.0003 +[2026-03-01 13:10:01] (step=0023605) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.618469966738408, LR: 0.0003 +[2026-03-01 13:10:09] (step=0023606) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.618665623165721, LR: 0.0003 +[2026-03-01 13:10:17] (step=0023607) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.618861279593035, LR: 0.0003 +[2026-03-01 13:10:25] (step=0023608) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.619056936020348, LR: 0.0003 +[2026-03-01 13:10:32] (step=0023609) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.619252592447662, LR: 0.0003 +[2026-03-01 13:10:40] (step=0023610) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.619448248874976, LR: 0.0003 +[2026-03-01 13:10:48] (step=0023611) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.619643905302289, LR: 0.0003 +[2026-03-01 13:10:56] (step=0023612) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.619839561729603, LR: 0.0003 +[2026-03-01 13:11:04] (step=0023613) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.620035218156916, LR: 0.0003 +[2026-03-01 13:11:12] (step=0023614) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 4.62023087458423, LR: 0.0003 +[2026-03-01 13:11:20] (step=0023615) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 4.620426531011543, LR: 0.0003 +[2026-03-01 13:11:28] (step=0023616) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.620622187438857, LR: 0.0003 +[2026-03-01 13:11:35] (step=0023617) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.620817843866171, LR: 0.0003 +[2026-03-01 13:11:43] (step=0023618) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.6210135002934845, LR: 0.0003 +[2026-03-01 13:11:51] (step=0023619) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 4.6212091567207985, LR: 0.0003 +[2026-03-01 13:11:59] (step=0023620) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.621404813148112, LR: 0.0003 +[2026-03-01 13:12:07] (step=0023621) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.621600469575426, LR: 0.0003 +[2026-03-01 13:12:15] (step=0023622) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.62179612600274, LR: 0.0003 +[2026-03-01 13:12:22] (step=0023623) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.621991782430053, LR: 0.0003 +[2026-03-01 13:12:30] (step=0023624) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.622187438857367, LR: 0.0003 +[2026-03-01 13:12:38] (step=0023625) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.62238309528468, LR: 0.0003 +[2026-03-01 13:12:46] (step=0023626) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.622578751711994, LR: 0.0003 +[2026-03-01 13:12:54] (step=0023627) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.622774408139307, LR: 0.0003 +[2026-03-01 13:13:02] (step=0023628) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.622970064566621, LR: 0.0003 +[2026-03-01 13:13:09] (step=0023629) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.623165720993935, LR: 0.0003 +[2026-03-01 13:13:17] (step=0023630) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.623361377421248, LR: 0.0003 +[2026-03-01 13:13:25] (step=0023631) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.623557033848562, LR: 0.0003 +[2026-03-01 13:13:33] (step=0023632) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.623752690275875, LR: 0.0003 +[2026-03-01 13:13:41] (step=0023633) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.623948346703189, LR: 0.0003 +[2026-03-01 13:13:49] (step=0023634) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.624144003130503, LR: 0.0003 +[2026-03-01 13:13:56] (step=0023635) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.624339659557816, LR: 0.0003 +[2026-03-01 13:14:04] (step=0023636) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.62453531598513, LR: 0.0003 +[2026-03-01 13:14:12] (step=0023637) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.6247309724124435, LR: 0.0003 +[2026-03-01 13:14:20] (step=0023638) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.6249266288397575, LR: 0.0003 +[2026-03-01 13:14:28] (step=0023639) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.625122285267071, LR: 0.0003 +[2026-03-01 13:14:36] (step=0023640) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.625317941694385, LR: 0.0003 +[2026-03-01 13:14:43] (step=0023641) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.625513598121699, LR: 0.0003 +[2026-03-01 13:14:51] (step=0023642) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 4.625709254549012, LR: 0.0003 +[2026-03-01 13:14:59] (step=0023643) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.625904910976326, LR: 0.0003 +[2026-03-01 13:15:07] (step=0023644) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.626100567403639, LR: 0.0003 +[2026-03-01 13:15:15] (step=0023645) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.626296223830953, LR: 0.0003 +[2026-03-01 13:15:23] (step=0023646) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.626491880258267, LR: 0.0003 +[2026-03-01 13:15:30] (step=0023647) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.62668753668558, LR: 0.0003 +[2026-03-01 13:15:38] (step=0023648) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.626883193112894, LR: 0.0003 +[2026-03-01 13:15:46] (step=0023649) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.627078849540207, LR: 0.0003 +[2026-03-01 13:15:54] (step=0023650) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 4.627274505967521, LR: 0.0003 +[2026-03-01 13:16:02] (step=0023651) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.627470162394835, LR: 0.0003 +[2026-03-01 13:16:10] (step=0023652) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.627665818822148, LR: 0.0003 +[2026-03-01 13:16:18] (step=0023653) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.627861475249462, LR: 0.0003 +[2026-03-01 13:16:26] (step=0023654) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.628057131676775, LR: 0.0003 +[2026-03-01 13:16:33] (step=0023655) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.628252788104089, LR: 0.0003 +[2026-03-01 13:16:41] (step=0023656) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.6284484445314025, LR: 0.0003 +[2026-03-01 13:16:49] (step=0023657) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.6286441009587165, LR: 0.0003 +[2026-03-01 13:16:57] (step=0023658) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.6288397573860305, LR: 0.0003 +[2026-03-01 13:17:05] (step=0023659) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.629035413813344, LR: 0.0003 +[2026-03-01 13:17:13] (step=0023660) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.629231070240658, LR: 0.0003 +[2026-03-01 13:17:20] (step=0023661) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.629426726667971, LR: 0.0003 +[2026-03-01 13:17:28] (step=0023662) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.629622383095285, LR: 0.0003 +[2026-03-01 13:17:36] (step=0023663) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 4.629818039522599, LR: 0.0003 +[2026-03-01 13:17:44] (step=0023664) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.630013695949912, LR: 0.0003 +[2026-03-01 13:17:52] (step=0023665) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.630209352377226, LR: 0.0003 +[2026-03-01 13:18:00] (step=0023666) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.630405008804539, LR: 0.0003 +[2026-03-01 13:18:08] (step=0023667) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.630600665231853, LR: 0.0003 +[2026-03-01 13:18:15] (step=0023668) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.630796321659166, LR: 0.0003 +[2026-03-01 13:18:23] (step=0023669) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.63099197808648, LR: 0.0003 +[2026-03-01 13:18:31] (step=0023670) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.631187634513794, LR: 0.0003 +[2026-03-01 13:18:39] (step=0023671) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 4.631383290941107, LR: 0.0003 +[2026-03-01 13:18:47] (step=0023672) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 4.631578947368421, LR: 0.0003 +[2026-03-01 13:18:55] (step=0023673) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.631774603795734, LR: 0.0003 +[2026-03-01 13:19:02] (step=0023674) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 4.631970260223048, LR: 0.0003 +[2026-03-01 13:19:10] (step=0023675) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.632165916650362, LR: 0.0003 +[2026-03-01 13:19:18] (step=0023676) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.6323615730776755, LR: 0.0003 +[2026-03-01 13:19:26] (step=0023677) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 4.6325572295049895, LR: 0.0003 +[2026-03-01 13:19:34] (step=0023678) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.632752885932303, LR: 0.0003 +[2026-03-01 13:19:42] (step=0023679) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.632948542359617, LR: 0.0003 +[2026-03-01 13:19:49] (step=0023680) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.63314419878693, LR: 0.0003 +[2026-03-01 13:19:57] (step=0023681) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.633339855214244, LR: 0.0003 +[2026-03-01 13:20:05] (step=0023682) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.633535511641558, LR: 0.0003 +[2026-03-01 13:20:13] (step=0023683) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.633731168068871, LR: 0.0003 +[2026-03-01 13:20:21] (step=0023684) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.633926824496185, LR: 0.0003 +[2026-03-01 13:20:29] (step=0023685) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 4.634122480923498, LR: 0.0003 +[2026-03-01 13:20:37] (step=0023686) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.634318137350812, LR: 0.0003 +[2026-03-01 13:20:44] (step=0023687) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.634513793778126, LR: 0.0003 +[2026-03-01 13:20:52] (step=0023688) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.634709450205439, LR: 0.0003 +[2026-03-01 13:21:00] (step=0023689) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 4.634905106632753, LR: 0.0003 +[2026-03-01 13:21:08] (step=0023690) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.635100763060066, LR: 0.0003 +[2026-03-01 13:21:16] (step=0023691) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.63529641948738, LR: 0.0003 +[2026-03-01 13:21:24] (step=0023692) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.635492075914693, LR: 0.0003 +[2026-03-01 13:21:31] (step=0023693) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.635687732342007, LR: 0.0003 +[2026-03-01 13:21:39] (step=0023694) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.635883388769321, LR: 0.0003 +[2026-03-01 13:21:47] (step=0023695) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.6360790451966345, LR: 0.0003 +[2026-03-01 13:21:55] (step=0023696) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.6362747016239485, LR: 0.0003 +[2026-03-01 13:22:03] (step=0023697) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.636470358051262, LR: 0.0003 +[2026-03-01 13:22:11] (step=0023698) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.636666014478576, LR: 0.0003 +[2026-03-01 13:22:19] (step=0023699) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.63686167090589, LR: 0.0003 +[2026-03-01 13:22:26] (step=0023700) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.637057327333203, LR: 0.0003 +[2026-03-01 13:22:34] (step=0023701) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.637252983760517, LR: 0.0003 +[2026-03-01 13:22:42] (step=0023702) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 4.63744864018783, LR: 0.0003 +[2026-03-01 13:22:50] (step=0023703) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.637644296615144, LR: 0.0003 +[2026-03-01 13:22:58] (step=0023704) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.637839953042458, LR: 0.0003 +[2026-03-01 13:23:06] (step=0023705) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.638035609469771, LR: 0.0003 +[2026-03-01 13:23:14] (step=0023706) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.638231265897085, LR: 0.0003 +[2026-03-01 13:23:21] (step=0023707) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.638426922324398, LR: 0.0003 +[2026-03-01 13:23:29] (step=0023708) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.638622578751712, LR: 0.0003 +[2026-03-01 13:23:37] (step=0023709) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.638818235179025, LR: 0.0003 +[2026-03-01 13:23:45] (step=0023710) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.639013891606339, LR: 0.0003 +[2026-03-01 13:23:53] (step=0023711) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.639209548033653, LR: 0.0003 +[2026-03-01 13:24:01] (step=0023712) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.639405204460966, LR: 0.0003 +[2026-03-01 13:24:09] (step=0023713) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.63960086088828, LR: 0.0003 +[2026-03-01 13:24:16] (step=0023714) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.6397965173155935, LR: 0.0003 +[2026-03-01 13:24:24] (step=0023715) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.6399921737429075, LR: 0.0003 +[2026-03-01 13:24:32] (step=0023716) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.6401878301702215, LR: 0.0003 +[2026-03-01 13:24:40] (step=0023717) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.640383486597535, LR: 0.0003 +[2026-03-01 13:24:48] (step=0023718) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.640579143024849, LR: 0.0003 +[2026-03-01 13:24:56] (step=0023719) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.640774799452162, LR: 0.0003 +[2026-03-01 13:25:03] (step=0023720) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.640970455879476, LR: 0.0003 +[2026-03-01 13:25:11] (step=0023721) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.641166112306789, LR: 0.0003 +[2026-03-01 13:25:19] (step=0023722) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.641361768734103, LR: 0.0003 +[2026-03-01 13:25:27] (step=0023723) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.641557425161417, LR: 0.0003 +[2026-03-01 13:25:35] (step=0023724) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.64175308158873, LR: 0.0003 +[2026-03-01 13:25:43] (step=0023725) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.641948738016044, LR: 0.0003 +[2026-03-01 13:25:50] (step=0023726) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.642144394443357, LR: 0.0003 +[2026-03-01 13:25:58] (step=0023727) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.642340050870671, LR: 0.0003 +[2026-03-01 13:26:06] (step=0023728) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.642535707297985, LR: 0.0003 +[2026-03-01 13:26:14] (step=0023729) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.642731363725298, LR: 0.0003 +[2026-03-01 13:26:22] (step=0023730) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.642927020152612, LR: 0.0003 +[2026-03-01 13:26:30] (step=0023731) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.643122676579925, LR: 0.0003 +[2026-03-01 13:26:38] (step=0023732) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.643318333007239, LR: 0.0003 +[2026-03-01 13:26:45] (step=0023733) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 4.6435139894345525, LR: 0.0003 +[2026-03-01 13:26:53] (step=0023734) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 4.6437096458618665, LR: 0.0003 +[2026-03-01 13:27:01] (step=0023735) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 4.6439053022891805, LR: 0.0003 +[2026-03-01 13:27:09] (step=0023736) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.644100958716494, LR: 0.0003 +[2026-03-01 13:27:17] (step=0023737) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.644296615143808, LR: 0.0003 +[2026-03-01 13:27:25] (step=0023738) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.644492271571121, LR: 0.0003 +[2026-03-01 13:27:32] (step=0023739) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 4.644687927998435, LR: 0.0003 +[2026-03-01 13:27:40] (step=0023740) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.644883584425749, LR: 0.0003 +[2026-03-01 13:27:48] (step=0023741) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.645079240853062, LR: 0.0003 +[2026-03-01 13:27:56] (step=0023742) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.645274897280376, LR: 0.0003 +[2026-03-01 13:28:04] (step=0023743) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.645470553707689, LR: 0.0003 +[2026-03-01 13:28:12] (step=0023744) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.645666210135003, LR: 0.0003 +[2026-03-01 13:28:20] (step=0023745) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.645861866562316, LR: 0.0003 +[2026-03-01 13:28:27] (step=0023746) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.64605752298963, LR: 0.0003 +[2026-03-01 13:28:35] (step=0023747) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.646253179416944, LR: 0.0003 +[2026-03-01 13:28:43] (step=0023748) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.646448835844257, LR: 0.0003 +[2026-03-01 13:28:51] (step=0023749) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.646644492271571, LR: 0.0003 +[2026-03-01 13:28:59] (step=0023750) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.646840148698884, LR: 0.0003 +[2026-03-01 13:29:07] (step=0023751) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.647035805126198, LR: 0.0003 +[2026-03-01 13:29:15] (step=0023752) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.647231461553512, LR: 0.0003 +[2026-03-01 13:29:22] (step=0023753) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.6474271179808255, LR: 0.0003 +[2026-03-01 13:29:30] (step=0023754) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.6476227744081395, LR: 0.0003 +[2026-03-01 13:29:38] (step=0023755) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.647818430835453, LR: 0.0003 +[2026-03-01 13:29:46] (step=0023756) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.648014087262767, LR: 0.0003 +[2026-03-01 13:29:54] (step=0023757) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.64820974369008, LR: 0.0003 +[2026-03-01 13:30:02] (step=0023758) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.648405400117394, LR: 0.0003 +[2026-03-01 13:30:09] (step=0023759) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.648601056544708, LR: 0.0003 +[2026-03-01 13:30:17] (step=0023760) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.648796712972021, LR: 0.0003 +[2026-03-01 13:30:25] (step=0023761) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.648992369399335, LR: 0.0003 +[2026-03-01 13:30:33] (step=0023762) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.649188025826648, LR: 0.0003 +[2026-03-01 13:30:41] (step=0023763) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.649383682253962, LR: 0.0003 +[2026-03-01 13:30:49] (step=0023764) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.649579338681276, LR: 0.0003 +[2026-03-01 13:30:57] (step=0023765) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.649774995108589, LR: 0.0003 +[2026-03-01 13:31:05] (step=0023766) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.649970651535903, LR: 0.0003 +[2026-03-01 13:31:12] (step=0023767) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.650166307963216, LR: 0.0003 +[2026-03-01 13:31:20] (step=0023768) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.65036196439053, LR: 0.0003 +[2026-03-01 13:31:28] (step=0023769) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.650557620817844, LR: 0.0003 +[2026-03-01 13:31:36] (step=0023770) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.650753277245157, LR: 0.0003 +[2026-03-01 13:31:44] (step=0023771) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 4.650948933672471, LR: 0.0003 +[2026-03-01 13:31:52] (step=0023772) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.6511445900997845, LR: 0.0003 +[2026-03-01 13:31:59] (step=0023773) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.6513402465270985, LR: 0.0003 +[2026-03-01 13:32:07] (step=0023774) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.651535902954412, LR: 0.0003 +[2026-03-01 13:32:15] (step=0023775) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.651731559381726, LR: 0.0003 +[2026-03-01 13:32:23] (step=0023776) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 4.65192721580904, LR: 0.0003 +[2026-03-01 13:32:31] (step=0023777) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.652122872236353, LR: 0.0003 +[2026-03-01 13:32:39] (step=0023778) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.652318528663667, LR: 0.0003 +[2026-03-01 13:32:46] (step=0023779) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.65251418509098, LR: 0.0003 +[2026-03-01 13:32:54] (step=0023780) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.652709841518294, LR: 0.0003 +[2026-03-01 13:33:02] (step=0023781) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.652905497945608, LR: 0.0003 +[2026-03-01 13:33:10] (step=0023782) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.653101154372921, LR: 0.0003 +[2026-03-01 13:33:18] (step=0023783) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.653296810800235, LR: 0.0003 +[2026-03-01 13:33:26] (step=0023784) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.653492467227548, LR: 0.0003 +[2026-03-01 13:33:33] (step=0023785) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.653688123654862, LR: 0.0003 +[2026-03-01 13:33:41] (step=0023786) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 4.653883780082175, LR: 0.0003 +[2026-03-01 13:33:49] (step=0023787) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.654079436509489, LR: 0.0003 +[2026-03-01 13:33:57] (step=0023788) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.654275092936803, LR: 0.0003 +[2026-03-01 13:34:05] (step=0023789) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.654470749364116, LR: 0.0003 +[2026-03-01 13:34:13] (step=0023790) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.65466640579143, LR: 0.0003 +[2026-03-01 13:34:21] (step=0023791) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.6548620622187435, LR: 0.0003 +[2026-03-01 13:34:28] (step=0023792) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.6550577186460576, LR: 0.0003 +[2026-03-01 13:34:36] (step=0023793) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.655253375073372, LR: 0.0003 +[2026-03-01 13:34:44] (step=0023794) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.655449031500685, LR: 0.0003 +[2026-03-01 13:34:52] (step=0023795) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 4.655644687927999, LR: 0.0003 +[2026-03-01 13:35:00] (step=0023796) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.655840344355312, LR: 0.0003 +[2026-03-01 13:35:08] (step=0023797) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.656036000782626, LR: 0.0003 +[2026-03-01 13:35:16] (step=0023798) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.656231657209939, LR: 0.0003 +[2026-03-01 13:35:23] (step=0023799) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.656427313637253, LR: 0.0003 +[2026-03-01 13:35:31] (step=0023800) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.656622970064567, LR: 0.0003 +[2026-03-01 13:35:39] (step=0023801) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.65681862649188, LR: 0.0003 +[2026-03-01 13:35:47] (step=0023802) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.657014282919194, LR: 0.0003 +[2026-03-01 13:35:55] (step=0023803) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.657209939346507, LR: 0.0003 +[2026-03-01 13:36:03] (step=0023804) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.657405595773821, LR: 0.0003 +[2026-03-01 13:36:10] (step=0023805) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.657601252201135, LR: 0.0003 +[2026-03-01 13:36:18] (step=0023806) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.657796908628448, LR: 0.0003 +[2026-03-01 13:36:26] (step=0023807) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 4.657992565055762, LR: 0.0003 +[2026-03-01 13:36:34] (step=0023808) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.658188221483075, LR: 0.0003 +[2026-03-01 13:36:42] (step=0023809) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.658383877910389, LR: 0.0003 +[2026-03-01 13:36:50] (step=0023810) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.6585795343377026, LR: 0.0003 +[2026-03-01 13:36:58] (step=0023811) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.658775190765017, LR: 0.0003 +[2026-03-01 13:37:05] (step=0023812) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.658970847192331, LR: 0.0003 +[2026-03-01 13:37:13] (step=0023813) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.659166503619644, LR: 0.0003 +[2026-03-01 13:37:21] (step=0023814) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.659362160046958, LR: 0.0003 +[2026-03-01 13:37:29] (step=0023815) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.659557816474271, LR: 0.0003 +[2026-03-01 13:37:37] (step=0023816) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 4.659753472901585, LR: 0.0003 +[2026-03-01 13:37:45] (step=0023817) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.659949129328899, LR: 0.0003 +[2026-03-01 13:37:52] (step=0023818) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.660144785756212, LR: 0.0003 +[2026-03-01 13:38:00] (step=0023819) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 4.660340442183526, LR: 0.0003 +[2026-03-01 13:38:08] (step=0023820) Train Loss: 0.4709, Train Steps/Sec: 0.13, Epoch: 4.660536098610839, LR: 0.0003 +[2026-03-01 13:38:16] (step=0023821) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.660731755038153, LR: 0.0003 +[2026-03-01 13:38:24] (step=0023822) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.660927411465467, LR: 0.0003 +[2026-03-01 13:38:32] (step=0023823) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.66112306789278, LR: 0.0003 +[2026-03-01 13:38:39] (step=0023824) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.661318724320094, LR: 0.0003 +[2026-03-01 13:38:47] (step=0023825) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.661514380747407, LR: 0.0003 +[2026-03-01 13:38:55] (step=0023826) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.661710037174721, LR: 0.0003 +[2026-03-01 13:39:03] (step=0023827) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.661905693602034, LR: 0.0003 +[2026-03-01 13:39:11] (step=0023828) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.6621013500293484, LR: 0.0003 +[2026-03-01 13:39:19] (step=0023829) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.6622970064566625, LR: 0.0003 +[2026-03-01 13:39:27] (step=0023830) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.662492662883976, LR: 0.0003 +[2026-03-01 13:39:34] (step=0023831) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.66268831931129, LR: 0.0003 +[2026-03-01 13:39:42] (step=0023832) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.662883975738603, LR: 0.0003 +[2026-03-01 13:39:50] (step=0023833) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.663079632165917, LR: 0.0003 +[2026-03-01 13:39:58] (step=0023834) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.663275288593231, LR: 0.0003 +[2026-03-01 13:40:06] (step=0023835) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.663470945020544, LR: 0.0003 +[2026-03-01 13:40:14] (step=0023836) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.663666601447858, LR: 0.0003 +[2026-03-01 13:40:21] (step=0023837) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.663862257875171, LR: 0.0003 +[2026-03-01 13:40:29] (step=0023838) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.664057914302485, LR: 0.0003 +[2026-03-01 13:40:37] (step=0023839) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.664253570729798, LR: 0.0003 +[2026-03-01 13:40:45] (step=0023840) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.664449227157112, LR: 0.0003 +[2026-03-01 13:40:53] (step=0023841) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.664644883584426, LR: 0.0003 +[2026-03-01 13:41:01] (step=0023842) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.664840540011739, LR: 0.0003 +[2026-03-01 13:41:09] (step=0023843) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.665036196439053, LR: 0.0003 +[2026-03-01 13:41:16] (step=0023844) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.665231852866366, LR: 0.0003 +[2026-03-01 13:41:24] (step=0023845) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 4.66542750929368, LR: 0.0003 +[2026-03-01 13:41:32] (step=0023846) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 4.665623165720994, LR: 0.0003 +[2026-03-01 13:41:40] (step=0023847) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.6658188221483075, LR: 0.0003 +[2026-03-01 13:41:48] (step=0023848) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.6660144785756215, LR: 0.0003 +[2026-03-01 13:41:56] (step=0023849) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.666210135002935, LR: 0.0003 +[2026-03-01 13:42:03] (step=0023850) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.666405791430249, LR: 0.0003 +[2026-03-01 13:42:11] (step=0023851) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.666601447857562, LR: 0.0003 +[2026-03-01 13:42:19] (step=0023852) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 4.666797104284876, LR: 0.0003 +[2026-03-01 13:42:27] (step=0023853) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.66699276071219, LR: 0.0003 +[2026-03-01 13:42:35] (step=0023854) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 4.667188417139503, LR: 0.0003 +[2026-03-01 13:42:43] (step=0023855) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.667384073566817, LR: 0.0003 +[2026-03-01 13:42:50] (step=0023856) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.66757972999413, LR: 0.0003 +[2026-03-01 13:42:58] (step=0023857) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.667775386421444, LR: 0.0003 +[2026-03-01 13:43:06] (step=0023858) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.667971042848758, LR: 0.0003 +[2026-03-01 13:43:14] (step=0023859) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.668166699276071, LR: 0.0003 +[2026-03-01 13:43:22] (step=0023860) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.668362355703385, LR: 0.0003 +[2026-03-01 13:43:30] (step=0023861) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.668558012130698, LR: 0.0003 +[2026-03-01 13:43:38] (step=0023862) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 4.668753668558012, LR: 0.0003 +[2026-03-01 13:43:45] (step=0023863) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 4.668949324985325, LR: 0.0003 +[2026-03-01 13:43:53] (step=0023864) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.669144981412639, LR: 0.0003 +[2026-03-01 13:44:01] (step=0023865) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 4.669340637839953, LR: 0.0003 +[2026-03-01 13:44:09] (step=0023866) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.6695362942672665, LR: 0.0003 +[2026-03-01 13:44:17] (step=0023867) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.6697319506945805, LR: 0.0003 +[2026-03-01 13:44:25] (step=0023868) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.669927607121894, LR: 0.0003 +[2026-03-01 13:44:33] (step=0023869) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.670123263549208, LR: 0.0003 +[2026-03-01 13:44:40] (step=0023870) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.670318919976522, LR: 0.0003 +[2026-03-01 13:44:48] (step=0023871) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.670514576403835, LR: 0.0003 +[2026-03-01 13:44:56] (step=0023872) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.670710232831149, LR: 0.0003 +[2026-03-01 13:45:04] (step=0023873) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.670905889258462, LR: 0.0003 +[2026-03-01 13:45:12] (step=0023874) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.671101545685776, LR: 0.0003 +[2026-03-01 13:45:20] (step=0023875) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.67129720211309, LR: 0.0003 +[2026-03-01 13:45:27] (step=0023876) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.671492858540403, LR: 0.0003 +[2026-03-01 13:45:35] (step=0023877) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.671688514967717, LR: 0.0003 +[2026-03-01 13:45:43] (step=0023878) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.67188417139503, LR: 0.0003 +[2026-03-01 13:45:51] (step=0023879) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 4.672079827822344, LR: 0.0003 +[2026-03-01 13:45:59] (step=0023880) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.672275484249657, LR: 0.0003 +[2026-03-01 13:46:07] (step=0023881) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.672471140676971, LR: 0.0003 +[2026-03-01 13:46:14] (step=0023882) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.672666797104285, LR: 0.0003 +[2026-03-01 13:46:22] (step=0023883) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.672862453531598, LR: 0.0003 +[2026-03-01 13:46:30] (step=0023884) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.673058109958912, LR: 0.0003 +[2026-03-01 13:46:38] (step=0023885) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.6732537663862255, LR: 0.0003 +[2026-03-01 13:46:46] (step=0023886) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.6734494228135395, LR: 0.0003 +[2026-03-01 13:46:54] (step=0023887) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.6736450792408535, LR: 0.0003 +[2026-03-01 13:47:02] (step=0023888) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.673840735668167, LR: 0.0003 +[2026-03-01 13:47:09] (step=0023889) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.674036392095481, LR: 0.0003 +[2026-03-01 13:47:17] (step=0023890) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.674232048522794, LR: 0.0003 +[2026-03-01 13:47:25] (step=0023891) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.674427704950108, LR: 0.0003 +[2026-03-01 13:47:33] (step=0023892) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.674623361377421, LR: 0.0003 +[2026-03-01 13:47:41] (step=0023893) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.674819017804735, LR: 0.0003 +[2026-03-01 13:47:49] (step=0023894) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.675014674232049, LR: 0.0003 +[2026-03-01 13:47:57] (step=0023895) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.675210330659362, LR: 0.0003 +[2026-03-01 13:48:04] (step=0023896) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.675405987086676, LR: 0.0003 +[2026-03-01 13:48:12] (step=0023897) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.675601643513989, LR: 0.0003 +[2026-03-01 13:48:20] (step=0023898) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.675797299941303, LR: 0.0003 +[2026-03-01 13:48:28] (step=0023899) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.675992956368617, LR: 0.0003 +[2026-03-01 13:48:36] (step=0023900) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.67618861279593, LR: 0.0003 +[2026-03-01 13:48:44] (step=0023901) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.676384269223244, LR: 0.0003 +[2026-03-01 13:48:51] (step=0023902) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.676579925650557, LR: 0.0003 +[2026-03-01 13:48:59] (step=0023903) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.676775582077871, LR: 0.0003 +[2026-03-01 13:49:07] (step=0023904) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.6769712385051845, LR: 0.0003 +[2026-03-01 13:49:15] (step=0023905) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.6771668949324985, LR: 0.0003 +[2026-03-01 13:49:23] (step=0023906) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.6773625513598125, LR: 0.0003 +[2026-03-01 13:49:31] (step=0023907) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.677558207787126, LR: 0.0003 +[2026-03-01 13:49:38] (step=0023908) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.67775386421444, LR: 0.0003 +[2026-03-01 13:49:46] (step=0023909) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.677949520641753, LR: 0.0003 +[2026-03-01 13:49:54] (step=0023910) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.678145177069067, LR: 0.0003 +[2026-03-01 13:50:02] (step=0023911) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.678340833496381, LR: 0.0003 +[2026-03-01 13:50:10] (step=0023912) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.678536489923694, LR: 0.0003 +[2026-03-01 13:50:18] (step=0023913) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.678732146351008, LR: 0.0003 +[2026-03-01 13:50:26] (step=0023914) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.678927802778321, LR: 0.0003 +[2026-03-01 13:50:33] (step=0023915) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.679123459205635, LR: 0.0003 +[2026-03-01 13:50:41] (step=0023916) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.679319115632948, LR: 0.0003 +[2026-03-01 13:50:49] (step=0023917) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.679514772060262, LR: 0.0003 +[2026-03-01 13:50:57] (step=0023918) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.679710428487576, LR: 0.0003 +[2026-03-01 13:51:05] (step=0023919) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.679906084914889, LR: 0.0003 +[2026-03-01 13:51:13] (step=0023920) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.680101741342203, LR: 0.0003 +[2026-03-01 13:51:21] (step=0023921) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.680297397769516, LR: 0.0003 +[2026-03-01 13:51:28] (step=0023922) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.68049305419683, LR: 0.0003 +[2026-03-01 13:51:36] (step=0023923) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.680688710624144, LR: 0.0003 +[2026-03-01 13:51:44] (step=0023924) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.6808843670514575, LR: 0.0003 +[2026-03-01 13:51:52] (step=0023925) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.6810800234787715, LR: 0.0003 +[2026-03-01 13:52:00] (step=0023926) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.681275679906085, LR: 0.0003 +[2026-03-01 13:52:08] (step=0023927) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.681471336333399, LR: 0.0003 +[2026-03-01 13:52:15] (step=0023928) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.681666992760713, LR: 0.0003 +[2026-03-01 13:52:23] (step=0023929) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.681862649188026, LR: 0.0003 +[2026-03-01 13:52:31] (step=0023930) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.68205830561534, LR: 0.0003 +[2026-03-01 13:52:39] (step=0023931) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.682253962042653, LR: 0.0003 +[2026-03-01 13:52:47] (step=0023932) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.682449618469967, LR: 0.0003 +[2026-03-01 13:52:55] (step=0023933) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.68264527489728, LR: 0.0003 +[2026-03-01 13:53:03] (step=0023934) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.682840931324594, LR: 0.0003 +[2026-03-01 13:53:10] (step=0023935) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.683036587751908, LR: 0.0003 +[2026-03-01 13:53:18] (step=0023936) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.683232244179221, LR: 0.0003 +[2026-03-01 13:53:26] (step=0023937) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.683427900606535, LR: 0.0003 +[2026-03-01 13:53:34] (step=0023938) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.683623557033848, LR: 0.0003 +[2026-03-01 13:53:42] (step=0023939) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.683819213461162, LR: 0.0003 +[2026-03-01 13:53:50] (step=0023940) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.684014869888476, LR: 0.0003 +[2026-03-01 13:53:58] (step=0023941) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.684210526315789, LR: 0.0003 +[2026-03-01 13:54:05] (step=0023942) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.684406182743103, LR: 0.0003 +[2026-03-01 13:54:13] (step=0023943) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.6846018391704165, LR: 0.0003 +[2026-03-01 13:54:21] (step=0023944) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.6847974955977305, LR: 0.0003 +[2026-03-01 13:54:29] (step=0023945) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 4.684993152025044, LR: 0.0003 +[2026-03-01 13:54:37] (step=0023946) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.685188808452358, LR: 0.0003 +[2026-03-01 13:54:45] (step=0023947) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.685384464879672, LR: 0.0003 +[2026-03-01 13:54:52] (step=0023948) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.685580121306985, LR: 0.0003 +[2026-03-01 13:55:00] (step=0023949) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.685775777734299, LR: 0.0003 +[2026-03-01 13:55:08] (step=0023950) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.685971434161612, LR: 0.0003 +[2026-03-01 13:55:16] (step=0023951) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.686167090588926, LR: 0.0003 +[2026-03-01 13:55:24] (step=0023952) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.68636274701624, LR: 0.0003 +[2026-03-01 13:55:32] (step=0023953) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 4.686558403443553, LR: 0.0003 +[2026-03-01 13:55:40] (step=0023954) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.686754059870867, LR: 0.0003 +[2026-03-01 13:55:47] (step=0023955) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.68694971629818, LR: 0.0003 +[2026-03-01 13:55:55] (step=0023956) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.687145372725494, LR: 0.0003 +[2026-03-01 13:56:03] (step=0023957) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.687341029152807, LR: 0.0003 +[2026-03-01 13:56:11] (step=0023958) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.687536685580121, LR: 0.0003 +[2026-03-01 13:56:19] (step=0023959) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.687732342007435, LR: 0.0003 +[2026-03-01 13:56:27] (step=0023960) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 4.687927998434748, LR: 0.0003 +[2026-03-01 13:56:34] (step=0023961) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.688123654862062, LR: 0.0003 +[2026-03-01 13:56:42] (step=0023962) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 4.6883193112893755, LR: 0.0003 +[2026-03-01 13:56:50] (step=0023963) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.6885149677166895, LR: 0.0003 +[2026-03-01 13:56:58] (step=0023964) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.6887106241440035, LR: 0.0003 +[2026-03-01 13:57:06] (step=0023965) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.688906280571317, LR: 0.0003 +[2026-03-01 13:57:14] (step=0023966) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.689101936998631, LR: 0.0003 +[2026-03-01 13:57:22] (step=0023967) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 4.689297593425944, LR: 0.0003 +[2026-03-01 13:57:29] (step=0023968) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.689493249853258, LR: 0.0003 +[2026-03-01 13:57:37] (step=0023969) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.689688906280571, LR: 0.0003 +[2026-03-01 13:57:45] (step=0023970) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.689884562707885, LR: 0.0003 +[2026-03-01 13:57:53] (step=0023971) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.690080219135199, LR: 0.0003 +[2026-03-01 13:58:01] (step=0023972) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.690275875562512, LR: 0.0003 +[2026-03-01 13:58:09] (step=0023973) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.690471531989826, LR: 0.0003 +[2026-03-01 13:58:16] (step=0023974) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.690667188417139, LR: 0.0003 +[2026-03-01 13:58:24] (step=0023975) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.690862844844453, LR: 0.0003 +[2026-03-01 13:58:32] (step=0023976) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.691058501271767, LR: 0.0003 +[2026-03-01 13:58:40] (step=0023977) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.69125415769908, LR: 0.0003 +[2026-03-01 13:58:48] (step=0023978) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 4.691449814126394, LR: 0.0003 +[2026-03-01 13:58:56] (step=0023979) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.691645470553707, LR: 0.0003 +[2026-03-01 13:59:04] (step=0023980) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.691841126981021, LR: 0.0003 +[2026-03-01 13:59:11] (step=0023981) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.6920367834083345, LR: 0.0003 +[2026-03-01 13:59:19] (step=0023982) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.6922324398356485, LR: 0.0003 +[2026-03-01 13:59:27] (step=0023983) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.6924280962629625, LR: 0.0003 +[2026-03-01 13:59:35] (step=0023984) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.692623752690276, LR: 0.0003 +[2026-03-01 13:59:43] (step=0023985) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.69281940911759, LR: 0.0003 +[2026-03-01 13:59:51] (step=0023986) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.693015065544903, LR: 0.0003 +[2026-03-01 13:59:58] (step=0023987) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.693210721972217, LR: 0.0003 +[2026-03-01 14:00:06] (step=0023988) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.693406378399531, LR: 0.0003 +[2026-03-01 14:00:14] (step=0023989) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 4.693602034826844, LR: 0.0003 +[2026-03-01 14:00:22] (step=0023990) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.693797691254158, LR: 0.0003 +[2026-03-01 14:00:30] (step=0023991) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.693993347681471, LR: 0.0003 +[2026-03-01 14:00:38] (step=0023992) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.694189004108785, LR: 0.0003 +[2026-03-01 14:00:46] (step=0023993) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.694384660536099, LR: 0.0003 +[2026-03-01 14:00:53] (step=0023994) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.694580316963412, LR: 0.0003 +[2026-03-01 14:01:01] (step=0023995) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.694775973390726, LR: 0.0003 +[2026-03-01 14:01:09] (step=0023996) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.694971629818039, LR: 0.0003 +[2026-03-01 14:01:17] (step=0023997) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.695167286245353, LR: 0.0003 +[2026-03-01 14:01:25] (step=0023998) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.695362942672666, LR: 0.0003 +[2026-03-01 14:01:33] (step=0023999) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.69555859909998, LR: 0.0003 +[2026-03-01 14:01:41] (step=0024000) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.695754255527294, LR: 0.0003 +[2026-03-01 14:01:41] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0024000/ +[2026-03-01 14:01:48] (step=0024001) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.6959499119546075, LR: 0.0003 +[2026-03-01 14:01:56] (step=0024002) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.6961455683819215, LR: 0.0003 +[2026-03-01 14:02:04] (step=0024003) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.696341224809235, LR: 0.0003 +[2026-03-01 14:02:12] (step=0024004) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.696536881236549, LR: 0.0003 +[2026-03-01 14:02:20] (step=0024005) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.696732537663863, LR: 0.0003 +[2026-03-01 14:02:28] (step=0024006) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.696928194091176, LR: 0.0003 +[2026-03-01 14:02:35] (step=0024007) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.69712385051849, LR: 0.0003 +[2026-03-01 14:02:43] (step=0024008) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.697319506945803, LR: 0.0003 +[2026-03-01 14:02:51] (step=0024009) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.697515163373117, LR: 0.0003 +[2026-03-01 14:02:59] (step=0024010) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.69771081980043, LR: 0.0003 +[2026-03-01 14:03:07] (step=0024011) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 4.697906476227744, LR: 0.0003 +[2026-03-01 14:03:15] (step=0024012) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.698102132655058, LR: 0.0003 +[2026-03-01 14:03:22] (step=0024013) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.698297789082371, LR: 0.0003 +[2026-03-01 14:03:30] (step=0024014) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.698493445509685, LR: 0.0003 +[2026-03-01 14:03:38] (step=0024015) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.698689101936998, LR: 0.0003 +[2026-03-01 14:03:46] (step=0024016) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.698884758364312, LR: 0.0003 +[2026-03-01 14:03:54] (step=0024017) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.699080414791626, LR: 0.0003 +[2026-03-01 14:04:02] (step=0024018) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.699276071218939, LR: 0.0003 +[2026-03-01 14:04:10] (step=0024019) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.699471727646253, LR: 0.0003 +[2026-03-01 14:04:17] (step=0024020) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 4.6996673840735665, LR: 0.0003 +[2026-03-01 14:04:25] (step=0024021) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.6998630405008806, LR: 0.0003 +[2026-03-01 14:04:33] (step=0024022) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.700058696928194, LR: 0.0003 +[2026-03-01 14:04:41] (step=0024023) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.700254353355508, LR: 0.0003 +[2026-03-01 14:04:49] (step=0024024) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 4.700450009782822, LR: 0.0003 +[2026-03-01 14:04:57] (step=0024025) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.700645666210135, LR: 0.0003 +[2026-03-01 14:05:05] (step=0024026) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.700841322637449, LR: 0.0003 +[2026-03-01 14:05:12] (step=0024027) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.701036979064762, LR: 0.0003 +[2026-03-01 14:05:20] (step=0024028) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.701232635492076, LR: 0.0003 +[2026-03-01 14:05:28] (step=0024029) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.70142829191939, LR: 0.0003 +[2026-03-01 14:05:36] (step=0024030) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 4.701623948346703, LR: 0.0003 +[2026-03-01 14:05:44] (step=0024031) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.701819604774017, LR: 0.0003 +[2026-03-01 14:05:52] (step=0024032) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.70201526120133, LR: 0.0003 +[2026-03-01 14:05:59] (step=0024033) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.702210917628644, LR: 0.0003 +[2026-03-01 14:06:07] (step=0024034) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.702406574055957, LR: 0.0003 +[2026-03-01 14:06:15] (step=0024035) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.702602230483271, LR: 0.0003 +[2026-03-01 14:06:23] (step=0024036) Train Loss: 0.4730, Train Steps/Sec: 0.13, Epoch: 4.702797886910585, LR: 0.0003 +[2026-03-01 14:06:31] (step=0024037) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.702993543337898, LR: 0.0003 +[2026-03-01 14:06:39] (step=0024038) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.703189199765212, LR: 0.0003 +[2026-03-01 14:06:46] (step=0024039) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.7033848561925256, LR: 0.0003 +[2026-03-01 14:06:54] (step=0024040) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.70358051261984, LR: 0.0003 +[2026-03-01 14:07:02] (step=0024041) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 4.703776169047154, LR: 0.0003 +[2026-03-01 14:07:10] (step=0024042) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.703971825474467, LR: 0.0003 +[2026-03-01 14:07:18] (step=0024043) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.704167481901781, LR: 0.0003 +[2026-03-01 14:07:26] (step=0024044) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.704363138329094, LR: 0.0003 +[2026-03-01 14:07:34] (step=0024045) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.704558794756408, LR: 0.0003 +[2026-03-01 14:07:41] (step=0024046) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.704754451183722, LR: 0.0003 +[2026-03-01 14:07:49] (step=0024047) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.704950107611035, LR: 0.0003 +[2026-03-01 14:07:57] (step=0024048) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.705145764038349, LR: 0.0003 +[2026-03-01 14:08:05] (step=0024049) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.705341420465662, LR: 0.0003 +[2026-03-01 14:08:13] (step=0024050) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.705537076892976, LR: 0.0003 +[2026-03-01 14:08:21] (step=0024051) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.705732733320289, LR: 0.0003 +[2026-03-01 14:08:28] (step=0024052) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.705928389747603, LR: 0.0003 +[2026-03-01 14:08:36] (step=0024053) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.706124046174917, LR: 0.0003 +[2026-03-01 14:08:44] (step=0024054) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.70631970260223, LR: 0.0003 +[2026-03-01 14:08:52] (step=0024055) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.706515359029544, LR: 0.0003 +[2026-03-01 14:09:00] (step=0024056) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.706711015456857, LR: 0.0003 +[2026-03-01 14:09:08] (step=0024057) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.7069066718841714, LR: 0.0003 +[2026-03-01 14:09:16] (step=0024058) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.7071023283114855, LR: 0.0003 +[2026-03-01 14:09:23] (step=0024059) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.707297984738799, LR: 0.0003 +[2026-03-01 14:09:31] (step=0024060) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.707493641166113, LR: 0.0003 +[2026-03-01 14:09:39] (step=0024061) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.707689297593426, LR: 0.0003 +[2026-03-01 14:09:47] (step=0024062) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.70788495402074, LR: 0.0003 +[2026-03-01 14:09:55] (step=0024063) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.708080610448053, LR: 0.0003 +[2026-03-01 14:10:03] (step=0024064) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.708276266875367, LR: 0.0003 +[2026-03-01 14:10:10] (step=0024065) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.708471923302681, LR: 0.0003 +[2026-03-01 14:10:18] (step=0024066) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 4.708667579729994, LR: 0.0003 +[2026-03-01 14:10:26] (step=0024067) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.708863236157308, LR: 0.0003 +[2026-03-01 14:10:34] (step=0024068) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.709058892584621, LR: 0.0003 +[2026-03-01 14:10:42] (step=0024069) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.709254549011935, LR: 0.0003 +[2026-03-01 14:10:50] (step=0024070) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.709450205439249, LR: 0.0003 +[2026-03-01 14:10:58] (step=0024071) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.709645861866562, LR: 0.0003 +[2026-03-01 14:11:05] (step=0024072) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.709841518293876, LR: 0.0003 +[2026-03-01 14:11:13] (step=0024073) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.710037174721189, LR: 0.0003 +[2026-03-01 14:11:21] (step=0024074) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.710232831148503, LR: 0.0003 +[2026-03-01 14:11:29] (step=0024075) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.7104284875758164, LR: 0.0003 +[2026-03-01 14:11:37] (step=0024076) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.7106241440031305, LR: 0.0003 +[2026-03-01 14:11:45] (step=0024077) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.7108198004304445, LR: 0.0003 +[2026-03-01 14:11:52] (step=0024078) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.711015456857758, LR: 0.0003 +[2026-03-01 14:12:00] (step=0024079) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.711211113285072, LR: 0.0003 +[2026-03-01 14:12:08] (step=0024080) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.711406769712385, LR: 0.0003 +[2026-03-01 14:12:16] (step=0024081) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.711602426139699, LR: 0.0003 +[2026-03-01 14:12:24] (step=0024082) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.711798082567013, LR: 0.0003 +[2026-03-01 14:12:32] (step=0024083) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.711993738994326, LR: 0.0003 +[2026-03-01 14:12:39] (step=0024084) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.71218939542164, LR: 0.0003 +[2026-03-01 14:12:47] (step=0024085) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.712385051848953, LR: 0.0003 +[2026-03-01 14:12:55] (step=0024086) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.712580708276267, LR: 0.0003 +[2026-03-01 14:13:03] (step=0024087) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.71277636470358, LR: 0.0003 +[2026-03-01 14:13:11] (step=0024088) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.712972021130894, LR: 0.0003 +[2026-03-01 14:13:19] (step=0024089) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.713167677558208, LR: 0.0003 +[2026-03-01 14:13:27] (step=0024090) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.713363333985521, LR: 0.0003 +[2026-03-01 14:13:34] (step=0024091) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.713558990412835, LR: 0.0003 +[2026-03-01 14:13:42] (step=0024092) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.713754646840148, LR: 0.0003 +[2026-03-01 14:13:50] (step=0024093) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.713950303267462, LR: 0.0003 +[2026-03-01 14:13:58] (step=0024094) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.714145959694776, LR: 0.0003 +[2026-03-01 14:14:06] (step=0024095) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 4.7143416161220895, LR: 0.0003 +[2026-03-01 14:14:14] (step=0024096) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.7145372725494035, LR: 0.0003 +[2026-03-01 14:14:22] (step=0024097) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.714732928976717, LR: 0.0003 +[2026-03-01 14:14:29] (step=0024098) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.714928585404031, LR: 0.0003 +[2026-03-01 14:14:37] (step=0024099) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.715124241831345, LR: 0.0003 +[2026-03-01 14:14:45] (step=0024100) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.715319898258658, LR: 0.0003 +[2026-03-01 14:14:53] (step=0024101) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.715515554685972, LR: 0.0003 +[2026-03-01 14:15:01] (step=0024102) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.715711211113285, LR: 0.0003 +[2026-03-01 14:15:09] (step=0024103) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.715906867540599, LR: 0.0003 +[2026-03-01 14:15:17] (step=0024104) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.716102523967912, LR: 0.0003 +[2026-03-01 14:15:24] (step=0024105) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.716298180395226, LR: 0.0003 +[2026-03-01 14:15:32] (step=0024106) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.71649383682254, LR: 0.0003 +[2026-03-01 14:15:40] (step=0024107) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.716689493249853, LR: 0.0003 +[2026-03-01 14:15:48] (step=0024108) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.716885149677167, LR: 0.0003 +[2026-03-01 14:15:56] (step=0024109) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.71708080610448, LR: 0.0003 +[2026-03-01 14:16:04] (step=0024110) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.717276462531794, LR: 0.0003 +[2026-03-01 14:16:11] (step=0024111) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.717472118959108, LR: 0.0003 +[2026-03-01 14:16:19] (step=0024112) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.717667775386421, LR: 0.0003 +[2026-03-01 14:16:27] (step=0024113) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.717863431813735, LR: 0.0003 +[2026-03-01 14:16:35] (step=0024114) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.7180590882410485, LR: 0.0003 +[2026-03-01 14:16:43] (step=0024115) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.7182547446683625, LR: 0.0003 +[2026-03-01 14:16:51] (step=0024116) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.718450401095676, LR: 0.0003 +[2026-03-01 14:16:59] (step=0024117) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.71864605752299, LR: 0.0003 +[2026-03-01 14:17:06] (step=0024118) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.718841713950304, LR: 0.0003 +[2026-03-01 14:17:14] (step=0024119) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.719037370377617, LR: 0.0003 +[2026-03-01 14:17:22] (step=0024120) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.719233026804931, LR: 0.0003 +[2026-03-01 14:17:30] (step=0024121) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.719428683232244, LR: 0.0003 +[2026-03-01 14:17:38] (step=0024122) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.719624339659558, LR: 0.0003 +[2026-03-01 14:17:46] (step=0024123) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.719819996086872, LR: 0.0003 +[2026-03-01 14:17:53] (step=0024124) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.720015652514185, LR: 0.0003 +[2026-03-01 14:18:01] (step=0024125) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.720211308941499, LR: 0.0003 +[2026-03-01 14:18:09] (step=0024126) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.720406965368812, LR: 0.0003 +[2026-03-01 14:18:17] (step=0024127) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.720602621796126, LR: 0.0003 +[2026-03-01 14:18:25] (step=0024128) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 4.720798278223439, LR: 0.0003 +[2026-03-01 14:18:33] (step=0024129) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.720993934650753, LR: 0.0003 +[2026-03-01 14:18:41] (step=0024130) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.721189591078067, LR: 0.0003 +[2026-03-01 14:18:48] (step=0024131) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.72138524750538, LR: 0.0003 +[2026-03-01 14:18:56] (step=0024132) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.721580903932694, LR: 0.0003 +[2026-03-01 14:19:04] (step=0024133) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.7217765603600075, LR: 0.0003 +[2026-03-01 14:19:12] (step=0024134) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.7219722167873215, LR: 0.0003 +[2026-03-01 14:19:20] (step=0024135) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.7221678732146355, LR: 0.0003 +[2026-03-01 14:19:28] (step=0024136) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.722363529641949, LR: 0.0003 +[2026-03-01 14:19:35] (step=0024137) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.722559186069263, LR: 0.0003 +[2026-03-01 14:19:43] (step=0024138) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.722754842496576, LR: 0.0003 +[2026-03-01 14:19:51] (step=0024139) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.72295049892389, LR: 0.0003 +[2026-03-01 14:19:59] (step=0024140) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.723146155351203, LR: 0.0003 +[2026-03-01 14:20:07] (step=0024141) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.723341811778517, LR: 0.0003 +[2026-03-01 14:20:15] (step=0024142) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.723537468205831, LR: 0.0003 +[2026-03-01 14:20:23] (step=0024143) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.723733124633144, LR: 0.0003 +[2026-03-01 14:20:30] (step=0024144) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.723928781060458, LR: 0.0003 +[2026-03-01 14:20:38] (step=0024145) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 4.724124437487771, LR: 0.0003 +[2026-03-01 14:20:46] (step=0024146) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.724320093915085, LR: 0.0003 +[2026-03-01 14:20:54] (step=0024147) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.724515750342399, LR: 0.0003 +[2026-03-01 14:21:02] (step=0024148) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.724711406769712, LR: 0.0003 +[2026-03-01 14:21:10] (step=0024149) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.724907063197026, LR: 0.0003 +[2026-03-01 14:21:18] (step=0024150) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 4.725102719624339, LR: 0.0003 +[2026-03-01 14:21:25] (step=0024151) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.725298376051653, LR: 0.0003 +[2026-03-01 14:21:33] (step=0024152) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.725494032478967, LR: 0.0003 +[2026-03-01 14:21:41] (step=0024153) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.7256896889062805, LR: 0.0003 +[2026-03-01 14:21:49] (step=0024154) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.7258853453335945, LR: 0.0003 +[2026-03-01 14:21:57] (step=0024155) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.726081001760908, LR: 0.0003 +[2026-03-01 14:22:05] (step=0024156) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.726276658188222, LR: 0.0003 +[2026-03-01 14:22:12] (step=0024157) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.726472314615535, LR: 0.0003 +[2026-03-01 14:22:20] (step=0024158) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.726667971042849, LR: 0.0003 +[2026-03-01 14:22:28] (step=0024159) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.726863627470163, LR: 0.0003 +[2026-03-01 14:22:36] (step=0024160) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 4.727059283897476, LR: 0.0003 +[2026-03-01 14:22:44] (step=0024161) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.72725494032479, LR: 0.0003 +[2026-03-01 14:22:52] (step=0024162) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.727450596752103, LR: 0.0003 +[2026-03-01 14:22:59] (step=0024163) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.727646253179417, LR: 0.0003 +[2026-03-01 14:23:07] (step=0024164) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.727841909606731, LR: 0.0003 +[2026-03-01 14:23:15] (step=0024165) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.728037566034044, LR: 0.0003 +[2026-03-01 14:23:23] (step=0024166) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.728233222461358, LR: 0.0003 +[2026-03-01 14:23:31] (step=0024167) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.728428878888671, LR: 0.0003 +[2026-03-01 14:23:39] (step=0024168) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.728624535315985, LR: 0.0003 +[2026-03-01 14:23:47] (step=0024169) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.728820191743298, LR: 0.0003 +[2026-03-01 14:23:55] (step=0024170) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.729015848170612, LR: 0.0003 +[2026-03-01 14:24:02] (step=0024171) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.729211504597926, LR: 0.0003 +[2026-03-01 14:24:10] (step=0024172) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.7294071610252395, LR: 0.0003 +[2026-03-01 14:24:18] (step=0024173) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.7296028174525535, LR: 0.0003 +[2026-03-01 14:24:26] (step=0024174) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.729798473879867, LR: 0.0003 +[2026-03-01 14:24:34] (step=0024175) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.729994130307181, LR: 0.0003 +[2026-03-01 14:24:42] (step=0024176) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.730189786734495, LR: 0.0003 +[2026-03-01 14:24:49] (step=0024177) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.730385443161808, LR: 0.0003 +[2026-03-01 14:24:57] (step=0024178) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 4.730581099589122, LR: 0.0003 +[2026-03-01 14:25:05] (step=0024179) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 4.730776756016435, LR: 0.0003 +[2026-03-01 14:25:13] (step=0024180) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 4.730972412443749, LR: 0.0003 +[2026-03-01 14:25:21] (step=0024181) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.731168068871062, LR: 0.0003 +[2026-03-01 14:25:29] (step=0024182) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.731363725298376, LR: 0.0003 +[2026-03-01 14:25:37] (step=0024183) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.73155938172569, LR: 0.0003 +[2026-03-01 14:25:44] (step=0024184) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.731755038153003, LR: 0.0003 +[2026-03-01 14:25:52] (step=0024185) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.731950694580317, LR: 0.0003 +[2026-03-01 14:26:00] (step=0024186) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.73214635100763, LR: 0.0003 +[2026-03-01 14:26:08] (step=0024187) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.732342007434944, LR: 0.0003 +[2026-03-01 14:26:16] (step=0024188) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.732537663862258, LR: 0.0003 +[2026-03-01 14:26:24] (step=0024189) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.732733320289571, LR: 0.0003 +[2026-03-01 14:26:31] (step=0024190) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.732928976716885, LR: 0.0003 +[2026-03-01 14:26:39] (step=0024191) Train Loss: 0.4597, Train Steps/Sec: 0.12, Epoch: 4.7331246331441985, LR: 0.0003 +[2026-03-01 14:26:47] (step=0024192) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.7333202895715125, LR: 0.0003 +[2026-03-01 14:26:55] (step=0024193) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.733515945998826, LR: 0.0003 +[2026-03-01 14:27:03] (step=0024194) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.73371160242614, LR: 0.0003 +[2026-03-01 14:27:11] (step=0024195) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.733907258853454, LR: 0.0003 +[2026-03-01 14:27:19] (step=0024196) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.734102915280767, LR: 0.0003 +[2026-03-01 14:27:26] (step=0024197) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.734298571708081, LR: 0.0003 +[2026-03-01 14:27:34] (step=0024198) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.734494228135394, LR: 0.0003 +[2026-03-01 14:27:42] (step=0024199) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.734689884562708, LR: 0.0003 +[2026-03-01 14:27:50] (step=0024200) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.734885540990022, LR: 0.0003 +[2026-03-01 14:27:58] (step=0024201) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.735081197417335, LR: 0.0003 +[2026-03-01 14:28:06] (step=0024202) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.735276853844649, LR: 0.0003 +[2026-03-01 14:28:14] (step=0024203) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.735472510271962, LR: 0.0003 +[2026-03-01 14:28:21] (step=0024204) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.735668166699276, LR: 0.0003 +[2026-03-01 14:28:29] (step=0024205) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.735863823126589, LR: 0.0003 +[2026-03-01 14:28:37] (step=0024206) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.736059479553903, LR: 0.0003 +[2026-03-01 14:28:45] (step=0024207) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.736255135981217, LR: 0.0003 +[2026-03-01 14:28:53] (step=0024208) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.73645079240853, LR: 0.0003 +[2026-03-01 14:29:01] (step=0024209) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.736646448835844, LR: 0.0003 +[2026-03-01 14:29:09] (step=0024210) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.7368421052631575, LR: 0.0003 +[2026-03-01 14:29:16] (step=0024211) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.7370377616904715, LR: 0.0003 +[2026-03-01 14:29:24] (step=0024212) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.7372334181177855, LR: 0.0003 +[2026-03-01 14:29:32] (step=0024213) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 4.737429074545099, LR: 0.0003 +[2026-03-01 14:29:40] (step=0024214) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.737624730972413, LR: 0.0003 +[2026-03-01 14:29:48] (step=0024215) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 4.737820387399726, LR: 0.0003 +[2026-03-01 14:29:56] (step=0024216) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.73801604382704, LR: 0.0003 +[2026-03-01 14:30:03] (step=0024217) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.738211700254354, LR: 0.0003 +[2026-03-01 14:30:11] (step=0024218) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.738407356681667, LR: 0.0003 +[2026-03-01 14:30:19] (step=0024219) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.738603013108981, LR: 0.0003 +[2026-03-01 14:30:27] (step=0024220) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.738798669536294, LR: 0.0003 +[2026-03-01 14:30:35] (step=0024221) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.738994325963608, LR: 0.0003 +[2026-03-01 14:30:43] (step=0024222) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.739189982390921, LR: 0.0003 +[2026-03-01 14:30:50] (step=0024223) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.739385638818235, LR: 0.0003 +[2026-03-01 14:30:58] (step=0024224) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.739581295245549, LR: 0.0003 +[2026-03-01 14:31:06] (step=0024225) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.739776951672862, LR: 0.0003 +[2026-03-01 14:31:14] (step=0024226) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.739972608100176, LR: 0.0003 +[2026-03-01 14:31:22] (step=0024227) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.740168264527489, LR: 0.0003 +[2026-03-01 14:31:30] (step=0024228) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 4.740363920954803, LR: 0.0003 +[2026-03-01 14:31:38] (step=0024229) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 4.740559577382117, LR: 0.0003 +[2026-03-01 14:31:45] (step=0024230) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.7407552338094305, LR: 0.0003 +[2026-03-01 14:31:53] (step=0024231) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.7409508902367445, LR: 0.0003 +[2026-03-01 14:32:01] (step=0024232) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.741146546664058, LR: 0.0003 +[2026-03-01 14:32:09] (step=0024233) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.741342203091372, LR: 0.0003 +[2026-03-01 14:32:17] (step=0024234) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.741537859518685, LR: 0.0003 +[2026-03-01 14:32:25] (step=0024235) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.741733515945999, LR: 0.0003 +[2026-03-01 14:32:32] (step=0024236) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.741929172373313, LR: 0.0003 +[2026-03-01 14:32:40] (step=0024237) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.742124828800626, LR: 0.0003 +[2026-03-01 14:32:48] (step=0024238) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.74232048522794, LR: 0.0003 +[2026-03-01 14:32:56] (step=0024239) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.742516141655253, LR: 0.0003 +[2026-03-01 14:33:04] (step=0024240) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.742711798082567, LR: 0.0003 +[2026-03-01 14:33:12] (step=0024241) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.742907454509881, LR: 0.0003 +[2026-03-01 14:33:20] (step=0024242) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.743103110937194, LR: 0.0003 +[2026-03-01 14:33:27] (step=0024243) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.743298767364508, LR: 0.0003 +[2026-03-01 14:33:35] (step=0024244) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.743494423791821, LR: 0.0003 +[2026-03-01 14:33:43] (step=0024245) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.743690080219135, LR: 0.0003 +[2026-03-01 14:33:51] (step=0024246) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.743885736646448, LR: 0.0003 +[2026-03-01 14:33:59] (step=0024247) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.744081393073762, LR: 0.0003 +[2026-03-01 14:34:07] (step=0024248) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.744277049501076, LR: 0.0003 +[2026-03-01 14:34:14] (step=0024249) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.7444727059283895, LR: 0.0003 +[2026-03-01 14:34:22] (step=0024250) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.7446683623557036, LR: 0.0003 +[2026-03-01 14:34:30] (step=0024251) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.744864018783017, LR: 0.0003 +[2026-03-01 14:34:38] (step=0024252) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.745059675210331, LR: 0.0003 +[2026-03-01 14:34:46] (step=0024253) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.745255331637645, LR: 0.0003 +[2026-03-01 14:34:54] (step=0024254) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.745450988064958, LR: 0.0003 +[2026-03-01 14:35:01] (step=0024255) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.745646644492272, LR: 0.0003 +[2026-03-01 14:35:09] (step=0024256) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.745842300919585, LR: 0.0003 +[2026-03-01 14:35:17] (step=0024257) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.746037957346899, LR: 0.0003 +[2026-03-01 14:35:25] (step=0024258) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.746233613774212, LR: 0.0003 +[2026-03-01 14:35:33] (step=0024259) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.746429270201526, LR: 0.0003 +[2026-03-01 14:35:41] (step=0024260) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.74662492662884, LR: 0.0003 +[2026-03-01 14:35:49] (step=0024261) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.746820583056153, LR: 0.0003 +[2026-03-01 14:35:57] (step=0024262) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.747016239483467, LR: 0.0003 +[2026-03-01 14:36:04] (step=0024263) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.74721189591078, LR: 0.0003 +[2026-03-01 14:36:12] (step=0024264) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.747407552338094, LR: 0.0003 +[2026-03-01 14:36:20] (step=0024265) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.747603208765408, LR: 0.0003 +[2026-03-01 14:36:28] (step=0024266) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.747798865192721, LR: 0.0003 +[2026-03-01 14:36:36] (step=0024267) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.747994521620035, LR: 0.0003 +[2026-03-01 14:36:44] (step=0024268) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.7481901780473486, LR: 0.0003 +[2026-03-01 14:36:51] (step=0024269) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.748385834474663, LR: 0.0003 +[2026-03-01 14:36:59] (step=0024270) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.748581490901977, LR: 0.0003 +[2026-03-01 14:37:07] (step=0024271) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.74877714732929, LR: 0.0003 +[2026-03-01 14:37:15] (step=0024272) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 4.748972803756604, LR: 0.0003 +[2026-03-01 14:37:23] (step=0024273) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.749168460183917, LR: 0.0003 +[2026-03-01 14:37:31] (step=0024274) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.749364116611231, LR: 0.0003 +[2026-03-01 14:37:38] (step=0024275) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.749559773038544, LR: 0.0003 +[2026-03-01 14:37:46] (step=0024276) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.749755429465858, LR: 0.0003 +[2026-03-01 14:37:54] (step=0024277) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.749951085893172, LR: 0.0003 +[2026-03-01 14:38:02] (step=0024278) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.750146742320485, LR: 0.0003 +[2026-03-01 14:38:10] (step=0024279) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.750342398747799, LR: 0.0003 +[2026-03-01 14:38:18] (step=0024280) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.750538055175112, LR: 0.0003 +[2026-03-01 14:38:25] (step=0024281) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.750733711602426, LR: 0.0003 +[2026-03-01 14:38:33] (step=0024282) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.75092936802974, LR: 0.0003 +[2026-03-01 14:38:41] (step=0024283) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 4.751125024457053, LR: 0.0003 +[2026-03-01 14:38:49] (step=0024284) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.751320680884367, LR: 0.0003 +[2026-03-01 14:38:57] (step=0024285) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.75151633731168, LR: 0.0003 +[2026-03-01 14:39:05] (step=0024286) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.751711993738994, LR: 0.0003 +[2026-03-01 14:39:13] (step=0024287) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.751907650166308, LR: 0.0003 +[2026-03-01 14:39:20] (step=0024288) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.752103306593622, LR: 0.0003 +[2026-03-01 14:39:28] (step=0024289) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.752298963020936, LR: 0.0003 +[2026-03-01 14:39:36] (step=0024290) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 4.752494619448249, LR: 0.0003 +[2026-03-01 14:39:44] (step=0024291) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 4.752690275875563, LR: 0.0003 +[2026-03-01 14:39:52] (step=0024292) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 4.752885932302876, LR: 0.0003 +[2026-03-01 14:40:00] (step=0024293) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.75308158873019, LR: 0.0003 +[2026-03-01 14:40:08] (step=0024294) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.753277245157504, LR: 0.0003 +[2026-03-01 14:40:15] (step=0024295) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.753472901584817, LR: 0.0003 +[2026-03-01 14:40:23] (step=0024296) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.753668558012131, LR: 0.0003 +[2026-03-01 14:40:31] (step=0024297) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.753864214439444, LR: 0.0003 +[2026-03-01 14:40:39] (step=0024298) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.754059870866758, LR: 0.0003 +[2026-03-01 14:40:47] (step=0024299) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.754255527294071, LR: 0.0003 +[2026-03-01 14:40:55] (step=0024300) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.754451183721385, LR: 0.0003 +[2026-03-01 14:41:02] (step=0024301) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.754646840148699, LR: 0.0003 +[2026-03-01 14:41:10] (step=0024302) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.754842496576012, LR: 0.0003 +[2026-03-01 14:41:18] (step=0024303) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 4.755038153003326, LR: 0.0003 +[2026-03-01 14:41:26] (step=0024304) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.7552338094306394, LR: 0.0003 +[2026-03-01 14:41:34] (step=0024305) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.7554294658579535, LR: 0.0003 +[2026-03-01 14:41:42] (step=0024306) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.7556251222852675, LR: 0.0003 +[2026-03-01 14:41:50] (step=0024307) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.755820778712581, LR: 0.0003 +[2026-03-01 14:41:57] (step=0024308) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.756016435139895, LR: 0.0003 +[2026-03-01 14:42:05] (step=0024309) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 4.756212091567208, LR: 0.0003 +[2026-03-01 14:42:13] (step=0024310) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.756407747994522, LR: 0.0003 +[2026-03-01 14:42:21] (step=0024311) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.756603404421835, LR: 0.0003 +[2026-03-01 14:42:29] (step=0024312) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.756799060849149, LR: 0.0003 +[2026-03-01 14:42:37] (step=0024313) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.756994717276463, LR: 0.0003 +[2026-03-01 14:42:45] (step=0024314) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.757190373703776, LR: 0.0003 +[2026-03-01 14:42:52] (step=0024315) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.75738603013109, LR: 0.0003 +[2026-03-01 14:43:00] (step=0024316) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 4.757581686558403, LR: 0.0003 +[2026-03-01 14:43:08] (step=0024317) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.757777342985717, LR: 0.0003 +[2026-03-01 14:43:16] (step=0024318) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.757972999413031, LR: 0.0003 +[2026-03-01 14:43:24] (step=0024319) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.758168655840344, LR: 0.0003 +[2026-03-01 14:43:32] (step=0024320) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.758364312267658, LR: 0.0003 +[2026-03-01 14:43:39] (step=0024321) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.758559968694971, LR: 0.0003 +[2026-03-01 14:43:47] (step=0024322) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.758755625122285, LR: 0.0003 +[2026-03-01 14:43:55] (step=0024323) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.758951281549599, LR: 0.0003 +[2026-03-01 14:44:03] (step=0024324) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.7591469379769125, LR: 0.0003 +[2026-03-01 14:44:11] (step=0024325) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.7593425944042265, LR: 0.0003 +[2026-03-01 14:44:19] (step=0024326) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 4.75953825083154, LR: 0.0003 +[2026-03-01 14:44:26] (step=0024327) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.759733907258854, LR: 0.0003 +[2026-03-01 14:44:34] (step=0024328) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.759929563686167, LR: 0.0003 +[2026-03-01 14:44:42] (step=0024329) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.760125220113481, LR: 0.0003 +[2026-03-01 14:44:50] (step=0024330) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 4.760320876540795, LR: 0.0003 +[2026-03-01 14:44:58] (step=0024331) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.760516532968108, LR: 0.0003 +[2026-03-01 14:45:06] (step=0024332) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.760712189395422, LR: 0.0003 +[2026-03-01 14:45:14] (step=0024333) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.760907845822735, LR: 0.0003 +[2026-03-01 14:45:21] (step=0024334) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.761103502250049, LR: 0.0003 +[2026-03-01 14:45:29] (step=0024335) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.761299158677363, LR: 0.0003 +[2026-03-01 14:45:37] (step=0024336) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 4.761494815104676, LR: 0.0003 +[2026-03-01 14:45:45] (step=0024337) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 4.76169047153199, LR: 0.0003 +[2026-03-01 14:45:53] (step=0024338) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.761886127959303, LR: 0.0003 +[2026-03-01 14:46:01] (step=0024339) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.762081784386617, LR: 0.0003 +[2026-03-01 14:46:09] (step=0024340) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.76227744081393, LR: 0.0003 +[2026-03-01 14:46:16] (step=0024341) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.762473097241244, LR: 0.0003 +[2026-03-01 14:46:24] (step=0024342) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.762668753668558, LR: 0.0003 +[2026-03-01 14:46:32] (step=0024343) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.7628644100958715, LR: 0.0003 +[2026-03-01 14:46:40] (step=0024344) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.7630600665231855, LR: 0.0003 +[2026-03-01 14:46:48] (step=0024345) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.763255722950499, LR: 0.0003 +[2026-03-01 14:46:56] (step=0024346) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 4.763451379377813, LR: 0.0003 +[2026-03-01 14:47:03] (step=0024347) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.763647035805127, LR: 0.0003 +[2026-03-01 14:47:11] (step=0024348) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.76384269223244, LR: 0.0003 +[2026-03-01 14:47:19] (step=0024349) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.764038348659754, LR: 0.0003 +[2026-03-01 14:47:27] (step=0024350) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.764234005087067, LR: 0.0003 +[2026-03-01 14:47:35] (step=0024351) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 4.764429661514381, LR: 0.0003 +[2026-03-01 14:47:43] (step=0024352) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.764625317941694, LR: 0.0003 +[2026-03-01 14:47:51] (step=0024353) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.764820974369008, LR: 0.0003 +[2026-03-01 14:47:58] (step=0024354) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.765016630796322, LR: 0.0003 +[2026-03-01 14:48:06] (step=0024355) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.765212287223635, LR: 0.0003 +[2026-03-01 14:48:14] (step=0024356) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.765407943650949, LR: 0.0003 +[2026-03-01 14:48:22] (step=0024357) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.765603600078262, LR: 0.0003 +[2026-03-01 14:48:30] (step=0024358) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.765799256505576, LR: 0.0003 +[2026-03-01 14:48:38] (step=0024359) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.76599491293289, LR: 0.0003 +[2026-03-01 14:48:46] (step=0024360) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 4.766190569360203, LR: 0.0003 +[2026-03-01 14:48:53] (step=0024361) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.766386225787517, LR: 0.0003 +[2026-03-01 14:49:01] (step=0024362) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.7665818822148305, LR: 0.0003 +[2026-03-01 14:49:09] (step=0024363) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 4.7667775386421445, LR: 0.0003 +[2026-03-01 14:49:17] (step=0024364) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.766973195069458, LR: 0.0003 +[2026-03-01 14:49:25] (step=0024365) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.767168851496772, LR: 0.0003 +[2026-03-01 14:49:32] (step=0024366) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.767364507924086, LR: 0.0003 +[2026-03-01 14:49:40] (step=0024367) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.767560164351399, LR: 0.0003 +[2026-03-01 14:49:48] (step=0024368) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.767755820778713, LR: 0.0003 +[2026-03-01 14:49:56] (step=0024369) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.767951477206026, LR: 0.0003 +[2026-03-01 14:50:04] (step=0024370) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.76814713363334, LR: 0.0003 +[2026-03-01 14:50:12] (step=0024371) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.768342790060654, LR: 0.0003 +[2026-03-01 14:50:19] (step=0024372) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.768538446487967, LR: 0.0003 +[2026-03-01 14:50:27] (step=0024373) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.768734102915281, LR: 0.0003 +[2026-03-01 14:50:35] (step=0024374) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.768929759342594, LR: 0.0003 +[2026-03-01 14:50:43] (step=0024375) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 4.769125415769908, LR: 0.0003 +[2026-03-01 14:50:51] (step=0024376) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.769321072197222, LR: 0.0003 +[2026-03-01 14:50:59] (step=0024377) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.769516728624535, LR: 0.0003 +[2026-03-01 14:51:06] (step=0024378) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 4.769712385051849, LR: 0.0003 +[2026-03-01 14:51:14] (step=0024379) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.769908041479162, LR: 0.0003 +[2026-03-01 14:51:22] (step=0024380) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.770103697906476, LR: 0.0003 +[2026-03-01 14:51:30] (step=0024381) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 4.7702993543337895, LR: 0.0003 +[2026-03-01 14:51:38] (step=0024382) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.7704950107611035, LR: 0.0003 +[2026-03-01 14:51:45] (step=0024383) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.7706906671884175, LR: 0.0003 +[2026-03-01 14:51:53] (step=0024384) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.770886323615731, LR: 0.0003 +[2026-03-01 14:52:01] (step=0024385) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.771081980043045, LR: 0.0003 +[2026-03-01 14:52:09] (step=0024386) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 4.771277636470358, LR: 0.0003 +[2026-03-01 14:52:17] (step=0024387) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 4.771473292897672, LR: 0.0003 +[2026-03-01 14:52:25] (step=0024388) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.771668949324986, LR: 0.0003 +[2026-03-01 14:52:32] (step=0024389) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.771864605752299, LR: 0.0003 +[2026-03-01 14:52:40] (step=0024390) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.772060262179613, LR: 0.0003 +[2026-03-01 14:52:48] (step=0024391) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.772255918606926, LR: 0.0003 +[2026-03-01 14:52:56] (step=0024392) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 4.77245157503424, LR: 0.0003 +[2026-03-01 14:53:04] (step=0024393) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.772647231461553, LR: 0.0003 +[2026-03-01 14:53:12] (step=0024394) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.772842887888867, LR: 0.0003 +[2026-03-01 14:53:19] (step=0024395) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.773038544316181, LR: 0.0003 +[2026-03-01 14:53:27] (step=0024396) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.773234200743494, LR: 0.0003 +[2026-03-01 14:53:35] (step=0024397) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.773429857170808, LR: 0.0003 +[2026-03-01 14:53:43] (step=0024398) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.773625513598121, LR: 0.0003 +[2026-03-01 14:53:51] (step=0024399) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.773821170025435, LR: 0.0003 +[2026-03-01 14:53:59] (step=0024400) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.774016826452749, LR: 0.0003 +[2026-03-01 14:54:06] (step=0024401) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.7742124828800625, LR: 0.0003 +[2026-03-01 14:54:14] (step=0024402) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.7744081393073765, LR: 0.0003 +[2026-03-01 14:54:22] (step=0024403) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.77460379573469, LR: 0.0003 +[2026-03-01 14:54:30] (step=0024404) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.774799452162004, LR: 0.0003 +[2026-03-01 14:54:38] (step=0024405) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.774995108589317, LR: 0.0003 +[2026-03-01 14:54:46] (step=0024406) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.775190765016631, LR: 0.0003 +[2026-03-01 14:54:53] (step=0024407) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.775386421443945, LR: 0.0003 +[2026-03-01 14:55:01] (step=0024408) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.775582077871258, LR: 0.0003 +[2026-03-01 14:55:09] (step=0024409) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.775777734298572, LR: 0.0003 +[2026-03-01 14:55:17] (step=0024410) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.775973390725885, LR: 0.0003 +[2026-03-01 14:55:25] (step=0024411) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.776169047153199, LR: 0.0003 +[2026-03-01 14:55:33] (step=0024412) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.776364703580513, LR: 0.0003 +[2026-03-01 14:55:40] (step=0024413) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.776560360007826, LR: 0.0003 +[2026-03-01 14:55:48] (step=0024414) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.77675601643514, LR: 0.0003 +[2026-03-01 14:55:56] (step=0024415) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 4.776951672862453, LR: 0.0003 +[2026-03-01 14:56:04] (step=0024416) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.777147329289767, LR: 0.0003 +[2026-03-01 14:56:12] (step=0024417) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.77734298571708, LR: 0.0003 +[2026-03-01 14:56:20] (step=0024418) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 4.777538642144394, LR: 0.0003 +[2026-03-01 14:56:27] (step=0024419) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.777734298571708, LR: 0.0003 +[2026-03-01 14:56:35] (step=0024420) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.7779299549990215, LR: 0.0003 +[2026-03-01 14:56:43] (step=0024421) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.7781256114263355, LR: 0.0003 +[2026-03-01 14:56:51] (step=0024422) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.778321267853649, LR: 0.0003 +[2026-03-01 14:56:59] (step=0024423) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.778516924280963, LR: 0.0003 +[2026-03-01 14:57:07] (step=0024424) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.778712580708277, LR: 0.0003 +[2026-03-01 14:57:14] (step=0024425) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.77890823713559, LR: 0.0003 +[2026-03-01 14:57:22] (step=0024426) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.779103893562904, LR: 0.0003 +[2026-03-01 14:57:30] (step=0024427) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.779299549990217, LR: 0.0003 +[2026-03-01 14:57:38] (step=0024428) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.779495206417531, LR: 0.0003 +[2026-03-01 14:57:46] (step=0024429) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.779690862844844, LR: 0.0003 +[2026-03-01 14:57:54] (step=0024430) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.779886519272158, LR: 0.0003 +[2026-03-01 14:58:01] (step=0024431) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.780082175699472, LR: 0.0003 +[2026-03-01 14:58:09] (step=0024432) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 4.780277832126785, LR: 0.0003 +[2026-03-01 14:58:17] (step=0024433) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.780473488554099, LR: 0.0003 +[2026-03-01 14:58:25] (step=0024434) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.780669144981412, LR: 0.0003 +[2026-03-01 14:58:33] (step=0024435) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.780864801408726, LR: 0.0003 +[2026-03-01 14:58:41] (step=0024436) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.78106045783604, LR: 0.0003 +[2026-03-01 14:58:49] (step=0024437) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.781256114263353, LR: 0.0003 +[2026-03-01 14:58:56] (step=0024438) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.781451770690667, LR: 0.0003 +[2026-03-01 14:59:04] (step=0024439) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.7816474271179805, LR: 0.0003 +[2026-03-01 14:59:12] (step=0024440) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.7818430835452945, LR: 0.0003 +[2026-03-01 14:59:20] (step=0024441) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.7820387399726085, LR: 0.0003 +[2026-03-01 14:59:28] (step=0024442) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.782234396399922, LR: 0.0003 +[2026-03-01 14:59:36] (step=0024443) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.782430052827236, LR: 0.0003 +[2026-03-01 14:59:44] (step=0024444) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.782625709254549, LR: 0.0003 +[2026-03-01 14:59:51] (step=0024445) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.782821365681863, LR: 0.0003 +[2026-03-01 14:59:59] (step=0024446) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.783017022109176, LR: 0.0003 +[2026-03-01 15:00:07] (step=0024447) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.78321267853649, LR: 0.0003 +[2026-03-01 15:00:15] (step=0024448) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.783408334963804, LR: 0.0003 +[2026-03-01 15:00:23] (step=0024449) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.783603991391117, LR: 0.0003 +[2026-03-01 15:00:31] (step=0024450) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.783799647818431, LR: 0.0003 +[2026-03-01 15:00:38] (step=0024451) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.783995304245744, LR: 0.0003 +[2026-03-01 15:00:46] (step=0024452) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.784190960673058, LR: 0.0003 +[2026-03-01 15:00:54] (step=0024453) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.784386617100372, LR: 0.0003 +[2026-03-01 15:01:02] (step=0024454) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.784582273527685, LR: 0.0003 +[2026-03-01 15:01:10] (step=0024455) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.784777929954999, LR: 0.0003 +[2026-03-01 15:01:18] (step=0024456) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.784973586382312, LR: 0.0003 +[2026-03-01 15:01:26] (step=0024457) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.785169242809626, LR: 0.0003 +[2026-03-01 15:01:34] (step=0024458) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.7853648992369395, LR: 0.0003 +[2026-03-01 15:01:41] (step=0024459) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 4.7855605556642535, LR: 0.0003 +[2026-03-01 15:01:49] (step=0024460) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.7857562120915675, LR: 0.0003 +[2026-03-01 15:01:57] (step=0024461) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.785951868518881, LR: 0.0003 +[2026-03-01 15:02:05] (step=0024462) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.786147524946195, LR: 0.0003 +[2026-03-01 15:02:13] (step=0024463) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 4.786343181373508, LR: 0.0003 +[2026-03-01 15:02:21] (step=0024464) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 4.786538837800822, LR: 0.0003 +[2026-03-01 15:02:28] (step=0024465) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.786734494228136, LR: 0.0003 +[2026-03-01 15:02:36] (step=0024466) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.786930150655449, LR: 0.0003 +[2026-03-01 15:02:44] (step=0024467) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.787125807082763, LR: 0.0003 +[2026-03-01 15:02:52] (step=0024468) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.787321463510076, LR: 0.0003 +[2026-03-01 15:03:00] (step=0024469) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.78751711993739, LR: 0.0003 +[2026-03-01 15:03:08] (step=0024470) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.787712776364703, LR: 0.0003 +[2026-03-01 15:03:15] (step=0024471) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 4.787908432792017, LR: 0.0003 +[2026-03-01 15:03:23] (step=0024472) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.788104089219331, LR: 0.0003 +[2026-03-01 15:03:31] (step=0024473) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.788299745646644, LR: 0.0003 +[2026-03-01 15:03:39] (step=0024474) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.788495402073958, LR: 0.0003 +[2026-03-01 15:03:47] (step=0024475) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 4.788691058501271, LR: 0.0003 +[2026-03-01 15:03:55] (step=0024476) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.788886714928585, LR: 0.0003 +[2026-03-01 15:04:03] (step=0024477) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.789082371355899, LR: 0.0003 +[2026-03-01 15:04:10] (step=0024478) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.7892780277832125, LR: 0.0003 +[2026-03-01 15:04:18] (step=0024479) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.7894736842105265, LR: 0.0003 +[2026-03-01 15:04:26] (step=0024480) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.78966934063784, LR: 0.0003 +[2026-03-01 15:04:34] (step=0024481) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.789864997065154, LR: 0.0003 +[2026-03-01 15:04:42] (step=0024482) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.790060653492467, LR: 0.0003 +[2026-03-01 15:04:50] (step=0024483) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.790256309919781, LR: 0.0003 +[2026-03-01 15:04:57] (step=0024484) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.790451966347095, LR: 0.0003 +[2026-03-01 15:05:05] (step=0024485) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.790647622774408, LR: 0.0003 +[2026-03-01 15:05:13] (step=0024486) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.790843279201722, LR: 0.0003 +[2026-03-01 15:05:21] (step=0024487) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 4.791038935629035, LR: 0.0003 +[2026-03-01 15:05:29] (step=0024488) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.791234592056349, LR: 0.0003 +[2026-03-01 15:05:37] (step=0024489) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.791430248483663, LR: 0.0003 +[2026-03-01 15:05:44] (step=0024490) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.791625904910976, LR: 0.0003 +[2026-03-01 15:05:52] (step=0024491) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.79182156133829, LR: 0.0003 +[2026-03-01 15:06:00] (step=0024492) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.792017217765603, LR: 0.0003 +[2026-03-01 15:06:08] (step=0024493) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.792212874192917, LR: 0.0003 +[2026-03-01 15:06:16] (step=0024494) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.792408530620231, LR: 0.0003 +[2026-03-01 15:06:24] (step=0024495) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.792604187047544, LR: 0.0003 +[2026-03-01 15:06:32] (step=0024496) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.792799843474858, LR: 0.0003 +[2026-03-01 15:06:40] (step=0024497) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.7929954999021716, LR: 0.0003 +[2026-03-01 15:06:47] (step=0024498) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.793191156329486, LR: 0.0003 +[2026-03-01 15:06:55] (step=0024499) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.793386812756799, LR: 0.0003 +[2026-03-01 15:07:03] (step=0024500) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 4.793582469184113, LR: 0.0003 +[2026-03-01 15:07:03] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0024500/ +[2026-03-01 15:07:11] (step=0024501) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 4.793778125611427, LR: 0.0003 +[2026-03-01 15:07:19] (step=0024502) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.79397378203874, LR: 0.0003 +[2026-03-01 15:07:27] (step=0024503) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.794169438466054, LR: 0.0003 +[2026-03-01 15:07:34] (step=0024504) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.794365094893367, LR: 0.0003 +[2026-03-01 15:07:42] (step=0024505) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.794560751320681, LR: 0.0003 +[2026-03-01 15:07:50] (step=0024506) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.794756407747995, LR: 0.0003 +[2026-03-01 15:07:58] (step=0024507) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.794952064175308, LR: 0.0003 +[2026-03-01 15:08:06] (step=0024508) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.795147720602622, LR: 0.0003 +[2026-03-01 15:08:14] (step=0024509) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.795343377029935, LR: 0.0003 +[2026-03-01 15:08:22] (step=0024510) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 4.795539033457249, LR: 0.0003 +[2026-03-01 15:08:29] (step=0024511) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.795734689884562, LR: 0.0003 +[2026-03-01 15:08:37] (step=0024512) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.795930346311876, LR: 0.0003 +[2026-03-01 15:08:45] (step=0024513) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.79612600273919, LR: 0.0003 +[2026-03-01 15:08:53] (step=0024514) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.796321659166503, LR: 0.0003 +[2026-03-01 15:09:01] (step=0024515) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.796517315593817, LR: 0.0003 +[2026-03-01 15:09:09] (step=0024516) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.796712972021131, LR: 0.0003 +[2026-03-01 15:09:17] (step=0024517) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.796908628448445, LR: 0.0003 +[2026-03-01 15:09:24] (step=0024518) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.797104284875759, LR: 0.0003 +[2026-03-01 15:09:32] (step=0024519) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.797299941303072, LR: 0.0003 +[2026-03-01 15:09:40] (step=0024520) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.797495597730386, LR: 0.0003 +[2026-03-01 15:09:48] (step=0024521) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.797691254157699, LR: 0.0003 +[2026-03-01 15:09:56] (step=0024522) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.797886910585013, LR: 0.0003 +[2026-03-01 15:10:04] (step=0024523) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 4.798082567012326, LR: 0.0003 +[2026-03-01 15:10:11] (step=0024524) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.79827822343964, LR: 0.0003 +[2026-03-01 15:10:19] (step=0024525) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 4.798473879866954, LR: 0.0003 +[2026-03-01 15:10:27] (step=0024526) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.798669536294267, LR: 0.0003 +[2026-03-01 15:10:35] (step=0024527) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.798865192721581, LR: 0.0003 +[2026-03-01 15:10:43] (step=0024528) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.799060849148894, LR: 0.0003 +[2026-03-01 15:10:51] (step=0024529) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.799256505576208, LR: 0.0003 +[2026-03-01 15:10:58] (step=0024530) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.799452162003522, LR: 0.0003 +[2026-03-01 15:11:06] (step=0024531) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.799647818430835, LR: 0.0003 +[2026-03-01 15:11:14] (step=0024532) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.799843474858149, LR: 0.0003 +[2026-03-01 15:11:22] (step=0024533) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.8000391312854624, LR: 0.0003 +[2026-03-01 15:11:30] (step=0024534) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.8002347877127765, LR: 0.0003 +[2026-03-01 15:11:38] (step=0024535) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.80043044414009, LR: 0.0003 +[2026-03-01 15:11:46] (step=0024536) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.800626100567404, LR: 0.0003 +[2026-03-01 15:11:53] (step=0024537) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.800821756994718, LR: 0.0003 +[2026-03-01 15:12:01] (step=0024538) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.801017413422031, LR: 0.0003 +[2026-03-01 15:12:09] (step=0024539) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.801213069849345, LR: 0.0003 +[2026-03-01 15:12:17] (step=0024540) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.801408726276658, LR: 0.0003 +[2026-03-01 15:12:25] (step=0024541) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.801604382703972, LR: 0.0003 +[2026-03-01 15:12:33] (step=0024542) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 4.801800039131286, LR: 0.0003 +[2026-03-01 15:12:40] (step=0024543) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.801995695558599, LR: 0.0003 +[2026-03-01 15:12:48] (step=0024544) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.802191351985913, LR: 0.0003 +[2026-03-01 15:12:56] (step=0024545) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.802387008413226, LR: 0.0003 +[2026-03-01 15:13:04] (step=0024546) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.80258266484054, LR: 0.0003 +[2026-03-01 15:13:12] (step=0024547) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.802778321267854, LR: 0.0003 +[2026-03-01 15:13:20] (step=0024548) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 4.802973977695167, LR: 0.0003 +[2026-03-01 15:13:28] (step=0024549) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.803169634122481, LR: 0.0003 +[2026-03-01 15:13:35] (step=0024550) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 4.803365290549794, LR: 0.0003 +[2026-03-01 15:13:43] (step=0024551) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.803560946977108, LR: 0.0003 +[2026-03-01 15:13:51] (step=0024552) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.8037566034044215, LR: 0.0003 +[2026-03-01 15:13:59] (step=0024553) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.8039522598317355, LR: 0.0003 +[2026-03-01 15:14:07] (step=0024554) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.8041479162590495, LR: 0.0003 +[2026-03-01 15:14:15] (step=0024555) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.804343572686363, LR: 0.0003 +[2026-03-01 15:14:22] (step=0024556) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.804539229113677, LR: 0.0003 +[2026-03-01 15:14:30] (step=0024557) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 4.80473488554099, LR: 0.0003 +[2026-03-01 15:14:38] (step=0024558) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.804930541968304, LR: 0.0003 +[2026-03-01 15:14:46] (step=0024559) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.805126198395618, LR: 0.0003 +[2026-03-01 15:14:54] (step=0024560) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 4.805321854822931, LR: 0.0003 +[2026-03-01 15:15:02] (step=0024561) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.805517511250245, LR: 0.0003 +[2026-03-01 15:15:10] (step=0024562) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.805713167677558, LR: 0.0003 +[2026-03-01 15:15:17] (step=0024563) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.805908824104872, LR: 0.0003 +[2026-03-01 15:15:25] (step=0024564) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.806104480532185, LR: 0.0003 +[2026-03-01 15:15:33] (step=0024565) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.806300136959499, LR: 0.0003 +[2026-03-01 15:15:41] (step=0024566) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 4.806495793386813, LR: 0.0003 +[2026-03-01 15:15:49] (step=0024567) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.806691449814126, LR: 0.0003 +[2026-03-01 15:15:57] (step=0024568) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.80688710624144, LR: 0.0003 +[2026-03-01 15:16:05] (step=0024569) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.807082762668753, LR: 0.0003 +[2026-03-01 15:16:12] (step=0024570) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.807278419096067, LR: 0.0003 +[2026-03-01 15:16:20] (step=0024571) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.807474075523381, LR: 0.0003 +[2026-03-01 15:16:28] (step=0024572) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.8076697319506945, LR: 0.0003 +[2026-03-01 15:16:36] (step=0024573) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.8078653883780085, LR: 0.0003 +[2026-03-01 15:16:44] (step=0024574) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.808061044805322, LR: 0.0003 +[2026-03-01 15:16:52] (step=0024575) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.808256701232636, LR: 0.0003 +[2026-03-01 15:16:59] (step=0024576) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.808452357659949, LR: 0.0003 +[2026-03-01 15:17:07] (step=0024577) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.808648014087263, LR: 0.0003 +[2026-03-01 15:17:15] (step=0024578) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.808843670514577, LR: 0.0003 +[2026-03-01 15:17:23] (step=0024579) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.80903932694189, LR: 0.0003 +[2026-03-01 15:17:31] (step=0024580) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.809234983369204, LR: 0.0003 +[2026-03-01 15:17:39] (step=0024581) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 4.809430639796517, LR: 0.0003 +[2026-03-01 15:17:46] (step=0024582) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.809626296223831, LR: 0.0003 +[2026-03-01 15:17:54] (step=0024583) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.809821952651145, LR: 0.0003 +[2026-03-01 15:18:02] (step=0024584) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.810017609078458, LR: 0.0003 +[2026-03-01 15:18:10] (step=0024585) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.810213265505772, LR: 0.0003 +[2026-03-01 15:18:18] (step=0024586) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.810408921933085, LR: 0.0003 +[2026-03-01 15:18:26] (step=0024587) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.810604578360399, LR: 0.0003 +[2026-03-01 15:18:33] (step=0024588) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.810800234787712, LR: 0.0003 +[2026-03-01 15:18:41] (step=0024589) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.810995891215026, LR: 0.0003 +[2026-03-01 15:18:49] (step=0024590) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.81119154764234, LR: 0.0003 +[2026-03-01 15:18:57] (step=0024591) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.8113872040696535, LR: 0.0003 +[2026-03-01 15:19:05] (step=0024592) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.8115828604969675, LR: 0.0003 +[2026-03-01 15:19:13] (step=0024593) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.811778516924281, LR: 0.0003 +[2026-03-01 15:19:20] (step=0024594) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.811974173351595, LR: 0.0003 +[2026-03-01 15:19:28] (step=0024595) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.812169829778909, LR: 0.0003 +[2026-03-01 15:19:36] (step=0024596) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.812365486206222, LR: 0.0003 +[2026-03-01 15:19:44] (step=0024597) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.812561142633536, LR: 0.0003 +[2026-03-01 15:19:52] (step=0024598) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.812756799060849, LR: 0.0003 +[2026-03-01 15:20:00] (step=0024599) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 4.812952455488163, LR: 0.0003 +[2026-03-01 15:20:08] (step=0024600) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.813148111915476, LR: 0.0003 +[2026-03-01 15:20:16] (step=0024601) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.81334376834279, LR: 0.0003 +[2026-03-01 15:20:23] (step=0024602) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 4.813539424770104, LR: 0.0003 +[2026-03-01 15:20:31] (step=0024603) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.813735081197417, LR: 0.0003 +[2026-03-01 15:20:39] (step=0024604) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.813930737624731, LR: 0.0003 +[2026-03-01 15:20:47] (step=0024605) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.814126394052044, LR: 0.0003 +[2026-03-01 15:20:55] (step=0024606) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 4.814322050479358, LR: 0.0003 +[2026-03-01 15:21:03] (step=0024607) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.814517706906672, LR: 0.0003 +[2026-03-01 15:21:11] (step=0024608) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.814713363333985, LR: 0.0003 +[2026-03-01 15:21:18] (step=0024609) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.814909019761299, LR: 0.0003 +[2026-03-01 15:21:26] (step=0024610) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.8151046761886125, LR: 0.0003 +[2026-03-01 15:21:34] (step=0024611) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.8153003326159265, LR: 0.0003 +[2026-03-01 15:21:42] (step=0024612) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.8154959890432405, LR: 0.0003 +[2026-03-01 15:21:50] (step=0024613) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.815691645470554, LR: 0.0003 +[2026-03-01 15:21:58] (step=0024614) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.815887301897868, LR: 0.0003 +[2026-03-01 15:22:05] (step=0024615) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.816082958325181, LR: 0.0003 +[2026-03-01 15:22:13] (step=0024616) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.816278614752495, LR: 0.0003 +[2026-03-01 15:22:21] (step=0024617) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.816474271179808, LR: 0.0003 +[2026-03-01 15:22:29] (step=0024618) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.816669927607122, LR: 0.0003 +[2026-03-01 15:22:37] (step=0024619) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 4.816865584034436, LR: 0.0003 +[2026-03-01 15:22:45] (step=0024620) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.817061240461749, LR: 0.0003 +[2026-03-01 15:22:52] (step=0024621) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 4.817256896889063, LR: 0.0003 +[2026-03-01 15:23:00] (step=0024622) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.817452553316376, LR: 0.0003 +[2026-03-01 15:23:08] (step=0024623) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.81764820974369, LR: 0.0003 +[2026-03-01 15:23:16] (step=0024624) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.817843866171004, LR: 0.0003 +[2026-03-01 15:23:24] (step=0024625) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 4.818039522598317, LR: 0.0003 +[2026-03-01 15:23:32] (step=0024626) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.818235179025631, LR: 0.0003 +[2026-03-01 15:23:40] (step=0024627) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 4.818430835452944, LR: 0.0003 +[2026-03-01 15:23:47] (step=0024628) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.818626491880258, LR: 0.0003 +[2026-03-01 15:23:55] (step=0024629) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.8188221483075715, LR: 0.0003 +[2026-03-01 15:24:03] (step=0024630) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.8190178047348855, LR: 0.0003 +[2026-03-01 15:24:11] (step=0024631) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.8192134611621995, LR: 0.0003 +[2026-03-01 15:24:19] (step=0024632) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.819409117589513, LR: 0.0003 +[2026-03-01 15:24:27] (step=0024633) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 4.819604774016827, LR: 0.0003 +[2026-03-01 15:24:34] (step=0024634) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.81980043044414, LR: 0.0003 +[2026-03-01 15:24:42] (step=0024635) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.819996086871454, LR: 0.0003 +[2026-03-01 15:24:50] (step=0024636) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.820191743298768, LR: 0.0003 +[2026-03-01 15:24:58] (step=0024637) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.820387399726081, LR: 0.0003 +[2026-03-01 15:25:06] (step=0024638) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 4.820583056153395, LR: 0.0003 +[2026-03-01 15:25:14] (step=0024639) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.820778712580708, LR: 0.0003 +[2026-03-01 15:25:21] (step=0024640) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.820974369008022, LR: 0.0003 +[2026-03-01 15:25:29] (step=0024641) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.821170025435335, LR: 0.0003 +[2026-03-01 15:25:37] (step=0024642) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.821365681862649, LR: 0.0003 +[2026-03-01 15:25:45] (step=0024643) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.821561338289963, LR: 0.0003 +[2026-03-01 15:25:53] (step=0024644) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.821756994717276, LR: 0.0003 +[2026-03-01 15:26:01] (step=0024645) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.82195265114459, LR: 0.0003 +[2026-03-01 15:26:09] (step=0024646) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.822148307571903, LR: 0.0003 +[2026-03-01 15:26:16] (step=0024647) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.822343963999217, LR: 0.0003 +[2026-03-01 15:26:24] (step=0024648) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.822539620426531, LR: 0.0003 +[2026-03-01 15:26:32] (step=0024649) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.8227352768538445, LR: 0.0003 +[2026-03-01 15:26:40] (step=0024650) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.8229309332811585, LR: 0.0003 +[2026-03-01 15:26:48] (step=0024651) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 4.823126589708472, LR: 0.0003 +[2026-03-01 15:26:56] (step=0024652) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.823322246135786, LR: 0.0003 +[2026-03-01 15:27:04] (step=0024653) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.823517902563099, LR: 0.0003 +[2026-03-01 15:27:12] (step=0024654) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.823713558990413, LR: 0.0003 +[2026-03-01 15:27:19] (step=0024655) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 4.823909215417727, LR: 0.0003 +[2026-03-01 15:27:27] (step=0024656) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.82410487184504, LR: 0.0003 +[2026-03-01 15:27:35] (step=0024657) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.824300528272354, LR: 0.0003 +[2026-03-01 15:27:43] (step=0024658) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 4.824496184699667, LR: 0.0003 +[2026-03-01 15:27:51] (step=0024659) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.824691841126981, LR: 0.0003 +[2026-03-01 15:27:59] (step=0024660) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.824887497554295, LR: 0.0003 +[2026-03-01 15:28:06] (step=0024661) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.825083153981608, LR: 0.0003 +[2026-03-01 15:28:14] (step=0024662) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.825278810408922, LR: 0.0003 +[2026-03-01 15:28:22] (step=0024663) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 4.825474466836235, LR: 0.0003 +[2026-03-01 15:28:30] (step=0024664) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.825670123263549, LR: 0.0003 +[2026-03-01 15:28:38] (step=0024665) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.825865779690863, LR: 0.0003 +[2026-03-01 15:28:46] (step=0024666) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.826061436118176, LR: 0.0003 +[2026-03-01 15:28:53] (step=0024667) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.82625709254549, LR: 0.0003 +[2026-03-01 15:29:01] (step=0024668) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.8264527489728035, LR: 0.0003 +[2026-03-01 15:29:09] (step=0024669) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.8266484054001175, LR: 0.0003 +[2026-03-01 15:29:17] (step=0024670) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.826844061827431, LR: 0.0003 +[2026-03-01 15:29:25] (step=0024671) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.827039718254745, LR: 0.0003 +[2026-03-01 15:29:33] (step=0024672) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.827235374682059, LR: 0.0003 +[2026-03-01 15:29:41] (step=0024673) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.827431031109372, LR: 0.0003 +[2026-03-01 15:29:48] (step=0024674) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.827626687536686, LR: 0.0003 +[2026-03-01 15:29:56] (step=0024675) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.827822343963999, LR: 0.0003 +[2026-03-01 15:30:04] (step=0024676) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 4.828018000391313, LR: 0.0003 +[2026-03-01 15:30:12] (step=0024677) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.828213656818627, LR: 0.0003 +[2026-03-01 15:30:20] (step=0024678) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.82840931324594, LR: 0.0003 +[2026-03-01 15:30:28] (step=0024679) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.828604969673254, LR: 0.0003 +[2026-03-01 15:30:35] (step=0024680) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.828800626100567, LR: 0.0003 +[2026-03-01 15:30:43] (step=0024681) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.828996282527881, LR: 0.0003 +[2026-03-01 15:30:51] (step=0024682) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 4.829191938955194, LR: 0.0003 +[2026-03-01 15:30:59] (step=0024683) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.829387595382508, LR: 0.0003 +[2026-03-01 15:31:07] (step=0024684) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 4.829583251809822, LR: 0.0003 +[2026-03-01 15:31:15] (step=0024685) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.829778908237135, LR: 0.0003 +[2026-03-01 15:31:22] (step=0024686) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.829974564664449, LR: 0.0003 +[2026-03-01 15:31:30] (step=0024687) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.8301702210917625, LR: 0.0003 +[2026-03-01 15:31:38] (step=0024688) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 4.8303658775190765, LR: 0.0003 +[2026-03-01 15:31:46] (step=0024689) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.8305615339463905, LR: 0.0003 +[2026-03-01 15:31:54] (step=0024690) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.830757190373704, LR: 0.0003 +[2026-03-01 15:32:02] (step=0024691) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.830952846801018, LR: 0.0003 +[2026-03-01 15:32:10] (step=0024692) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.831148503228331, LR: 0.0003 +[2026-03-01 15:32:17] (step=0024693) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.831344159655645, LR: 0.0003 +[2026-03-01 15:32:25] (step=0024694) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.831539816082958, LR: 0.0003 +[2026-03-01 15:32:33] (step=0024695) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.831735472510272, LR: 0.0003 +[2026-03-01 15:32:41] (step=0024696) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 4.831931128937586, LR: 0.0003 +[2026-03-01 15:32:49] (step=0024697) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.832126785364899, LR: 0.0003 +[2026-03-01 15:32:57] (step=0024698) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.832322441792213, LR: 0.0003 +[2026-03-01 15:33:05] (step=0024699) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.832518098219526, LR: 0.0003 +[2026-03-01 15:33:12] (step=0024700) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.83271375464684, LR: 0.0003 +[2026-03-01 15:33:20] (step=0024701) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.832909411074154, LR: 0.0003 +[2026-03-01 15:33:28] (step=0024702) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.833105067501467, LR: 0.0003 +[2026-03-01 15:33:36] (step=0024703) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.833300723928781, LR: 0.0003 +[2026-03-01 15:33:44] (step=0024704) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.833496380356094, LR: 0.0003 +[2026-03-01 15:33:52] (step=0024705) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.833692036783408, LR: 0.0003 +[2026-03-01 15:34:00] (step=0024706) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.8338876932107215, LR: 0.0003 +[2026-03-01 15:34:07] (step=0024707) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.8340833496380355, LR: 0.0003 +[2026-03-01 15:34:15] (step=0024708) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.8342790060653495, LR: 0.0003 +[2026-03-01 15:34:23] (step=0024709) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.834474662492663, LR: 0.0003 +[2026-03-01 15:34:31] (step=0024710) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 4.834670318919977, LR: 0.0003 +[2026-03-01 15:34:39] (step=0024711) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 4.83486597534729, LR: 0.0003 +[2026-03-01 15:34:47] (step=0024712) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.835061631774604, LR: 0.0003 +[2026-03-01 15:34:54] (step=0024713) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.835257288201918, LR: 0.0003 +[2026-03-01 15:35:02] (step=0024714) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.835452944629231, LR: 0.0003 +[2026-03-01 15:35:10] (step=0024715) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.835648601056545, LR: 0.0003 +[2026-03-01 15:35:18] (step=0024716) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.835844257483858, LR: 0.0003 +[2026-03-01 15:35:26] (step=0024717) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.836039913911172, LR: 0.0003 +[2026-03-01 15:35:34] (step=0024718) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.836235570338486, LR: 0.0003 +[2026-03-01 15:35:41] (step=0024719) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.836431226765799, LR: 0.0003 +[2026-03-01 15:35:49] (step=0024720) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.836626883193113, LR: 0.0003 +[2026-03-01 15:35:57] (step=0024721) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.836822539620426, LR: 0.0003 +[2026-03-01 15:36:05] (step=0024722) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.83701819604774, LR: 0.0003 +[2026-03-01 15:36:13] (step=0024723) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 4.837213852475053, LR: 0.0003 +[2026-03-01 15:36:21] (step=0024724) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.837409508902367, LR: 0.0003 +[2026-03-01 15:36:29] (step=0024725) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.837605165329681, LR: 0.0003 +[2026-03-01 15:36:36] (step=0024726) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 4.8378008217569946, LR: 0.0003 +[2026-03-01 15:36:44] (step=0024727) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 4.837996478184309, LR: 0.0003 +[2026-03-01 15:36:52] (step=0024728) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.838192134611622, LR: 0.0003 +[2026-03-01 15:37:00] (step=0024729) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.838387791038936, LR: 0.0003 +[2026-03-01 15:37:08] (step=0024730) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.83858344746625, LR: 0.0003 +[2026-03-01 15:37:16] (step=0024731) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.838779103893563, LR: 0.0003 +[2026-03-01 15:37:23] (step=0024732) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 4.838974760320877, LR: 0.0003 +[2026-03-01 15:37:31] (step=0024733) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.83917041674819, LR: 0.0003 +[2026-03-01 15:37:39] (step=0024734) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.839366073175504, LR: 0.0003 +[2026-03-01 15:37:47] (step=0024735) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.839561729602817, LR: 0.0003 +[2026-03-01 15:37:55] (step=0024736) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.839757386030131, LR: 0.0003 +[2026-03-01 15:38:03] (step=0024737) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.839953042457445, LR: 0.0003 +[2026-03-01 15:38:10] (step=0024738) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.840148698884758, LR: 0.0003 +[2026-03-01 15:38:18] (step=0024739) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.840344355312072, LR: 0.0003 +[2026-03-01 15:38:26] (step=0024740) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.840540011739385, LR: 0.0003 +[2026-03-01 15:38:34] (step=0024741) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 4.840735668166699, LR: 0.0003 +[2026-03-01 15:38:42] (step=0024742) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.840931324594013, LR: 0.0003 +[2026-03-01 15:38:50] (step=0024743) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 4.841126981021326, LR: 0.0003 +[2026-03-01 15:38:58] (step=0024744) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.84132263744864, LR: 0.0003 +[2026-03-01 15:39:05] (step=0024745) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.841518293875954, LR: 0.0003 +[2026-03-01 15:39:13] (step=0024746) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.841713950303268, LR: 0.0003 +[2026-03-01 15:39:21] (step=0024747) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.841909606730581, LR: 0.0003 +[2026-03-01 15:39:29] (step=0024748) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.842105263157895, LR: 0.0003 +[2026-03-01 15:39:37] (step=0024749) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.842300919585209, LR: 0.0003 +[2026-03-01 15:39:45] (step=0024750) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.842496576012522, LR: 0.0003 +[2026-03-01 15:39:52] (step=0024751) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.842692232439836, LR: 0.0003 +[2026-03-01 15:40:00] (step=0024752) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.842887888867149, LR: 0.0003 +[2026-03-01 15:40:08] (step=0024753) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.843083545294463, LR: 0.0003 +[2026-03-01 15:40:16] (step=0024754) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.843279201721777, LR: 0.0003 +[2026-03-01 15:40:24] (step=0024755) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 4.84347485814909, LR: 0.0003 +[2026-03-01 15:40:32] (step=0024756) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.843670514576404, LR: 0.0003 +[2026-03-01 15:40:40] (step=0024757) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.843866171003717, LR: 0.0003 +[2026-03-01 15:40:48] (step=0024758) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.844061827431031, LR: 0.0003 +[2026-03-01 15:40:55] (step=0024759) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 4.844257483858344, LR: 0.0003 +[2026-03-01 15:41:03] (step=0024760) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 4.844453140285658, LR: 0.0003 +[2026-03-01 15:41:11] (step=0024761) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.844648796712972, LR: 0.0003 +[2026-03-01 15:41:19] (step=0024762) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.8448444531402854, LR: 0.0003 +[2026-03-01 15:41:27] (step=0024763) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.8450401095675995, LR: 0.0003 +[2026-03-01 15:41:35] (step=0024764) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.845235765994913, LR: 0.0003 +[2026-03-01 15:41:42] (step=0024765) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.845431422422227, LR: 0.0003 +[2026-03-01 15:41:50] (step=0024766) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.845627078849541, LR: 0.0003 +[2026-03-01 15:41:58] (step=0024767) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.845822735276854, LR: 0.0003 +[2026-03-01 15:42:06] (step=0024768) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.846018391704168, LR: 0.0003 +[2026-03-01 15:42:14] (step=0024769) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.846214048131481, LR: 0.0003 +[2026-03-01 15:42:22] (step=0024770) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.846409704558795, LR: 0.0003 +[2026-03-01 15:42:29] (step=0024771) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.846605360986109, LR: 0.0003 +[2026-03-01 15:42:37] (step=0024772) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.846801017413422, LR: 0.0003 +[2026-03-01 15:42:45] (step=0024773) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.846996673840736, LR: 0.0003 +[2026-03-01 15:42:53] (step=0024774) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.847192330268049, LR: 0.0003 +[2026-03-01 15:43:01] (step=0024775) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.847387986695363, LR: 0.0003 +[2026-03-01 15:43:09] (step=0024776) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.847583643122676, LR: 0.0003 +[2026-03-01 15:43:17] (step=0024777) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.84777929954999, LR: 0.0003 +[2026-03-01 15:43:24] (step=0024778) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.847974955977304, LR: 0.0003 +[2026-03-01 15:43:32] (step=0024779) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.848170612404617, LR: 0.0003 +[2026-03-01 15:43:40] (step=0024780) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 4.848366268831931, LR: 0.0003 +[2026-03-01 15:43:48] (step=0024781) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.8485619252592445, LR: 0.0003 +[2026-03-01 15:43:56] (step=0024782) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.8487575816865585, LR: 0.0003 +[2026-03-01 15:44:04] (step=0024783) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.8489532381138725, LR: 0.0003 +[2026-03-01 15:44:11] (step=0024784) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.849148894541186, LR: 0.0003 +[2026-03-01 15:44:19] (step=0024785) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.8493445509685, LR: 0.0003 +[2026-03-01 15:44:27] (step=0024786) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.849540207395813, LR: 0.0003 +[2026-03-01 15:44:35] (step=0024787) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.849735863823127, LR: 0.0003 +[2026-03-01 15:44:43] (step=0024788) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.84993152025044, LR: 0.0003 +[2026-03-01 15:44:51] (step=0024789) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.850127176677754, LR: 0.0003 +[2026-03-01 15:44:58] (step=0024790) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.850322833105068, LR: 0.0003 +[2026-03-01 15:45:06] (step=0024791) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.850518489532381, LR: 0.0003 +[2026-03-01 15:45:14] (step=0024792) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.850714145959695, LR: 0.0003 +[2026-03-01 15:45:22] (step=0024793) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 4.850909802387008, LR: 0.0003 +[2026-03-01 15:45:30] (step=0024794) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.851105458814322, LR: 0.0003 +[2026-03-01 15:45:38] (step=0024795) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 4.851301115241636, LR: 0.0003 +[2026-03-01 15:45:46] (step=0024796) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.851496771668949, LR: 0.0003 +[2026-03-01 15:45:53] (step=0024797) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 4.851692428096263, LR: 0.0003 +[2026-03-01 15:46:01] (step=0024798) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.851888084523576, LR: 0.0003 +[2026-03-01 15:46:09] (step=0024799) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.85208374095089, LR: 0.0003 +[2026-03-01 15:46:17] (step=0024800) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 4.8522793973782035, LR: 0.0003 +[2026-03-01 15:46:25] (step=0024801) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.8524750538055175, LR: 0.0003 +[2026-03-01 15:46:33] (step=0024802) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.8526707102328315, LR: 0.0003 +[2026-03-01 15:46:41] (step=0024803) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.852866366660145, LR: 0.0003 +[2026-03-01 15:46:49] (step=0024804) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.853062023087459, LR: 0.0003 +[2026-03-01 15:46:56] (step=0024805) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.853257679514772, LR: 0.0003 +[2026-03-01 15:47:04] (step=0024806) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.853453335942086, LR: 0.0003 +[2026-03-01 15:47:12] (step=0024807) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.8536489923694, LR: 0.0003 +[2026-03-01 15:47:20] (step=0024808) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.853844648796713, LR: 0.0003 +[2026-03-01 15:47:28] (step=0024809) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.854040305224027, LR: 0.0003 +[2026-03-01 15:47:36] (step=0024810) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.85423596165134, LR: 0.0003 +[2026-03-01 15:47:43] (step=0024811) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.854431618078654, LR: 0.0003 +[2026-03-01 15:47:51] (step=0024812) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.854627274505967, LR: 0.0003 +[2026-03-01 15:47:59] (step=0024813) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.854822930933281, LR: 0.0003 +[2026-03-01 15:48:07] (step=0024814) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.855018587360595, LR: 0.0003 +[2026-03-01 15:48:15] (step=0024815) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.855214243787908, LR: 0.0003 +[2026-03-01 15:48:23] (step=0024816) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.855409900215222, LR: 0.0003 +[2026-03-01 15:48:30] (step=0024817) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.855605556642535, LR: 0.0003 +[2026-03-01 15:48:38] (step=0024818) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.855801213069849, LR: 0.0003 +[2026-03-01 15:48:46] (step=0024819) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.855996869497163, LR: 0.0003 +[2026-03-01 15:48:54] (step=0024820) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.8561925259244765, LR: 0.0003 +[2026-03-01 15:49:02] (step=0024821) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.8563881823517905, LR: 0.0003 +[2026-03-01 15:49:10] (step=0024822) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 4.856583838779104, LR: 0.0003 +[2026-03-01 15:49:17] (step=0024823) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.856779495206418, LR: 0.0003 +[2026-03-01 15:49:25] (step=0024824) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.856975151633731, LR: 0.0003 +[2026-03-01 15:49:33] (step=0024825) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.857170808061045, LR: 0.0003 +[2026-03-01 15:49:41] (step=0024826) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.857366464488359, LR: 0.0003 +[2026-03-01 15:49:49] (step=0024827) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.857562120915672, LR: 0.0003 +[2026-03-01 15:49:57] (step=0024828) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.857757777342986, LR: 0.0003 +[2026-03-01 15:50:05] (step=0024829) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.857953433770299, LR: 0.0003 +[2026-03-01 15:50:12] (step=0024830) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.858149090197613, LR: 0.0003 +[2026-03-01 15:50:20] (step=0024831) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.858344746624927, LR: 0.0003 +[2026-03-01 15:50:28] (step=0024832) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.85854040305224, LR: 0.0003 +[2026-03-01 15:50:36] (step=0024833) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 4.858736059479554, LR: 0.0003 +[2026-03-01 15:50:44] (step=0024834) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.858931715906867, LR: 0.0003 +[2026-03-01 15:50:52] (step=0024835) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.859127372334181, LR: 0.0003 +[2026-03-01 15:50:59] (step=0024836) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.859323028761495, LR: 0.0003 +[2026-03-01 15:51:07] (step=0024837) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.859518685188808, LR: 0.0003 +[2026-03-01 15:51:15] (step=0024838) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.859714341616122, LR: 0.0003 +[2026-03-01 15:51:23] (step=0024839) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.8599099980434355, LR: 0.0003 +[2026-03-01 15:51:31] (step=0024840) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.8601056544707495, LR: 0.0003 +[2026-03-01 15:51:39] (step=0024841) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.860301310898063, LR: 0.0003 +[2026-03-01 15:51:47] (step=0024842) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.860496967325377, LR: 0.0003 +[2026-03-01 15:51:55] (step=0024843) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.860692623752691, LR: 0.0003 +[2026-03-01 15:52:02] (step=0024844) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.860888280180004, LR: 0.0003 +[2026-03-01 15:52:10] (step=0024845) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 4.861083936607318, LR: 0.0003 +[2026-03-01 15:52:18] (step=0024846) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.861279593034631, LR: 0.0003 +[2026-03-01 15:52:26] (step=0024847) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.861475249461945, LR: 0.0003 +[2026-03-01 15:52:34] (step=0024848) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.861670905889259, LR: 0.0003 +[2026-03-01 15:52:42] (step=0024849) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.861866562316572, LR: 0.0003 +[2026-03-01 15:52:49] (step=0024850) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.862062218743886, LR: 0.0003 +[2026-03-01 15:52:57] (step=0024851) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.862257875171199, LR: 0.0003 +[2026-03-01 15:53:05] (step=0024852) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.862453531598513, LR: 0.0003 +[2026-03-01 15:53:13] (step=0024853) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.862649188025826, LR: 0.0003 +[2026-03-01 15:53:21] (step=0024854) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.86284484445314, LR: 0.0003 +[2026-03-01 15:53:29] (step=0024855) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 4.863040500880454, LR: 0.0003 +[2026-03-01 15:53:37] (step=0024856) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.863236157307767, LR: 0.0003 +[2026-03-01 15:53:44] (step=0024857) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.863431813735081, LR: 0.0003 +[2026-03-01 15:53:52] (step=0024858) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.8636274701623945, LR: 0.0003 +[2026-03-01 15:54:00] (step=0024859) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.8638231265897085, LR: 0.0003 +[2026-03-01 15:54:08] (step=0024860) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.8640187830170225, LR: 0.0003 +[2026-03-01 15:54:16] (step=0024861) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 4.864214439444336, LR: 0.0003 +[2026-03-01 15:54:24] (step=0024862) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.86441009587165, LR: 0.0003 +[2026-03-01 15:54:32] (step=0024863) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 4.864605752298963, LR: 0.0003 +[2026-03-01 15:54:39] (step=0024864) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.864801408726277, LR: 0.0003 +[2026-03-01 15:54:47] (step=0024865) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.86499706515359, LR: 0.0003 +[2026-03-01 15:54:55] (step=0024866) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.865192721580904, LR: 0.0003 +[2026-03-01 15:55:03] (step=0024867) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.865388378008218, LR: 0.0003 +[2026-03-01 15:55:11] (step=0024868) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.865584034435531, LR: 0.0003 +[2026-03-01 15:55:19] (step=0024869) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 4.865779690862845, LR: 0.0003 +[2026-03-01 15:55:26] (step=0024870) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.865975347290158, LR: 0.0003 +[2026-03-01 15:55:34] (step=0024871) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 4.866171003717472, LR: 0.0003 +[2026-03-01 15:55:42] (step=0024872) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.866366660144786, LR: 0.0003 +[2026-03-01 15:55:50] (step=0024873) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 4.866562316572099, LR: 0.0003 +[2026-03-01 15:55:58] (step=0024874) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.866757972999413, LR: 0.0003 +[2026-03-01 15:56:06] (step=0024875) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.866953629426726, LR: 0.0003 +[2026-03-01 15:56:13] (step=0024876) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 4.86714928585404, LR: 0.0003 +[2026-03-01 15:56:21] (step=0024877) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.8673449422813535, LR: 0.0003 +[2026-03-01 15:56:29] (step=0024878) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.8675405987086675, LR: 0.0003 +[2026-03-01 15:56:37] (step=0024879) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.8677362551359815, LR: 0.0003 +[2026-03-01 15:56:45] (step=0024880) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.867931911563295, LR: 0.0003 +[2026-03-01 15:56:53] (step=0024881) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.868127567990609, LR: 0.0003 +[2026-03-01 15:57:01] (step=0024882) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.868323224417922, LR: 0.0003 +[2026-03-01 15:57:08] (step=0024883) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.868518880845236, LR: 0.0003 +[2026-03-01 15:57:16] (step=0024884) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.86871453727255, LR: 0.0003 +[2026-03-01 15:57:24] (step=0024885) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.868910193699863, LR: 0.0003 +[2026-03-01 15:57:32] (step=0024886) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 4.869105850127177, LR: 0.0003 +[2026-03-01 15:57:40] (step=0024887) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.86930150655449, LR: 0.0003 +[2026-03-01 15:57:48] (step=0024888) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.869497162981804, LR: 0.0003 +[2026-03-01 15:57:55] (step=0024889) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.869692819409118, LR: 0.0003 +[2026-03-01 15:58:03] (step=0024890) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.869888475836431, LR: 0.0003 +[2026-03-01 15:58:11] (step=0024891) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.870084132263745, LR: 0.0003 +[2026-03-01 15:58:19] (step=0024892) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.870279788691058, LR: 0.0003 +[2026-03-01 15:58:27] (step=0024893) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.870475445118372, LR: 0.0003 +[2026-03-01 15:58:35] (step=0024894) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.870671101545685, LR: 0.0003 +[2026-03-01 15:58:43] (step=0024895) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.870866757972999, LR: 0.0003 +[2026-03-01 15:58:50] (step=0024896) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.871062414400313, LR: 0.0003 +[2026-03-01 15:58:58] (step=0024897) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.8712580708276265, LR: 0.0003 +[2026-03-01 15:59:06] (step=0024898) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.8714537272549405, LR: 0.0003 +[2026-03-01 15:59:14] (step=0024899) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.871649383682254, LR: 0.0003 +[2026-03-01 15:59:22] (step=0024900) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.871845040109568, LR: 0.0003 +[2026-03-01 15:59:30] (step=0024901) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.872040696536882, LR: 0.0003 +[2026-03-01 15:59:37] (step=0024902) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.872236352964195, LR: 0.0003 +[2026-03-01 15:59:45] (step=0024903) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.872432009391509, LR: 0.0003 +[2026-03-01 15:59:53] (step=0024904) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 4.872627665818822, LR: 0.0003 +[2026-03-01 16:00:01] (step=0024905) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 4.872823322246136, LR: 0.0003 +[2026-03-01 16:00:09] (step=0024906) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 4.873018978673449, LR: 0.0003 +[2026-03-01 16:00:17] (step=0024907) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.873214635100763, LR: 0.0003 +[2026-03-01 16:00:25] (step=0024908) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 4.873410291528077, LR: 0.0003 +[2026-03-01 16:00:33] (step=0024909) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 4.87360594795539, LR: 0.0003 +[2026-03-01 16:00:40] (step=0024910) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 4.873801604382704, LR: 0.0003 +[2026-03-01 16:00:48] (step=0024911) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.873997260810017, LR: 0.0003 +[2026-03-01 16:00:56] (step=0024912) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 4.874192917237331, LR: 0.0003 +[2026-03-01 16:01:04] (step=0024913) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.874388573664645, LR: 0.0003 +[2026-03-01 16:01:12] (step=0024914) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.874584230091958, LR: 0.0003 +[2026-03-01 16:01:20] (step=0024915) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.874779886519272, LR: 0.0003 +[2026-03-01 16:01:27] (step=0024916) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 4.8749755429465855, LR: 0.0003 +[2026-03-01 16:01:35] (step=0024917) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 4.8751711993738995, LR: 0.0003 +[2026-03-01 16:01:43] (step=0024918) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.875366855801213, LR: 0.0003 +[2026-03-01 16:01:51] (step=0024919) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.875562512228527, LR: 0.0003 +[2026-03-01 16:01:59] (step=0024920) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.875758168655841, LR: 0.0003 +[2026-03-01 16:02:07] (step=0024921) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 4.875953825083154, LR: 0.0003 +[2026-03-01 16:02:15] (step=0024922) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 4.876149481510468, LR: 0.0003 +[2026-03-01 16:02:22] (step=0024923) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.876345137937781, LR: 0.0003 +[2026-03-01 16:02:30] (step=0024924) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.876540794365095, LR: 0.0003 +[2026-03-01 16:02:38] (step=0024925) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 4.876736450792409, LR: 0.0003 +[2026-03-01 16:02:46] (step=0024926) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.876932107219722, LR: 0.0003 +[2026-03-01 16:02:54] (step=0024927) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 4.877127763647036, LR: 0.0003 +[2026-03-01 16:03:02] (step=0024928) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.877323420074349, LR: 0.0003 +[2026-03-01 16:03:09] (step=0024929) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.877519076501663, LR: 0.0003 +[2026-03-01 16:03:17] (step=0024930) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.877714732928976, LR: 0.0003 +[2026-03-01 16:03:25] (step=0024931) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.87791038935629, LR: 0.0003 +[2026-03-01 16:03:33] (step=0024932) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.878106045783604, LR: 0.0003 +[2026-03-01 16:03:41] (step=0024933) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.878301702210917, LR: 0.0003 +[2026-03-01 16:03:49] (step=0024934) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.878497358638231, LR: 0.0003 +[2026-03-01 16:03:56] (step=0024935) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.8786930150655445, LR: 0.0003 +[2026-03-01 16:04:04] (step=0024936) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.8788886714928585, LR: 0.0003 +[2026-03-01 16:04:12] (step=0024937) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.8790843279201725, LR: 0.0003 +[2026-03-01 16:04:20] (step=0024938) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 4.879279984347486, LR: 0.0003 +[2026-03-01 16:04:28] (step=0024939) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.8794756407748, LR: 0.0003 +[2026-03-01 16:04:36] (step=0024940) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.879671297202113, LR: 0.0003 +[2026-03-01 16:04:44] (step=0024941) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.879866953629427, LR: 0.0003 +[2026-03-01 16:04:51] (step=0024942) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.880062610056741, LR: 0.0003 +[2026-03-01 16:04:59] (step=0024943) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 4.880258266484054, LR: 0.0003 +[2026-03-01 16:05:07] (step=0024944) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.880453922911368, LR: 0.0003 +[2026-03-01 16:05:15] (step=0024945) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.880649579338681, LR: 0.0003 +[2026-03-01 16:05:23] (step=0024946) Train Loss: 0.4490, Train Steps/Sec: 0.12, Epoch: 4.880845235765995, LR: 0.0003 +[2026-03-01 16:05:31] (step=0024947) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.881040892193308, LR: 0.0003 +[2026-03-01 16:05:39] (step=0024948) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.881236548620622, LR: 0.0003 +[2026-03-01 16:05:46] (step=0024949) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.881432205047936, LR: 0.0003 +[2026-03-01 16:05:54] (step=0024950) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.881627861475249, LR: 0.0003 +[2026-03-01 16:06:02] (step=0024951) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 4.881823517902563, LR: 0.0003 +[2026-03-01 16:06:10] (step=0024952) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.882019174329876, LR: 0.0003 +[2026-03-01 16:06:18] (step=0024953) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 4.88221483075719, LR: 0.0003 +[2026-03-01 16:06:26] (step=0024954) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.882410487184504, LR: 0.0003 +[2026-03-01 16:06:34] (step=0024955) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.8826061436118176, LR: 0.0003 +[2026-03-01 16:06:41] (step=0024956) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 4.882801800039132, LR: 0.0003 +[2026-03-01 16:06:49] (step=0024957) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 4.882997456466445, LR: 0.0003 +[2026-03-01 16:06:57] (step=0024958) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.883193112893759, LR: 0.0003 +[2026-03-01 16:07:05] (step=0024959) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.883388769321072, LR: 0.0003 +[2026-03-01 16:07:13] (step=0024960) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 4.883584425748386, LR: 0.0003 +[2026-03-01 16:07:21] (step=0024961) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 4.8837800821757, LR: 0.0003 +[2026-03-01 16:07:29] (step=0024962) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.883975738603013, LR: 0.0003 +[2026-03-01 16:07:36] (step=0024963) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.884171395030327, LR: 0.0003 +[2026-03-01 16:07:44] (step=0024964) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.88436705145764, LR: 0.0003 +[2026-03-01 16:07:52] (step=0024965) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.884562707884954, LR: 0.0003 +[2026-03-01 16:08:00] (step=0024966) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.884758364312268, LR: 0.0003 +[2026-03-01 16:08:08] (step=0024967) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.884954020739581, LR: 0.0003 +[2026-03-01 16:08:16] (step=0024968) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.885149677166895, LR: 0.0003 +[2026-03-01 16:08:23] (step=0024969) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.885345333594208, LR: 0.0003 +[2026-03-01 16:08:31] (step=0024970) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 4.885540990021522, LR: 0.0003 +[2026-03-01 16:08:39] (step=0024971) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.885736646448835, LR: 0.0003 +[2026-03-01 16:08:47] (step=0024972) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.885932302876149, LR: 0.0003 +[2026-03-01 16:08:55] (step=0024973) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.886127959303463, LR: 0.0003 +[2026-03-01 16:09:03] (step=0024974) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 4.886323615730777, LR: 0.0003 +[2026-03-01 16:09:10] (step=0024975) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 4.886519272158091, LR: 0.0003 +[2026-03-01 16:09:18] (step=0024976) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.886714928585404, LR: 0.0003 +[2026-03-01 16:09:26] (step=0024977) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.886910585012718, LR: 0.0003 +[2026-03-01 16:09:34] (step=0024978) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.887106241440032, LR: 0.0003 +[2026-03-01 16:09:42] (step=0024979) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 4.887301897867345, LR: 0.0003 +[2026-03-01 16:09:50] (step=0024980) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.887497554294659, LR: 0.0003 +[2026-03-01 16:09:57] (step=0024981) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.887693210721972, LR: 0.0003 +[2026-03-01 16:10:05] (step=0024982) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.887888867149286, LR: 0.0003 +[2026-03-01 16:10:13] (step=0024983) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.888084523576599, LR: 0.0003 +[2026-03-01 16:10:21] (step=0024984) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.888280180003913, LR: 0.0003 +[2026-03-01 16:10:29] (step=0024985) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.888475836431227, LR: 0.0003 +[2026-03-01 16:10:37] (step=0024986) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.88867149285854, LR: 0.0003 +[2026-03-01 16:10:44] (step=0024987) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.888867149285854, LR: 0.0003 +[2026-03-01 16:10:52] (step=0024988) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.889062805713167, LR: 0.0003 +[2026-03-01 16:11:00] (step=0024989) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 4.889258462140481, LR: 0.0003 +[2026-03-01 16:11:08] (step=0024990) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.889454118567795, LR: 0.0003 +[2026-03-01 16:11:16] (step=0024991) Train Loss: 0.4407, Train Steps/Sec: 0.12, Epoch: 4.8896497749951084, LR: 0.0003 +[2026-03-01 16:11:24] (step=0024992) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 4.8898454314224225, LR: 0.0003 +[2026-03-01 16:11:32] (step=0024993) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.890041087849736, LR: 0.0003 +[2026-03-01 16:11:40] (step=0024994) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.89023674427705, LR: 0.0003 +[2026-03-01 16:11:47] (step=0024995) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.890432400704364, LR: 0.0003 +[2026-03-01 16:11:55] (step=0024996) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.890628057131677, LR: 0.0003 +[2026-03-01 16:12:03] (step=0024997) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.890823713558991, LR: 0.0003 +[2026-03-01 16:12:11] (step=0024998) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 4.891019369986304, LR: 0.0003 +[2026-03-01 16:12:19] (step=0024999) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 4.891215026413618, LR: 0.0003 +[2026-03-01 16:12:27] (step=0025000) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 4.891410682840931, LR: 0.0003 +[2026-03-01 16:12:27] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0025000/ +[2026-03-01 16:12:34] (step=0025001) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.891606339268245, LR: 0.0003 +[2026-03-01 16:12:42] (step=0025002) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 4.891801995695559, LR: 0.0003 +[2026-03-01 16:12:50] (step=0025003) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 4.891997652122872, LR: 0.0003 +[2026-03-01 16:12:58] (step=0025004) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.892193308550186, LR: 0.0003 +[2026-03-01 16:13:06] (step=0025005) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.892388964977499, LR: 0.0003 +[2026-03-01 16:13:14] (step=0025006) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 4.892584621404813, LR: 0.0003 +[2026-03-01 16:13:22] (step=0025007) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 4.892780277832127, LR: 0.0003 +[2026-03-01 16:13:29] (step=0025008) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.89297593425944, LR: 0.0003 +[2026-03-01 16:13:37] (step=0025009) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.893171590686754, LR: 0.0003 +[2026-03-01 16:13:45] (step=0025010) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.8933672471140675, LR: 0.0003 +[2026-03-01 16:13:53] (step=0025011) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.8935629035413815, LR: 0.0003 +[2026-03-01 16:14:01] (step=0025012) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.893758559968695, LR: 0.0003 +[2026-03-01 16:14:09] (step=0025013) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 4.893954216396009, LR: 0.0003 +[2026-03-01 16:14:16] (step=0025014) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.894149872823323, LR: 0.0003 +[2026-03-01 16:14:24] (step=0025015) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 4.894345529250636, LR: 0.0003 +[2026-03-01 16:14:32] (step=0025016) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.89454118567795, LR: 0.0003 +[2026-03-01 16:14:40] (step=0025017) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.894736842105263, LR: 0.0003 +[2026-03-01 16:14:48] (step=0025018) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.894932498532577, LR: 0.0003 +[2026-03-01 16:14:56] (step=0025019) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.895128154959891, LR: 0.0003 +[2026-03-01 16:15:04] (step=0025020) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.895323811387204, LR: 0.0003 +[2026-03-01 16:15:11] (step=0025021) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 4.895519467814518, LR: 0.0003 +[2026-03-01 16:15:19] (step=0025022) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.895715124241831, LR: 0.0003 +[2026-03-01 16:15:27] (step=0025023) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.895910780669145, LR: 0.0003 +[2026-03-01 16:15:35] (step=0025024) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.896106437096458, LR: 0.0003 +[2026-03-01 16:15:43] (step=0025025) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.896302093523772, LR: 0.0003 +[2026-03-01 16:15:51] (step=0025026) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 4.896497749951086, LR: 0.0003 +[2026-03-01 16:15:58] (step=0025027) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.896693406378399, LR: 0.0003 +[2026-03-01 16:16:06] (step=0025028) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.896889062805713, LR: 0.0003 +[2026-03-01 16:16:14] (step=0025029) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.8970847192330265, LR: 0.0003 +[2026-03-01 16:16:22] (step=0025030) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.8972803756603405, LR: 0.0003 +[2026-03-01 16:16:30] (step=0025031) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.8974760320876545, LR: 0.0003 +[2026-03-01 16:16:38] (step=0025032) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.897671688514968, LR: 0.0003 +[2026-03-01 16:16:45] (step=0025033) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 4.897867344942282, LR: 0.0003 +[2026-03-01 16:16:53] (step=0025034) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.898063001369595, LR: 0.0003 +[2026-03-01 16:17:01] (step=0025035) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.898258657796909, LR: 0.0003 +[2026-03-01 16:17:09] (step=0025036) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 4.898454314224222, LR: 0.0003 +[2026-03-01 16:17:17] (step=0025037) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.898649970651536, LR: 0.0003 +[2026-03-01 16:17:25] (step=0025038) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 4.89884562707885, LR: 0.0003 +[2026-03-01 16:17:33] (step=0025039) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.899041283506163, LR: 0.0003 +[2026-03-01 16:17:40] (step=0025040) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.899236939933477, LR: 0.0003 +[2026-03-01 16:17:48] (step=0025041) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.89943259636079, LR: 0.0003 +[2026-03-01 16:17:56] (step=0025042) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.899628252788104, LR: 0.0003 +[2026-03-01 16:18:04] (step=0025043) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 4.899823909215418, LR: 0.0003 +[2026-03-01 16:18:12] (step=0025044) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.900019565642731, LR: 0.0003 +[2026-03-01 16:18:20] (step=0025045) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.900215222070045, LR: 0.0003 +[2026-03-01 16:18:28] (step=0025046) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.900410878497358, LR: 0.0003 +[2026-03-01 16:18:35] (step=0025047) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.900606534924672, LR: 0.0003 +[2026-03-01 16:18:43] (step=0025048) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.9008021913519855, LR: 0.0003 +[2026-03-01 16:18:51] (step=0025049) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.9009978477792995, LR: 0.0003 +[2026-03-01 16:18:59] (step=0025050) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.9011935042066135, LR: 0.0003 +[2026-03-01 16:19:07] (step=0025051) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.901389160633927, LR: 0.0003 +[2026-03-01 16:19:15] (step=0025052) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.901584817061241, LR: 0.0003 +[2026-03-01 16:19:22] (step=0025053) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 4.901780473488554, LR: 0.0003 +[2026-03-01 16:19:30] (step=0025054) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 4.901976129915868, LR: 0.0003 +[2026-03-01 16:19:38] (step=0025055) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 4.902171786343182, LR: 0.0003 +[2026-03-01 16:19:46] (step=0025056) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 4.902367442770495, LR: 0.0003 +[2026-03-01 16:19:54] (step=0025057) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.902563099197809, LR: 0.0003 +[2026-03-01 16:20:02] (step=0025058) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.902758755625122, LR: 0.0003 +[2026-03-01 16:20:10] (step=0025059) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.902954412052436, LR: 0.0003 +[2026-03-01 16:20:17] (step=0025060) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 4.90315006847975, LR: 0.0003 +[2026-03-01 16:20:25] (step=0025061) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.903345724907063, LR: 0.0003 +[2026-03-01 16:20:33] (step=0025062) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 4.903541381334377, LR: 0.0003 +[2026-03-01 16:20:41] (step=0025063) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.90373703776169, LR: 0.0003 +[2026-03-01 16:20:49] (step=0025064) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 4.903932694189004, LR: 0.0003 +[2026-03-01 16:20:57] (step=0025065) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.904128350616317, LR: 0.0003 +[2026-03-01 16:21:04] (step=0025066) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.904324007043631, LR: 0.0003 +[2026-03-01 16:21:12] (step=0025067) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.904519663470945, LR: 0.0003 +[2026-03-01 16:21:20] (step=0025068) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.9047153198982585, LR: 0.0003 +[2026-03-01 16:21:28] (step=0025069) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.9049109763255725, LR: 0.0003 +[2026-03-01 16:21:36] (step=0025070) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.905106632752886, LR: 0.0003 +[2026-03-01 16:21:44] (step=0025071) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 4.9053022891802, LR: 0.0003 +[2026-03-01 16:21:52] (step=0025072) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.905497945607514, LR: 0.0003 +[2026-03-01 16:21:59] (step=0025073) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.905693602034827, LR: 0.0003 +[2026-03-01 16:22:07] (step=0025074) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 4.905889258462141, LR: 0.0003 +[2026-03-01 16:22:15] (step=0025075) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 4.906084914889454, LR: 0.0003 +[2026-03-01 16:22:23] (step=0025076) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.906280571316768, LR: 0.0003 +[2026-03-01 16:22:31] (step=0025077) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.906476227744081, LR: 0.0003 +[2026-03-01 16:22:39] (step=0025078) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 4.906671884171395, LR: 0.0003 +[2026-03-01 16:22:46] (step=0025079) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.906867540598709, LR: 0.0003 +[2026-03-01 16:22:54] (step=0025080) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 4.907063197026022, LR: 0.0003 +[2026-03-01 16:23:02] (step=0025081) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.907258853453336, LR: 0.0003 +[2026-03-01 16:23:10] (step=0025082) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 4.907454509880649, LR: 0.0003 +[2026-03-01 16:23:18] (step=0025083) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.907650166307963, LR: 0.0003 +[2026-03-01 16:23:26] (step=0025084) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.907845822735277, LR: 0.0003 +[2026-03-01 16:23:33] (step=0025085) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 4.90804147916259, LR: 0.0003 +[2026-03-01 16:23:41] (step=0025086) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.908237135589904, LR: 0.0003 +[2026-03-01 16:23:49] (step=0025087) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.9084327920172175, LR: 0.0003 +[2026-03-01 16:23:57] (step=0025088) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.9086284484445315, LR: 0.0003 +[2026-03-01 16:24:05] (step=0025089) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.908824104871845, LR: 0.0003 +[2026-03-01 16:24:13] (step=0025090) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.909019761299159, LR: 0.0003 +[2026-03-01 16:24:21] (step=0025091) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.909215417726473, LR: 0.0003 +[2026-03-01 16:24:28] (step=0025092) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 4.909411074153786, LR: 0.0003 +[2026-03-01 16:24:36] (step=0025093) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.9096067305811, LR: 0.0003 +[2026-03-01 16:24:44] (step=0025094) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.909802387008413, LR: 0.0003 +[2026-03-01 16:24:52] (step=0025095) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.909998043435727, LR: 0.0003 +[2026-03-01 16:25:00] (step=0025096) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.910193699863041, LR: 0.0003 +[2026-03-01 16:25:08] (step=0025097) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.910389356290354, LR: 0.0003 +[2026-03-01 16:25:16] (step=0025098) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.910585012717668, LR: 0.0003 +[2026-03-01 16:25:23] (step=0025099) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.910780669144981, LR: 0.0003 +[2026-03-01 16:25:31] (step=0025100) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.910976325572295, LR: 0.0003 +[2026-03-01 16:25:39] (step=0025101) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.911171981999608, LR: 0.0003 +[2026-03-01 16:25:47] (step=0025102) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.911367638426922, LR: 0.0003 +[2026-03-01 16:25:55] (step=0025103) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.911563294854236, LR: 0.0003 +[2026-03-01 16:26:03] (step=0025104) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 4.911758951281549, LR: 0.0003 +[2026-03-01 16:26:10] (step=0025105) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 4.911954607708863, LR: 0.0003 +[2026-03-01 16:26:18] (step=0025106) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.9121502641361765, LR: 0.0003 +[2026-03-01 16:26:26] (step=0025107) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.9123459205634905, LR: 0.0003 +[2026-03-01 16:26:34] (step=0025108) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.9125415769908045, LR: 0.0003 +[2026-03-01 16:26:42] (step=0025109) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.912737233418118, LR: 0.0003 +[2026-03-01 16:26:50] (step=0025110) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 4.912932889845432, LR: 0.0003 +[2026-03-01 16:26:58] (step=0025111) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.913128546272745, LR: 0.0003 +[2026-03-01 16:27:05] (step=0025112) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 4.913324202700059, LR: 0.0003 +[2026-03-01 16:27:13] (step=0025113) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 4.913519859127373, LR: 0.0003 +[2026-03-01 16:27:21] (step=0025114) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.913715515554686, LR: 0.0003 +[2026-03-01 16:27:29] (step=0025115) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.913911171982, LR: 0.0003 +[2026-03-01 16:27:37] (step=0025116) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 4.914106828409313, LR: 0.0003 +[2026-03-01 16:27:45] (step=0025117) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.914302484836627, LR: 0.0003 +[2026-03-01 16:27:52] (step=0025118) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.91449814126394, LR: 0.0003 +[2026-03-01 16:28:00] (step=0025119) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.914693797691254, LR: 0.0003 +[2026-03-01 16:28:08] (step=0025120) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.914889454118568, LR: 0.0003 +[2026-03-01 16:28:16] (step=0025121) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 4.915085110545881, LR: 0.0003 +[2026-03-01 16:28:24] (step=0025122) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.915280766973195, LR: 0.0003 +[2026-03-01 16:28:32] (step=0025123) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 4.915476423400508, LR: 0.0003 +[2026-03-01 16:28:39] (step=0025124) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.915672079827822, LR: 0.0003 +[2026-03-01 16:28:47] (step=0025125) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.915867736255136, LR: 0.0003 +[2026-03-01 16:28:55] (step=0025126) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 4.9160633926824495, LR: 0.0003 +[2026-03-01 16:29:03] (step=0025127) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 4.9162590491097635, LR: 0.0003 +[2026-03-01 16:29:11] (step=0025128) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.916454705537077, LR: 0.0003 +[2026-03-01 16:29:19] (step=0025129) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 4.916650361964391, LR: 0.0003 +[2026-03-01 16:29:27] (step=0025130) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.916846018391704, LR: 0.0003 +[2026-03-01 16:29:34] (step=0025131) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.917041674819018, LR: 0.0003 +[2026-03-01 16:29:42] (step=0025132) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 4.917237331246332, LR: 0.0003 +[2026-03-01 16:29:50] (step=0025133) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 4.917432987673645, LR: 0.0003 +[2026-03-01 16:29:58] (step=0025134) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.917628644100959, LR: 0.0003 +[2026-03-01 16:30:06] (step=0025135) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.917824300528272, LR: 0.0003 +[2026-03-01 16:30:14] (step=0025136) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.918019956955586, LR: 0.0003 +[2026-03-01 16:30:21] (step=0025137) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 4.9182156133829, LR: 0.0003 +[2026-03-01 16:30:29] (step=0025138) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.918411269810213, LR: 0.0003 +[2026-03-01 16:30:37] (step=0025139) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.918606926237527, LR: 0.0003 +[2026-03-01 16:30:45] (step=0025140) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 4.91880258266484, LR: 0.0003 +[2026-03-01 16:30:53] (step=0025141) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.918998239092154, LR: 0.0003 +[2026-03-01 16:31:01] (step=0025142) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.919193895519467, LR: 0.0003 +[2026-03-01 16:31:09] (step=0025143) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.919389551946781, LR: 0.0003 +[2026-03-01 16:31:16] (step=0025144) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.919585208374095, LR: 0.0003 +[2026-03-01 16:31:24] (step=0025145) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 4.9197808648014085, LR: 0.0003 +[2026-03-01 16:31:32] (step=0025146) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.9199765212287225, LR: 0.0003 +[2026-03-01 16:31:40] (step=0025147) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.920172177656036, LR: 0.0003 +[2026-03-01 16:31:48] (step=0025148) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.92036783408335, LR: 0.0003 +[2026-03-01 16:31:56] (step=0025149) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.920563490510664, LR: 0.0003 +[2026-03-01 16:32:03] (step=0025150) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.920759146937977, LR: 0.0003 +[2026-03-01 16:32:11] (step=0025151) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.920954803365291, LR: 0.0003 +[2026-03-01 16:32:19] (step=0025152) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.921150459792604, LR: 0.0003 +[2026-03-01 16:32:27] (step=0025153) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 4.921346116219918, LR: 0.0003 +[2026-03-01 16:32:35] (step=0025154) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 4.921541772647231, LR: 0.0003 +[2026-03-01 16:32:43] (step=0025155) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.921737429074545, LR: 0.0003 +[2026-03-01 16:32:51] (step=0025156) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.921933085501859, LR: 0.0003 +[2026-03-01 16:32:58] (step=0025157) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.922128741929172, LR: 0.0003 +[2026-03-01 16:33:06] (step=0025158) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.922324398356486, LR: 0.0003 +[2026-03-01 16:33:14] (step=0025159) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 4.922520054783799, LR: 0.0003 +[2026-03-01 16:33:22] (step=0025160) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.922715711211113, LR: 0.0003 +[2026-03-01 16:33:30] (step=0025161) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 4.922911367638427, LR: 0.0003 +[2026-03-01 16:33:38] (step=0025162) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 4.92310702406574, LR: 0.0003 +[2026-03-01 16:33:46] (step=0025163) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 4.923302680493054, LR: 0.0003 +[2026-03-01 16:33:53] (step=0025164) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.9234983369203675, LR: 0.0003 +[2026-03-01 16:34:01] (step=0025165) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.9236939933476815, LR: 0.0003 +[2026-03-01 16:34:09] (step=0025166) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.9238896497749955, LR: 0.0003 +[2026-03-01 16:34:17] (step=0025167) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.924085306202309, LR: 0.0003 +[2026-03-01 16:34:25] (step=0025168) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.924280962629623, LR: 0.0003 +[2026-03-01 16:34:33] (step=0025169) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.924476619056936, LR: 0.0003 +[2026-03-01 16:34:40] (step=0025170) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 4.92467227548425, LR: 0.0003 +[2026-03-01 16:34:48] (step=0025171) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.924867931911563, LR: 0.0003 +[2026-03-01 16:34:56] (step=0025172) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.925063588338877, LR: 0.0003 +[2026-03-01 16:35:04] (step=0025173) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.925259244766191, LR: 0.0003 +[2026-03-01 16:35:12] (step=0025174) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.925454901193504, LR: 0.0003 +[2026-03-01 16:35:20] (step=0025175) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.925650557620818, LR: 0.0003 +[2026-03-01 16:35:27] (step=0025176) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.925846214048131, LR: 0.0003 +[2026-03-01 16:35:35] (step=0025177) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.926041870475445, LR: 0.0003 +[2026-03-01 16:35:43] (step=0025178) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.926237526902759, LR: 0.0003 +[2026-03-01 16:35:51] (step=0025179) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.926433183330072, LR: 0.0003 +[2026-03-01 16:35:59] (step=0025180) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.926628839757386, LR: 0.0003 +[2026-03-01 16:36:07] (step=0025181) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 4.926824496184699, LR: 0.0003 +[2026-03-01 16:36:14] (step=0025182) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 4.927020152612013, LR: 0.0003 +[2026-03-01 16:36:22] (step=0025183) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 4.9272158090393265, LR: 0.0003 +[2026-03-01 16:36:30] (step=0025184) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.9274114654666405, LR: 0.0003 +[2026-03-01 16:36:38] (step=0025185) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.927607121893955, LR: 0.0003 +[2026-03-01 16:36:46] (step=0025186) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.927802778321268, LR: 0.0003 +[2026-03-01 16:36:54] (step=0025187) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.927998434748582, LR: 0.0003 +[2026-03-01 16:37:02] (step=0025188) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.928194091175895, LR: 0.0003 +[2026-03-01 16:37:09] (step=0025189) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 4.928389747603209, LR: 0.0003 +[2026-03-01 16:37:17] (step=0025190) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 4.928585404030523, LR: 0.0003 +[2026-03-01 16:37:25] (step=0025191) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.928781060457836, LR: 0.0003 +[2026-03-01 16:37:33] (step=0025192) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 4.92897671688515, LR: 0.0003 +[2026-03-01 16:37:41] (step=0025193) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.929172373312463, LR: 0.0003 +[2026-03-01 16:37:49] (step=0025194) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.929368029739777, LR: 0.0003 +[2026-03-01 16:37:57] (step=0025195) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.92956368616709, LR: 0.0003 +[2026-03-01 16:38:04] (step=0025196) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.929759342594404, LR: 0.0003 +[2026-03-01 16:38:12] (step=0025197) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.929954999021718, LR: 0.0003 +[2026-03-01 16:38:20] (step=0025198) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 4.930150655449031, LR: 0.0003 +[2026-03-01 16:38:28] (step=0025199) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.930346311876345, LR: 0.0003 +[2026-03-01 16:38:36] (step=0025200) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 4.930541968303658, LR: 0.0003 +[2026-03-01 16:38:44] (step=0025201) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.930737624730972, LR: 0.0003 +[2026-03-01 16:38:52] (step=0025202) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 4.930933281158286, LR: 0.0003 +[2026-03-01 16:38:59] (step=0025203) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 4.9311289375856, LR: 0.0003 +[2026-03-01 16:39:07] (step=0025204) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.931324594012914, LR: 0.0003 +[2026-03-01 16:39:15] (step=0025205) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.931520250440227, LR: 0.0003 +[2026-03-01 16:39:23] (step=0025206) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 4.931715906867541, LR: 0.0003 +[2026-03-01 16:39:31] (step=0025207) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.931911563294854, LR: 0.0003 +[2026-03-01 16:39:39] (step=0025208) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.932107219722168, LR: 0.0003 +[2026-03-01 16:39:46] (step=0025209) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 4.932302876149482, LR: 0.0003 +[2026-03-01 16:39:54] (step=0025210) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.932498532576795, LR: 0.0003 +[2026-03-01 16:40:02] (step=0025211) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 4.932694189004109, LR: 0.0003 +[2026-03-01 16:40:10] (step=0025212) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.932889845431422, LR: 0.0003 +[2026-03-01 16:40:18] (step=0025213) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.933085501858736, LR: 0.0003 +[2026-03-01 16:40:26] (step=0025214) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.93328115828605, LR: 0.0003 +[2026-03-01 16:40:34] (step=0025215) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.933476814713363, LR: 0.0003 +[2026-03-01 16:40:41] (step=0025216) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.933672471140677, LR: 0.0003 +[2026-03-01 16:40:49] (step=0025217) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 4.93386812756799, LR: 0.0003 +[2026-03-01 16:40:57] (step=0025218) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.934063783995304, LR: 0.0003 +[2026-03-01 16:41:05] (step=0025219) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.934259440422618, LR: 0.0003 +[2026-03-01 16:41:13] (step=0025220) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.934455096849931, LR: 0.0003 +[2026-03-01 16:41:21] (step=0025221) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.9346507532772454, LR: 0.0003 +[2026-03-01 16:41:28] (step=0025222) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.934846409704559, LR: 0.0003 +[2026-03-01 16:41:36] (step=0025223) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 4.935042066131873, LR: 0.0003 +[2026-03-01 16:41:44] (step=0025224) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.935237722559186, LR: 0.0003 +[2026-03-01 16:41:52] (step=0025225) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.9354333789865, LR: 0.0003 +[2026-03-01 16:42:00] (step=0025226) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.935629035413814, LR: 0.0003 +[2026-03-01 16:42:08] (step=0025227) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 4.935824691841127, LR: 0.0003 +[2026-03-01 16:42:15] (step=0025228) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 4.936020348268441, LR: 0.0003 +[2026-03-01 16:42:23] (step=0025229) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.936216004695754, LR: 0.0003 +[2026-03-01 16:42:31] (step=0025230) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.936411661123068, LR: 0.0003 +[2026-03-01 16:42:39] (step=0025231) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.936607317550382, LR: 0.0003 +[2026-03-01 16:42:47] (step=0025232) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 4.936802973977695, LR: 0.0003 +[2026-03-01 16:42:55] (step=0025233) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.936998630405009, LR: 0.0003 +[2026-03-01 16:43:02] (step=0025234) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.937194286832322, LR: 0.0003 +[2026-03-01 16:43:10] (step=0025235) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.937389943259636, LR: 0.0003 +[2026-03-01 16:43:18] (step=0025236) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.937585599686949, LR: 0.0003 +[2026-03-01 16:43:26] (step=0025237) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.937781256114263, LR: 0.0003 +[2026-03-01 16:43:34] (step=0025238) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 4.937976912541577, LR: 0.0003 +[2026-03-01 16:43:42] (step=0025239) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.9381725689688905, LR: 0.0003 +[2026-03-01 16:43:50] (step=0025240) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.9383682253962045, LR: 0.0003 +[2026-03-01 16:43:57] (step=0025241) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.938563881823518, LR: 0.0003 +[2026-03-01 16:44:05] (step=0025242) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.938759538250832, LR: 0.0003 +[2026-03-01 16:44:13] (step=0025243) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 4.938955194678146, LR: 0.0003 +[2026-03-01 16:44:21] (step=0025244) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.939150851105459, LR: 0.0003 +[2026-03-01 16:44:29] (step=0025245) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 4.939346507532773, LR: 0.0003 +[2026-03-01 16:44:37] (step=0025246) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 4.939542163960086, LR: 0.0003 +[2026-03-01 16:44:45] (step=0025247) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 4.9397378203874, LR: 0.0003 +[2026-03-01 16:44:52] (step=0025248) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 4.939933476814713, LR: 0.0003 +[2026-03-01 16:45:00] (step=0025249) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 4.940129133242027, LR: 0.0003 +[2026-03-01 16:45:08] (step=0025250) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.940324789669341, LR: 0.0003 +[2026-03-01 16:45:16] (step=0025251) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.940520446096654, LR: 0.0003 +[2026-03-01 16:45:24] (step=0025252) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.940716102523968, LR: 0.0003 +[2026-03-01 16:45:32] (step=0025253) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 4.940911758951281, LR: 0.0003 +[2026-03-01 16:45:39] (step=0025254) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.941107415378595, LR: 0.0003 +[2026-03-01 16:45:47] (step=0025255) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.941303071805909, LR: 0.0003 +[2026-03-01 16:45:55] (step=0025256) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 4.941498728233222, LR: 0.0003 +[2026-03-01 16:46:03] (step=0025257) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.941694384660536, LR: 0.0003 +[2026-03-01 16:46:11] (step=0025258) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.9418900410878495, LR: 0.0003 +[2026-03-01 16:46:19] (step=0025259) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.9420856975151635, LR: 0.0003 +[2026-03-01 16:46:27] (step=0025260) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.942281353942477, LR: 0.0003 +[2026-03-01 16:46:34] (step=0025261) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 4.942477010369791, LR: 0.0003 +[2026-03-01 16:46:42] (step=0025262) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 4.942672666797105, LR: 0.0003 +[2026-03-01 16:46:50] (step=0025263) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 4.942868323224418, LR: 0.0003 +[2026-03-01 16:46:58] (step=0025264) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.943063979651732, LR: 0.0003 +[2026-03-01 16:47:06] (step=0025265) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.943259636079045, LR: 0.0003 +[2026-03-01 16:47:14] (step=0025266) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 4.943455292506359, LR: 0.0003 +[2026-03-01 16:47:21] (step=0025267) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.943650948933673, LR: 0.0003 +[2026-03-01 16:47:29] (step=0025268) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 4.943846605360986, LR: 0.0003 +[2026-03-01 16:47:37] (step=0025269) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.9440422617883, LR: 0.0003 +[2026-03-01 16:47:45] (step=0025270) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 4.944237918215613, LR: 0.0003 +[2026-03-01 16:47:53] (step=0025271) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.944433574642927, LR: 0.0003 +[2026-03-01 16:48:01] (step=0025272) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.94462923107024, LR: 0.0003 +[2026-03-01 16:48:08] (step=0025273) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.944824887497554, LR: 0.0003 +[2026-03-01 16:48:16] (step=0025274) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 4.945020543924868, LR: 0.0003 +[2026-03-01 16:48:24] (step=0025275) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.945216200352181, LR: 0.0003 +[2026-03-01 16:48:32] (step=0025276) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.945411856779495, LR: 0.0003 +[2026-03-01 16:48:40] (step=0025277) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.9456075132068085, LR: 0.0003 +[2026-03-01 16:48:48] (step=0025278) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.9458031696341225, LR: 0.0003 +[2026-03-01 16:48:55] (step=0025279) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 4.9459988260614365, LR: 0.0003 +[2026-03-01 16:49:03] (step=0025280) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 4.94619448248875, LR: 0.0003 +[2026-03-01 16:49:11] (step=0025281) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 4.946390138916064, LR: 0.0003 +[2026-03-01 16:49:19] (step=0025282) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.946585795343377, LR: 0.0003 +[2026-03-01 16:49:27] (step=0025283) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.946781451770691, LR: 0.0003 +[2026-03-01 16:49:35] (step=0025284) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 4.946977108198005, LR: 0.0003 +[2026-03-01 16:49:43] (step=0025285) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.947172764625318, LR: 0.0003 +[2026-03-01 16:49:50] (step=0025286) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.947368421052632, LR: 0.0003 +[2026-03-01 16:49:58] (step=0025287) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.947564077479945, LR: 0.0003 +[2026-03-01 16:50:06] (step=0025288) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.947759733907259, LR: 0.0003 +[2026-03-01 16:50:14] (step=0025289) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.947955390334572, LR: 0.0003 +[2026-03-01 16:50:22] (step=0025290) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.948151046761886, LR: 0.0003 +[2026-03-01 16:50:30] (step=0025291) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 4.9483467031892, LR: 0.0003 +[2026-03-01 16:50:38] (step=0025292) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.948542359616513, LR: 0.0003 +[2026-03-01 16:50:45] (step=0025293) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 4.948738016043827, LR: 0.0003 +[2026-03-01 16:50:53] (step=0025294) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.94893367247114, LR: 0.0003 +[2026-03-01 16:51:01] (step=0025295) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.949129328898454, LR: 0.0003 +[2026-03-01 16:51:09] (step=0025296) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.949324985325768, LR: 0.0003 +[2026-03-01 16:51:17] (step=0025297) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.9495206417530815, LR: 0.0003 +[2026-03-01 16:51:25] (step=0025298) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.9497162981803955, LR: 0.0003 +[2026-03-01 16:51:33] (step=0025299) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 4.949911954607709, LR: 0.0003 +[2026-03-01 16:51:40] (step=0025300) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.950107611035023, LR: 0.0003 +[2026-03-01 16:51:48] (step=0025301) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.950303267462336, LR: 0.0003 +[2026-03-01 16:51:56] (step=0025302) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.95049892388965, LR: 0.0003 +[2026-03-01 16:52:04] (step=0025303) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 4.950694580316964, LR: 0.0003 +[2026-03-01 16:52:12] (step=0025304) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.950890236744277, LR: 0.0003 +[2026-03-01 16:52:20] (step=0025305) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.951085893171591, LR: 0.0003 +[2026-03-01 16:52:27] (step=0025306) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 4.951281549598904, LR: 0.0003 +[2026-03-01 16:52:35] (step=0025307) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.951477206026218, LR: 0.0003 +[2026-03-01 16:52:43] (step=0025308) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.951672862453532, LR: 0.0003 +[2026-03-01 16:52:51] (step=0025309) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.951868518880845, LR: 0.0003 +[2026-03-01 16:52:59] (step=0025310) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.952064175308159, LR: 0.0003 +[2026-03-01 16:53:07] (step=0025311) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 4.952259831735472, LR: 0.0003 +[2026-03-01 16:53:15] (step=0025312) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.952455488162786, LR: 0.0003 +[2026-03-01 16:53:22] (step=0025313) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 4.952651144590099, LR: 0.0003 +[2026-03-01 16:53:30] (step=0025314) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 4.952846801017413, LR: 0.0003 +[2026-03-01 16:53:38] (step=0025315) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.953042457444727, LR: 0.0003 +[2026-03-01 16:53:46] (step=0025316) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.9532381138720405, LR: 0.0003 +[2026-03-01 16:53:54] (step=0025317) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 4.9534337702993545, LR: 0.0003 +[2026-03-01 16:54:02] (step=0025318) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 4.953629426726668, LR: 0.0003 +[2026-03-01 16:54:09] (step=0025319) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 4.953825083153982, LR: 0.0003 +[2026-03-01 16:54:17] (step=0025320) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 4.954020739581296, LR: 0.0003 +[2026-03-01 16:54:25] (step=0025321) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 4.954216396008609, LR: 0.0003 +[2026-03-01 16:54:33] (step=0025322) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 4.954412052435923, LR: 0.0003 +[2026-03-01 16:54:41] (step=0025323) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.954607708863236, LR: 0.0003 +[2026-03-01 16:54:49] (step=0025324) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.95480336529055, LR: 0.0003 +[2026-03-01 16:54:56] (step=0025325) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.954999021717863, LR: 0.0003 +[2026-03-01 16:55:04] (step=0025326) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.955194678145177, LR: 0.0003 +[2026-03-01 16:55:12] (step=0025327) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.955390334572491, LR: 0.0003 +[2026-03-01 16:55:20] (step=0025328) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.955585990999804, LR: 0.0003 +[2026-03-01 16:55:28] (step=0025329) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 4.955781647427118, LR: 0.0003 +[2026-03-01 16:55:36] (step=0025330) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.955977303854431, LR: 0.0003 +[2026-03-01 16:55:44] (step=0025331) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.956172960281745, LR: 0.0003 +[2026-03-01 16:55:51] (step=0025332) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 4.956368616709059, LR: 0.0003 +[2026-03-01 16:55:59] (step=0025333) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.956564273136372, LR: 0.0003 +[2026-03-01 16:56:07] (step=0025334) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.956759929563686, LR: 0.0003 +[2026-03-01 16:56:15] (step=0025335) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.9569555859909995, LR: 0.0003 +[2026-03-01 16:56:23] (step=0025336) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 4.9571512424183135, LR: 0.0003 +[2026-03-01 16:56:31] (step=0025337) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.9573468988456275, LR: 0.0003 +[2026-03-01 16:56:39] (step=0025338) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.957542555272941, LR: 0.0003 +[2026-03-01 16:56:46] (step=0025339) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.957738211700255, LR: 0.0003 +[2026-03-01 16:56:54] (step=0025340) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 4.957933868127568, LR: 0.0003 +[2026-03-01 16:57:02] (step=0025341) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 4.958129524554882, LR: 0.0003 +[2026-03-01 16:57:10] (step=0025342) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 4.958325180982195, LR: 0.0003 +[2026-03-01 16:57:18] (step=0025343) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.958520837409509, LR: 0.0003 +[2026-03-01 16:57:26] (step=0025344) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.958716493836823, LR: 0.0003 +[2026-03-01 16:57:34] (step=0025345) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 4.958912150264136, LR: 0.0003 +[2026-03-01 16:57:41] (step=0025346) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.95910780669145, LR: 0.0003 +[2026-03-01 16:57:49] (step=0025347) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 4.959303463118763, LR: 0.0003 +[2026-03-01 16:57:57] (step=0025348) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 4.959499119546077, LR: 0.0003 +[2026-03-01 16:58:05] (step=0025349) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.959694775973391, LR: 0.0003 +[2026-03-01 16:58:13] (step=0025350) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.959890432400704, LR: 0.0003 +[2026-03-01 16:58:21] (step=0025351) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 4.960086088828018, LR: 0.0003 +[2026-03-01 16:58:29] (step=0025352) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.960281745255331, LR: 0.0003 +[2026-03-01 16:58:36] (step=0025353) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 4.960477401682645, LR: 0.0003 +[2026-03-01 16:58:44] (step=0025354) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 4.9606730581099585, LR: 0.0003 +[2026-03-01 16:58:52] (step=0025355) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.9608687145372725, LR: 0.0003 +[2026-03-01 16:59:00] (step=0025356) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.9610643709645865, LR: 0.0003 +[2026-03-01 16:59:08] (step=0025357) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.9612600273919, LR: 0.0003 +[2026-03-01 16:59:16] (step=0025358) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 4.961455683819214, LR: 0.0003 +[2026-03-01 16:59:23] (step=0025359) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 4.961651340246527, LR: 0.0003 +[2026-03-01 16:59:31] (step=0025360) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 4.961846996673841, LR: 0.0003 +[2026-03-01 16:59:39] (step=0025361) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 4.962042653101155, LR: 0.0003 +[2026-03-01 16:59:47] (step=0025362) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.962238309528468, LR: 0.0003 +[2026-03-01 16:59:55] (step=0025363) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.962433965955782, LR: 0.0003 +[2026-03-01 17:00:03] (step=0025364) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.962629622383095, LR: 0.0003 +[2026-03-01 17:00:11] (step=0025365) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 4.962825278810409, LR: 0.0003 +[2026-03-01 17:00:18] (step=0025366) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 4.963020935237722, LR: 0.0003 +[2026-03-01 17:00:26] (step=0025367) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.963216591665036, LR: 0.0003 +[2026-03-01 17:00:34] (step=0025368) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.96341224809235, LR: 0.0003 +[2026-03-01 17:00:42] (step=0025369) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.963607904519663, LR: 0.0003 +[2026-03-01 17:00:50] (step=0025370) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.963803560946977, LR: 0.0003 +[2026-03-01 17:00:58] (step=0025371) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 4.96399921737429, LR: 0.0003 +[2026-03-01 17:01:05] (step=0025372) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 4.964194873801604, LR: 0.0003 +[2026-03-01 17:01:13] (step=0025373) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 4.964390530228918, LR: 0.0003 +[2026-03-01 17:01:21] (step=0025374) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.9645861866562315, LR: 0.0003 +[2026-03-01 17:01:29] (step=0025375) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 4.9647818430835455, LR: 0.0003 +[2026-03-01 17:01:37] (step=0025376) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.964977499510859, LR: 0.0003 +[2026-03-01 17:01:45] (step=0025377) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 4.965173155938173, LR: 0.0003 +[2026-03-01 17:01:52] (step=0025378) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.965368812365486, LR: 0.0003 +[2026-03-01 17:02:00] (step=0025379) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 4.9655644687928, LR: 0.0003 +[2026-03-01 17:02:08] (step=0025380) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 4.965760125220114, LR: 0.0003 +[2026-03-01 17:02:16] (step=0025381) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.965955781647427, LR: 0.0003 +[2026-03-01 17:02:24] (step=0025382) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.966151438074741, LR: 0.0003 +[2026-03-01 17:02:32] (step=0025383) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.966347094502054, LR: 0.0003 +[2026-03-01 17:02:40] (step=0025384) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 4.966542750929368, LR: 0.0003 +[2026-03-01 17:02:47] (step=0025385) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 4.966738407356682, LR: 0.0003 +[2026-03-01 17:02:55] (step=0025386) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 4.966934063783995, LR: 0.0003 +[2026-03-01 17:03:03] (step=0025387) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.967129720211309, LR: 0.0003 +[2026-03-01 17:03:11] (step=0025388) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 4.967325376638622, LR: 0.0003 +[2026-03-01 17:03:19] (step=0025389) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.967521033065936, LR: 0.0003 +[2026-03-01 17:03:27] (step=0025390) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 4.96771668949325, LR: 0.0003 +[2026-03-01 17:03:35] (step=0025391) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 4.967912345920563, LR: 0.0003 +[2026-03-01 17:03:42] (step=0025392) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.968108002347877, LR: 0.0003 +[2026-03-01 17:03:50] (step=0025393) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 4.9683036587751905, LR: 0.0003 +[2026-03-01 17:03:58] (step=0025394) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 4.9684993152025045, LR: 0.0003 +[2026-03-01 17:04:06] (step=0025395) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.968694971629818, LR: 0.0003 +[2026-03-01 17:04:14] (step=0025396) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 4.968890628057132, LR: 0.0003 +[2026-03-01 17:04:22] (step=0025397) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.969086284484446, LR: 0.0003 +[2026-03-01 17:04:30] (step=0025398) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 4.969281940911759, LR: 0.0003 +[2026-03-01 17:04:37] (step=0025399) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.969477597339073, LR: 0.0003 +[2026-03-01 17:04:45] (step=0025400) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 4.969673253766386, LR: 0.0003 +[2026-03-01 17:04:53] (step=0025401) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.9698689101937, LR: 0.0003 +[2026-03-01 17:05:01] (step=0025402) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 4.970064566621014, LR: 0.0003 +[2026-03-01 17:05:09] (step=0025403) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 4.970260223048327, LR: 0.0003 +[2026-03-01 17:05:17] (step=0025404) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 4.970455879475641, LR: 0.0003 +[2026-03-01 17:05:25] (step=0025405) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.970651535902954, LR: 0.0003 +[2026-03-01 17:05:32] (step=0025406) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 4.970847192330268, LR: 0.0003 +[2026-03-01 17:05:40] (step=0025407) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.971042848757581, LR: 0.0003 +[2026-03-01 17:05:48] (step=0025408) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.971238505184895, LR: 0.0003 +[2026-03-01 17:05:56] (step=0025409) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 4.971434161612209, LR: 0.0003 +[2026-03-01 17:06:04] (step=0025410) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 4.971629818039522, LR: 0.0003 +[2026-03-01 17:06:12] (step=0025411) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 4.971825474466836, LR: 0.0003 +[2026-03-01 17:06:19] (step=0025412) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.9720211308941495, LR: 0.0003 +[2026-03-01 17:06:27] (step=0025413) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 4.9722167873214635, LR: 0.0003 +[2026-03-01 17:06:35] (step=0025414) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 4.9724124437487776, LR: 0.0003 +[2026-03-01 17:06:43] (step=0025415) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.972608100176091, LR: 0.0003 +[2026-03-01 17:06:51] (step=0025416) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 4.972803756603405, LR: 0.0003 +[2026-03-01 17:06:59] (step=0025417) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 4.972999413030718, LR: 0.0003 +[2026-03-01 17:07:06] (step=0025418) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 4.973195069458032, LR: 0.0003 +[2026-03-01 17:07:14] (step=0025419) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 4.973390725885345, LR: 0.0003 +[2026-03-01 17:07:22] (step=0025420) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.973586382312659, LR: 0.0003 +[2026-03-01 17:07:30] (step=0025421) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.973782038739973, LR: 0.0003 +[2026-03-01 17:07:38] (step=0025422) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.973977695167286, LR: 0.0003 +[2026-03-01 17:07:46] (step=0025423) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.9741733515946, LR: 0.0003 +[2026-03-01 17:07:54] (step=0025424) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.974369008021913, LR: 0.0003 +[2026-03-01 17:08:01] (step=0025425) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 4.974564664449227, LR: 0.0003 +[2026-03-01 17:08:09] (step=0025426) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 4.974760320876541, LR: 0.0003 +[2026-03-01 17:08:17] (step=0025427) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 4.974955977303854, LR: 0.0003 +[2026-03-01 17:08:25] (step=0025428) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 4.975151633731168, LR: 0.0003 +[2026-03-01 17:08:33] (step=0025429) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 4.975347290158481, LR: 0.0003 +[2026-03-01 17:08:41] (step=0025430) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 4.975542946585795, LR: 0.0003 +[2026-03-01 17:08:48] (step=0025431) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 4.9757386030131086, LR: 0.0003 +[2026-03-01 17:08:56] (step=0025432) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.975934259440423, LR: 0.0003 +[2026-03-01 17:09:04] (step=0025433) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 4.976129915867737, LR: 0.0003 +[2026-03-01 17:09:12] (step=0025434) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 4.97632557229505, LR: 0.0003 +[2026-03-01 17:09:20] (step=0025435) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 4.976521228722364, LR: 0.0003 +[2026-03-01 17:09:28] (step=0025436) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 4.976716885149677, LR: 0.0003 +[2026-03-01 17:09:35] (step=0025437) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.976912541576991, LR: 0.0003 +[2026-03-01 17:09:43] (step=0025438) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 4.977108198004305, LR: 0.0003 +[2026-03-01 17:09:51] (step=0025439) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 4.977303854431618, LR: 0.0003 +[2026-03-01 17:09:59] (step=0025440) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.977499510858932, LR: 0.0003 +[2026-03-01 17:10:07] (step=0025441) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 4.977695167286245, LR: 0.0003 +[2026-03-01 17:10:15] (step=0025442) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 4.977890823713559, LR: 0.0003 +[2026-03-01 17:10:23] (step=0025443) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 4.978086480140873, LR: 0.0003 +[2026-03-01 17:10:31] (step=0025444) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 4.978282136568186, LR: 0.0003 +[2026-03-01 17:10:38] (step=0025445) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 4.9784777929955, LR: 0.0003 +[2026-03-01 17:10:46] (step=0025446) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 4.978673449422813, LR: 0.0003 +[2026-03-01 17:10:54] (step=0025447) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 4.978869105850127, LR: 0.0003 +[2026-03-01 17:11:02] (step=0025448) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 4.97906476227744, LR: 0.0003 +[2026-03-01 17:11:10] (step=0025449) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.979260418704754, LR: 0.0003 +[2026-03-01 17:11:18] (step=0025450) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 4.9794560751320684, LR: 0.0003 +[2026-03-01 17:11:26] (step=0025451) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.979651731559382, LR: 0.0003 +[2026-03-01 17:11:33] (step=0025452) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 4.979847387986696, LR: 0.0003 +[2026-03-01 17:11:41] (step=0025453) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.980043044414009, LR: 0.0003 +[2026-03-01 17:11:49] (step=0025454) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 4.980238700841323, LR: 0.0003 +[2026-03-01 17:11:57] (step=0025455) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 4.980434357268637, LR: 0.0003 +[2026-03-01 17:12:05] (step=0025456) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 4.98063001369595, LR: 0.0003 +[2026-03-01 17:12:13] (step=0025457) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.980825670123264, LR: 0.0003 +[2026-03-01 17:12:20] (step=0025458) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 4.981021326550577, LR: 0.0003 +[2026-03-01 17:12:28] (step=0025459) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.981216982977891, LR: 0.0003 +[2026-03-01 17:12:36] (step=0025460) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 4.981412639405204, LR: 0.0003 +[2026-03-01 17:12:44] (step=0025461) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 4.981608295832518, LR: 0.0003 +[2026-03-01 17:12:52] (step=0025462) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 4.981803952259832, LR: 0.0003 +[2026-03-01 17:13:00] (step=0025463) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 4.981999608687145, LR: 0.0003 +[2026-03-01 17:13:07] (step=0025464) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 4.982195265114459, LR: 0.0003 +[2026-03-01 17:13:15] (step=0025465) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 4.982390921541772, LR: 0.0003 +[2026-03-01 17:13:23] (step=0025466) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.982586577969086, LR: 0.0003 +[2026-03-01 17:13:31] (step=0025467) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 4.9827822343964, LR: 0.0003 +[2026-03-01 17:13:39] (step=0025468) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 4.9829778908237135, LR: 0.0003 +[2026-03-01 17:13:47] (step=0025469) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 4.9831735472510275, LR: 0.0003 +[2026-03-01 17:13:55] (step=0025470) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 4.983369203678341, LR: 0.0003 +[2026-03-01 17:14:02] (step=0025471) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 4.983564860105655, LR: 0.0003 +[2026-03-01 17:14:10] (step=0025472) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 4.983760516532968, LR: 0.0003 +[2026-03-01 17:14:18] (step=0025473) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 4.983956172960282, LR: 0.0003 +[2026-03-01 17:14:26] (step=0025474) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 4.984151829387596, LR: 0.0003 +[2026-03-01 17:14:34] (step=0025475) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 4.984347485814909, LR: 0.0003 +[2026-03-01 17:14:42] (step=0025476) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.984543142242223, LR: 0.0003 +[2026-03-01 17:14:49] (step=0025477) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.984738798669536, LR: 0.0003 +[2026-03-01 17:14:57] (step=0025478) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.98493445509685, LR: 0.0003 +[2026-03-01 17:15:05] (step=0025479) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 4.985130111524164, LR: 0.0003 +[2026-03-01 17:15:13] (step=0025480) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 4.985325767951477, LR: 0.0003 +[2026-03-01 17:15:21] (step=0025481) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 4.985521424378791, LR: 0.0003 +[2026-03-01 17:15:29] (step=0025482) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 4.985717080806104, LR: 0.0003 +[2026-03-01 17:15:36] (step=0025483) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.985912737233418, LR: 0.0003 +[2026-03-01 17:15:44] (step=0025484) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 4.986108393660731, LR: 0.0003 +[2026-03-01 17:15:52] (step=0025485) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 4.986304050088045, LR: 0.0003 +[2026-03-01 17:16:00] (step=0025486) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 4.986499706515359, LR: 0.0003 +[2026-03-01 17:16:08] (step=0025487) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 4.9866953629426725, LR: 0.0003 +[2026-03-01 17:16:16] (step=0025488) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.9868910193699865, LR: 0.0003 +[2026-03-01 17:16:23] (step=0025489) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 4.9870866757973, LR: 0.0003 +[2026-03-01 17:16:31] (step=0025490) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 4.987282332224614, LR: 0.0003 +[2026-03-01 17:16:39] (step=0025491) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 4.987477988651928, LR: 0.0003 +[2026-03-01 17:16:47] (step=0025492) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 4.987673645079241, LR: 0.0003 +[2026-03-01 17:16:55] (step=0025493) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 4.987869301506555, LR: 0.0003 +[2026-03-01 17:17:03] (step=0025494) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.988064957933868, LR: 0.0003 +[2026-03-01 17:17:11] (step=0025495) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 4.988260614361182, LR: 0.0003 +[2026-03-01 17:17:19] (step=0025496) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 4.988456270788495, LR: 0.0003 +[2026-03-01 17:17:26] (step=0025497) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 4.988651927215809, LR: 0.0003 +[2026-03-01 17:17:34] (step=0025498) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 4.988847583643123, LR: 0.0003 +[2026-03-01 17:17:42] (step=0025499) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 4.989043240070436, LR: 0.0003 +[2026-03-01 17:17:50] (step=0025500) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 4.98923889649775, LR: 0.0003 +[2026-03-01 17:17:50] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0025500/ +[2026-03-01 17:17:58] (step=0025501) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 4.989434552925063, LR: 0.0003 +[2026-03-01 17:18:06] (step=0025502) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.989630209352377, LR: 0.0003 +[2026-03-01 17:18:14] (step=0025503) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 4.989825865779691, LR: 0.0003 +[2026-03-01 17:18:21] (step=0025504) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.990021522207004, LR: 0.0003 +[2026-03-01 17:18:29] (step=0025505) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.990217178634318, LR: 0.0003 +[2026-03-01 17:18:37] (step=0025506) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 4.9904128350616315, LR: 0.0003 +[2026-03-01 17:18:45] (step=0025507) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 4.9906084914889455, LR: 0.0003 +[2026-03-01 17:18:53] (step=0025508) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 4.9908041479162595, LR: 0.0003 +[2026-03-01 17:19:01] (step=0025509) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 4.990999804343573, LR: 0.0003 +[2026-03-01 17:19:08] (step=0025510) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 4.991195460770887, LR: 0.0003 +[2026-03-01 17:19:16] (step=0025511) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 4.9913911171982, LR: 0.0003 +[2026-03-01 17:19:24] (step=0025512) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 4.991586773625514, LR: 0.0003 +[2026-03-01 17:19:32] (step=0025513) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 4.991782430052827, LR: 0.0003 +[2026-03-01 17:19:40] (step=0025514) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.991978086480141, LR: 0.0003 +[2026-03-01 17:19:48] (step=0025515) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 4.992173742907455, LR: 0.0003 +[2026-03-01 17:19:56] (step=0025516) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.992369399334768, LR: 0.0003 +[2026-03-01 17:20:03] (step=0025517) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 4.992565055762082, LR: 0.0003 +[2026-03-01 17:20:11] (step=0025518) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.992760712189395, LR: 0.0003 +[2026-03-01 17:20:19] (step=0025519) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 4.992956368616709, LR: 0.0003 +[2026-03-01 17:20:27] (step=0025520) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 4.993152025044023, LR: 0.0003 +[2026-03-01 17:20:35] (step=0025521) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 4.993347681471336, LR: 0.0003 +[2026-03-01 17:20:43] (step=0025522) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 4.99354333789865, LR: 0.0003 +[2026-03-01 17:20:50] (step=0025523) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 4.993738994325963, LR: 0.0003 +[2026-03-01 17:20:58] (step=0025524) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 4.993934650753277, LR: 0.0003 +[2026-03-01 17:21:06] (step=0025525) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 4.9941303071805905, LR: 0.0003 +[2026-03-01 17:21:14] (step=0025526) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 4.9943259636079045, LR: 0.0003 +[2026-03-01 17:21:22] (step=0025527) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 4.9945216200352185, LR: 0.0003 +[2026-03-01 17:21:30] (step=0025528) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 4.994717276462532, LR: 0.0003 +[2026-03-01 17:21:37] (step=0025529) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 4.994912932889846, LR: 0.0003 +[2026-03-01 17:21:45] (step=0025530) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 4.995108589317159, LR: 0.0003 +[2026-03-01 17:21:53] (step=0025531) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 4.995304245744473, LR: 0.0003 +[2026-03-01 17:22:01] (step=0025532) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 4.995499902171787, LR: 0.0003 +[2026-03-01 17:22:09] (step=0025533) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 4.9956955585991, LR: 0.0003 +[2026-03-01 17:22:17] (step=0025534) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 4.995891215026414, LR: 0.0003 +[2026-03-01 17:22:24] (step=0025535) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 4.996086871453727, LR: 0.0003 +[2026-03-01 17:22:32] (step=0025536) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 4.996282527881041, LR: 0.0003 +[2026-03-01 17:22:40] (step=0025537) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 4.996478184308354, LR: 0.0003 +[2026-03-01 17:22:48] (step=0025538) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 4.996673840735668, LR: 0.0003 +[2026-03-01 17:22:56] (step=0025539) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 4.996869497162982, LR: 0.0003 +[2026-03-01 17:23:04] (step=0025540) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 4.997065153590295, LR: 0.0003 +[2026-03-01 17:23:11] (step=0025541) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 4.997260810017609, LR: 0.0003 +[2026-03-01 17:23:19] (step=0025542) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 4.997456466444922, LR: 0.0003 +[2026-03-01 17:23:27] (step=0025543) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 4.997652122872236, LR: 0.0003 +[2026-03-01 17:23:35] (step=0025544) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 4.99784777929955, LR: 0.0003 +[2026-03-01 17:23:43] (step=0025545) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 4.9980434357268635, LR: 0.0003 +[2026-03-01 17:23:51] (step=0025546) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 4.9982390921541775, LR: 0.0003 +[2026-03-01 17:23:59] (step=0025547) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 4.998434748581491, LR: 0.0003 +[2026-03-01 17:24:07] (step=0025548) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 4.998630405008805, LR: 0.0003 +[2026-03-01 17:24:14] (step=0025549) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 4.998826061436118, LR: 0.0003 +[2026-03-01 17:24:22] (step=0025550) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 4.999021717863432, LR: 0.0003 +[2026-03-01 17:24:30] (step=0025551) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 4.999217374290746, LR: 0.0003 +[2026-03-01 17:24:38] (step=0025552) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 4.999413030718059, LR: 0.0003 +[2026-03-01 17:24:46] (step=0025553) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 4.999608687145373, LR: 0.0003 +[2026-03-01 17:24:54] (step=0025554) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 4.999804343572686, LR: 0.0003 +[2026-03-01 17:25:02] (step=0025555) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.0, LR: 0.0003 +[2026-03-01 17:25:02] Beginning epoch 5... +[2026-03-01 17:25:11] (step=0025556) Train Loss: 0.4384, Train Steps/Sec: 0.11, Epoch: 5.000195656427314, LR: 0.0003 +[2026-03-01 17:25:19] (step=0025557) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.000391312854627, LR: 0.0003 +[2026-03-01 17:25:27] (step=0025558) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.000586969281941, LR: 0.0003 +[2026-03-01 17:25:35] (step=0025559) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.000782625709254, LR: 0.0003 +[2026-03-01 17:25:43] (step=0025560) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.000978282136568, LR: 0.0003 +[2026-03-01 17:25:50] (step=0025561) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.001173938563882, LR: 0.0003 +[2026-03-01 17:25:58] (step=0025562) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 5.001369594991195, LR: 0.0003 +[2026-03-01 17:26:06] (step=0025563) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.001565251418509, LR: 0.0003 +[2026-03-01 17:26:14] (step=0025564) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.0017609078458225, LR: 0.0003 +[2026-03-01 17:26:22] (step=0025565) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.0019565642731365, LR: 0.0003 +[2026-03-01 17:26:30] (step=0025566) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.00215222070045, LR: 0.0003 +[2026-03-01 17:26:38] (step=0025567) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.002347877127764, LR: 0.0003 +[2026-03-01 17:26:45] (step=0025568) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.002543533555078, LR: 0.0003 +[2026-03-01 17:26:53] (step=0025569) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 5.002739189982391, LR: 0.0003 +[2026-03-01 17:27:01] (step=0025570) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.002934846409705, LR: 0.0003 +[2026-03-01 17:27:09] (step=0025571) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.003130502837018, LR: 0.0003 +[2026-03-01 17:27:17] (step=0025572) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.003326159264332, LR: 0.0003 +[2026-03-01 17:27:25] (step=0025573) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.003521815691646, LR: 0.0003 +[2026-03-01 17:27:32] (step=0025574) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.003717472118959, LR: 0.0003 +[2026-03-01 17:27:40] (step=0025575) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.003913128546273, LR: 0.0003 +[2026-03-01 17:27:48] (step=0025576) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.004108784973586, LR: 0.0003 +[2026-03-01 17:27:56] (step=0025577) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.0043044414009, LR: 0.0003 +[2026-03-01 17:28:04] (step=0025578) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.004500097828213, LR: 0.0003 +[2026-03-01 17:28:12] (step=0025579) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 5.004695754255527, LR: 0.0003 +[2026-03-01 17:28:20] (step=0025580) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.004891410682841, LR: 0.0003 +[2026-03-01 17:28:27] (step=0025581) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.005087067110154, LR: 0.0003 +[2026-03-01 17:28:35] (step=0025582) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.005282723537468, LR: 0.0003 +[2026-03-01 17:28:43] (step=0025583) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.0054783799647815, LR: 0.0003 +[2026-03-01 17:28:51] (step=0025584) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.0056740363920955, LR: 0.0003 +[2026-03-01 17:28:59] (step=0025585) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.0058696928194095, LR: 0.0003 +[2026-03-01 17:29:07] (step=0025586) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.006065349246723, LR: 0.0003 +[2026-03-01 17:29:14] (step=0025587) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.006261005674037, LR: 0.0003 +[2026-03-01 17:29:22] (step=0025588) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.00645666210135, LR: 0.0003 +[2026-03-01 17:29:30] (step=0025589) Train Loss: 0.4526, Train Steps/Sec: 0.12, Epoch: 5.006652318528664, LR: 0.0003 +[2026-03-01 17:29:38] (step=0025590) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.006847974955977, LR: 0.0003 +[2026-03-01 17:29:46] (step=0025591) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 5.007043631383291, LR: 0.0003 +[2026-03-01 17:29:54] (step=0025592) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.007239287810605, LR: 0.0003 +[2026-03-01 17:30:02] (step=0025593) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 5.007434944237918, LR: 0.0003 +[2026-03-01 17:30:09] (step=0025594) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.007630600665232, LR: 0.0003 +[2026-03-01 17:30:17] (step=0025595) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 5.007826257092545, LR: 0.0003 +[2026-03-01 17:30:25] (step=0025596) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.008021913519859, LR: 0.0003 +[2026-03-01 17:30:33] (step=0025597) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.008217569947173, LR: 0.0003 +[2026-03-01 17:30:41] (step=0025598) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.008413226374486, LR: 0.0003 +[2026-03-01 17:30:49] (step=0025599) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.0086088828018, LR: 0.0003 +[2026-03-01 17:30:57] (step=0025600) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.008804539229113, LR: 0.0003 +[2026-03-01 17:31:05] (step=0025601) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.009000195656427, LR: 0.0003 +[2026-03-01 17:31:12] (step=0025602) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.0091958520837405, LR: 0.0003 +[2026-03-01 17:31:20] (step=0025603) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.0093915085110545, LR: 0.0003 +[2026-03-01 17:31:28] (step=0025604) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.0095871649383685, LR: 0.0003 +[2026-03-01 17:31:36] (step=0025605) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.009782821365682, LR: 0.0003 +[2026-03-01 17:31:44] (step=0025606) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.009978477792996, LR: 0.0003 +[2026-03-01 17:31:52] (step=0025607) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.010174134220309, LR: 0.0003 +[2026-03-01 17:31:59] (step=0025608) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.010369790647623, LR: 0.0003 +[2026-03-01 17:32:07] (step=0025609) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.010565447074937, LR: 0.0003 +[2026-03-01 17:32:15] (step=0025610) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.01076110350225, LR: 0.0003 +[2026-03-01 17:32:23] (step=0025611) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 5.010956759929564, LR: 0.0003 +[2026-03-01 17:32:31] (step=0025612) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.011152416356877, LR: 0.0003 +[2026-03-01 17:32:39] (step=0025613) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.011348072784191, LR: 0.0003 +[2026-03-01 17:32:46] (step=0025614) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.011543729211505, LR: 0.0003 +[2026-03-01 17:32:54] (step=0025615) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.011739385638818, LR: 0.0003 +[2026-03-01 17:33:02] (step=0025616) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.011935042066132, LR: 0.0003 +[2026-03-01 17:33:10] (step=0025617) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.012130698493445, LR: 0.0003 +[2026-03-01 17:33:18] (step=0025618) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.012326354920759, LR: 0.0003 +[2026-03-01 17:33:26] (step=0025619) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.012522011348072, LR: 0.0003 +[2026-03-01 17:33:33] (step=0025620) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.012717667775386, LR: 0.0003 +[2026-03-01 17:33:41] (step=0025621) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.0129133242027, LR: 0.0003 +[2026-03-01 17:33:49] (step=0025622) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.0131089806300135, LR: 0.0003 +[2026-03-01 17:33:57] (step=0025623) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.0133046370573275, LR: 0.0003 +[2026-03-01 17:34:05] (step=0025624) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.013500293484641, LR: 0.0003 +[2026-03-01 17:34:13] (step=0025625) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.013695949911955, LR: 0.0003 +[2026-03-01 17:34:20] (step=0025626) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 5.013891606339269, LR: 0.0003 +[2026-03-01 17:34:28] (step=0025627) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.014087262766582, LR: 0.0003 +[2026-03-01 17:34:36] (step=0025628) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 5.014282919193896, LR: 0.0003 +[2026-03-01 17:34:44] (step=0025629) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.014478575621209, LR: 0.0003 +[2026-03-01 17:34:52] (step=0025630) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.014674232048523, LR: 0.0003 +[2026-03-01 17:35:00] (step=0025631) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.014869888475836, LR: 0.0003 +[2026-03-01 17:35:08] (step=0025632) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.01506554490315, LR: 0.0003 +[2026-03-01 17:35:15] (step=0025633) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.015261201330464, LR: 0.0003 +[2026-03-01 17:35:23] (step=0025634) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.015456857757777, LR: 0.0003 +[2026-03-01 17:35:31] (step=0025635) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.015652514185091, LR: 0.0003 +[2026-03-01 17:35:39] (step=0025636) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.015848170612404, LR: 0.0003 +[2026-03-01 17:35:47] (step=0025637) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.016043827039718, LR: 0.0003 +[2026-03-01 17:35:55] (step=0025638) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.016239483467032, LR: 0.0003 +[2026-03-01 17:36:03] (step=0025639) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 5.016435139894345, LR: 0.0003 +[2026-03-01 17:36:10] (step=0025640) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.016630796321659, LR: 0.0003 +[2026-03-01 17:36:18] (step=0025641) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.0168264527489725, LR: 0.0003 +[2026-03-01 17:36:26] (step=0025642) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.0170221091762865, LR: 0.0003 +[2026-03-01 17:36:34] (step=0025643) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 5.0172177656036, LR: 0.0003 +[2026-03-01 17:36:42] (step=0025644) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.017413422030914, LR: 0.0003 +[2026-03-01 17:36:50] (step=0025645) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 5.017609078458228, LR: 0.0003 +[2026-03-01 17:36:57] (step=0025646) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.017804734885541, LR: 0.0003 +[2026-03-01 17:37:05] (step=0025647) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.018000391312855, LR: 0.0003 +[2026-03-01 17:37:13] (step=0025648) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.018196047740168, LR: 0.0003 +[2026-03-01 17:37:21] (step=0025649) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.018391704167482, LR: 0.0003 +[2026-03-01 17:37:29] (step=0025650) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.018587360594796, LR: 0.0003 +[2026-03-01 17:37:37] (step=0025651) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 5.018783017022109, LR: 0.0003 +[2026-03-01 17:37:45] (step=0025652) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.018978673449423, LR: 0.0003 +[2026-03-01 17:37:52] (step=0025653) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.019174329876736, LR: 0.0003 +[2026-03-01 17:38:00] (step=0025654) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.01936998630405, LR: 0.0003 +[2026-03-01 17:38:08] (step=0025655) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.019565642731363, LR: 0.0003 +[2026-03-01 17:38:16] (step=0025656) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.019761299158677, LR: 0.0003 +[2026-03-01 17:38:24] (step=0025657) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.019956955585991, LR: 0.0003 +[2026-03-01 17:38:32] (step=0025658) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.020152612013304, LR: 0.0003 +[2026-03-01 17:38:40] (step=0025659) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.020348268440618, LR: 0.0003 +[2026-03-01 17:38:47] (step=0025660) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.0205439248679316, LR: 0.0003 +[2026-03-01 17:38:55] (step=0025661) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.020739581295246, LR: 0.0003 +[2026-03-01 17:39:03] (step=0025662) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.02093523772256, LR: 0.0003 +[2026-03-01 17:39:11] (step=0025663) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.021130894149873, LR: 0.0003 +[2026-03-01 17:39:19] (step=0025664) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.021326550577187, LR: 0.0003 +[2026-03-01 17:39:27] (step=0025665) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.0215222070045, LR: 0.0003 +[2026-03-01 17:39:34] (step=0025666) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.021717863431814, LR: 0.0003 +[2026-03-01 17:39:42] (step=0025667) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.021913519859127, LR: 0.0003 +[2026-03-01 17:39:50] (step=0025668) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 5.022109176286441, LR: 0.0003 +[2026-03-01 17:39:58] (step=0025669) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.022304832713755, LR: 0.0003 +[2026-03-01 17:40:06] (step=0025670) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.022500489141068, LR: 0.0003 +[2026-03-01 17:40:14] (step=0025671) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.022696145568382, LR: 0.0003 +[2026-03-01 17:40:21] (step=0025672) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.022891801995695, LR: 0.0003 +[2026-03-01 17:40:29] (step=0025673) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.023087458423009, LR: 0.0003 +[2026-03-01 17:40:37] (step=0025674) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.023283114850323, LR: 0.0003 +[2026-03-01 17:40:45] (step=0025675) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.023478771277636, LR: 0.0003 +[2026-03-01 17:40:53] (step=0025676) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.02367442770495, LR: 0.0003 +[2026-03-01 17:41:01] (step=0025677) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.023870084132263, LR: 0.0003 +[2026-03-01 17:41:08] (step=0025678) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.024065740559577, LR: 0.0003 +[2026-03-01 17:41:16] (step=0025679) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.0242613969868914, LR: 0.0003 +[2026-03-01 17:41:24] (step=0025680) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.024457053414205, LR: 0.0003 +[2026-03-01 17:41:32] (step=0025681) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 5.024652709841519, LR: 0.0003 +[2026-03-01 17:41:40] (step=0025682) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.024848366268832, LR: 0.0003 +[2026-03-01 17:41:48] (step=0025683) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.025044022696146, LR: 0.0003 +[2026-03-01 17:41:55] (step=0025684) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 5.025239679123459, LR: 0.0003 +[2026-03-01 17:42:03] (step=0025685) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.025435335550773, LR: 0.0003 +[2026-03-01 17:42:11] (step=0025686) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.025630991978087, LR: 0.0003 +[2026-03-01 17:42:19] (step=0025687) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.0258266484054, LR: 0.0003 +[2026-03-01 17:42:27] (step=0025688) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.026022304832714, LR: 0.0003 +[2026-03-01 17:42:35] (step=0025689) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.026217961260027, LR: 0.0003 +[2026-03-01 17:42:43] (step=0025690) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.026413617687341, LR: 0.0003 +[2026-03-01 17:42:51] (step=0025691) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.026609274114655, LR: 0.0003 +[2026-03-01 17:42:58] (step=0025692) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 5.026804930541968, LR: 0.0003 +[2026-03-01 17:43:06] (step=0025693) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.027000586969282, LR: 0.0003 +[2026-03-01 17:43:14] (step=0025694) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.027196243396595, LR: 0.0003 +[2026-03-01 17:43:22] (step=0025695) Train Loss: 0.4491, Train Steps/Sec: 0.12, Epoch: 5.027391899823909, LR: 0.0003 +[2026-03-01 17:43:30] (step=0025696) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.0275875562512224, LR: 0.0003 +[2026-03-01 17:43:38] (step=0025697) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.0277832126785365, LR: 0.0003 +[2026-03-01 17:43:46] (step=0025698) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.0279788691058505, LR: 0.0003 +[2026-03-01 17:43:53] (step=0025699) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.028174525533164, LR: 0.0003 +[2026-03-01 17:44:01] (step=0025700) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.028370181960478, LR: 0.0003 +[2026-03-01 17:44:09] (step=0025701) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.028565838387791, LR: 0.0003 +[2026-03-01 17:44:17] (step=0025702) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.028761494815105, LR: 0.0003 +[2026-03-01 17:44:25] (step=0025703) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.028957151242419, LR: 0.0003 +[2026-03-01 17:44:33] (step=0025704) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.029152807669732, LR: 0.0003 +[2026-03-01 17:44:41] (step=0025705) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.029348464097046, LR: 0.0003 +[2026-03-01 17:44:48] (step=0025706) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.029544120524359, LR: 0.0003 +[2026-03-01 17:44:56] (step=0025707) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.029739776951673, LR: 0.0003 +[2026-03-01 17:45:04] (step=0025708) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.029935433378986, LR: 0.0003 +[2026-03-01 17:45:12] (step=0025709) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.0301310898063, LR: 0.0003 +[2026-03-01 17:45:20] (step=0025710) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.030326746233614, LR: 0.0003 +[2026-03-01 17:45:28] (step=0025711) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.030522402660927, LR: 0.0003 +[2026-03-01 17:45:35] (step=0025712) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.030718059088241, LR: 0.0003 +[2026-03-01 17:45:43] (step=0025713) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.030913715515554, LR: 0.0003 +[2026-03-01 17:45:51] (step=0025714) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.031109371942868, LR: 0.0003 +[2026-03-01 17:45:59] (step=0025715) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.031305028370182, LR: 0.0003 +[2026-03-01 17:46:07] (step=0025716) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.0315006847974955, LR: 0.0003 +[2026-03-01 17:46:15] (step=0025717) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.0316963412248095, LR: 0.0003 +[2026-03-01 17:46:23] (step=0025718) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.031891997652123, LR: 0.0003 +[2026-03-01 17:46:30] (step=0025719) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.032087654079437, LR: 0.0003 +[2026-03-01 17:46:38] (step=0025720) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.03228331050675, LR: 0.0003 +[2026-03-01 17:46:46] (step=0025721) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.032478966934064, LR: 0.0003 +[2026-03-01 17:46:54] (step=0025722) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.032674623361378, LR: 0.0003 +[2026-03-01 17:47:02] (step=0025723) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 5.032870279788691, LR: 0.0003 +[2026-03-01 17:47:10] (step=0025724) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 5.033065936216005, LR: 0.0003 +[2026-03-01 17:47:17] (step=0025725) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.033261592643318, LR: 0.0003 +[2026-03-01 17:47:25] (step=0025726) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.033457249070632, LR: 0.0003 +[2026-03-01 17:47:33] (step=0025727) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.033652905497946, LR: 0.0003 +[2026-03-01 17:47:41] (step=0025728) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.033848561925259, LR: 0.0003 +[2026-03-01 17:47:49] (step=0025729) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 5.034044218352573, LR: 0.0003 +[2026-03-01 17:47:57] (step=0025730) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.034239874779886, LR: 0.0003 +[2026-03-01 17:48:04] (step=0025731) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.0344355312072, LR: 0.0003 +[2026-03-01 17:48:12] (step=0025732) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.034631187634514, LR: 0.0003 +[2026-03-01 17:48:20] (step=0025733) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.034826844061827, LR: 0.0003 +[2026-03-01 17:48:28] (step=0025734) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.035022500489141, LR: 0.0003 +[2026-03-01 17:48:36] (step=0025735) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.0352181569164545, LR: 0.0003 +[2026-03-01 17:48:44] (step=0025736) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 5.0354138133437685, LR: 0.0003 +[2026-03-01 17:48:52] (step=0025737) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.035609469771082, LR: 0.0003 +[2026-03-01 17:49:00] (step=0025738) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.035805126198396, LR: 0.0003 +[2026-03-01 17:49:07] (step=0025739) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.03600078262571, LR: 0.0003 +[2026-03-01 17:49:15] (step=0025740) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.036196439053023, LR: 0.0003 +[2026-03-01 17:49:23] (step=0025741) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.036392095480337, LR: 0.0003 +[2026-03-01 17:49:31] (step=0025742) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.03658775190765, LR: 0.0003 +[2026-03-01 17:49:39] (step=0025743) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.036783408334964, LR: 0.0003 +[2026-03-01 17:49:47] (step=0025744) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.036979064762278, LR: 0.0003 +[2026-03-01 17:49:54] (step=0025745) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.037174721189591, LR: 0.0003 +[2026-03-01 17:50:02] (step=0025746) Train Loss: 0.4496, Train Steps/Sec: 0.12, Epoch: 5.037370377616905, LR: 0.0003 +[2026-03-01 17:50:10] (step=0025747) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.037566034044218, LR: 0.0003 +[2026-03-01 17:50:18] (step=0025748) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 5.037761690471532, LR: 0.0003 +[2026-03-01 17:50:26] (step=0025749) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.037957346898845, LR: 0.0003 +[2026-03-01 17:50:34] (step=0025750) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.038153003326159, LR: 0.0003 +[2026-03-01 17:50:42] (step=0025751) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.038348659753473, LR: 0.0003 +[2026-03-01 17:50:50] (step=0025752) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 5.038544316180786, LR: 0.0003 +[2026-03-01 17:50:58] (step=0025753) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.0387399726081, LR: 0.0003 +[2026-03-01 17:51:05] (step=0025754) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.0389356290354135, LR: 0.0003 +[2026-03-01 17:51:13] (step=0025755) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.0391312854627275, LR: 0.0003 +[2026-03-01 17:51:21] (step=0025756) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.0393269418900415, LR: 0.0003 +[2026-03-01 17:51:29] (step=0025757) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.039522598317355, LR: 0.0003 +[2026-03-01 17:51:37] (step=0025758) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 5.039718254744669, LR: 0.0003 +[2026-03-01 17:51:45] (step=0025759) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.039913911171982, LR: 0.0003 +[2026-03-01 17:51:52] (step=0025760) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.040109567599296, LR: 0.0003 +[2026-03-01 17:52:00] (step=0025761) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.040305224026609, LR: 0.0003 +[2026-03-01 17:52:08] (step=0025762) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.040500880453923, LR: 0.0003 +[2026-03-01 17:52:16] (step=0025763) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.040696536881237, LR: 0.0003 +[2026-03-01 17:52:24] (step=0025764) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 5.04089219330855, LR: 0.0003 +[2026-03-01 17:52:32] (step=0025765) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.041087849735864, LR: 0.0003 +[2026-03-01 17:52:40] (step=0025766) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.041283506163177, LR: 0.0003 +[2026-03-01 17:52:47] (step=0025767) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.041479162590491, LR: 0.0003 +[2026-03-01 17:52:55] (step=0025768) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.041674819017805, LR: 0.0003 +[2026-03-01 17:53:03] (step=0025769) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 5.041870475445118, LR: 0.0003 +[2026-03-01 17:53:11] (step=0025770) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.042066131872432, LR: 0.0003 +[2026-03-01 17:53:19] (step=0025771) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 5.042261788299745, LR: 0.0003 +[2026-03-01 17:53:27] (step=0025772) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.042457444727059, LR: 0.0003 +[2026-03-01 17:53:35] (step=0025773) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.0426531011543725, LR: 0.0003 +[2026-03-01 17:53:42] (step=0025774) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.0428487575816865, LR: 0.0003 +[2026-03-01 17:53:50] (step=0025775) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.0430444140090005, LR: 0.0003 +[2026-03-01 17:53:58] (step=0025776) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.043240070436314, LR: 0.0003 +[2026-03-01 17:54:06] (step=0025777) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.043435726863628, LR: 0.0003 +[2026-03-01 17:54:14] (step=0025778) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.043631383290941, LR: 0.0003 +[2026-03-01 17:54:22] (step=0025779) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.043827039718255, LR: 0.0003 +[2026-03-01 17:54:30] (step=0025780) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.044022696145569, LR: 0.0003 +[2026-03-01 17:54:37] (step=0025781) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.044218352572882, LR: 0.0003 +[2026-03-01 17:54:45] (step=0025782) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.044414009000196, LR: 0.0003 +[2026-03-01 17:54:53] (step=0025783) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.044609665427509, LR: 0.0003 +[2026-03-01 17:55:01] (step=0025784) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.044805321854823, LR: 0.0003 +[2026-03-01 17:55:09] (step=0025785) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.045000978282137, LR: 0.0003 +[2026-03-01 17:55:17] (step=0025786) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.04519663470945, LR: 0.0003 +[2026-03-01 17:55:25] (step=0025787) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.045392291136764, LR: 0.0003 +[2026-03-01 17:55:33] (step=0025788) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.045587947564077, LR: 0.0003 +[2026-03-01 17:55:40] (step=0025789) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.045783603991391, LR: 0.0003 +[2026-03-01 17:55:48] (step=0025790) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.045979260418704, LR: 0.0003 +[2026-03-01 17:55:56] (step=0025791) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.046174916846018, LR: 0.0003 +[2026-03-01 17:56:04] (step=0025792) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.046370573273332, LR: 0.0003 +[2026-03-01 17:56:12] (step=0025793) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.0465662297006455, LR: 0.0003 +[2026-03-01 17:56:20] (step=0025794) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.0467618861279595, LR: 0.0003 +[2026-03-01 17:56:28] (step=0025795) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.046957542555273, LR: 0.0003 +[2026-03-01 17:56:35] (step=0025796) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.047153198982587, LR: 0.0003 +[2026-03-01 17:56:43] (step=0025797) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.047348855409901, LR: 0.0003 +[2026-03-01 17:56:51] (step=0025798) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.047544511837214, LR: 0.0003 +[2026-03-01 17:56:59] (step=0025799) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.047740168264528, LR: 0.0003 +[2026-03-01 17:57:07] (step=0025800) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.047935824691841, LR: 0.0003 +[2026-03-01 17:57:15] (step=0025801) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.048131481119155, LR: 0.0003 +[2026-03-01 17:57:23] (step=0025802) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 5.048327137546468, LR: 0.0003 +[2026-03-01 17:57:30] (step=0025803) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.048522793973782, LR: 0.0003 +[2026-03-01 17:57:38] (step=0025804) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.048718450401096, LR: 0.0003 +[2026-03-01 17:57:46] (step=0025805) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.048914106828409, LR: 0.0003 +[2026-03-01 17:57:54] (step=0025806) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.049109763255723, LR: 0.0003 +[2026-03-01 17:58:02] (step=0025807) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 5.049305419683036, LR: 0.0003 +[2026-03-01 17:58:10] (step=0025808) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.04950107611035, LR: 0.0003 +[2026-03-01 17:58:18] (step=0025809) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.049696732537664, LR: 0.0003 +[2026-03-01 17:58:25] (step=0025810) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.049892388964977, LR: 0.0003 +[2026-03-01 17:58:33] (step=0025811) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.050088045392291, LR: 0.0003 +[2026-03-01 17:58:41] (step=0025812) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.0502837018196045, LR: 0.0003 +[2026-03-01 17:58:49] (step=0025813) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.0504793582469185, LR: 0.0003 +[2026-03-01 17:58:57] (step=0025814) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.050675014674232, LR: 0.0003 +[2026-03-01 17:59:05] (step=0025815) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.050870671101546, LR: 0.0003 +[2026-03-01 17:59:13] (step=0025816) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.05106632752886, LR: 0.0003 +[2026-03-01 17:59:20] (step=0025817) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.051261983956173, LR: 0.0003 +[2026-03-01 17:59:28] (step=0025818) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.051457640383487, LR: 0.0003 +[2026-03-01 17:59:36] (step=0025819) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.0516532968108, LR: 0.0003 +[2026-03-01 17:59:44] (step=0025820) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.051848953238114, LR: 0.0003 +[2026-03-01 17:59:52] (step=0025821) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.052044609665428, LR: 0.0003 +[2026-03-01 18:00:00] (step=0025822) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 5.052240266092741, LR: 0.0003 +[2026-03-01 18:00:08] (step=0025823) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.052435922520055, LR: 0.0003 +[2026-03-01 18:00:15] (step=0025824) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 5.052631578947368, LR: 0.0003 +[2026-03-01 18:00:23] (step=0025825) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.052827235374682, LR: 0.0003 +[2026-03-01 18:00:31] (step=0025826) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.053022891801995, LR: 0.0003 +[2026-03-01 18:00:39] (step=0025827) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.053218548229309, LR: 0.0003 +[2026-03-01 18:00:47] (step=0025828) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.053414204656623, LR: 0.0003 +[2026-03-01 18:00:55] (step=0025829) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.053609861083936, LR: 0.0003 +[2026-03-01 18:01:03] (step=0025830) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.05380551751125, LR: 0.0003 +[2026-03-01 18:01:10] (step=0025831) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.0540011739385635, LR: 0.0003 +[2026-03-01 18:01:18] (step=0025832) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.0541968303658775, LR: 0.0003 +[2026-03-01 18:01:26] (step=0025833) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.0543924867931915, LR: 0.0003 +[2026-03-01 18:01:34] (step=0025834) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 5.054588143220505, LR: 0.0003 +[2026-03-01 18:01:42] (step=0025835) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.054783799647819, LR: 0.0003 +[2026-03-01 18:01:50] (step=0025836) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.054979456075132, LR: 0.0003 +[2026-03-01 18:01:58] (step=0025837) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.055175112502446, LR: 0.0003 +[2026-03-01 18:02:05] (step=0025838) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.05537076892976, LR: 0.0003 +[2026-03-01 18:02:13] (step=0025839) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.055566425357073, LR: 0.0003 +[2026-03-01 18:02:21] (step=0025840) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.055762081784387, LR: 0.0003 +[2026-03-01 18:02:29] (step=0025841) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.0559577382117, LR: 0.0003 +[2026-03-01 18:02:37] (step=0025842) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.056153394639014, LR: 0.0003 +[2026-03-01 18:02:45] (step=0025843) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.056349051066327, LR: 0.0003 +[2026-03-01 18:02:53] (step=0025844) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.056544707493641, LR: 0.0003 +[2026-03-01 18:03:00] (step=0025845) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 5.056740363920955, LR: 0.0003 +[2026-03-01 18:03:09] (step=0025846) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.056936020348268, LR: 0.0003 +[2026-03-01 18:03:16] (step=0025847) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.057131676775582, LR: 0.0003 +[2026-03-01 18:03:24] (step=0025848) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 5.057327333202895, LR: 0.0003 +[2026-03-01 18:03:32] (step=0025849) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.057522989630209, LR: 0.0003 +[2026-03-01 18:03:40] (step=0025850) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.057718646057523, LR: 0.0003 +[2026-03-01 18:03:48] (step=0025851) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.0579143024848365, LR: 0.0003 +[2026-03-01 18:03:56] (step=0025852) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.0581099589121505, LR: 0.0003 +[2026-03-01 18:04:03] (step=0025853) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.058305615339464, LR: 0.0003 +[2026-03-01 18:04:11] (step=0025854) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.058501271766778, LR: 0.0003 +[2026-03-01 18:04:19] (step=0025855) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.058696928194091, LR: 0.0003 +[2026-03-01 18:04:27] (step=0025856) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.058892584621405, LR: 0.0003 +[2026-03-01 18:04:35] (step=0025857) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.059088241048719, LR: 0.0003 +[2026-03-01 18:04:43] (step=0025858) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.059283897476032, LR: 0.0003 +[2026-03-01 18:04:51] (step=0025859) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.059479553903346, LR: 0.0003 +[2026-03-01 18:04:58] (step=0025860) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.059675210330659, LR: 0.0003 +[2026-03-01 18:05:06] (step=0025861) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.059870866757973, LR: 0.0003 +[2026-03-01 18:05:14] (step=0025862) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.060066523185287, LR: 0.0003 +[2026-03-01 18:05:22] (step=0025863) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 5.0602621796126, LR: 0.0003 +[2026-03-01 18:05:30] (step=0025864) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 5.060457836039914, LR: 0.0003 +[2026-03-01 18:05:38] (step=0025865) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 5.060653492467227, LR: 0.0003 +[2026-03-01 18:05:46] (step=0025866) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.060849148894541, LR: 0.0003 +[2026-03-01 18:05:53] (step=0025867) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.061044805321854, LR: 0.0003 +[2026-03-01 18:06:01] (step=0025868) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.061240461749168, LR: 0.0003 +[2026-03-01 18:06:09] (step=0025869) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.061436118176482, LR: 0.0003 +[2026-03-01 18:06:17] (step=0025870) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.0616317746037955, LR: 0.0003 +[2026-03-01 18:06:25] (step=0025871) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 5.0618274310311095, LR: 0.0003 +[2026-03-01 18:06:33] (step=0025872) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.062023087458423, LR: 0.0003 +[2026-03-01 18:06:40] (step=0025873) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.062218743885737, LR: 0.0003 +[2026-03-01 18:06:48] (step=0025874) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.062414400313051, LR: 0.0003 +[2026-03-01 18:06:56] (step=0025875) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.062610056740364, LR: 0.0003 +[2026-03-01 18:07:04] (step=0025876) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.062805713167678, LR: 0.0003 +[2026-03-01 18:07:12] (step=0025877) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 5.063001369594991, LR: 0.0003 +[2026-03-01 18:07:20] (step=0025878) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.063197026022305, LR: 0.0003 +[2026-03-01 18:07:28] (step=0025879) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.063392682449618, LR: 0.0003 +[2026-03-01 18:07:35] (step=0025880) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.063588338876932, LR: 0.0003 +[2026-03-01 18:07:43] (step=0025881) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.063783995304246, LR: 0.0003 +[2026-03-01 18:07:51] (step=0025882) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.063979651731559, LR: 0.0003 +[2026-03-01 18:07:59] (step=0025883) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.064175308158873, LR: 0.0003 +[2026-03-01 18:08:07] (step=0025884) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 5.064370964586186, LR: 0.0003 +[2026-03-01 18:08:15] (step=0025885) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.0645666210135, LR: 0.0003 +[2026-03-01 18:08:23] (step=0025886) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.064762277440814, LR: 0.0003 +[2026-03-01 18:08:31] (step=0025887) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.064957933868127, LR: 0.0003 +[2026-03-01 18:08:38] (step=0025888) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.065153590295441, LR: 0.0003 +[2026-03-01 18:08:46] (step=0025889) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.0653492467227546, LR: 0.0003 +[2026-03-01 18:08:54] (step=0025890) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.065544903150069, LR: 0.0003 +[2026-03-01 18:09:02] (step=0025891) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.065740559577382, LR: 0.0003 +[2026-03-01 18:09:10] (step=0025892) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.065936216004696, LR: 0.0003 +[2026-03-01 18:09:18] (step=0025893) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.06613187243201, LR: 0.0003 +[2026-03-01 18:09:26] (step=0025894) Train Loss: 0.4544, Train Steps/Sec: 0.12, Epoch: 5.066327528859323, LR: 0.0003 +[2026-03-01 18:09:34] (step=0025895) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.066523185286637, LR: 0.0003 +[2026-03-01 18:09:41] (step=0025896) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.06671884171395, LR: 0.0003 +[2026-03-01 18:09:49] (step=0025897) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.066914498141264, LR: 0.0003 +[2026-03-01 18:09:57] (step=0025898) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.067110154568578, LR: 0.0003 +[2026-03-01 18:10:05] (step=0025899) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.067305810995891, LR: 0.0003 +[2026-03-01 18:10:13] (step=0025900) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.067501467423205, LR: 0.0003 +[2026-03-01 18:10:21] (step=0025901) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.067697123850518, LR: 0.0003 +[2026-03-01 18:10:28] (step=0025902) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.067892780277832, LR: 0.0003 +[2026-03-01 18:10:36] (step=0025903) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.068088436705146, LR: 0.0003 +[2026-03-01 18:10:44] (step=0025904) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.068284093132459, LR: 0.0003 +[2026-03-01 18:10:52] (step=0025905) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.068479749559773, LR: 0.0003 +[2026-03-01 18:11:00] (step=0025906) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.068675405987086, LR: 0.0003 +[2026-03-01 18:11:08] (step=0025907) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.0688710624144, LR: 0.0003 +[2026-03-01 18:11:16] (step=0025908) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 5.069066718841714, LR: 0.0003 +[2026-03-01 18:11:23] (step=0025909) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.069262375269028, LR: 0.0003 +[2026-03-01 18:11:31] (step=0025910) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.069458031696342, LR: 0.0003 +[2026-03-01 18:11:39] (step=0025911) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.069653688123655, LR: 0.0003 +[2026-03-01 18:11:47] (step=0025912) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.069849344550969, LR: 0.0003 +[2026-03-01 18:11:55] (step=0025913) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.070045000978282, LR: 0.0003 +[2026-03-01 18:12:03] (step=0025914) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.070240657405596, LR: 0.0003 +[2026-03-01 18:12:11] (step=0025915) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.07043631383291, LR: 0.0003 +[2026-03-01 18:12:18] (step=0025916) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.070631970260223, LR: 0.0003 +[2026-03-01 18:12:26] (step=0025917) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.070827626687537, LR: 0.0003 +[2026-03-01 18:12:34] (step=0025918) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.07102328311485, LR: 0.0003 +[2026-03-01 18:12:42] (step=0025919) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.071218939542164, LR: 0.0003 +[2026-03-01 18:12:50] (step=0025920) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 5.071414595969477, LR: 0.0003 +[2026-03-01 18:12:58] (step=0025921) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.071610252396791, LR: 0.0003 +[2026-03-01 18:13:06] (step=0025922) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.071805908824105, LR: 0.0003 +[2026-03-01 18:13:13] (step=0025923) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.072001565251418, LR: 0.0003 +[2026-03-01 18:13:21] (step=0025924) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.072197221678732, LR: 0.0003 +[2026-03-01 18:13:29] (step=0025925) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.072392878106045, LR: 0.0003 +[2026-03-01 18:13:37] (step=0025926) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.0725885345333595, LR: 0.0003 +[2026-03-01 18:13:45] (step=0025927) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.0727841909606735, LR: 0.0003 +[2026-03-01 18:13:53] (step=0025928) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.072979847387987, LR: 0.0003 +[2026-03-01 18:14:00] (step=0025929) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.073175503815301, LR: 0.0003 +[2026-03-01 18:14:08] (step=0025930) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.073371160242614, LR: 0.0003 +[2026-03-01 18:14:16] (step=0025931) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 5.073566816669928, LR: 0.0003 +[2026-03-01 18:14:24] (step=0025932) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.073762473097241, LR: 0.0003 +[2026-03-01 18:14:32] (step=0025933) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.073958129524555, LR: 0.0003 +[2026-03-01 18:14:40] (step=0025934) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 5.074153785951869, LR: 0.0003 +[2026-03-01 18:14:48] (step=0025935) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.074349442379182, LR: 0.0003 +[2026-03-01 18:14:56] (step=0025936) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 5.074545098806496, LR: 0.0003 +[2026-03-01 18:15:03] (step=0025937) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.074740755233809, LR: 0.0003 +[2026-03-01 18:15:11] (step=0025938) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.074936411661123, LR: 0.0003 +[2026-03-01 18:15:19] (step=0025939) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.075132068088437, LR: 0.0003 +[2026-03-01 18:15:27] (step=0025940) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 5.07532772451575, LR: 0.0003 +[2026-03-01 18:15:35] (step=0025941) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.075523380943064, LR: 0.0003 +[2026-03-01 18:15:43] (step=0025942) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.075719037370377, LR: 0.0003 +[2026-03-01 18:15:51] (step=0025943) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.075914693797691, LR: 0.0003 +[2026-03-01 18:15:58] (step=0025944) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.0761103502250045, LR: 0.0003 +[2026-03-01 18:16:06] (step=0025945) Train Loss: 0.4383, Train Steps/Sec: 0.12, Epoch: 5.0763060066523185, LR: 0.0003 +[2026-03-01 18:16:14] (step=0025946) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.0765016630796325, LR: 0.0003 +[2026-03-01 18:16:22] (step=0025947) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.076697319506946, LR: 0.0003 +[2026-03-01 18:16:30] (step=0025948) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.07689297593426, LR: 0.0003 +[2026-03-01 18:16:38] (step=0025949) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.077088632361573, LR: 0.0003 +[2026-03-01 18:16:46] (step=0025950) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.077284288788887, LR: 0.0003 +[2026-03-01 18:16:54] (step=0025951) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.077479945216201, LR: 0.0003 +[2026-03-01 18:17:01] (step=0025952) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.077675601643514, LR: 0.0003 +[2026-03-01 18:17:09] (step=0025953) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.077871258070828, LR: 0.0003 +[2026-03-01 18:17:17] (step=0025954) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.078066914498141, LR: 0.0003 +[2026-03-01 18:17:25] (step=0025955) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.078262570925455, LR: 0.0003 +[2026-03-01 18:17:33] (step=0025956) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.078458227352769, LR: 0.0003 +[2026-03-01 18:17:41] (step=0025957) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.078653883780082, LR: 0.0003 +[2026-03-01 18:17:49] (step=0025958) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.078849540207396, LR: 0.0003 +[2026-03-01 18:17:56] (step=0025959) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.079045196634709, LR: 0.0003 +[2026-03-01 18:18:04] (step=0025960) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.079240853062023, LR: 0.0003 +[2026-03-01 18:18:12] (step=0025961) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 5.079436509489336, LR: 0.0003 +[2026-03-01 18:18:20] (step=0025962) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.07963216591665, LR: 0.0003 +[2026-03-01 18:18:28] (step=0025963) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.079827822343964, LR: 0.0003 +[2026-03-01 18:18:36] (step=0025964) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.0800234787712775, LR: 0.0003 +[2026-03-01 18:18:43] (step=0025965) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.0802191351985915, LR: 0.0003 +[2026-03-01 18:18:51] (step=0025966) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.080414791625905, LR: 0.0003 +[2026-03-01 18:18:59] (step=0025967) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 5.080610448053219, LR: 0.0003 +[2026-03-01 18:19:07] (step=0025968) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.080806104480533, LR: 0.0003 +[2026-03-01 18:19:15] (step=0025969) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.081001760907846, LR: 0.0003 +[2026-03-01 18:19:23] (step=0025970) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 5.08119741733516, LR: 0.0003 +[2026-03-01 18:19:31] (step=0025971) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.081393073762473, LR: 0.0003 +[2026-03-01 18:19:38] (step=0025972) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.081588730189787, LR: 0.0003 +[2026-03-01 18:19:46] (step=0025973) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.0817843866171, LR: 0.0003 +[2026-03-01 18:19:54] (step=0025974) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.081980043044414, LR: 0.0003 +[2026-03-01 18:20:02] (step=0025975) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 5.082175699471728, LR: 0.0003 +[2026-03-01 18:20:10] (step=0025976) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.082371355899041, LR: 0.0003 +[2026-03-01 18:20:18] (step=0025977) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.082567012326355, LR: 0.0003 +[2026-03-01 18:20:26] (step=0025978) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.082762668753668, LR: 0.0003 +[2026-03-01 18:20:33] (step=0025979) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.082958325180982, LR: 0.0003 +[2026-03-01 18:20:41] (step=0025980) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.083153981608296, LR: 0.0003 +[2026-03-01 18:20:49] (step=0025981) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.083349638035609, LR: 0.0003 +[2026-03-01 18:20:57] (step=0025982) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.083545294462923, LR: 0.0003 +[2026-03-01 18:21:05] (step=0025983) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.0837409508902365, LR: 0.0003 +[2026-03-01 18:21:13] (step=0025984) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.0839366073175505, LR: 0.0003 +[2026-03-01 18:21:21] (step=0025985) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.084132263744864, LR: 0.0003 +[2026-03-01 18:21:29] (step=0025986) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.084327920172178, LR: 0.0003 +[2026-03-01 18:21:36] (step=0025987) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.084523576599492, LR: 0.0003 +[2026-03-01 18:21:44] (step=0025988) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.084719233026805, LR: 0.0003 +[2026-03-01 18:21:52] (step=0025989) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.084914889454119, LR: 0.0003 +[2026-03-01 18:22:00] (step=0025990) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.085110545881432, LR: 0.0003 +[2026-03-01 18:22:08] (step=0025991) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.085306202308746, LR: 0.0003 +[2026-03-01 18:22:16] (step=0025992) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.08550185873606, LR: 0.0003 +[2026-03-01 18:22:24] (step=0025993) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.085697515163373, LR: 0.0003 +[2026-03-01 18:22:31] (step=0025994) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.085893171590687, LR: 0.0003 +[2026-03-01 18:22:39] (step=0025995) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.086088828018, LR: 0.0003 +[2026-03-01 18:22:47] (step=0025996) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.086284484445314, LR: 0.0003 +[2026-03-01 18:22:55] (step=0025997) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 5.086480140872627, LR: 0.0003 +[2026-03-01 18:23:03] (step=0025998) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.086675797299941, LR: 0.0003 +[2026-03-01 18:23:11] (step=0025999) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.086871453727255, LR: 0.0003 +[2026-03-01 18:23:19] (step=0026000) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.087067110154568, LR: 0.0003 +[2026-03-01 18:23:19] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0026000/ +[2026-03-01 18:23:26] (step=0026001) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 5.087262766581882, LR: 0.0003 +[2026-03-01 18:23:34] (step=0026002) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.0874584230091955, LR: 0.0003 +[2026-03-01 18:23:42] (step=0026003) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.0876540794365095, LR: 0.0003 +[2026-03-01 18:23:50] (step=0026004) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.0878497358638235, LR: 0.0003 +[2026-03-01 18:23:58] (step=0026005) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 5.088045392291137, LR: 0.0003 +[2026-03-01 18:24:06] (step=0026006) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.088241048718451, LR: 0.0003 +[2026-03-01 18:24:14] (step=0026007) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.088436705145764, LR: 0.0003 +[2026-03-01 18:24:21] (step=0026008) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.088632361573078, LR: 0.0003 +[2026-03-01 18:24:29] (step=0026009) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.088828018000392, LR: 0.0003 +[2026-03-01 18:24:37] (step=0026010) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.089023674427705, LR: 0.0003 +[2026-03-01 18:24:45] (step=0026011) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.089219330855019, LR: 0.0003 +[2026-03-01 18:24:53] (step=0026012) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 5.089414987282332, LR: 0.0003 +[2026-03-01 18:25:01] (step=0026013) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.089610643709646, LR: 0.0003 +[2026-03-01 18:25:08] (step=0026014) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.089806300136959, LR: 0.0003 +[2026-03-01 18:25:16] (step=0026015) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 5.090001956564273, LR: 0.0003 +[2026-03-01 18:25:24] (step=0026016) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.090197612991587, LR: 0.0003 +[2026-03-01 18:25:32] (step=0026017) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.0903932694189, LR: 0.0003 +[2026-03-01 18:25:40] (step=0026018) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 5.090588925846214, LR: 0.0003 +[2026-03-01 18:25:48] (step=0026019) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.090784582273527, LR: 0.0003 +[2026-03-01 18:25:56] (step=0026020) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.090980238700841, LR: 0.0003 +[2026-03-01 18:26:03] (step=0026021) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.091175895128155, LR: 0.0003 +[2026-03-01 18:26:11] (step=0026022) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.0913715515554685, LR: 0.0003 +[2026-03-01 18:26:19] (step=0026023) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.0915672079827825, LR: 0.0003 +[2026-03-01 18:26:27] (step=0026024) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.091762864410096, LR: 0.0003 +[2026-03-01 18:26:35] (step=0026025) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.09195852083741, LR: 0.0003 +[2026-03-01 18:26:43] (step=0026026) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 5.092154177264723, LR: 0.0003 +[2026-03-01 18:26:51] (step=0026027) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.092349833692037, LR: 0.0003 +[2026-03-01 18:26:58] (step=0026028) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.092545490119351, LR: 0.0003 +[2026-03-01 18:27:06] (step=0026029) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.092741146546664, LR: 0.0003 +[2026-03-01 18:27:14] (step=0026030) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.092936802973978, LR: 0.0003 +[2026-03-01 18:27:22] (step=0026031) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.093132459401291, LR: 0.0003 +[2026-03-01 18:27:30] (step=0026032) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.093328115828605, LR: 0.0003 +[2026-03-01 18:27:38] (step=0026033) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.093523772255919, LR: 0.0003 +[2026-03-01 18:27:45] (step=0026034) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.093719428683232, LR: 0.0003 +[2026-03-01 18:27:53] (step=0026035) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.093915085110546, LR: 0.0003 +[2026-03-01 18:28:01] (step=0026036) Train Loss: 0.4499, Train Steps/Sec: 0.12, Epoch: 5.094110741537859, LR: 0.0003 +[2026-03-01 18:28:09] (step=0026037) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.094306397965173, LR: 0.0003 +[2026-03-01 18:28:17] (step=0026038) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 5.094502054392486, LR: 0.0003 +[2026-03-01 18:28:25] (step=0026039) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.0946977108198, LR: 0.0003 +[2026-03-01 18:28:33] (step=0026040) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.094893367247114, LR: 0.0003 +[2026-03-01 18:28:41] (step=0026041) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.0950890236744275, LR: 0.0003 +[2026-03-01 18:28:49] (step=0026042) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.0952846801017415, LR: 0.0003 +[2026-03-01 18:28:56] (step=0026043) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.095480336529055, LR: 0.0003 +[2026-03-01 18:29:04] (step=0026044) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.095675992956369, LR: 0.0003 +[2026-03-01 18:29:12] (step=0026045) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.095871649383683, LR: 0.0003 +[2026-03-01 18:29:20] (step=0026046) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.096067305810996, LR: 0.0003 +[2026-03-01 18:29:28] (step=0026047) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.09626296223831, LR: 0.0003 +[2026-03-01 18:29:36] (step=0026048) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.096458618665623, LR: 0.0003 +[2026-03-01 18:29:44] (step=0026049) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.096654275092937, LR: 0.0003 +[2026-03-01 18:29:51] (step=0026050) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.09684993152025, LR: 0.0003 +[2026-03-01 18:29:59] (step=0026051) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.097045587947564, LR: 0.0003 +[2026-03-01 18:30:07] (step=0026052) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.097241244374878, LR: 0.0003 +[2026-03-01 18:30:15] (step=0026053) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.097436900802191, LR: 0.0003 +[2026-03-01 18:30:23] (step=0026054) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.097632557229505, LR: 0.0003 +[2026-03-01 18:30:31] (step=0026055) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.097828213656818, LR: 0.0003 +[2026-03-01 18:30:39] (step=0026056) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.098023870084132, LR: 0.0003 +[2026-03-01 18:30:46] (step=0026057) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.098219526511446, LR: 0.0003 +[2026-03-01 18:30:54] (step=0026058) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.098415182938759, LR: 0.0003 +[2026-03-01 18:31:02] (step=0026059) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.098610839366073, LR: 0.0003 +[2026-03-01 18:31:10] (step=0026060) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.0988064957933865, LR: 0.0003 +[2026-03-01 18:31:18] (step=0026061) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.0990021522207005, LR: 0.0003 +[2026-03-01 18:31:26] (step=0026062) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.0991978086480145, LR: 0.0003 +[2026-03-01 18:31:33] (step=0026063) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.099393465075328, LR: 0.0003 +[2026-03-01 18:31:41] (step=0026064) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.099589121502642, LR: 0.0003 +[2026-03-01 18:31:49] (step=0026065) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.099784777929955, LR: 0.0003 +[2026-03-01 18:31:57] (step=0026066) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 5.099980434357269, LR: 0.0003 +[2026-03-01 18:32:05] (step=0026067) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.100176090784582, LR: 0.0003 +[2026-03-01 18:32:13] (step=0026068) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.100371747211896, LR: 0.0003 +[2026-03-01 18:32:21] (step=0026069) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.10056740363921, LR: 0.0003 +[2026-03-01 18:32:28] (step=0026070) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.100763060066523, LR: 0.0003 +[2026-03-01 18:32:36] (step=0026071) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.100958716493837, LR: 0.0003 +[2026-03-01 18:32:44] (step=0026072) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.10115437292115, LR: 0.0003 +[2026-03-01 18:32:52] (step=0026073) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 5.101350029348464, LR: 0.0003 +[2026-03-01 18:33:00] (step=0026074) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.101545685775778, LR: 0.0003 +[2026-03-01 18:33:08] (step=0026075) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.101741342203091, LR: 0.0003 +[2026-03-01 18:33:16] (step=0026076) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.101936998630405, LR: 0.0003 +[2026-03-01 18:33:23] (step=0026077) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.102132655057718, LR: 0.0003 +[2026-03-01 18:33:31] (step=0026078) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 5.102328311485032, LR: 0.0003 +[2026-03-01 18:33:39] (step=0026079) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.1025239679123455, LR: 0.0003 +[2026-03-01 18:33:47] (step=0026080) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.1027196243396595, LR: 0.0003 +[2026-03-01 18:33:55] (step=0026081) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.1029152807669735, LR: 0.0003 +[2026-03-01 18:34:03] (step=0026082) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.103110937194287, LR: 0.0003 +[2026-03-01 18:34:11] (step=0026083) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.103306593621601, LR: 0.0003 +[2026-03-01 18:34:18] (step=0026084) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.103502250048914, LR: 0.0003 +[2026-03-01 18:34:26] (step=0026085) Train Loss: 0.4544, Train Steps/Sec: 0.12, Epoch: 5.103697906476228, LR: 0.0003 +[2026-03-01 18:34:34] (step=0026086) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.103893562903542, LR: 0.0003 +[2026-03-01 18:34:42] (step=0026087) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 5.104089219330855, LR: 0.0003 +[2026-03-01 18:34:50] (step=0026088) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 5.104284875758169, LR: 0.0003 +[2026-03-01 18:34:58] (step=0026089) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.104480532185482, LR: 0.0003 +[2026-03-01 18:35:06] (step=0026090) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.104676188612796, LR: 0.0003 +[2026-03-01 18:35:14] (step=0026091) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.104871845040109, LR: 0.0003 +[2026-03-01 18:35:22] (step=0026092) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.105067501467423, LR: 0.0003 +[2026-03-01 18:35:29] (step=0026093) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.105263157894737, LR: 0.0003 +[2026-03-01 18:35:37] (step=0026094) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.10545881432205, LR: 0.0003 +[2026-03-01 18:35:45] (step=0026095) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.105654470749364, LR: 0.0003 +[2026-03-01 18:35:53] (step=0026096) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.105850127176677, LR: 0.0003 +[2026-03-01 18:36:01] (step=0026097) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.106045783603991, LR: 0.0003 +[2026-03-01 18:36:09] (step=0026098) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.106241440031305, LR: 0.0003 +[2026-03-01 18:36:17] (step=0026099) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.1064370964586185, LR: 0.0003 +[2026-03-01 18:36:24] (step=0026100) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.1066327528859325, LR: 0.0003 +[2026-03-01 18:36:32] (step=0026101) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 5.106828409313246, LR: 0.0003 +[2026-03-01 18:36:40] (step=0026102) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.10702406574056, LR: 0.0003 +[2026-03-01 18:36:48] (step=0026103) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.107219722167873, LR: 0.0003 +[2026-03-01 18:36:56] (step=0026104) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.107415378595187, LR: 0.0003 +[2026-03-01 18:37:04] (step=0026105) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.107611035022501, LR: 0.0003 +[2026-03-01 18:37:12] (step=0026106) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.107806691449814, LR: 0.0003 +[2026-03-01 18:37:19] (step=0026107) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.108002347877128, LR: 0.0003 +[2026-03-01 18:37:27] (step=0026108) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.108198004304441, LR: 0.0003 +[2026-03-01 18:37:35] (step=0026109) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.108393660731755, LR: 0.0003 +[2026-03-01 18:37:43] (step=0026110) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 5.108589317159069, LR: 0.0003 +[2026-03-01 18:37:51] (step=0026111) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.108784973586382, LR: 0.0003 +[2026-03-01 18:37:59] (step=0026112) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.108980630013696, LR: 0.0003 +[2026-03-01 18:38:06] (step=0026113) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 5.109176286441009, LR: 0.0003 +[2026-03-01 18:38:14] (step=0026114) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.109371942868323, LR: 0.0003 +[2026-03-01 18:38:22] (step=0026115) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.109567599295636, LR: 0.0003 +[2026-03-01 18:38:30] (step=0026116) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.10976325572295, LR: 0.0003 +[2026-03-01 18:38:38] (step=0026117) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.109958912150264, LR: 0.0003 +[2026-03-01 18:38:46] (step=0026118) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.1101545685775775, LR: 0.0003 +[2026-03-01 18:38:54] (step=0026119) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.1103502250048916, LR: 0.0003 +[2026-03-01 18:39:01] (step=0026120) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.110545881432205, LR: 0.0003 +[2026-03-01 18:39:09] (step=0026121) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.110741537859519, LR: 0.0003 +[2026-03-01 18:39:17] (step=0026122) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.110937194286833, LR: 0.0003 +[2026-03-01 18:39:25] (step=0026123) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.111132850714146, LR: 0.0003 +[2026-03-01 18:39:33] (step=0026124) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.11132850714146, LR: 0.0003 +[2026-03-01 18:39:41] (step=0026125) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.111524163568773, LR: 0.0003 +[2026-03-01 18:39:49] (step=0026126) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.111719819996087, LR: 0.0003 +[2026-03-01 18:39:56] (step=0026127) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.111915476423401, LR: 0.0003 +[2026-03-01 18:40:04] (step=0026128) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.112111132850714, LR: 0.0003 +[2026-03-01 18:40:12] (step=0026129) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.112306789278028, LR: 0.0003 +[2026-03-01 18:40:20] (step=0026130) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.112502445705341, LR: 0.0003 +[2026-03-01 18:40:28] (step=0026131) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.112698102132655, LR: 0.0003 +[2026-03-01 18:40:36] (step=0026132) Train Loss: 0.4301, Train Steps/Sec: 0.12, Epoch: 5.112893758559968, LR: 0.0003 +[2026-03-01 18:40:44] (step=0026133) Train Loss: 0.4361, Train Steps/Sec: 0.12, Epoch: 5.113089414987282, LR: 0.0003 +[2026-03-01 18:40:52] (step=0026134) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.113285071414596, LR: 0.0003 +[2026-03-01 18:41:00] (step=0026135) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.113480727841909, LR: 0.0003 +[2026-03-01 18:41:07] (step=0026136) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 5.113676384269223, LR: 0.0003 +[2026-03-01 18:41:15] (step=0026137) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.113872040696537, LR: 0.0003 +[2026-03-01 18:41:23] (step=0026138) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.114067697123851, LR: 0.0003 +[2026-03-01 18:41:31] (step=0026139) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.114263353551165, LR: 0.0003 +[2026-03-01 18:41:39] (step=0026140) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 5.114459009978478, LR: 0.0003 +[2026-03-01 18:41:47] (step=0026141) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.114654666405792, LR: 0.0003 +[2026-03-01 18:41:54] (step=0026142) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.114850322833105, LR: 0.0003 +[2026-03-01 18:42:02] (step=0026143) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.115045979260419, LR: 0.0003 +[2026-03-01 18:42:10] (step=0026144) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.115241635687732, LR: 0.0003 +[2026-03-01 18:42:18] (step=0026145) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.115437292115046, LR: 0.0003 +[2026-03-01 18:42:26] (step=0026146) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.11563294854236, LR: 0.0003 +[2026-03-01 18:42:34] (step=0026147) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.115828604969673, LR: 0.0003 +[2026-03-01 18:42:42] (step=0026148) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.116024261396987, LR: 0.0003 +[2026-03-01 18:42:49] (step=0026149) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.1162199178243, LR: 0.0003 +[2026-03-01 18:42:57] (step=0026150) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 5.116415574251614, LR: 0.0003 +[2026-03-01 18:43:05] (step=0026151) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.116611230678928, LR: 0.0003 +[2026-03-01 18:43:13] (step=0026152) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.116806887106241, LR: 0.0003 +[2026-03-01 18:43:21] (step=0026153) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.117002543533555, LR: 0.0003 +[2026-03-01 18:43:29] (step=0026154) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.117198199960868, LR: 0.0003 +[2026-03-01 18:43:37] (step=0026155) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.1173938563881824, LR: 0.0003 +[2026-03-01 18:43:44] (step=0026156) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.117589512815496, LR: 0.0003 +[2026-03-01 18:43:52] (step=0026157) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.11778516924281, LR: 0.0003 +[2026-03-01 18:44:00] (step=0026158) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.117980825670124, LR: 0.0003 +[2026-03-01 18:44:08] (step=0026159) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 5.118176482097437, LR: 0.0003 +[2026-03-01 18:44:16] (step=0026160) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.118372138524751, LR: 0.0003 +[2026-03-01 18:44:24] (step=0026161) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.118567794952064, LR: 0.0003 +[2026-03-01 18:44:31] (step=0026162) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.118763451379378, LR: 0.0003 +[2026-03-01 18:44:39] (step=0026163) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.118959107806692, LR: 0.0003 +[2026-03-01 18:44:47] (step=0026164) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.119154764234005, LR: 0.0003 +[2026-03-01 18:44:55] (step=0026165) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.119350420661319, LR: 0.0003 +[2026-03-01 18:45:03] (step=0026166) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.119546077088632, LR: 0.0003 +[2026-03-01 18:45:11] (step=0026167) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.119741733515946, LR: 0.0003 +[2026-03-01 18:45:19] (step=0026168) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.119937389943259, LR: 0.0003 +[2026-03-01 18:45:26] (step=0026169) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.120133046370573, LR: 0.0003 +[2026-03-01 18:45:34] (step=0026170) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.120328702797887, LR: 0.0003 +[2026-03-01 18:45:42] (step=0026171) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.1205243592252, LR: 0.0003 +[2026-03-01 18:45:50] (step=0026172) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.120720015652514, LR: 0.0003 +[2026-03-01 18:45:58] (step=0026173) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.1209156720798275, LR: 0.0003 +[2026-03-01 18:46:06] (step=0026174) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.1211113285071415, LR: 0.0003 +[2026-03-01 18:46:14] (step=0026175) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.1213069849344555, LR: 0.0003 +[2026-03-01 18:46:21] (step=0026176) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.121502641361769, LR: 0.0003 +[2026-03-01 18:46:29] (step=0026177) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.121698297789083, LR: 0.0003 +[2026-03-01 18:46:37] (step=0026178) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 5.121893954216396, LR: 0.0003 +[2026-03-01 18:46:45] (step=0026179) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.12208961064371, LR: 0.0003 +[2026-03-01 18:46:53] (step=0026180) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.122285267071024, LR: 0.0003 +[2026-03-01 18:47:01] (step=0026181) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.122480923498337, LR: 0.0003 +[2026-03-01 18:47:09] (step=0026182) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 5.122676579925651, LR: 0.0003 +[2026-03-01 18:47:17] (step=0026183) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 5.122872236352964, LR: 0.0003 +[2026-03-01 18:47:24] (step=0026184) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.123067892780278, LR: 0.0003 +[2026-03-01 18:47:32] (step=0026185) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.123263549207591, LR: 0.0003 +[2026-03-01 18:47:40] (step=0026186) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.123459205634905, LR: 0.0003 +[2026-03-01 18:47:48] (step=0026187) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.123654862062219, LR: 0.0003 +[2026-03-01 18:47:56] (step=0026188) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.123850518489532, LR: 0.0003 +[2026-03-01 18:48:04] (step=0026189) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 5.124046174916846, LR: 0.0003 +[2026-03-01 18:48:12] (step=0026190) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.124241831344159, LR: 0.0003 +[2026-03-01 18:48:20] (step=0026191) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.124437487771473, LR: 0.0003 +[2026-03-01 18:48:27] (step=0026192) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.124633144198787, LR: 0.0003 +[2026-03-01 18:48:35] (step=0026193) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.1248288006261005, LR: 0.0003 +[2026-03-01 18:48:43] (step=0026194) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.1250244570534145, LR: 0.0003 +[2026-03-01 18:48:51] (step=0026195) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.125220113480728, LR: 0.0003 +[2026-03-01 18:48:59] (step=0026196) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.125415769908042, LR: 0.0003 +[2026-03-01 18:49:07] (step=0026197) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.125611426335355, LR: 0.0003 +[2026-03-01 18:49:15] (step=0026198) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.125807082762669, LR: 0.0003 +[2026-03-01 18:49:22] (step=0026199) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.126002739189983, LR: 0.0003 +[2026-03-01 18:49:30] (step=0026200) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.126198395617296, LR: 0.0003 +[2026-03-01 18:49:38] (step=0026201) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.12639405204461, LR: 0.0003 +[2026-03-01 18:49:46] (step=0026202) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.126589708471923, LR: 0.0003 +[2026-03-01 18:49:54] (step=0026203) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 5.126785364899237, LR: 0.0003 +[2026-03-01 18:50:02] (step=0026204) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.126981021326551, LR: 0.0003 +[2026-03-01 18:50:10] (step=0026205) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.127176677753864, LR: 0.0003 +[2026-03-01 18:50:17] (step=0026206) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.127372334181178, LR: 0.0003 +[2026-03-01 18:50:25] (step=0026207) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.127567990608491, LR: 0.0003 +[2026-03-01 18:50:33] (step=0026208) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 5.127763647035805, LR: 0.0003 +[2026-03-01 18:50:41] (step=0026209) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.127959303463118, LR: 0.0003 +[2026-03-01 18:50:49] (step=0026210) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.128154959890432, LR: 0.0003 +[2026-03-01 18:50:57] (step=0026211) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.128350616317746, LR: 0.0003 +[2026-03-01 18:51:04] (step=0026212) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.1285462727450595, LR: 0.0003 +[2026-03-01 18:51:12] (step=0026213) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 5.1287419291723735, LR: 0.0003 +[2026-03-01 18:51:20] (step=0026214) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.128937585599687, LR: 0.0003 +[2026-03-01 18:51:28] (step=0026215) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.129133242027001, LR: 0.0003 +[2026-03-01 18:51:36] (step=0026216) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.129328898454315, LR: 0.0003 +[2026-03-01 18:51:44] (step=0026217) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.129524554881628, LR: 0.0003 +[2026-03-01 18:51:52] (step=0026218) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.129720211308942, LR: 0.0003 +[2026-03-01 18:51:59] (step=0026219) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.129915867736255, LR: 0.0003 +[2026-03-01 18:52:07] (step=0026220) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.130111524163569, LR: 0.0003 +[2026-03-01 18:52:15] (step=0026221) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.130307180590882, LR: 0.0003 +[2026-03-01 18:52:23] (step=0026222) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.130502837018196, LR: 0.0003 +[2026-03-01 18:52:31] (step=0026223) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.13069849344551, LR: 0.0003 +[2026-03-01 18:52:39] (step=0026224) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.130894149872823, LR: 0.0003 +[2026-03-01 18:52:46] (step=0026225) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.131089806300137, LR: 0.0003 +[2026-03-01 18:52:54] (step=0026226) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.13128546272745, LR: 0.0003 +[2026-03-01 18:53:02] (step=0026227) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.131481119154764, LR: 0.0003 +[2026-03-01 18:53:10] (step=0026228) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.131676775582078, LR: 0.0003 +[2026-03-01 18:53:18] (step=0026229) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.131872432009391, LR: 0.0003 +[2026-03-01 18:53:26] (step=0026230) Train Loss: 0.4468, Train Steps/Sec: 0.12, Epoch: 5.132068088436705, LR: 0.0003 +[2026-03-01 18:53:34] (step=0026231) Train Loss: 0.4495, Train Steps/Sec: 0.12, Epoch: 5.1322637448640185, LR: 0.0003 +[2026-03-01 18:53:42] (step=0026232) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.1324594012913325, LR: 0.0003 +[2026-03-01 18:53:50] (step=0026233) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 5.1326550577186465, LR: 0.0003 +[2026-03-01 18:53:57] (step=0026234) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.13285071414596, LR: 0.0003 +[2026-03-01 18:54:05] (step=0026235) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.133046370573274, LR: 0.0003 +[2026-03-01 18:54:13] (step=0026236) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.133242027000587, LR: 0.0003 +[2026-03-01 18:54:21] (step=0026237) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.133437683427901, LR: 0.0003 +[2026-03-01 18:54:29] (step=0026238) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.133633339855214, LR: 0.0003 +[2026-03-01 18:54:37] (step=0026239) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.133828996282528, LR: 0.0003 +[2026-03-01 18:54:45] (step=0026240) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.134024652709842, LR: 0.0003 +[2026-03-01 18:54:52] (step=0026241) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.134220309137155, LR: 0.0003 +[2026-03-01 18:55:00] (step=0026242) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.134415965564469, LR: 0.0003 +[2026-03-01 18:55:08] (step=0026243) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.134611621991782, LR: 0.0003 +[2026-03-01 18:55:16] (step=0026244) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.134807278419096, LR: 0.0003 +[2026-03-01 18:55:24] (step=0026245) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.13500293484641, LR: 0.0003 +[2026-03-01 18:55:32] (step=0026246) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.135198591273723, LR: 0.0003 +[2026-03-01 18:55:40] (step=0026247) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.135394247701037, LR: 0.0003 +[2026-03-01 18:55:47] (step=0026248) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.13558990412835, LR: 0.0003 +[2026-03-01 18:55:55] (step=0026249) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.135785560555664, LR: 0.0003 +[2026-03-01 18:56:03] (step=0026250) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.1359812169829775, LR: 0.0003 +[2026-03-01 18:56:11] (step=0026251) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.1361768734102915, LR: 0.0003 +[2026-03-01 18:56:19] (step=0026252) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 5.1363725298376055, LR: 0.0003 +[2026-03-01 18:56:27] (step=0026253) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.136568186264919, LR: 0.0003 +[2026-03-01 18:56:35] (step=0026254) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.136763842692233, LR: 0.0003 +[2026-03-01 18:56:42] (step=0026255) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.136959499119546, LR: 0.0003 +[2026-03-01 18:56:50] (step=0026256) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.13715515554686, LR: 0.0003 +[2026-03-01 18:56:58] (step=0026257) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.137350811974174, LR: 0.0003 +[2026-03-01 18:57:06] (step=0026258) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.137546468401487, LR: 0.0003 +[2026-03-01 18:57:14] (step=0026259) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.137742124828801, LR: 0.0003 +[2026-03-01 18:57:22] (step=0026260) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.137937781256114, LR: 0.0003 +[2026-03-01 18:57:30] (step=0026261) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.138133437683428, LR: 0.0003 +[2026-03-01 18:57:37] (step=0026262) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.138329094110741, LR: 0.0003 +[2026-03-01 18:57:45] (step=0026263) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.138524750538055, LR: 0.0003 +[2026-03-01 18:57:53] (step=0026264) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.138720406965369, LR: 0.0003 +[2026-03-01 18:58:01] (step=0026265) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.138916063392682, LR: 0.0003 +[2026-03-01 18:58:09] (step=0026266) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 5.139111719819996, LR: 0.0003 +[2026-03-01 18:58:17] (step=0026267) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.139307376247309, LR: 0.0003 +[2026-03-01 18:58:25] (step=0026268) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 5.139503032674623, LR: 0.0003 +[2026-03-01 18:58:32] (step=0026269) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.139698689101937, LR: 0.0003 +[2026-03-01 18:58:40] (step=0026270) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.1398943455292505, LR: 0.0003 +[2026-03-01 18:58:48] (step=0026271) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.1400900019565645, LR: 0.0003 +[2026-03-01 18:58:56] (step=0026272) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.140285658383878, LR: 0.0003 +[2026-03-01 18:59:04] (step=0026273) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.140481314811192, LR: 0.0003 +[2026-03-01 18:59:12] (step=0026274) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.140676971238505, LR: 0.0003 +[2026-03-01 18:59:19] (step=0026275) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.140872627665819, LR: 0.0003 +[2026-03-01 18:59:27] (step=0026276) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.141068284093133, LR: 0.0003 +[2026-03-01 18:59:35] (step=0026277) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.141263940520446, LR: 0.0003 +[2026-03-01 18:59:43] (step=0026278) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.14145959694776, LR: 0.0003 +[2026-03-01 18:59:51] (step=0026279) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.141655253375073, LR: 0.0003 +[2026-03-01 18:59:59] (step=0026280) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.141850909802387, LR: 0.0003 +[2026-03-01 19:00:07] (step=0026281) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.142046566229701, LR: 0.0003 +[2026-03-01 19:00:15] (step=0026282) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.142242222657014, LR: 0.0003 +[2026-03-01 19:00:23] (step=0026283) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.142437879084328, LR: 0.0003 +[2026-03-01 19:00:30] (step=0026284) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.142633535511641, LR: 0.0003 +[2026-03-01 19:00:38] (step=0026285) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.142829191938955, LR: 0.0003 +[2026-03-01 19:00:46] (step=0026286) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.143024848366269, LR: 0.0003 +[2026-03-01 19:00:54] (step=0026287) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.143220504793582, LR: 0.0003 +[2026-03-01 19:01:02] (step=0026288) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.143416161220896, LR: 0.0003 +[2026-03-01 19:01:10] (step=0026289) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 5.1436118176482095, LR: 0.0003 +[2026-03-01 19:01:17] (step=0026290) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.1438074740755235, LR: 0.0003 +[2026-03-01 19:01:25] (step=0026291) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.144003130502837, LR: 0.0003 +[2026-03-01 19:01:33] (step=0026292) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.144198786930151, LR: 0.0003 +[2026-03-01 19:01:41] (step=0026293) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.144394443357465, LR: 0.0003 +[2026-03-01 19:01:49] (step=0026294) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.144590099784778, LR: 0.0003 +[2026-03-01 19:01:57] (step=0026295) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.144785756212092, LR: 0.0003 +[2026-03-01 19:02:05] (step=0026296) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.144981412639405, LR: 0.0003 +[2026-03-01 19:02:12] (step=0026297) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.145177069066719, LR: 0.0003 +[2026-03-01 19:02:20] (step=0026298) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.145372725494033, LR: 0.0003 +[2026-03-01 19:02:28] (step=0026299) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.145568381921346, LR: 0.0003 +[2026-03-01 19:02:36] (step=0026300) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.14576403834866, LR: 0.0003 +[2026-03-01 19:02:44] (step=0026301) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.145959694775973, LR: 0.0003 +[2026-03-01 19:02:52] (step=0026302) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.146155351203287, LR: 0.0003 +[2026-03-01 19:03:00] (step=0026303) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.1463510076306, LR: 0.0003 +[2026-03-01 19:03:07] (step=0026304) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.146546664057914, LR: 0.0003 +[2026-03-01 19:03:15] (step=0026305) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.146742320485228, LR: 0.0003 +[2026-03-01 19:03:23] (step=0026306) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.146937976912541, LR: 0.0003 +[2026-03-01 19:03:31] (step=0026307) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.147133633339855, LR: 0.0003 +[2026-03-01 19:03:39] (step=0026308) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.1473292897671685, LR: 0.0003 +[2026-03-01 19:03:47] (step=0026309) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.1475249461944825, LR: 0.0003 +[2026-03-01 19:03:55] (step=0026310) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.1477206026217965, LR: 0.0003 +[2026-03-01 19:04:02] (step=0026311) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.14791625904911, LR: 0.0003 +[2026-03-01 19:04:10] (step=0026312) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.148111915476424, LR: 0.0003 +[2026-03-01 19:04:18] (step=0026313) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.148307571903737, LR: 0.0003 +[2026-03-01 19:04:26] (step=0026314) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.148503228331051, LR: 0.0003 +[2026-03-01 19:04:34] (step=0026315) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.148698884758364, LR: 0.0003 +[2026-03-01 19:04:42] (step=0026316) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.148894541185678, LR: 0.0003 +[2026-03-01 19:04:49] (step=0026317) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.149090197612992, LR: 0.0003 +[2026-03-01 19:04:57] (step=0026318) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.149285854040305, LR: 0.0003 +[2026-03-01 19:05:05] (step=0026319) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 5.149481510467619, LR: 0.0003 +[2026-03-01 19:05:13] (step=0026320) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.149677166894932, LR: 0.0003 +[2026-03-01 19:05:21] (step=0026321) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.149872823322246, LR: 0.0003 +[2026-03-01 19:05:29] (step=0026322) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.15006847974956, LR: 0.0003 +[2026-03-01 19:05:37] (step=0026323) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.150264136176873, LR: 0.0003 +[2026-03-01 19:05:44] (step=0026324) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.150459792604187, LR: 0.0003 +[2026-03-01 19:05:52] (step=0026325) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 5.1506554490315, LR: 0.0003 +[2026-03-01 19:06:00] (step=0026326) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 5.150851105458814, LR: 0.0003 +[2026-03-01 19:06:08] (step=0026327) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.1510467618861275, LR: 0.0003 +[2026-03-01 19:06:16] (step=0026328) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 5.1512424183134415, LR: 0.0003 +[2026-03-01 19:06:24] (step=0026329) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.1514380747407555, LR: 0.0003 +[2026-03-01 19:06:32] (step=0026330) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.151633731168069, LR: 0.0003 +[2026-03-01 19:06:39] (step=0026331) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.151829387595383, LR: 0.0003 +[2026-03-01 19:06:47] (step=0026332) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.152025044022696, LR: 0.0003 +[2026-03-01 19:06:55] (step=0026333) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.15222070045001, LR: 0.0003 +[2026-03-01 19:07:03] (step=0026334) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.152416356877324, LR: 0.0003 +[2026-03-01 19:07:11] (step=0026335) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.152612013304637, LR: 0.0003 +[2026-03-01 19:07:19] (step=0026336) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.152807669731951, LR: 0.0003 +[2026-03-01 19:07:27] (step=0026337) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 5.153003326159264, LR: 0.0003 +[2026-03-01 19:07:35] (step=0026338) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.153198982586578, LR: 0.0003 +[2026-03-01 19:07:42] (step=0026339) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.153394639013891, LR: 0.0003 +[2026-03-01 19:07:50] (step=0026340) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.153590295441205, LR: 0.0003 +[2026-03-01 19:07:58] (step=0026341) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.153785951868519, LR: 0.0003 +[2026-03-01 19:08:06] (step=0026342) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.153981608295832, LR: 0.0003 +[2026-03-01 19:08:14] (step=0026343) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.154177264723146, LR: 0.0003 +[2026-03-01 19:08:22] (step=0026344) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 5.154372921150459, LR: 0.0003 +[2026-03-01 19:08:30] (step=0026345) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.154568577577773, LR: 0.0003 +[2026-03-01 19:08:37] (step=0026346) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 5.154764234005087, LR: 0.0003 +[2026-03-01 19:08:45] (step=0026347) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 5.1549598904324005, LR: 0.0003 +[2026-03-01 19:08:53] (step=0026348) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.1551555468597146, LR: 0.0003 +[2026-03-01 19:09:01] (step=0026349) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.155351203287028, LR: 0.0003 +[2026-03-01 19:09:09] (step=0026350) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.155546859714342, LR: 0.0003 +[2026-03-01 19:09:17] (step=0026351) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.155742516141656, LR: 0.0003 +[2026-03-01 19:09:24] (step=0026352) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.155938172568969, LR: 0.0003 +[2026-03-01 19:09:32] (step=0026353) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.156133828996283, LR: 0.0003 +[2026-03-01 19:09:40] (step=0026354) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.156329485423596, LR: 0.0003 +[2026-03-01 19:09:48] (step=0026355) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.15652514185091, LR: 0.0003 +[2026-03-01 19:09:56] (step=0026356) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.156720798278223, LR: 0.0003 +[2026-03-01 19:10:04] (step=0026357) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.156916454705537, LR: 0.0003 +[2026-03-01 19:10:12] (step=0026358) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.157112111132851, LR: 0.0003 +[2026-03-01 19:10:19] (step=0026359) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 5.157307767560164, LR: 0.0003 +[2026-03-01 19:10:27] (step=0026360) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.157503423987478, LR: 0.0003 +[2026-03-01 19:10:35] (step=0026361) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.157699080414791, LR: 0.0003 +[2026-03-01 19:10:43] (step=0026362) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.157894736842105, LR: 0.0003 +[2026-03-01 19:10:51] (step=0026363) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.158090393269419, LR: 0.0003 +[2026-03-01 19:10:59] (step=0026364) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.158286049696732, LR: 0.0003 +[2026-03-01 19:11:07] (step=0026365) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.158481706124046, LR: 0.0003 +[2026-03-01 19:11:14] (step=0026366) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.15867736255136, LR: 0.0003 +[2026-03-01 19:11:22] (step=0026367) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.158873018978674, LR: 0.0003 +[2026-03-01 19:11:30] (step=0026368) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.159068675405987, LR: 0.0003 +[2026-03-01 19:11:38] (step=0026369) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.159264331833301, LR: 0.0003 +[2026-03-01 19:11:46] (step=0026370) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.159459988260615, LR: 0.0003 +[2026-03-01 19:11:54] (step=0026371) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.159655644687928, LR: 0.0003 +[2026-03-01 19:12:02] (step=0026372) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.159851301115242, LR: 0.0003 +[2026-03-01 19:12:09] (step=0026373) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.160046957542555, LR: 0.0003 +[2026-03-01 19:12:17] (step=0026374) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.160242613969869, LR: 0.0003 +[2026-03-01 19:12:25] (step=0026375) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.160438270397183, LR: 0.0003 +[2026-03-01 19:12:33] (step=0026376) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 5.160633926824496, LR: 0.0003 +[2026-03-01 19:12:41] (step=0026377) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.16082958325181, LR: 0.0003 +[2026-03-01 19:12:49] (step=0026378) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.161025239679123, LR: 0.0003 +[2026-03-01 19:12:57] (step=0026379) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 5.161220896106437, LR: 0.0003 +[2026-03-01 19:13:05] (step=0026380) Train Loss: 0.4423, Train Steps/Sec: 0.12, Epoch: 5.16141655253375, LR: 0.0003 +[2026-03-01 19:13:12] (step=0026381) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 5.161612208961064, LR: 0.0003 +[2026-03-01 19:13:20] (step=0026382) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.161807865388378, LR: 0.0003 +[2026-03-01 19:13:28] (step=0026383) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.162003521815691, LR: 0.0003 +[2026-03-01 19:13:36] (step=0026384) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 5.1621991782430054, LR: 0.0003 +[2026-03-01 19:13:44] (step=0026385) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.162394834670319, LR: 0.0003 +[2026-03-01 19:13:52] (step=0026386) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.162590491097633, LR: 0.0003 +[2026-03-01 19:14:00] (step=0026387) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.162786147524947, LR: 0.0003 +[2026-03-01 19:14:07] (step=0026388) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 5.16298180395226, LR: 0.0003 +[2026-03-01 19:14:15] (step=0026389) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.163177460379574, LR: 0.0003 +[2026-03-01 19:14:23] (step=0026390) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.163373116806887, LR: 0.0003 +[2026-03-01 19:14:31] (step=0026391) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.163568773234201, LR: 0.0003 +[2026-03-01 19:14:39] (step=0026392) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.163764429661514, LR: 0.0003 +[2026-03-01 19:14:47] (step=0026393) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.163960086088828, LR: 0.0003 +[2026-03-01 19:14:55] (step=0026394) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.164155742516142, LR: 0.0003 +[2026-03-01 19:15:02] (step=0026395) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.164351398943455, LR: 0.0003 +[2026-03-01 19:15:10] (step=0026396) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.164547055370769, LR: 0.0003 +[2026-03-01 19:15:18] (step=0026397) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.164742711798082, LR: 0.0003 +[2026-03-01 19:15:26] (step=0026398) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.164938368225396, LR: 0.0003 +[2026-03-01 19:15:34] (step=0026399) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.16513402465271, LR: 0.0003 +[2026-03-01 19:15:42] (step=0026400) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.165329681080023, LR: 0.0003 +[2026-03-01 19:15:49] (step=0026401) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 5.165525337507337, LR: 0.0003 +[2026-03-01 19:15:57] (step=0026402) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.1657209939346505, LR: 0.0003 +[2026-03-01 19:16:05] (step=0026403) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.1659166503619645, LR: 0.0003 +[2026-03-01 19:16:13] (step=0026404) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.1661123067892785, LR: 0.0003 +[2026-03-01 19:16:21] (step=0026405) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.166307963216592, LR: 0.0003 +[2026-03-01 19:16:29] (step=0026406) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.166503619643906, LR: 0.0003 +[2026-03-01 19:16:37] (step=0026407) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.166699276071219, LR: 0.0003 +[2026-03-01 19:16:44] (step=0026408) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.166894932498533, LR: 0.0003 +[2026-03-01 19:16:52] (step=0026409) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.167090588925846, LR: 0.0003 +[2026-03-01 19:17:00] (step=0026410) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.16728624535316, LR: 0.0003 +[2026-03-01 19:17:08] (step=0026411) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.167481901780474, LR: 0.0003 +[2026-03-01 19:17:16] (step=0026412) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.167677558207787, LR: 0.0003 +[2026-03-01 19:17:24] (step=0026413) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.167873214635101, LR: 0.0003 +[2026-03-01 19:17:32] (step=0026414) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.168068871062414, LR: 0.0003 +[2026-03-01 19:17:39] (step=0026415) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.168264527489728, LR: 0.0003 +[2026-03-01 19:17:47] (step=0026416) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.168460183917042, LR: 0.0003 +[2026-03-01 19:17:55] (step=0026417) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.168655840344355, LR: 0.0003 +[2026-03-01 19:18:03] (step=0026418) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.168851496771669, LR: 0.0003 +[2026-03-01 19:18:11] (step=0026419) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.169047153198982, LR: 0.0003 +[2026-03-01 19:18:19] (step=0026420) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.169242809626296, LR: 0.0003 +[2026-03-01 19:18:27] (step=0026421) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.1694384660536095, LR: 0.0003 +[2026-03-01 19:18:34] (step=0026422) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.1696341224809235, LR: 0.0003 +[2026-03-01 19:18:42] (step=0026423) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.1698297789082375, LR: 0.0003 +[2026-03-01 19:18:50] (step=0026424) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.170025435335551, LR: 0.0003 +[2026-03-01 19:18:58] (step=0026425) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.170221091762865, LR: 0.0003 +[2026-03-01 19:19:06] (step=0026426) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.170416748190178, LR: 0.0003 +[2026-03-01 19:19:14] (step=0026427) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.170612404617492, LR: 0.0003 +[2026-03-01 19:19:22] (step=0026428) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.170808061044806, LR: 0.0003 +[2026-03-01 19:19:30] (step=0026429) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.171003717472119, LR: 0.0003 +[2026-03-01 19:19:37] (step=0026430) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.171199373899433, LR: 0.0003 +[2026-03-01 19:19:45] (step=0026431) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.171395030326746, LR: 0.0003 +[2026-03-01 19:19:53] (step=0026432) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.17159068675406, LR: 0.0003 +[2026-03-01 19:20:01] (step=0026433) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 5.171786343181373, LR: 0.0003 +[2026-03-01 19:20:09] (step=0026434) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.171981999608687, LR: 0.0003 +[2026-03-01 19:20:17] (step=0026435) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.172177656036001, LR: 0.0003 +[2026-03-01 19:20:24] (step=0026436) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.172373312463314, LR: 0.0003 +[2026-03-01 19:20:32] (step=0026437) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.172568968890628, LR: 0.0003 +[2026-03-01 19:20:40] (step=0026438) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.172764625317941, LR: 0.0003 +[2026-03-01 19:20:48] (step=0026439) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.172960281745255, LR: 0.0003 +[2026-03-01 19:20:56] (step=0026440) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.173155938172569, LR: 0.0003 +[2026-03-01 19:21:04] (step=0026441) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.1733515945998825, LR: 0.0003 +[2026-03-01 19:21:12] (step=0026442) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.1735472510271965, LR: 0.0003 +[2026-03-01 19:21:19] (step=0026443) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.17374290745451, LR: 0.0003 +[2026-03-01 19:21:27] (step=0026444) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.173938563881824, LR: 0.0003 +[2026-03-01 19:21:35] (step=0026445) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.174134220309137, LR: 0.0003 +[2026-03-01 19:21:43] (step=0026446) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.174329876736451, LR: 0.0003 +[2026-03-01 19:21:51] (step=0026447) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.174525533163765, LR: 0.0003 +[2026-03-01 19:21:59] (step=0026448) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.174721189591078, LR: 0.0003 +[2026-03-01 19:22:06] (step=0026449) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.174916846018392, LR: 0.0003 +[2026-03-01 19:22:14] (step=0026450) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.175112502445705, LR: 0.0003 +[2026-03-01 19:22:22] (step=0026451) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.175308158873019, LR: 0.0003 +[2026-03-01 19:22:30] (step=0026452) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.175503815300333, LR: 0.0003 +[2026-03-01 19:22:38] (step=0026453) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.175699471727646, LR: 0.0003 +[2026-03-01 19:22:46] (step=0026454) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.17589512815496, LR: 0.0003 +[2026-03-01 19:22:54] (step=0026455) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.176090784582273, LR: 0.0003 +[2026-03-01 19:23:01] (step=0026456) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.176286441009587, LR: 0.0003 +[2026-03-01 19:23:09] (step=0026457) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.176482097436901, LR: 0.0003 +[2026-03-01 19:23:17] (step=0026458) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.176677753864214, LR: 0.0003 +[2026-03-01 19:23:25] (step=0026459) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.176873410291528, LR: 0.0003 +[2026-03-01 19:23:33] (step=0026460) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.1770690667188415, LR: 0.0003 +[2026-03-01 19:23:41] (step=0026461) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.1772647231461555, LR: 0.0003 +[2026-03-01 19:23:49] (step=0026462) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.177460379573469, LR: 0.0003 +[2026-03-01 19:23:56] (step=0026463) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 5.177656036000783, LR: 0.0003 +[2026-03-01 19:24:04] (step=0026464) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.177851692428097, LR: 0.0003 +[2026-03-01 19:24:12] (step=0026465) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.17804734885541, LR: 0.0003 +[2026-03-01 19:24:20] (step=0026466) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.178243005282724, LR: 0.0003 +[2026-03-01 19:24:28] (step=0026467) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.178438661710037, LR: 0.0003 +[2026-03-01 19:24:36] (step=0026468) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.178634318137351, LR: 0.0003 +[2026-03-01 19:24:43] (step=0026469) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.178829974564665, LR: 0.0003 +[2026-03-01 19:24:51] (step=0026470) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.179025630991978, LR: 0.0003 +[2026-03-01 19:24:59] (step=0026471) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.179221287419292, LR: 0.0003 +[2026-03-01 19:25:07] (step=0026472) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.179416943846605, LR: 0.0003 +[2026-03-01 19:25:15] (step=0026473) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.179612600273919, LR: 0.0003 +[2026-03-01 19:25:23] (step=0026474) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.179808256701232, LR: 0.0003 +[2026-03-01 19:25:31] (step=0026475) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.180003913128546, LR: 0.0003 +[2026-03-01 19:25:38] (step=0026476) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.18019956955586, LR: 0.0003 +[2026-03-01 19:25:46] (step=0026477) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.180395225983173, LR: 0.0003 +[2026-03-01 19:25:54] (step=0026478) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.180590882410487, LR: 0.0003 +[2026-03-01 19:26:02] (step=0026479) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.1807865388378005, LR: 0.0003 +[2026-03-01 19:26:10] (step=0026480) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.1809821952651145, LR: 0.0003 +[2026-03-01 19:26:18] (step=0026481) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 5.1811778516924285, LR: 0.0003 +[2026-03-01 19:26:26] (step=0026482) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.181373508119742, LR: 0.0003 +[2026-03-01 19:26:34] (step=0026483) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.181569164547056, LR: 0.0003 +[2026-03-01 19:26:42] (step=0026484) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.181764820974369, LR: 0.0003 +[2026-03-01 19:26:49] (step=0026485) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.181960477401683, LR: 0.0003 +[2026-03-01 19:26:57] (step=0026486) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 5.182156133828996, LR: 0.0003 +[2026-03-01 19:27:05] (step=0026487) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.18235179025631, LR: 0.0003 +[2026-03-01 19:27:13] (step=0026488) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.182547446683624, LR: 0.0003 +[2026-03-01 19:27:21] (step=0026489) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.182743103110937, LR: 0.0003 +[2026-03-01 19:27:29] (step=0026490) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.182938759538251, LR: 0.0003 +[2026-03-01 19:27:37] (step=0026491) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 5.183134415965564, LR: 0.0003 +[2026-03-01 19:27:44] (step=0026492) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.183330072392878, LR: 0.0003 +[2026-03-01 19:27:52] (step=0026493) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.183525728820192, LR: 0.0003 +[2026-03-01 19:28:00] (step=0026494) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.183721385247505, LR: 0.0003 +[2026-03-01 19:28:08] (step=0026495) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.183917041674819, LR: 0.0003 +[2026-03-01 19:28:16] (step=0026496) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.184112698102132, LR: 0.0003 +[2026-03-01 19:28:24] (step=0026497) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.184308354529446, LR: 0.0003 +[2026-03-01 19:28:31] (step=0026498) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 5.1845040109567595, LR: 0.0003 +[2026-03-01 19:28:39] (step=0026499) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.1846996673840735, LR: 0.0003 +[2026-03-01 19:28:47] (step=0026500) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 5.1848953238113875, LR: 0.0003 +[2026-03-01 19:28:47] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0026500/ +[2026-03-01 19:28:55] (step=0026501) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.185090980238701, LR: 0.0003 +[2026-03-01 19:29:03] (step=0026502) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.185286636666015, LR: 0.0003 +[2026-03-01 19:29:11] (step=0026503) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.185482293093328, LR: 0.0003 +[2026-03-01 19:29:19] (step=0026504) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.185677949520642, LR: 0.0003 +[2026-03-01 19:29:26] (step=0026505) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.185873605947956, LR: 0.0003 +[2026-03-01 19:29:34] (step=0026506) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.186069262375269, LR: 0.0003 +[2026-03-01 19:29:42] (step=0026507) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.186264918802583, LR: 0.0003 +[2026-03-01 19:29:50] (step=0026508) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.186460575229896, LR: 0.0003 +[2026-03-01 19:29:58] (step=0026509) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.18665623165721, LR: 0.0003 +[2026-03-01 19:30:06] (step=0026510) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.186851888084524, LR: 0.0003 +[2026-03-01 19:30:14] (step=0026511) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 5.187047544511837, LR: 0.0003 +[2026-03-01 19:30:21] (step=0026512) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.187243200939151, LR: 0.0003 +[2026-03-01 19:30:29] (step=0026513) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.187438857366464, LR: 0.0003 +[2026-03-01 19:30:37] (step=0026514) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.187634513793778, LR: 0.0003 +[2026-03-01 19:30:45] (step=0026515) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.187830170221091, LR: 0.0003 +[2026-03-01 19:30:53] (step=0026516) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.188025826648405, LR: 0.0003 +[2026-03-01 19:31:01] (step=0026517) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.188221483075719, LR: 0.0003 +[2026-03-01 19:31:09] (step=0026518) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.1884171395030325, LR: 0.0003 +[2026-03-01 19:31:16] (step=0026519) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 5.1886127959303465, LR: 0.0003 +[2026-03-01 19:31:24] (step=0026520) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.18880845235766, LR: 0.0003 +[2026-03-01 19:31:32] (step=0026521) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.189004108784974, LR: 0.0003 +[2026-03-01 19:31:40] (step=0026522) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.189199765212288, LR: 0.0003 +[2026-03-01 19:31:48] (step=0026523) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.189395421639601, LR: 0.0003 +[2026-03-01 19:31:56] (step=0026524) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.189591078066915, LR: 0.0003 +[2026-03-01 19:32:04] (step=0026525) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.189786734494228, LR: 0.0003 +[2026-03-01 19:32:11] (step=0026526) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 5.189982390921542, LR: 0.0003 +[2026-03-01 19:32:19] (step=0026527) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.190178047348855, LR: 0.0003 +[2026-03-01 19:32:27] (step=0026528) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.190373703776169, LR: 0.0003 +[2026-03-01 19:32:35] (step=0026529) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.190569360203483, LR: 0.0003 +[2026-03-01 19:32:43] (step=0026530) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.190765016630796, LR: 0.0003 +[2026-03-01 19:32:51] (step=0026531) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.19096067305811, LR: 0.0003 +[2026-03-01 19:32:59] (step=0026532) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.191156329485423, LR: 0.0003 +[2026-03-01 19:33:07] (step=0026533) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.191351985912737, LR: 0.0003 +[2026-03-01 19:33:14] (step=0026534) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 5.191547642340051, LR: 0.0003 +[2026-03-01 19:33:22] (step=0026535) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.191743298767364, LR: 0.0003 +[2026-03-01 19:33:30] (step=0026536) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.191938955194678, LR: 0.0003 +[2026-03-01 19:33:38] (step=0026537) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.1921346116219915, LR: 0.0003 +[2026-03-01 19:33:46] (step=0026538) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.1923302680493055, LR: 0.0003 +[2026-03-01 19:33:54] (step=0026539) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.192525924476619, LR: 0.0003 +[2026-03-01 19:34:02] (step=0026540) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.192721580903933, LR: 0.0003 +[2026-03-01 19:34:09] (step=0026541) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 5.192917237331247, LR: 0.0003 +[2026-03-01 19:34:17] (step=0026542) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.19311289375856, LR: 0.0003 +[2026-03-01 19:34:25] (step=0026543) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.193308550185874, LR: 0.0003 +[2026-03-01 19:34:33] (step=0026544) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.193504206613187, LR: 0.0003 +[2026-03-01 19:34:41] (step=0026545) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 5.193699863040501, LR: 0.0003 +[2026-03-01 19:34:49] (step=0026546) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.193895519467815, LR: 0.0003 +[2026-03-01 19:34:57] (step=0026547) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.194091175895128, LR: 0.0003 +[2026-03-01 19:35:04] (step=0026548) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.194286832322442, LR: 0.0003 +[2026-03-01 19:35:12] (step=0026549) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.194482488749755, LR: 0.0003 +[2026-03-01 19:35:20] (step=0026550) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.194678145177069, LR: 0.0003 +[2026-03-01 19:35:28] (step=0026551) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.194873801604382, LR: 0.0003 +[2026-03-01 19:35:36] (step=0026552) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.195069458031696, LR: 0.0003 +[2026-03-01 19:35:44] (step=0026553) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 5.19526511445901, LR: 0.0003 +[2026-03-01 19:35:52] (step=0026554) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.195460770886323, LR: 0.0003 +[2026-03-01 19:35:59] (step=0026555) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.195656427313637, LR: 0.0003 +[2026-03-01 19:36:07] (step=0026556) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.1958520837409505, LR: 0.0003 +[2026-03-01 19:36:15] (step=0026557) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.1960477401682645, LR: 0.0003 +[2026-03-01 19:36:23] (step=0026558) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.1962433965955785, LR: 0.0003 +[2026-03-01 19:36:31] (step=0026559) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.196439053022892, LR: 0.0003 +[2026-03-01 19:36:39] (step=0026560) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.196634709450206, LR: 0.0003 +[2026-03-01 19:36:47] (step=0026561) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.196830365877519, LR: 0.0003 +[2026-03-01 19:36:54] (step=0026562) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.197026022304833, LR: 0.0003 +[2026-03-01 19:37:02] (step=0026563) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.197221678732146, LR: 0.0003 +[2026-03-01 19:37:10] (step=0026564) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.19741733515946, LR: 0.0003 +[2026-03-01 19:37:18] (step=0026565) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.197612991586774, LR: 0.0003 +[2026-03-01 19:37:26] (step=0026566) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.197808648014087, LR: 0.0003 +[2026-03-01 19:37:34] (step=0026567) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.198004304441401, LR: 0.0003 +[2026-03-01 19:37:42] (step=0026568) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.198199960868714, LR: 0.0003 +[2026-03-01 19:37:49] (step=0026569) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.198395617296028, LR: 0.0003 +[2026-03-01 19:37:57] (step=0026570) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.198591273723342, LR: 0.0003 +[2026-03-01 19:38:05] (step=0026571) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.198786930150655, LR: 0.0003 +[2026-03-01 19:38:13] (step=0026572) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.198982586577969, LR: 0.0003 +[2026-03-01 19:38:21] (step=0026573) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.199178243005282, LR: 0.0003 +[2026-03-01 19:38:29] (step=0026574) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.199373899432596, LR: 0.0003 +[2026-03-01 19:38:36] (step=0026575) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.19956955585991, LR: 0.0003 +[2026-03-01 19:38:44] (step=0026576) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.1997652122872235, LR: 0.0003 +[2026-03-01 19:38:52] (step=0026577) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.1999608687145376, LR: 0.0003 +[2026-03-01 19:39:00] (step=0026578) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.200156525141851, LR: 0.0003 +[2026-03-01 19:39:08] (step=0026579) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.200352181569165, LR: 0.0003 +[2026-03-01 19:39:16] (step=0026580) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.200547837996478, LR: 0.0003 +[2026-03-01 19:39:24] (step=0026581) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.200743494423792, LR: 0.0003 +[2026-03-01 19:39:32] (step=0026582) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.200939150851106, LR: 0.0003 +[2026-03-01 19:39:40] (step=0026583) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.201134807278419, LR: 0.0003 +[2026-03-01 19:39:47] (step=0026584) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.201330463705733, LR: 0.0003 +[2026-03-01 19:39:55] (step=0026585) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.201526120133046, LR: 0.0003 +[2026-03-01 19:40:03] (step=0026586) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.20172177656036, LR: 0.0003 +[2026-03-01 19:40:11] (step=0026587) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.201917432987674, LR: 0.0003 +[2026-03-01 19:40:19] (step=0026588) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.202113089414987, LR: 0.0003 +[2026-03-01 19:40:27] (step=0026589) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.202308745842301, LR: 0.0003 +[2026-03-01 19:40:35] (step=0026590) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.202504402269614, LR: 0.0003 +[2026-03-01 19:40:42] (step=0026591) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.202700058696928, LR: 0.0003 +[2026-03-01 19:40:50] (step=0026592) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 5.202895715124241, LR: 0.0003 +[2026-03-01 19:40:58] (step=0026593) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.203091371551555, LR: 0.0003 +[2026-03-01 19:41:06] (step=0026594) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.203287027978869, LR: 0.0003 +[2026-03-01 19:41:14] (step=0026595) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.203482684406183, LR: 0.0003 +[2026-03-01 19:41:22] (step=0026596) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.203678340833497, LR: 0.0003 +[2026-03-01 19:41:30] (step=0026597) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.20387399726081, LR: 0.0003 +[2026-03-01 19:41:37] (step=0026598) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.204069653688124, LR: 0.0003 +[2026-03-01 19:41:45] (step=0026599) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.204265310115438, LR: 0.0003 +[2026-03-01 19:41:53] (step=0026600) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.204460966542751, LR: 0.0003 +[2026-03-01 19:42:01] (step=0026601) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.204656622970065, LR: 0.0003 +[2026-03-01 19:42:09] (step=0026602) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.204852279397378, LR: 0.0003 +[2026-03-01 19:42:17] (step=0026603) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.205047935824692, LR: 0.0003 +[2026-03-01 19:42:25] (step=0026604) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.205243592252005, LR: 0.0003 +[2026-03-01 19:42:32] (step=0026605) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.205439248679319, LR: 0.0003 +[2026-03-01 19:42:40] (step=0026606) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.205634905106633, LR: 0.0003 +[2026-03-01 19:42:48] (step=0026607) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.205830561533946, LR: 0.0003 +[2026-03-01 19:42:56] (step=0026608) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.20602621796126, LR: 0.0003 +[2026-03-01 19:43:04] (step=0026609) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.206221874388573, LR: 0.0003 +[2026-03-01 19:43:12] (step=0026610) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.206417530815887, LR: 0.0003 +[2026-03-01 19:43:20] (step=0026611) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.206613187243201, LR: 0.0003 +[2026-03-01 19:43:27] (step=0026612) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.206808843670514, LR: 0.0003 +[2026-03-01 19:43:35] (step=0026613) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.2070045000978284, LR: 0.0003 +[2026-03-01 19:43:43] (step=0026614) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.207200156525142, LR: 0.0003 +[2026-03-01 19:43:51] (step=0026615) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 5.207395812952456, LR: 0.0003 +[2026-03-01 19:43:59] (step=0026616) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.207591469379769, LR: 0.0003 +[2026-03-01 19:44:07] (step=0026617) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.207787125807083, LR: 0.0003 +[2026-03-01 19:44:15] (step=0026618) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.207982782234397, LR: 0.0003 +[2026-03-01 19:44:22] (step=0026619) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.20817843866171, LR: 0.0003 +[2026-03-01 19:44:30] (step=0026620) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.208374095089024, LR: 0.0003 +[2026-03-01 19:44:38] (step=0026621) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.208569751516337, LR: 0.0003 +[2026-03-01 19:44:46] (step=0026622) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.208765407943651, LR: 0.0003 +[2026-03-01 19:44:54] (step=0026623) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.208961064370965, LR: 0.0003 +[2026-03-01 19:45:02] (step=0026624) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.209156720798278, LR: 0.0003 +[2026-03-01 19:45:10] (step=0026625) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.209352377225592, LR: 0.0003 +[2026-03-01 19:45:18] (step=0026626) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.209548033652905, LR: 0.0003 +[2026-03-01 19:45:25] (step=0026627) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 5.209743690080219, LR: 0.0003 +[2026-03-01 19:45:33] (step=0026628) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.209939346507533, LR: 0.0003 +[2026-03-01 19:45:41] (step=0026629) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.210135002934846, LR: 0.0003 +[2026-03-01 19:45:49] (step=0026630) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.21033065936216, LR: 0.0003 +[2026-03-01 19:45:57] (step=0026631) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.2105263157894735, LR: 0.0003 +[2026-03-01 19:46:05] (step=0026632) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.2107219722167875, LR: 0.0003 +[2026-03-01 19:46:13] (step=0026633) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.210917628644101, LR: 0.0003 +[2026-03-01 19:46:20] (step=0026634) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.211113285071415, LR: 0.0003 +[2026-03-01 19:46:28] (step=0026635) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.211308941498729, LR: 0.0003 +[2026-03-01 19:46:36] (step=0026636) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 5.211504597926042, LR: 0.0003 +[2026-03-01 19:46:44] (step=0026637) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.211700254353356, LR: 0.0003 +[2026-03-01 19:46:52] (step=0026638) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.211895910780669, LR: 0.0003 +[2026-03-01 19:47:00] (step=0026639) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 5.212091567207983, LR: 0.0003 +[2026-03-01 19:47:08] (step=0026640) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.212287223635297, LR: 0.0003 +[2026-03-01 19:47:15] (step=0026641) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.21248288006261, LR: 0.0003 +[2026-03-01 19:47:23] (step=0026642) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.212678536489924, LR: 0.0003 +[2026-03-01 19:47:31] (step=0026643) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.212874192917237, LR: 0.0003 +[2026-03-01 19:47:39] (step=0026644) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.213069849344551, LR: 0.0003 +[2026-03-01 19:47:47] (step=0026645) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.213265505771864, LR: 0.0003 +[2026-03-01 19:47:55] (step=0026646) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.213461162199178, LR: 0.0003 +[2026-03-01 19:48:02] (step=0026647) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 5.213656818626492, LR: 0.0003 +[2026-03-01 19:48:10] (step=0026648) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.213852475053805, LR: 0.0003 +[2026-03-01 19:48:18] (step=0026649) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.214048131481119, LR: 0.0003 +[2026-03-01 19:48:26] (step=0026650) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.2142437879084325, LR: 0.0003 +[2026-03-01 19:48:34] (step=0026651) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.2144394443357465, LR: 0.0003 +[2026-03-01 19:48:42] (step=0026652) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.2146351007630605, LR: 0.0003 +[2026-03-01 19:48:50] (step=0026653) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.214830757190374, LR: 0.0003 +[2026-03-01 19:48:57] (step=0026654) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.215026413617688, LR: 0.0003 +[2026-03-01 19:49:05] (step=0026655) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.215222070045001, LR: 0.0003 +[2026-03-01 19:49:13] (step=0026656) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.215417726472315, LR: 0.0003 +[2026-03-01 19:49:21] (step=0026657) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.215613382899628, LR: 0.0003 +[2026-03-01 19:49:29] (step=0026658) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.215809039326942, LR: 0.0003 +[2026-03-01 19:49:37] (step=0026659) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.216004695754256, LR: 0.0003 +[2026-03-01 19:49:45] (step=0026660) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 5.216200352181569, LR: 0.0003 +[2026-03-01 19:49:52] (step=0026661) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 5.216396008608883, LR: 0.0003 +[2026-03-01 19:50:00] (step=0026662) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.216591665036196, LR: 0.0003 +[2026-03-01 19:50:08] (step=0026663) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.21678732146351, LR: 0.0003 +[2026-03-01 19:50:16] (step=0026664) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.216982977890824, LR: 0.0003 +[2026-03-01 19:50:24] (step=0026665) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.217178634318137, LR: 0.0003 +[2026-03-01 19:50:32] (step=0026666) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.217374290745451, LR: 0.0003 +[2026-03-01 19:50:40] (step=0026667) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.217569947172764, LR: 0.0003 +[2026-03-01 19:50:47] (step=0026668) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.217765603600078, LR: 0.0003 +[2026-03-01 19:50:55] (step=0026669) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.2179612600273915, LR: 0.0003 +[2026-03-01 19:51:03] (step=0026670) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.2181569164547055, LR: 0.0003 +[2026-03-01 19:51:11] (step=0026671) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 5.2183525728820195, LR: 0.0003 +[2026-03-01 19:51:19] (step=0026672) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.218548229309333, LR: 0.0003 +[2026-03-01 19:51:27] (step=0026673) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.218743885736647, LR: 0.0003 +[2026-03-01 19:51:35] (step=0026674) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.21893954216396, LR: 0.0003 +[2026-03-01 19:51:42] (step=0026675) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.219135198591274, LR: 0.0003 +[2026-03-01 19:51:50] (step=0026676) Train Loss: 0.4321, Train Steps/Sec: 0.12, Epoch: 5.219330855018588, LR: 0.0003 +[2026-03-01 19:51:58] (step=0026677) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.219526511445901, LR: 0.0003 +[2026-03-01 19:52:06] (step=0026678) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.219722167873215, LR: 0.0003 +[2026-03-01 19:52:14] (step=0026679) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.219917824300528, LR: 0.0003 +[2026-03-01 19:52:22] (step=0026680) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.220113480727842, LR: 0.0003 +[2026-03-01 19:52:30] (step=0026681) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.220309137155156, LR: 0.0003 +[2026-03-01 19:52:38] (step=0026682) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.220504793582469, LR: 0.0003 +[2026-03-01 19:52:46] (step=0026683) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.220700450009783, LR: 0.0003 +[2026-03-01 19:52:53] (step=0026684) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.220896106437096, LR: 0.0003 +[2026-03-01 19:53:01] (step=0026685) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.22109176286441, LR: 0.0003 +[2026-03-01 19:53:09] (step=0026686) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.221287419291723, LR: 0.0003 +[2026-03-01 19:53:17] (step=0026687) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.221483075719037, LR: 0.0003 +[2026-03-01 19:53:25] (step=0026688) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.221678732146351, LR: 0.0003 +[2026-03-01 19:53:33] (step=0026689) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.2218743885736645, LR: 0.0003 +[2026-03-01 19:53:40] (step=0026690) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 5.2220700450009785, LR: 0.0003 +[2026-03-01 19:53:48] (step=0026691) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 5.222265701428292, LR: 0.0003 +[2026-03-01 19:53:56] (step=0026692) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.222461357855606, LR: 0.0003 +[2026-03-01 19:54:04] (step=0026693) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.22265701428292, LR: 0.0003 +[2026-03-01 19:54:12] (step=0026694) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.222852670710233, LR: 0.0003 +[2026-03-01 19:54:20] (step=0026695) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 5.223048327137547, LR: 0.0003 +[2026-03-01 19:54:28] (step=0026696) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.22324398356486, LR: 0.0003 +[2026-03-01 19:54:36] (step=0026697) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.223439639992174, LR: 0.0003 +[2026-03-01 19:54:43] (step=0026698) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.223635296419487, LR: 0.0003 +[2026-03-01 19:54:51] (step=0026699) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.223830952846801, LR: 0.0003 +[2026-03-01 19:54:59] (step=0026700) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 5.224026609274115, LR: 0.0003 +[2026-03-01 19:55:07] (step=0026701) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.224222265701428, LR: 0.0003 +[2026-03-01 19:55:15] (step=0026702) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.224417922128742, LR: 0.0003 +[2026-03-01 19:55:23] (step=0026703) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.224613578556055, LR: 0.0003 +[2026-03-01 19:55:31] (step=0026704) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.224809234983369, LR: 0.0003 +[2026-03-01 19:55:38] (step=0026705) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.225004891410683, LR: 0.0003 +[2026-03-01 19:55:46] (step=0026706) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.225200547837996, LR: 0.0003 +[2026-03-01 19:55:54] (step=0026707) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.22539620426531, LR: 0.0003 +[2026-03-01 19:56:02] (step=0026708) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.2255918606926235, LR: 0.0003 +[2026-03-01 19:56:10] (step=0026709) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.2257875171199375, LR: 0.0003 +[2026-03-01 19:56:18] (step=0026710) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.225983173547251, LR: 0.0003 +[2026-03-01 19:56:25] (step=0026711) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 5.226178829974565, LR: 0.0003 +[2026-03-01 19:56:33] (step=0026712) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.226374486401879, LR: 0.0003 +[2026-03-01 19:56:41] (step=0026713) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.226570142829192, LR: 0.0003 +[2026-03-01 19:56:49] (step=0026714) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.226765799256506, LR: 0.0003 +[2026-03-01 19:56:57] (step=0026715) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.226961455683819, LR: 0.0003 +[2026-03-01 19:57:05] (step=0026716) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 5.227157112111133, LR: 0.0003 +[2026-03-01 19:57:13] (step=0026717) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.227352768538447, LR: 0.0003 +[2026-03-01 19:57:20] (step=0026718) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.22754842496576, LR: 0.0003 +[2026-03-01 19:57:28] (step=0026719) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.227744081393074, LR: 0.0003 +[2026-03-01 19:57:36] (step=0026720) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.227939737820387, LR: 0.0003 +[2026-03-01 19:57:44] (step=0026721) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.228135394247701, LR: 0.0003 +[2026-03-01 19:57:52] (step=0026722) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.228331050675014, LR: 0.0003 +[2026-03-01 19:58:00] (step=0026723) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.228526707102328, LR: 0.0003 +[2026-03-01 19:58:08] (step=0026724) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.228722363529642, LR: 0.0003 +[2026-03-01 19:58:16] (step=0026725) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.228918019956955, LR: 0.0003 +[2026-03-01 19:58:23] (step=0026726) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.229113676384269, LR: 0.0003 +[2026-03-01 19:58:31] (step=0026727) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.2293093328115825, LR: 0.0003 +[2026-03-01 19:58:39] (step=0026728) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.2295049892388965, LR: 0.0003 +[2026-03-01 19:58:47] (step=0026729) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.2297006456662105, LR: 0.0003 +[2026-03-01 19:58:55] (step=0026730) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 5.229896302093524, LR: 0.0003 +[2026-03-01 19:59:03] (step=0026731) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.230091958520838, LR: 0.0003 +[2026-03-01 19:59:11] (step=0026732) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 5.230287614948151, LR: 0.0003 +[2026-03-01 19:59:19] (step=0026733) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 5.230483271375465, LR: 0.0003 +[2026-03-01 19:59:26] (step=0026734) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.230678927802778, LR: 0.0003 +[2026-03-01 19:59:34] (step=0026735) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.230874584230092, LR: 0.0003 +[2026-03-01 19:59:42] (step=0026736) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.231070240657406, LR: 0.0003 +[2026-03-01 19:59:50] (step=0026737) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.231265897084719, LR: 0.0003 +[2026-03-01 19:59:58] (step=0026738) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.231461553512033, LR: 0.0003 +[2026-03-01 20:00:06] (step=0026739) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.231657209939346, LR: 0.0003 +[2026-03-01 20:00:14] (step=0026740) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.23185286636666, LR: 0.0003 +[2026-03-01 20:00:21] (step=0026741) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.232048522793974, LR: 0.0003 +[2026-03-01 20:00:29] (step=0026742) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.232244179221287, LR: 0.0003 +[2026-03-01 20:00:37] (step=0026743) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 5.232439835648601, LR: 0.0003 +[2026-03-01 20:00:45] (step=0026744) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 5.232635492075914, LR: 0.0003 +[2026-03-01 20:00:53] (step=0026745) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 5.232831148503228, LR: 0.0003 +[2026-03-01 20:01:01] (step=0026746) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.233026804930542, LR: 0.0003 +[2026-03-01 20:01:09] (step=0026747) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.2332224613578555, LR: 0.0003 +[2026-03-01 20:01:16] (step=0026748) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.2334181177851695, LR: 0.0003 +[2026-03-01 20:01:24] (step=0026749) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.233613774212483, LR: 0.0003 +[2026-03-01 20:01:32] (step=0026750) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.233809430639797, LR: 0.0003 +[2026-03-01 20:01:40] (step=0026751) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.23400508706711, LR: 0.0003 +[2026-03-01 20:01:48] (step=0026752) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.234200743494424, LR: 0.0003 +[2026-03-01 20:01:56] (step=0026753) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.234396399921738, LR: 0.0003 +[2026-03-01 20:02:04] (step=0026754) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.234592056349051, LR: 0.0003 +[2026-03-01 20:02:11] (step=0026755) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.234787712776365, LR: 0.0003 +[2026-03-01 20:02:19] (step=0026756) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.234983369203678, LR: 0.0003 +[2026-03-01 20:02:27] (step=0026757) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.235179025630992, LR: 0.0003 +[2026-03-01 20:02:35] (step=0026758) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.235374682058306, LR: 0.0003 +[2026-03-01 20:02:43] (step=0026759) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.235570338485619, LR: 0.0003 +[2026-03-01 20:02:51] (step=0026760) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 5.235765994912933, LR: 0.0003 +[2026-03-01 20:02:58] (step=0026761) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.235961651340246, LR: 0.0003 +[2026-03-01 20:03:06] (step=0026762) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.23615730776756, LR: 0.0003 +[2026-03-01 20:03:14] (step=0026763) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 5.236352964194873, LR: 0.0003 +[2026-03-01 20:03:22] (step=0026764) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.236548620622187, LR: 0.0003 +[2026-03-01 20:03:30] (step=0026765) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.236744277049501, LR: 0.0003 +[2026-03-01 20:03:38] (step=0026766) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.2369399334768145, LR: 0.0003 +[2026-03-01 20:03:46] (step=0026767) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 5.2371355899041285, LR: 0.0003 +[2026-03-01 20:03:54] (step=0026768) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.237331246331442, LR: 0.0003 +[2026-03-01 20:04:01] (step=0026769) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.237526902758756, LR: 0.0003 +[2026-03-01 20:04:09] (step=0026770) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.23772255918607, LR: 0.0003 +[2026-03-01 20:04:17] (step=0026771) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.237918215613383, LR: 0.0003 +[2026-03-01 20:04:25] (step=0026772) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.238113872040697, LR: 0.0003 +[2026-03-01 20:04:33] (step=0026773) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.23830952846801, LR: 0.0003 +[2026-03-01 20:04:41] (step=0026774) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.238505184895324, LR: 0.0003 +[2026-03-01 20:04:49] (step=0026775) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.238700841322637, LR: 0.0003 +[2026-03-01 20:04:56] (step=0026776) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.238896497749951, LR: 0.0003 +[2026-03-01 20:05:04] (step=0026777) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.239092154177265, LR: 0.0003 +[2026-03-01 20:05:12] (step=0026778) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.239287810604578, LR: 0.0003 +[2026-03-01 20:05:20] (step=0026779) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.239483467031892, LR: 0.0003 +[2026-03-01 20:05:28] (step=0026780) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.239679123459205, LR: 0.0003 +[2026-03-01 20:05:36] (step=0026781) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.239874779886519, LR: 0.0003 +[2026-03-01 20:05:43] (step=0026782) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.240070436313833, LR: 0.0003 +[2026-03-01 20:05:51] (step=0026783) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.240266092741146, LR: 0.0003 +[2026-03-01 20:05:59] (step=0026784) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.24046174916846, LR: 0.0003 +[2026-03-01 20:06:07] (step=0026785) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.2406574055957735, LR: 0.0003 +[2026-03-01 20:06:15] (step=0026786) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.2408530620230875, LR: 0.0003 +[2026-03-01 20:06:23] (step=0026787) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.241048718450401, LR: 0.0003 +[2026-03-01 20:06:31] (step=0026788) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.241244374877715, LR: 0.0003 +[2026-03-01 20:06:39] (step=0026789) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.241440031305029, LR: 0.0003 +[2026-03-01 20:06:46] (step=0026790) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.241635687732342, LR: 0.0003 +[2026-03-01 20:06:54] (step=0026791) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.241831344159656, LR: 0.0003 +[2026-03-01 20:07:02] (step=0026792) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.242027000586969, LR: 0.0003 +[2026-03-01 20:07:10] (step=0026793) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.242222657014283, LR: 0.0003 +[2026-03-01 20:07:18] (step=0026794) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.242418313441597, LR: 0.0003 +[2026-03-01 20:07:26] (step=0026795) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.24261396986891, LR: 0.0003 +[2026-03-01 20:07:33] (step=0026796) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.242809626296224, LR: 0.0003 +[2026-03-01 20:07:41] (step=0026797) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.243005282723537, LR: 0.0003 +[2026-03-01 20:07:49] (step=0026798) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 5.243200939150851, LR: 0.0003 +[2026-03-01 20:07:57] (step=0026799) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.243396595578165, LR: 0.0003 +[2026-03-01 20:08:05] (step=0026800) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.243592252005478, LR: 0.0003 +[2026-03-01 20:08:13] (step=0026801) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.243787908432792, LR: 0.0003 +[2026-03-01 20:08:21] (step=0026802) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 5.243983564860105, LR: 0.0003 +[2026-03-01 20:08:28] (step=0026803) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.244179221287419, LR: 0.0003 +[2026-03-01 20:08:36] (step=0026804) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.2443748777147325, LR: 0.0003 +[2026-03-01 20:08:44] (step=0026805) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.2445705341420465, LR: 0.0003 +[2026-03-01 20:08:52] (step=0026806) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.2447661905693606, LR: 0.0003 +[2026-03-01 20:09:00] (step=0026807) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.244961846996674, LR: 0.0003 +[2026-03-01 20:09:08] (step=0026808) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.245157503423988, LR: 0.0003 +[2026-03-01 20:09:16] (step=0026809) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.245353159851301, LR: 0.0003 +[2026-03-01 20:09:23] (step=0026810) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.245548816278615, LR: 0.0003 +[2026-03-01 20:09:31] (step=0026811) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.245744472705929, LR: 0.0003 +[2026-03-01 20:09:39] (step=0026812) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.245940129133242, LR: 0.0003 +[2026-03-01 20:09:47] (step=0026813) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.246135785560556, LR: 0.0003 +[2026-03-01 20:09:55] (step=0026814) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.246331441987869, LR: 0.0003 +[2026-03-01 20:10:03] (step=0026815) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.246527098415183, LR: 0.0003 +[2026-03-01 20:10:11] (step=0026816) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.246722754842496, LR: 0.0003 +[2026-03-01 20:10:18] (step=0026817) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.24691841126981, LR: 0.0003 +[2026-03-01 20:10:26] (step=0026818) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.247114067697124, LR: 0.0003 +[2026-03-01 20:10:34] (step=0026819) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 5.247309724124437, LR: 0.0003 +[2026-03-01 20:10:42] (step=0026820) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.247505380551751, LR: 0.0003 +[2026-03-01 20:10:50] (step=0026821) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.247701036979064, LR: 0.0003 +[2026-03-01 20:10:58] (step=0026822) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.247896693406378, LR: 0.0003 +[2026-03-01 20:11:06] (step=0026823) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.248092349833692, LR: 0.0003 +[2026-03-01 20:11:14] (step=0026824) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.248288006261006, LR: 0.0003 +[2026-03-01 20:11:21] (step=0026825) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.24848366268832, LR: 0.0003 +[2026-03-01 20:11:29] (step=0026826) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.248679319115633, LR: 0.0003 +[2026-03-01 20:11:37] (step=0026827) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.248874975542947, LR: 0.0003 +[2026-03-01 20:11:45] (step=0026828) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 5.24907063197026, LR: 0.0003 +[2026-03-01 20:11:53] (step=0026829) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.249266288397574, LR: 0.0003 +[2026-03-01 20:12:01] (step=0026830) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.249461944824888, LR: 0.0003 +[2026-03-01 20:12:09] (step=0026831) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.249657601252201, LR: 0.0003 +[2026-03-01 20:12:16] (step=0026832) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.249853257679515, LR: 0.0003 +[2026-03-01 20:12:24] (step=0026833) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 5.250048914106828, LR: 0.0003 +[2026-03-01 20:12:32] (step=0026834) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.250244570534142, LR: 0.0003 +[2026-03-01 20:12:40] (step=0026835) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.250440226961456, LR: 0.0003 +[2026-03-01 20:12:48] (step=0026836) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.250635883388769, LR: 0.0003 +[2026-03-01 20:12:56] (step=0026837) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.250831539816083, LR: 0.0003 +[2026-03-01 20:13:04] (step=0026838) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.251027196243396, LR: 0.0003 +[2026-03-01 20:13:11] (step=0026839) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.25122285267071, LR: 0.0003 +[2026-03-01 20:13:19] (step=0026840) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.251418509098023, LR: 0.0003 +[2026-03-01 20:13:27] (step=0026841) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.251614165525337, LR: 0.0003 +[2026-03-01 20:13:35] (step=0026842) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 5.2518098219526514, LR: 0.0003 +[2026-03-01 20:13:43] (step=0026843) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.252005478379965, LR: 0.0003 +[2026-03-01 20:13:51] (step=0026844) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.252201134807279, LR: 0.0003 +[2026-03-01 20:13:59] (step=0026845) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.252396791234592, LR: 0.0003 +[2026-03-01 20:14:06] (step=0026846) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.252592447661906, LR: 0.0003 +[2026-03-01 20:14:14] (step=0026847) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.25278810408922, LR: 0.0003 +[2026-03-01 20:14:22] (step=0026848) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.252983760516533, LR: 0.0003 +[2026-03-01 20:14:30] (step=0026849) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.253179416943847, LR: 0.0003 +[2026-03-01 20:14:38] (step=0026850) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.25337507337116, LR: 0.0003 +[2026-03-01 20:14:46] (step=0026851) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.253570729798474, LR: 0.0003 +[2026-03-01 20:14:54] (step=0026852) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.253766386225788, LR: 0.0003 +[2026-03-01 20:15:02] (step=0026853) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.253962042653101, LR: 0.0003 +[2026-03-01 20:15:09] (step=0026854) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 5.254157699080415, LR: 0.0003 +[2026-03-01 20:15:17] (step=0026855) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.254353355507728, LR: 0.0003 +[2026-03-01 20:15:25] (step=0026856) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.254549011935042, LR: 0.0003 +[2026-03-01 20:15:33] (step=0026857) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.254744668362355, LR: 0.0003 +[2026-03-01 20:15:41] (step=0026858) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.254940324789669, LR: 0.0003 +[2026-03-01 20:15:49] (step=0026859) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.255135981216983, LR: 0.0003 +[2026-03-01 20:15:57] (step=0026860) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.2553316376442964, LR: 0.0003 +[2026-03-01 20:16:04] (step=0026861) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.2555272940716105, LR: 0.0003 +[2026-03-01 20:16:12] (step=0026862) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.255722950498924, LR: 0.0003 +[2026-03-01 20:16:20] (step=0026863) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.255918606926238, LR: 0.0003 +[2026-03-01 20:16:28] (step=0026864) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.256114263353552, LR: 0.0003 +[2026-03-01 20:16:36] (step=0026865) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.256309919780865, LR: 0.0003 +[2026-03-01 20:16:44] (step=0026866) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.256505576208179, LR: 0.0003 +[2026-03-01 20:16:52] (step=0026867) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.256701232635492, LR: 0.0003 +[2026-03-01 20:16:59] (step=0026868) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.256896889062806, LR: 0.0003 +[2026-03-01 20:17:07] (step=0026869) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.257092545490119, LR: 0.0003 +[2026-03-01 20:17:15] (step=0026870) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.257288201917433, LR: 0.0003 +[2026-03-01 20:17:23] (step=0026871) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.257483858344747, LR: 0.0003 +[2026-03-01 20:17:31] (step=0026872) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.25767951477206, LR: 0.0003 +[2026-03-01 20:17:39] (step=0026873) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.257875171199374, LR: 0.0003 +[2026-03-01 20:17:47] (step=0026874) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.258070827626687, LR: 0.0003 +[2026-03-01 20:17:54] (step=0026875) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.258266484054001, LR: 0.0003 +[2026-03-01 20:18:02] (step=0026876) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.258462140481315, LR: 0.0003 +[2026-03-01 20:18:10] (step=0026877) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.258657796908628, LR: 0.0003 +[2026-03-01 20:18:18] (step=0026878) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.258853453335942, LR: 0.0003 +[2026-03-01 20:18:26] (step=0026879) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.2590491097632555, LR: 0.0003 +[2026-03-01 20:18:34] (step=0026880) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.2592447661905695, LR: 0.0003 +[2026-03-01 20:18:42] (step=0026881) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.259440422617883, LR: 0.0003 +[2026-03-01 20:18:50] (step=0026882) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.259636079045197, LR: 0.0003 +[2026-03-01 20:18:57] (step=0026883) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.259831735472511, LR: 0.0003 +[2026-03-01 20:19:05] (step=0026884) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.260027391899824, LR: 0.0003 +[2026-03-01 20:19:13] (step=0026885) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.260223048327138, LR: 0.0003 +[2026-03-01 20:19:21] (step=0026886) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.260418704754451, LR: 0.0003 +[2026-03-01 20:19:29] (step=0026887) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.260614361181765, LR: 0.0003 +[2026-03-01 20:19:37] (step=0026888) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.260810017609079, LR: 0.0003 +[2026-03-01 20:19:45] (step=0026889) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.261005674036392, LR: 0.0003 +[2026-03-01 20:19:52] (step=0026890) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.261201330463706, LR: 0.0003 +[2026-03-01 20:20:00] (step=0026891) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.261396986891019, LR: 0.0003 +[2026-03-01 20:20:08] (step=0026892) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.261592643318333, LR: 0.0003 +[2026-03-01 20:20:16] (step=0026893) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.261788299745646, LR: 0.0003 +[2026-03-01 20:20:24] (step=0026894) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.26198395617296, LR: 0.0003 +[2026-03-01 20:20:32] (step=0026895) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.262179612600274, LR: 0.0003 +[2026-03-01 20:20:39] (step=0026896) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 5.262375269027587, LR: 0.0003 +[2026-03-01 20:20:47] (step=0026897) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.262570925454901, LR: 0.0003 +[2026-03-01 20:20:55] (step=0026898) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.2627665818822145, LR: 0.0003 +[2026-03-01 20:21:03] (step=0026899) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 5.2629622383095285, LR: 0.0003 +[2026-03-01 20:21:11] (step=0026900) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.2631578947368425, LR: 0.0003 +[2026-03-01 20:21:19] (step=0026901) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.263353551164156, LR: 0.0003 +[2026-03-01 20:21:27] (step=0026902) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.26354920759147, LR: 0.0003 +[2026-03-01 20:21:34] (step=0026903) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.263744864018783, LR: 0.0003 +[2026-03-01 20:21:42] (step=0026904) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.263940520446097, LR: 0.0003 +[2026-03-01 20:21:50] (step=0026905) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.264136176873411, LR: 0.0003 +[2026-03-01 20:21:58] (step=0026906) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 5.264331833300724, LR: 0.0003 +[2026-03-01 20:22:06] (step=0026907) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.264527489728038, LR: 0.0003 +[2026-03-01 20:22:14] (step=0026908) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.264723146155351, LR: 0.0003 +[2026-03-01 20:22:22] (step=0026909) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.264918802582665, LR: 0.0003 +[2026-03-01 20:22:29] (step=0026910) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.265114459009978, LR: 0.0003 +[2026-03-01 20:22:37] (step=0026911) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.265310115437292, LR: 0.0003 +[2026-03-01 20:22:45] (step=0026912) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.265505771864606, LR: 0.0003 +[2026-03-01 20:22:53] (step=0026913) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.265701428291919, LR: 0.0003 +[2026-03-01 20:23:01] (step=0026914) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.265897084719233, LR: 0.0003 +[2026-03-01 20:23:09] (step=0026915) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.266092741146546, LR: 0.0003 +[2026-03-01 20:23:17] (step=0026916) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.26628839757386, LR: 0.0003 +[2026-03-01 20:23:25] (step=0026917) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.266484054001174, LR: 0.0003 +[2026-03-01 20:23:33] (step=0026918) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.2666797104284875, LR: 0.0003 +[2026-03-01 20:23:40] (step=0026919) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.2668753668558015, LR: 0.0003 +[2026-03-01 20:23:48] (step=0026920) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.267071023283115, LR: 0.0003 +[2026-03-01 20:23:56] (step=0026921) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 5.267266679710429, LR: 0.0003 +[2026-03-01 20:24:04] (step=0026922) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.267462336137742, LR: 0.0003 +[2026-03-01 20:24:12] (step=0026923) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.267657992565056, LR: 0.0003 +[2026-03-01 20:24:20] (step=0026924) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.26785364899237, LR: 0.0003 +[2026-03-01 20:24:28] (step=0026925) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.268049305419683, LR: 0.0003 +[2026-03-01 20:24:35] (step=0026926) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.268244961846997, LR: 0.0003 +[2026-03-01 20:24:43] (step=0026927) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.26844061827431, LR: 0.0003 +[2026-03-01 20:24:51] (step=0026928) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.268636274701624, LR: 0.0003 +[2026-03-01 20:24:59] (step=0026929) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.268831931128938, LR: 0.0003 +[2026-03-01 20:25:07] (step=0026930) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.269027587556251, LR: 0.0003 +[2026-03-01 20:25:15] (step=0026931) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.269223243983565, LR: 0.0003 +[2026-03-01 20:25:23] (step=0026932) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.269418900410878, LR: 0.0003 +[2026-03-01 20:25:30] (step=0026933) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.269614556838192, LR: 0.0003 +[2026-03-01 20:25:38] (step=0026934) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.269810213265505, LR: 0.0003 +[2026-03-01 20:25:46] (step=0026935) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.270005869692819, LR: 0.0003 +[2026-03-01 20:25:54] (step=0026936) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.270201526120133, LR: 0.0003 +[2026-03-01 20:26:02] (step=0026937) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.2703971825474465, LR: 0.0003 +[2026-03-01 20:26:10] (step=0026938) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.2705928389747605, LR: 0.0003 +[2026-03-01 20:26:18] (step=0026939) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.270788495402074, LR: 0.0003 +[2026-03-01 20:26:25] (step=0026940) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 5.270984151829388, LR: 0.0003 +[2026-03-01 20:26:33] (step=0026941) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.271179808256702, LR: 0.0003 +[2026-03-01 20:26:41] (step=0026942) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.271375464684015, LR: 0.0003 +[2026-03-01 20:26:49] (step=0026943) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.271571121111329, LR: 0.0003 +[2026-03-01 20:26:57] (step=0026944) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.271766777538642, LR: 0.0003 +[2026-03-01 20:27:05] (step=0026945) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.271962433965956, LR: 0.0003 +[2026-03-01 20:27:13] (step=0026946) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.272158090393269, LR: 0.0003 +[2026-03-01 20:27:20] (step=0026947) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 5.272353746820583, LR: 0.0003 +[2026-03-01 20:27:28] (step=0026948) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.272549403247897, LR: 0.0003 +[2026-03-01 20:27:36] (step=0026949) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.27274505967521, LR: 0.0003 +[2026-03-01 20:27:44] (step=0026950) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.272940716102524, LR: 0.0003 +[2026-03-01 20:27:52] (step=0026951) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.273136372529837, LR: 0.0003 +[2026-03-01 20:28:00] (step=0026952) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 5.273332028957151, LR: 0.0003 +[2026-03-01 20:28:07] (step=0026953) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.273527685384465, LR: 0.0003 +[2026-03-01 20:28:15] (step=0026954) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.273723341811778, LR: 0.0003 +[2026-03-01 20:28:23] (step=0026955) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.273918998239092, LR: 0.0003 +[2026-03-01 20:28:31] (step=0026956) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.2741146546664055, LR: 0.0003 +[2026-03-01 20:28:39] (step=0026957) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.2743103110937195, LR: 0.0003 +[2026-03-01 20:28:47] (step=0026958) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.274505967521033, LR: 0.0003 +[2026-03-01 20:28:55] (step=0026959) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.274701623948347, LR: 0.0003 +[2026-03-01 20:29:02] (step=0026960) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.274897280375661, LR: 0.0003 +[2026-03-01 20:29:10] (step=0026961) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.275092936802974, LR: 0.0003 +[2026-03-01 20:29:18] (step=0026962) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 5.275288593230288, LR: 0.0003 +[2026-03-01 20:29:26] (step=0026963) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.275484249657601, LR: 0.0003 +[2026-03-01 20:29:34] (step=0026964) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 5.275679906084915, LR: 0.0003 +[2026-03-01 20:29:42] (step=0026965) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.275875562512229, LR: 0.0003 +[2026-03-01 20:29:50] (step=0026966) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.276071218939542, LR: 0.0003 +[2026-03-01 20:29:58] (step=0026967) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.276266875366856, LR: 0.0003 +[2026-03-01 20:30:05] (step=0026968) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.276462531794169, LR: 0.0003 +[2026-03-01 20:30:13] (step=0026969) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.276658188221483, LR: 0.0003 +[2026-03-01 20:30:21] (step=0026970) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.276853844648797, LR: 0.0003 +[2026-03-01 20:30:29] (step=0026971) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.27704950107611, LR: 0.0003 +[2026-03-01 20:30:37] (step=0026972) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.277245157503424, LR: 0.0003 +[2026-03-01 20:30:45] (step=0026973) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.277440813930737, LR: 0.0003 +[2026-03-01 20:30:52] (step=0026974) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.277636470358051, LR: 0.0003 +[2026-03-01 20:31:00] (step=0026975) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.2778321267853645, LR: 0.0003 +[2026-03-01 20:31:08] (step=0026976) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.2780277832126785, LR: 0.0003 +[2026-03-01 20:31:16] (step=0026977) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 5.2782234396399925, LR: 0.0003 +[2026-03-01 20:31:24] (step=0026978) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.278419096067306, LR: 0.0003 +[2026-03-01 20:31:32] (step=0026979) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.27861475249462, LR: 0.0003 +[2026-03-01 20:31:40] (step=0026980) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.278810408921933, LR: 0.0003 +[2026-03-01 20:31:47] (step=0026981) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.279006065349247, LR: 0.0003 +[2026-03-01 20:31:55] (step=0026982) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.279201721776561, LR: 0.0003 +[2026-03-01 20:32:03] (step=0026983) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.279397378203874, LR: 0.0003 +[2026-03-01 20:32:11] (step=0026984) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.279593034631188, LR: 0.0003 +[2026-03-01 20:32:19] (step=0026985) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.279788691058501, LR: 0.0003 +[2026-03-01 20:32:27] (step=0026986) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.279984347485815, LR: 0.0003 +[2026-03-01 20:32:35] (step=0026987) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.280180003913128, LR: 0.0003 +[2026-03-01 20:32:42] (step=0026988) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.280375660340442, LR: 0.0003 +[2026-03-01 20:32:50] (step=0026989) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.280571316767756, LR: 0.0003 +[2026-03-01 20:32:58] (step=0026990) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.280766973195069, LR: 0.0003 +[2026-03-01 20:33:06] (step=0026991) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.280962629622383, LR: 0.0003 +[2026-03-01 20:33:14] (step=0026992) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.281158286049696, LR: 0.0003 +[2026-03-01 20:33:22] (step=0026993) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.28135394247701, LR: 0.0003 +[2026-03-01 20:33:30] (step=0026994) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.281549598904324, LR: 0.0003 +[2026-03-01 20:33:37] (step=0026995) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.2817452553316375, LR: 0.0003 +[2026-03-01 20:33:45] (step=0026996) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 5.2819409117589515, LR: 0.0003 +[2026-03-01 20:33:53] (step=0026997) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.282136568186265, LR: 0.0003 +[2026-03-01 20:34:01] (step=0026998) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.282332224613579, LR: 0.0003 +[2026-03-01 20:34:09] (step=0026999) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.282527881040892, LR: 0.0003 +[2026-03-01 20:34:17] (step=0027000) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.282723537468206, LR: 0.0003 +[2026-03-01 20:34:17] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0027000/ +[2026-03-01 20:34:25] (step=0027001) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.28291919389552, LR: 0.0003 +[2026-03-01 20:34:32] (step=0027002) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.283114850322833, LR: 0.0003 +[2026-03-01 20:34:40] (step=0027003) Train Loss: 0.4685, Train Steps/Sec: 0.13, Epoch: 5.283310506750147, LR: 0.0003 +[2026-03-01 20:34:48] (step=0027004) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.28350616317746, LR: 0.0003 +[2026-03-01 20:34:56] (step=0027005) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 5.283701819604774, LR: 0.0003 +[2026-03-01 20:35:04] (step=0027006) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.283897476032088, LR: 0.0003 +[2026-03-01 20:35:12] (step=0027007) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.284093132459401, LR: 0.0003 +[2026-03-01 20:35:19] (step=0027008) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.284288788886715, LR: 0.0003 +[2026-03-01 20:35:27] (step=0027009) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.284484445314028, LR: 0.0003 +[2026-03-01 20:35:35] (step=0027010) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.284680101741342, LR: 0.0003 +[2026-03-01 20:35:43] (step=0027011) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.284875758168655, LR: 0.0003 +[2026-03-01 20:35:51] (step=0027012) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.285071414595969, LR: 0.0003 +[2026-03-01 20:35:59] (step=0027013) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.285267071023283, LR: 0.0003 +[2026-03-01 20:36:07] (step=0027014) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.2854627274505965, LR: 0.0003 +[2026-03-01 20:36:15] (step=0027015) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.2856583838779105, LR: 0.0003 +[2026-03-01 20:36:22] (step=0027016) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.285854040305224, LR: 0.0003 +[2026-03-01 20:36:30] (step=0027017) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.286049696732538, LR: 0.0003 +[2026-03-01 20:36:38] (step=0027018) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.286245353159852, LR: 0.0003 +[2026-03-01 20:36:46] (step=0027019) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.286441009587165, LR: 0.0003 +[2026-03-01 20:36:54] (step=0027020) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 5.286636666014479, LR: 0.0003 +[2026-03-01 20:37:02] (step=0027021) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.286832322441792, LR: 0.0003 +[2026-03-01 20:37:10] (step=0027022) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 5.287027978869106, LR: 0.0003 +[2026-03-01 20:37:17] (step=0027023) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.28722363529642, LR: 0.0003 +[2026-03-01 20:37:25] (step=0027024) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.287419291723733, LR: 0.0003 +[2026-03-01 20:37:33] (step=0027025) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.287614948151047, LR: 0.0003 +[2026-03-01 20:37:41] (step=0027026) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.28781060457836, LR: 0.0003 +[2026-03-01 20:37:49] (step=0027027) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.288006261005674, LR: 0.0003 +[2026-03-01 20:37:57] (step=0027028) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.288201917432987, LR: 0.0003 +[2026-03-01 20:38:05] (step=0027029) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.288397573860301, LR: 0.0003 +[2026-03-01 20:38:13] (step=0027030) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.288593230287615, LR: 0.0003 +[2026-03-01 20:38:20] (step=0027031) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.288788886714928, LR: 0.0003 +[2026-03-01 20:38:28] (step=0027032) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 5.288984543142242, LR: 0.0003 +[2026-03-01 20:38:36] (step=0027033) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 5.2891801995695555, LR: 0.0003 +[2026-03-01 20:38:44] (step=0027034) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.2893758559968695, LR: 0.0003 +[2026-03-01 20:38:52] (step=0027035) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.2895715124241836, LR: 0.0003 +[2026-03-01 20:39:00] (step=0027036) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.289767168851497, LR: 0.0003 +[2026-03-01 20:39:07] (step=0027037) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.289962825278811, LR: 0.0003 +[2026-03-01 20:39:15] (step=0027038) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.290158481706124, LR: 0.0003 +[2026-03-01 20:39:23] (step=0027039) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.290354138133438, LR: 0.0003 +[2026-03-01 20:39:31] (step=0027040) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.290549794560751, LR: 0.0003 +[2026-03-01 20:39:39] (step=0027041) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.290745450988065, LR: 0.0003 +[2026-03-01 20:39:47] (step=0027042) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.290941107415379, LR: 0.0003 +[2026-03-01 20:39:55] (step=0027043) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.291136763842692, LR: 0.0003 +[2026-03-01 20:40:02] (step=0027044) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.291332420270006, LR: 0.0003 +[2026-03-01 20:40:10] (step=0027045) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.291528076697319, LR: 0.0003 +[2026-03-01 20:40:18] (step=0027046) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.291723733124633, LR: 0.0003 +[2026-03-01 20:40:26] (step=0027047) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.291919389551947, LR: 0.0003 +[2026-03-01 20:40:34] (step=0027048) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.29211504597926, LR: 0.0003 +[2026-03-01 20:40:42] (step=0027049) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 5.292310702406574, LR: 0.0003 +[2026-03-01 20:40:50] (step=0027050) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.292506358833887, LR: 0.0003 +[2026-03-01 20:40:57] (step=0027051) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.292702015261201, LR: 0.0003 +[2026-03-01 20:41:05] (step=0027052) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.2928976716885145, LR: 0.0003 +[2026-03-01 20:41:13] (step=0027053) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.2930933281158286, LR: 0.0003 +[2026-03-01 20:41:21] (step=0027054) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.293288984543143, LR: 0.0003 +[2026-03-01 20:41:29] (step=0027055) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 5.293484640970456, LR: 0.0003 +[2026-03-01 20:41:37] (step=0027056) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.29368029739777, LR: 0.0003 +[2026-03-01 20:41:45] (step=0027057) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 5.293875953825083, LR: 0.0003 +[2026-03-01 20:41:52] (step=0027058) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.294071610252397, LR: 0.0003 +[2026-03-01 20:42:00] (step=0027059) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.294267266679711, LR: 0.0003 +[2026-03-01 20:42:08] (step=0027060) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.294462923107024, LR: 0.0003 +[2026-03-01 20:42:16] (step=0027061) Train Loss: 0.4511, Train Steps/Sec: 0.12, Epoch: 5.294658579534338, LR: 0.0003 +[2026-03-01 20:42:24] (step=0027062) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.294854235961651, LR: 0.0003 +[2026-03-01 20:42:32] (step=0027063) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.295049892388965, LR: 0.0003 +[2026-03-01 20:42:40] (step=0027064) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.295245548816278, LR: 0.0003 +[2026-03-01 20:42:48] (step=0027065) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.295441205243592, LR: 0.0003 +[2026-03-01 20:42:55] (step=0027066) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.295636861670906, LR: 0.0003 +[2026-03-01 20:43:03] (step=0027067) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.295832518098219, LR: 0.0003 +[2026-03-01 20:43:11] (step=0027068) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.296028174525533, LR: 0.0003 +[2026-03-01 20:43:19] (step=0027069) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.296223830952846, LR: 0.0003 +[2026-03-01 20:43:27] (step=0027070) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.29641948738016, LR: 0.0003 +[2026-03-01 20:43:35] (step=0027071) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.2966151438074744, LR: 0.0003 +[2026-03-01 20:43:43] (step=0027072) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 5.296810800234788, LR: 0.0003 +[2026-03-01 20:43:50] (step=0027073) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.297006456662102, LR: 0.0003 +[2026-03-01 20:43:58] (step=0027074) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.297202113089415, LR: 0.0003 +[2026-03-01 20:44:06] (step=0027075) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.297397769516729, LR: 0.0003 +[2026-03-01 20:44:14] (step=0027076) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.297593425944043, LR: 0.0003 +[2026-03-01 20:44:22] (step=0027077) Train Loss: 0.4461, Train Steps/Sec: 0.12, Epoch: 5.297789082371356, LR: 0.0003 +[2026-03-01 20:44:30] (step=0027078) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.29798473879867, LR: 0.0003 +[2026-03-01 20:44:38] (step=0027079) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.298180395225983, LR: 0.0003 +[2026-03-01 20:44:46] (step=0027080) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.298376051653297, LR: 0.0003 +[2026-03-01 20:44:53] (step=0027081) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.29857170808061, LR: 0.0003 +[2026-03-01 20:45:01] (step=0027082) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.298767364507924, LR: 0.0003 +[2026-03-01 20:45:09] (step=0027083) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.298963020935238, LR: 0.0003 +[2026-03-01 20:45:17] (step=0027084) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.299158677362551, LR: 0.0003 +[2026-03-01 20:45:25] (step=0027085) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.299354333789865, LR: 0.0003 +[2026-03-01 20:45:33] (step=0027086) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.299549990217178, LR: 0.0003 +[2026-03-01 20:45:40] (step=0027087) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.299745646644492, LR: 0.0003 +[2026-03-01 20:45:48] (step=0027088) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.299941303071806, LR: 0.0003 +[2026-03-01 20:45:56] (step=0027089) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.3001369594991194, LR: 0.0003 +[2026-03-01 20:46:04] (step=0027090) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.3003326159264335, LR: 0.0003 +[2026-03-01 20:46:12] (step=0027091) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.300528272353747, LR: 0.0003 +[2026-03-01 20:46:20] (step=0027092) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.300723928781061, LR: 0.0003 +[2026-03-01 20:46:28] (step=0027093) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.300919585208374, LR: 0.0003 +[2026-03-01 20:46:35] (step=0027094) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.301115241635688, LR: 0.0003 +[2026-03-01 20:46:43] (step=0027095) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.301310898063002, LR: 0.0003 +[2026-03-01 20:46:51] (step=0027096) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.301506554490315, LR: 0.0003 +[2026-03-01 20:46:59] (step=0027097) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.301702210917629, LR: 0.0003 +[2026-03-01 20:47:07] (step=0027098) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.301897867344942, LR: 0.0003 +[2026-03-01 20:47:15] (step=0027099) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.302093523772256, LR: 0.0003 +[2026-03-01 20:47:23] (step=0027100) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.30228918019957, LR: 0.0003 +[2026-03-01 20:47:30] (step=0027101) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.302484836626883, LR: 0.0003 +[2026-03-01 20:47:38] (step=0027102) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.302680493054197, LR: 0.0003 +[2026-03-01 20:47:46] (step=0027103) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.30287614948151, LR: 0.0003 +[2026-03-01 20:47:54] (step=0027104) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.303071805908824, LR: 0.0003 +[2026-03-01 20:48:02] (step=0027105) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.303267462336137, LR: 0.0003 +[2026-03-01 20:48:10] (step=0027106) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.303463118763451, LR: 0.0003 +[2026-03-01 20:48:18] (step=0027107) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.303658775190765, LR: 0.0003 +[2026-03-01 20:48:25] (step=0027108) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.3038544316180785, LR: 0.0003 +[2026-03-01 20:48:33] (step=0027109) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 5.3040500880453925, LR: 0.0003 +[2026-03-01 20:48:41] (step=0027110) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 5.304245744472706, LR: 0.0003 +[2026-03-01 20:48:49] (step=0027111) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.30444140090002, LR: 0.0003 +[2026-03-01 20:48:57] (step=0027112) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.304637057327334, LR: 0.0003 +[2026-03-01 20:49:05] (step=0027113) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.304832713754647, LR: 0.0003 +[2026-03-01 20:49:13] (step=0027114) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.305028370181961, LR: 0.0003 +[2026-03-01 20:49:21] (step=0027115) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.305224026609274, LR: 0.0003 +[2026-03-01 20:49:28] (step=0027116) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.305419683036588, LR: 0.0003 +[2026-03-01 20:49:36] (step=0027117) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.305615339463901, LR: 0.0003 +[2026-03-01 20:49:44] (step=0027118) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.305810995891215, LR: 0.0003 +[2026-03-01 20:49:52] (step=0027119) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.306006652318529, LR: 0.0003 +[2026-03-01 20:50:00] (step=0027120) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.306202308745842, LR: 0.0003 +[2026-03-01 20:50:08] (step=0027121) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.306397965173156, LR: 0.0003 +[2026-03-01 20:50:16] (step=0027122) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 5.306593621600469, LR: 0.0003 +[2026-03-01 20:50:23] (step=0027123) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.306789278027783, LR: 0.0003 +[2026-03-01 20:50:31] (step=0027124) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.306984934455097, LR: 0.0003 +[2026-03-01 20:50:39] (step=0027125) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.30718059088241, LR: 0.0003 +[2026-03-01 20:50:47] (step=0027126) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.307376247309724, LR: 0.0003 +[2026-03-01 20:50:55] (step=0027127) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.3075719037370375, LR: 0.0003 +[2026-03-01 20:51:03] (step=0027128) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.3077675601643515, LR: 0.0003 +[2026-03-01 20:51:11] (step=0027129) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.3079632165916655, LR: 0.0003 +[2026-03-01 20:51:18] (step=0027130) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 5.308158873018979, LR: 0.0003 +[2026-03-01 20:51:26] (step=0027131) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 5.308354529446293, LR: 0.0003 +[2026-03-01 20:51:34] (step=0027132) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.308550185873606, LR: 0.0003 +[2026-03-01 20:51:42] (step=0027133) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 5.30874584230092, LR: 0.0003 +[2026-03-01 20:51:50] (step=0027134) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.308941498728233, LR: 0.0003 +[2026-03-01 20:51:58] (step=0027135) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.309137155155547, LR: 0.0003 +[2026-03-01 20:52:06] (step=0027136) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.309332811582861, LR: 0.0003 +[2026-03-01 20:52:13] (step=0027137) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.309528468010174, LR: 0.0003 +[2026-03-01 20:52:21] (step=0027138) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.309724124437488, LR: 0.0003 +[2026-03-01 20:52:29] (step=0027139) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.309919780864801, LR: 0.0003 +[2026-03-01 20:52:37] (step=0027140) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.310115437292115, LR: 0.0003 +[2026-03-01 20:52:45] (step=0027141) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.310311093719429, LR: 0.0003 +[2026-03-01 20:52:53] (step=0027142) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 5.310506750146742, LR: 0.0003 +[2026-03-01 20:53:01] (step=0027143) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.310702406574056, LR: 0.0003 +[2026-03-01 20:53:08] (step=0027144) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.310898063001369, LR: 0.0003 +[2026-03-01 20:53:16] (step=0027145) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.311093719428683, LR: 0.0003 +[2026-03-01 20:53:24] (step=0027146) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.3112893758559965, LR: 0.0003 +[2026-03-01 20:53:32] (step=0027147) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.3114850322833105, LR: 0.0003 +[2026-03-01 20:53:40] (step=0027148) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.3116806887106245, LR: 0.0003 +[2026-03-01 20:53:48] (step=0027149) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.311876345137938, LR: 0.0003 +[2026-03-01 20:53:56] (step=0027150) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.312072001565252, LR: 0.0003 +[2026-03-01 20:54:03] (step=0027151) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.312267657992565, LR: 0.0003 +[2026-03-01 20:54:11] (step=0027152) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.312463314419879, LR: 0.0003 +[2026-03-01 20:54:19] (step=0027153) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.312658970847193, LR: 0.0003 +[2026-03-01 20:54:27] (step=0027154) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.312854627274506, LR: 0.0003 +[2026-03-01 20:54:35] (step=0027155) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.31305028370182, LR: 0.0003 +[2026-03-01 20:54:43] (step=0027156) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.313245940129133, LR: 0.0003 +[2026-03-01 20:54:50] (step=0027157) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.313441596556447, LR: 0.0003 +[2026-03-01 20:54:58] (step=0027158) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.31363725298376, LR: 0.0003 +[2026-03-01 20:55:06] (step=0027159) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.313832909411074, LR: 0.0003 +[2026-03-01 20:55:14] (step=0027160) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.314028565838388, LR: 0.0003 +[2026-03-01 20:55:22] (step=0027161) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.314224222265701, LR: 0.0003 +[2026-03-01 20:55:30] (step=0027162) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.314419878693015, LR: 0.0003 +[2026-03-01 20:55:38] (step=0027163) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 5.314615535120328, LR: 0.0003 +[2026-03-01 20:55:46] (step=0027164) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.314811191547642, LR: 0.0003 +[2026-03-01 20:55:53] (step=0027165) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.315006847974956, LR: 0.0003 +[2026-03-01 20:56:01] (step=0027166) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.3152025044022695, LR: 0.0003 +[2026-03-01 20:56:09] (step=0027167) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.3153981608295835, LR: 0.0003 +[2026-03-01 20:56:17] (step=0027168) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.315593817256897, LR: 0.0003 +[2026-03-01 20:56:25] (step=0027169) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.315789473684211, LR: 0.0003 +[2026-03-01 20:56:33] (step=0027170) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.315985130111524, LR: 0.0003 +[2026-03-01 20:56:40] (step=0027171) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.316180786538838, LR: 0.0003 +[2026-03-01 20:56:48] (step=0027172) Train Loss: 0.4557, Train Steps/Sec: 0.12, Epoch: 5.316376442966152, LR: 0.0003 +[2026-03-01 20:56:56] (step=0027173) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 5.316572099393465, LR: 0.0003 +[2026-03-01 20:57:04] (step=0027174) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.316767755820779, LR: 0.0003 +[2026-03-01 20:57:12] (step=0027175) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.316963412248092, LR: 0.0003 +[2026-03-01 20:57:20] (step=0027176) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.317159068675406, LR: 0.0003 +[2026-03-01 20:57:28] (step=0027177) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.31735472510272, LR: 0.0003 +[2026-03-01 20:57:36] (step=0027178) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.317550381530033, LR: 0.0003 +[2026-03-01 20:57:43] (step=0027179) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.317746037957347, LR: 0.0003 +[2026-03-01 20:57:51] (step=0027180) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.31794169438466, LR: 0.0003 +[2026-03-01 20:57:59] (step=0027181) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.318137350811974, LR: 0.0003 +[2026-03-01 20:58:07] (step=0027182) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.318333007239287, LR: 0.0003 +[2026-03-01 20:58:15] (step=0027183) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.318528663666601, LR: 0.0003 +[2026-03-01 20:58:23] (step=0027184) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.318724320093915, LR: 0.0003 +[2026-03-01 20:58:31] (step=0027185) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.3189199765212285, LR: 0.0003 +[2026-03-01 20:58:38] (step=0027186) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.3191156329485425, LR: 0.0003 +[2026-03-01 20:58:46] (step=0027187) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.319311289375856, LR: 0.0003 +[2026-03-01 20:58:54] (step=0027188) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.31950694580317, LR: 0.0003 +[2026-03-01 20:59:02] (step=0027189) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.319702602230484, LR: 0.0003 +[2026-03-01 20:59:10] (step=0027190) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.319898258657797, LR: 0.0003 +[2026-03-01 20:59:18] (step=0027191) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.320093915085111, LR: 0.0003 +[2026-03-01 20:59:26] (step=0027192) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.320289571512424, LR: 0.0003 +[2026-03-01 20:59:33] (step=0027193) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.320485227939738, LR: 0.0003 +[2026-03-01 20:59:41] (step=0027194) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.320680884367052, LR: 0.0003 +[2026-03-01 20:59:49] (step=0027195) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.320876540794365, LR: 0.0003 +[2026-03-01 20:59:57] (step=0027196) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.321072197221679, LR: 0.0003 +[2026-03-01 21:00:05] (step=0027197) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.321267853648992, LR: 0.0003 +[2026-03-01 21:00:13] (step=0027198) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.321463510076306, LR: 0.0003 +[2026-03-01 21:00:20] (step=0027199) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 5.321659166503619, LR: 0.0003 +[2026-03-01 21:00:28] (step=0027200) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 5.321854822930933, LR: 0.0003 +[2026-03-01 21:00:36] (step=0027201) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.322050479358247, LR: 0.0003 +[2026-03-01 21:00:44] (step=0027202) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.32224613578556, LR: 0.0003 +[2026-03-01 21:00:52] (step=0027203) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.322441792212874, LR: 0.0003 +[2026-03-01 21:01:00] (step=0027204) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 5.3226374486401875, LR: 0.0003 +[2026-03-01 21:01:08] (step=0027205) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.3228331050675015, LR: 0.0003 +[2026-03-01 21:01:15] (step=0027206) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.3230287614948155, LR: 0.0003 +[2026-03-01 21:01:23] (step=0027207) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.323224417922129, LR: 0.0003 +[2026-03-01 21:01:31] (step=0027208) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.323420074349443, LR: 0.0003 +[2026-03-01 21:01:39] (step=0027209) Train Loss: 0.4512, Train Steps/Sec: 0.12, Epoch: 5.323615730776756, LR: 0.0003 +[2026-03-01 21:01:47] (step=0027210) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.32381138720407, LR: 0.0003 +[2026-03-01 21:01:55] (step=0027211) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.324007043631383, LR: 0.0003 +[2026-03-01 21:02:03] (step=0027212) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 5.324202700058697, LR: 0.0003 +[2026-03-01 21:02:11] (step=0027213) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.324398356486011, LR: 0.0003 +[2026-03-01 21:02:18] (step=0027214) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.324594012913324, LR: 0.0003 +[2026-03-01 21:02:26] (step=0027215) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.324789669340638, LR: 0.0003 +[2026-03-01 21:02:34] (step=0027216) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.324985325767951, LR: 0.0003 +[2026-03-01 21:02:42] (step=0027217) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.325180982195265, LR: 0.0003 +[2026-03-01 21:02:50] (step=0027218) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.325376638622579, LR: 0.0003 +[2026-03-01 21:02:58] (step=0027219) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.325572295049892, LR: 0.0003 +[2026-03-01 21:03:06] (step=0027220) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.325767951477206, LR: 0.0003 +[2026-03-01 21:03:14] (step=0027221) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.325963607904519, LR: 0.0003 +[2026-03-01 21:03:21] (step=0027222) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.326159264331833, LR: 0.0003 +[2026-03-01 21:03:29] (step=0027223) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 5.3263549207591465, LR: 0.0003 +[2026-03-01 21:03:37] (step=0027224) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.3265505771864605, LR: 0.0003 +[2026-03-01 21:03:45] (step=0027225) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.3267462336137745, LR: 0.0003 +[2026-03-01 21:03:53] (step=0027226) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.326941890041088, LR: 0.0003 +[2026-03-01 21:04:01] (step=0027227) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.327137546468402, LR: 0.0003 +[2026-03-01 21:04:09] (step=0027228) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.327333202895715, LR: 0.0003 +[2026-03-01 21:04:16] (step=0027229) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.327528859323029, LR: 0.0003 +[2026-03-01 21:04:24] (step=0027230) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.327724515750343, LR: 0.0003 +[2026-03-01 21:04:32] (step=0027231) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.327920172177656, LR: 0.0003 +[2026-03-01 21:04:40] (step=0027232) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.32811582860497, LR: 0.0003 +[2026-03-01 21:04:48] (step=0027233) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.328311485032283, LR: 0.0003 +[2026-03-01 21:04:56] (step=0027234) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.328507141459597, LR: 0.0003 +[2026-03-01 21:05:03] (step=0027235) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.32870279788691, LR: 0.0003 +[2026-03-01 21:05:11] (step=0027236) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.328898454314224, LR: 0.0003 +[2026-03-01 21:05:19] (step=0027237) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.329094110741538, LR: 0.0003 +[2026-03-01 21:05:27] (step=0027238) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.329289767168851, LR: 0.0003 +[2026-03-01 21:05:35] (step=0027239) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.329485423596165, LR: 0.0003 +[2026-03-01 21:05:43] (step=0027240) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.329681080023478, LR: 0.0003 +[2026-03-01 21:05:51] (step=0027241) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.329876736450792, LR: 0.0003 +[2026-03-01 21:05:58] (step=0027242) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.330072392878106, LR: 0.0003 +[2026-03-01 21:06:06] (step=0027243) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.3302680493054195, LR: 0.0003 +[2026-03-01 21:06:14] (step=0027244) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.3304637057327335, LR: 0.0003 +[2026-03-01 21:06:22] (step=0027245) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.330659362160047, LR: 0.0003 +[2026-03-01 21:06:30] (step=0027246) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.330855018587361, LR: 0.0003 +[2026-03-01 21:06:38] (step=0027247) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.331050675014675, LR: 0.0003 +[2026-03-01 21:06:46] (step=0027248) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.331246331441988, LR: 0.0003 +[2026-03-01 21:06:53] (step=0027249) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.331441987869302, LR: 0.0003 +[2026-03-01 21:07:01] (step=0027250) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.331637644296615, LR: 0.0003 +[2026-03-01 21:07:09] (step=0027251) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.331833300723929, LR: 0.0003 +[2026-03-01 21:07:17] (step=0027252) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.332028957151242, LR: 0.0003 +[2026-03-01 21:07:25] (step=0027253) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.332224613578556, LR: 0.0003 +[2026-03-01 21:07:33] (step=0027254) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.33242027000587, LR: 0.0003 +[2026-03-01 21:07:40] (step=0027255) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.332615926433183, LR: 0.0003 +[2026-03-01 21:07:48] (step=0027256) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.332811582860497, LR: 0.0003 +[2026-03-01 21:07:56] (step=0027257) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.33300723928781, LR: 0.0003 +[2026-03-01 21:08:04] (step=0027258) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 5.333202895715124, LR: 0.0003 +[2026-03-01 21:08:12] (step=0027259) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.333398552142438, LR: 0.0003 +[2026-03-01 21:08:20] (step=0027260) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.333594208569751, LR: 0.0003 +[2026-03-01 21:08:28] (step=0027261) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.333789864997065, LR: 0.0003 +[2026-03-01 21:08:36] (step=0027262) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.3339855214243785, LR: 0.0003 +[2026-03-01 21:08:43] (step=0027263) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.3341811778516925, LR: 0.0003 +[2026-03-01 21:08:51] (step=0027264) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.334376834279006, LR: 0.0003 +[2026-03-01 21:08:59] (step=0027265) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.33457249070632, LR: 0.0003 +[2026-03-01 21:09:07] (step=0027266) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.334768147133634, LR: 0.0003 +[2026-03-01 21:09:15] (step=0027267) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.334963803560947, LR: 0.0003 +[2026-03-01 21:09:23] (step=0027268) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.335159459988261, LR: 0.0003 +[2026-03-01 21:09:30] (step=0027269) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.335355116415574, LR: 0.0003 +[2026-03-01 21:09:38] (step=0027270) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.335550772842888, LR: 0.0003 +[2026-03-01 21:09:46] (step=0027271) Train Loss: 0.4513, Train Steps/Sec: 0.12, Epoch: 5.335746429270202, LR: 0.0003 +[2026-03-01 21:09:54] (step=0027272) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.335942085697515, LR: 0.0003 +[2026-03-01 21:10:02] (step=0027273) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.336137742124829, LR: 0.0003 +[2026-03-01 21:10:10] (step=0027274) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.336333398552142, LR: 0.0003 +[2026-03-01 21:10:18] (step=0027275) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.336529054979456, LR: 0.0003 +[2026-03-01 21:10:26] (step=0027276) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.336724711406769, LR: 0.0003 +[2026-03-01 21:10:33] (step=0027277) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.336920367834083, LR: 0.0003 +[2026-03-01 21:10:41] (step=0027278) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.337116024261397, LR: 0.0003 +[2026-03-01 21:10:49] (step=0027279) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.33731168068871, LR: 0.0003 +[2026-03-01 21:10:57] (step=0027280) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.337507337116024, LR: 0.0003 +[2026-03-01 21:11:05] (step=0027281) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.3377029935433375, LR: 0.0003 +[2026-03-01 21:11:13] (step=0027282) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.3378986499706516, LR: 0.0003 +[2026-03-01 21:11:20] (step=0027283) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.338094306397966, LR: 0.0003 +[2026-03-01 21:11:28] (step=0027284) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 5.338289962825279, LR: 0.0003 +[2026-03-01 21:11:36] (step=0027285) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.338485619252593, LR: 0.0003 +[2026-03-01 21:11:44] (step=0027286) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.338681275679906, LR: 0.0003 +[2026-03-01 21:11:52] (step=0027287) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.33887693210722, LR: 0.0003 +[2026-03-01 21:12:00] (step=0027288) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.339072588534533, LR: 0.0003 +[2026-03-01 21:12:08] (step=0027289) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 5.339268244961847, LR: 0.0003 +[2026-03-01 21:12:15] (step=0027290) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.339463901389161, LR: 0.0003 +[2026-03-01 21:12:23] (step=0027291) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.339659557816474, LR: 0.0003 +[2026-03-01 21:12:31] (step=0027292) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.339855214243788, LR: 0.0003 +[2026-03-01 21:12:39] (step=0027293) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.340050870671101, LR: 0.0003 +[2026-03-01 21:12:47] (step=0027294) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.340246527098415, LR: 0.0003 +[2026-03-01 21:12:55] (step=0027295) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.340442183525729, LR: 0.0003 +[2026-03-01 21:13:03] (step=0027296) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.340637839953042, LR: 0.0003 +[2026-03-01 21:13:10] (step=0027297) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.340833496380356, LR: 0.0003 +[2026-03-01 21:13:18] (step=0027298) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.341029152807669, LR: 0.0003 +[2026-03-01 21:13:26] (step=0027299) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 5.341224809234983, LR: 0.0003 +[2026-03-01 21:13:34] (step=0027300) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.3414204656622974, LR: 0.0003 +[2026-03-01 21:13:42] (step=0027301) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.341616122089611, LR: 0.0003 +[2026-03-01 21:13:50] (step=0027302) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.341811778516925, LR: 0.0003 +[2026-03-01 21:13:58] (step=0027303) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.342007434944238, LR: 0.0003 +[2026-03-01 21:14:05] (step=0027304) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.342203091371552, LR: 0.0003 +[2026-03-01 21:14:13] (step=0027305) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 5.342398747798865, LR: 0.0003 +[2026-03-01 21:14:21] (step=0027306) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.342594404226179, LR: 0.0003 +[2026-03-01 21:14:29] (step=0027307) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.342790060653493, LR: 0.0003 +[2026-03-01 21:14:37] (step=0027308) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.342985717080806, LR: 0.0003 +[2026-03-01 21:14:45] (step=0027309) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.34318137350812, LR: 0.0003 +[2026-03-01 21:14:52] (step=0027310) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.343377029935433, LR: 0.0003 +[2026-03-01 21:15:00] (step=0027311) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.343572686362747, LR: 0.0003 +[2026-03-01 21:15:08] (step=0027312) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.343768342790061, LR: 0.0003 +[2026-03-01 21:15:16] (step=0027313) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.343963999217374, LR: 0.0003 +[2026-03-01 21:15:24] (step=0027314) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.344159655644688, LR: 0.0003 +[2026-03-01 21:15:32] (step=0027315) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.344355312072001, LR: 0.0003 +[2026-03-01 21:15:40] (step=0027316) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.344550968499315, LR: 0.0003 +[2026-03-01 21:15:48] (step=0027317) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.344746624926628, LR: 0.0003 +[2026-03-01 21:15:56] (step=0027318) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.3449422813539424, LR: 0.0003 +[2026-03-01 21:16:03] (step=0027319) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.3451379377812565, LR: 0.0003 +[2026-03-01 21:16:11] (step=0027320) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.34533359420857, LR: 0.0003 +[2026-03-01 21:16:19] (step=0027321) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.345529250635884, LR: 0.0003 +[2026-03-01 21:16:27] (step=0027322) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.345724907063197, LR: 0.0003 +[2026-03-01 21:16:35] (step=0027323) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.345920563490511, LR: 0.0003 +[2026-03-01 21:16:43] (step=0027324) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 5.346116219917825, LR: 0.0003 +[2026-03-01 21:16:51] (step=0027325) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.346311876345138, LR: 0.0003 +[2026-03-01 21:16:58] (step=0027326) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.346507532772452, LR: 0.0003 +[2026-03-01 21:17:06] (step=0027327) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 5.346703189199765, LR: 0.0003 +[2026-03-01 21:17:14] (step=0027328) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.346898845627079, LR: 0.0003 +[2026-03-01 21:17:22] (step=0027329) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.347094502054392, LR: 0.0003 +[2026-03-01 21:17:30] (step=0027330) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.347290158481706, LR: 0.0003 +[2026-03-01 21:17:38] (step=0027331) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 5.34748581490902, LR: 0.0003 +[2026-03-01 21:17:46] (step=0027332) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.347681471336333, LR: 0.0003 +[2026-03-01 21:17:53] (step=0027333) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.347877127763647, LR: 0.0003 +[2026-03-01 21:18:01] (step=0027334) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.34807278419096, LR: 0.0003 +[2026-03-01 21:18:09] (step=0027335) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 5.348268440618274, LR: 0.0003 +[2026-03-01 21:18:17] (step=0027336) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 5.348464097045588, LR: 0.0003 +[2026-03-01 21:18:25] (step=0027337) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.3486597534729015, LR: 0.0003 +[2026-03-01 21:18:33] (step=0027338) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.3488554099002155, LR: 0.0003 +[2026-03-01 21:18:40] (step=0027339) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.349051066327529, LR: 0.0003 +[2026-03-01 21:18:48] (step=0027340) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.349246722754843, LR: 0.0003 +[2026-03-01 21:18:56] (step=0027341) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.349442379182156, LR: 0.0003 +[2026-03-01 21:19:04] (step=0027342) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.34963803560947, LR: 0.0003 +[2026-03-01 21:19:12] (step=0027343) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.349833692036784, LR: 0.0003 +[2026-03-01 21:19:20] (step=0027344) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.350029348464097, LR: 0.0003 +[2026-03-01 21:19:27] (step=0027345) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.350225004891411, LR: 0.0003 +[2026-03-01 21:19:35] (step=0027346) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.350420661318724, LR: 0.0003 +[2026-03-01 21:19:43] (step=0027347) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.350616317746038, LR: 0.0003 +[2026-03-01 21:19:51] (step=0027348) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.350811974173352, LR: 0.0003 +[2026-03-01 21:19:59] (step=0027349) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.351007630600665, LR: 0.0003 +[2026-03-01 21:20:07] (step=0027350) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.351203287027979, LR: 0.0003 +[2026-03-01 21:20:15] (step=0027351) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.351398943455292, LR: 0.0003 +[2026-03-01 21:20:22] (step=0027352) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.351594599882606, LR: 0.0003 +[2026-03-01 21:20:30] (step=0027353) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.35179025630992, LR: 0.0003 +[2026-03-01 21:20:38] (step=0027354) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.351985912737233, LR: 0.0003 +[2026-03-01 21:20:46] (step=0027355) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.352181569164547, LR: 0.0003 +[2026-03-01 21:20:54] (step=0027356) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.3523772255918605, LR: 0.0003 +[2026-03-01 21:21:02] (step=0027357) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.3525728820191745, LR: 0.0003 +[2026-03-01 21:21:10] (step=0027358) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.352768538446488, LR: 0.0003 +[2026-03-01 21:21:18] (step=0027359) Train Loss: 0.4520, Train Steps/Sec: 0.12, Epoch: 5.352964194873802, LR: 0.0003 +[2026-03-01 21:21:25] (step=0027360) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.353159851301116, LR: 0.0003 +[2026-03-01 21:21:33] (step=0027361) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.353355507728429, LR: 0.0003 +[2026-03-01 21:21:41] (step=0027362) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.353551164155743, LR: 0.0003 +[2026-03-01 21:21:49] (step=0027363) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.353746820583056, LR: 0.0003 +[2026-03-01 21:21:57] (step=0027364) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.35394247701037, LR: 0.0003 +[2026-03-01 21:22:05] (step=0027365) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.354138133437684, LR: 0.0003 +[2026-03-01 21:22:13] (step=0027366) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.354333789864997, LR: 0.0003 +[2026-03-01 21:22:20] (step=0027367) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.354529446292311, LR: 0.0003 +[2026-03-01 21:22:28] (step=0027368) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.354725102719624, LR: 0.0003 +[2026-03-01 21:22:36] (step=0027369) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.354920759146938, LR: 0.0003 +[2026-03-01 21:22:44] (step=0027370) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.355116415574251, LR: 0.0003 +[2026-03-01 21:22:52] (step=0027371) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.355312072001565, LR: 0.0003 +[2026-03-01 21:23:00] (step=0027372) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.355507728428879, LR: 0.0003 +[2026-03-01 21:23:08] (step=0027373) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.355703384856192, LR: 0.0003 +[2026-03-01 21:23:16] (step=0027374) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.355899041283506, LR: 0.0003 +[2026-03-01 21:23:23] (step=0027375) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.3560946977108195, LR: 0.0003 +[2026-03-01 21:23:31] (step=0027376) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.3562903541381335, LR: 0.0003 +[2026-03-01 21:23:39] (step=0027377) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.3564860105654475, LR: 0.0003 +[2026-03-01 21:23:47] (step=0027378) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.356681666992761, LR: 0.0003 +[2026-03-01 21:23:55] (step=0027379) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.356877323420075, LR: 0.0003 +[2026-03-01 21:24:03] (step=0027380) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.357072979847388, LR: 0.0003 +[2026-03-01 21:24:10] (step=0027381) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.357268636274702, LR: 0.0003 +[2026-03-01 21:24:18] (step=0027382) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.357464292702015, LR: 0.0003 +[2026-03-01 21:24:26] (step=0027383) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.357659949129329, LR: 0.0003 +[2026-03-01 21:24:34] (step=0027384) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 5.357855605556643, LR: 0.0003 +[2026-03-01 21:24:42] (step=0027385) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.358051261983956, LR: 0.0003 +[2026-03-01 21:24:50] (step=0027386) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.35824691841127, LR: 0.0003 +[2026-03-01 21:24:58] (step=0027387) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.358442574838583, LR: 0.0003 +[2026-03-01 21:25:05] (step=0027388) Train Loss: 0.4742, Train Steps/Sec: 0.13, Epoch: 5.358638231265897, LR: 0.0003 +[2026-03-01 21:25:13] (step=0027389) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 5.358833887693211, LR: 0.0003 +[2026-03-01 21:25:21] (step=0027390) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.359029544120524, LR: 0.0003 +[2026-03-01 21:25:29] (step=0027391) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.359225200547838, LR: 0.0003 +[2026-03-01 21:25:37] (step=0027392) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.359420856975151, LR: 0.0003 +[2026-03-01 21:25:45] (step=0027393) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.359616513402465, LR: 0.0003 +[2026-03-01 21:25:53] (step=0027394) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.3598121698297785, LR: 0.0003 +[2026-03-01 21:26:00] (step=0027395) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.3600078262570925, LR: 0.0003 +[2026-03-01 21:26:08] (step=0027396) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.3602034826844065, LR: 0.0003 +[2026-03-01 21:26:16] (step=0027397) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 5.36039913911172, LR: 0.0003 +[2026-03-01 21:26:24] (step=0027398) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.360594795539034, LR: 0.0003 +[2026-03-01 21:26:32] (step=0027399) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.360790451966347, LR: 0.0003 +[2026-03-01 21:26:40] (step=0027400) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.360986108393661, LR: 0.0003 +[2026-03-01 21:26:48] (step=0027401) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 5.361181764820975, LR: 0.0003 +[2026-03-01 21:26:55] (step=0027402) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.361377421248288, LR: 0.0003 +[2026-03-01 21:27:03] (step=0027403) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.361573077675602, LR: 0.0003 +[2026-03-01 21:27:11] (step=0027404) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 5.361768734102915, LR: 0.0003 +[2026-03-01 21:27:19] (step=0027405) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.361964390530229, LR: 0.0003 +[2026-03-01 21:27:27] (step=0027406) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.362160046957542, LR: 0.0003 +[2026-03-01 21:27:35] (step=0027407) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.362355703384856, LR: 0.0003 +[2026-03-01 21:27:42] (step=0027408) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.36255135981217, LR: 0.0003 +[2026-03-01 21:27:50] (step=0027409) Train Loss: 0.4389, Train Steps/Sec: 0.12, Epoch: 5.362747016239483, LR: 0.0003 +[2026-03-01 21:27:58] (step=0027410) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.362942672666797, LR: 0.0003 +[2026-03-01 21:28:06] (step=0027411) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.36313832909411, LR: 0.0003 +[2026-03-01 21:28:14] (step=0027412) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.363333985521424, LR: 0.0003 +[2026-03-01 21:28:22] (step=0027413) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.363529641948738, LR: 0.0003 +[2026-03-01 21:28:30] (step=0027414) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.3637252983760515, LR: 0.0003 +[2026-03-01 21:28:38] (step=0027415) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.3639209548033655, LR: 0.0003 +[2026-03-01 21:28:45] (step=0027416) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.364116611230679, LR: 0.0003 +[2026-03-01 21:28:53] (step=0027417) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.364312267657993, LR: 0.0003 +[2026-03-01 21:29:01] (step=0027418) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.364507924085307, LR: 0.0003 +[2026-03-01 21:29:09] (step=0027419) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.36470358051262, LR: 0.0003 +[2026-03-01 21:29:17] (step=0027420) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.364899236939934, LR: 0.0003 +[2026-03-01 21:29:25] (step=0027421) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.365094893367247, LR: 0.0003 +[2026-03-01 21:29:33] (step=0027422) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.365290549794561, LR: 0.0003 +[2026-03-01 21:29:41] (step=0027423) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.365486206221874, LR: 0.0003 +[2026-03-01 21:29:48] (step=0027424) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.365681862649188, LR: 0.0003 +[2026-03-01 21:29:56] (step=0027425) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.365877519076502, LR: 0.0003 +[2026-03-01 21:30:04] (step=0027426) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.366073175503815, LR: 0.0003 +[2026-03-01 21:30:12] (step=0027427) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.366268831931129, LR: 0.0003 +[2026-03-01 21:30:20] (step=0027428) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.366464488358442, LR: 0.0003 +[2026-03-01 21:30:28] (step=0027429) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 5.366660144785756, LR: 0.0003 +[2026-03-01 21:30:35] (step=0027430) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.36685580121307, LR: 0.0003 +[2026-03-01 21:30:43] (step=0027431) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.367051457640383, LR: 0.0003 +[2026-03-01 21:30:51] (step=0027432) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.367247114067697, LR: 0.0003 +[2026-03-01 21:30:59] (step=0027433) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.3674427704950105, LR: 0.0003 +[2026-03-01 21:31:07] (step=0027434) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.3676384269223245, LR: 0.0003 +[2026-03-01 21:31:15] (step=0027435) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.367834083349638, LR: 0.0003 +[2026-03-01 21:31:23] (step=0027436) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.368029739776952, LR: 0.0003 +[2026-03-01 21:31:30] (step=0027437) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.368225396204266, LR: 0.0003 +[2026-03-01 21:31:38] (step=0027438) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.368421052631579, LR: 0.0003 +[2026-03-01 21:31:46] (step=0027439) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 5.368616709058893, LR: 0.0003 +[2026-03-01 21:31:54] (step=0027440) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.368812365486206, LR: 0.0003 +[2026-03-01 21:32:02] (step=0027441) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.36900802191352, LR: 0.0003 +[2026-03-01 21:32:10] (step=0027442) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.369203678340834, LR: 0.0003 +[2026-03-01 21:32:17] (step=0027443) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.369399334768147, LR: 0.0003 +[2026-03-01 21:32:25] (step=0027444) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.369594991195461, LR: 0.0003 +[2026-03-01 21:32:33] (step=0027445) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.369790647622774, LR: 0.0003 +[2026-03-01 21:32:41] (step=0027446) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.369986304050088, LR: 0.0003 +[2026-03-01 21:32:49] (step=0027447) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.370181960477401, LR: 0.0003 +[2026-03-01 21:32:57] (step=0027448) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.370377616904715, LR: 0.0003 +[2026-03-01 21:33:05] (step=0027449) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.370573273332029, LR: 0.0003 +[2026-03-01 21:33:12] (step=0027450) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 5.370768929759342, LR: 0.0003 +[2026-03-01 21:33:20] (step=0027451) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.370964586186656, LR: 0.0003 +[2026-03-01 21:33:28] (step=0027452) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.3711602426139695, LR: 0.0003 +[2026-03-01 21:33:36] (step=0027453) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.3713558990412835, LR: 0.0003 +[2026-03-01 21:33:44] (step=0027454) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.3715515554685975, LR: 0.0003 +[2026-03-01 21:33:52] (step=0027455) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.371747211895911, LR: 0.0003 +[2026-03-01 21:34:00] (step=0027456) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.371942868323225, LR: 0.0003 +[2026-03-01 21:34:08] (step=0027457) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.372138524750538, LR: 0.0003 +[2026-03-01 21:34:15] (step=0027458) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.372334181177852, LR: 0.0003 +[2026-03-01 21:34:23] (step=0027459) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.372529837605165, LR: 0.0003 +[2026-03-01 21:34:31] (step=0027460) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.372725494032479, LR: 0.0003 +[2026-03-01 21:34:39] (step=0027461) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.372921150459793, LR: 0.0003 +[2026-03-01 21:34:47] (step=0027462) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.373116806887106, LR: 0.0003 +[2026-03-01 21:34:55] (step=0027463) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.37331246331442, LR: 0.0003 +[2026-03-01 21:35:03] (step=0027464) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.373508119741733, LR: 0.0003 +[2026-03-01 21:35:10] (step=0027465) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.373703776169047, LR: 0.0003 +[2026-03-01 21:35:18] (step=0027466) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.373899432596361, LR: 0.0003 +[2026-03-01 21:35:26] (step=0027467) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.374095089023674, LR: 0.0003 +[2026-03-01 21:35:34] (step=0027468) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.374290745450988, LR: 0.0003 +[2026-03-01 21:35:42] (step=0027469) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.374486401878301, LR: 0.0003 +[2026-03-01 21:35:50] (step=0027470) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.374682058305615, LR: 0.0003 +[2026-03-01 21:35:58] (step=0027471) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.374877714732929, LR: 0.0003 +[2026-03-01 21:36:06] (step=0027472) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.3750733711602425, LR: 0.0003 +[2026-03-01 21:36:13] (step=0027473) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.3752690275875565, LR: 0.0003 +[2026-03-01 21:36:21] (step=0027474) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.37546468401487, LR: 0.0003 +[2026-03-01 21:36:29] (step=0027475) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.375660340442184, LR: 0.0003 +[2026-03-01 21:36:37] (step=0027476) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.375855996869497, LR: 0.0003 +[2026-03-01 21:36:45] (step=0027477) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.376051653296811, LR: 0.0003 +[2026-03-01 21:36:53] (step=0027478) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.376247309724125, LR: 0.0003 +[2026-03-01 21:37:01] (step=0027479) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.376442966151438, LR: 0.0003 +[2026-03-01 21:37:08] (step=0027480) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.376638622578752, LR: 0.0003 +[2026-03-01 21:37:16] (step=0027481) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.376834279006065, LR: 0.0003 +[2026-03-01 21:37:24] (step=0027482) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.377029935433379, LR: 0.0003 +[2026-03-01 21:37:32] (step=0027483) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.377225591860693, LR: 0.0003 +[2026-03-01 21:37:40] (step=0027484) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.377421248288006, LR: 0.0003 +[2026-03-01 21:37:48] (step=0027485) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.37761690471532, LR: 0.0003 +[2026-03-01 21:37:55] (step=0027486) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.377812561142633, LR: 0.0003 +[2026-03-01 21:38:03] (step=0027487) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.378008217569947, LR: 0.0003 +[2026-03-01 21:38:11] (step=0027488) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.37820387399726, LR: 0.0003 +[2026-03-01 21:38:19] (step=0027489) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.378399530424574, LR: 0.0003 +[2026-03-01 21:38:27] (step=0027490) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.378595186851888, LR: 0.0003 +[2026-03-01 21:38:35] (step=0027491) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.3787908432792015, LR: 0.0003 +[2026-03-01 21:38:43] (step=0027492) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 5.3789864997065155, LR: 0.0003 +[2026-03-01 21:38:50] (step=0027493) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.379182156133829, LR: 0.0003 +[2026-03-01 21:38:58] (step=0027494) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.379377812561143, LR: 0.0003 +[2026-03-01 21:39:06] (step=0027495) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.379573468988457, LR: 0.0003 +[2026-03-01 21:39:14] (step=0027496) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.37976912541577, LR: 0.0003 +[2026-03-01 21:39:22] (step=0027497) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.379964781843084, LR: 0.0003 +[2026-03-01 21:39:30] (step=0027498) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.380160438270397, LR: 0.0003 +[2026-03-01 21:39:38] (step=0027499) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.380356094697711, LR: 0.0003 +[2026-03-01 21:39:45] (step=0027500) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.380551751125024, LR: 0.0003 +[2026-03-01 21:39:45] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0027500/ +[2026-03-01 21:39:53] (step=0027501) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.380747407552338, LR: 0.0003 +[2026-03-01 21:40:01] (step=0027502) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.380943063979652, LR: 0.0003 +[2026-03-01 21:40:09] (step=0027503) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.381138720406965, LR: 0.0003 +[2026-03-01 21:40:17] (step=0027504) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.381334376834279, LR: 0.0003 +[2026-03-01 21:40:25] (step=0027505) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.381530033261592, LR: 0.0003 +[2026-03-01 21:40:33] (step=0027506) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.381725689688906, LR: 0.0003 +[2026-03-01 21:40:41] (step=0027507) Train Loss: 0.4410, Train Steps/Sec: 0.12, Epoch: 5.38192134611622, LR: 0.0003 +[2026-03-01 21:40:48] (step=0027508) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.382117002543533, LR: 0.0003 +[2026-03-01 21:40:56] (step=0027509) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.382312658970847, LR: 0.0003 +[2026-03-01 21:41:04] (step=0027510) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.3825083153981605, LR: 0.0003 +[2026-03-01 21:41:12] (step=0027511) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.3827039718254746, LR: 0.0003 +[2026-03-01 21:41:20] (step=0027512) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.382899628252788, LR: 0.0003 +[2026-03-01 21:41:28] (step=0027513) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.383095284680102, LR: 0.0003 +[2026-03-01 21:41:36] (step=0027514) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 5.383290941107416, LR: 0.0003 +[2026-03-01 21:41:43] (step=0027515) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.383486597534729, LR: 0.0003 +[2026-03-01 21:41:51] (step=0027516) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.383682253962043, LR: 0.0003 +[2026-03-01 21:41:59] (step=0027517) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.383877910389356, LR: 0.0003 +[2026-03-01 21:42:07] (step=0027518) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.38407356681667, LR: 0.0003 +[2026-03-01 21:42:15] (step=0027519) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.384269223243984, LR: 0.0003 +[2026-03-01 21:42:23] (step=0027520) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.384464879671297, LR: 0.0003 +[2026-03-01 21:42:31] (step=0027521) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.384660536098611, LR: 0.0003 +[2026-03-01 21:42:39] (step=0027522) Train Loss: 0.4415, Train Steps/Sec: 0.12, Epoch: 5.384856192525924, LR: 0.0003 +[2026-03-01 21:42:46] (step=0027523) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.385051848953238, LR: 0.0003 +[2026-03-01 21:42:54] (step=0027524) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 5.385247505380552, LR: 0.0003 +[2026-03-01 21:43:02] (step=0027525) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.385443161807865, LR: 0.0003 +[2026-03-01 21:43:10] (step=0027526) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.385638818235179, LR: 0.0003 +[2026-03-01 21:43:18] (step=0027527) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.385834474662492, LR: 0.0003 +[2026-03-01 21:43:26] (step=0027528) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.386030131089806, LR: 0.0003 +[2026-03-01 21:43:34] (step=0027529) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 5.38622578751712, LR: 0.0003 +[2026-03-01 21:43:41] (step=0027530) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.386421443944434, LR: 0.0003 +[2026-03-01 21:43:49] (step=0027531) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.386617100371748, LR: 0.0003 +[2026-03-01 21:43:57] (step=0027532) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.386812756799061, LR: 0.0003 +[2026-03-01 21:44:05] (step=0027533) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.387008413226375, LR: 0.0003 +[2026-03-01 21:44:13] (step=0027534) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.387204069653688, LR: 0.0003 +[2026-03-01 21:44:21] (step=0027535) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.387399726081002, LR: 0.0003 +[2026-03-01 21:44:29] (step=0027536) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.387595382508316, LR: 0.0003 +[2026-03-01 21:44:36] (step=0027537) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.387791038935629, LR: 0.0003 +[2026-03-01 21:44:44] (step=0027538) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.387986695362943, LR: 0.0003 +[2026-03-01 21:44:52] (step=0027539) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.388182351790256, LR: 0.0003 +[2026-03-01 21:45:00] (step=0027540) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 5.38837800821757, LR: 0.0003 +[2026-03-01 21:45:08] (step=0027541) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.388573664644883, LR: 0.0003 +[2026-03-01 21:45:16] (step=0027542) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.388769321072197, LR: 0.0003 +[2026-03-01 21:45:24] (step=0027543) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.388964977499511, LR: 0.0003 +[2026-03-01 21:45:31] (step=0027544) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.389160633926824, LR: 0.0003 +[2026-03-01 21:45:39] (step=0027545) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.389356290354138, LR: 0.0003 +[2026-03-01 21:45:47] (step=0027546) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.389551946781451, LR: 0.0003 +[2026-03-01 21:45:55] (step=0027547) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.3897476032087654, LR: 0.0003 +[2026-03-01 21:46:03] (step=0027548) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.3899432596360795, LR: 0.0003 +[2026-03-01 21:46:11] (step=0027549) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.390138916063393, LR: 0.0003 +[2026-03-01 21:46:19] (step=0027550) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 5.390334572490707, LR: 0.0003 +[2026-03-01 21:46:26] (step=0027551) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.39053022891802, LR: 0.0003 +[2026-03-01 21:46:34] (step=0027552) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.390725885345334, LR: 0.0003 +[2026-03-01 21:46:42] (step=0027553) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.390921541772647, LR: 0.0003 +[2026-03-01 21:46:50] (step=0027554) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.391117198199961, LR: 0.0003 +[2026-03-01 21:46:58] (step=0027555) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.391312854627275, LR: 0.0003 +[2026-03-01 21:47:06] (step=0027556) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.391508511054588, LR: 0.0003 +[2026-03-01 21:47:14] (step=0027557) Train Loss: 0.4566, Train Steps/Sec: 0.12, Epoch: 5.391704167481902, LR: 0.0003 +[2026-03-01 21:47:22] (step=0027558) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.391899823909215, LR: 0.0003 +[2026-03-01 21:47:29] (step=0027559) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.392095480336529, LR: 0.0003 +[2026-03-01 21:47:37] (step=0027560) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.392291136763843, LR: 0.0003 +[2026-03-01 21:47:45] (step=0027561) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.392486793191156, LR: 0.0003 +[2026-03-01 21:47:53] (step=0027562) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.39268244961847, LR: 0.0003 +[2026-03-01 21:48:01] (step=0027563) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.392878106045783, LR: 0.0003 +[2026-03-01 21:48:09] (step=0027564) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.393073762473097, LR: 0.0003 +[2026-03-01 21:48:16] (step=0027565) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.3932694189004105, LR: 0.0003 +[2026-03-01 21:48:24] (step=0027566) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.3934650753277245, LR: 0.0003 +[2026-03-01 21:48:32] (step=0027567) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.3936607317550385, LR: 0.0003 +[2026-03-01 21:48:40] (step=0027568) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.393856388182352, LR: 0.0003 +[2026-03-01 21:48:48] (step=0027569) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.394052044609666, LR: 0.0003 +[2026-03-01 21:48:56] (step=0027570) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.394247701036979, LR: 0.0003 +[2026-03-01 21:49:04] (step=0027571) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.394443357464293, LR: 0.0003 +[2026-03-01 21:49:12] (step=0027572) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.394639013891607, LR: 0.0003 +[2026-03-01 21:49:19] (step=0027573) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.39483467031892, LR: 0.0003 +[2026-03-01 21:49:27] (step=0027574) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.395030326746234, LR: 0.0003 +[2026-03-01 21:49:35] (step=0027575) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.395225983173547, LR: 0.0003 +[2026-03-01 21:49:43] (step=0027576) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 5.395421639600861, LR: 0.0003 +[2026-03-01 21:49:51] (step=0027577) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.395617296028175, LR: 0.0003 +[2026-03-01 21:49:59] (step=0027578) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.395812952455488, LR: 0.0003 +[2026-03-01 21:50:07] (step=0027579) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.396008608882802, LR: 0.0003 +[2026-03-01 21:50:14] (step=0027580) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.396204265310115, LR: 0.0003 +[2026-03-01 21:50:22] (step=0027581) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.396399921737429, LR: 0.0003 +[2026-03-01 21:50:30] (step=0027582) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.396595578164742, LR: 0.0003 +[2026-03-01 21:50:38] (step=0027583) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.396791234592056, LR: 0.0003 +[2026-03-01 21:50:46] (step=0027584) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.39698689101937, LR: 0.0003 +[2026-03-01 21:50:54] (step=0027585) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 5.3971825474466835, LR: 0.0003 +[2026-03-01 21:51:02] (step=0027586) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.3973782038739975, LR: 0.0003 +[2026-03-01 21:51:09] (step=0027587) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.397573860301311, LR: 0.0003 +[2026-03-01 21:51:17] (step=0027588) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.397769516728625, LR: 0.0003 +[2026-03-01 21:51:25] (step=0027589) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.397965173155939, LR: 0.0003 +[2026-03-01 21:51:33] (step=0027590) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.398160829583252, LR: 0.0003 +[2026-03-01 21:51:41] (step=0027591) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 5.398356486010566, LR: 0.0003 +[2026-03-01 21:51:49] (step=0027592) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.398552142437879, LR: 0.0003 +[2026-03-01 21:51:57] (step=0027593) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.398747798865193, LR: 0.0003 +[2026-03-01 21:52:04] (step=0027594) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.398943455292506, LR: 0.0003 +[2026-03-01 21:52:12] (step=0027595) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.39913911171982, LR: 0.0003 +[2026-03-01 21:52:20] (step=0027596) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.399334768147134, LR: 0.0003 +[2026-03-01 21:52:28] (step=0027597) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.399530424574447, LR: 0.0003 +[2026-03-01 21:52:36] (step=0027598) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.399726081001761, LR: 0.0003 +[2026-03-01 21:52:44] (step=0027599) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 5.399921737429074, LR: 0.0003 +[2026-03-01 21:52:51] (step=0027600) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.400117393856388, LR: 0.0003 +[2026-03-01 21:52:59] (step=0027601) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.400313050283702, LR: 0.0003 +[2026-03-01 21:53:07] (step=0027602) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.400508706711015, LR: 0.0003 +[2026-03-01 21:53:15] (step=0027603) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.400704363138329, LR: 0.0003 +[2026-03-01 21:53:23] (step=0027604) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.4009000195656425, LR: 0.0003 +[2026-03-01 21:53:31] (step=0027605) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.4010956759929565, LR: 0.0003 +[2026-03-01 21:53:39] (step=0027606) Train Loss: 0.4393, Train Steps/Sec: 0.12, Epoch: 5.40129133242027, LR: 0.0003 +[2026-03-01 21:53:47] (step=0027607) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.401486988847584, LR: 0.0003 +[2026-03-01 21:53:54] (step=0027608) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.401682645274898, LR: 0.0003 +[2026-03-01 21:54:02] (step=0027609) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.401878301702211, LR: 0.0003 +[2026-03-01 21:54:10] (step=0027610) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.402073958129525, LR: 0.0003 +[2026-03-01 21:54:18] (step=0027611) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.402269614556838, LR: 0.0003 +[2026-03-01 21:54:26] (step=0027612) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.402465270984152, LR: 0.0003 +[2026-03-01 21:54:34] (step=0027613) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 5.402660927411466, LR: 0.0003 +[2026-03-01 21:54:42] (step=0027614) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.402856583838779, LR: 0.0003 +[2026-03-01 21:54:49] (step=0027615) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.403052240266093, LR: 0.0003 +[2026-03-01 21:54:57] (step=0027616) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.403247896693406, LR: 0.0003 +[2026-03-01 21:55:05] (step=0027617) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.40344355312072, LR: 0.0003 +[2026-03-01 21:55:13] (step=0027618) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.403639209548033, LR: 0.0003 +[2026-03-01 21:55:21] (step=0027619) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.403834865975347, LR: 0.0003 +[2026-03-01 21:55:29] (step=0027620) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 5.404030522402661, LR: 0.0003 +[2026-03-01 21:55:37] (step=0027621) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.404226178829974, LR: 0.0003 +[2026-03-01 21:55:45] (step=0027622) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.404421835257288, LR: 0.0003 +[2026-03-01 21:55:52] (step=0027623) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.4046174916846015, LR: 0.0003 +[2026-03-01 21:56:00] (step=0027624) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.4048131481119155, LR: 0.0003 +[2026-03-01 21:56:08] (step=0027625) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.4050088045392295, LR: 0.0003 +[2026-03-01 21:56:16] (step=0027626) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.405204460966543, LR: 0.0003 +[2026-03-01 21:56:24] (step=0027627) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.405400117393857, LR: 0.0003 +[2026-03-01 21:56:32] (step=0027628) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.40559577382117, LR: 0.0003 +[2026-03-01 21:56:39] (step=0027629) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.405791430248484, LR: 0.0003 +[2026-03-01 21:56:47] (step=0027630) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 5.405987086675797, LR: 0.0003 +[2026-03-01 21:56:55] (step=0027631) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 5.406182743103111, LR: 0.0003 +[2026-03-01 21:57:03] (step=0027632) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 5.406378399530425, LR: 0.0003 +[2026-03-01 21:57:11] (step=0027633) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.406574055957738, LR: 0.0003 +[2026-03-01 21:57:19] (step=0027634) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 5.406769712385052, LR: 0.0003 +[2026-03-01 21:57:27] (step=0027635) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.406965368812365, LR: 0.0003 +[2026-03-01 21:57:34] (step=0027636) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.407161025239679, LR: 0.0003 +[2026-03-01 21:57:42] (step=0027637) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.407356681666993, LR: 0.0003 +[2026-03-01 21:57:50] (step=0027638) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 5.407552338094306, LR: 0.0003 +[2026-03-01 21:57:58] (step=0027639) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.40774799452162, LR: 0.0003 +[2026-03-01 21:58:06] (step=0027640) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.407943650948933, LR: 0.0003 +[2026-03-01 21:58:14] (step=0027641) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.408139307376247, LR: 0.0003 +[2026-03-01 21:58:22] (step=0027642) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 5.408334963803561, LR: 0.0003 +[2026-03-01 21:58:29] (step=0027643) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.4085306202308745, LR: 0.0003 +[2026-03-01 21:58:37] (step=0027644) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.4087262766581885, LR: 0.0003 +[2026-03-01 21:58:45] (step=0027645) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.408921933085502, LR: 0.0003 +[2026-03-01 21:58:53] (step=0027646) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.409117589512816, LR: 0.0003 +[2026-03-01 21:59:01] (step=0027647) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.409313245940129, LR: 0.0003 +[2026-03-01 21:59:09] (step=0027648) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.409508902367443, LR: 0.0003 +[2026-03-01 21:59:17] (step=0027649) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.409704558794757, LR: 0.0003 +[2026-03-01 21:59:24] (step=0027650) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.40990021522207, LR: 0.0003 +[2026-03-01 21:59:32] (step=0027651) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.410095871649384, LR: 0.0003 +[2026-03-01 21:59:40] (step=0027652) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.410291528076697, LR: 0.0003 +[2026-03-01 21:59:48] (step=0027653) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.410487184504011, LR: 0.0003 +[2026-03-01 21:59:56] (step=0027654) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.410682840931325, LR: 0.0003 +[2026-03-01 22:00:04] (step=0027655) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.410878497358638, LR: 0.0003 +[2026-03-01 22:00:11] (step=0027656) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.411074153785952, LR: 0.0003 +[2026-03-01 22:00:19] (step=0027657) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.411269810213265, LR: 0.0003 +[2026-03-01 22:00:27] (step=0027658) Train Loss: 0.4568, Train Steps/Sec: 0.12, Epoch: 5.411465466640579, LR: 0.0003 +[2026-03-01 22:00:35] (step=0027659) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.411661123067892, LR: 0.0003 +[2026-03-01 22:00:43] (step=0027660) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.411856779495206, LR: 0.0003 +[2026-03-01 22:00:51] (step=0027661) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.41205243592252, LR: 0.0003 +[2026-03-01 22:00:59] (step=0027662) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.4122480923498335, LR: 0.0003 +[2026-03-01 22:01:07] (step=0027663) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.4124437487771475, LR: 0.0003 +[2026-03-01 22:01:14] (step=0027664) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.412639405204461, LR: 0.0003 +[2026-03-01 22:01:22] (step=0027665) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.412835061631775, LR: 0.0003 +[2026-03-01 22:01:30] (step=0027666) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 5.413030718059089, LR: 0.0003 +[2026-03-01 22:01:38] (step=0027667) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.413226374486402, LR: 0.0003 +[2026-03-01 22:01:46] (step=0027668) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.413422030913716, LR: 0.0003 +[2026-03-01 22:01:54] (step=0027669) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.413617687341029, LR: 0.0003 +[2026-03-01 22:02:02] (step=0027670) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.413813343768343, LR: 0.0003 +[2026-03-01 22:02:10] (step=0027671) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.414009000195656, LR: 0.0003 +[2026-03-01 22:02:17] (step=0027672) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.41420465662297, LR: 0.0003 +[2026-03-01 22:02:25] (step=0027673) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.414400313050284, LR: 0.0003 +[2026-03-01 22:02:33] (step=0027674) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.414595969477597, LR: 0.0003 +[2026-03-01 22:02:41] (step=0027675) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.414791625904911, LR: 0.0003 +[2026-03-01 22:02:49] (step=0027676) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.414987282332224, LR: 0.0003 +[2026-03-01 22:02:57] (step=0027677) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.415182938759538, LR: 0.0003 +[2026-03-01 22:03:04] (step=0027678) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.415378595186852, LR: 0.0003 +[2026-03-01 22:03:12] (step=0027679) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.415574251614165, LR: 0.0003 +[2026-03-01 22:03:20] (step=0027680) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.415769908041479, LR: 0.0003 +[2026-03-01 22:03:28] (step=0027681) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.4159655644687925, LR: 0.0003 +[2026-03-01 22:03:36] (step=0027682) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.4161612208961065, LR: 0.0003 +[2026-03-01 22:03:44] (step=0027683) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 5.41635687732342, LR: 0.0003 +[2026-03-01 22:03:52] (step=0027684) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.416552533750734, LR: 0.0003 +[2026-03-01 22:03:59] (step=0027685) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.416748190178048, LR: 0.0003 +[2026-03-01 22:04:07] (step=0027686) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.416943846605361, LR: 0.0003 +[2026-03-01 22:04:15] (step=0027687) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.417139503032675, LR: 0.0003 +[2026-03-01 22:04:23] (step=0027688) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.417335159459988, LR: 0.0003 +[2026-03-01 22:04:31] (step=0027689) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.417530815887302, LR: 0.0003 +[2026-03-01 22:04:39] (step=0027690) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.417726472314616, LR: 0.0003 +[2026-03-01 22:04:47] (step=0027691) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.417922128741929, LR: 0.0003 +[2026-03-01 22:04:54] (step=0027692) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 5.418117785169243, LR: 0.0003 +[2026-03-01 22:05:02] (step=0027693) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.418313441596556, LR: 0.0003 +[2026-03-01 22:05:10] (step=0027694) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 5.41850909802387, LR: 0.0003 +[2026-03-01 22:05:18] (step=0027695) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.418704754451184, LR: 0.0003 +[2026-03-01 22:05:26] (step=0027696) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.418900410878497, LR: 0.0003 +[2026-03-01 22:05:34] (step=0027697) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.419096067305811, LR: 0.0003 +[2026-03-01 22:05:42] (step=0027698) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.419291723733124, LR: 0.0003 +[2026-03-01 22:05:49] (step=0027699) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.419487380160438, LR: 0.0003 +[2026-03-01 22:05:57] (step=0027700) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.4196830365877515, LR: 0.0003 +[2026-03-01 22:06:05] (step=0027701) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.4198786930150655, LR: 0.0003 +[2026-03-01 22:06:13] (step=0027702) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.4200743494423795, LR: 0.0003 +[2026-03-01 22:06:21] (step=0027703) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.420270005869693, LR: 0.0003 +[2026-03-01 22:06:29] (step=0027704) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.420465662297007, LR: 0.0003 +[2026-03-01 22:06:37] (step=0027705) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.42066131872432, LR: 0.0003 +[2026-03-01 22:06:45] (step=0027706) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.420856975151634, LR: 0.0003 +[2026-03-01 22:06:52] (step=0027707) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.421052631578948, LR: 0.0003 +[2026-03-01 22:07:00] (step=0027708) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.421248288006261, LR: 0.0003 +[2026-03-01 22:07:08] (step=0027709) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.421443944433575, LR: 0.0003 +[2026-03-01 22:07:16] (step=0027710) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.421639600860888, LR: 0.0003 +[2026-03-01 22:07:24] (step=0027711) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 5.421835257288202, LR: 0.0003 +[2026-03-01 22:07:32] (step=0027712) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.422030913715515, LR: 0.0003 +[2026-03-01 22:07:40] (step=0027713) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 5.422226570142829, LR: 0.0003 +[2026-03-01 22:07:47] (step=0027714) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.422422226570143, LR: 0.0003 +[2026-03-01 22:07:55] (step=0027715) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.422617882997456, LR: 0.0003 +[2026-03-01 22:08:03] (step=0027716) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.42281353942477, LR: 0.0003 +[2026-03-01 22:08:11] (step=0027717) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.423009195852083, LR: 0.0003 +[2026-03-01 22:08:19] (step=0027718) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.423204852279397, LR: 0.0003 +[2026-03-01 22:08:27] (step=0027719) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 5.423400508706711, LR: 0.0003 +[2026-03-01 22:08:35] (step=0027720) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.4235961651340245, LR: 0.0003 +[2026-03-01 22:08:42] (step=0027721) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.4237918215613385, LR: 0.0003 +[2026-03-01 22:08:50] (step=0027722) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.423987477988652, LR: 0.0003 +[2026-03-01 22:08:58] (step=0027723) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.424183134415966, LR: 0.0003 +[2026-03-01 22:09:06] (step=0027724) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.424378790843279, LR: 0.0003 +[2026-03-01 22:09:14] (step=0027725) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.424574447270593, LR: 0.0003 +[2026-03-01 22:09:22] (step=0027726) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.424770103697907, LR: 0.0003 +[2026-03-01 22:09:30] (step=0027727) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.42496576012522, LR: 0.0003 +[2026-03-01 22:09:37] (step=0027728) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.425161416552534, LR: 0.0003 +[2026-03-01 22:09:45] (step=0027729) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.425357072979847, LR: 0.0003 +[2026-03-01 22:09:53] (step=0027730) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.425552729407161, LR: 0.0003 +[2026-03-01 22:10:01] (step=0027731) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.425748385834475, LR: 0.0003 +[2026-03-01 22:10:09] (step=0027732) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.425944042261788, LR: 0.0003 +[2026-03-01 22:10:17] (step=0027733) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 5.426139698689102, LR: 0.0003 +[2026-03-01 22:10:25] (step=0027734) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.426335355116415, LR: 0.0003 +[2026-03-01 22:10:32] (step=0027735) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.426531011543729, LR: 0.0003 +[2026-03-01 22:10:40] (step=0027736) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.426726667971042, LR: 0.0003 +[2026-03-01 22:10:48] (step=0027737) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.426922324398356, LR: 0.0003 +[2026-03-01 22:10:56] (step=0027738) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.42711798082567, LR: 0.0003 +[2026-03-01 22:11:04] (step=0027739) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.4273136372529835, LR: 0.0003 +[2026-03-01 22:11:12] (step=0027740) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.4275092936802976, LR: 0.0003 +[2026-03-01 22:11:20] (step=0027741) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.427704950107611, LR: 0.0003 +[2026-03-01 22:11:28] (step=0027742) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.427900606534925, LR: 0.0003 +[2026-03-01 22:11:35] (step=0027743) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.428096262962239, LR: 0.0003 +[2026-03-01 22:11:43] (step=0027744) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.428291919389552, LR: 0.0003 +[2026-03-01 22:11:51] (step=0027745) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.428487575816866, LR: 0.0003 +[2026-03-01 22:11:59] (step=0027746) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.428683232244179, LR: 0.0003 +[2026-03-01 22:12:07] (step=0027747) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.428878888671493, LR: 0.0003 +[2026-03-01 22:12:15] (step=0027748) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 5.429074545098807, LR: 0.0003 +[2026-03-01 22:12:23] (step=0027749) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 5.42927020152612, LR: 0.0003 +[2026-03-01 22:12:30] (step=0027750) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.429465857953434, LR: 0.0003 +[2026-03-01 22:12:38] (step=0027751) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.429661514380747, LR: 0.0003 +[2026-03-01 22:12:46] (step=0027752) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.429857170808061, LR: 0.0003 +[2026-03-01 22:12:54] (step=0027753) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.430052827235374, LR: 0.0003 +[2026-03-01 22:13:02] (step=0027754) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.430248483662688, LR: 0.0003 +[2026-03-01 22:13:10] (step=0027755) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.430444140090002, LR: 0.0003 +[2026-03-01 22:13:18] (step=0027756) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.430639796517315, LR: 0.0003 +[2026-03-01 22:13:26] (step=0027757) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.430835452944629, LR: 0.0003 +[2026-03-01 22:13:33] (step=0027758) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.4310311093719426, LR: 0.0003 +[2026-03-01 22:13:41] (step=0027759) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 5.431226765799257, LR: 0.0003 +[2026-03-01 22:13:49] (step=0027760) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 5.431422422226571, LR: 0.0003 +[2026-03-01 22:13:57] (step=0027761) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.431618078653884, LR: 0.0003 +[2026-03-01 22:14:05] (step=0027762) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.431813735081198, LR: 0.0003 +[2026-03-01 22:14:13] (step=0027763) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.432009391508511, LR: 0.0003 +[2026-03-01 22:14:21] (step=0027764) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.432205047935825, LR: 0.0003 +[2026-03-01 22:14:28] (step=0027765) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.432400704363138, LR: 0.0003 +[2026-03-01 22:14:36] (step=0027766) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.432596360790452, LR: 0.0003 +[2026-03-01 22:14:44] (step=0027767) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.432792017217766, LR: 0.0003 +[2026-03-01 22:14:52] (step=0027768) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 5.432987673645079, LR: 0.0003 +[2026-03-01 22:15:00] (step=0027769) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.433183330072393, LR: 0.0003 +[2026-03-01 22:15:08] (step=0027770) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.433378986499706, LR: 0.0003 +[2026-03-01 22:15:16] (step=0027771) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.43357464292702, LR: 0.0003 +[2026-03-01 22:15:24] (step=0027772) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.433770299354334, LR: 0.0003 +[2026-03-01 22:15:31] (step=0027773) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.433965955781647, LR: 0.0003 +[2026-03-01 22:15:39] (step=0027774) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.434161612208961, LR: 0.0003 +[2026-03-01 22:15:47] (step=0027775) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.434357268636274, LR: 0.0003 +[2026-03-01 22:15:55] (step=0027776) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.4345529250635884, LR: 0.0003 +[2026-03-01 22:16:03] (step=0027777) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.434748581490902, LR: 0.0003 +[2026-03-01 22:16:11] (step=0027778) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.434944237918216, LR: 0.0003 +[2026-03-01 22:16:19] (step=0027779) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.43513989434553, LR: 0.0003 +[2026-03-01 22:16:26] (step=0027780) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.435335550772843, LR: 0.0003 +[2026-03-01 22:16:34] (step=0027781) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.435531207200157, LR: 0.0003 +[2026-03-01 22:16:42] (step=0027782) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.43572686362747, LR: 0.0003 +[2026-03-01 22:16:50] (step=0027783) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.435922520054784, LR: 0.0003 +[2026-03-01 22:16:58] (step=0027784) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.436118176482098, LR: 0.0003 +[2026-03-01 22:17:06] (step=0027785) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 5.436313832909411, LR: 0.0003 +[2026-03-01 22:17:14] (step=0027786) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.436509489336725, LR: 0.0003 +[2026-03-01 22:17:21] (step=0027787) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.436705145764038, LR: 0.0003 +[2026-03-01 22:17:29] (step=0027788) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 5.436900802191352, LR: 0.0003 +[2026-03-01 22:17:37] (step=0027789) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.437096458618665, LR: 0.0003 +[2026-03-01 22:17:45] (step=0027790) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.437292115045979, LR: 0.0003 +[2026-03-01 22:17:53] (step=0027791) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.437487771473293, LR: 0.0003 +[2026-03-01 22:18:01] (step=0027792) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 5.437683427900606, LR: 0.0003 +[2026-03-01 22:18:09] (step=0027793) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.43787908432792, LR: 0.0003 +[2026-03-01 22:18:16] (step=0027794) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.4380747407552334, LR: 0.0003 +[2026-03-01 22:18:24] (step=0027795) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.4382703971825475, LR: 0.0003 +[2026-03-01 22:18:32] (step=0027796) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.4384660536098615, LR: 0.0003 +[2026-03-01 22:18:40] (step=0027797) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 5.438661710037175, LR: 0.0003 +[2026-03-01 22:18:48] (step=0027798) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.438857366464489, LR: 0.0003 +[2026-03-01 22:18:56] (step=0027799) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.439053022891802, LR: 0.0003 +[2026-03-01 22:19:04] (step=0027800) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 5.439248679319116, LR: 0.0003 +[2026-03-01 22:19:12] (step=0027801) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.439444335746429, LR: 0.0003 +[2026-03-01 22:19:19] (step=0027802) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.439639992173743, LR: 0.0003 +[2026-03-01 22:19:27] (step=0027803) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.439835648601057, LR: 0.0003 +[2026-03-01 22:19:35] (step=0027804) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.44003130502837, LR: 0.0003 +[2026-03-01 22:19:43] (step=0027805) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.440226961455684, LR: 0.0003 +[2026-03-01 22:19:51] (step=0027806) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.440422617882997, LR: 0.0003 +[2026-03-01 22:19:59] (step=0027807) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.440618274310311, LR: 0.0003 +[2026-03-01 22:20:07] (step=0027808) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.440813930737625, LR: 0.0003 +[2026-03-01 22:20:14] (step=0027809) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.441009587164938, LR: 0.0003 +[2026-03-01 22:20:22] (step=0027810) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.441205243592252, LR: 0.0003 +[2026-03-01 22:20:30] (step=0027811) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.441400900019565, LR: 0.0003 +[2026-03-01 22:20:38] (step=0027812) Train Loss: 0.4386, Train Steps/Sec: 0.12, Epoch: 5.441596556446879, LR: 0.0003 +[2026-03-01 22:20:46] (step=0027813) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.441792212874193, LR: 0.0003 +[2026-03-01 22:20:54] (step=0027814) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 5.4419878693015065, LR: 0.0003 +[2026-03-01 22:21:02] (step=0027815) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.4421835257288205, LR: 0.0003 +[2026-03-01 22:21:10] (step=0027816) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 5.442379182156134, LR: 0.0003 +[2026-03-01 22:21:17] (step=0027817) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.442574838583448, LR: 0.0003 +[2026-03-01 22:21:25] (step=0027818) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.442770495010761, LR: 0.0003 +[2026-03-01 22:21:33] (step=0027819) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 5.442966151438075, LR: 0.0003 +[2026-03-01 22:21:41] (step=0027820) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.443161807865389, LR: 0.0003 +[2026-03-01 22:21:49] (step=0027821) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.443357464292702, LR: 0.0003 +[2026-03-01 22:21:57] (step=0027822) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.443553120720016, LR: 0.0003 +[2026-03-01 22:22:05] (step=0027823) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.443748777147329, LR: 0.0003 +[2026-03-01 22:22:12] (step=0027824) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.443944433574643, LR: 0.0003 +[2026-03-01 22:22:20] (step=0027825) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.444140090001957, LR: 0.0003 +[2026-03-01 22:22:28] (step=0027826) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.44433574642927, LR: 0.0003 +[2026-03-01 22:22:36] (step=0027827) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.444531402856584, LR: 0.0003 +[2026-03-01 22:22:44] (step=0027828) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.444727059283897, LR: 0.0003 +[2026-03-01 22:22:52] (step=0027829) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.444922715711211, LR: 0.0003 +[2026-03-01 22:22:59] (step=0027830) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.445118372138524, LR: 0.0003 +[2026-03-01 22:23:07] (step=0027831) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.445314028565838, LR: 0.0003 +[2026-03-01 22:23:15] (step=0027832) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.445509684993152, LR: 0.0003 +[2026-03-01 22:23:23] (step=0027833) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.4457053414204655, LR: 0.0003 +[2026-03-01 22:23:31] (step=0027834) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.4459009978477795, LR: 0.0003 +[2026-03-01 22:23:39] (step=0027835) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.446096654275093, LR: 0.0003 +[2026-03-01 22:23:47] (step=0027836) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.446292310702407, LR: 0.0003 +[2026-03-01 22:23:54] (step=0027837) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.446487967129721, LR: 0.0003 +[2026-03-01 22:24:02] (step=0027838) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 5.446683623557034, LR: 0.0003 +[2026-03-01 22:24:10] (step=0027839) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.446879279984348, LR: 0.0003 +[2026-03-01 22:24:18] (step=0027840) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.447074936411661, LR: 0.0003 +[2026-03-01 22:24:26] (step=0027841) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.447270592838975, LR: 0.0003 +[2026-03-01 22:24:34] (step=0027842) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.447466249266288, LR: 0.0003 +[2026-03-01 22:24:42] (step=0027843) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.447661905693602, LR: 0.0003 +[2026-03-01 22:24:49] (step=0027844) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.447857562120916, LR: 0.0003 +[2026-03-01 22:24:57] (step=0027845) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.448053218548229, LR: 0.0003 +[2026-03-01 22:25:05] (step=0027846) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.448248874975543, LR: 0.0003 +[2026-03-01 22:25:13] (step=0027847) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.448444531402856, LR: 0.0003 +[2026-03-01 22:25:21] (step=0027848) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.44864018783017, LR: 0.0003 +[2026-03-01 22:25:29] (step=0027849) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 5.448835844257484, LR: 0.0003 +[2026-03-01 22:25:37] (step=0027850) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.449031500684797, LR: 0.0003 +[2026-03-01 22:25:45] (step=0027851) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 5.449227157112111, LR: 0.0003 +[2026-03-01 22:25:52] (step=0027852) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 5.4494228135394245, LR: 0.0003 +[2026-03-01 22:26:00] (step=0027853) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 5.4496184699667385, LR: 0.0003 +[2026-03-01 22:26:08] (step=0027854) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.449814126394052, LR: 0.0003 +[2026-03-01 22:26:16] (step=0027855) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.450009782821366, LR: 0.0003 +[2026-03-01 22:26:24] (step=0027856) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 5.45020543924868, LR: 0.0003 +[2026-03-01 22:26:32] (step=0027857) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.450401095675993, LR: 0.0003 +[2026-03-01 22:26:40] (step=0027858) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.450596752103307, LR: 0.0003 +[2026-03-01 22:26:47] (step=0027859) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.45079240853062, LR: 0.0003 +[2026-03-01 22:26:55] (step=0027860) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.450988064957934, LR: 0.0003 +[2026-03-01 22:27:03] (step=0027861) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.451183721385248, LR: 0.0003 +[2026-03-01 22:27:11] (step=0027862) Train Loss: 0.4428, Train Steps/Sec: 0.12, Epoch: 5.451379377812561, LR: 0.0003 +[2026-03-01 22:27:19] (step=0027863) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.451575034239875, LR: 0.0003 +[2026-03-01 22:27:27] (step=0027864) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 5.451770690667188, LR: 0.0003 +[2026-03-01 22:27:35] (step=0027865) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.451966347094502, LR: 0.0003 +[2026-03-01 22:27:43] (step=0027866) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.452162003521816, LR: 0.0003 +[2026-03-01 22:27:50] (step=0027867) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.452357659949129, LR: 0.0003 +[2026-03-01 22:27:58] (step=0027868) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.452553316376443, LR: 0.0003 +[2026-03-01 22:28:06] (step=0027869) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.452748972803756, LR: 0.0003 +[2026-03-01 22:28:14] (step=0027870) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.45294462923107, LR: 0.0003 +[2026-03-01 22:28:22] (step=0027871) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.4531402856583835, LR: 0.0003 +[2026-03-01 22:28:30] (step=0027872) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.4533359420856975, LR: 0.0003 +[2026-03-01 22:28:38] (step=0027873) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 5.4535315985130115, LR: 0.0003 +[2026-03-01 22:28:46] (step=0027874) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.453727254940325, LR: 0.0003 +[2026-03-01 22:28:53] (step=0027875) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.453922911367639, LR: 0.0003 +[2026-03-01 22:29:01] (step=0027876) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.454118567794952, LR: 0.0003 +[2026-03-01 22:29:09] (step=0027877) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.454314224222266, LR: 0.0003 +[2026-03-01 22:29:17] (step=0027878) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.45450988064958, LR: 0.0003 +[2026-03-01 22:29:25] (step=0027879) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 5.454705537076893, LR: 0.0003 +[2026-03-01 22:29:33] (step=0027880) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.454901193504207, LR: 0.0003 +[2026-03-01 22:29:41] (step=0027881) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.45509684993152, LR: 0.0003 +[2026-03-01 22:29:48] (step=0027882) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.455292506358834, LR: 0.0003 +[2026-03-01 22:29:56] (step=0027883) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.455488162786147, LR: 0.0003 +[2026-03-01 22:30:04] (step=0027884) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.455683819213461, LR: 0.0003 +[2026-03-01 22:30:12] (step=0027885) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.455879475640775, LR: 0.0003 +[2026-03-01 22:30:20] (step=0027886) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.456075132068088, LR: 0.0003 +[2026-03-01 22:30:28] (step=0027887) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.456270788495402, LR: 0.0003 +[2026-03-01 22:30:36] (step=0027888) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.456466444922715, LR: 0.0003 +[2026-03-01 22:30:43] (step=0027889) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.456662101350029, LR: 0.0003 +[2026-03-01 22:30:51] (step=0027890) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.456857757777343, LR: 0.0003 +[2026-03-01 22:30:59] (step=0027891) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.4570534142046565, LR: 0.0003 +[2026-03-01 22:31:07] (step=0027892) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 5.4572490706319705, LR: 0.0003 +[2026-03-01 22:31:15] (step=0027893) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 5.457444727059284, LR: 0.0003 +[2026-03-01 22:31:23] (step=0027894) Train Loss: 0.4490, Train Steps/Sec: 0.12, Epoch: 5.457640383486598, LR: 0.0003 +[2026-03-01 22:31:31] (step=0027895) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 5.457836039913911, LR: 0.0003 +[2026-03-01 22:31:39] (step=0027896) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.458031696341225, LR: 0.0003 +[2026-03-01 22:31:46] (step=0027897) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.458227352768539, LR: 0.0003 +[2026-03-01 22:31:54] (step=0027898) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.458423009195852, LR: 0.0003 +[2026-03-01 22:32:02] (step=0027899) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.458618665623166, LR: 0.0003 +[2026-03-01 22:32:10] (step=0027900) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.458814322050479, LR: 0.0003 +[2026-03-01 22:32:18] (step=0027901) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.459009978477793, LR: 0.0003 +[2026-03-01 22:32:26] (step=0027902) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.459205634905107, LR: 0.0003 +[2026-03-01 22:32:34] (step=0027903) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.45940129133242, LR: 0.0003 +[2026-03-01 22:32:41] (step=0027904) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.459596947759734, LR: 0.0003 +[2026-03-01 22:32:49] (step=0027905) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.459792604187047, LR: 0.0003 +[2026-03-01 22:32:57] (step=0027906) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 5.459988260614361, LR: 0.0003 +[2026-03-01 22:33:05] (step=0027907) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.460183917041674, LR: 0.0003 +[2026-03-01 22:33:13] (step=0027908) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.460379573468988, LR: 0.0003 +[2026-03-01 22:33:21] (step=0027909) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.460575229896302, LR: 0.0003 +[2026-03-01 22:33:29] (step=0027910) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.4607708863236155, LR: 0.0003 +[2026-03-01 22:33:37] (step=0027911) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.4609665427509295, LR: 0.0003 +[2026-03-01 22:33:44] (step=0027912) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.461162199178243, LR: 0.0003 +[2026-03-01 22:33:52] (step=0027913) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.461357855605557, LR: 0.0003 +[2026-03-01 22:34:00] (step=0027914) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.461553512032871, LR: 0.0003 +[2026-03-01 22:34:08] (step=0027915) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.461749168460184, LR: 0.0003 +[2026-03-01 22:34:16] (step=0027916) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.461944824887498, LR: 0.0003 +[2026-03-01 22:34:24] (step=0027917) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.462140481314811, LR: 0.0003 +[2026-03-01 22:34:31] (step=0027918) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 5.462336137742125, LR: 0.0003 +[2026-03-01 22:34:39] (step=0027919) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.462531794169439, LR: 0.0003 +[2026-03-01 22:34:47] (step=0027920) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.462727450596752, LR: 0.0003 +[2026-03-01 22:34:55] (step=0027921) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.462923107024066, LR: 0.0003 +[2026-03-01 22:35:03] (step=0027922) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.463118763451379, LR: 0.0003 +[2026-03-01 22:35:11] (step=0027923) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.463314419878693, LR: 0.0003 +[2026-03-01 22:35:19] (step=0027924) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.463510076306006, LR: 0.0003 +[2026-03-01 22:35:26] (step=0027925) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.46370573273332, LR: 0.0003 +[2026-03-01 22:35:34] (step=0027926) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.463901389160634, LR: 0.0003 +[2026-03-01 22:35:42] (step=0027927) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.464097045587947, LR: 0.0003 +[2026-03-01 22:35:50] (step=0027928) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.464292702015261, LR: 0.0003 +[2026-03-01 22:35:58] (step=0027929) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.4644883584425745, LR: 0.0003 +[2026-03-01 22:36:06] (step=0027930) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.4646840148698885, LR: 0.0003 +[2026-03-01 22:36:14] (step=0027931) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.4648796712972025, LR: 0.0003 +[2026-03-01 22:36:21] (step=0027932) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.465075327724516, LR: 0.0003 +[2026-03-01 22:36:29] (step=0027933) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.46527098415183, LR: 0.0003 +[2026-03-01 22:36:37] (step=0027934) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.465466640579143, LR: 0.0003 +[2026-03-01 22:36:45] (step=0027935) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.465662297006457, LR: 0.0003 +[2026-03-01 22:36:53] (step=0027936) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.46585795343377, LR: 0.0003 +[2026-03-01 22:37:01] (step=0027937) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.466053609861084, LR: 0.0003 +[2026-03-01 22:37:09] (step=0027938) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.466249266288398, LR: 0.0003 +[2026-03-01 22:37:16] (step=0027939) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.466444922715711, LR: 0.0003 +[2026-03-01 22:37:24] (step=0027940) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.466640579143025, LR: 0.0003 +[2026-03-01 22:37:32] (step=0027941) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.466836235570338, LR: 0.0003 +[2026-03-01 22:37:40] (step=0027942) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.467031891997652, LR: 0.0003 +[2026-03-01 22:37:48] (step=0027943) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.467227548424966, LR: 0.0003 +[2026-03-01 22:37:56] (step=0027944) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 5.467423204852279, LR: 0.0003 +[2026-03-01 22:38:04] (step=0027945) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.467618861279593, LR: 0.0003 +[2026-03-01 22:38:12] (step=0027946) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 5.467814517706906, LR: 0.0003 +[2026-03-01 22:38:19] (step=0027947) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.46801017413422, LR: 0.0003 +[2026-03-01 22:38:27] (step=0027948) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.4682058305615335, LR: 0.0003 +[2026-03-01 22:38:35] (step=0027949) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.4684014869888475, LR: 0.0003 +[2026-03-01 22:38:43] (step=0027950) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.4685971434161615, LR: 0.0003 +[2026-03-01 22:38:51] (step=0027951) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.468792799843475, LR: 0.0003 +[2026-03-01 22:38:59] (step=0027952) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.468988456270789, LR: 0.0003 +[2026-03-01 22:39:07] (step=0027953) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.469184112698102, LR: 0.0003 +[2026-03-01 22:39:14] (step=0027954) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.469379769125416, LR: 0.0003 +[2026-03-01 22:39:22] (step=0027955) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 5.46957542555273, LR: 0.0003 +[2026-03-01 22:39:30] (step=0027956) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.469771081980043, LR: 0.0003 +[2026-03-01 22:39:38] (step=0027957) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.469966738407357, LR: 0.0003 +[2026-03-01 22:39:46] (step=0027958) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.47016239483467, LR: 0.0003 +[2026-03-01 22:39:54] (step=0027959) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.470358051261984, LR: 0.0003 +[2026-03-01 22:40:02] (step=0027960) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.470553707689297, LR: 0.0003 +[2026-03-01 22:40:09] (step=0027961) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.470749364116611, LR: 0.0003 +[2026-03-01 22:40:17] (step=0027962) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.470945020543925, LR: 0.0003 +[2026-03-01 22:40:25] (step=0027963) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.471140676971238, LR: 0.0003 +[2026-03-01 22:40:33] (step=0027964) Train Loss: 0.4682, Train Steps/Sec: 0.13, Epoch: 5.471336333398552, LR: 0.0003 +[2026-03-01 22:40:41] (step=0027965) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.471531989825865, LR: 0.0003 +[2026-03-01 22:40:49] (step=0027966) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.471727646253179, LR: 0.0003 +[2026-03-01 22:40:57] (step=0027967) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.471923302680493, LR: 0.0003 +[2026-03-01 22:41:05] (step=0027968) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.4721189591078065, LR: 0.0003 +[2026-03-01 22:41:12] (step=0027969) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.4723146155351206, LR: 0.0003 +[2026-03-01 22:41:20] (step=0027970) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.472510271962434, LR: 0.0003 +[2026-03-01 22:41:28] (step=0027971) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.472705928389748, LR: 0.0003 +[2026-03-01 22:41:36] (step=0027972) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.472901584817062, LR: 0.0003 +[2026-03-01 22:41:44] (step=0027973) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.473097241244375, LR: 0.0003 +[2026-03-01 22:41:52] (step=0027974) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.473292897671689, LR: 0.0003 +[2026-03-01 22:42:00] (step=0027975) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.473488554099002, LR: 0.0003 +[2026-03-01 22:42:07] (step=0027976) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.473684210526316, LR: 0.0003 +[2026-03-01 22:42:15] (step=0027977) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.473879866953629, LR: 0.0003 +[2026-03-01 22:42:23] (step=0027978) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.474075523380943, LR: 0.0003 +[2026-03-01 22:42:31] (step=0027979) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.474271179808257, LR: 0.0003 +[2026-03-01 22:42:39] (step=0027980) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.47446683623557, LR: 0.0003 +[2026-03-01 22:42:47] (step=0027981) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.474662492662884, LR: 0.0003 +[2026-03-01 22:42:55] (step=0027982) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.474858149090197, LR: 0.0003 +[2026-03-01 22:43:02] (step=0027983) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 5.475053805517511, LR: 0.0003 +[2026-03-01 22:43:10] (step=0027984) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.475249461944825, LR: 0.0003 +[2026-03-01 22:43:18] (step=0027985) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.475445118372138, LR: 0.0003 +[2026-03-01 22:43:26] (step=0027986) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.475640774799452, LR: 0.0003 +[2026-03-01 22:43:34] (step=0027987) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.4758364312267656, LR: 0.0003 +[2026-03-01 22:43:42] (step=0027988) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.47603208765408, LR: 0.0003 +[2026-03-01 22:43:50] (step=0027989) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.476227744081393, LR: 0.0003 +[2026-03-01 22:43:58] (step=0027990) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.476423400508707, LR: 0.0003 +[2026-03-01 22:44:05] (step=0027991) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.476619056936021, LR: 0.0003 +[2026-03-01 22:44:13] (step=0027992) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.476814713363334, LR: 0.0003 +[2026-03-01 22:44:21] (step=0027993) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.477010369790648, LR: 0.0003 +[2026-03-01 22:44:29] (step=0027994) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.477206026217961, LR: 0.0003 +[2026-03-01 22:44:37] (step=0027995) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.477401682645275, LR: 0.0003 +[2026-03-01 22:44:45] (step=0027996) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 5.477597339072589, LR: 0.0003 +[2026-03-01 22:44:53] (step=0027997) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.477792995499902, LR: 0.0003 +[2026-03-01 22:45:00] (step=0027998) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.477988651927216, LR: 0.0003 +[2026-03-01 22:45:08] (step=0027999) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.478184308354529, LR: 0.0003 +[2026-03-01 22:45:16] (step=0028000) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.478379964781843, LR: 0.0003 +[2026-03-01 22:45:16] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0028000/ +[2026-03-01 22:45:24] (step=0028001) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 5.478575621209156, LR: 0.0003 +[2026-03-01 22:45:32] (step=0028002) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.47877127763647, LR: 0.0003 +[2026-03-01 22:45:40] (step=0028003) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.478966934063784, LR: 0.0003 +[2026-03-01 22:45:48] (step=0028004) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.479162590491097, LR: 0.0003 +[2026-03-01 22:45:56] (step=0028005) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.4793582469184114, LR: 0.0003 +[2026-03-01 22:46:03] (step=0028006) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.479553903345725, LR: 0.0003 +[2026-03-01 22:46:11] (step=0028007) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.479749559773039, LR: 0.0003 +[2026-03-01 22:46:19] (step=0028008) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.479945216200353, LR: 0.0003 +[2026-03-01 22:46:27] (step=0028009) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.480140872627666, LR: 0.0003 +[2026-03-01 22:46:35] (step=0028010) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.48033652905498, LR: 0.0003 +[2026-03-01 22:46:43] (step=0028011) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.480532185482293, LR: 0.0003 +[2026-03-01 22:46:50] (step=0028012) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.480727841909607, LR: 0.0003 +[2026-03-01 22:46:58] (step=0028013) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.48092349833692, LR: 0.0003 +[2026-03-01 22:47:06] (step=0028014) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.481119154764234, LR: 0.0003 +[2026-03-01 22:47:14] (step=0028015) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.481314811191548, LR: 0.0003 +[2026-03-01 22:47:22] (step=0028016) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.481510467618861, LR: 0.0003 +[2026-03-01 22:47:30] (step=0028017) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.481706124046175, LR: 0.0003 +[2026-03-01 22:47:38] (step=0028018) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.481901780473488, LR: 0.0003 +[2026-03-01 22:47:45] (step=0028019) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.482097436900802, LR: 0.0003 +[2026-03-01 22:47:53] (step=0028020) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.482293093328116, LR: 0.0003 +[2026-03-01 22:48:01] (step=0028021) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.482488749755429, LR: 0.0003 +[2026-03-01 22:48:09] (step=0028022) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.482684406182743, LR: 0.0003 +[2026-03-01 22:48:17] (step=0028023) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.4828800626100564, LR: 0.0003 +[2026-03-01 22:48:25] (step=0028024) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.4830757190373705, LR: 0.0003 +[2026-03-01 22:48:33] (step=0028025) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.483271375464684, LR: 0.0003 +[2026-03-01 22:48:40] (step=0028026) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.483467031891998, LR: 0.0003 +[2026-03-01 22:48:48] (step=0028027) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.483662688319312, LR: 0.0003 +[2026-03-01 22:48:56] (step=0028028) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 5.483858344746625, LR: 0.0003 +[2026-03-01 22:49:04] (step=0028029) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.484054001173939, LR: 0.0003 +[2026-03-01 22:49:12] (step=0028030) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 5.484249657601252, LR: 0.0003 +[2026-03-01 22:49:20] (step=0028031) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.484445314028566, LR: 0.0003 +[2026-03-01 22:49:28] (step=0028032) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.48464097045588, LR: 0.0003 +[2026-03-01 22:49:35] (step=0028033) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 5.484836626883193, LR: 0.0003 +[2026-03-01 22:49:43] (step=0028034) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 5.485032283310507, LR: 0.0003 +[2026-03-01 22:49:51] (step=0028035) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.48522793973782, LR: 0.0003 +[2026-03-01 22:49:59] (step=0028036) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.485423596165134, LR: 0.0003 +[2026-03-01 22:50:07] (step=0028037) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.485619252592448, LR: 0.0003 +[2026-03-01 22:50:15] (step=0028038) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.485814909019761, LR: 0.0003 +[2026-03-01 22:50:23] (step=0028039) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.486010565447075, LR: 0.0003 +[2026-03-01 22:50:31] (step=0028040) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.486206221874388, LR: 0.0003 +[2026-03-01 22:50:38] (step=0028041) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 5.486401878301702, LR: 0.0003 +[2026-03-01 22:50:46] (step=0028042) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.4865975347290155, LR: 0.0003 +[2026-03-01 22:50:54] (step=0028043) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.4867931911563295, LR: 0.0003 +[2026-03-01 22:51:02] (step=0028044) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.4869888475836435, LR: 0.0003 +[2026-03-01 22:51:10] (step=0028045) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.487184504010957, LR: 0.0003 +[2026-03-01 22:51:18] (step=0028046) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.487380160438271, LR: 0.0003 +[2026-03-01 22:51:26] (step=0028047) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 5.487575816865584, LR: 0.0003 +[2026-03-01 22:51:33] (step=0028048) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 5.487771473292898, LR: 0.0003 +[2026-03-01 22:51:41] (step=0028049) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.487967129720212, LR: 0.0003 +[2026-03-01 22:51:49] (step=0028050) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.488162786147525, LR: 0.0003 +[2026-03-01 22:51:57] (step=0028051) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.488358442574839, LR: 0.0003 +[2026-03-01 22:52:05] (step=0028052) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.488554099002152, LR: 0.0003 +[2026-03-01 22:52:13] (step=0028053) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.488749755429466, LR: 0.0003 +[2026-03-01 22:52:21] (step=0028054) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.488945411856779, LR: 0.0003 +[2026-03-01 22:52:28] (step=0028055) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.489141068284093, LR: 0.0003 +[2026-03-01 22:52:36] (step=0028056) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.489336724711407, LR: 0.0003 +[2026-03-01 22:52:44] (step=0028057) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.48953238113872, LR: 0.0003 +[2026-03-01 22:52:52] (step=0028058) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.489728037566034, LR: 0.0003 +[2026-03-01 22:53:00] (step=0028059) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.489923693993347, LR: 0.0003 +[2026-03-01 22:53:08] (step=0028060) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.490119350420661, LR: 0.0003 +[2026-03-01 22:53:16] (step=0028061) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.490315006847975, LR: 0.0003 +[2026-03-01 22:53:23] (step=0028062) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.4905106632752885, LR: 0.0003 +[2026-03-01 22:53:31] (step=0028063) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.4907063197026025, LR: 0.0003 +[2026-03-01 22:53:39] (step=0028064) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.490901976129916, LR: 0.0003 +[2026-03-01 22:53:47] (step=0028065) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.49109763255723, LR: 0.0003 +[2026-03-01 22:53:55] (step=0028066) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.491293288984543, LR: 0.0003 +[2026-03-01 22:54:03] (step=0028067) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.491488945411857, LR: 0.0003 +[2026-03-01 22:54:11] (step=0028068) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.491684601839171, LR: 0.0003 +[2026-03-01 22:54:18] (step=0028069) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.491880258266484, LR: 0.0003 +[2026-03-01 22:54:26] (step=0028070) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.492075914693798, LR: 0.0003 +[2026-03-01 22:54:34] (step=0028071) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.492271571121111, LR: 0.0003 +[2026-03-01 22:54:42] (step=0028072) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.492467227548425, LR: 0.0003 +[2026-03-01 22:54:50] (step=0028073) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.492662883975739, LR: 0.0003 +[2026-03-01 22:54:58] (step=0028074) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.492858540403052, LR: 0.0003 +[2026-03-01 22:55:06] (step=0028075) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.493054196830366, LR: 0.0003 +[2026-03-01 22:55:13] (step=0028076) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.493249853257679, LR: 0.0003 +[2026-03-01 22:55:21] (step=0028077) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.493445509684993, LR: 0.0003 +[2026-03-01 22:55:29] (step=0028078) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 5.493641166112306, LR: 0.0003 +[2026-03-01 22:55:37] (step=0028079) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 5.49383682253962, LR: 0.0003 +[2026-03-01 22:55:45] (step=0028080) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.494032478966934, LR: 0.0003 +[2026-03-01 22:55:53] (step=0028081) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.4942281353942475, LR: 0.0003 +[2026-03-01 22:56:01] (step=0028082) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.4944237918215615, LR: 0.0003 +[2026-03-01 22:56:08] (step=0028083) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.494619448248875, LR: 0.0003 +[2026-03-01 22:56:17] (step=0028084) Train Loss: 0.4536, Train Steps/Sec: 0.12, Epoch: 5.494815104676189, LR: 0.0003 +[2026-03-01 22:56:24] (step=0028085) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.495010761103503, LR: 0.0003 +[2026-03-01 22:56:32] (step=0028086) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.495206417530816, LR: 0.0003 +[2026-03-01 22:56:40] (step=0028087) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 5.49540207395813, LR: 0.0003 +[2026-03-01 22:56:48] (step=0028088) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.495597730385443, LR: 0.0003 +[2026-03-01 22:56:56] (step=0028089) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.495793386812757, LR: 0.0003 +[2026-03-01 22:57:04] (step=0028090) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 5.495989043240071, LR: 0.0003 +[2026-03-01 22:57:12] (step=0028091) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.496184699667384, LR: 0.0003 +[2026-03-01 22:57:19] (step=0028092) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.496380356094698, LR: 0.0003 +[2026-03-01 22:57:27] (step=0028093) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 5.496576012522011, LR: 0.0003 +[2026-03-01 22:57:35] (step=0028094) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.496771668949325, LR: 0.0003 +[2026-03-01 22:57:43] (step=0028095) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.496967325376638, LR: 0.0003 +[2026-03-01 22:57:51] (step=0028096) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.497162981803952, LR: 0.0003 +[2026-03-01 22:57:59] (step=0028097) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.497358638231266, LR: 0.0003 +[2026-03-01 22:58:07] (step=0028098) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.497554294658579, LR: 0.0003 +[2026-03-01 22:58:14] (step=0028099) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.497749951085893, LR: 0.0003 +[2026-03-01 22:58:22] (step=0028100) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.4979456075132065, LR: 0.0003 +[2026-03-01 22:58:30] (step=0028101) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.4981412639405205, LR: 0.0003 +[2026-03-01 22:58:38] (step=0028102) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.4983369203678345, LR: 0.0003 +[2026-03-01 22:58:46] (step=0028103) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.498532576795148, LR: 0.0003 +[2026-03-01 22:58:54] (step=0028104) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.498728233222462, LR: 0.0003 +[2026-03-01 22:59:02] (step=0028105) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.498923889649775, LR: 0.0003 +[2026-03-01 22:59:10] (step=0028106) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.499119546077089, LR: 0.0003 +[2026-03-01 22:59:17] (step=0028107) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.499315202504402, LR: 0.0003 +[2026-03-01 22:59:25] (step=0028108) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.499510858931716, LR: 0.0003 +[2026-03-01 22:59:33] (step=0028109) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.49970651535903, LR: 0.0003 +[2026-03-01 22:59:41] (step=0028110) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.499902171786343, LR: 0.0003 +[2026-03-01 22:59:49] (step=0028111) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.500097828213657, LR: 0.0003 +[2026-03-01 22:59:57] (step=0028112) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 5.50029348464097, LR: 0.0003 +[2026-03-01 23:00:04] (step=0028113) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 5.500489141068284, LR: 0.0003 +[2026-03-01 23:00:12] (step=0028114) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.500684797495598, LR: 0.0003 +[2026-03-01 23:00:20] (step=0028115) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.500880453922911, LR: 0.0003 +[2026-03-01 23:00:28] (step=0028116) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.501076110350225, LR: 0.0003 +[2026-03-01 23:00:36] (step=0028117) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.501271766777538, LR: 0.0003 +[2026-03-01 23:00:44] (step=0028118) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.501467423204852, LR: 0.0003 +[2026-03-01 23:00:52] (step=0028119) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.5016630796321655, LR: 0.0003 +[2026-03-01 23:00:59] (step=0028120) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.5018587360594795, LR: 0.0003 +[2026-03-01 23:01:07] (step=0028121) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.5020543924867935, LR: 0.0003 +[2026-03-01 23:01:15] (step=0028122) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.502250048914107, LR: 0.0003 +[2026-03-01 23:01:23] (step=0028123) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.502445705341421, LR: 0.0003 +[2026-03-01 23:01:31] (step=0028124) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.502641361768734, LR: 0.0003 +[2026-03-01 23:01:39] (step=0028125) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.502837018196048, LR: 0.0003 +[2026-03-01 23:01:47] (step=0028126) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.503032674623362, LR: 0.0003 +[2026-03-01 23:01:54] (step=0028127) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 5.503228331050675, LR: 0.0003 +[2026-03-01 23:02:02] (step=0028128) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.503423987477989, LR: 0.0003 +[2026-03-01 23:02:10] (step=0028129) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.503619643905302, LR: 0.0003 +[2026-03-01 23:02:18] (step=0028130) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.503815300332616, LR: 0.0003 +[2026-03-01 23:02:26] (step=0028131) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.504010956759929, LR: 0.0003 +[2026-03-01 23:02:34] (step=0028132) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.504206613187243, LR: 0.0003 +[2026-03-01 23:02:42] (step=0028133) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 5.504402269614557, LR: 0.0003 +[2026-03-01 23:02:50] (step=0028134) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.50459792604187, LR: 0.0003 +[2026-03-01 23:02:57] (step=0028135) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 5.504793582469184, LR: 0.0003 +[2026-03-01 23:03:05] (step=0028136) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.504989238896497, LR: 0.0003 +[2026-03-01 23:03:13] (step=0028137) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.505184895323811, LR: 0.0003 +[2026-03-01 23:03:21] (step=0028138) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.505380551751125, LR: 0.0003 +[2026-03-01 23:03:29] (step=0028139) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.5055762081784385, LR: 0.0003 +[2026-03-01 23:03:37] (step=0028140) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.5057718646057525, LR: 0.0003 +[2026-03-01 23:03:45] (step=0028141) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.505967521033066, LR: 0.0003 +[2026-03-01 23:03:53] (step=0028142) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.50616317746038, LR: 0.0003 +[2026-03-01 23:04:00] (step=0028143) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.506358833887694, LR: 0.0003 +[2026-03-01 23:04:08] (step=0028144) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.506554490315007, LR: 0.0003 +[2026-03-01 23:04:16] (step=0028145) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.506750146742321, LR: 0.0003 +[2026-03-01 23:04:24] (step=0028146) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.506945803169634, LR: 0.0003 +[2026-03-01 23:04:32] (step=0028147) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.507141459596948, LR: 0.0003 +[2026-03-01 23:04:40] (step=0028148) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.507337116024261, LR: 0.0003 +[2026-03-01 23:04:48] (step=0028149) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.507532772451575, LR: 0.0003 +[2026-03-01 23:04:55] (step=0028150) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 5.507728428878889, LR: 0.0003 +[2026-03-01 23:05:03] (step=0028151) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.507924085306202, LR: 0.0003 +[2026-03-01 23:05:11] (step=0028152) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.508119741733516, LR: 0.0003 +[2026-03-01 23:05:19] (step=0028153) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.508315398160829, LR: 0.0003 +[2026-03-01 23:05:27] (step=0028154) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.508511054588143, LR: 0.0003 +[2026-03-01 23:05:35] (step=0028155) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 5.508706711015457, LR: 0.0003 +[2026-03-01 23:05:43] (step=0028156) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.50890236744277, LR: 0.0003 +[2026-03-01 23:05:51] (step=0028157) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.509098023870084, LR: 0.0003 +[2026-03-01 23:05:58] (step=0028158) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.5092936802973975, LR: 0.0003 +[2026-03-01 23:06:06] (step=0028159) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.5094893367247115, LR: 0.0003 +[2026-03-01 23:06:14] (step=0028160) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.509684993152025, LR: 0.0003 +[2026-03-01 23:06:22] (step=0028161) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.509880649579339, LR: 0.0003 +[2026-03-01 23:06:30] (step=0028162) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.510076306006653, LR: 0.0003 +[2026-03-01 23:06:38] (step=0028163) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.510271962433966, LR: 0.0003 +[2026-03-01 23:06:45] (step=0028164) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.51046761886128, LR: 0.0003 +[2026-03-01 23:06:53] (step=0028165) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.510663275288593, LR: 0.0003 +[2026-03-01 23:07:01] (step=0028166) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.510858931715907, LR: 0.0003 +[2026-03-01 23:07:09] (step=0028167) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.511054588143221, LR: 0.0003 +[2026-03-01 23:07:17] (step=0028168) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.511250244570534, LR: 0.0003 +[2026-03-01 23:07:25] (step=0028169) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.511445900997848, LR: 0.0003 +[2026-03-01 23:07:33] (step=0028170) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.511641557425161, LR: 0.0003 +[2026-03-01 23:07:40] (step=0028171) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.511837213852475, LR: 0.0003 +[2026-03-01 23:07:48] (step=0028172) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 5.512032870279788, LR: 0.0003 +[2026-03-01 23:07:56] (step=0028173) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.512228526707102, LR: 0.0003 +[2026-03-01 23:08:04] (step=0028174) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 5.512424183134416, LR: 0.0003 +[2026-03-01 23:08:12] (step=0028175) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.512619839561729, LR: 0.0003 +[2026-03-01 23:08:20] (step=0028176) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.512815495989043, LR: 0.0003 +[2026-03-01 23:08:28] (step=0028177) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.5130111524163565, LR: 0.0003 +[2026-03-01 23:08:35] (step=0028178) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.5132068088436705, LR: 0.0003 +[2026-03-01 23:08:43] (step=0028179) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.5134024652709845, LR: 0.0003 +[2026-03-01 23:08:51] (step=0028180) Train Loss: 0.4498, Train Steps/Sec: 0.12, Epoch: 5.513598121698298, LR: 0.0003 +[2026-03-01 23:08:59] (step=0028181) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.513793778125612, LR: 0.0003 +[2026-03-01 23:09:07] (step=0028182) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 5.513989434552925, LR: 0.0003 +[2026-03-01 23:09:15] (step=0028183) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.514185090980239, LR: 0.0003 +[2026-03-01 23:09:23] (step=0028184) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.514380747407552, LR: 0.0003 +[2026-03-01 23:09:31] (step=0028185) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.514576403834866, LR: 0.0003 +[2026-03-01 23:09:38] (step=0028186) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.51477206026218, LR: 0.0003 +[2026-03-01 23:09:46] (step=0028187) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.514967716689493, LR: 0.0003 +[2026-03-01 23:09:54] (step=0028188) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.515163373116807, LR: 0.0003 +[2026-03-01 23:10:02] (step=0028189) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.51535902954412, LR: 0.0003 +[2026-03-01 23:10:10] (step=0028190) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.515554685971434, LR: 0.0003 +[2026-03-01 23:10:18] (step=0028191) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.515750342398748, LR: 0.0003 +[2026-03-01 23:10:26] (step=0028192) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.515945998826061, LR: 0.0003 +[2026-03-01 23:10:33] (step=0028193) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.516141655253375, LR: 0.0003 +[2026-03-01 23:10:41] (step=0028194) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.516337311680688, LR: 0.0003 +[2026-03-01 23:10:49] (step=0028195) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.516532968108002, LR: 0.0003 +[2026-03-01 23:10:57] (step=0028196) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.516728624535316, LR: 0.0003 +[2026-03-01 23:11:05] (step=0028197) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.5169242809626295, LR: 0.0003 +[2026-03-01 23:11:13] (step=0028198) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.5171199373899436, LR: 0.0003 +[2026-03-01 23:11:21] (step=0028199) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.517315593817257, LR: 0.0003 +[2026-03-01 23:11:29] (step=0028200) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.517511250244571, LR: 0.0003 +[2026-03-01 23:11:36] (step=0028201) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.517706906671884, LR: 0.0003 +[2026-03-01 23:11:44] (step=0028202) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.517902563099198, LR: 0.0003 +[2026-03-01 23:11:52] (step=0028203) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.518098219526512, LR: 0.0003 +[2026-03-01 23:12:00] (step=0028204) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.518293875953825, LR: 0.0003 +[2026-03-01 23:12:08] (step=0028205) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.518489532381139, LR: 0.0003 +[2026-03-01 23:12:16] (step=0028206) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.518685188808452, LR: 0.0003 +[2026-03-01 23:12:23] (step=0028207) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.518880845235766, LR: 0.0003 +[2026-03-01 23:12:31] (step=0028208) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.51907650166308, LR: 0.0003 +[2026-03-01 23:12:39] (step=0028209) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.519272158090393, LR: 0.0003 +[2026-03-01 23:12:47] (step=0028210) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.519467814517707, LR: 0.0003 +[2026-03-01 23:12:55] (step=0028211) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.51966347094502, LR: 0.0003 +[2026-03-01 23:13:03] (step=0028212) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.519859127372334, LR: 0.0003 +[2026-03-01 23:13:11] (step=0028213) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.520054783799647, LR: 0.0003 +[2026-03-01 23:13:18] (step=0028214) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.520250440226961, LR: 0.0003 +[2026-03-01 23:13:26] (step=0028215) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.520446096654275, LR: 0.0003 +[2026-03-01 23:13:34] (step=0028216) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.5206417530815886, LR: 0.0003 +[2026-03-01 23:13:42] (step=0028217) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.520837409508903, LR: 0.0003 +[2026-03-01 23:13:50] (step=0028218) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.521033065936216, LR: 0.0003 +[2026-03-01 23:13:58] (step=0028219) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.52122872236353, LR: 0.0003 +[2026-03-01 23:14:06] (step=0028220) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.521424378790844, LR: 0.0003 +[2026-03-01 23:14:13] (step=0028221) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.521620035218157, LR: 0.0003 +[2026-03-01 23:14:21] (step=0028222) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.521815691645471, LR: 0.0003 +[2026-03-01 23:14:29] (step=0028223) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.522011348072784, LR: 0.0003 +[2026-03-01 23:14:37] (step=0028224) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.522207004500098, LR: 0.0003 +[2026-03-01 23:14:45] (step=0028225) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.522402660927411, LR: 0.0003 +[2026-03-01 23:14:53] (step=0028226) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.522598317354725, LR: 0.0003 +[2026-03-01 23:15:01] (step=0028227) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.522793973782039, LR: 0.0003 +[2026-03-01 23:15:08] (step=0028228) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.522989630209352, LR: 0.0003 +[2026-03-01 23:15:16] (step=0028229) Train Loss: 0.4500, Train Steps/Sec: 0.12, Epoch: 5.523185286636666, LR: 0.0003 +[2026-03-01 23:15:24] (step=0028230) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.523380943063979, LR: 0.0003 +[2026-03-01 23:15:32] (step=0028231) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.523576599491293, LR: 0.0003 +[2026-03-01 23:15:40] (step=0028232) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.523772255918607, LR: 0.0003 +[2026-03-01 23:15:48] (step=0028233) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.52396791234592, LR: 0.0003 +[2026-03-01 23:15:56] (step=0028234) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.5241635687732344, LR: 0.0003 +[2026-03-01 23:16:04] (step=0028235) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.524359225200548, LR: 0.0003 +[2026-03-01 23:16:11] (step=0028236) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.524554881627862, LR: 0.0003 +[2026-03-01 23:16:19] (step=0028237) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.524750538055175, LR: 0.0003 +[2026-03-01 23:16:27] (step=0028238) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 5.524946194482489, LR: 0.0003 +[2026-03-01 23:16:35] (step=0028239) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.525141850909803, LR: 0.0003 +[2026-03-01 23:16:43] (step=0028240) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.525337507337116, LR: 0.0003 +[2026-03-01 23:16:51] (step=0028241) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.52553316376443, LR: 0.0003 +[2026-03-01 23:16:59] (step=0028242) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.525728820191743, LR: 0.0003 +[2026-03-01 23:17:06] (step=0028243) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.525924476619057, LR: 0.0003 +[2026-03-01 23:17:14] (step=0028244) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.526120133046371, LR: 0.0003 +[2026-03-01 23:17:22] (step=0028245) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.526315789473684, LR: 0.0003 +[2026-03-01 23:17:30] (step=0028246) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.526511445900998, LR: 0.0003 +[2026-03-01 23:17:38] (step=0028247) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.526707102328311, LR: 0.0003 +[2026-03-01 23:17:46] (step=0028248) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.526902758755625, LR: 0.0003 +[2026-03-01 23:17:54] (step=0028249) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.527098415182938, LR: 0.0003 +[2026-03-01 23:18:02] (step=0028250) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.527294071610252, LR: 0.0003 +[2026-03-01 23:18:09] (step=0028251) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.527489728037566, LR: 0.0003 +[2026-03-01 23:18:17] (step=0028252) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 5.5276853844648794, LR: 0.0003 +[2026-03-01 23:18:25] (step=0028253) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.5278810408921935, LR: 0.0003 +[2026-03-01 23:18:33] (step=0028254) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.528076697319507, LR: 0.0003 +[2026-03-01 23:18:41] (step=0028255) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.528272353746821, LR: 0.0003 +[2026-03-01 23:18:49] (step=0028256) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.528468010174135, LR: 0.0003 +[2026-03-01 23:18:57] (step=0028257) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.528663666601448, LR: 0.0003 +[2026-03-01 23:19:04] (step=0028258) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.528859323028762, LR: 0.0003 +[2026-03-01 23:19:12] (step=0028259) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.529054979456075, LR: 0.0003 +[2026-03-01 23:19:20] (step=0028260) Train Loss: 0.4230, Train Steps/Sec: 0.13, Epoch: 5.529250635883389, LR: 0.0003 +[2026-03-01 23:19:28] (step=0028261) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.529446292310703, LR: 0.0003 +[2026-03-01 23:19:36] (step=0028262) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.529641948738016, LR: 0.0003 +[2026-03-01 23:19:44] (step=0028263) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 5.52983760516533, LR: 0.0003 +[2026-03-01 23:19:52] (step=0028264) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.530033261592643, LR: 0.0003 +[2026-03-01 23:19:59] (step=0028265) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 5.530228918019957, LR: 0.0003 +[2026-03-01 23:20:07] (step=0028266) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.53042457444727, LR: 0.0003 +[2026-03-01 23:20:15] (step=0028267) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.530620230874584, LR: 0.0003 +[2026-03-01 23:20:23] (step=0028268) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.530815887301898, LR: 0.0003 +[2026-03-01 23:20:31] (step=0028269) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.531011543729211, LR: 0.0003 +[2026-03-01 23:20:39] (step=0028270) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.531207200156525, LR: 0.0003 +[2026-03-01 23:20:47] (step=0028271) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.5314028565838385, LR: 0.0003 +[2026-03-01 23:20:54] (step=0028272) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.5315985130111525, LR: 0.0003 +[2026-03-01 23:21:02] (step=0028273) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.5317941694384665, LR: 0.0003 +[2026-03-01 23:21:10] (step=0028274) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.53198982586578, LR: 0.0003 +[2026-03-01 23:21:18] (step=0028275) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.532185482293094, LR: 0.0003 +[2026-03-01 23:21:26] (step=0028276) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.532381138720407, LR: 0.0003 +[2026-03-01 23:21:34] (step=0028277) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.532576795147721, LR: 0.0003 +[2026-03-01 23:21:42] (step=0028278) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.532772451575034, LR: 0.0003 +[2026-03-01 23:21:49] (step=0028279) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.532968108002348, LR: 0.0003 +[2026-03-01 23:21:57] (step=0028280) Train Loss: 0.4578, Train Steps/Sec: 0.12, Epoch: 5.533163764429662, LR: 0.0003 +[2026-03-01 23:22:05] (step=0028281) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.533359420856975, LR: 0.0003 +[2026-03-01 23:22:13] (step=0028282) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.533555077284289, LR: 0.0003 +[2026-03-01 23:22:21] (step=0028283) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.533750733711602, LR: 0.0003 +[2026-03-01 23:22:29] (step=0028284) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.533946390138916, LR: 0.0003 +[2026-03-01 23:22:37] (step=0028285) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.53414204656623, LR: 0.0003 +[2026-03-01 23:22:45] (step=0028286) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.534337702993543, LR: 0.0003 +[2026-03-01 23:22:52] (step=0028287) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.534533359420857, LR: 0.0003 +[2026-03-01 23:23:00] (step=0028288) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.53472901584817, LR: 0.0003 +[2026-03-01 23:23:08] (step=0028289) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.534924672275484, LR: 0.0003 +[2026-03-01 23:23:16] (step=0028290) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.5351203287027975, LR: 0.0003 +[2026-03-01 23:23:24] (step=0028291) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.5353159851301115, LR: 0.0003 +[2026-03-01 23:23:32] (step=0028292) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.5355116415574255, LR: 0.0003 +[2026-03-01 23:23:40] (step=0028293) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.535707297984739, LR: 0.0003 +[2026-03-01 23:23:48] (step=0028294) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.535902954412053, LR: 0.0003 +[2026-03-01 23:23:55] (step=0028295) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.536098610839366, LR: 0.0003 +[2026-03-01 23:24:03] (step=0028296) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.53629426726668, LR: 0.0003 +[2026-03-01 23:24:11] (step=0028297) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.536489923693994, LR: 0.0003 +[2026-03-01 23:24:19] (step=0028298) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.536685580121307, LR: 0.0003 +[2026-03-01 23:24:27] (step=0028299) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 5.536881236548621, LR: 0.0003 +[2026-03-01 23:24:35] (step=0028300) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.537076892975934, LR: 0.0003 +[2026-03-01 23:24:43] (step=0028301) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.537272549403248, LR: 0.0003 +[2026-03-01 23:24:50] (step=0028302) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.537468205830561, LR: 0.0003 +[2026-03-01 23:24:58] (step=0028303) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.537663862257875, LR: 0.0003 +[2026-03-01 23:25:06] (step=0028304) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.537859518685189, LR: 0.0003 +[2026-03-01 23:25:14] (step=0028305) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.538055175112502, LR: 0.0003 +[2026-03-01 23:25:22] (step=0028306) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.538250831539816, LR: 0.0003 +[2026-03-01 23:25:30] (step=0028307) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.538446487967129, LR: 0.0003 +[2026-03-01 23:25:38] (step=0028308) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 5.538642144394443, LR: 0.0003 +[2026-03-01 23:25:45] (step=0028309) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.538837800821757, LR: 0.0003 +[2026-03-01 23:25:53] (step=0028310) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.5390334572490705, LR: 0.0003 +[2026-03-01 23:26:01] (step=0028311) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.5392291136763845, LR: 0.0003 +[2026-03-01 23:26:09] (step=0028312) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.539424770103698, LR: 0.0003 +[2026-03-01 23:26:17] (step=0028313) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.539620426531012, LR: 0.0003 +[2026-03-01 23:26:25] (step=0028314) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 5.539816082958326, LR: 0.0003 +[2026-03-01 23:26:33] (step=0028315) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.540011739385639, LR: 0.0003 +[2026-03-01 23:26:40] (step=0028316) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.540207395812953, LR: 0.0003 +[2026-03-01 23:26:48] (step=0028317) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.540403052240266, LR: 0.0003 +[2026-03-01 23:26:56] (step=0028318) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.54059870866758, LR: 0.0003 +[2026-03-01 23:27:04] (step=0028319) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.540794365094893, LR: 0.0003 +[2026-03-01 23:27:12] (step=0028320) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.540990021522207, LR: 0.0003 +[2026-03-01 23:27:20] (step=0028321) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 5.541185677949521, LR: 0.0003 +[2026-03-01 23:27:28] (step=0028322) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.541381334376834, LR: 0.0003 +[2026-03-01 23:27:35] (step=0028323) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.541576990804148, LR: 0.0003 +[2026-03-01 23:27:43] (step=0028324) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.541772647231461, LR: 0.0003 +[2026-03-01 23:27:51] (step=0028325) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.541968303658775, LR: 0.0003 +[2026-03-01 23:27:59] (step=0028326) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 5.542163960086089, LR: 0.0003 +[2026-03-01 23:28:07] (step=0028327) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.542359616513402, LR: 0.0003 +[2026-03-01 23:28:15] (step=0028328) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.542555272940716, LR: 0.0003 +[2026-03-01 23:28:23] (step=0028329) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.5427509293680295, LR: 0.0003 +[2026-03-01 23:28:31] (step=0028330) Train Loss: 0.4393, Train Steps/Sec: 0.12, Epoch: 5.5429465857953435, LR: 0.0003 +[2026-03-01 23:28:38] (step=0028331) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.543142242222657, LR: 0.0003 +[2026-03-01 23:28:46] (step=0028332) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.543337898649971, LR: 0.0003 +[2026-03-01 23:28:54] (step=0028333) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.543533555077285, LR: 0.0003 +[2026-03-01 23:29:02] (step=0028334) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.543729211504598, LR: 0.0003 +[2026-03-01 23:29:10] (step=0028335) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.543924867931912, LR: 0.0003 +[2026-03-01 23:29:18] (step=0028336) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.544120524359225, LR: 0.0003 +[2026-03-01 23:29:26] (step=0028337) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.544316180786539, LR: 0.0003 +[2026-03-01 23:29:33] (step=0028338) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.544511837213853, LR: 0.0003 +[2026-03-01 23:29:41] (step=0028339) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 5.544707493641166, LR: 0.0003 +[2026-03-01 23:29:49] (step=0028340) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.54490315006848, LR: 0.0003 +[2026-03-01 23:29:57] (step=0028341) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 5.545098806495793, LR: 0.0003 +[2026-03-01 23:30:05] (step=0028342) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.545294462923107, LR: 0.0003 +[2026-03-01 23:30:13] (step=0028343) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.54549011935042, LR: 0.0003 +[2026-03-01 23:30:21] (step=0028344) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.545685775777734, LR: 0.0003 +[2026-03-01 23:30:29] (step=0028345) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.545881432205048, LR: 0.0003 +[2026-03-01 23:30:36] (step=0028346) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.546077088632361, LR: 0.0003 +[2026-03-01 23:30:44] (step=0028347) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.546272745059675, LR: 0.0003 +[2026-03-01 23:30:52] (step=0028348) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.5464684014869885, LR: 0.0003 +[2026-03-01 23:31:00] (step=0028349) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.5466640579143025, LR: 0.0003 +[2026-03-01 23:31:08] (step=0028350) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.5468597143416165, LR: 0.0003 +[2026-03-01 23:31:16] (step=0028351) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 5.54705537076893, LR: 0.0003 +[2026-03-01 23:31:23] (step=0028352) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.547251027196244, LR: 0.0003 +[2026-03-01 23:31:31] (step=0028353) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.547446683623557, LR: 0.0003 +[2026-03-01 23:31:39] (step=0028354) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.547642340050871, LR: 0.0003 +[2026-03-01 23:31:47] (step=0028355) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.547837996478184, LR: 0.0003 +[2026-03-01 23:31:55] (step=0028356) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.548033652905498, LR: 0.0003 +[2026-03-01 23:32:03] (step=0028357) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.548229309332812, LR: 0.0003 +[2026-03-01 23:32:11] (step=0028358) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 5.548424965760125, LR: 0.0003 +[2026-03-01 23:32:18] (step=0028359) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 5.548620622187439, LR: 0.0003 +[2026-03-01 23:32:26] (step=0028360) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.548816278614752, LR: 0.0003 +[2026-03-01 23:32:34] (step=0028361) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.549011935042066, LR: 0.0003 +[2026-03-01 23:32:42] (step=0028362) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.54920759146938, LR: 0.0003 +[2026-03-01 23:32:50] (step=0028363) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.549403247896693, LR: 0.0003 +[2026-03-01 23:32:58] (step=0028364) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.549598904324007, LR: 0.0003 +[2026-03-01 23:33:06] (step=0028365) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 5.54979456075132, LR: 0.0003 +[2026-03-01 23:33:13] (step=0028366) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.549990217178634, LR: 0.0003 +[2026-03-01 23:33:21] (step=0028367) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.550185873605948, LR: 0.0003 +[2026-03-01 23:33:29] (step=0028368) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.5503815300332615, LR: 0.0003 +[2026-03-01 23:33:37] (step=0028369) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.5505771864605755, LR: 0.0003 +[2026-03-01 23:33:45] (step=0028370) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.550772842887889, LR: 0.0003 +[2026-03-01 23:33:53] (step=0028371) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 5.550968499315203, LR: 0.0003 +[2026-03-01 23:34:01] (step=0028372) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.551164155742516, LR: 0.0003 +[2026-03-01 23:34:08] (step=0028373) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.55135981216983, LR: 0.0003 +[2026-03-01 23:34:16] (step=0028374) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.551555468597144, LR: 0.0003 +[2026-03-01 23:34:24] (step=0028375) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 5.551751125024457, LR: 0.0003 +[2026-03-01 23:34:32] (step=0028376) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.551946781451771, LR: 0.0003 +[2026-03-01 23:34:40] (step=0028377) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.552142437879084, LR: 0.0003 +[2026-03-01 23:34:48] (step=0028378) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.552338094306398, LR: 0.0003 +[2026-03-01 23:34:56] (step=0028379) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.552533750733712, LR: 0.0003 +[2026-03-01 23:35:03] (step=0028380) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.552729407161025, LR: 0.0003 +[2026-03-01 23:35:11] (step=0028381) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.552925063588339, LR: 0.0003 +[2026-03-01 23:35:19] (step=0028382) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.553120720015652, LR: 0.0003 +[2026-03-01 23:35:27] (step=0028383) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.553316376442966, LR: 0.0003 +[2026-03-01 23:35:35] (step=0028384) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.553512032870279, LR: 0.0003 +[2026-03-01 23:35:43] (step=0028385) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.553707689297593, LR: 0.0003 +[2026-03-01 23:35:51] (step=0028386) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.553903345724907, LR: 0.0003 +[2026-03-01 23:35:59] (step=0028387) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 5.5540990021522205, LR: 0.0003 +[2026-03-01 23:36:06] (step=0028388) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.5542946585795345, LR: 0.0003 +[2026-03-01 23:36:14] (step=0028389) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.554490315006848, LR: 0.0003 +[2026-03-01 23:36:22] (step=0028390) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.554685971434162, LR: 0.0003 +[2026-03-01 23:36:30] (step=0028391) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 5.554881627861476, LR: 0.0003 +[2026-03-01 23:36:38] (step=0028392) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.555077284288789, LR: 0.0003 +[2026-03-01 23:36:46] (step=0028393) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.555272940716103, LR: 0.0003 +[2026-03-01 23:36:54] (step=0028394) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.555468597143416, LR: 0.0003 +[2026-03-01 23:37:02] (step=0028395) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.55566425357073, LR: 0.0003 +[2026-03-01 23:37:09] (step=0028396) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 5.555859909998043, LR: 0.0003 +[2026-03-01 23:37:17] (step=0028397) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.556055566425357, LR: 0.0003 +[2026-03-01 23:37:25] (step=0028398) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.556251222852671, LR: 0.0003 +[2026-03-01 23:37:33] (step=0028399) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.556446879279984, LR: 0.0003 +[2026-03-01 23:37:41] (step=0028400) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.556642535707298, LR: 0.0003 +[2026-03-01 23:37:49] (step=0028401) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.556838192134611, LR: 0.0003 +[2026-03-01 23:37:57] (step=0028402) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.557033848561925, LR: 0.0003 +[2026-03-01 23:38:04] (step=0028403) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.557229504989239, LR: 0.0003 +[2026-03-01 23:38:12] (step=0028404) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.557425161416552, LR: 0.0003 +[2026-03-01 23:38:20] (step=0028405) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.557620817843866, LR: 0.0003 +[2026-03-01 23:38:28] (step=0028406) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.5578164742711795, LR: 0.0003 +[2026-03-01 23:38:36] (step=0028407) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.5580121306984935, LR: 0.0003 +[2026-03-01 23:38:44] (step=0028408) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.558207787125807, LR: 0.0003 +[2026-03-01 23:38:52] (step=0028409) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.558403443553121, LR: 0.0003 +[2026-03-01 23:38:59] (step=0028410) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.558599099980435, LR: 0.0003 +[2026-03-01 23:39:07] (step=0028411) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.558794756407748, LR: 0.0003 +[2026-03-01 23:39:15] (step=0028412) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.558990412835062, LR: 0.0003 +[2026-03-01 23:39:23] (step=0028413) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.559186069262375, LR: 0.0003 +[2026-03-01 23:39:31] (step=0028414) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.559381725689689, LR: 0.0003 +[2026-03-01 23:39:39] (step=0028415) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.559577382117003, LR: 0.0003 +[2026-03-01 23:39:47] (step=0028416) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.559773038544316, LR: 0.0003 +[2026-03-01 23:39:54] (step=0028417) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.55996869497163, LR: 0.0003 +[2026-03-01 23:40:02] (step=0028418) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.560164351398943, LR: 0.0003 +[2026-03-01 23:40:10] (step=0028419) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.560360007826257, LR: 0.0003 +[2026-03-01 23:40:18] (step=0028420) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.560555664253571, LR: 0.0003 +[2026-03-01 23:40:26] (step=0028421) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.560751320680884, LR: 0.0003 +[2026-03-01 23:40:34] (step=0028422) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.560946977108198, LR: 0.0003 +[2026-03-01 23:40:42] (step=0028423) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.561142633535511, LR: 0.0003 +[2026-03-01 23:40:49] (step=0028424) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.561338289962825, LR: 0.0003 +[2026-03-01 23:40:57] (step=0028425) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.5615339463901385, LR: 0.0003 +[2026-03-01 23:41:05] (step=0028426) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.5617296028174525, LR: 0.0003 +[2026-03-01 23:41:13] (step=0028427) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.5619252592447666, LR: 0.0003 +[2026-03-01 23:41:21] (step=0028428) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.56212091567208, LR: 0.0003 +[2026-03-01 23:41:29] (step=0028429) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.562316572099394, LR: 0.0003 +[2026-03-01 23:41:37] (step=0028430) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.562512228526707, LR: 0.0003 +[2026-03-01 23:41:45] (step=0028431) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.562707884954021, LR: 0.0003 +[2026-03-01 23:41:52] (step=0028432) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.562903541381335, LR: 0.0003 +[2026-03-01 23:42:00] (step=0028433) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.563099197808648, LR: 0.0003 +[2026-03-01 23:42:08] (step=0028434) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.563294854235962, LR: 0.0003 +[2026-03-01 23:42:16] (step=0028435) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.563490510663275, LR: 0.0003 +[2026-03-01 23:42:24] (step=0028436) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.563686167090589, LR: 0.0003 +[2026-03-01 23:42:32] (step=0028437) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.563881823517902, LR: 0.0003 +[2026-03-01 23:42:40] (step=0028438) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.564077479945216, LR: 0.0003 +[2026-03-01 23:42:47] (step=0028439) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.56427313637253, LR: 0.0003 +[2026-03-01 23:42:55] (step=0028440) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.564468792799843, LR: 0.0003 +[2026-03-01 23:43:03] (step=0028441) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.564664449227157, LR: 0.0003 +[2026-03-01 23:43:11] (step=0028442) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.56486010565447, LR: 0.0003 +[2026-03-01 23:43:19] (step=0028443) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.565055762081784, LR: 0.0003 +[2026-03-01 23:43:27] (step=0028444) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.565251418509098, LR: 0.0003 +[2026-03-01 23:43:35] (step=0028445) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.5654470749364116, LR: 0.0003 +[2026-03-01 23:43:43] (step=0028446) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.565642731363726, LR: 0.0003 +[2026-03-01 23:43:51] (step=0028447) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.565838387791039, LR: 0.0003 +[2026-03-01 23:43:58] (step=0028448) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.566034044218353, LR: 0.0003 +[2026-03-01 23:44:06] (step=0028449) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.566229700645666, LR: 0.0003 +[2026-03-01 23:44:14] (step=0028450) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.56642535707298, LR: 0.0003 +[2026-03-01 23:44:22] (step=0028451) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.566621013500294, LR: 0.0003 +[2026-03-01 23:44:30] (step=0028452) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.566816669927607, LR: 0.0003 +[2026-03-01 23:44:38] (step=0028453) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.567012326354921, LR: 0.0003 +[2026-03-01 23:44:46] (step=0028454) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.567207982782234, LR: 0.0003 +[2026-03-01 23:44:53] (step=0028455) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.567403639209548, LR: 0.0003 +[2026-03-01 23:45:01] (step=0028456) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.567599295636862, LR: 0.0003 +[2026-03-01 23:45:09] (step=0028457) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.567794952064175, LR: 0.0003 +[2026-03-01 23:45:17] (step=0028458) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.567990608491489, LR: 0.0003 +[2026-03-01 23:45:25] (step=0028459) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 5.568186264918802, LR: 0.0003 +[2026-03-01 23:45:33] (step=0028460) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.568381921346116, LR: 0.0003 +[2026-03-01 23:45:41] (step=0028461) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.568577577773429, LR: 0.0003 +[2026-03-01 23:45:48] (step=0028462) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.568773234200743, LR: 0.0003 +[2026-03-01 23:45:56] (step=0028463) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.5689688906280574, LR: 0.0003 +[2026-03-01 23:46:04] (step=0028464) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.569164547055371, LR: 0.0003 +[2026-03-01 23:46:12] (step=0028465) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.569360203482685, LR: 0.0003 +[2026-03-01 23:46:20] (step=0028466) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.569555859909998, LR: 0.0003 +[2026-03-01 23:46:28] (step=0028467) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.569751516337312, LR: 0.0003 +[2026-03-01 23:46:36] (step=0028468) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.569947172764626, LR: 0.0003 +[2026-03-01 23:46:44] (step=0028469) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.570142829191939, LR: 0.0003 +[2026-03-01 23:46:51] (step=0028470) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.570338485619253, LR: 0.0003 +[2026-03-01 23:46:59] (step=0028471) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 5.570534142046566, LR: 0.0003 +[2026-03-01 23:47:07] (step=0028472) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.57072979847388, LR: 0.0003 +[2026-03-01 23:47:15] (step=0028473) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.570925454901193, LR: 0.0003 +[2026-03-01 23:47:23] (step=0028474) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.571121111328507, LR: 0.0003 +[2026-03-01 23:47:31] (step=0028475) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.571316767755821, LR: 0.0003 +[2026-03-01 23:47:38] (step=0028476) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.571512424183134, LR: 0.0003 +[2026-03-01 23:47:46] (step=0028477) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 5.571708080610448, LR: 0.0003 +[2026-03-01 23:47:54] (step=0028478) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.571903737037761, LR: 0.0003 +[2026-03-01 23:48:02] (step=0028479) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.572099393465075, LR: 0.0003 +[2026-03-01 23:48:10] (step=0028480) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.572295049892389, LR: 0.0003 +[2026-03-01 23:48:18] (step=0028481) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.5724907063197024, LR: 0.0003 +[2026-03-01 23:48:26] (step=0028482) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.5726863627470165, LR: 0.0003 +[2026-03-01 23:48:33] (step=0028483) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.57288201917433, LR: 0.0003 +[2026-03-01 23:48:41] (step=0028484) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 5.573077675601644, LR: 0.0003 +[2026-03-01 23:48:49] (step=0028485) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.573273332028958, LR: 0.0003 +[2026-03-01 23:48:57] (step=0028486) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.573468988456271, LR: 0.0003 +[2026-03-01 23:49:05] (step=0028487) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.573664644883585, LR: 0.0003 +[2026-03-01 23:49:13] (step=0028488) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.573860301310898, LR: 0.0003 +[2026-03-01 23:49:21] (step=0028489) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.574055957738212, LR: 0.0003 +[2026-03-01 23:49:28] (step=0028490) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.574251614165525, LR: 0.0003 +[2026-03-01 23:49:36] (step=0028491) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.574447270592839, LR: 0.0003 +[2026-03-01 23:49:44] (step=0028492) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.574642927020153, LR: 0.0003 +[2026-03-01 23:49:52] (step=0028493) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.574838583447466, LR: 0.0003 +[2026-03-01 23:50:00] (step=0028494) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 5.57503423987478, LR: 0.0003 +[2026-03-01 23:50:08] (step=0028495) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.575229896302093, LR: 0.0003 +[2026-03-01 23:50:16] (step=0028496) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.575425552729407, LR: 0.0003 +[2026-03-01 23:50:24] (step=0028497) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.575621209156721, LR: 0.0003 +[2026-03-01 23:50:31] (step=0028498) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.575816865584034, LR: 0.0003 +[2026-03-01 23:50:39] (step=0028499) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.576012522011348, LR: 0.0003 +[2026-03-01 23:50:47] (step=0028500) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.5762081784386615, LR: 0.0003 +[2026-03-01 23:50:47] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0028500/ +[2026-03-01 23:50:55] (step=0028501) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.5764038348659755, LR: 0.0003 +[2026-03-01 23:51:03] (step=0028502) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.576599491293289, LR: 0.0003 +[2026-03-01 23:51:11] (step=0028503) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.576795147720603, LR: 0.0003 +[2026-03-01 23:51:19] (step=0028504) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.576990804147917, LR: 0.0003 +[2026-03-01 23:51:26] (step=0028505) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.57718646057523, LR: 0.0003 +[2026-03-01 23:51:34] (step=0028506) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 5.577382117002544, LR: 0.0003 +[2026-03-01 23:51:42] (step=0028507) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.577577773429857, LR: 0.0003 +[2026-03-01 23:51:50] (step=0028508) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 5.577773429857171, LR: 0.0003 +[2026-03-01 23:51:58] (step=0028509) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.577969086284485, LR: 0.0003 +[2026-03-01 23:52:06] (step=0028510) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.578164742711798, LR: 0.0003 +[2026-03-01 23:52:14] (step=0028511) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.578360399139112, LR: 0.0003 +[2026-03-01 23:52:21] (step=0028512) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.578556055566425, LR: 0.0003 +[2026-03-01 23:52:29] (step=0028513) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.578751711993739, LR: 0.0003 +[2026-03-01 23:52:37] (step=0028514) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.578947368421052, LR: 0.0003 +[2026-03-01 23:52:45] (step=0028515) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.579143024848366, LR: 0.0003 +[2026-03-01 23:52:53] (step=0028516) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.57933868127568, LR: 0.0003 +[2026-03-01 23:53:01] (step=0028517) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.579534337702993, LR: 0.0003 +[2026-03-01 23:53:09] (step=0028518) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.579729994130307, LR: 0.0003 +[2026-03-01 23:53:17] (step=0028519) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.5799256505576205, LR: 0.0003 +[2026-03-01 23:53:24] (step=0028520) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.5801213069849345, LR: 0.0003 +[2026-03-01 23:53:32] (step=0028521) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.5803169634122485, LR: 0.0003 +[2026-03-01 23:53:40] (step=0028522) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.580512619839562, LR: 0.0003 +[2026-03-01 23:53:48] (step=0028523) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.580708276266876, LR: 0.0003 +[2026-03-01 23:53:56] (step=0028524) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.580903932694189, LR: 0.0003 +[2026-03-01 23:54:04] (step=0028525) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.581099589121503, LR: 0.0003 +[2026-03-01 23:54:12] (step=0028526) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.581295245548816, LR: 0.0003 +[2026-03-01 23:54:19] (step=0028527) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.58149090197613, LR: 0.0003 +[2026-03-01 23:54:27] (step=0028528) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.581686558403444, LR: 0.0003 +[2026-03-01 23:54:35] (step=0028529) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.581882214830757, LR: 0.0003 +[2026-03-01 23:54:43] (step=0028530) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.582077871258071, LR: 0.0003 +[2026-03-01 23:54:51] (step=0028531) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.582273527685384, LR: 0.0003 +[2026-03-01 23:54:59] (step=0028532) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.582469184112698, LR: 0.0003 +[2026-03-01 23:55:06] (step=0028533) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.582664840540012, LR: 0.0003 +[2026-03-01 23:55:14] (step=0028534) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.582860496967325, LR: 0.0003 +[2026-03-01 23:55:22] (step=0028535) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.583056153394639, LR: 0.0003 +[2026-03-01 23:55:30] (step=0028536) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.583251809821952, LR: 0.0003 +[2026-03-01 23:55:38] (step=0028537) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.583447466249266, LR: 0.0003 +[2026-03-01 23:55:46] (step=0028538) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.58364312267658, LR: 0.0003 +[2026-03-01 23:55:54] (step=0028539) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.5838387791038935, LR: 0.0003 +[2026-03-01 23:56:01] (step=0028540) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.5840344355312075, LR: 0.0003 +[2026-03-01 23:56:09] (step=0028541) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.584230091958521, LR: 0.0003 +[2026-03-01 23:56:17] (step=0028542) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.584425748385835, LR: 0.0003 +[2026-03-01 23:56:25] (step=0028543) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.584621404813148, LR: 0.0003 +[2026-03-01 23:56:33] (step=0028544) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.584817061240462, LR: 0.0003 +[2026-03-01 23:56:41] (step=0028545) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.585012717667776, LR: 0.0003 +[2026-03-01 23:56:49] (step=0028546) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.585208374095089, LR: 0.0003 +[2026-03-01 23:56:57] (step=0028547) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.585404030522403, LR: 0.0003 +[2026-03-01 23:57:04] (step=0028548) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.585599686949716, LR: 0.0003 +[2026-03-01 23:57:12] (step=0028549) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.58579534337703, LR: 0.0003 +[2026-03-01 23:57:20] (step=0028550) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.585990999804344, LR: 0.0003 +[2026-03-01 23:57:28] (step=0028551) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.586186656231657, LR: 0.0003 +[2026-03-01 23:57:36] (step=0028552) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.586382312658971, LR: 0.0003 +[2026-03-01 23:57:44] (step=0028553) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.586577969086284, LR: 0.0003 +[2026-03-01 23:57:52] (step=0028554) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.586773625513598, LR: 0.0003 +[2026-03-01 23:57:59] (step=0028555) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.586969281940911, LR: 0.0003 +[2026-03-01 23:58:07] (step=0028556) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.587164938368225, LR: 0.0003 +[2026-03-01 23:58:15] (step=0028557) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 5.587360594795539, LR: 0.0003 +[2026-03-01 23:58:23] (step=0028558) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.5875562512228525, LR: 0.0003 +[2026-03-01 23:58:31] (step=0028559) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.5877519076501665, LR: 0.0003 +[2026-03-01 23:58:39] (step=0028560) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.58794756407748, LR: 0.0003 +[2026-03-01 23:58:47] (step=0028561) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.588143220504794, LR: 0.0003 +[2026-03-01 23:58:55] (step=0028562) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.588338876932108, LR: 0.0003 +[2026-03-01 23:59:02] (step=0028563) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.588534533359421, LR: 0.0003 +[2026-03-01 23:59:10] (step=0028564) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.588730189786735, LR: 0.0003 +[2026-03-01 23:59:18] (step=0028565) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.588925846214048, LR: 0.0003 +[2026-03-01 23:59:26] (step=0028566) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.589121502641362, LR: 0.0003 +[2026-03-01 23:59:34] (step=0028567) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.589317159068675, LR: 0.0003 +[2026-03-01 23:59:42] (step=0028568) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.589512815495989, LR: 0.0003 +[2026-03-01 23:59:50] (step=0028569) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.589708471923303, LR: 0.0003 +[2026-03-01 23:59:57] (step=0028570) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.589904128350616, LR: 0.0003 +[2026-03-02 00:00:05] (step=0028571) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.59009978477793, LR: 0.0003 +[2026-03-02 00:00:13] (step=0028572) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 5.590295441205243, LR: 0.0003 +[2026-03-02 00:00:21] (step=0028573) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 5.590491097632557, LR: 0.0003 +[2026-03-02 00:00:29] (step=0028574) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.590686754059871, LR: 0.0003 +[2026-03-02 00:00:37] (step=0028575) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.590882410487184, LR: 0.0003 +[2026-03-02 00:00:45] (step=0028576) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.591078066914498, LR: 0.0003 +[2026-03-02 00:00:52] (step=0028577) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.5912737233418115, LR: 0.0003 +[2026-03-02 00:01:00] (step=0028578) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.5914693797691255, LR: 0.0003 +[2026-03-02 00:01:08] (step=0028579) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 5.591665036196439, LR: 0.0003 +[2026-03-02 00:01:16] (step=0028580) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.591860692623753, LR: 0.0003 +[2026-03-02 00:01:24] (step=0028581) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 5.592056349051067, LR: 0.0003 +[2026-03-02 00:01:32] (step=0028582) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.59225200547838, LR: 0.0003 +[2026-03-02 00:01:40] (step=0028583) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.592447661905694, LR: 0.0003 +[2026-03-02 00:01:48] (step=0028584) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.592643318333007, LR: 0.0003 +[2026-03-02 00:01:55] (step=0028585) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.592838974760321, LR: 0.0003 +[2026-03-02 00:02:03] (step=0028586) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.593034631187635, LR: 0.0003 +[2026-03-02 00:02:11] (step=0028587) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.593230287614948, LR: 0.0003 +[2026-03-02 00:02:19] (step=0028588) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.593425944042262, LR: 0.0003 +[2026-03-02 00:02:27] (step=0028589) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.593621600469575, LR: 0.0003 +[2026-03-02 00:02:35] (step=0028590) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.593817256896889, LR: 0.0003 +[2026-03-02 00:02:43] (step=0028591) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.594012913324203, LR: 0.0003 +[2026-03-02 00:02:51] (step=0028592) Train Loss: 0.4550, Train Steps/Sec: 0.12, Epoch: 5.594208569751516, LR: 0.0003 +[2026-03-02 00:02:59] (step=0028593) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 5.59440422617883, LR: 0.0003 +[2026-03-02 00:03:06] (step=0028594) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.594599882606143, LR: 0.0003 +[2026-03-02 00:03:14] (step=0028595) Train Loss: 0.4261, Train Steps/Sec: 0.13, Epoch: 5.594795539033457, LR: 0.0003 +[2026-03-02 00:03:22] (step=0028596) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 5.5949911954607705, LR: 0.0003 +[2026-03-02 00:03:30] (step=0028597) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.5951868518880845, LR: 0.0003 +[2026-03-02 00:03:38] (step=0028598) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.5953825083153985, LR: 0.0003 +[2026-03-02 00:03:46] (step=0028599) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.595578164742712, LR: 0.0003 +[2026-03-02 00:03:54] (step=0028600) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.595773821170026, LR: 0.0003 +[2026-03-02 00:04:01] (step=0028601) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.595969477597339, LR: 0.0003 +[2026-03-02 00:04:09] (step=0028602) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.596165134024653, LR: 0.0003 +[2026-03-02 00:04:17] (step=0028603) Train Loss: 0.4552, Train Steps/Sec: 0.12, Epoch: 5.596360790451967, LR: 0.0003 +[2026-03-02 00:04:25] (step=0028604) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 5.59655644687928, LR: 0.0003 +[2026-03-02 00:04:33] (step=0028605) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 5.596752103306594, LR: 0.0003 +[2026-03-02 00:04:41] (step=0028606) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.596947759733907, LR: 0.0003 +[2026-03-02 00:04:49] (step=0028607) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.597143416161221, LR: 0.0003 +[2026-03-02 00:04:57] (step=0028608) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.597339072588534, LR: 0.0003 +[2026-03-02 00:05:05] (step=0028609) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.597534729015848, LR: 0.0003 +[2026-03-02 00:05:12] (step=0028610) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.597730385443162, LR: 0.0003 +[2026-03-02 00:05:20] (step=0028611) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.597926041870475, LR: 0.0003 +[2026-03-02 00:05:28] (step=0028612) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.598121698297789, LR: 0.0003 +[2026-03-02 00:05:36] (step=0028613) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.598317354725102, LR: 0.0003 +[2026-03-02 00:05:44] (step=0028614) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.598513011152416, LR: 0.0003 +[2026-03-02 00:05:52] (step=0028615) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.59870866757973, LR: 0.0003 +[2026-03-02 00:06:00] (step=0028616) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.5989043240070435, LR: 0.0003 +[2026-03-02 00:06:07] (step=0028617) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.5990999804343575, LR: 0.0003 +[2026-03-02 00:06:15] (step=0028618) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 5.599295636861671, LR: 0.0003 +[2026-03-02 00:06:23] (step=0028619) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.599491293288985, LR: 0.0003 +[2026-03-02 00:06:31] (step=0028620) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.599686949716298, LR: 0.0003 +[2026-03-02 00:06:39] (step=0028621) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.599882606143612, LR: 0.0003 +[2026-03-02 00:06:47] (step=0028622) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.600078262570926, LR: 0.0003 +[2026-03-02 00:06:55] (step=0028623) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.600273918998239, LR: 0.0003 +[2026-03-02 00:07:02] (step=0028624) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.600469575425553, LR: 0.0003 +[2026-03-02 00:07:10] (step=0028625) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 5.600665231852866, LR: 0.0003 +[2026-03-02 00:07:18] (step=0028626) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.60086088828018, LR: 0.0003 +[2026-03-02 00:07:26] (step=0028627) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.601056544707494, LR: 0.0003 +[2026-03-02 00:07:34] (step=0028628) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.601252201134807, LR: 0.0003 +[2026-03-02 00:07:42] (step=0028629) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.601447857562121, LR: 0.0003 +[2026-03-02 00:07:50] (step=0028630) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.601643513989434, LR: 0.0003 +[2026-03-02 00:07:57] (step=0028631) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.601839170416748, LR: 0.0003 +[2026-03-02 00:08:05] (step=0028632) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.602034826844061, LR: 0.0003 +[2026-03-02 00:08:13] (step=0028633) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.602230483271375, LR: 0.0003 +[2026-03-02 00:08:21] (step=0028634) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.602426139698689, LR: 0.0003 +[2026-03-02 00:08:29] (step=0028635) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.6026217961260025, LR: 0.0003 +[2026-03-02 00:08:37] (step=0028636) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.6028174525533165, LR: 0.0003 +[2026-03-02 00:08:45] (step=0028637) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.60301310898063, LR: 0.0003 +[2026-03-02 00:08:52] (step=0028638) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.603208765407944, LR: 0.0003 +[2026-03-02 00:09:00] (step=0028639) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.603404421835258, LR: 0.0003 +[2026-03-02 00:09:08] (step=0028640) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.603600078262571, LR: 0.0003 +[2026-03-02 00:09:16] (step=0028641) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.603795734689885, LR: 0.0003 +[2026-03-02 00:09:24] (step=0028642) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.603991391117198, LR: 0.0003 +[2026-03-02 00:09:32] (step=0028643) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.604187047544512, LR: 0.0003 +[2026-03-02 00:09:40] (step=0028644) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.604382703971825, LR: 0.0003 +[2026-03-02 00:09:48] (step=0028645) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.604578360399139, LR: 0.0003 +[2026-03-02 00:09:55] (step=0028646) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.604774016826453, LR: 0.0003 +[2026-03-02 00:10:03] (step=0028647) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.604969673253766, LR: 0.0003 +[2026-03-02 00:10:11] (step=0028648) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.60516532968108, LR: 0.0003 +[2026-03-02 00:10:19] (step=0028649) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.605360986108393, LR: 0.0003 +[2026-03-02 00:10:27] (step=0028650) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.605556642535707, LR: 0.0003 +[2026-03-02 00:10:35] (step=0028651) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.605752298963021, LR: 0.0003 +[2026-03-02 00:10:43] (step=0028652) Train Loss: 0.4478, Train Steps/Sec: 0.12, Epoch: 5.605947955390334, LR: 0.0003 +[2026-03-02 00:10:51] (step=0028653) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.606143611817648, LR: 0.0003 +[2026-03-02 00:10:58] (step=0028654) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.6063392682449615, LR: 0.0003 +[2026-03-02 00:11:06] (step=0028655) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.6065349246722755, LR: 0.0003 +[2026-03-02 00:11:14] (step=0028656) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 5.6067305810995895, LR: 0.0003 +[2026-03-02 00:11:22] (step=0028657) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.606926237526903, LR: 0.0003 +[2026-03-02 00:11:30] (step=0028658) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.607121893954217, LR: 0.0003 +[2026-03-02 00:11:38] (step=0028659) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.60731755038153, LR: 0.0003 +[2026-03-02 00:11:46] (step=0028660) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.607513206808844, LR: 0.0003 +[2026-03-02 00:11:54] (step=0028661) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.607708863236157, LR: 0.0003 +[2026-03-02 00:12:01] (step=0028662) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.607904519663471, LR: 0.0003 +[2026-03-02 00:12:09] (step=0028663) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.608100176090785, LR: 0.0003 +[2026-03-02 00:12:17] (step=0028664) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.608295832518098, LR: 0.0003 +[2026-03-02 00:12:25] (step=0028665) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 5.608491488945412, LR: 0.0003 +[2026-03-02 00:12:33] (step=0028666) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.608687145372725, LR: 0.0003 +[2026-03-02 00:12:41] (step=0028667) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.608882801800039, LR: 0.0003 +[2026-03-02 00:12:49] (step=0028668) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.609078458227353, LR: 0.0003 +[2026-03-02 00:12:56] (step=0028669) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.609274114654666, LR: 0.0003 +[2026-03-02 00:13:04] (step=0028670) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.60946977108198, LR: 0.0003 +[2026-03-02 00:13:12] (step=0028671) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.609665427509293, LR: 0.0003 +[2026-03-02 00:13:20] (step=0028672) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.609861083936607, LR: 0.0003 +[2026-03-02 00:13:28] (step=0028673) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.6100567403639205, LR: 0.0003 +[2026-03-02 00:13:36] (step=0028674) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.6102523967912346, LR: 0.0003 +[2026-03-02 00:13:44] (step=0028675) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.610448053218549, LR: 0.0003 +[2026-03-02 00:13:51] (step=0028676) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.610643709645862, LR: 0.0003 +[2026-03-02 00:13:59] (step=0028677) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.610839366073176, LR: 0.0003 +[2026-03-02 00:14:07] (step=0028678) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.611035022500489, LR: 0.0003 +[2026-03-02 00:14:15] (step=0028679) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.611230678927803, LR: 0.0003 +[2026-03-02 00:14:23] (step=0028680) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.611426335355117, LR: 0.0003 +[2026-03-02 00:14:31] (step=0028681) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.61162199178243, LR: 0.0003 +[2026-03-02 00:14:39] (step=0028682) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.611817648209744, LR: 0.0003 +[2026-03-02 00:14:46] (step=0028683) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.612013304637057, LR: 0.0003 +[2026-03-02 00:14:54] (step=0028684) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.612208961064371, LR: 0.0003 +[2026-03-02 00:15:02] (step=0028685) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.612404617491684, LR: 0.0003 +[2026-03-02 00:15:10] (step=0028686) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.612600273918998, LR: 0.0003 +[2026-03-02 00:15:18] (step=0028687) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.612795930346312, LR: 0.0003 +[2026-03-02 00:15:26] (step=0028688) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 5.612991586773625, LR: 0.0003 +[2026-03-02 00:15:34] (step=0028689) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.613187243200939, LR: 0.0003 +[2026-03-02 00:15:42] (step=0028690) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.613382899628252, LR: 0.0003 +[2026-03-02 00:15:49] (step=0028691) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.613578556055566, LR: 0.0003 +[2026-03-02 00:15:57] (step=0028692) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.61377421248288, LR: 0.0003 +[2026-03-02 00:16:05] (step=0028693) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.613969868910194, LR: 0.0003 +[2026-03-02 00:16:13] (step=0028694) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.614165525337508, LR: 0.0003 +[2026-03-02 00:16:21] (step=0028695) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.614361181764821, LR: 0.0003 +[2026-03-02 00:16:29] (step=0028696) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 5.614556838192135, LR: 0.0003 +[2026-03-02 00:16:37] (step=0028697) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.614752494619448, LR: 0.0003 +[2026-03-02 00:16:44] (step=0028698) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.614948151046762, LR: 0.0003 +[2026-03-02 00:16:53] (step=0028699) Train Loss: 0.4385, Train Steps/Sec: 0.12, Epoch: 5.615143807474076, LR: 0.0003 +[2026-03-02 00:17:00] (step=0028700) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 5.615339463901389, LR: 0.0003 +[2026-03-02 00:17:08] (step=0028701) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.615535120328703, LR: 0.0003 +[2026-03-02 00:17:16] (step=0028702) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.615730776756016, LR: 0.0003 +[2026-03-02 00:17:24] (step=0028703) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.61592643318333, LR: 0.0003 +[2026-03-02 00:17:32] (step=0028704) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.616122089610644, LR: 0.0003 +[2026-03-02 00:17:40] (step=0028705) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.616317746037957, LR: 0.0003 +[2026-03-02 00:17:48] (step=0028706) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.616513402465271, LR: 0.0003 +[2026-03-02 00:17:55] (step=0028707) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.616709058892584, LR: 0.0003 +[2026-03-02 00:18:03] (step=0028708) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.616904715319898, LR: 0.0003 +[2026-03-02 00:18:11] (step=0028709) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.617100371747212, LR: 0.0003 +[2026-03-02 00:18:19] (step=0028710) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.6172960281745254, LR: 0.0003 +[2026-03-02 00:18:27] (step=0028711) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.6174916846018395, LR: 0.0003 +[2026-03-02 00:18:35] (step=0028712) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.617687341029153, LR: 0.0003 +[2026-03-02 00:18:43] (step=0028713) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.617882997456467, LR: 0.0003 +[2026-03-02 00:18:50] (step=0028714) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.61807865388378, LR: 0.0003 +[2026-03-02 00:18:58] (step=0028715) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.618274310311094, LR: 0.0003 +[2026-03-02 00:19:06] (step=0028716) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.618469966738408, LR: 0.0003 +[2026-03-02 00:19:14] (step=0028717) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.618665623165721, LR: 0.0003 +[2026-03-02 00:19:22] (step=0028718) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.618861279593035, LR: 0.0003 +[2026-03-02 00:19:30] (step=0028719) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.619056936020348, LR: 0.0003 +[2026-03-02 00:19:38] (step=0028720) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 5.619252592447662, LR: 0.0003 +[2026-03-02 00:19:46] (step=0028721) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.619448248874976, LR: 0.0003 +[2026-03-02 00:19:53] (step=0028722) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.619643905302289, LR: 0.0003 +[2026-03-02 00:20:01] (step=0028723) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.619839561729603, LR: 0.0003 +[2026-03-02 00:20:09] (step=0028724) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.620035218156916, LR: 0.0003 +[2026-03-02 00:20:17] (step=0028725) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.62023087458423, LR: 0.0003 +[2026-03-02 00:20:25] (step=0028726) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.620426531011543, LR: 0.0003 +[2026-03-02 00:20:33] (step=0028727) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 5.620622187438857, LR: 0.0003 +[2026-03-02 00:20:41] (step=0028728) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.620817843866171, LR: 0.0003 +[2026-03-02 00:20:48] (step=0028729) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.6210135002934845, LR: 0.0003 +[2026-03-02 00:20:56] (step=0028730) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.6212091567207985, LR: 0.0003 +[2026-03-02 00:21:04] (step=0028731) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.621404813148112, LR: 0.0003 +[2026-03-02 00:21:12] (step=0028732) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.621600469575426, LR: 0.0003 +[2026-03-02 00:21:20] (step=0028733) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.62179612600274, LR: 0.0003 +[2026-03-02 00:21:28] (step=0028734) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 5.621991782430053, LR: 0.0003 +[2026-03-02 00:21:36] (step=0028735) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.622187438857367, LR: 0.0003 +[2026-03-02 00:21:44] (step=0028736) Train Loss: 0.4498, Train Steps/Sec: 0.12, Epoch: 5.62238309528468, LR: 0.0003 +[2026-03-02 00:21:51] (step=0028737) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.622578751711994, LR: 0.0003 +[2026-03-02 00:21:59] (step=0028738) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.622774408139307, LR: 0.0003 +[2026-03-02 00:22:07] (step=0028739) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.622970064566621, LR: 0.0003 +[2026-03-02 00:22:15] (step=0028740) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.623165720993935, LR: 0.0003 +[2026-03-02 00:22:23] (step=0028741) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.623361377421248, LR: 0.0003 +[2026-03-02 00:22:31] (step=0028742) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.623557033848562, LR: 0.0003 +[2026-03-02 00:22:39] (step=0028743) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.623752690275875, LR: 0.0003 +[2026-03-02 00:22:46] (step=0028744) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.623948346703189, LR: 0.0003 +[2026-03-02 00:22:54] (step=0028745) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.624144003130503, LR: 0.0003 +[2026-03-02 00:23:02] (step=0028746) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.624339659557816, LR: 0.0003 +[2026-03-02 00:23:10] (step=0028747) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.62453531598513, LR: 0.0003 +[2026-03-02 00:23:18] (step=0028748) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.6247309724124435, LR: 0.0003 +[2026-03-02 00:23:26] (step=0028749) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.6249266288397575, LR: 0.0003 +[2026-03-02 00:23:34] (step=0028750) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.625122285267071, LR: 0.0003 +[2026-03-02 00:23:42] (step=0028751) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.625317941694385, LR: 0.0003 +[2026-03-02 00:23:49] (step=0028752) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 5.625513598121699, LR: 0.0003 +[2026-03-02 00:23:57] (step=0028753) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.625709254549012, LR: 0.0003 +[2026-03-02 00:24:05] (step=0028754) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.625904910976326, LR: 0.0003 +[2026-03-02 00:24:13] (step=0028755) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.626100567403639, LR: 0.0003 +[2026-03-02 00:24:21] (step=0028756) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.626296223830953, LR: 0.0003 +[2026-03-02 00:24:29] (step=0028757) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.626491880258267, LR: 0.0003 +[2026-03-02 00:24:37] (step=0028758) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.62668753668558, LR: 0.0003 +[2026-03-02 00:24:44] (step=0028759) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.626883193112894, LR: 0.0003 +[2026-03-02 00:24:52] (step=0028760) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.627078849540207, LR: 0.0003 +[2026-03-02 00:25:00] (step=0028761) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 5.627274505967521, LR: 0.0003 +[2026-03-02 00:25:08] (step=0028762) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.627470162394835, LR: 0.0003 +[2026-03-02 00:25:16] (step=0028763) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.627665818822148, LR: 0.0003 +[2026-03-02 00:25:24] (step=0028764) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.627861475249462, LR: 0.0003 +[2026-03-02 00:25:32] (step=0028765) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 5.628057131676775, LR: 0.0003 +[2026-03-02 00:25:39] (step=0028766) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.628252788104089, LR: 0.0003 +[2026-03-02 00:25:47] (step=0028767) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.6284484445314025, LR: 0.0003 +[2026-03-02 00:25:55] (step=0028768) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.6286441009587165, LR: 0.0003 +[2026-03-02 00:26:03] (step=0028769) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.6288397573860305, LR: 0.0003 +[2026-03-02 00:26:11] (step=0028770) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.629035413813344, LR: 0.0003 +[2026-03-02 00:26:19] (step=0028771) Train Loss: 0.4236, Train Steps/Sec: 0.13, Epoch: 5.629231070240658, LR: 0.0003 +[2026-03-02 00:26:27] (step=0028772) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.629426726667971, LR: 0.0003 +[2026-03-02 00:26:35] (step=0028773) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.629622383095285, LR: 0.0003 +[2026-03-02 00:26:42] (step=0028774) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.629818039522599, LR: 0.0003 +[2026-03-02 00:26:50] (step=0028775) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.630013695949912, LR: 0.0003 +[2026-03-02 00:26:58] (step=0028776) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.630209352377226, LR: 0.0003 +[2026-03-02 00:27:06] (step=0028777) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.630405008804539, LR: 0.0003 +[2026-03-02 00:27:14] (step=0028778) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.630600665231853, LR: 0.0003 +[2026-03-02 00:27:22] (step=0028779) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 5.630796321659166, LR: 0.0003 +[2026-03-02 00:27:30] (step=0028780) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.63099197808648, LR: 0.0003 +[2026-03-02 00:27:37] (step=0028781) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.631187634513794, LR: 0.0003 +[2026-03-02 00:27:45] (step=0028782) Train Loss: 0.4328, Train Steps/Sec: 0.12, Epoch: 5.631383290941107, LR: 0.0003 +[2026-03-02 00:27:53] (step=0028783) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.631578947368421, LR: 0.0003 +[2026-03-02 00:28:01] (step=0028784) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.631774603795734, LR: 0.0003 +[2026-03-02 00:28:09] (step=0028785) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.631970260223048, LR: 0.0003 +[2026-03-02 00:28:17] (step=0028786) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.632165916650362, LR: 0.0003 +[2026-03-02 00:28:25] (step=0028787) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.6323615730776755, LR: 0.0003 +[2026-03-02 00:28:33] (step=0028788) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.6325572295049895, LR: 0.0003 +[2026-03-02 00:28:40] (step=0028789) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.632752885932303, LR: 0.0003 +[2026-03-02 00:28:48] (step=0028790) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 5.632948542359617, LR: 0.0003 +[2026-03-02 00:28:56] (step=0028791) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.63314419878693, LR: 0.0003 +[2026-03-02 00:29:04] (step=0028792) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.633339855214244, LR: 0.0003 +[2026-03-02 00:29:12] (step=0028793) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.633535511641558, LR: 0.0003 +[2026-03-02 00:29:20] (step=0028794) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.633731168068871, LR: 0.0003 +[2026-03-02 00:29:28] (step=0028795) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.633926824496185, LR: 0.0003 +[2026-03-02 00:29:35] (step=0028796) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.634122480923498, LR: 0.0003 +[2026-03-02 00:29:43] (step=0028797) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.634318137350812, LR: 0.0003 +[2026-03-02 00:29:51] (step=0028798) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 5.634513793778126, LR: 0.0003 +[2026-03-02 00:29:59] (step=0028799) Train Loss: 0.4437, Train Steps/Sec: 0.12, Epoch: 5.634709450205439, LR: 0.0003 +[2026-03-02 00:30:07] (step=0028800) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.634905106632753, LR: 0.0003 +[2026-03-02 00:30:15] (step=0028801) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.635100763060066, LR: 0.0003 +[2026-03-02 00:30:23] (step=0028802) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.63529641948738, LR: 0.0003 +[2026-03-02 00:30:31] (step=0028803) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.635492075914693, LR: 0.0003 +[2026-03-02 00:30:39] (step=0028804) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.635687732342007, LR: 0.0003 +[2026-03-02 00:30:46] (step=0028805) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.635883388769321, LR: 0.0003 +[2026-03-02 00:30:54] (step=0028806) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.6360790451966345, LR: 0.0003 +[2026-03-02 00:31:02] (step=0028807) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.6362747016239485, LR: 0.0003 +[2026-03-02 00:31:10] (step=0028808) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 5.636470358051262, LR: 0.0003 +[2026-03-02 00:31:18] (step=0028809) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.636666014478576, LR: 0.0003 +[2026-03-02 00:31:26] (step=0028810) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.63686167090589, LR: 0.0003 +[2026-03-02 00:31:34] (step=0028811) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.637057327333203, LR: 0.0003 +[2026-03-02 00:31:41] (step=0028812) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.637252983760517, LR: 0.0003 +[2026-03-02 00:31:49] (step=0028813) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.63744864018783, LR: 0.0003 +[2026-03-02 00:31:57] (step=0028814) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.637644296615144, LR: 0.0003 +[2026-03-02 00:32:05] (step=0028815) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.637839953042458, LR: 0.0003 +[2026-03-02 00:32:13] (step=0028816) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.638035609469771, LR: 0.0003 +[2026-03-02 00:32:21] (step=0028817) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.638231265897085, LR: 0.0003 +[2026-03-02 00:32:28] (step=0028818) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.638426922324398, LR: 0.0003 +[2026-03-02 00:32:36] (step=0028819) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 5.638622578751712, LR: 0.0003 +[2026-03-02 00:32:44] (step=0028820) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.638818235179025, LR: 0.0003 +[2026-03-02 00:32:52] (step=0028821) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.639013891606339, LR: 0.0003 +[2026-03-02 00:33:00] (step=0028822) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.639209548033653, LR: 0.0003 +[2026-03-02 00:33:08] (step=0028823) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.639405204460966, LR: 0.0003 +[2026-03-02 00:33:16] (step=0028824) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.63960086088828, LR: 0.0003 +[2026-03-02 00:33:23] (step=0028825) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.6397965173155935, LR: 0.0003 +[2026-03-02 00:33:31] (step=0028826) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.6399921737429075, LR: 0.0003 +[2026-03-02 00:33:39] (step=0028827) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.6401878301702215, LR: 0.0003 +[2026-03-02 00:33:47] (step=0028828) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.640383486597535, LR: 0.0003 +[2026-03-02 00:33:55] (step=0028829) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.640579143024849, LR: 0.0003 +[2026-03-02 00:34:03] (step=0028830) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.640774799452162, LR: 0.0003 +[2026-03-02 00:34:11] (step=0028831) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.640970455879476, LR: 0.0003 +[2026-03-02 00:34:19] (step=0028832) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 5.641166112306789, LR: 0.0003 +[2026-03-02 00:34:26] (step=0028833) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.641361768734103, LR: 0.0003 +[2026-03-02 00:34:34] (step=0028834) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.641557425161417, LR: 0.0003 +[2026-03-02 00:34:42] (step=0028835) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 5.64175308158873, LR: 0.0003 +[2026-03-02 00:34:50] (step=0028836) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.641948738016044, LR: 0.0003 +[2026-03-02 00:34:58] (step=0028837) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.642144394443357, LR: 0.0003 +[2026-03-02 00:35:06] (step=0028838) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.642340050870671, LR: 0.0003 +[2026-03-02 00:35:14] (step=0028839) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 5.642535707297985, LR: 0.0003 +[2026-03-02 00:35:21] (step=0028840) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.642731363725298, LR: 0.0003 +[2026-03-02 00:35:29] (step=0028841) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.642927020152612, LR: 0.0003 +[2026-03-02 00:35:37] (step=0028842) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.643122676579925, LR: 0.0003 +[2026-03-02 00:35:45] (step=0028843) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.643318333007239, LR: 0.0003 +[2026-03-02 00:35:53] (step=0028844) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.6435139894345525, LR: 0.0003 +[2026-03-02 00:36:01] (step=0028845) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.6437096458618665, LR: 0.0003 +[2026-03-02 00:36:09] (step=0028846) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.6439053022891805, LR: 0.0003 +[2026-03-02 00:36:16] (step=0028847) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.644100958716494, LR: 0.0003 +[2026-03-02 00:36:24] (step=0028848) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.644296615143808, LR: 0.0003 +[2026-03-02 00:36:32] (step=0028849) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.644492271571121, LR: 0.0003 +[2026-03-02 00:36:40] (step=0028850) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.644687927998435, LR: 0.0003 +[2026-03-02 00:36:48] (step=0028851) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.644883584425749, LR: 0.0003 +[2026-03-02 00:36:56] (step=0028852) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.645079240853062, LR: 0.0003 +[2026-03-02 00:37:04] (step=0028853) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.645274897280376, LR: 0.0003 +[2026-03-02 00:37:12] (step=0028854) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.645470553707689, LR: 0.0003 +[2026-03-02 00:37:19] (step=0028855) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.645666210135003, LR: 0.0003 +[2026-03-02 00:37:27] (step=0028856) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.645861866562316, LR: 0.0003 +[2026-03-02 00:37:35] (step=0028857) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.64605752298963, LR: 0.0003 +[2026-03-02 00:37:43] (step=0028858) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.646253179416944, LR: 0.0003 +[2026-03-02 00:37:51] (step=0028859) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.646448835844257, LR: 0.0003 +[2026-03-02 00:37:59] (step=0028860) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.646644492271571, LR: 0.0003 +[2026-03-02 00:38:07] (step=0028861) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.646840148698884, LR: 0.0003 +[2026-03-02 00:38:14] (step=0028862) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.647035805126198, LR: 0.0003 +[2026-03-02 00:38:22] (step=0028863) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.647231461553512, LR: 0.0003 +[2026-03-02 00:38:30] (step=0028864) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.6474271179808255, LR: 0.0003 +[2026-03-02 00:38:38] (step=0028865) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.6476227744081395, LR: 0.0003 +[2026-03-02 00:38:46] (step=0028866) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.647818430835453, LR: 0.0003 +[2026-03-02 00:38:54] (step=0028867) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.648014087262767, LR: 0.0003 +[2026-03-02 00:39:02] (step=0028868) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.64820974369008, LR: 0.0003 +[2026-03-02 00:39:10] (step=0028869) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 5.648405400117394, LR: 0.0003 +[2026-03-02 00:39:17] (step=0028870) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 5.648601056544708, LR: 0.0003 +[2026-03-02 00:39:25] (step=0028871) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.648796712972021, LR: 0.0003 +[2026-03-02 00:39:33] (step=0028872) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.648992369399335, LR: 0.0003 +[2026-03-02 00:39:41] (step=0028873) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.649188025826648, LR: 0.0003 +[2026-03-02 00:39:49] (step=0028874) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.649383682253962, LR: 0.0003 +[2026-03-02 00:39:57] (step=0028875) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.649579338681276, LR: 0.0003 +[2026-03-02 00:40:05] (step=0028876) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.649774995108589, LR: 0.0003 +[2026-03-02 00:40:12] (step=0028877) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.649970651535903, LR: 0.0003 +[2026-03-02 00:40:20] (step=0028878) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.650166307963216, LR: 0.0003 +[2026-03-02 00:40:28] (step=0028879) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.65036196439053, LR: 0.0003 +[2026-03-02 00:40:36] (step=0028880) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.650557620817844, LR: 0.0003 +[2026-03-02 00:40:44] (step=0028881) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.650753277245157, LR: 0.0003 +[2026-03-02 00:40:52] (step=0028882) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.650948933672471, LR: 0.0003 +[2026-03-02 00:41:00] (step=0028883) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.6511445900997845, LR: 0.0003 +[2026-03-02 00:41:08] (step=0028884) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.6513402465270985, LR: 0.0003 +[2026-03-02 00:41:15] (step=0028885) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 5.651535902954412, LR: 0.0003 +[2026-03-02 00:41:23] (step=0028886) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.651731559381726, LR: 0.0003 +[2026-03-02 00:41:31] (step=0028887) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.65192721580904, LR: 0.0003 +[2026-03-02 00:41:39] (step=0028888) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.652122872236353, LR: 0.0003 +[2026-03-02 00:41:47] (step=0028889) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.652318528663667, LR: 0.0003 +[2026-03-02 00:41:55] (step=0028890) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.65251418509098, LR: 0.0003 +[2026-03-02 00:42:03] (step=0028891) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.652709841518294, LR: 0.0003 +[2026-03-02 00:42:11] (step=0028892) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.652905497945608, LR: 0.0003 +[2026-03-02 00:42:18] (step=0028893) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.653101154372921, LR: 0.0003 +[2026-03-02 00:42:26] (step=0028894) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.653296810800235, LR: 0.0003 +[2026-03-02 00:42:34] (step=0028895) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.653492467227548, LR: 0.0003 +[2026-03-02 00:42:42] (step=0028896) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.653688123654862, LR: 0.0003 +[2026-03-02 00:42:50] (step=0028897) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.653883780082175, LR: 0.0003 +[2026-03-02 00:42:58] (step=0028898) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.654079436509489, LR: 0.0003 +[2026-03-02 00:43:06] (step=0028899) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.654275092936803, LR: 0.0003 +[2026-03-02 00:43:13] (step=0028900) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.654470749364116, LR: 0.0003 +[2026-03-02 00:43:21] (step=0028901) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.65466640579143, LR: 0.0003 +[2026-03-02 00:43:29] (step=0028902) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.6548620622187435, LR: 0.0003 +[2026-03-02 00:43:37] (step=0028903) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.6550577186460576, LR: 0.0003 +[2026-03-02 00:43:45] (step=0028904) Train Loss: 0.4449, Train Steps/Sec: 0.12, Epoch: 5.655253375073372, LR: 0.0003 +[2026-03-02 00:43:53] (step=0028905) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.655449031500685, LR: 0.0003 +[2026-03-02 00:44:01] (step=0028906) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.655644687927999, LR: 0.0003 +[2026-03-02 00:44:09] (step=0028907) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.655840344355312, LR: 0.0003 +[2026-03-02 00:44:16] (step=0028908) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.656036000782626, LR: 0.0003 +[2026-03-02 00:44:24] (step=0028909) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 5.656231657209939, LR: 0.0003 +[2026-03-02 00:44:32] (step=0028910) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.656427313637253, LR: 0.0003 +[2026-03-02 00:44:40] (step=0028911) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.656622970064567, LR: 0.0003 +[2026-03-02 00:44:48] (step=0028912) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.65681862649188, LR: 0.0003 +[2026-03-02 00:44:56] (step=0028913) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 5.657014282919194, LR: 0.0003 +[2026-03-02 00:45:04] (step=0028914) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.657209939346507, LR: 0.0003 +[2026-03-02 00:45:12] (step=0028915) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.657405595773821, LR: 0.0003 +[2026-03-02 00:45:19] (step=0028916) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.657601252201135, LR: 0.0003 +[2026-03-02 00:45:27] (step=0028917) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 5.657796908628448, LR: 0.0003 +[2026-03-02 00:45:35] (step=0028918) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.657992565055762, LR: 0.0003 +[2026-03-02 00:45:43] (step=0028919) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.658188221483075, LR: 0.0003 +[2026-03-02 00:45:51] (step=0028920) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.658383877910389, LR: 0.0003 +[2026-03-02 00:45:59] (step=0028921) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.6585795343377026, LR: 0.0003 +[2026-03-02 00:46:07] (step=0028922) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 5.658775190765017, LR: 0.0003 +[2026-03-02 00:46:14] (step=0028923) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.658970847192331, LR: 0.0003 +[2026-03-02 00:46:22] (step=0028924) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 5.659166503619644, LR: 0.0003 +[2026-03-02 00:46:30] (step=0028925) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.659362160046958, LR: 0.0003 +[2026-03-02 00:46:38] (step=0028926) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.659557816474271, LR: 0.0003 +[2026-03-02 00:46:46] (step=0028927) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.659753472901585, LR: 0.0003 +[2026-03-02 00:46:54] (step=0028928) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.659949129328899, LR: 0.0003 +[2026-03-02 00:47:02] (step=0028929) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.660144785756212, LR: 0.0003 +[2026-03-02 00:47:10] (step=0028930) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.660340442183526, LR: 0.0003 +[2026-03-02 00:47:18] (step=0028931) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.660536098610839, LR: 0.0003 +[2026-03-02 00:47:25] (step=0028932) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.660731755038153, LR: 0.0003 +[2026-03-02 00:47:33] (step=0028933) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.660927411465467, LR: 0.0003 +[2026-03-02 00:47:41] (step=0028934) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.66112306789278, LR: 0.0003 +[2026-03-02 00:47:49] (step=0028935) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.661318724320094, LR: 0.0003 +[2026-03-02 00:47:57] (step=0028936) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.661514380747407, LR: 0.0003 +[2026-03-02 00:48:05] (step=0028937) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.661710037174721, LR: 0.0003 +[2026-03-02 00:48:13] (step=0028938) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.661905693602034, LR: 0.0003 +[2026-03-02 00:48:20] (step=0028939) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.6621013500293484, LR: 0.0003 +[2026-03-02 00:48:28] (step=0028940) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.6622970064566625, LR: 0.0003 +[2026-03-02 00:48:36] (step=0028941) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.662492662883976, LR: 0.0003 +[2026-03-02 00:48:44] (step=0028942) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.66268831931129, LR: 0.0003 +[2026-03-02 00:48:52] (step=0028943) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.662883975738603, LR: 0.0003 +[2026-03-02 00:49:00] (step=0028944) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.663079632165917, LR: 0.0003 +[2026-03-02 00:49:08] (step=0028945) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.663275288593231, LR: 0.0003 +[2026-03-02 00:49:15] (step=0028946) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.663470945020544, LR: 0.0003 +[2026-03-02 00:49:23] (step=0028947) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 5.663666601447858, LR: 0.0003 +[2026-03-02 00:49:31] (step=0028948) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.663862257875171, LR: 0.0003 +[2026-03-02 00:49:39] (step=0028949) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.664057914302485, LR: 0.0003 +[2026-03-02 00:49:47] (step=0028950) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 5.664253570729798, LR: 0.0003 +[2026-03-02 00:49:55] (step=0028951) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.664449227157112, LR: 0.0003 +[2026-03-02 00:50:03] (step=0028952) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.664644883584426, LR: 0.0003 +[2026-03-02 00:50:10] (step=0028953) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.664840540011739, LR: 0.0003 +[2026-03-02 00:50:18] (step=0028954) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.665036196439053, LR: 0.0003 +[2026-03-02 00:50:26] (step=0028955) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.665231852866366, LR: 0.0003 +[2026-03-02 00:50:34] (step=0028956) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.66542750929368, LR: 0.0003 +[2026-03-02 00:50:42] (step=0028957) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.665623165720994, LR: 0.0003 +[2026-03-02 00:50:50] (step=0028958) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.6658188221483075, LR: 0.0003 +[2026-03-02 00:50:58] (step=0028959) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.6660144785756215, LR: 0.0003 +[2026-03-02 00:51:06] (step=0028960) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.666210135002935, LR: 0.0003 +[2026-03-02 00:51:13] (step=0028961) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.666405791430249, LR: 0.0003 +[2026-03-02 00:51:21] (step=0028962) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.666601447857562, LR: 0.0003 +[2026-03-02 00:51:29] (step=0028963) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.666797104284876, LR: 0.0003 +[2026-03-02 00:51:37] (step=0028964) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 5.66699276071219, LR: 0.0003 +[2026-03-02 00:51:45] (step=0028965) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.667188417139503, LR: 0.0003 +[2026-03-02 00:51:53] (step=0028966) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.667384073566817, LR: 0.0003 +[2026-03-02 00:52:01] (step=0028967) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.66757972999413, LR: 0.0003 +[2026-03-02 00:52:08] (step=0028968) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.667775386421444, LR: 0.0003 +[2026-03-02 00:52:16] (step=0028969) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.667971042848758, LR: 0.0003 +[2026-03-02 00:52:24] (step=0028970) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.668166699276071, LR: 0.0003 +[2026-03-02 00:52:32] (step=0028971) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.668362355703385, LR: 0.0003 +[2026-03-02 00:52:40] (step=0028972) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.668558012130698, LR: 0.0003 +[2026-03-02 00:52:48] (step=0028973) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.668753668558012, LR: 0.0003 +[2026-03-02 00:52:55] (step=0028974) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.668949324985325, LR: 0.0003 +[2026-03-02 00:53:03] (step=0028975) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.669144981412639, LR: 0.0003 +[2026-03-02 00:53:11] (step=0028976) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.669340637839953, LR: 0.0003 +[2026-03-02 00:53:19] (step=0028977) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.6695362942672665, LR: 0.0003 +[2026-03-02 00:53:27] (step=0028978) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.6697319506945805, LR: 0.0003 +[2026-03-02 00:53:35] (step=0028979) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.669927607121894, LR: 0.0003 +[2026-03-02 00:53:43] (step=0028980) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.670123263549208, LR: 0.0003 +[2026-03-02 00:53:51] (step=0028981) Train Loss: 0.4349, Train Steps/Sec: 0.12, Epoch: 5.670318919976522, LR: 0.0003 +[2026-03-02 00:53:59] (step=0028982) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 5.670514576403835, LR: 0.0003 +[2026-03-02 00:54:06] (step=0028983) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.670710232831149, LR: 0.0003 +[2026-03-02 00:54:14] (step=0028984) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 5.670905889258462, LR: 0.0003 +[2026-03-02 00:54:22] (step=0028985) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.671101545685776, LR: 0.0003 +[2026-03-02 00:54:30] (step=0028986) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.67129720211309, LR: 0.0003 +[2026-03-02 00:54:38] (step=0028987) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.671492858540403, LR: 0.0003 +[2026-03-02 00:54:46] (step=0028988) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 5.671688514967717, LR: 0.0003 +[2026-03-02 00:54:54] (step=0028989) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.67188417139503, LR: 0.0003 +[2026-03-02 00:55:01] (step=0028990) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.672079827822344, LR: 0.0003 +[2026-03-02 00:55:09] (step=0028991) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.672275484249657, LR: 0.0003 +[2026-03-02 00:55:17] (step=0028992) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.672471140676971, LR: 0.0003 +[2026-03-02 00:55:25] (step=0028993) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.672666797104285, LR: 0.0003 +[2026-03-02 00:55:33] (step=0028994) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.672862453531598, LR: 0.0003 +[2026-03-02 00:55:41] (step=0028995) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.673058109958912, LR: 0.0003 +[2026-03-02 00:55:49] (step=0028996) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.6732537663862255, LR: 0.0003 +[2026-03-02 00:55:56] (step=0028997) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.6734494228135395, LR: 0.0003 +[2026-03-02 00:56:04] (step=0028998) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 5.6736450792408535, LR: 0.0003 +[2026-03-02 00:56:12] (step=0028999) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.673840735668167, LR: 0.0003 +[2026-03-02 00:56:20] (step=0029000) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.674036392095481, LR: 0.0003 +[2026-03-02 00:56:20] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0029000/ +[2026-03-02 00:56:28] (step=0029001) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.674232048522794, LR: 0.0003 +[2026-03-02 00:56:36] (step=0029002) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.674427704950108, LR: 0.0003 +[2026-03-02 00:56:44] (step=0029003) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.674623361377421, LR: 0.0003 +[2026-03-02 00:56:51] (step=0029004) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.674819017804735, LR: 0.0003 +[2026-03-02 00:56:59] (step=0029005) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 5.675014674232049, LR: 0.0003 +[2026-03-02 00:57:07] (step=0029006) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.675210330659362, LR: 0.0003 +[2026-03-02 00:57:15] (step=0029007) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.675405987086676, LR: 0.0003 +[2026-03-02 00:57:23] (step=0029008) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 5.675601643513989, LR: 0.0003 +[2026-03-02 00:57:31] (step=0029009) Train Loss: 0.4573, Train Steps/Sec: 0.12, Epoch: 5.675797299941303, LR: 0.0003 +[2026-03-02 00:57:39] (step=0029010) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.675992956368617, LR: 0.0003 +[2026-03-02 00:57:47] (step=0029011) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 5.67618861279593, LR: 0.0003 +[2026-03-02 00:57:55] (step=0029012) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.676384269223244, LR: 0.0003 +[2026-03-02 00:58:02] (step=0029013) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.676579925650557, LR: 0.0003 +[2026-03-02 00:58:10] (step=0029014) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.676775582077871, LR: 0.0003 +[2026-03-02 00:58:18] (step=0029015) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 5.6769712385051845, LR: 0.0003 +[2026-03-02 00:58:26] (step=0029016) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.6771668949324985, LR: 0.0003 +[2026-03-02 00:58:34] (step=0029017) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.6773625513598125, LR: 0.0003 +[2026-03-02 00:58:42] (step=0029018) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.677558207787126, LR: 0.0003 +[2026-03-02 00:58:50] (step=0029019) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.67775386421444, LR: 0.0003 +[2026-03-02 00:58:57] (step=0029020) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.677949520641753, LR: 0.0003 +[2026-03-02 00:59:05] (step=0029021) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.678145177069067, LR: 0.0003 +[2026-03-02 00:59:13] (step=0029022) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.678340833496381, LR: 0.0003 +[2026-03-02 00:59:21] (step=0029023) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.678536489923694, LR: 0.0003 +[2026-03-02 00:59:29] (step=0029024) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.678732146351008, LR: 0.0003 +[2026-03-02 00:59:37] (step=0029025) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.678927802778321, LR: 0.0003 +[2026-03-02 00:59:45] (step=0029026) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.679123459205635, LR: 0.0003 +[2026-03-02 00:59:52] (step=0029027) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.679319115632948, LR: 0.0003 +[2026-03-02 01:00:00] (step=0029028) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.679514772060262, LR: 0.0003 +[2026-03-02 01:00:08] (step=0029029) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.679710428487576, LR: 0.0003 +[2026-03-02 01:00:16] (step=0029030) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.679906084914889, LR: 0.0003 +[2026-03-02 01:00:24] (step=0029031) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.680101741342203, LR: 0.0003 +[2026-03-02 01:00:32] (step=0029032) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 5.680297397769516, LR: 0.0003 +[2026-03-02 01:00:40] (step=0029033) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.68049305419683, LR: 0.0003 +[2026-03-02 01:00:48] (step=0029034) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.680688710624144, LR: 0.0003 +[2026-03-02 01:00:55] (step=0029035) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.6808843670514575, LR: 0.0003 +[2026-03-02 01:01:03] (step=0029036) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.6810800234787715, LR: 0.0003 +[2026-03-02 01:01:11] (step=0029037) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.681275679906085, LR: 0.0003 +[2026-03-02 01:01:19] (step=0029038) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.681471336333399, LR: 0.0003 +[2026-03-02 01:01:27] (step=0029039) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.681666992760713, LR: 0.0003 +[2026-03-02 01:01:35] (step=0029040) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.681862649188026, LR: 0.0003 +[2026-03-02 01:01:43] (step=0029041) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 5.68205830561534, LR: 0.0003 +[2026-03-02 01:01:50] (step=0029042) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.682253962042653, LR: 0.0003 +[2026-03-02 01:01:58] (step=0029043) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 5.682449618469967, LR: 0.0003 +[2026-03-02 01:02:06] (step=0029044) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.68264527489728, LR: 0.0003 +[2026-03-02 01:02:14] (step=0029045) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.682840931324594, LR: 0.0003 +[2026-03-02 01:02:22] (step=0029046) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 5.683036587751908, LR: 0.0003 +[2026-03-02 01:02:30] (step=0029047) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 5.683232244179221, LR: 0.0003 +[2026-03-02 01:02:38] (step=0029048) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.683427900606535, LR: 0.0003 +[2026-03-02 01:02:46] (step=0029049) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.683623557033848, LR: 0.0003 +[2026-03-02 01:02:53] (step=0029050) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.683819213461162, LR: 0.0003 +[2026-03-02 01:03:01] (step=0029051) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.684014869888476, LR: 0.0003 +[2026-03-02 01:03:09] (step=0029052) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.684210526315789, LR: 0.0003 +[2026-03-02 01:03:17] (step=0029053) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.684406182743103, LR: 0.0003 +[2026-03-02 01:03:25] (step=0029054) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.6846018391704165, LR: 0.0003 +[2026-03-02 01:03:33] (step=0029055) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.6847974955977305, LR: 0.0003 +[2026-03-02 01:03:41] (step=0029056) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.684993152025044, LR: 0.0003 +[2026-03-02 01:03:48] (step=0029057) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.685188808452358, LR: 0.0003 +[2026-03-02 01:03:56] (step=0029058) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.685384464879672, LR: 0.0003 +[2026-03-02 01:04:04] (step=0029059) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.685580121306985, LR: 0.0003 +[2026-03-02 01:04:12] (step=0029060) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.685775777734299, LR: 0.0003 +[2026-03-02 01:04:20] (step=0029061) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.685971434161612, LR: 0.0003 +[2026-03-02 01:04:28] (step=0029062) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.686167090588926, LR: 0.0003 +[2026-03-02 01:04:36] (step=0029063) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.68636274701624, LR: 0.0003 +[2026-03-02 01:04:44] (step=0029064) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.686558403443553, LR: 0.0003 +[2026-03-02 01:04:51] (step=0029065) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.686754059870867, LR: 0.0003 +[2026-03-02 01:04:59] (step=0029066) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.68694971629818, LR: 0.0003 +[2026-03-02 01:05:07] (step=0029067) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.687145372725494, LR: 0.0003 +[2026-03-02 01:05:15] (step=0029068) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.687341029152807, LR: 0.0003 +[2026-03-02 01:05:23] (step=0029069) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.687536685580121, LR: 0.0003 +[2026-03-02 01:05:31] (step=0029070) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.687732342007435, LR: 0.0003 +[2026-03-02 01:05:39] (step=0029071) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.687927998434748, LR: 0.0003 +[2026-03-02 01:05:46] (step=0029072) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.688123654862062, LR: 0.0003 +[2026-03-02 01:05:54] (step=0029073) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.6883193112893755, LR: 0.0003 +[2026-03-02 01:06:02] (step=0029074) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.6885149677166895, LR: 0.0003 +[2026-03-02 01:06:10] (step=0029075) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.6887106241440035, LR: 0.0003 +[2026-03-02 01:06:18] (step=0029076) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 5.688906280571317, LR: 0.0003 +[2026-03-02 01:06:26] (step=0029077) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.689101936998631, LR: 0.0003 +[2026-03-02 01:06:34] (step=0029078) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.689297593425944, LR: 0.0003 +[2026-03-02 01:06:42] (step=0029079) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.689493249853258, LR: 0.0003 +[2026-03-02 01:06:49] (step=0029080) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 5.689688906280571, LR: 0.0003 +[2026-03-02 01:06:57] (step=0029081) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 5.689884562707885, LR: 0.0003 +[2026-03-02 01:07:05] (step=0029082) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.690080219135199, LR: 0.0003 +[2026-03-02 01:07:13] (step=0029083) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 5.690275875562512, LR: 0.0003 +[2026-03-02 01:07:21] (step=0029084) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.690471531989826, LR: 0.0003 +[2026-03-02 01:07:29] (step=0029085) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.690667188417139, LR: 0.0003 +[2026-03-02 01:07:37] (step=0029086) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.690862844844453, LR: 0.0003 +[2026-03-02 01:07:44] (step=0029087) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.691058501271767, LR: 0.0003 +[2026-03-02 01:07:52] (step=0029088) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.69125415769908, LR: 0.0003 +[2026-03-02 01:08:00] (step=0029089) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.691449814126394, LR: 0.0003 +[2026-03-02 01:08:08] (step=0029090) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.691645470553707, LR: 0.0003 +[2026-03-02 01:08:16] (step=0029091) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.691841126981021, LR: 0.0003 +[2026-03-02 01:08:24] (step=0029092) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.6920367834083345, LR: 0.0003 +[2026-03-02 01:08:32] (step=0029093) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.6922324398356485, LR: 0.0003 +[2026-03-02 01:08:39] (step=0029094) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.6924280962629625, LR: 0.0003 +[2026-03-02 01:08:47] (step=0029095) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.692623752690276, LR: 0.0003 +[2026-03-02 01:08:55] (step=0029096) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.69281940911759, LR: 0.0003 +[2026-03-02 01:09:03] (step=0029097) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.693015065544903, LR: 0.0003 +[2026-03-02 01:09:11] (step=0029098) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.693210721972217, LR: 0.0003 +[2026-03-02 01:09:19] (step=0029099) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.693406378399531, LR: 0.0003 +[2026-03-02 01:09:27] (step=0029100) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.693602034826844, LR: 0.0003 +[2026-03-02 01:09:34] (step=0029101) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.693797691254158, LR: 0.0003 +[2026-03-02 01:09:42] (step=0029102) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.693993347681471, LR: 0.0003 +[2026-03-02 01:09:50] (step=0029103) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.694189004108785, LR: 0.0003 +[2026-03-02 01:09:58] (step=0029104) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.694384660536099, LR: 0.0003 +[2026-03-02 01:10:06] (step=0029105) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.694580316963412, LR: 0.0003 +[2026-03-02 01:10:14] (step=0029106) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.694775973390726, LR: 0.0003 +[2026-03-02 01:10:22] (step=0029107) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 5.694971629818039, LR: 0.0003 +[2026-03-02 01:10:30] (step=0029108) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.695167286245353, LR: 0.0003 +[2026-03-02 01:10:37] (step=0029109) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.695362942672666, LR: 0.0003 +[2026-03-02 01:10:45] (step=0029110) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.69555859909998, LR: 0.0003 +[2026-03-02 01:10:53] (step=0029111) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.695754255527294, LR: 0.0003 +[2026-03-02 01:11:01] (step=0029112) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.6959499119546075, LR: 0.0003 +[2026-03-02 01:11:09] (step=0029113) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 5.6961455683819215, LR: 0.0003 +[2026-03-02 01:11:17] (step=0029114) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 5.696341224809235, LR: 0.0003 +[2026-03-02 01:11:25] (step=0029115) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.696536881236549, LR: 0.0003 +[2026-03-02 01:11:32] (step=0029116) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.696732537663863, LR: 0.0003 +[2026-03-02 01:11:40] (step=0029117) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.696928194091176, LR: 0.0003 +[2026-03-02 01:11:48] (step=0029118) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.69712385051849, LR: 0.0003 +[2026-03-02 01:11:56] (step=0029119) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.697319506945803, LR: 0.0003 +[2026-03-02 01:12:04] (step=0029120) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.697515163373117, LR: 0.0003 +[2026-03-02 01:12:12] (step=0029121) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 5.69771081980043, LR: 0.0003 +[2026-03-02 01:12:20] (step=0029122) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.697906476227744, LR: 0.0003 +[2026-03-02 01:12:28] (step=0029123) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.698102132655058, LR: 0.0003 +[2026-03-02 01:12:35] (step=0029124) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.698297789082371, LR: 0.0003 +[2026-03-02 01:12:43] (step=0029125) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.698493445509685, LR: 0.0003 +[2026-03-02 01:12:51] (step=0029126) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.698689101936998, LR: 0.0003 +[2026-03-02 01:12:59] (step=0029127) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 5.698884758364312, LR: 0.0003 +[2026-03-02 01:13:07] (step=0029128) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.699080414791626, LR: 0.0003 +[2026-03-02 01:13:15] (step=0029129) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.699276071218939, LR: 0.0003 +[2026-03-02 01:13:22] (step=0029130) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.699471727646253, LR: 0.0003 +[2026-03-02 01:13:30] (step=0029131) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.6996673840735665, LR: 0.0003 +[2026-03-02 01:13:38] (step=0029132) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 5.6998630405008806, LR: 0.0003 +[2026-03-02 01:13:46] (step=0029133) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.700058696928194, LR: 0.0003 +[2026-03-02 01:13:54] (step=0029134) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.700254353355508, LR: 0.0003 +[2026-03-02 01:14:02] (step=0029135) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.700450009782822, LR: 0.0003 +[2026-03-02 01:14:10] (step=0029136) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.700645666210135, LR: 0.0003 +[2026-03-02 01:14:17] (step=0029137) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 5.700841322637449, LR: 0.0003 +[2026-03-02 01:14:25] (step=0029138) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.701036979064762, LR: 0.0003 +[2026-03-02 01:14:33] (step=0029139) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.701232635492076, LR: 0.0003 +[2026-03-02 01:14:41] (step=0029140) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.70142829191939, LR: 0.0003 +[2026-03-02 01:14:49] (step=0029141) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.701623948346703, LR: 0.0003 +[2026-03-02 01:14:57] (step=0029142) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 5.701819604774017, LR: 0.0003 +[2026-03-02 01:15:05] (step=0029143) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.70201526120133, LR: 0.0003 +[2026-03-02 01:15:12] (step=0029144) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.702210917628644, LR: 0.0003 +[2026-03-02 01:15:20] (step=0029145) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.702406574055957, LR: 0.0003 +[2026-03-02 01:15:28] (step=0029146) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.702602230483271, LR: 0.0003 +[2026-03-02 01:15:36] (step=0029147) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.702797886910585, LR: 0.0003 +[2026-03-02 01:15:44] (step=0029148) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.702993543337898, LR: 0.0003 +[2026-03-02 01:15:52] (step=0029149) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 5.703189199765212, LR: 0.0003 +[2026-03-02 01:15:59] (step=0029150) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.7033848561925256, LR: 0.0003 +[2026-03-02 01:16:07] (step=0029151) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.70358051261984, LR: 0.0003 +[2026-03-02 01:16:15] (step=0029152) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.703776169047154, LR: 0.0003 +[2026-03-02 01:16:23] (step=0029153) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.703971825474467, LR: 0.0003 +[2026-03-02 01:16:31] (step=0029154) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.704167481901781, LR: 0.0003 +[2026-03-02 01:16:39] (step=0029155) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.704363138329094, LR: 0.0003 +[2026-03-02 01:16:47] (step=0029156) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.704558794756408, LR: 0.0003 +[2026-03-02 01:16:55] (step=0029157) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.704754451183722, LR: 0.0003 +[2026-03-02 01:17:02] (step=0029158) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.704950107611035, LR: 0.0003 +[2026-03-02 01:17:10] (step=0029159) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 5.705145764038349, LR: 0.0003 +[2026-03-02 01:17:18] (step=0029160) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.705341420465662, LR: 0.0003 +[2026-03-02 01:17:26] (step=0029161) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.705537076892976, LR: 0.0003 +[2026-03-02 01:17:34] (step=0029162) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.705732733320289, LR: 0.0003 +[2026-03-02 01:17:42] (step=0029163) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.705928389747603, LR: 0.0003 +[2026-03-02 01:17:50] (step=0029164) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.706124046174917, LR: 0.0003 +[2026-03-02 01:17:57] (step=0029165) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.70631970260223, LR: 0.0003 +[2026-03-02 01:18:05] (step=0029166) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.706515359029544, LR: 0.0003 +[2026-03-02 01:18:13] (step=0029167) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.706711015456857, LR: 0.0003 +[2026-03-02 01:18:21] (step=0029168) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.7069066718841714, LR: 0.0003 +[2026-03-02 01:18:29] (step=0029169) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.7071023283114855, LR: 0.0003 +[2026-03-02 01:18:37] (step=0029170) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.707297984738799, LR: 0.0003 +[2026-03-02 01:18:45] (step=0029171) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 5.707493641166113, LR: 0.0003 +[2026-03-02 01:18:52] (step=0029172) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.707689297593426, LR: 0.0003 +[2026-03-02 01:19:00] (step=0029173) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.70788495402074, LR: 0.0003 +[2026-03-02 01:19:08] (step=0029174) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.708080610448053, LR: 0.0003 +[2026-03-02 01:19:16] (step=0029175) Train Loss: 0.4405, Train Steps/Sec: 0.12, Epoch: 5.708276266875367, LR: 0.0003 +[2026-03-02 01:19:24] (step=0029176) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 5.708471923302681, LR: 0.0003 +[2026-03-02 01:19:32] (step=0029177) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.708667579729994, LR: 0.0003 +[2026-03-02 01:19:40] (step=0029178) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.708863236157308, LR: 0.0003 +[2026-03-02 01:19:48] (step=0029179) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.709058892584621, LR: 0.0003 +[2026-03-02 01:19:55] (step=0029180) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.709254549011935, LR: 0.0003 +[2026-03-02 01:20:03] (step=0029181) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.709450205439249, LR: 0.0003 +[2026-03-02 01:20:11] (step=0029182) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.709645861866562, LR: 0.0003 +[2026-03-02 01:20:19] (step=0029183) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.709841518293876, LR: 0.0003 +[2026-03-02 01:20:27] (step=0029184) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.710037174721189, LR: 0.0003 +[2026-03-02 01:20:35] (step=0029185) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 5.710232831148503, LR: 0.0003 +[2026-03-02 01:20:43] (step=0029186) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.7104284875758164, LR: 0.0003 +[2026-03-02 01:20:50] (step=0029187) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.7106241440031305, LR: 0.0003 +[2026-03-02 01:20:58] (step=0029188) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.7108198004304445, LR: 0.0003 +[2026-03-02 01:21:06] (step=0029189) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.711015456857758, LR: 0.0003 +[2026-03-02 01:21:14] (step=0029190) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.711211113285072, LR: 0.0003 +[2026-03-02 01:21:22] (step=0029191) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 5.711406769712385, LR: 0.0003 +[2026-03-02 01:21:30] (step=0029192) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 5.711602426139699, LR: 0.0003 +[2026-03-02 01:21:38] (step=0029193) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.711798082567013, LR: 0.0003 +[2026-03-02 01:21:45] (step=0029194) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.711993738994326, LR: 0.0003 +[2026-03-02 01:21:53] (step=0029195) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.71218939542164, LR: 0.0003 +[2026-03-02 01:22:01] (step=0029196) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.712385051848953, LR: 0.0003 +[2026-03-02 01:22:09] (step=0029197) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.712580708276267, LR: 0.0003 +[2026-03-02 01:22:17] (step=0029198) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.71277636470358, LR: 0.0003 +[2026-03-02 01:22:25] (step=0029199) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.712972021130894, LR: 0.0003 +[2026-03-02 01:22:33] (step=0029200) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.713167677558208, LR: 0.0003 +[2026-03-02 01:22:40] (step=0029201) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 5.713363333985521, LR: 0.0003 +[2026-03-02 01:22:48] (step=0029202) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.713558990412835, LR: 0.0003 +[2026-03-02 01:22:56] (step=0029203) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.713754646840148, LR: 0.0003 +[2026-03-02 01:23:04] (step=0029204) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.713950303267462, LR: 0.0003 +[2026-03-02 01:23:12] (step=0029205) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 5.714145959694776, LR: 0.0003 +[2026-03-02 01:23:20] (step=0029206) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.7143416161220895, LR: 0.0003 +[2026-03-02 01:23:28] (step=0029207) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.7145372725494035, LR: 0.0003 +[2026-03-02 01:23:35] (step=0029208) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.714732928976717, LR: 0.0003 +[2026-03-02 01:23:43] (step=0029209) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.714928585404031, LR: 0.0003 +[2026-03-02 01:23:51] (step=0029210) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.715124241831345, LR: 0.0003 +[2026-03-02 01:23:59] (step=0029211) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.715319898258658, LR: 0.0003 +[2026-03-02 01:24:07] (step=0029212) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.715515554685972, LR: 0.0003 +[2026-03-02 01:24:15] (step=0029213) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.715711211113285, LR: 0.0003 +[2026-03-02 01:24:23] (step=0029214) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.715906867540599, LR: 0.0003 +[2026-03-02 01:24:31] (step=0029215) Train Loss: 0.4250, Train Steps/Sec: 0.13, Epoch: 5.716102523967912, LR: 0.0003 +[2026-03-02 01:24:38] (step=0029216) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.716298180395226, LR: 0.0003 +[2026-03-02 01:24:46] (step=0029217) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.71649383682254, LR: 0.0003 +[2026-03-02 01:24:54] (step=0029218) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.716689493249853, LR: 0.0003 +[2026-03-02 01:25:02] (step=0029219) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.716885149677167, LR: 0.0003 +[2026-03-02 01:25:10] (step=0029220) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 5.71708080610448, LR: 0.0003 +[2026-03-02 01:25:18] (step=0029221) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.717276462531794, LR: 0.0003 +[2026-03-02 01:25:25] (step=0029222) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.717472118959108, LR: 0.0003 +[2026-03-02 01:25:33] (step=0029223) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.717667775386421, LR: 0.0003 +[2026-03-02 01:25:41] (step=0029224) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.717863431813735, LR: 0.0003 +[2026-03-02 01:25:49] (step=0029225) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 5.7180590882410485, LR: 0.0003 +[2026-03-02 01:25:57] (step=0029226) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.7182547446683625, LR: 0.0003 +[2026-03-02 01:26:05] (step=0029227) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.718450401095676, LR: 0.0003 +[2026-03-02 01:26:13] (step=0029228) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.71864605752299, LR: 0.0003 +[2026-03-02 01:26:21] (step=0029229) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.718841713950304, LR: 0.0003 +[2026-03-02 01:26:28] (step=0029230) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.719037370377617, LR: 0.0003 +[2026-03-02 01:26:36] (step=0029231) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.719233026804931, LR: 0.0003 +[2026-03-02 01:26:44] (step=0029232) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.719428683232244, LR: 0.0003 +[2026-03-02 01:26:52] (step=0029233) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.719624339659558, LR: 0.0003 +[2026-03-02 01:27:00] (step=0029234) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.719819996086872, LR: 0.0003 +[2026-03-02 01:27:08] (step=0029235) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.720015652514185, LR: 0.0003 +[2026-03-02 01:27:16] (step=0029236) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.720211308941499, LR: 0.0003 +[2026-03-02 01:27:23] (step=0029237) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.720406965368812, LR: 0.0003 +[2026-03-02 01:27:31] (step=0029238) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.720602621796126, LR: 0.0003 +[2026-03-02 01:27:39] (step=0029239) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.720798278223439, LR: 0.0003 +[2026-03-02 01:27:47] (step=0029240) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.720993934650753, LR: 0.0003 +[2026-03-02 01:27:55] (step=0029241) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.721189591078067, LR: 0.0003 +[2026-03-02 01:28:03] (step=0029242) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.72138524750538, LR: 0.0003 +[2026-03-02 01:28:11] (step=0029243) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.721580903932694, LR: 0.0003 +[2026-03-02 01:28:18] (step=0029244) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.7217765603600075, LR: 0.0003 +[2026-03-02 01:28:26] (step=0029245) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.7219722167873215, LR: 0.0003 +[2026-03-02 01:28:34] (step=0029246) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.7221678732146355, LR: 0.0003 +[2026-03-02 01:28:42] (step=0029247) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.722363529641949, LR: 0.0003 +[2026-03-02 01:28:50] (step=0029248) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.722559186069263, LR: 0.0003 +[2026-03-02 01:28:58] (step=0029249) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.722754842496576, LR: 0.0003 +[2026-03-02 01:29:06] (step=0029250) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.72295049892389, LR: 0.0003 +[2026-03-02 01:29:14] (step=0029251) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.723146155351203, LR: 0.0003 +[2026-03-02 01:29:21] (step=0029252) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.723341811778517, LR: 0.0003 +[2026-03-02 01:29:29] (step=0029253) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.723537468205831, LR: 0.0003 +[2026-03-02 01:29:37] (step=0029254) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.723733124633144, LR: 0.0003 +[2026-03-02 01:29:45] (step=0029255) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.723928781060458, LR: 0.0003 +[2026-03-02 01:29:53] (step=0029256) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.724124437487771, LR: 0.0003 +[2026-03-02 01:30:01] (step=0029257) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.724320093915085, LR: 0.0003 +[2026-03-02 01:30:09] (step=0029258) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.724515750342399, LR: 0.0003 +[2026-03-02 01:30:16] (step=0029259) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.724711406769712, LR: 0.0003 +[2026-03-02 01:30:24] (step=0029260) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.724907063197026, LR: 0.0003 +[2026-03-02 01:30:32] (step=0029261) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.725102719624339, LR: 0.0003 +[2026-03-02 01:30:40] (step=0029262) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 5.725298376051653, LR: 0.0003 +[2026-03-02 01:30:48] (step=0029263) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.725494032478967, LR: 0.0003 +[2026-03-02 01:30:56] (step=0029264) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.7256896889062805, LR: 0.0003 +[2026-03-02 01:31:04] (step=0029265) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.7258853453335945, LR: 0.0003 +[2026-03-02 01:31:12] (step=0029266) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.726081001760908, LR: 0.0003 +[2026-03-02 01:31:19] (step=0029267) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.726276658188222, LR: 0.0003 +[2026-03-02 01:31:27] (step=0029268) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 5.726472314615535, LR: 0.0003 +[2026-03-02 01:31:35] (step=0029269) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.726667971042849, LR: 0.0003 +[2026-03-02 01:31:43] (step=0029270) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.726863627470163, LR: 0.0003 +[2026-03-02 01:31:51] (step=0029271) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.727059283897476, LR: 0.0003 +[2026-03-02 01:31:59] (step=0029272) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.72725494032479, LR: 0.0003 +[2026-03-02 01:32:07] (step=0029273) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.727450596752103, LR: 0.0003 +[2026-03-02 01:32:14] (step=0029274) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.727646253179417, LR: 0.0003 +[2026-03-02 01:32:22] (step=0029275) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.727841909606731, LR: 0.0003 +[2026-03-02 01:32:30] (step=0029276) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.728037566034044, LR: 0.0003 +[2026-03-02 01:32:38] (step=0029277) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.728233222461358, LR: 0.0003 +[2026-03-02 01:32:46] (step=0029278) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 5.728428878888671, LR: 0.0003 +[2026-03-02 01:32:54] (step=0029279) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.728624535315985, LR: 0.0003 +[2026-03-02 01:33:02] (step=0029280) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.728820191743298, LR: 0.0003 +[2026-03-02 01:33:09] (step=0029281) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.729015848170612, LR: 0.0003 +[2026-03-02 01:33:17] (step=0029282) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.729211504597926, LR: 0.0003 +[2026-03-02 01:33:25] (step=0029283) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.7294071610252395, LR: 0.0003 +[2026-03-02 01:33:33] (step=0029284) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.7296028174525535, LR: 0.0003 +[2026-03-02 01:33:41] (step=0029285) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.729798473879867, LR: 0.0003 +[2026-03-02 01:33:49] (step=0029286) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.729994130307181, LR: 0.0003 +[2026-03-02 01:33:57] (step=0029287) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.730189786734495, LR: 0.0003 +[2026-03-02 01:34:04] (step=0029288) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.730385443161808, LR: 0.0003 +[2026-03-02 01:34:12] (step=0029289) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.730581099589122, LR: 0.0003 +[2026-03-02 01:34:20] (step=0029290) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.730776756016435, LR: 0.0003 +[2026-03-02 01:34:28] (step=0029291) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.730972412443749, LR: 0.0003 +[2026-03-02 01:34:36] (step=0029292) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.731168068871062, LR: 0.0003 +[2026-03-02 01:34:44] (step=0029293) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.731363725298376, LR: 0.0003 +[2026-03-02 01:34:52] (step=0029294) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 5.73155938172569, LR: 0.0003 +[2026-03-02 01:34:59] (step=0029295) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.731755038153003, LR: 0.0003 +[2026-03-02 01:35:07] (step=0029296) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.731950694580317, LR: 0.0003 +[2026-03-02 01:35:15] (step=0029297) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.73214635100763, LR: 0.0003 +[2026-03-02 01:35:23] (step=0029298) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.732342007434944, LR: 0.0003 +[2026-03-02 01:35:31] (step=0029299) Train Loss: 0.4547, Train Steps/Sec: 0.12, Epoch: 5.732537663862258, LR: 0.0003 +[2026-03-02 01:35:39] (step=0029300) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.732733320289571, LR: 0.0003 +[2026-03-02 01:35:47] (step=0029301) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.732928976716885, LR: 0.0003 +[2026-03-02 01:35:55] (step=0029302) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.7331246331441985, LR: 0.0003 +[2026-03-02 01:36:02] (step=0029303) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.7333202895715125, LR: 0.0003 +[2026-03-02 01:36:10] (step=0029304) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.733515945998826, LR: 0.0003 +[2026-03-02 01:36:18] (step=0029305) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.73371160242614, LR: 0.0003 +[2026-03-02 01:36:26] (step=0029306) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 5.733907258853454, LR: 0.0003 +[2026-03-02 01:36:34] (step=0029307) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.734102915280767, LR: 0.0003 +[2026-03-02 01:36:42] (step=0029308) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.734298571708081, LR: 0.0003 +[2026-03-02 01:36:50] (step=0029309) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.734494228135394, LR: 0.0003 +[2026-03-02 01:36:57] (step=0029310) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.734689884562708, LR: 0.0003 +[2026-03-02 01:37:05] (step=0029311) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 5.734885540990022, LR: 0.0003 +[2026-03-02 01:37:13] (step=0029312) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.735081197417335, LR: 0.0003 +[2026-03-02 01:37:21] (step=0029313) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.735276853844649, LR: 0.0003 +[2026-03-02 01:37:29] (step=0029314) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.735472510271962, LR: 0.0003 +[2026-03-02 01:37:37] (step=0029315) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.735668166699276, LR: 0.0003 +[2026-03-02 01:37:45] (step=0029316) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.735863823126589, LR: 0.0003 +[2026-03-02 01:37:53] (step=0029317) Train Loss: 0.4437, Train Steps/Sec: 0.12, Epoch: 5.736059479553903, LR: 0.0003 +[2026-03-02 01:38:00] (step=0029318) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.736255135981217, LR: 0.0003 +[2026-03-02 01:38:08] (step=0029319) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 5.73645079240853, LR: 0.0003 +[2026-03-02 01:38:16] (step=0029320) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 5.736646448835844, LR: 0.0003 +[2026-03-02 01:38:24] (step=0029321) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 5.7368421052631575, LR: 0.0003 +[2026-03-02 01:38:32] (step=0029322) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.7370377616904715, LR: 0.0003 +[2026-03-02 01:38:40] (step=0029323) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.7372334181177855, LR: 0.0003 +[2026-03-02 01:38:48] (step=0029324) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.737429074545099, LR: 0.0003 +[2026-03-02 01:38:56] (step=0029325) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.737624730972413, LR: 0.0003 +[2026-03-02 01:39:03] (step=0029326) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.737820387399726, LR: 0.0003 +[2026-03-02 01:39:11] (step=0029327) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 5.73801604382704, LR: 0.0003 +[2026-03-02 01:39:19] (step=0029328) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.738211700254354, LR: 0.0003 +[2026-03-02 01:39:27] (step=0029329) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.738407356681667, LR: 0.0003 +[2026-03-02 01:39:35] (step=0029330) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.738603013108981, LR: 0.0003 +[2026-03-02 01:39:43] (step=0029331) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 5.738798669536294, LR: 0.0003 +[2026-03-02 01:39:50] (step=0029332) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.738994325963608, LR: 0.0003 +[2026-03-02 01:39:58] (step=0029333) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.739189982390921, LR: 0.0003 +[2026-03-02 01:40:06] (step=0029334) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.739385638818235, LR: 0.0003 +[2026-03-02 01:40:14] (step=0029335) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.739581295245549, LR: 0.0003 +[2026-03-02 01:40:22] (step=0029336) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.739776951672862, LR: 0.0003 +[2026-03-02 01:40:30] (step=0029337) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.739972608100176, LR: 0.0003 +[2026-03-02 01:40:38] (step=0029338) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.740168264527489, LR: 0.0003 +[2026-03-02 01:40:46] (step=0029339) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.740363920954803, LR: 0.0003 +[2026-03-02 01:40:53] (step=0029340) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.740559577382117, LR: 0.0003 +[2026-03-02 01:41:01] (step=0029341) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.7407552338094305, LR: 0.0003 +[2026-03-02 01:41:09] (step=0029342) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.7409508902367445, LR: 0.0003 +[2026-03-02 01:41:17] (step=0029343) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.741146546664058, LR: 0.0003 +[2026-03-02 01:41:25] (step=0029344) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.741342203091372, LR: 0.0003 +[2026-03-02 01:41:33] (step=0029345) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 5.741537859518685, LR: 0.0003 +[2026-03-02 01:41:41] (step=0029346) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.741733515945999, LR: 0.0003 +[2026-03-02 01:41:48] (step=0029347) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.741929172373313, LR: 0.0003 +[2026-03-02 01:41:56] (step=0029348) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.742124828800626, LR: 0.0003 +[2026-03-02 01:42:04] (step=0029349) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.74232048522794, LR: 0.0003 +[2026-03-02 01:42:12] (step=0029350) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.742516141655253, LR: 0.0003 +[2026-03-02 01:42:20] (step=0029351) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.742711798082567, LR: 0.0003 +[2026-03-02 01:42:28] (step=0029352) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.742907454509881, LR: 0.0003 +[2026-03-02 01:42:36] (step=0029353) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.743103110937194, LR: 0.0003 +[2026-03-02 01:42:43] (step=0029354) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.743298767364508, LR: 0.0003 +[2026-03-02 01:42:51] (step=0029355) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.743494423791821, LR: 0.0003 +[2026-03-02 01:42:59] (step=0029356) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.743690080219135, LR: 0.0003 +[2026-03-02 01:43:07] (step=0029357) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.743885736646448, LR: 0.0003 +[2026-03-02 01:43:15] (step=0029358) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.744081393073762, LR: 0.0003 +[2026-03-02 01:43:23] (step=0029359) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.744277049501076, LR: 0.0003 +[2026-03-02 01:43:31] (step=0029360) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.7444727059283895, LR: 0.0003 +[2026-03-02 01:43:38] (step=0029361) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.7446683623557036, LR: 0.0003 +[2026-03-02 01:43:46] (step=0029362) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.744864018783017, LR: 0.0003 +[2026-03-02 01:43:54] (step=0029363) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 5.745059675210331, LR: 0.0003 +[2026-03-02 01:44:02] (step=0029364) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.745255331637645, LR: 0.0003 +[2026-03-02 01:44:10] (step=0029365) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 5.745450988064958, LR: 0.0003 +[2026-03-02 01:44:18] (step=0029366) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.745646644492272, LR: 0.0003 +[2026-03-02 01:44:26] (step=0029367) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.745842300919585, LR: 0.0003 +[2026-03-02 01:44:33] (step=0029368) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.746037957346899, LR: 0.0003 +[2026-03-02 01:44:41] (step=0029369) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.746233613774212, LR: 0.0003 +[2026-03-02 01:44:49] (step=0029370) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.746429270201526, LR: 0.0003 +[2026-03-02 01:44:57] (step=0029371) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.74662492662884, LR: 0.0003 +[2026-03-02 01:45:05] (step=0029372) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.746820583056153, LR: 0.0003 +[2026-03-02 01:45:13] (step=0029373) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.747016239483467, LR: 0.0003 +[2026-03-02 01:45:20] (step=0029374) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 5.74721189591078, LR: 0.0003 +[2026-03-02 01:45:28] (step=0029375) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.747407552338094, LR: 0.0003 +[2026-03-02 01:45:36] (step=0029376) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.747603208765408, LR: 0.0003 +[2026-03-02 01:45:44] (step=0029377) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.747798865192721, LR: 0.0003 +[2026-03-02 01:45:52] (step=0029378) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.747994521620035, LR: 0.0003 +[2026-03-02 01:46:00] (step=0029379) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.7481901780473486, LR: 0.0003 +[2026-03-02 01:46:07] (step=0029380) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.748385834474663, LR: 0.0003 +[2026-03-02 01:46:15] (step=0029381) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 5.748581490901977, LR: 0.0003 +[2026-03-02 01:46:23] (step=0029382) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.74877714732929, LR: 0.0003 +[2026-03-02 01:46:31] (step=0029383) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.748972803756604, LR: 0.0003 +[2026-03-02 01:46:39] (step=0029384) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.749168460183917, LR: 0.0003 +[2026-03-02 01:46:47] (step=0029385) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.749364116611231, LR: 0.0003 +[2026-03-02 01:46:55] (step=0029386) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.749559773038544, LR: 0.0003 +[2026-03-02 01:47:02] (step=0029387) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.749755429465858, LR: 0.0003 +[2026-03-02 01:47:10] (step=0029388) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.749951085893172, LR: 0.0003 +[2026-03-02 01:47:18] (step=0029389) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.750146742320485, LR: 0.0003 +[2026-03-02 01:47:26] (step=0029390) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.750342398747799, LR: 0.0003 +[2026-03-02 01:47:34] (step=0029391) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.750538055175112, LR: 0.0003 +[2026-03-02 01:47:42] (step=0029392) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.750733711602426, LR: 0.0003 +[2026-03-02 01:47:50] (step=0029393) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.75092936802974, LR: 0.0003 +[2026-03-02 01:47:57] (step=0029394) Train Loss: 0.4219, Train Steps/Sec: 0.13, Epoch: 5.751125024457053, LR: 0.0003 +[2026-03-02 01:48:05] (step=0029395) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.751320680884367, LR: 0.0003 +[2026-03-02 01:48:13] (step=0029396) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.75151633731168, LR: 0.0003 +[2026-03-02 01:48:21] (step=0029397) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.751711993738994, LR: 0.0003 +[2026-03-02 01:48:29] (step=0029398) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.751907650166308, LR: 0.0003 +[2026-03-02 01:48:37] (step=0029399) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.752103306593622, LR: 0.0003 +[2026-03-02 01:48:44] (step=0029400) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.752298963020936, LR: 0.0003 +[2026-03-02 01:48:52] (step=0029401) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.752494619448249, LR: 0.0003 +[2026-03-02 01:49:00] (step=0029402) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 5.752690275875563, LR: 0.0003 +[2026-03-02 01:49:08] (step=0029403) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.752885932302876, LR: 0.0003 +[2026-03-02 01:49:16] (step=0029404) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.75308158873019, LR: 0.0003 +[2026-03-02 01:49:24] (step=0029405) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.753277245157504, LR: 0.0003 +[2026-03-02 01:49:32] (step=0029406) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.753472901584817, LR: 0.0003 +[2026-03-02 01:49:39] (step=0029407) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.753668558012131, LR: 0.0003 +[2026-03-02 01:49:47] (step=0029408) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.753864214439444, LR: 0.0003 +[2026-03-02 01:49:55] (step=0029409) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.754059870866758, LR: 0.0003 +[2026-03-02 01:50:03] (step=0029410) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.754255527294071, LR: 0.0003 +[2026-03-02 01:50:11] (step=0029411) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.754451183721385, LR: 0.0003 +[2026-03-02 01:50:19] (step=0029412) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.754646840148699, LR: 0.0003 +[2026-03-02 01:50:26] (step=0029413) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.754842496576012, LR: 0.0003 +[2026-03-02 01:50:34] (step=0029414) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 5.755038153003326, LR: 0.0003 +[2026-03-02 01:50:42] (step=0029415) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.7552338094306394, LR: 0.0003 +[2026-03-02 01:50:50] (step=0029416) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.7554294658579535, LR: 0.0003 +[2026-03-02 01:50:58] (step=0029417) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.7556251222852675, LR: 0.0003 +[2026-03-02 01:51:06] (step=0029418) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.755820778712581, LR: 0.0003 +[2026-03-02 01:51:14] (step=0029419) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.756016435139895, LR: 0.0003 +[2026-03-02 01:51:21] (step=0029420) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.756212091567208, LR: 0.0003 +[2026-03-02 01:51:29] (step=0029421) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.756407747994522, LR: 0.0003 +[2026-03-02 01:51:37] (step=0029422) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.756603404421835, LR: 0.0003 +[2026-03-02 01:51:45] (step=0029423) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.756799060849149, LR: 0.0003 +[2026-03-02 01:51:53] (step=0029424) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.756994717276463, LR: 0.0003 +[2026-03-02 01:52:01] (step=0029425) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.757190373703776, LR: 0.0003 +[2026-03-02 01:52:08] (step=0029426) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.75738603013109, LR: 0.0003 +[2026-03-02 01:52:16] (step=0029427) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.757581686558403, LR: 0.0003 +[2026-03-02 01:52:24] (step=0029428) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.757777342985717, LR: 0.0003 +[2026-03-02 01:52:32] (step=0029429) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.757972999413031, LR: 0.0003 +[2026-03-02 01:52:40] (step=0029430) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 5.758168655840344, LR: 0.0003 +[2026-03-02 01:52:48] (step=0029431) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.758364312267658, LR: 0.0003 +[2026-03-02 01:52:55] (step=0029432) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.758559968694971, LR: 0.0003 +[2026-03-02 01:53:03] (step=0029433) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.758755625122285, LR: 0.0003 +[2026-03-02 01:53:11] (step=0029434) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.758951281549599, LR: 0.0003 +[2026-03-02 01:53:19] (step=0029435) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 5.7591469379769125, LR: 0.0003 +[2026-03-02 01:53:27] (step=0029436) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 5.7593425944042265, LR: 0.0003 +[2026-03-02 01:53:35] (step=0029437) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.75953825083154, LR: 0.0003 +[2026-03-02 01:53:42] (step=0029438) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.759733907258854, LR: 0.0003 +[2026-03-02 01:53:50] (step=0029439) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.759929563686167, LR: 0.0003 +[2026-03-02 01:53:58] (step=0029440) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.760125220113481, LR: 0.0003 +[2026-03-02 01:54:06] (step=0029441) Train Loss: 0.4448, Train Steps/Sec: 0.12, Epoch: 5.760320876540795, LR: 0.0003 +[2026-03-02 01:54:14] (step=0029442) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.760516532968108, LR: 0.0003 +[2026-03-02 01:54:22] (step=0029443) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.760712189395422, LR: 0.0003 +[2026-03-02 01:54:30] (step=0029444) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.760907845822735, LR: 0.0003 +[2026-03-02 01:54:38] (step=0029445) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.761103502250049, LR: 0.0003 +[2026-03-02 01:54:45] (step=0029446) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.761299158677363, LR: 0.0003 +[2026-03-02 01:54:53] (step=0029447) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.761494815104676, LR: 0.0003 +[2026-03-02 01:55:01] (step=0029448) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.76169047153199, LR: 0.0003 +[2026-03-02 01:55:09] (step=0029449) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.761886127959303, LR: 0.0003 +[2026-03-02 01:55:17] (step=0029450) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.762081784386617, LR: 0.0003 +[2026-03-02 01:55:25] (step=0029451) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.76227744081393, LR: 0.0003 +[2026-03-02 01:55:32] (step=0029452) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.762473097241244, LR: 0.0003 +[2026-03-02 01:55:40] (step=0029453) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.762668753668558, LR: 0.0003 +[2026-03-02 01:55:48] (step=0029454) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.7628644100958715, LR: 0.0003 +[2026-03-02 01:55:56] (step=0029455) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.7630600665231855, LR: 0.0003 +[2026-03-02 01:56:04] (step=0029456) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.763255722950499, LR: 0.0003 +[2026-03-02 01:56:12] (step=0029457) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.763451379377813, LR: 0.0003 +[2026-03-02 01:56:19] (step=0029458) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.763647035805127, LR: 0.0003 +[2026-03-02 01:56:27] (step=0029459) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.76384269223244, LR: 0.0003 +[2026-03-02 01:56:35] (step=0029460) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.764038348659754, LR: 0.0003 +[2026-03-02 01:56:43] (step=0029461) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.764234005087067, LR: 0.0003 +[2026-03-02 01:56:51] (step=0029462) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.764429661514381, LR: 0.0003 +[2026-03-02 01:56:59] (step=0029463) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.764625317941694, LR: 0.0003 +[2026-03-02 01:57:07] (step=0029464) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.764820974369008, LR: 0.0003 +[2026-03-02 01:57:14] (step=0029465) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.765016630796322, LR: 0.0003 +[2026-03-02 01:57:22] (step=0029466) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.765212287223635, LR: 0.0003 +[2026-03-02 01:57:30] (step=0029467) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.765407943650949, LR: 0.0003 +[2026-03-02 01:57:38] (step=0029468) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.765603600078262, LR: 0.0003 +[2026-03-02 01:57:46] (step=0029469) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.765799256505576, LR: 0.0003 +[2026-03-02 01:57:54] (step=0029470) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.76599491293289, LR: 0.0003 +[2026-03-02 01:58:01] (step=0029471) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.766190569360203, LR: 0.0003 +[2026-03-02 01:58:09] (step=0029472) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.766386225787517, LR: 0.0003 +[2026-03-02 01:58:17] (step=0029473) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.7665818822148305, LR: 0.0003 +[2026-03-02 01:58:25] (step=0029474) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.7667775386421445, LR: 0.0003 +[2026-03-02 01:58:33] (step=0029475) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.766973195069458, LR: 0.0003 +[2026-03-02 01:58:41] (step=0029476) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.767168851496772, LR: 0.0003 +[2026-03-02 01:58:48] (step=0029477) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.767364507924086, LR: 0.0003 +[2026-03-02 01:58:56] (step=0029478) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.767560164351399, LR: 0.0003 +[2026-03-02 01:59:04] (step=0029479) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.767755820778713, LR: 0.0003 +[2026-03-02 01:59:12] (step=0029480) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.767951477206026, LR: 0.0003 +[2026-03-02 01:59:20] (step=0029481) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.76814713363334, LR: 0.0003 +[2026-03-02 01:59:28] (step=0029482) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.768342790060654, LR: 0.0003 +[2026-03-02 01:59:36] (step=0029483) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.768538446487967, LR: 0.0003 +[2026-03-02 01:59:43] (step=0029484) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.768734102915281, LR: 0.0003 +[2026-03-02 01:59:51] (step=0029485) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.768929759342594, LR: 0.0003 +[2026-03-02 01:59:59] (step=0029486) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.769125415769908, LR: 0.0003 +[2026-03-02 02:00:07] (step=0029487) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.769321072197222, LR: 0.0003 +[2026-03-02 02:00:15] (step=0029488) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 5.769516728624535, LR: 0.0003 +[2026-03-02 02:00:23] (step=0029489) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.769712385051849, LR: 0.0003 +[2026-03-02 02:00:30] (step=0029490) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.769908041479162, LR: 0.0003 +[2026-03-02 02:00:38] (step=0029491) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.770103697906476, LR: 0.0003 +[2026-03-02 02:00:46] (step=0029492) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.7702993543337895, LR: 0.0003 +[2026-03-02 02:00:54] (step=0029493) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 5.7704950107611035, LR: 0.0003 +[2026-03-02 02:01:02] (step=0029494) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.7706906671884175, LR: 0.0003 +[2026-03-02 02:01:10] (step=0029495) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.770886323615731, LR: 0.0003 +[2026-03-02 02:01:18] (step=0029496) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.771081980043045, LR: 0.0003 +[2026-03-02 02:01:25] (step=0029497) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.771277636470358, LR: 0.0003 +[2026-03-02 02:01:33] (step=0029498) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.771473292897672, LR: 0.0003 +[2026-03-02 02:01:41] (step=0029499) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.771668949324986, LR: 0.0003 +[2026-03-02 02:01:49] (step=0029500) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.771864605752299, LR: 0.0003 +[2026-03-02 02:01:49] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0029500/ +[2026-03-02 02:01:57] (step=0029501) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.772060262179613, LR: 0.0003 +[2026-03-02 02:02:05] (step=0029502) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.772255918606926, LR: 0.0003 +[2026-03-02 02:02:12] (step=0029503) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 5.77245157503424, LR: 0.0003 +[2026-03-02 02:02:20] (step=0029504) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.772647231461553, LR: 0.0003 +[2026-03-02 02:02:28] (step=0029505) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.772842887888867, LR: 0.0003 +[2026-03-02 02:02:36] (step=0029506) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.773038544316181, LR: 0.0003 +[2026-03-02 02:02:44] (step=0029507) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 5.773234200743494, LR: 0.0003 +[2026-03-02 02:02:52] (step=0029508) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.773429857170808, LR: 0.0003 +[2026-03-02 02:03:00] (step=0029509) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.773625513598121, LR: 0.0003 +[2026-03-02 02:03:07] (step=0029510) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 5.773821170025435, LR: 0.0003 +[2026-03-02 02:03:15] (step=0029511) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.774016826452749, LR: 0.0003 +[2026-03-02 02:03:23] (step=0029512) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.7742124828800625, LR: 0.0003 +[2026-03-02 02:03:31] (step=0029513) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.7744081393073765, LR: 0.0003 +[2026-03-02 02:03:39] (step=0029514) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.77460379573469, LR: 0.0003 +[2026-03-02 02:03:47] (step=0029515) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.774799452162004, LR: 0.0003 +[2026-03-02 02:03:54] (step=0029516) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.774995108589317, LR: 0.0003 +[2026-03-02 02:04:02] (step=0029517) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 5.775190765016631, LR: 0.0003 +[2026-03-02 02:04:10] (step=0029518) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.775386421443945, LR: 0.0003 +[2026-03-02 02:04:18] (step=0029519) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.775582077871258, LR: 0.0003 +[2026-03-02 02:04:26] (step=0029520) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.775777734298572, LR: 0.0003 +[2026-03-02 02:04:34] (step=0029521) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.775973390725885, LR: 0.0003 +[2026-03-02 02:04:42] (step=0029522) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 5.776169047153199, LR: 0.0003 +[2026-03-02 02:04:49] (step=0029523) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.776364703580513, LR: 0.0003 +[2026-03-02 02:04:57] (step=0029524) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.776560360007826, LR: 0.0003 +[2026-03-02 02:05:05] (step=0029525) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.77675601643514, LR: 0.0003 +[2026-03-02 02:05:13] (step=0029526) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.776951672862453, LR: 0.0003 +[2026-03-02 02:05:21] (step=0029527) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.777147329289767, LR: 0.0003 +[2026-03-02 02:05:29] (step=0029528) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.77734298571708, LR: 0.0003 +[2026-03-02 02:05:36] (step=0029529) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 5.777538642144394, LR: 0.0003 +[2026-03-02 02:05:44] (step=0029530) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.777734298571708, LR: 0.0003 +[2026-03-02 02:05:52] (step=0029531) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.7779299549990215, LR: 0.0003 +[2026-03-02 02:06:00] (step=0029532) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 5.7781256114263355, LR: 0.0003 +[2026-03-02 02:06:08] (step=0029533) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.778321267853649, LR: 0.0003 +[2026-03-02 02:06:16] (step=0029534) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.778516924280963, LR: 0.0003 +[2026-03-02 02:06:24] (step=0029535) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 5.778712580708277, LR: 0.0003 +[2026-03-02 02:06:31] (step=0029536) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.77890823713559, LR: 0.0003 +[2026-03-02 02:06:39] (step=0029537) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.779103893562904, LR: 0.0003 +[2026-03-02 02:06:47] (step=0029538) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.779299549990217, LR: 0.0003 +[2026-03-02 02:06:55] (step=0029539) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.779495206417531, LR: 0.0003 +[2026-03-02 02:07:03] (step=0029540) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.779690862844844, LR: 0.0003 +[2026-03-02 02:07:11] (step=0029541) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.779886519272158, LR: 0.0003 +[2026-03-02 02:07:18] (step=0029542) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.780082175699472, LR: 0.0003 +[2026-03-02 02:07:26] (step=0029543) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.780277832126785, LR: 0.0003 +[2026-03-02 02:07:34] (step=0029544) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.780473488554099, LR: 0.0003 +[2026-03-02 02:07:42] (step=0029545) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.780669144981412, LR: 0.0003 +[2026-03-02 02:07:50] (step=0029546) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.780864801408726, LR: 0.0003 +[2026-03-02 02:07:58] (step=0029547) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.78106045783604, LR: 0.0003 +[2026-03-02 02:08:06] (step=0029548) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 5.781256114263353, LR: 0.0003 +[2026-03-02 02:08:13] (step=0029549) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.781451770690667, LR: 0.0003 +[2026-03-02 02:08:21] (step=0029550) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.7816474271179805, LR: 0.0003 +[2026-03-02 02:08:29] (step=0029551) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.7818430835452945, LR: 0.0003 +[2026-03-02 02:08:37] (step=0029552) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.7820387399726085, LR: 0.0003 +[2026-03-02 02:08:45] (step=0029553) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.782234396399922, LR: 0.0003 +[2026-03-02 02:08:53] (step=0029554) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.782430052827236, LR: 0.0003 +[2026-03-02 02:09:00] (step=0029555) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.782625709254549, LR: 0.0003 +[2026-03-02 02:09:08] (step=0029556) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.782821365681863, LR: 0.0003 +[2026-03-02 02:09:16] (step=0029557) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.783017022109176, LR: 0.0003 +[2026-03-02 02:09:24] (step=0029558) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.78321267853649, LR: 0.0003 +[2026-03-02 02:09:32] (step=0029559) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.783408334963804, LR: 0.0003 +[2026-03-02 02:09:40] (step=0029560) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.783603991391117, LR: 0.0003 +[2026-03-02 02:09:48] (step=0029561) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.783799647818431, LR: 0.0003 +[2026-03-02 02:09:55] (step=0029562) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.783995304245744, LR: 0.0003 +[2026-03-02 02:10:03] (step=0029563) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.784190960673058, LR: 0.0003 +[2026-03-02 02:10:11] (step=0029564) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.784386617100372, LR: 0.0003 +[2026-03-02 02:10:19] (step=0029565) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.784582273527685, LR: 0.0003 +[2026-03-02 02:10:27] (step=0029566) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.784777929954999, LR: 0.0003 +[2026-03-02 02:10:35] (step=0029567) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.784973586382312, LR: 0.0003 +[2026-03-02 02:10:43] (step=0029568) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 5.785169242809626, LR: 0.0003 +[2026-03-02 02:10:50] (step=0029569) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.7853648992369395, LR: 0.0003 +[2026-03-02 02:10:58] (step=0029570) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.7855605556642535, LR: 0.0003 +[2026-03-02 02:11:06] (step=0029571) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.7857562120915675, LR: 0.0003 +[2026-03-02 02:11:14] (step=0029572) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.785951868518881, LR: 0.0003 +[2026-03-02 02:11:22] (step=0029573) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.786147524946195, LR: 0.0003 +[2026-03-02 02:11:30] (step=0029574) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.786343181373508, LR: 0.0003 +[2026-03-02 02:11:37] (step=0029575) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.786538837800822, LR: 0.0003 +[2026-03-02 02:11:45] (step=0029576) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.786734494228136, LR: 0.0003 +[2026-03-02 02:11:53] (step=0029577) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.786930150655449, LR: 0.0003 +[2026-03-02 02:12:01] (step=0029578) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.787125807082763, LR: 0.0003 +[2026-03-02 02:12:09] (step=0029579) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.787321463510076, LR: 0.0003 +[2026-03-02 02:12:17] (step=0029580) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.78751711993739, LR: 0.0003 +[2026-03-02 02:12:24] (step=0029581) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 5.787712776364703, LR: 0.0003 +[2026-03-02 02:12:32] (step=0029582) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.787908432792017, LR: 0.0003 +[2026-03-02 02:12:40] (step=0029583) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.788104089219331, LR: 0.0003 +[2026-03-02 02:12:48] (step=0029584) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.788299745646644, LR: 0.0003 +[2026-03-02 02:12:56] (step=0029585) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.788495402073958, LR: 0.0003 +[2026-03-02 02:13:04] (step=0029586) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.788691058501271, LR: 0.0003 +[2026-03-02 02:13:12] (step=0029587) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.788886714928585, LR: 0.0003 +[2026-03-02 02:13:19] (step=0029588) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.789082371355899, LR: 0.0003 +[2026-03-02 02:13:27] (step=0029589) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.7892780277832125, LR: 0.0003 +[2026-03-02 02:13:35] (step=0029590) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 5.7894736842105265, LR: 0.0003 +[2026-03-02 02:13:43] (step=0029591) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.78966934063784, LR: 0.0003 +[2026-03-02 02:13:51] (step=0029592) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.789864997065154, LR: 0.0003 +[2026-03-02 02:13:59] (step=0029593) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.790060653492467, LR: 0.0003 +[2026-03-02 02:14:07] (step=0029594) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 5.790256309919781, LR: 0.0003 +[2026-03-02 02:14:14] (step=0029595) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.790451966347095, LR: 0.0003 +[2026-03-02 02:14:22] (step=0029596) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.790647622774408, LR: 0.0003 +[2026-03-02 02:14:30] (step=0029597) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.790843279201722, LR: 0.0003 +[2026-03-02 02:14:38] (step=0029598) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.791038935629035, LR: 0.0003 +[2026-03-02 02:14:46] (step=0029599) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.791234592056349, LR: 0.0003 +[2026-03-02 02:14:54] (step=0029600) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.791430248483663, LR: 0.0003 +[2026-03-02 02:15:01] (step=0029601) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.791625904910976, LR: 0.0003 +[2026-03-02 02:15:09] (step=0029602) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.79182156133829, LR: 0.0003 +[2026-03-02 02:15:17] (step=0029603) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.792017217765603, LR: 0.0003 +[2026-03-02 02:15:25] (step=0029604) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.792212874192917, LR: 0.0003 +[2026-03-02 02:15:33] (step=0029605) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.792408530620231, LR: 0.0003 +[2026-03-02 02:15:41] (step=0029606) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.792604187047544, LR: 0.0003 +[2026-03-02 02:15:48] (step=0029607) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.792799843474858, LR: 0.0003 +[2026-03-02 02:15:56] (step=0029608) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.7929954999021716, LR: 0.0003 +[2026-03-02 02:16:04] (step=0029609) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.793191156329486, LR: 0.0003 +[2026-03-02 02:16:12] (step=0029610) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.793386812756799, LR: 0.0003 +[2026-03-02 02:16:20] (step=0029611) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.793582469184113, LR: 0.0003 +[2026-03-02 02:16:28] (step=0029612) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.793778125611427, LR: 0.0003 +[2026-03-02 02:16:36] (step=0029613) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.79397378203874, LR: 0.0003 +[2026-03-02 02:16:43] (step=0029614) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 5.794169438466054, LR: 0.0003 +[2026-03-02 02:16:51] (step=0029615) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 5.794365094893367, LR: 0.0003 +[2026-03-02 02:16:59] (step=0029616) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.794560751320681, LR: 0.0003 +[2026-03-02 02:17:07] (step=0029617) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.794756407747995, LR: 0.0003 +[2026-03-02 02:17:15] (step=0029618) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.794952064175308, LR: 0.0003 +[2026-03-02 02:17:23] (step=0029619) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.795147720602622, LR: 0.0003 +[2026-03-02 02:17:31] (step=0029620) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.795343377029935, LR: 0.0003 +[2026-03-02 02:17:38] (step=0029621) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.795539033457249, LR: 0.0003 +[2026-03-02 02:17:46] (step=0029622) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.795734689884562, LR: 0.0003 +[2026-03-02 02:17:54] (step=0029623) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 5.795930346311876, LR: 0.0003 +[2026-03-02 02:18:02] (step=0029624) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.79612600273919, LR: 0.0003 +[2026-03-02 02:18:10] (step=0029625) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.796321659166503, LR: 0.0003 +[2026-03-02 02:18:18] (step=0029626) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.796517315593817, LR: 0.0003 +[2026-03-02 02:18:25] (step=0029627) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 5.796712972021131, LR: 0.0003 +[2026-03-02 02:18:33] (step=0029628) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.796908628448445, LR: 0.0003 +[2026-03-02 02:18:41] (step=0029629) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.797104284875759, LR: 0.0003 +[2026-03-02 02:18:49] (step=0029630) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.797299941303072, LR: 0.0003 +[2026-03-02 02:18:57] (step=0029631) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.797495597730386, LR: 0.0003 +[2026-03-02 02:19:05] (step=0029632) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.797691254157699, LR: 0.0003 +[2026-03-02 02:19:13] (step=0029633) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.797886910585013, LR: 0.0003 +[2026-03-02 02:19:20] (step=0029634) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 5.798082567012326, LR: 0.0003 +[2026-03-02 02:19:28] (step=0029635) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.79827822343964, LR: 0.0003 +[2026-03-02 02:19:36] (step=0029636) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.798473879866954, LR: 0.0003 +[2026-03-02 02:19:44] (step=0029637) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.798669536294267, LR: 0.0003 +[2026-03-02 02:19:52] (step=0029638) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.798865192721581, LR: 0.0003 +[2026-03-02 02:20:00] (step=0029639) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.799060849148894, LR: 0.0003 +[2026-03-02 02:20:08] (step=0029640) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.799256505576208, LR: 0.0003 +[2026-03-02 02:20:15] (step=0029641) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.799452162003522, LR: 0.0003 +[2026-03-02 02:20:23] (step=0029642) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.799647818430835, LR: 0.0003 +[2026-03-02 02:20:31] (step=0029643) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.799843474858149, LR: 0.0003 +[2026-03-02 02:20:39] (step=0029644) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.8000391312854624, LR: 0.0003 +[2026-03-02 02:20:47] (step=0029645) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.8002347877127765, LR: 0.0003 +[2026-03-02 02:20:55] (step=0029646) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.80043044414009, LR: 0.0003 +[2026-03-02 02:21:02] (step=0029647) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 5.800626100567404, LR: 0.0003 +[2026-03-02 02:21:10] (step=0029648) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.800821756994718, LR: 0.0003 +[2026-03-02 02:21:18] (step=0029649) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.801017413422031, LR: 0.0003 +[2026-03-02 02:21:26] (step=0029650) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.801213069849345, LR: 0.0003 +[2026-03-02 02:21:34] (step=0029651) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.801408726276658, LR: 0.0003 +[2026-03-02 02:21:42] (step=0029652) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 5.801604382703972, LR: 0.0003 +[2026-03-02 02:21:50] (step=0029653) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.801800039131286, LR: 0.0003 +[2026-03-02 02:21:57] (step=0029654) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.801995695558599, LR: 0.0003 +[2026-03-02 02:22:05] (step=0029655) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.802191351985913, LR: 0.0003 +[2026-03-02 02:22:13] (step=0029656) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.802387008413226, LR: 0.0003 +[2026-03-02 02:22:21] (step=0029657) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 5.80258266484054, LR: 0.0003 +[2026-03-02 02:22:29] (step=0029658) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.802778321267854, LR: 0.0003 +[2026-03-02 02:22:37] (step=0029659) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 5.802973977695167, LR: 0.0003 +[2026-03-02 02:22:44] (step=0029660) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.803169634122481, LR: 0.0003 +[2026-03-02 02:22:52] (step=0029661) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.803365290549794, LR: 0.0003 +[2026-03-02 02:23:00] (step=0029662) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.803560946977108, LR: 0.0003 +[2026-03-02 02:23:08] (step=0029663) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.8037566034044215, LR: 0.0003 +[2026-03-02 02:23:16] (step=0029664) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 5.8039522598317355, LR: 0.0003 +[2026-03-02 02:23:24] (step=0029665) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.8041479162590495, LR: 0.0003 +[2026-03-02 02:23:32] (step=0029666) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.804343572686363, LR: 0.0003 +[2026-03-02 02:23:39] (step=0029667) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.804539229113677, LR: 0.0003 +[2026-03-02 02:23:47] (step=0029668) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.80473488554099, LR: 0.0003 +[2026-03-02 02:23:55] (step=0029669) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.804930541968304, LR: 0.0003 +[2026-03-02 02:24:03] (step=0029670) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.805126198395618, LR: 0.0003 +[2026-03-02 02:24:11] (step=0029671) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.805321854822931, LR: 0.0003 +[2026-03-02 02:24:19] (step=0029672) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.805517511250245, LR: 0.0003 +[2026-03-02 02:24:26] (step=0029673) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.805713167677558, LR: 0.0003 +[2026-03-02 02:24:34] (step=0029674) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.805908824104872, LR: 0.0003 +[2026-03-02 02:24:42] (step=0029675) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 5.806104480532185, LR: 0.0003 +[2026-03-02 02:24:50] (step=0029676) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.806300136959499, LR: 0.0003 +[2026-03-02 02:24:58] (step=0029677) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.806495793386813, LR: 0.0003 +[2026-03-02 02:25:06] (step=0029678) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.806691449814126, LR: 0.0003 +[2026-03-02 02:25:14] (step=0029679) Train Loss: 0.4253, Train Steps/Sec: 0.13, Epoch: 5.80688710624144, LR: 0.0003 +[2026-03-02 02:25:21] (step=0029680) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.807082762668753, LR: 0.0003 +[2026-03-02 02:25:29] (step=0029681) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.807278419096067, LR: 0.0003 +[2026-03-02 02:25:37] (step=0029682) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.807474075523381, LR: 0.0003 +[2026-03-02 02:25:45] (step=0029683) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.8076697319506945, LR: 0.0003 +[2026-03-02 02:25:53] (step=0029684) Train Loss: 0.4407, Train Steps/Sec: 0.12, Epoch: 5.8078653883780085, LR: 0.0003 +[2026-03-02 02:26:01] (step=0029685) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.808061044805322, LR: 0.0003 +[2026-03-02 02:26:09] (step=0029686) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.808256701232636, LR: 0.0003 +[2026-03-02 02:26:17] (step=0029687) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.808452357659949, LR: 0.0003 +[2026-03-02 02:26:24] (step=0029688) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.808648014087263, LR: 0.0003 +[2026-03-02 02:26:32] (step=0029689) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.808843670514577, LR: 0.0003 +[2026-03-02 02:26:40] (step=0029690) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.80903932694189, LR: 0.0003 +[2026-03-02 02:26:48] (step=0029691) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.809234983369204, LR: 0.0003 +[2026-03-02 02:26:56] (step=0029692) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.809430639796517, LR: 0.0003 +[2026-03-02 02:27:04] (step=0029693) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.809626296223831, LR: 0.0003 +[2026-03-02 02:27:11] (step=0029694) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.809821952651145, LR: 0.0003 +[2026-03-02 02:27:19] (step=0029695) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.810017609078458, LR: 0.0003 +[2026-03-02 02:27:27] (step=0029696) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.810213265505772, LR: 0.0003 +[2026-03-02 02:27:35] (step=0029697) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.810408921933085, LR: 0.0003 +[2026-03-02 02:27:43] (step=0029698) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.810604578360399, LR: 0.0003 +[2026-03-02 02:27:51] (step=0029699) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.810800234787712, LR: 0.0003 +[2026-03-02 02:27:59] (step=0029700) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.810995891215026, LR: 0.0003 +[2026-03-02 02:28:06] (step=0029701) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.81119154764234, LR: 0.0003 +[2026-03-02 02:28:14] (step=0029702) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.8113872040696535, LR: 0.0003 +[2026-03-02 02:28:22] (step=0029703) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 5.8115828604969675, LR: 0.0003 +[2026-03-02 02:28:30] (step=0029704) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.811778516924281, LR: 0.0003 +[2026-03-02 02:28:38] (step=0029705) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.811974173351595, LR: 0.0003 +[2026-03-02 02:28:46] (step=0029706) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.812169829778909, LR: 0.0003 +[2026-03-02 02:28:53] (step=0029707) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.812365486206222, LR: 0.0003 +[2026-03-02 02:29:01] (step=0029708) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.812561142633536, LR: 0.0003 +[2026-03-02 02:29:09] (step=0029709) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 5.812756799060849, LR: 0.0003 +[2026-03-02 02:29:17] (step=0029710) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.812952455488163, LR: 0.0003 +[2026-03-02 02:29:25] (step=0029711) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.813148111915476, LR: 0.0003 +[2026-03-02 02:29:33] (step=0029712) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.81334376834279, LR: 0.0003 +[2026-03-02 02:29:40] (step=0029713) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.813539424770104, LR: 0.0003 +[2026-03-02 02:29:48] (step=0029714) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.813735081197417, LR: 0.0003 +[2026-03-02 02:29:56] (step=0029715) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 5.813930737624731, LR: 0.0003 +[2026-03-02 02:30:04] (step=0029716) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.814126394052044, LR: 0.0003 +[2026-03-02 02:30:12] (step=0029717) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.814322050479358, LR: 0.0003 +[2026-03-02 02:30:20] (step=0029718) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.814517706906672, LR: 0.0003 +[2026-03-02 02:30:28] (step=0029719) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.814713363333985, LR: 0.0003 +[2026-03-02 02:30:35] (step=0029720) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.814909019761299, LR: 0.0003 +[2026-03-02 02:30:43] (step=0029721) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.8151046761886125, LR: 0.0003 +[2026-03-02 02:30:51] (step=0029722) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.8153003326159265, LR: 0.0003 +[2026-03-02 02:30:59] (step=0029723) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.8154959890432405, LR: 0.0003 +[2026-03-02 02:31:07] (step=0029724) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.815691645470554, LR: 0.0003 +[2026-03-02 02:31:15] (step=0029725) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.815887301897868, LR: 0.0003 +[2026-03-02 02:31:23] (step=0029726) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.816082958325181, LR: 0.0003 +[2026-03-02 02:31:30] (step=0029727) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 5.816278614752495, LR: 0.0003 +[2026-03-02 02:31:38] (step=0029728) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.816474271179808, LR: 0.0003 +[2026-03-02 02:31:46] (step=0029729) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.816669927607122, LR: 0.0003 +[2026-03-02 02:31:54] (step=0029730) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.816865584034436, LR: 0.0003 +[2026-03-02 02:32:02] (step=0029731) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.817061240461749, LR: 0.0003 +[2026-03-02 02:32:10] (step=0029732) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.817256896889063, LR: 0.0003 +[2026-03-02 02:32:17] (step=0029733) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.817452553316376, LR: 0.0003 +[2026-03-02 02:32:25] (step=0029734) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.81764820974369, LR: 0.0003 +[2026-03-02 02:32:33] (step=0029735) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.817843866171004, LR: 0.0003 +[2026-03-02 02:32:41] (step=0029736) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.818039522598317, LR: 0.0003 +[2026-03-02 02:32:49] (step=0029737) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.818235179025631, LR: 0.0003 +[2026-03-02 02:32:57] (step=0029738) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.818430835452944, LR: 0.0003 +[2026-03-02 02:33:05] (step=0029739) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 5.818626491880258, LR: 0.0003 +[2026-03-02 02:33:12] (step=0029740) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.8188221483075715, LR: 0.0003 +[2026-03-02 02:33:20] (step=0029741) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.8190178047348855, LR: 0.0003 +[2026-03-02 02:33:28] (step=0029742) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.8192134611621995, LR: 0.0003 +[2026-03-02 02:33:36] (step=0029743) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.819409117589513, LR: 0.0003 +[2026-03-02 02:33:44] (step=0029744) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 5.819604774016827, LR: 0.0003 +[2026-03-02 02:33:52] (step=0029745) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 5.81980043044414, LR: 0.0003 +[2026-03-02 02:33:59] (step=0029746) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 5.819996086871454, LR: 0.0003 +[2026-03-02 02:34:07] (step=0029747) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.820191743298768, LR: 0.0003 +[2026-03-02 02:34:15] (step=0029748) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 5.820387399726081, LR: 0.0003 +[2026-03-02 02:34:23] (step=0029749) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.820583056153395, LR: 0.0003 +[2026-03-02 02:34:31] (step=0029750) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.820778712580708, LR: 0.0003 +[2026-03-02 02:34:39] (step=0029751) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.820974369008022, LR: 0.0003 +[2026-03-02 02:34:46] (step=0029752) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.821170025435335, LR: 0.0003 +[2026-03-02 02:34:54] (step=0029753) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.821365681862649, LR: 0.0003 +[2026-03-02 02:35:02] (step=0029754) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.821561338289963, LR: 0.0003 +[2026-03-02 02:35:10] (step=0029755) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.821756994717276, LR: 0.0003 +[2026-03-02 02:35:18] (step=0029756) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.82195265114459, LR: 0.0003 +[2026-03-02 02:35:26] (step=0029757) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 5.822148307571903, LR: 0.0003 +[2026-03-02 02:35:34] (step=0029758) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.822343963999217, LR: 0.0003 +[2026-03-02 02:35:41] (step=0029759) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 5.822539620426531, LR: 0.0003 +[2026-03-02 02:35:49] (step=0029760) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.8227352768538445, LR: 0.0003 +[2026-03-02 02:35:57] (step=0029761) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.8229309332811585, LR: 0.0003 +[2026-03-02 02:36:05] (step=0029762) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.823126589708472, LR: 0.0003 +[2026-03-02 02:36:13] (step=0029763) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.823322246135786, LR: 0.0003 +[2026-03-02 02:36:21] (step=0029764) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.823517902563099, LR: 0.0003 +[2026-03-02 02:36:28] (step=0029765) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.823713558990413, LR: 0.0003 +[2026-03-02 02:36:36] (step=0029766) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.823909215417727, LR: 0.0003 +[2026-03-02 02:36:44] (step=0029767) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.82410487184504, LR: 0.0003 +[2026-03-02 02:36:52] (step=0029768) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 5.824300528272354, LR: 0.0003 +[2026-03-02 02:37:00] (step=0029769) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.824496184699667, LR: 0.0003 +[2026-03-02 02:37:08] (step=0029770) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.824691841126981, LR: 0.0003 +[2026-03-02 02:37:16] (step=0029771) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.824887497554295, LR: 0.0003 +[2026-03-02 02:37:23] (step=0029772) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.825083153981608, LR: 0.0003 +[2026-03-02 02:37:31] (step=0029773) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 5.825278810408922, LR: 0.0003 +[2026-03-02 02:37:39] (step=0029774) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.825474466836235, LR: 0.0003 +[2026-03-02 02:37:47] (step=0029775) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.825670123263549, LR: 0.0003 +[2026-03-02 02:37:55] (step=0029776) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.825865779690863, LR: 0.0003 +[2026-03-02 02:38:03] (step=0029777) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.826061436118176, LR: 0.0003 +[2026-03-02 02:38:10] (step=0029778) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.82625709254549, LR: 0.0003 +[2026-03-02 02:38:18] (step=0029779) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 5.8264527489728035, LR: 0.0003 +[2026-03-02 02:38:26] (step=0029780) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.8266484054001175, LR: 0.0003 +[2026-03-02 02:38:34] (step=0029781) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.826844061827431, LR: 0.0003 +[2026-03-02 02:38:42] (step=0029782) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.827039718254745, LR: 0.0003 +[2026-03-02 02:38:50] (step=0029783) Train Loss: 0.4507, Train Steps/Sec: 0.12, Epoch: 5.827235374682059, LR: 0.0003 +[2026-03-02 02:38:58] (step=0029784) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.827431031109372, LR: 0.0003 +[2026-03-02 02:39:06] (step=0029785) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.827626687536686, LR: 0.0003 +[2026-03-02 02:39:13] (step=0029786) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.827822343963999, LR: 0.0003 +[2026-03-02 02:39:21] (step=0029787) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.828018000391313, LR: 0.0003 +[2026-03-02 02:39:29] (step=0029788) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.828213656818627, LR: 0.0003 +[2026-03-02 02:39:37] (step=0029789) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.82840931324594, LR: 0.0003 +[2026-03-02 02:39:45] (step=0029790) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.828604969673254, LR: 0.0003 +[2026-03-02 02:39:53] (step=0029791) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.828800626100567, LR: 0.0003 +[2026-03-02 02:40:00] (step=0029792) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 5.828996282527881, LR: 0.0003 +[2026-03-02 02:40:08] (step=0029793) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.829191938955194, LR: 0.0003 +[2026-03-02 02:40:16] (step=0029794) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.829387595382508, LR: 0.0003 +[2026-03-02 02:40:24] (step=0029795) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.829583251809822, LR: 0.0003 +[2026-03-02 02:40:32] (step=0029796) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.829778908237135, LR: 0.0003 +[2026-03-02 02:40:40] (step=0029797) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.829974564664449, LR: 0.0003 +[2026-03-02 02:40:47] (step=0029798) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.8301702210917625, LR: 0.0003 +[2026-03-02 02:40:55] (step=0029799) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.8303658775190765, LR: 0.0003 +[2026-03-02 02:41:03] (step=0029800) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.8305615339463905, LR: 0.0003 +[2026-03-02 02:41:11] (step=0029801) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.830757190373704, LR: 0.0003 +[2026-03-02 02:41:19] (step=0029802) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.830952846801018, LR: 0.0003 +[2026-03-02 02:41:27] (step=0029803) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.831148503228331, LR: 0.0003 +[2026-03-02 02:41:35] (step=0029804) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.831344159655645, LR: 0.0003 +[2026-03-02 02:41:42] (step=0029805) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.831539816082958, LR: 0.0003 +[2026-03-02 02:41:50] (step=0029806) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.831735472510272, LR: 0.0003 +[2026-03-02 02:41:58] (step=0029807) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.831931128937586, LR: 0.0003 +[2026-03-02 02:42:06] (step=0029808) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.832126785364899, LR: 0.0003 +[2026-03-02 02:42:14] (step=0029809) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 5.832322441792213, LR: 0.0003 +[2026-03-02 02:42:22] (step=0029810) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.832518098219526, LR: 0.0003 +[2026-03-02 02:42:29] (step=0029811) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.83271375464684, LR: 0.0003 +[2026-03-02 02:42:37] (step=0029812) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.832909411074154, LR: 0.0003 +[2026-03-02 02:42:45] (step=0029813) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.833105067501467, LR: 0.0003 +[2026-03-02 02:42:53] (step=0029814) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.833300723928781, LR: 0.0003 +[2026-03-02 02:43:01] (step=0029815) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.833496380356094, LR: 0.0003 +[2026-03-02 02:43:09] (step=0029816) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.833692036783408, LR: 0.0003 +[2026-03-02 02:43:16] (step=0029817) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.8338876932107215, LR: 0.0003 +[2026-03-02 02:43:24] (step=0029818) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.8340833496380355, LR: 0.0003 +[2026-03-02 02:43:32] (step=0029819) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 5.8342790060653495, LR: 0.0003 +[2026-03-02 02:43:40] (step=0029820) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.834474662492663, LR: 0.0003 +[2026-03-02 02:43:48] (step=0029821) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.834670318919977, LR: 0.0003 +[2026-03-02 02:43:56] (step=0029822) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.83486597534729, LR: 0.0003 +[2026-03-02 02:44:04] (step=0029823) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.835061631774604, LR: 0.0003 +[2026-03-02 02:44:11] (step=0029824) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.835257288201918, LR: 0.0003 +[2026-03-02 02:44:19] (step=0029825) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.835452944629231, LR: 0.0003 +[2026-03-02 02:44:27] (step=0029826) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.835648601056545, LR: 0.0003 +[2026-03-02 02:44:35] (step=0029827) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 5.835844257483858, LR: 0.0003 +[2026-03-02 02:44:43] (step=0029828) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.836039913911172, LR: 0.0003 +[2026-03-02 02:44:51] (step=0029829) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.836235570338486, LR: 0.0003 +[2026-03-02 02:44:58] (step=0029830) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.836431226765799, LR: 0.0003 +[2026-03-02 02:45:06] (step=0029831) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.836626883193113, LR: 0.0003 +[2026-03-02 02:45:14] (step=0029832) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.836822539620426, LR: 0.0003 +[2026-03-02 02:45:22] (step=0029833) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 5.83701819604774, LR: 0.0003 +[2026-03-02 02:45:30] (step=0029834) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.837213852475053, LR: 0.0003 +[2026-03-02 02:45:38] (step=0029835) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.837409508902367, LR: 0.0003 +[2026-03-02 02:45:46] (step=0029836) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.837605165329681, LR: 0.0003 +[2026-03-02 02:45:53] (step=0029837) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.8378008217569946, LR: 0.0003 +[2026-03-02 02:46:01] (step=0029838) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.837996478184309, LR: 0.0003 +[2026-03-02 02:46:09] (step=0029839) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.838192134611622, LR: 0.0003 +[2026-03-02 02:46:17] (step=0029840) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 5.838387791038936, LR: 0.0003 +[2026-03-02 02:46:25] (step=0029841) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.83858344746625, LR: 0.0003 +[2026-03-02 02:46:32] (step=0029842) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 5.838779103893563, LR: 0.0003 +[2026-03-02 02:46:40] (step=0029843) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 5.838974760320877, LR: 0.0003 +[2026-03-02 02:46:48] (step=0029844) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.83917041674819, LR: 0.0003 +[2026-03-02 02:46:56] (step=0029845) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.839366073175504, LR: 0.0003 +[2026-03-02 02:47:04] (step=0029846) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.839561729602817, LR: 0.0003 +[2026-03-02 02:47:12] (step=0029847) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.839757386030131, LR: 0.0003 +[2026-03-02 02:47:19] (step=0029848) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.839953042457445, LR: 0.0003 +[2026-03-02 02:47:27] (step=0029849) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.840148698884758, LR: 0.0003 +[2026-03-02 02:47:35] (step=0029850) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.840344355312072, LR: 0.0003 +[2026-03-02 02:47:43] (step=0029851) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.840540011739385, LR: 0.0003 +[2026-03-02 02:47:51] (step=0029852) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 5.840735668166699, LR: 0.0003 +[2026-03-02 02:47:59] (step=0029853) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.840931324594013, LR: 0.0003 +[2026-03-02 02:48:06] (step=0029854) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.841126981021326, LR: 0.0003 +[2026-03-02 02:48:14] (step=0029855) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.84132263744864, LR: 0.0003 +[2026-03-02 02:48:22] (step=0029856) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.841518293875954, LR: 0.0003 +[2026-03-02 02:48:30] (step=0029857) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 5.841713950303268, LR: 0.0003 +[2026-03-02 02:48:38] (step=0029858) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.841909606730581, LR: 0.0003 +[2026-03-02 02:48:46] (step=0029859) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.842105263157895, LR: 0.0003 +[2026-03-02 02:48:53] (step=0029860) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.842300919585209, LR: 0.0003 +[2026-03-02 02:49:01] (step=0029861) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.842496576012522, LR: 0.0003 +[2026-03-02 02:49:09] (step=0029862) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.842692232439836, LR: 0.0003 +[2026-03-02 02:49:17] (step=0029863) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.842887888867149, LR: 0.0003 +[2026-03-02 02:49:25] (step=0029864) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.843083545294463, LR: 0.0003 +[2026-03-02 02:49:33] (step=0029865) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.843279201721777, LR: 0.0003 +[2026-03-02 02:49:40] (step=0029866) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.84347485814909, LR: 0.0003 +[2026-03-02 02:49:48] (step=0029867) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.843670514576404, LR: 0.0003 +[2026-03-02 02:49:56] (step=0029868) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.843866171003717, LR: 0.0003 +[2026-03-02 02:50:04] (step=0029869) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.844061827431031, LR: 0.0003 +[2026-03-02 02:50:12] (step=0029870) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.844257483858344, LR: 0.0003 +[2026-03-02 02:50:20] (step=0029871) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.844453140285658, LR: 0.0003 +[2026-03-02 02:50:28] (step=0029872) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.844648796712972, LR: 0.0003 +[2026-03-02 02:50:35] (step=0029873) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.8448444531402854, LR: 0.0003 +[2026-03-02 02:50:43] (step=0029874) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.8450401095675995, LR: 0.0003 +[2026-03-02 02:50:51] (step=0029875) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.845235765994913, LR: 0.0003 +[2026-03-02 02:50:59] (step=0029876) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.845431422422227, LR: 0.0003 +[2026-03-02 02:51:07] (step=0029877) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.845627078849541, LR: 0.0003 +[2026-03-02 02:51:15] (step=0029878) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.845822735276854, LR: 0.0003 +[2026-03-02 02:51:22] (step=0029879) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.846018391704168, LR: 0.0003 +[2026-03-02 02:51:30] (step=0029880) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.846214048131481, LR: 0.0003 +[2026-03-02 02:51:38] (step=0029881) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.846409704558795, LR: 0.0003 +[2026-03-02 02:51:46] (step=0029882) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.846605360986109, LR: 0.0003 +[2026-03-02 02:51:54] (step=0029883) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.846801017413422, LR: 0.0003 +[2026-03-02 02:52:02] (step=0029884) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.846996673840736, LR: 0.0003 +[2026-03-02 02:52:09] (step=0029885) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.847192330268049, LR: 0.0003 +[2026-03-02 02:52:17] (step=0029886) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.847387986695363, LR: 0.0003 +[2026-03-02 02:52:25] (step=0029887) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.847583643122676, LR: 0.0003 +[2026-03-02 02:52:33] (step=0029888) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.84777929954999, LR: 0.0003 +[2026-03-02 02:52:41] (step=0029889) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.847974955977304, LR: 0.0003 +[2026-03-02 02:52:49] (step=0029890) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.848170612404617, LR: 0.0003 +[2026-03-02 02:52:57] (step=0029891) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.848366268831931, LR: 0.0003 +[2026-03-02 02:53:04] (step=0029892) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.8485619252592445, LR: 0.0003 +[2026-03-02 02:53:12] (step=0029893) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.8487575816865585, LR: 0.0003 +[2026-03-02 02:53:20] (step=0029894) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 5.8489532381138725, LR: 0.0003 +[2026-03-02 02:53:28] (step=0029895) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 5.849148894541186, LR: 0.0003 +[2026-03-02 02:53:36] (step=0029896) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.8493445509685, LR: 0.0003 +[2026-03-02 02:53:44] (step=0029897) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.849540207395813, LR: 0.0003 +[2026-03-02 02:53:51] (step=0029898) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.849735863823127, LR: 0.0003 +[2026-03-02 02:53:59] (step=0029899) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.84993152025044, LR: 0.0003 +[2026-03-02 02:54:07] (step=0029900) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.850127176677754, LR: 0.0003 +[2026-03-02 02:54:15] (step=0029901) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.850322833105068, LR: 0.0003 +[2026-03-02 02:54:23] (step=0029902) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.850518489532381, LR: 0.0003 +[2026-03-02 02:54:31] (step=0029903) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 5.850714145959695, LR: 0.0003 +[2026-03-02 02:54:39] (step=0029904) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.850909802387008, LR: 0.0003 +[2026-03-02 02:54:46] (step=0029905) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.851105458814322, LR: 0.0003 +[2026-03-02 02:54:54] (step=0029906) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.851301115241636, LR: 0.0003 +[2026-03-02 02:55:02] (step=0029907) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.851496771668949, LR: 0.0003 +[2026-03-02 02:55:10] (step=0029908) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.851692428096263, LR: 0.0003 +[2026-03-02 02:55:18] (step=0029909) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.851888084523576, LR: 0.0003 +[2026-03-02 02:55:26] (step=0029910) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.85208374095089, LR: 0.0003 +[2026-03-02 02:55:33] (step=0029911) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.8522793973782035, LR: 0.0003 +[2026-03-02 02:55:41] (step=0029912) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.8524750538055175, LR: 0.0003 +[2026-03-02 02:55:49] (step=0029913) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.8526707102328315, LR: 0.0003 +[2026-03-02 02:55:57] (step=0029914) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.852866366660145, LR: 0.0003 +[2026-03-02 02:56:05] (step=0029915) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.853062023087459, LR: 0.0003 +[2026-03-02 02:56:13] (step=0029916) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.853257679514772, LR: 0.0003 +[2026-03-02 02:56:20] (step=0029917) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.853453335942086, LR: 0.0003 +[2026-03-02 02:56:28] (step=0029918) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.8536489923694, LR: 0.0003 +[2026-03-02 02:56:36] (step=0029919) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.853844648796713, LR: 0.0003 +[2026-03-02 02:56:44] (step=0029920) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.854040305224027, LR: 0.0003 +[2026-03-02 02:56:52] (step=0029921) Train Loss: 0.4229, Train Steps/Sec: 0.13, Epoch: 5.85423596165134, LR: 0.0003 +[2026-03-02 02:57:00] (step=0029922) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.854431618078654, LR: 0.0003 +[2026-03-02 02:57:07] (step=0029923) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 5.854627274505967, LR: 0.0003 +[2026-03-02 02:57:15] (step=0029924) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.854822930933281, LR: 0.0003 +[2026-03-02 02:57:23] (step=0029925) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 5.855018587360595, LR: 0.0003 +[2026-03-02 02:57:31] (step=0029926) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 5.855214243787908, LR: 0.0003 +[2026-03-02 02:57:39] (step=0029927) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.855409900215222, LR: 0.0003 +[2026-03-02 02:57:47] (step=0029928) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.855605556642535, LR: 0.0003 +[2026-03-02 02:57:55] (step=0029929) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.855801213069849, LR: 0.0003 +[2026-03-02 02:58:02] (step=0029930) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.855996869497163, LR: 0.0003 +[2026-03-02 02:58:10] (step=0029931) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.8561925259244765, LR: 0.0003 +[2026-03-02 02:58:18] (step=0029932) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.8563881823517905, LR: 0.0003 +[2026-03-02 02:58:26] (step=0029933) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.856583838779104, LR: 0.0003 +[2026-03-02 02:58:34] (step=0029934) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.856779495206418, LR: 0.0003 +[2026-03-02 02:58:42] (step=0029935) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.856975151633731, LR: 0.0003 +[2026-03-02 02:58:50] (step=0029936) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 5.857170808061045, LR: 0.0003 +[2026-03-02 02:58:57] (step=0029937) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.857366464488359, LR: 0.0003 +[2026-03-02 02:59:05] (step=0029938) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 5.857562120915672, LR: 0.0003 +[2026-03-02 02:59:13] (step=0029939) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.857757777342986, LR: 0.0003 +[2026-03-02 02:59:21] (step=0029940) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.857953433770299, LR: 0.0003 +[2026-03-02 02:59:29] (step=0029941) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.858149090197613, LR: 0.0003 +[2026-03-02 02:59:37] (step=0029942) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 5.858344746624927, LR: 0.0003 +[2026-03-02 02:59:45] (step=0029943) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.85854040305224, LR: 0.0003 +[2026-03-02 02:59:52] (step=0029944) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.858736059479554, LR: 0.0003 +[2026-03-02 03:00:00] (step=0029945) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.858931715906867, LR: 0.0003 +[2026-03-02 03:00:08] (step=0029946) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.859127372334181, LR: 0.0003 +[2026-03-02 03:00:16] (step=0029947) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 5.859323028761495, LR: 0.0003 +[2026-03-02 03:00:24] (step=0029948) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.859518685188808, LR: 0.0003 +[2026-03-02 03:00:32] (step=0029949) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.859714341616122, LR: 0.0003 +[2026-03-02 03:00:39] (step=0029950) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.8599099980434355, LR: 0.0003 +[2026-03-02 03:00:47] (step=0029951) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 5.8601056544707495, LR: 0.0003 +[2026-03-02 03:00:55] (step=0029952) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.860301310898063, LR: 0.0003 +[2026-03-02 03:01:03] (step=0029953) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.860496967325377, LR: 0.0003 +[2026-03-02 03:01:11] (step=0029954) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.860692623752691, LR: 0.0003 +[2026-03-02 03:01:19] (step=0029955) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.860888280180004, LR: 0.0003 +[2026-03-02 03:01:26] (step=0029956) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.861083936607318, LR: 0.0003 +[2026-03-02 03:01:34] (step=0029957) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.861279593034631, LR: 0.0003 +[2026-03-02 03:01:42] (step=0029958) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.861475249461945, LR: 0.0003 +[2026-03-02 03:01:50] (step=0029959) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.861670905889259, LR: 0.0003 +[2026-03-02 03:01:58] (step=0029960) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.861866562316572, LR: 0.0003 +[2026-03-02 03:02:06] (step=0029961) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.862062218743886, LR: 0.0003 +[2026-03-02 03:02:13] (step=0029962) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 5.862257875171199, LR: 0.0003 +[2026-03-02 03:02:21] (step=0029963) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.862453531598513, LR: 0.0003 +[2026-03-02 03:02:29] (step=0029964) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.862649188025826, LR: 0.0003 +[2026-03-02 03:02:37] (step=0029965) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.86284484445314, LR: 0.0003 +[2026-03-02 03:02:45] (step=0029966) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.863040500880454, LR: 0.0003 +[2026-03-02 03:02:53] (step=0029967) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.863236157307767, LR: 0.0003 +[2026-03-02 03:03:01] (step=0029968) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.863431813735081, LR: 0.0003 +[2026-03-02 03:03:08] (step=0029969) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.8636274701623945, LR: 0.0003 +[2026-03-02 03:03:16] (step=0029970) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.8638231265897085, LR: 0.0003 +[2026-03-02 03:03:24] (step=0029971) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.8640187830170225, LR: 0.0003 +[2026-03-02 03:03:32] (step=0029972) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.864214439444336, LR: 0.0003 +[2026-03-02 03:03:40] (step=0029973) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.86441009587165, LR: 0.0003 +[2026-03-02 03:03:48] (step=0029974) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 5.864605752298963, LR: 0.0003 +[2026-03-02 03:03:56] (step=0029975) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 5.864801408726277, LR: 0.0003 +[2026-03-02 03:04:03] (step=0029976) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.86499706515359, LR: 0.0003 +[2026-03-02 03:04:11] (step=0029977) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.865192721580904, LR: 0.0003 +[2026-03-02 03:04:19] (step=0029978) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.865388378008218, LR: 0.0003 +[2026-03-02 03:04:27] (step=0029979) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.865584034435531, LR: 0.0003 +[2026-03-02 03:04:35] (step=0029980) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.865779690862845, LR: 0.0003 +[2026-03-02 03:04:43] (step=0029981) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.865975347290158, LR: 0.0003 +[2026-03-02 03:04:50] (step=0029982) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 5.866171003717472, LR: 0.0003 +[2026-03-02 03:04:58] (step=0029983) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.866366660144786, LR: 0.0003 +[2026-03-02 03:05:06] (step=0029984) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.866562316572099, LR: 0.0003 +[2026-03-02 03:05:14] (step=0029985) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.866757972999413, LR: 0.0003 +[2026-03-02 03:05:22] (step=0029986) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.866953629426726, LR: 0.0003 +[2026-03-02 03:05:30] (step=0029987) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.86714928585404, LR: 0.0003 +[2026-03-02 03:05:38] (step=0029988) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.8673449422813535, LR: 0.0003 +[2026-03-02 03:05:45] (step=0029989) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 5.8675405987086675, LR: 0.0003 +[2026-03-02 03:05:53] (step=0029990) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.8677362551359815, LR: 0.0003 +[2026-03-02 03:06:01] (step=0029991) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.867931911563295, LR: 0.0003 +[2026-03-02 03:06:09] (step=0029992) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.868127567990609, LR: 0.0003 +[2026-03-02 03:06:17] (step=0029993) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.868323224417922, LR: 0.0003 +[2026-03-02 03:06:25] (step=0029994) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.868518880845236, LR: 0.0003 +[2026-03-02 03:06:32] (step=0029995) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.86871453727255, LR: 0.0003 +[2026-03-02 03:06:40] (step=0029996) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.868910193699863, LR: 0.0003 +[2026-03-02 03:06:48] (step=0029997) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.869105850127177, LR: 0.0003 +[2026-03-02 03:06:56] (step=0029998) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.86930150655449, LR: 0.0003 +[2026-03-02 03:07:04] (step=0029999) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 5.869497162981804, LR: 0.0003 +[2026-03-02 03:07:12] (step=0030000) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.869692819409118, LR: 0.0003 +[2026-03-02 03:07:12] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0030000/ +[2026-03-02 03:07:20] (step=0030001) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 5.869888475836431, LR: 0.0003 +[2026-03-02 03:07:27] (step=0030002) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.870084132263745, LR: 0.0003 +[2026-03-02 03:07:35] (step=0030003) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.870279788691058, LR: 0.0003 +[2026-03-02 03:07:43] (step=0030004) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.870475445118372, LR: 0.0003 +[2026-03-02 03:07:51] (step=0030005) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.870671101545685, LR: 0.0003 +[2026-03-02 03:07:59] (step=0030006) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.870866757972999, LR: 0.0003 +[2026-03-02 03:08:07] (step=0030007) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.871062414400313, LR: 0.0003 +[2026-03-02 03:08:14] (step=0030008) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.8712580708276265, LR: 0.0003 +[2026-03-02 03:08:22] (step=0030009) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 5.8714537272549405, LR: 0.0003 +[2026-03-02 03:08:30] (step=0030010) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.871649383682254, LR: 0.0003 +[2026-03-02 03:08:38] (step=0030011) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.871845040109568, LR: 0.0003 +[2026-03-02 03:08:46] (step=0030012) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.872040696536882, LR: 0.0003 +[2026-03-02 03:08:54] (step=0030013) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.872236352964195, LR: 0.0003 +[2026-03-02 03:09:02] (step=0030014) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 5.872432009391509, LR: 0.0003 +[2026-03-02 03:09:09] (step=0030015) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 5.872627665818822, LR: 0.0003 +[2026-03-02 03:09:17] (step=0030016) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.872823322246136, LR: 0.0003 +[2026-03-02 03:09:25] (step=0030017) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.873018978673449, LR: 0.0003 +[2026-03-02 03:09:33] (step=0030018) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 5.873214635100763, LR: 0.0003 +[2026-03-02 03:09:41] (step=0030019) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.873410291528077, LR: 0.0003 +[2026-03-02 03:09:49] (step=0030020) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.87360594795539, LR: 0.0003 +[2026-03-02 03:09:56] (step=0030021) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.873801604382704, LR: 0.0003 +[2026-03-02 03:10:04] (step=0030022) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.873997260810017, LR: 0.0003 +[2026-03-02 03:10:12] (step=0030023) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 5.874192917237331, LR: 0.0003 +[2026-03-02 03:10:20] (step=0030024) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.874388573664645, LR: 0.0003 +[2026-03-02 03:10:28] (step=0030025) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 5.874584230091958, LR: 0.0003 +[2026-03-02 03:10:36] (step=0030026) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.874779886519272, LR: 0.0003 +[2026-03-02 03:10:44] (step=0030027) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.8749755429465855, LR: 0.0003 +[2026-03-02 03:10:51] (step=0030028) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 5.8751711993738995, LR: 0.0003 +[2026-03-02 03:10:59] (step=0030029) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.875366855801213, LR: 0.0003 +[2026-03-02 03:11:07] (step=0030030) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 5.875562512228527, LR: 0.0003 +[2026-03-02 03:11:15] (step=0030031) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.875758168655841, LR: 0.0003 +[2026-03-02 03:11:23] (step=0030032) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.875953825083154, LR: 0.0003 +[2026-03-02 03:11:31] (step=0030033) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 5.876149481510468, LR: 0.0003 +[2026-03-02 03:11:39] (step=0030034) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 5.876345137937781, LR: 0.0003 +[2026-03-02 03:11:46] (step=0030035) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.876540794365095, LR: 0.0003 +[2026-03-02 03:11:54] (step=0030036) Train Loss: 0.4537, Train Steps/Sec: 0.12, Epoch: 5.876736450792409, LR: 0.0003 +[2026-03-02 03:12:02] (step=0030037) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.876932107219722, LR: 0.0003 +[2026-03-02 03:12:10] (step=0030038) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.877127763647036, LR: 0.0003 +[2026-03-02 03:12:18] (step=0030039) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.877323420074349, LR: 0.0003 +[2026-03-02 03:12:26] (step=0030040) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 5.877519076501663, LR: 0.0003 +[2026-03-02 03:12:34] (step=0030041) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.877714732928976, LR: 0.0003 +[2026-03-02 03:12:41] (step=0030042) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.87791038935629, LR: 0.0003 +[2026-03-02 03:12:49] (step=0030043) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.878106045783604, LR: 0.0003 +[2026-03-02 03:12:57] (step=0030044) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.878301702210917, LR: 0.0003 +[2026-03-02 03:13:05] (step=0030045) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 5.878497358638231, LR: 0.0003 +[2026-03-02 03:13:13] (step=0030046) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 5.8786930150655445, LR: 0.0003 +[2026-03-02 03:13:21] (step=0030047) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.8788886714928585, LR: 0.0003 +[2026-03-02 03:13:29] (step=0030048) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.8790843279201725, LR: 0.0003 +[2026-03-02 03:13:36] (step=0030049) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.879279984347486, LR: 0.0003 +[2026-03-02 03:13:44] (step=0030050) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.8794756407748, LR: 0.0003 +[2026-03-02 03:13:52] (step=0030051) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.879671297202113, LR: 0.0003 +[2026-03-02 03:14:00] (step=0030052) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.879866953629427, LR: 0.0003 +[2026-03-02 03:14:08] (step=0030053) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.880062610056741, LR: 0.0003 +[2026-03-02 03:14:16] (step=0030054) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.880258266484054, LR: 0.0003 +[2026-03-02 03:14:23] (step=0030055) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.880453922911368, LR: 0.0003 +[2026-03-02 03:14:31] (step=0030056) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.880649579338681, LR: 0.0003 +[2026-03-02 03:14:39] (step=0030057) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.880845235765995, LR: 0.0003 +[2026-03-02 03:14:47] (step=0030058) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.881040892193308, LR: 0.0003 +[2026-03-02 03:14:55] (step=0030059) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.881236548620622, LR: 0.0003 +[2026-03-02 03:15:03] (step=0030060) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.881432205047936, LR: 0.0003 +[2026-03-02 03:15:10] (step=0030061) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.881627861475249, LR: 0.0003 +[2026-03-02 03:15:18] (step=0030062) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.881823517902563, LR: 0.0003 +[2026-03-02 03:15:26] (step=0030063) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.882019174329876, LR: 0.0003 +[2026-03-02 03:15:34] (step=0030064) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.88221483075719, LR: 0.0003 +[2026-03-02 03:15:42] (step=0030065) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.882410487184504, LR: 0.0003 +[2026-03-02 03:15:50] (step=0030066) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.8826061436118176, LR: 0.0003 +[2026-03-02 03:15:57] (step=0030067) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.882801800039132, LR: 0.0003 +[2026-03-02 03:16:05] (step=0030068) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.882997456466445, LR: 0.0003 +[2026-03-02 03:16:13] (step=0030069) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 5.883193112893759, LR: 0.0003 +[2026-03-02 03:16:21] (step=0030070) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.883388769321072, LR: 0.0003 +[2026-03-02 03:16:29] (step=0030071) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.883584425748386, LR: 0.0003 +[2026-03-02 03:16:37] (step=0030072) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.8837800821757, LR: 0.0003 +[2026-03-02 03:16:45] (step=0030073) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.883975738603013, LR: 0.0003 +[2026-03-02 03:16:52] (step=0030074) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.884171395030327, LR: 0.0003 +[2026-03-02 03:17:00] (step=0030075) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.88436705145764, LR: 0.0003 +[2026-03-02 03:17:08] (step=0030076) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 5.884562707884954, LR: 0.0003 +[2026-03-02 03:17:16] (step=0030077) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 5.884758364312268, LR: 0.0003 +[2026-03-02 03:17:24] (step=0030078) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.884954020739581, LR: 0.0003 +[2026-03-02 03:17:32] (step=0030079) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.885149677166895, LR: 0.0003 +[2026-03-02 03:17:40] (step=0030080) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.885345333594208, LR: 0.0003 +[2026-03-02 03:17:47] (step=0030081) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.885540990021522, LR: 0.0003 +[2026-03-02 03:17:55] (step=0030082) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.885736646448835, LR: 0.0003 +[2026-03-02 03:18:03] (step=0030083) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.885932302876149, LR: 0.0003 +[2026-03-02 03:18:11] (step=0030084) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 5.886127959303463, LR: 0.0003 +[2026-03-02 03:18:19] (step=0030085) Train Loss: 0.4332, Train Steps/Sec: 0.12, Epoch: 5.886323615730777, LR: 0.0003 +[2026-03-02 03:18:27] (step=0030086) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 5.886519272158091, LR: 0.0003 +[2026-03-02 03:18:35] (step=0030087) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.886714928585404, LR: 0.0003 +[2026-03-02 03:18:42] (step=0030088) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.886910585012718, LR: 0.0003 +[2026-03-02 03:18:50] (step=0030089) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 5.887106241440032, LR: 0.0003 +[2026-03-02 03:18:58] (step=0030090) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.887301897867345, LR: 0.0003 +[2026-03-02 03:19:06] (step=0030091) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.887497554294659, LR: 0.0003 +[2026-03-02 03:19:14] (step=0030092) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.887693210721972, LR: 0.0003 +[2026-03-02 03:19:22] (step=0030093) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.887888867149286, LR: 0.0003 +[2026-03-02 03:19:29] (step=0030094) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.888084523576599, LR: 0.0003 +[2026-03-02 03:19:37] (step=0030095) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 5.888280180003913, LR: 0.0003 +[2026-03-02 03:19:45] (step=0030096) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 5.888475836431227, LR: 0.0003 +[2026-03-02 03:19:53] (step=0030097) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.88867149285854, LR: 0.0003 +[2026-03-02 03:20:01] (step=0030098) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.888867149285854, LR: 0.0003 +[2026-03-02 03:20:09] (step=0030099) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 5.889062805713167, LR: 0.0003 +[2026-03-02 03:20:16] (step=0030100) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.889258462140481, LR: 0.0003 +[2026-03-02 03:20:24] (step=0030101) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.889454118567795, LR: 0.0003 +[2026-03-02 03:20:32] (step=0030102) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 5.8896497749951084, LR: 0.0003 +[2026-03-02 03:20:40] (step=0030103) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 5.8898454314224225, LR: 0.0003 +[2026-03-02 03:20:48] (step=0030104) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.890041087849736, LR: 0.0003 +[2026-03-02 03:20:56] (step=0030105) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.89023674427705, LR: 0.0003 +[2026-03-02 03:21:03] (step=0030106) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.890432400704364, LR: 0.0003 +[2026-03-02 03:21:11] (step=0030107) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 5.890628057131677, LR: 0.0003 +[2026-03-02 03:21:19] (step=0030108) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.890823713558991, LR: 0.0003 +[2026-03-02 03:21:27] (step=0030109) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.891019369986304, LR: 0.0003 +[2026-03-02 03:21:35] (step=0030110) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 5.891215026413618, LR: 0.0003 +[2026-03-02 03:21:43] (step=0030111) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.891410682840931, LR: 0.0003 +[2026-03-02 03:21:50] (step=0030112) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.891606339268245, LR: 0.0003 +[2026-03-02 03:21:58] (step=0030113) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 5.891801995695559, LR: 0.0003 +[2026-03-02 03:22:06] (step=0030114) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 5.891997652122872, LR: 0.0003 +[2026-03-02 03:22:14] (step=0030115) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.892193308550186, LR: 0.0003 +[2026-03-02 03:22:22] (step=0030116) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 5.892388964977499, LR: 0.0003 +[2026-03-02 03:22:30] (step=0030117) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.892584621404813, LR: 0.0003 +[2026-03-02 03:22:38] (step=0030118) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.892780277832127, LR: 0.0003 +[2026-03-02 03:22:45] (step=0030119) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.89297593425944, LR: 0.0003 +[2026-03-02 03:22:53] (step=0030120) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.893171590686754, LR: 0.0003 +[2026-03-02 03:23:01] (step=0030121) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.8933672471140675, LR: 0.0003 +[2026-03-02 03:23:09] (step=0030122) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 5.8935629035413815, LR: 0.0003 +[2026-03-02 03:23:17] (step=0030123) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 5.893758559968695, LR: 0.0003 +[2026-03-02 03:23:25] (step=0030124) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.893954216396009, LR: 0.0003 +[2026-03-02 03:23:33] (step=0030125) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.894149872823323, LR: 0.0003 +[2026-03-02 03:23:40] (step=0030126) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.894345529250636, LR: 0.0003 +[2026-03-02 03:23:48] (step=0030127) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.89454118567795, LR: 0.0003 +[2026-03-02 03:23:56] (step=0030128) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.894736842105263, LR: 0.0003 +[2026-03-02 03:24:04] (step=0030129) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.894932498532577, LR: 0.0003 +[2026-03-02 03:24:12] (step=0030130) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.895128154959891, LR: 0.0003 +[2026-03-02 03:24:20] (step=0030131) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.895323811387204, LR: 0.0003 +[2026-03-02 03:24:27] (step=0030132) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.895519467814518, LR: 0.0003 +[2026-03-02 03:24:35] (step=0030133) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.895715124241831, LR: 0.0003 +[2026-03-02 03:24:43] (step=0030134) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 5.895910780669145, LR: 0.0003 +[2026-03-02 03:24:51] (step=0030135) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.896106437096458, LR: 0.0003 +[2026-03-02 03:24:59] (step=0030136) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 5.896302093523772, LR: 0.0003 +[2026-03-02 03:25:07] (step=0030137) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.896497749951086, LR: 0.0003 +[2026-03-02 03:25:15] (step=0030138) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.896693406378399, LR: 0.0003 +[2026-03-02 03:25:22] (step=0030139) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.896889062805713, LR: 0.0003 +[2026-03-02 03:25:30] (step=0030140) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.8970847192330265, LR: 0.0003 +[2026-03-02 03:25:38] (step=0030141) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 5.8972803756603405, LR: 0.0003 +[2026-03-02 03:25:46] (step=0030142) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.8974760320876545, LR: 0.0003 +[2026-03-02 03:25:54] (step=0030143) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.897671688514968, LR: 0.0003 +[2026-03-02 03:26:02] (step=0030144) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 5.897867344942282, LR: 0.0003 +[2026-03-02 03:26:10] (step=0030145) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.898063001369595, LR: 0.0003 +[2026-03-02 03:26:17] (step=0030146) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 5.898258657796909, LR: 0.0003 +[2026-03-02 03:26:25] (step=0030147) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.898454314224222, LR: 0.0003 +[2026-03-02 03:26:33] (step=0030148) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.898649970651536, LR: 0.0003 +[2026-03-02 03:26:41] (step=0030149) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.89884562707885, LR: 0.0003 +[2026-03-02 03:26:49] (step=0030150) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.899041283506163, LR: 0.0003 +[2026-03-02 03:26:57] (step=0030151) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.899236939933477, LR: 0.0003 +[2026-03-02 03:27:04] (step=0030152) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.89943259636079, LR: 0.0003 +[2026-03-02 03:27:12] (step=0030153) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 5.899628252788104, LR: 0.0003 +[2026-03-02 03:27:20] (step=0030154) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.899823909215418, LR: 0.0003 +[2026-03-02 03:27:28] (step=0030155) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.900019565642731, LR: 0.0003 +[2026-03-02 03:27:36] (step=0030156) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.900215222070045, LR: 0.0003 +[2026-03-02 03:27:44] (step=0030157) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.900410878497358, LR: 0.0003 +[2026-03-02 03:27:51] (step=0030158) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.900606534924672, LR: 0.0003 +[2026-03-02 03:27:59] (step=0030159) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.9008021913519855, LR: 0.0003 +[2026-03-02 03:28:07] (step=0030160) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.9009978477792995, LR: 0.0003 +[2026-03-02 03:28:15] (step=0030161) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.9011935042066135, LR: 0.0003 +[2026-03-02 03:28:23] (step=0030162) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.901389160633927, LR: 0.0003 +[2026-03-02 03:28:31] (step=0030163) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.901584817061241, LR: 0.0003 +[2026-03-02 03:28:38] (step=0030164) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.901780473488554, LR: 0.0003 +[2026-03-02 03:28:46] (step=0030165) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 5.901976129915868, LR: 0.0003 +[2026-03-02 03:28:54] (step=0030166) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.902171786343182, LR: 0.0003 +[2026-03-02 03:29:02] (step=0030167) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.902367442770495, LR: 0.0003 +[2026-03-02 03:29:10] (step=0030168) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.902563099197809, LR: 0.0003 +[2026-03-02 03:29:18] (step=0030169) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.902758755625122, LR: 0.0003 +[2026-03-02 03:29:25] (step=0030170) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 5.902954412052436, LR: 0.0003 +[2026-03-02 03:29:33] (step=0030171) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.90315006847975, LR: 0.0003 +[2026-03-02 03:29:41] (step=0030172) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.903345724907063, LR: 0.0003 +[2026-03-02 03:29:49] (step=0030173) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.903541381334377, LR: 0.0003 +[2026-03-02 03:29:57] (step=0030174) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.90373703776169, LR: 0.0003 +[2026-03-02 03:30:05] (step=0030175) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 5.903932694189004, LR: 0.0003 +[2026-03-02 03:30:13] (step=0030176) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.904128350616317, LR: 0.0003 +[2026-03-02 03:30:20] (step=0030177) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.904324007043631, LR: 0.0003 +[2026-03-02 03:30:28] (step=0030178) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.904519663470945, LR: 0.0003 +[2026-03-02 03:30:36] (step=0030179) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.9047153198982585, LR: 0.0003 +[2026-03-02 03:30:44] (step=0030180) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.9049109763255725, LR: 0.0003 +[2026-03-02 03:30:52] (step=0030181) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 5.905106632752886, LR: 0.0003 +[2026-03-02 03:31:00] (step=0030182) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.9053022891802, LR: 0.0003 +[2026-03-02 03:31:07] (step=0030183) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 5.905497945607514, LR: 0.0003 +[2026-03-02 03:31:15] (step=0030184) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.905693602034827, LR: 0.0003 +[2026-03-02 03:31:23] (step=0030185) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.905889258462141, LR: 0.0003 +[2026-03-02 03:31:31] (step=0030186) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.906084914889454, LR: 0.0003 +[2026-03-02 03:31:39] (step=0030187) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.906280571316768, LR: 0.0003 +[2026-03-02 03:31:47] (step=0030188) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.906476227744081, LR: 0.0003 +[2026-03-02 03:31:55] (step=0030189) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.906671884171395, LR: 0.0003 +[2026-03-02 03:32:02] (step=0030190) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.906867540598709, LR: 0.0003 +[2026-03-02 03:32:10] (step=0030191) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.907063197026022, LR: 0.0003 +[2026-03-02 03:32:18] (step=0030192) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.907258853453336, LR: 0.0003 +[2026-03-02 03:32:26] (step=0030193) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 5.907454509880649, LR: 0.0003 +[2026-03-02 03:32:34] (step=0030194) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.907650166307963, LR: 0.0003 +[2026-03-02 03:32:42] (step=0030195) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.907845822735277, LR: 0.0003 +[2026-03-02 03:32:49] (step=0030196) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.90804147916259, LR: 0.0003 +[2026-03-02 03:32:57] (step=0030197) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.908237135589904, LR: 0.0003 +[2026-03-02 03:33:05] (step=0030198) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.9084327920172175, LR: 0.0003 +[2026-03-02 03:33:13] (step=0030199) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 5.9086284484445315, LR: 0.0003 +[2026-03-02 03:33:21] (step=0030200) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.908824104871845, LR: 0.0003 +[2026-03-02 03:33:29] (step=0030201) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.909019761299159, LR: 0.0003 +[2026-03-02 03:33:36] (step=0030202) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 5.909215417726473, LR: 0.0003 +[2026-03-02 03:33:44] (step=0030203) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 5.909411074153786, LR: 0.0003 +[2026-03-02 03:33:52] (step=0030204) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.9096067305811, LR: 0.0003 +[2026-03-02 03:34:00] (step=0030205) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.909802387008413, LR: 0.0003 +[2026-03-02 03:34:08] (step=0030206) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.909998043435727, LR: 0.0003 +[2026-03-02 03:34:16] (step=0030207) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.910193699863041, LR: 0.0003 +[2026-03-02 03:34:23] (step=0030208) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 5.910389356290354, LR: 0.0003 +[2026-03-02 03:34:31] (step=0030209) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.910585012717668, LR: 0.0003 +[2026-03-02 03:34:39] (step=0030210) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.910780669144981, LR: 0.0003 +[2026-03-02 03:34:47] (step=0030211) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.910976325572295, LR: 0.0003 +[2026-03-02 03:34:55] (step=0030212) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 5.911171981999608, LR: 0.0003 +[2026-03-02 03:35:03] (step=0030213) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 5.911367638426922, LR: 0.0003 +[2026-03-02 03:35:10] (step=0030214) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 5.911563294854236, LR: 0.0003 +[2026-03-02 03:35:18] (step=0030215) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 5.911758951281549, LR: 0.0003 +[2026-03-02 03:35:26] (step=0030216) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.911954607708863, LR: 0.0003 +[2026-03-02 03:35:34] (step=0030217) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 5.9121502641361765, LR: 0.0003 +[2026-03-02 03:35:42] (step=0030218) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 5.9123459205634905, LR: 0.0003 +[2026-03-02 03:35:49] (step=0030219) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.9125415769908045, LR: 0.0003 +[2026-03-02 03:35:57] (step=0030220) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 5.912737233418118, LR: 0.0003 +[2026-03-02 03:36:05] (step=0030221) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.912932889845432, LR: 0.0003 +[2026-03-02 03:36:13] (step=0030222) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 5.913128546272745, LR: 0.0003 +[2026-03-02 03:36:21] (step=0030223) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 5.913324202700059, LR: 0.0003 +[2026-03-02 03:36:29] (step=0030224) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.913519859127373, LR: 0.0003 +[2026-03-02 03:36:36] (step=0030225) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.913715515554686, LR: 0.0003 +[2026-03-02 03:36:44] (step=0030226) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.913911171982, LR: 0.0003 +[2026-03-02 03:36:52] (step=0030227) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.914106828409313, LR: 0.0003 +[2026-03-02 03:37:00] (step=0030228) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.914302484836627, LR: 0.0003 +[2026-03-02 03:37:08] (step=0030229) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.91449814126394, LR: 0.0003 +[2026-03-02 03:37:16] (step=0030230) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.914693797691254, LR: 0.0003 +[2026-03-02 03:37:24] (step=0030231) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.914889454118568, LR: 0.0003 +[2026-03-02 03:37:32] (step=0030232) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 5.915085110545881, LR: 0.0003 +[2026-03-02 03:37:39] (step=0030233) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 5.915280766973195, LR: 0.0003 +[2026-03-02 03:37:47] (step=0030234) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.915476423400508, LR: 0.0003 +[2026-03-02 03:37:55] (step=0030235) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.915672079827822, LR: 0.0003 +[2026-03-02 03:38:03] (step=0030236) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 5.915867736255136, LR: 0.0003 +[2026-03-02 03:38:11] (step=0030237) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 5.9160633926824495, LR: 0.0003 +[2026-03-02 03:38:19] (step=0030238) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 5.9162590491097635, LR: 0.0003 +[2026-03-02 03:38:26] (step=0030239) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 5.916454705537077, LR: 0.0003 +[2026-03-02 03:38:34] (step=0030240) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.916650361964391, LR: 0.0003 +[2026-03-02 03:38:42] (step=0030241) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.916846018391704, LR: 0.0003 +[2026-03-02 03:38:50] (step=0030242) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.917041674819018, LR: 0.0003 +[2026-03-02 03:38:58] (step=0030243) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.917237331246332, LR: 0.0003 +[2026-03-02 03:39:06] (step=0030244) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 5.917432987673645, LR: 0.0003 +[2026-03-02 03:39:14] (step=0030245) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 5.917628644100959, LR: 0.0003 +[2026-03-02 03:39:21] (step=0030246) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.917824300528272, LR: 0.0003 +[2026-03-02 03:39:29] (step=0030247) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.918019956955586, LR: 0.0003 +[2026-03-02 03:39:37] (step=0030248) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.9182156133829, LR: 0.0003 +[2026-03-02 03:39:45] (step=0030249) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.918411269810213, LR: 0.0003 +[2026-03-02 03:39:53] (step=0030250) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.918606926237527, LR: 0.0003 +[2026-03-02 03:40:01] (step=0030251) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 5.91880258266484, LR: 0.0003 +[2026-03-02 03:40:08] (step=0030252) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.918998239092154, LR: 0.0003 +[2026-03-02 03:40:16] (step=0030253) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.919193895519467, LR: 0.0003 +[2026-03-02 03:40:24] (step=0030254) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.919389551946781, LR: 0.0003 +[2026-03-02 03:40:32] (step=0030255) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 5.919585208374095, LR: 0.0003 +[2026-03-02 03:40:40] (step=0030256) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.9197808648014085, LR: 0.0003 +[2026-03-02 03:40:48] (step=0030257) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 5.9199765212287225, LR: 0.0003 +[2026-03-02 03:40:55] (step=0030258) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.920172177656036, LR: 0.0003 +[2026-03-02 03:41:03] (step=0030259) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.92036783408335, LR: 0.0003 +[2026-03-02 03:41:11] (step=0030260) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 5.920563490510664, LR: 0.0003 +[2026-03-02 03:41:19] (step=0030261) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.920759146937977, LR: 0.0003 +[2026-03-02 03:41:27] (step=0030262) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 5.920954803365291, LR: 0.0003 +[2026-03-02 03:41:35] (step=0030263) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.921150459792604, LR: 0.0003 +[2026-03-02 03:41:42] (step=0030264) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.921346116219918, LR: 0.0003 +[2026-03-02 03:41:50] (step=0030265) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.921541772647231, LR: 0.0003 +[2026-03-02 03:41:58] (step=0030266) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.921737429074545, LR: 0.0003 +[2026-03-02 03:42:06] (step=0030267) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.921933085501859, LR: 0.0003 +[2026-03-02 03:42:14] (step=0030268) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.922128741929172, LR: 0.0003 +[2026-03-02 03:42:22] (step=0030269) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.922324398356486, LR: 0.0003 +[2026-03-02 03:42:29] (step=0030270) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.922520054783799, LR: 0.0003 +[2026-03-02 03:42:37] (step=0030271) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.922715711211113, LR: 0.0003 +[2026-03-02 03:42:45] (step=0030272) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.922911367638427, LR: 0.0003 +[2026-03-02 03:42:53] (step=0030273) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 5.92310702406574, LR: 0.0003 +[2026-03-02 03:43:01] (step=0030274) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.923302680493054, LR: 0.0003 +[2026-03-02 03:43:09] (step=0030275) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.9234983369203675, LR: 0.0003 +[2026-03-02 03:43:17] (step=0030276) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.9236939933476815, LR: 0.0003 +[2026-03-02 03:43:24] (step=0030277) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.9238896497749955, LR: 0.0003 +[2026-03-02 03:43:32] (step=0030278) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.924085306202309, LR: 0.0003 +[2026-03-02 03:43:40] (step=0030279) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.924280962629623, LR: 0.0003 +[2026-03-02 03:43:48] (step=0030280) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.924476619056936, LR: 0.0003 +[2026-03-02 03:43:56] (step=0030281) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 5.92467227548425, LR: 0.0003 +[2026-03-02 03:44:04] (step=0030282) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.924867931911563, LR: 0.0003 +[2026-03-02 03:44:12] (step=0030283) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.925063588338877, LR: 0.0003 +[2026-03-02 03:44:19] (step=0030284) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 5.925259244766191, LR: 0.0003 +[2026-03-02 03:44:27] (step=0030285) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.925454901193504, LR: 0.0003 +[2026-03-02 03:44:35] (step=0030286) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.925650557620818, LR: 0.0003 +[2026-03-02 03:44:43] (step=0030287) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 5.925846214048131, LR: 0.0003 +[2026-03-02 03:44:51] (step=0030288) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 5.926041870475445, LR: 0.0003 +[2026-03-02 03:44:59] (step=0030289) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.926237526902759, LR: 0.0003 +[2026-03-02 03:45:07] (step=0030290) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 5.926433183330072, LR: 0.0003 +[2026-03-02 03:45:14] (step=0030291) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 5.926628839757386, LR: 0.0003 +[2026-03-02 03:45:22] (step=0030292) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.926824496184699, LR: 0.0003 +[2026-03-02 03:45:30] (step=0030293) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.927020152612013, LR: 0.0003 +[2026-03-02 03:45:38] (step=0030294) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.9272158090393265, LR: 0.0003 +[2026-03-02 03:45:46] (step=0030295) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 5.9274114654666405, LR: 0.0003 +[2026-03-02 03:45:54] (step=0030296) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.927607121893955, LR: 0.0003 +[2026-03-02 03:46:01] (step=0030297) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.927802778321268, LR: 0.0003 +[2026-03-02 03:46:09] (step=0030298) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.927998434748582, LR: 0.0003 +[2026-03-02 03:46:17] (step=0030299) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 5.928194091175895, LR: 0.0003 +[2026-03-02 03:46:25] (step=0030300) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 5.928389747603209, LR: 0.0003 +[2026-03-02 03:46:33] (step=0030301) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.928585404030523, LR: 0.0003 +[2026-03-02 03:46:41] (step=0030302) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.928781060457836, LR: 0.0003 +[2026-03-02 03:46:49] (step=0030303) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.92897671688515, LR: 0.0003 +[2026-03-02 03:46:56] (step=0030304) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.929172373312463, LR: 0.0003 +[2026-03-02 03:47:04] (step=0030305) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 5.929368029739777, LR: 0.0003 +[2026-03-02 03:47:12] (step=0030306) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 5.92956368616709, LR: 0.0003 +[2026-03-02 03:47:20] (step=0030307) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.929759342594404, LR: 0.0003 +[2026-03-02 03:47:28] (step=0030308) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.929954999021718, LR: 0.0003 +[2026-03-02 03:47:36] (step=0030309) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.930150655449031, LR: 0.0003 +[2026-03-02 03:47:43] (step=0030310) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 5.930346311876345, LR: 0.0003 +[2026-03-02 03:47:51] (step=0030311) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.930541968303658, LR: 0.0003 +[2026-03-02 03:47:59] (step=0030312) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.930737624730972, LR: 0.0003 +[2026-03-02 03:48:07] (step=0030313) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 5.930933281158286, LR: 0.0003 +[2026-03-02 03:48:15] (step=0030314) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.9311289375856, LR: 0.0003 +[2026-03-02 03:48:23] (step=0030315) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.931324594012914, LR: 0.0003 +[2026-03-02 03:48:31] (step=0030316) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 5.931520250440227, LR: 0.0003 +[2026-03-02 03:48:38] (step=0030317) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.931715906867541, LR: 0.0003 +[2026-03-02 03:48:46] (step=0030318) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.931911563294854, LR: 0.0003 +[2026-03-02 03:48:54] (step=0030319) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.932107219722168, LR: 0.0003 +[2026-03-02 03:49:02] (step=0030320) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 5.932302876149482, LR: 0.0003 +[2026-03-02 03:49:10] (step=0030321) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.932498532576795, LR: 0.0003 +[2026-03-02 03:49:18] (step=0030322) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.932694189004109, LR: 0.0003 +[2026-03-02 03:49:25] (step=0030323) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 5.932889845431422, LR: 0.0003 +[2026-03-02 03:49:33] (step=0030324) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 5.933085501858736, LR: 0.0003 +[2026-03-02 03:49:41] (step=0030325) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.93328115828605, LR: 0.0003 +[2026-03-02 03:49:49] (step=0030326) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.933476814713363, LR: 0.0003 +[2026-03-02 03:49:57] (step=0030327) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.933672471140677, LR: 0.0003 +[2026-03-02 03:50:05] (step=0030328) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.93386812756799, LR: 0.0003 +[2026-03-02 03:50:13] (step=0030329) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.934063783995304, LR: 0.0003 +[2026-03-02 03:50:21] (step=0030330) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.934259440422618, LR: 0.0003 +[2026-03-02 03:50:28] (step=0030331) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 5.934455096849931, LR: 0.0003 +[2026-03-02 03:50:36] (step=0030332) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.9346507532772454, LR: 0.0003 +[2026-03-02 03:50:44] (step=0030333) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.934846409704559, LR: 0.0003 +[2026-03-02 03:50:52] (step=0030334) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.935042066131873, LR: 0.0003 +[2026-03-02 03:51:00] (step=0030335) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.935237722559186, LR: 0.0003 +[2026-03-02 03:51:08] (step=0030336) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.9354333789865, LR: 0.0003 +[2026-03-02 03:51:15] (step=0030337) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.935629035413814, LR: 0.0003 +[2026-03-02 03:51:23] (step=0030338) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 5.935824691841127, LR: 0.0003 +[2026-03-02 03:51:31] (step=0030339) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.936020348268441, LR: 0.0003 +[2026-03-02 03:51:39] (step=0030340) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.936216004695754, LR: 0.0003 +[2026-03-02 03:51:47] (step=0030341) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.936411661123068, LR: 0.0003 +[2026-03-02 03:51:55] (step=0030342) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.936607317550382, LR: 0.0003 +[2026-03-02 03:52:02] (step=0030343) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 5.936802973977695, LR: 0.0003 +[2026-03-02 03:52:10] (step=0030344) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.936998630405009, LR: 0.0003 +[2026-03-02 03:52:18] (step=0030345) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.937194286832322, LR: 0.0003 +[2026-03-02 03:52:26] (step=0030346) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.937389943259636, LR: 0.0003 +[2026-03-02 03:52:34] (step=0030347) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.937585599686949, LR: 0.0003 +[2026-03-02 03:52:42] (step=0030348) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 5.937781256114263, LR: 0.0003 +[2026-03-02 03:52:49] (step=0030349) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.937976912541577, LR: 0.0003 +[2026-03-02 03:52:57] (step=0030350) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 5.9381725689688905, LR: 0.0003 +[2026-03-02 03:53:05] (step=0030351) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.9383682253962045, LR: 0.0003 +[2026-03-02 03:53:13] (step=0030352) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 5.938563881823518, LR: 0.0003 +[2026-03-02 03:53:21] (step=0030353) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.938759538250832, LR: 0.0003 +[2026-03-02 03:53:29] (step=0030354) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.938955194678146, LR: 0.0003 +[2026-03-02 03:53:37] (step=0030355) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.939150851105459, LR: 0.0003 +[2026-03-02 03:53:44] (step=0030356) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.939346507532773, LR: 0.0003 +[2026-03-02 03:53:52] (step=0030357) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.939542163960086, LR: 0.0003 +[2026-03-02 03:54:00] (step=0030358) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.9397378203874, LR: 0.0003 +[2026-03-02 03:54:08] (step=0030359) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 5.939933476814713, LR: 0.0003 +[2026-03-02 03:54:16] (step=0030360) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 5.940129133242027, LR: 0.0003 +[2026-03-02 03:54:24] (step=0030361) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 5.940324789669341, LR: 0.0003 +[2026-03-02 03:54:31] (step=0030362) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.940520446096654, LR: 0.0003 +[2026-03-02 03:54:39] (step=0030363) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.940716102523968, LR: 0.0003 +[2026-03-02 03:54:47] (step=0030364) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 5.940911758951281, LR: 0.0003 +[2026-03-02 03:54:55] (step=0030365) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.941107415378595, LR: 0.0003 +[2026-03-02 03:55:03] (step=0030366) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.941303071805909, LR: 0.0003 +[2026-03-02 03:55:11] (step=0030367) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 5.941498728233222, LR: 0.0003 +[2026-03-02 03:55:19] (step=0030368) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.941694384660536, LR: 0.0003 +[2026-03-02 03:55:26] (step=0030369) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 5.9418900410878495, LR: 0.0003 +[2026-03-02 03:55:34] (step=0030370) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 5.9420856975151635, LR: 0.0003 +[2026-03-02 03:55:42] (step=0030371) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.942281353942477, LR: 0.0003 +[2026-03-02 03:55:50] (step=0030372) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.942477010369791, LR: 0.0003 +[2026-03-02 03:55:58] (step=0030373) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.942672666797105, LR: 0.0003 +[2026-03-02 03:56:06] (step=0030374) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.942868323224418, LR: 0.0003 +[2026-03-02 03:56:14] (step=0030375) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.943063979651732, LR: 0.0003 +[2026-03-02 03:56:22] (step=0030376) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.943259636079045, LR: 0.0003 +[2026-03-02 03:56:29] (step=0030377) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 5.943455292506359, LR: 0.0003 +[2026-03-02 03:56:37] (step=0030378) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.943650948933673, LR: 0.0003 +[2026-03-02 03:56:45] (step=0030379) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 5.943846605360986, LR: 0.0003 +[2026-03-02 03:56:53] (step=0030380) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 5.9440422617883, LR: 0.0003 +[2026-03-02 03:57:01] (step=0030381) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.944237918215613, LR: 0.0003 +[2026-03-02 03:57:09] (step=0030382) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 5.944433574642927, LR: 0.0003 +[2026-03-02 03:57:16] (step=0030383) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.94462923107024, LR: 0.0003 +[2026-03-02 03:57:24] (step=0030384) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.944824887497554, LR: 0.0003 +[2026-03-02 03:57:32] (step=0030385) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.945020543924868, LR: 0.0003 +[2026-03-02 03:57:40] (step=0030386) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.945216200352181, LR: 0.0003 +[2026-03-02 03:57:48] (step=0030387) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 5.945411856779495, LR: 0.0003 +[2026-03-02 03:57:56] (step=0030388) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 5.9456075132068085, LR: 0.0003 +[2026-03-02 03:58:04] (step=0030389) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.9458031696341225, LR: 0.0003 +[2026-03-02 03:58:11] (step=0030390) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.9459988260614365, LR: 0.0003 +[2026-03-02 03:58:19] (step=0030391) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.94619448248875, LR: 0.0003 +[2026-03-02 03:58:27] (step=0030392) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.946390138916064, LR: 0.0003 +[2026-03-02 03:58:35] (step=0030393) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.946585795343377, LR: 0.0003 +[2026-03-02 03:58:43] (step=0030394) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.946781451770691, LR: 0.0003 +[2026-03-02 03:58:51] (step=0030395) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 5.946977108198005, LR: 0.0003 +[2026-03-02 03:58:58] (step=0030396) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.947172764625318, LR: 0.0003 +[2026-03-02 03:59:06] (step=0030397) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.947368421052632, LR: 0.0003 +[2026-03-02 03:59:14] (step=0030398) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.947564077479945, LR: 0.0003 +[2026-03-02 03:59:22] (step=0030399) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.947759733907259, LR: 0.0003 +[2026-03-02 03:59:30] (step=0030400) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.947955390334572, LR: 0.0003 +[2026-03-02 03:59:38] (step=0030401) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 5.948151046761886, LR: 0.0003 +[2026-03-02 03:59:46] (step=0030402) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.9483467031892, LR: 0.0003 +[2026-03-02 03:59:53] (step=0030403) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.948542359616513, LR: 0.0003 +[2026-03-02 04:00:01] (step=0030404) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.948738016043827, LR: 0.0003 +[2026-03-02 04:00:09] (step=0030405) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 5.94893367247114, LR: 0.0003 +[2026-03-02 04:00:17] (step=0030406) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 5.949129328898454, LR: 0.0003 +[2026-03-02 04:00:25] (step=0030407) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 5.949324985325768, LR: 0.0003 +[2026-03-02 04:00:33] (step=0030408) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.9495206417530815, LR: 0.0003 +[2026-03-02 04:00:40] (step=0030409) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.9497162981803955, LR: 0.0003 +[2026-03-02 04:00:48] (step=0030410) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 5.949911954607709, LR: 0.0003 +[2026-03-02 04:00:56] (step=0030411) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.950107611035023, LR: 0.0003 +[2026-03-02 04:01:04] (step=0030412) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 5.950303267462336, LR: 0.0003 +[2026-03-02 04:01:12] (step=0030413) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 5.95049892388965, LR: 0.0003 +[2026-03-02 04:01:20] (step=0030414) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 5.950694580316964, LR: 0.0003 +[2026-03-02 04:01:27] (step=0030415) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.950890236744277, LR: 0.0003 +[2026-03-02 04:01:35] (step=0030416) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.951085893171591, LR: 0.0003 +[2026-03-02 04:01:43] (step=0030417) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 5.951281549598904, LR: 0.0003 +[2026-03-02 04:01:51] (step=0030418) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.951477206026218, LR: 0.0003 +[2026-03-02 04:01:59] (step=0030419) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 5.951672862453532, LR: 0.0003 +[2026-03-02 04:02:07] (step=0030420) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.951868518880845, LR: 0.0003 +[2026-03-02 04:02:14] (step=0030421) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.952064175308159, LR: 0.0003 +[2026-03-02 04:02:22] (step=0030422) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.952259831735472, LR: 0.0003 +[2026-03-02 04:02:30] (step=0030423) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 5.952455488162786, LR: 0.0003 +[2026-03-02 04:02:38] (step=0030424) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.952651144590099, LR: 0.0003 +[2026-03-02 04:02:46] (step=0030425) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.952846801017413, LR: 0.0003 +[2026-03-02 04:02:54] (step=0030426) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.953042457444727, LR: 0.0003 +[2026-03-02 04:03:02] (step=0030427) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.9532381138720405, LR: 0.0003 +[2026-03-02 04:03:10] (step=0030428) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.9534337702993545, LR: 0.0003 +[2026-03-02 04:03:17] (step=0030429) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.953629426726668, LR: 0.0003 +[2026-03-02 04:03:25] (step=0030430) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 5.953825083153982, LR: 0.0003 +[2026-03-02 04:03:33] (step=0030431) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 5.954020739581296, LR: 0.0003 +[2026-03-02 04:03:41] (step=0030432) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 5.954216396008609, LR: 0.0003 +[2026-03-02 04:03:49] (step=0030433) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 5.954412052435923, LR: 0.0003 +[2026-03-02 04:03:57] (step=0030434) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 5.954607708863236, LR: 0.0003 +[2026-03-02 04:04:04] (step=0030435) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.95480336529055, LR: 0.0003 +[2026-03-02 04:04:12] (step=0030436) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 5.954999021717863, LR: 0.0003 +[2026-03-02 04:04:20] (step=0030437) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.955194678145177, LR: 0.0003 +[2026-03-02 04:04:28] (step=0030438) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.955390334572491, LR: 0.0003 +[2026-03-02 04:04:36] (step=0030439) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.955585990999804, LR: 0.0003 +[2026-03-02 04:04:44] (step=0030440) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 5.955781647427118, LR: 0.0003 +[2026-03-02 04:04:51] (step=0030441) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 5.955977303854431, LR: 0.0003 +[2026-03-02 04:04:59] (step=0030442) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.956172960281745, LR: 0.0003 +[2026-03-02 04:05:07] (step=0030443) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.956368616709059, LR: 0.0003 +[2026-03-02 04:05:15] (step=0030444) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.956564273136372, LR: 0.0003 +[2026-03-02 04:05:23] (step=0030445) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.956759929563686, LR: 0.0003 +[2026-03-02 04:05:31] (step=0030446) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 5.9569555859909995, LR: 0.0003 +[2026-03-02 04:05:38] (step=0030447) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 5.9571512424183135, LR: 0.0003 +[2026-03-02 04:05:46] (step=0030448) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 5.9573468988456275, LR: 0.0003 +[2026-03-02 04:05:54] (step=0030449) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 5.957542555272941, LR: 0.0003 +[2026-03-02 04:06:02] (step=0030450) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 5.957738211700255, LR: 0.0003 +[2026-03-02 04:06:10] (step=0030451) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 5.957933868127568, LR: 0.0003 +[2026-03-02 04:06:18] (step=0030452) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 5.958129524554882, LR: 0.0003 +[2026-03-02 04:06:25] (step=0030453) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.958325180982195, LR: 0.0003 +[2026-03-02 04:06:33] (step=0030454) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.958520837409509, LR: 0.0003 +[2026-03-02 04:06:41] (step=0030455) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.958716493836823, LR: 0.0003 +[2026-03-02 04:06:49] (step=0030456) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 5.958912150264136, LR: 0.0003 +[2026-03-02 04:06:57] (step=0030457) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.95910780669145, LR: 0.0003 +[2026-03-02 04:07:05] (step=0030458) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.959303463118763, LR: 0.0003 +[2026-03-02 04:07:13] (step=0030459) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 5.959499119546077, LR: 0.0003 +[2026-03-02 04:07:20] (step=0030460) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 5.959694775973391, LR: 0.0003 +[2026-03-02 04:07:28] (step=0030461) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 5.959890432400704, LR: 0.0003 +[2026-03-02 04:07:36] (step=0030462) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.960086088828018, LR: 0.0003 +[2026-03-02 04:07:44] (step=0030463) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.960281745255331, LR: 0.0003 +[2026-03-02 04:07:52] (step=0030464) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 5.960477401682645, LR: 0.0003 +[2026-03-02 04:08:00] (step=0030465) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 5.9606730581099585, LR: 0.0003 +[2026-03-02 04:08:07] (step=0030466) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.9608687145372725, LR: 0.0003 +[2026-03-02 04:08:15] (step=0030467) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.9610643709645865, LR: 0.0003 +[2026-03-02 04:08:23] (step=0030468) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.9612600273919, LR: 0.0003 +[2026-03-02 04:08:31] (step=0030469) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.961455683819214, LR: 0.0003 +[2026-03-02 04:08:39] (step=0030470) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.961651340246527, LR: 0.0003 +[2026-03-02 04:08:47] (step=0030471) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 5.961846996673841, LR: 0.0003 +[2026-03-02 04:08:55] (step=0030472) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 5.962042653101155, LR: 0.0003 +[2026-03-02 04:09:03] (step=0030473) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.962238309528468, LR: 0.0003 +[2026-03-02 04:09:10] (step=0030474) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.962433965955782, LR: 0.0003 +[2026-03-02 04:09:18] (step=0030475) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.962629622383095, LR: 0.0003 +[2026-03-02 04:09:26] (step=0030476) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 5.962825278810409, LR: 0.0003 +[2026-03-02 04:09:34] (step=0030477) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 5.963020935237722, LR: 0.0003 +[2026-03-02 04:09:42] (step=0030478) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 5.963216591665036, LR: 0.0003 +[2026-03-02 04:09:50] (step=0030479) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 5.96341224809235, LR: 0.0003 +[2026-03-02 04:09:58] (step=0030480) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.963607904519663, LR: 0.0003 +[2026-03-02 04:10:05] (step=0030481) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 5.963803560946977, LR: 0.0003 +[2026-03-02 04:10:13] (step=0030482) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.96399921737429, LR: 0.0003 +[2026-03-02 04:10:21] (step=0030483) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 5.964194873801604, LR: 0.0003 +[2026-03-02 04:10:29] (step=0030484) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 5.964390530228918, LR: 0.0003 +[2026-03-02 04:10:37] (step=0030485) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.9645861866562315, LR: 0.0003 +[2026-03-02 04:10:45] (step=0030486) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 5.9647818430835455, LR: 0.0003 +[2026-03-02 04:10:52] (step=0030487) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.964977499510859, LR: 0.0003 +[2026-03-02 04:11:00] (step=0030488) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.965173155938173, LR: 0.0003 +[2026-03-02 04:11:08] (step=0030489) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.965368812365486, LR: 0.0003 +[2026-03-02 04:11:16] (step=0030490) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.9655644687928, LR: 0.0003 +[2026-03-02 04:11:24] (step=0030491) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.965760125220114, LR: 0.0003 +[2026-03-02 04:11:32] (step=0030492) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.965955781647427, LR: 0.0003 +[2026-03-02 04:11:40] (step=0030493) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 5.966151438074741, LR: 0.0003 +[2026-03-02 04:11:47] (step=0030494) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.966347094502054, LR: 0.0003 +[2026-03-02 04:11:55] (step=0030495) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.966542750929368, LR: 0.0003 +[2026-03-02 04:12:03] (step=0030496) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 5.966738407356682, LR: 0.0003 +[2026-03-02 04:12:11] (step=0030497) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.966934063783995, LR: 0.0003 +[2026-03-02 04:12:19] (step=0030498) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.967129720211309, LR: 0.0003 +[2026-03-02 04:12:27] (step=0030499) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.967325376638622, LR: 0.0003 +[2026-03-02 04:12:34] (step=0030500) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.967521033065936, LR: 0.0003 +[2026-03-02 04:12:34] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0030500/ +[2026-03-02 04:12:42] (step=0030501) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.96771668949325, LR: 0.0003 +[2026-03-02 04:12:50] (step=0030502) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 5.967912345920563, LR: 0.0003 +[2026-03-02 04:12:58] (step=0030503) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 5.968108002347877, LR: 0.0003 +[2026-03-02 04:13:06] (step=0030504) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.9683036587751905, LR: 0.0003 +[2026-03-02 04:13:14] (step=0030505) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 5.9684993152025045, LR: 0.0003 +[2026-03-02 04:13:21] (step=0030506) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 5.968694971629818, LR: 0.0003 +[2026-03-02 04:13:29] (step=0030507) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 5.968890628057132, LR: 0.0003 +[2026-03-02 04:13:37] (step=0030508) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 5.969086284484446, LR: 0.0003 +[2026-03-02 04:13:45] (step=0030509) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.969281940911759, LR: 0.0003 +[2026-03-02 04:13:53] (step=0030510) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.969477597339073, LR: 0.0003 +[2026-03-02 04:14:01] (step=0030511) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.969673253766386, LR: 0.0003 +[2026-03-02 04:14:09] (step=0030512) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.9698689101937, LR: 0.0003 +[2026-03-02 04:14:16] (step=0030513) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 5.970064566621014, LR: 0.0003 +[2026-03-02 04:14:24] (step=0030514) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.970260223048327, LR: 0.0003 +[2026-03-02 04:14:32] (step=0030515) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 5.970455879475641, LR: 0.0003 +[2026-03-02 04:14:40] (step=0030516) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 5.970651535902954, LR: 0.0003 +[2026-03-02 04:14:48] (step=0030517) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 5.970847192330268, LR: 0.0003 +[2026-03-02 04:14:56] (step=0030518) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 5.971042848757581, LR: 0.0003 +[2026-03-02 04:15:04] (step=0030519) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 5.971238505184895, LR: 0.0003 +[2026-03-02 04:15:11] (step=0030520) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 5.971434161612209, LR: 0.0003 +[2026-03-02 04:15:19] (step=0030521) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 5.971629818039522, LR: 0.0003 +[2026-03-02 04:15:27] (step=0030522) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 5.971825474466836, LR: 0.0003 +[2026-03-02 04:15:35] (step=0030523) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.9720211308941495, LR: 0.0003 +[2026-03-02 04:15:43] (step=0030524) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 5.9722167873214635, LR: 0.0003 +[2026-03-02 04:15:51] (step=0030525) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.9724124437487776, LR: 0.0003 +[2026-03-02 04:15:59] (step=0030526) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.972608100176091, LR: 0.0003 +[2026-03-02 04:16:06] (step=0030527) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 5.972803756603405, LR: 0.0003 +[2026-03-02 04:16:14] (step=0030528) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.972999413030718, LR: 0.0003 +[2026-03-02 04:16:22] (step=0030529) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 5.973195069458032, LR: 0.0003 +[2026-03-02 04:16:30] (step=0030530) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.973390725885345, LR: 0.0003 +[2026-03-02 04:16:38] (step=0030531) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.973586382312659, LR: 0.0003 +[2026-03-02 04:16:46] (step=0030532) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 5.973782038739973, LR: 0.0003 +[2026-03-02 04:16:54] (step=0030533) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 5.973977695167286, LR: 0.0003 +[2026-03-02 04:17:01] (step=0030534) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 5.9741733515946, LR: 0.0003 +[2026-03-02 04:17:09] (step=0030535) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.974369008021913, LR: 0.0003 +[2026-03-02 04:17:17] (step=0030536) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 5.974564664449227, LR: 0.0003 +[2026-03-02 04:17:25] (step=0030537) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 5.974760320876541, LR: 0.0003 +[2026-03-02 04:17:33] (step=0030538) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 5.974955977303854, LR: 0.0003 +[2026-03-02 04:17:41] (step=0030539) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.975151633731168, LR: 0.0003 +[2026-03-02 04:17:48] (step=0030540) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 5.975347290158481, LR: 0.0003 +[2026-03-02 04:17:56] (step=0030541) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 5.975542946585795, LR: 0.0003 +[2026-03-02 04:18:04] (step=0030542) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 5.9757386030131086, LR: 0.0003 +[2026-03-02 04:18:12] (step=0030543) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 5.975934259440423, LR: 0.0003 +[2026-03-02 04:18:20] (step=0030544) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 5.976129915867737, LR: 0.0003 +[2026-03-02 04:18:28] (step=0030545) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.97632557229505, LR: 0.0003 +[2026-03-02 04:18:35] (step=0030546) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 5.976521228722364, LR: 0.0003 +[2026-03-02 04:18:43] (step=0030547) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 5.976716885149677, LR: 0.0003 +[2026-03-02 04:18:51] (step=0030548) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.976912541576991, LR: 0.0003 +[2026-03-02 04:18:59] (step=0030549) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 5.977108198004305, LR: 0.0003 +[2026-03-02 04:19:07] (step=0030550) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 5.977303854431618, LR: 0.0003 +[2026-03-02 04:19:15] (step=0030551) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.977499510858932, LR: 0.0003 +[2026-03-02 04:19:22] (step=0030552) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 5.977695167286245, LR: 0.0003 +[2026-03-02 04:19:30] (step=0030553) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 5.977890823713559, LR: 0.0003 +[2026-03-02 04:19:38] (step=0030554) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 5.978086480140873, LR: 0.0003 +[2026-03-02 04:19:46] (step=0030555) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 5.978282136568186, LR: 0.0003 +[2026-03-02 04:19:54] (step=0030556) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 5.9784777929955, LR: 0.0003 +[2026-03-02 04:20:02] (step=0030557) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.978673449422813, LR: 0.0003 +[2026-03-02 04:20:09] (step=0030558) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.978869105850127, LR: 0.0003 +[2026-03-02 04:20:17] (step=0030559) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.97906476227744, LR: 0.0003 +[2026-03-02 04:20:25] (step=0030560) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.979260418704754, LR: 0.0003 +[2026-03-02 04:20:33] (step=0030561) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 5.9794560751320684, LR: 0.0003 +[2026-03-02 04:20:41] (step=0030562) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.979651731559382, LR: 0.0003 +[2026-03-02 04:20:49] (step=0030563) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.979847387986696, LR: 0.0003 +[2026-03-02 04:20:57] (step=0030564) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 5.980043044414009, LR: 0.0003 +[2026-03-02 04:21:04] (step=0030565) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.980238700841323, LR: 0.0003 +[2026-03-02 04:21:12] (step=0030566) Train Loss: 0.4430, Train Steps/Sec: 0.12, Epoch: 5.980434357268637, LR: 0.0003 +[2026-03-02 04:21:20] (step=0030567) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 5.98063001369595, LR: 0.0003 +[2026-03-02 04:21:28] (step=0030568) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 5.980825670123264, LR: 0.0003 +[2026-03-02 04:21:36] (step=0030569) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.981021326550577, LR: 0.0003 +[2026-03-02 04:21:44] (step=0030570) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.981216982977891, LR: 0.0003 +[2026-03-02 04:21:52] (step=0030571) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 5.981412639405204, LR: 0.0003 +[2026-03-02 04:22:00] (step=0030572) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 5.981608295832518, LR: 0.0003 +[2026-03-02 04:22:07] (step=0030573) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.981803952259832, LR: 0.0003 +[2026-03-02 04:22:15] (step=0030574) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 5.981999608687145, LR: 0.0003 +[2026-03-02 04:22:23] (step=0030575) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 5.982195265114459, LR: 0.0003 +[2026-03-02 04:22:31] (step=0030576) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 5.982390921541772, LR: 0.0003 +[2026-03-02 04:22:39] (step=0030577) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 5.982586577969086, LR: 0.0003 +[2026-03-02 04:22:47] (step=0030578) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 5.9827822343964, LR: 0.0003 +[2026-03-02 04:22:55] (step=0030579) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.9829778908237135, LR: 0.0003 +[2026-03-02 04:23:02] (step=0030580) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 5.9831735472510275, LR: 0.0003 +[2026-03-02 04:23:10] (step=0030581) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 5.983369203678341, LR: 0.0003 +[2026-03-02 04:23:18] (step=0030582) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 5.983564860105655, LR: 0.0003 +[2026-03-02 04:23:26] (step=0030583) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 5.983760516532968, LR: 0.0003 +[2026-03-02 04:23:34] (step=0030584) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 5.983956172960282, LR: 0.0003 +[2026-03-02 04:23:42] (step=0030585) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 5.984151829387596, LR: 0.0003 +[2026-03-02 04:23:50] (step=0030586) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.984347485814909, LR: 0.0003 +[2026-03-02 04:23:57] (step=0030587) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.984543142242223, LR: 0.0003 +[2026-03-02 04:24:05] (step=0030588) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 5.984738798669536, LR: 0.0003 +[2026-03-02 04:24:13] (step=0030589) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 5.98493445509685, LR: 0.0003 +[2026-03-02 04:24:21] (step=0030590) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.985130111524164, LR: 0.0003 +[2026-03-02 04:24:29] (step=0030591) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 5.985325767951477, LR: 0.0003 +[2026-03-02 04:24:37] (step=0030592) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 5.985521424378791, LR: 0.0003 +[2026-03-02 04:24:44] (step=0030593) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 5.985717080806104, LR: 0.0003 +[2026-03-02 04:24:52] (step=0030594) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.985912737233418, LR: 0.0003 +[2026-03-02 04:25:00] (step=0030595) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 5.986108393660731, LR: 0.0003 +[2026-03-02 04:25:08] (step=0030596) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 5.986304050088045, LR: 0.0003 +[2026-03-02 04:25:16] (step=0030597) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 5.986499706515359, LR: 0.0003 +[2026-03-02 04:25:24] (step=0030598) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 5.9866953629426725, LR: 0.0003 +[2026-03-02 04:25:32] (step=0030599) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 5.9868910193699865, LR: 0.0003 +[2026-03-02 04:25:39] (step=0030600) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 5.9870866757973, LR: 0.0003 +[2026-03-02 04:25:47] (step=0030601) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 5.987282332224614, LR: 0.0003 +[2026-03-02 04:25:55] (step=0030602) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 5.987477988651928, LR: 0.0003 +[2026-03-02 04:26:03] (step=0030603) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 5.987673645079241, LR: 0.0003 +[2026-03-02 04:26:11] (step=0030604) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 5.987869301506555, LR: 0.0003 +[2026-03-02 04:26:19] (step=0030605) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 5.988064957933868, LR: 0.0003 +[2026-03-02 04:26:26] (step=0030606) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 5.988260614361182, LR: 0.0003 +[2026-03-02 04:26:34] (step=0030607) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 5.988456270788495, LR: 0.0003 +[2026-03-02 04:26:42] (step=0030608) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 5.988651927215809, LR: 0.0003 +[2026-03-02 04:26:50] (step=0030609) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 5.988847583643123, LR: 0.0003 +[2026-03-02 04:26:58] (step=0030610) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 5.989043240070436, LR: 0.0003 +[2026-03-02 04:27:06] (step=0030611) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.98923889649775, LR: 0.0003 +[2026-03-02 04:27:13] (step=0030612) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 5.989434552925063, LR: 0.0003 +[2026-03-02 04:27:21] (step=0030613) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 5.989630209352377, LR: 0.0003 +[2026-03-02 04:27:29] (step=0030614) Train Loss: 0.4434, Train Steps/Sec: 0.12, Epoch: 5.989825865779691, LR: 0.0003 +[2026-03-02 04:27:37] (step=0030615) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 5.990021522207004, LR: 0.0003 +[2026-03-02 04:27:45] (step=0030616) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 5.990217178634318, LR: 0.0003 +[2026-03-02 04:27:53] (step=0030617) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 5.9904128350616315, LR: 0.0003 +[2026-03-02 04:28:01] (step=0030618) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 5.9906084914889455, LR: 0.0003 +[2026-03-02 04:28:09] (step=0030619) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 5.9908041479162595, LR: 0.0003 +[2026-03-02 04:28:16] (step=0030620) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 5.990999804343573, LR: 0.0003 +[2026-03-02 04:28:24] (step=0030621) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 5.991195460770887, LR: 0.0003 +[2026-03-02 04:28:32] (step=0030622) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 5.9913911171982, LR: 0.0003 +[2026-03-02 04:28:40] (step=0030623) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 5.991586773625514, LR: 0.0003 +[2026-03-02 04:28:48] (step=0030624) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 5.991782430052827, LR: 0.0003 +[2026-03-02 04:28:56] (step=0030625) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 5.991978086480141, LR: 0.0003 +[2026-03-02 04:29:04] (step=0030626) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 5.992173742907455, LR: 0.0003 +[2026-03-02 04:29:11] (step=0030627) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 5.992369399334768, LR: 0.0003 +[2026-03-02 04:29:19] (step=0030628) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 5.992565055762082, LR: 0.0003 +[2026-03-02 04:29:27] (step=0030629) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.992760712189395, LR: 0.0003 +[2026-03-02 04:29:35] (step=0030630) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 5.992956368616709, LR: 0.0003 +[2026-03-02 04:29:43] (step=0030631) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 5.993152025044023, LR: 0.0003 +[2026-03-02 04:29:51] (step=0030632) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 5.993347681471336, LR: 0.0003 +[2026-03-02 04:29:58] (step=0030633) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 5.99354333789865, LR: 0.0003 +[2026-03-02 04:30:06] (step=0030634) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.993738994325963, LR: 0.0003 +[2026-03-02 04:30:14] (step=0030635) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 5.993934650753277, LR: 0.0003 +[2026-03-02 04:30:22] (step=0030636) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 5.9941303071805905, LR: 0.0003 +[2026-03-02 04:30:30] (step=0030637) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.9943259636079045, LR: 0.0003 +[2026-03-02 04:30:38] (step=0030638) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.9945216200352185, LR: 0.0003 +[2026-03-02 04:30:45] (step=0030639) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 5.994717276462532, LR: 0.0003 +[2026-03-02 04:30:53] (step=0030640) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 5.994912932889846, LR: 0.0003 +[2026-03-02 04:31:01] (step=0030641) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 5.995108589317159, LR: 0.0003 +[2026-03-02 04:31:09] (step=0030642) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 5.995304245744473, LR: 0.0003 +[2026-03-02 04:31:17] (step=0030643) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.995499902171787, LR: 0.0003 +[2026-03-02 04:31:25] (step=0030644) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 5.9956955585991, LR: 0.0003 +[2026-03-02 04:31:33] (step=0030645) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 5.995891215026414, LR: 0.0003 +[2026-03-02 04:31:40] (step=0030646) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 5.996086871453727, LR: 0.0003 +[2026-03-02 04:31:48] (step=0030647) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 5.996282527881041, LR: 0.0003 +[2026-03-02 04:31:56] (step=0030648) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 5.996478184308354, LR: 0.0003 +[2026-03-02 04:32:04] (step=0030649) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 5.996673840735668, LR: 0.0003 +[2026-03-02 04:32:12] (step=0030650) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 5.996869497162982, LR: 0.0003 +[2026-03-02 04:32:20] (step=0030651) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 5.997065153590295, LR: 0.0003 +[2026-03-02 04:32:28] (step=0030652) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 5.997260810017609, LR: 0.0003 +[2026-03-02 04:32:35] (step=0030653) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 5.997456466444922, LR: 0.0003 +[2026-03-02 04:32:43] (step=0030654) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 5.997652122872236, LR: 0.0003 +[2026-03-02 04:32:51] (step=0030655) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 5.99784777929955, LR: 0.0003 +[2026-03-02 04:32:59] (step=0030656) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 5.9980434357268635, LR: 0.0003 +[2026-03-02 04:33:07] (step=0030657) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 5.9982390921541775, LR: 0.0003 +[2026-03-02 04:33:15] (step=0030658) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 5.998434748581491, LR: 0.0003 +[2026-03-02 04:33:22] (step=0030659) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 5.998630405008805, LR: 0.0003 +[2026-03-02 04:33:30] (step=0030660) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 5.998826061436118, LR: 0.0003 +[2026-03-02 04:33:38] (step=0030661) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 5.999021717863432, LR: 0.0003 +[2026-03-02 04:33:46] (step=0030662) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 5.999217374290746, LR: 0.0003 +[2026-03-02 04:33:54] (step=0030663) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 5.999413030718059, LR: 0.0003 +[2026-03-02 04:34:02] (step=0030664) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 5.999608687145373, LR: 0.0003 +[2026-03-02 04:34:10] (step=0030665) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 5.999804343572686, LR: 0.0003 +[2026-03-02 04:34:18] (step=0030666) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.0, LR: 0.0003 +[2026-03-02 04:34:18] Beginning epoch 6... +[2026-03-02 04:34:28] (step=0030667) Train Loss: 0.4397, Train Steps/Sec: 0.10, Epoch: 6.000195656427314, LR: 0.0003 +[2026-03-02 04:34:35] (step=0030668) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.000391312854627, LR: 0.0003 +[2026-03-02 04:34:43] (step=0030669) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.000586969281941, LR: 0.0003 +[2026-03-02 04:34:51] (step=0030670) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.000782625709254, LR: 0.0003 +[2026-03-02 04:34:59] (step=0030671) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.000978282136568, LR: 0.0003 +[2026-03-02 04:35:07] (step=0030672) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.001173938563882, LR: 0.0003 +[2026-03-02 04:35:15] (step=0030673) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.001369594991195, LR: 0.0003 +[2026-03-02 04:35:23] (step=0030674) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.001565251418509, LR: 0.0003 +[2026-03-02 04:35:30] (step=0030675) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.0017609078458225, LR: 0.0003 +[2026-03-02 04:35:38] (step=0030676) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.0019565642731365, LR: 0.0003 +[2026-03-02 04:35:46] (step=0030677) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.00215222070045, LR: 0.0003 +[2026-03-02 04:35:54] (step=0030678) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.002347877127764, LR: 0.0003 +[2026-03-02 04:36:02] (step=0030679) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.002543533555078, LR: 0.0003 +[2026-03-02 04:36:10] (step=0030680) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.002739189982391, LR: 0.0003 +[2026-03-02 04:36:18] (step=0030681) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.002934846409705, LR: 0.0003 +[2026-03-02 04:36:25] (step=0030682) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.003130502837018, LR: 0.0003 +[2026-03-02 04:36:33] (step=0030683) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.003326159264332, LR: 0.0003 +[2026-03-02 04:36:41] (step=0030684) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.003521815691646, LR: 0.0003 +[2026-03-02 04:36:49] (step=0030685) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.003717472118959, LR: 0.0003 +[2026-03-02 04:36:57] (step=0030686) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 6.003913128546273, LR: 0.0003 +[2026-03-02 04:37:05] (step=0030687) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.004108784973586, LR: 0.0003 +[2026-03-02 04:37:13] (step=0030688) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.0043044414009, LR: 0.0003 +[2026-03-02 04:37:20] (step=0030689) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.004500097828213, LR: 0.0003 +[2026-03-02 04:37:28] (step=0030690) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.004695754255527, LR: 0.0003 +[2026-03-02 04:37:36] (step=0030691) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 6.004891410682841, LR: 0.0003 +[2026-03-02 04:37:44] (step=0030692) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.005087067110154, LR: 0.0003 +[2026-03-02 04:37:52] (step=0030693) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.005282723537468, LR: 0.0003 +[2026-03-02 04:38:00] (step=0030694) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.0054783799647815, LR: 0.0003 +[2026-03-02 04:38:07] (step=0030695) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.0056740363920955, LR: 0.0003 +[2026-03-02 04:38:15] (step=0030696) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.0058696928194095, LR: 0.0003 +[2026-03-02 04:38:23] (step=0030697) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.006065349246723, LR: 0.0003 +[2026-03-02 04:38:31] (step=0030698) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.006261005674037, LR: 0.0003 +[2026-03-02 04:38:39] (step=0030699) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.00645666210135, LR: 0.0003 +[2026-03-02 04:38:47] (step=0030700) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.006652318528664, LR: 0.0003 +[2026-03-02 04:38:54] (step=0030701) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 6.006847974955977, LR: 0.0003 +[2026-03-02 04:39:02] (step=0030702) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.007043631383291, LR: 0.0003 +[2026-03-02 04:39:10] (step=0030703) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.007239287810605, LR: 0.0003 +[2026-03-02 04:39:18] (step=0030704) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.007434944237918, LR: 0.0003 +[2026-03-02 04:39:26] (step=0030705) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.007630600665232, LR: 0.0003 +[2026-03-02 04:39:34] (step=0030706) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.007826257092545, LR: 0.0003 +[2026-03-02 04:39:41] (step=0030707) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 6.008021913519859, LR: 0.0003 +[2026-03-02 04:39:49] (step=0030708) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.008217569947173, LR: 0.0003 +[2026-03-02 04:39:57] (step=0030709) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.008413226374486, LR: 0.0003 +[2026-03-02 04:40:05] (step=0030710) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.0086088828018, LR: 0.0003 +[2026-03-02 04:40:13] (step=0030711) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.008804539229113, LR: 0.0003 +[2026-03-02 04:40:21] (step=0030712) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.009000195656427, LR: 0.0003 +[2026-03-02 04:40:29] (step=0030713) Train Loss: 0.4385, Train Steps/Sec: 0.12, Epoch: 6.0091958520837405, LR: 0.0003 +[2026-03-02 04:40:37] (step=0030714) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.0093915085110545, LR: 0.0003 +[2026-03-02 04:40:44] (step=0030715) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.0095871649383685, LR: 0.0003 +[2026-03-02 04:40:52] (step=0030716) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.009782821365682, LR: 0.0003 +[2026-03-02 04:41:00] (step=0030717) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.009978477792996, LR: 0.0003 +[2026-03-02 04:41:08] (step=0030718) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.010174134220309, LR: 0.0003 +[2026-03-02 04:41:16] (step=0030719) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.010369790647623, LR: 0.0003 +[2026-03-02 04:41:24] (step=0030720) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.010565447074937, LR: 0.0003 +[2026-03-02 04:41:32] (step=0030721) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 6.01076110350225, LR: 0.0003 +[2026-03-02 04:41:39] (step=0030722) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.010956759929564, LR: 0.0003 +[2026-03-02 04:41:47] (step=0030723) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.011152416356877, LR: 0.0003 +[2026-03-02 04:41:55] (step=0030724) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.011348072784191, LR: 0.0003 +[2026-03-02 04:42:03] (step=0030725) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.011543729211505, LR: 0.0003 +[2026-03-02 04:42:11] (step=0030726) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.011739385638818, LR: 0.0003 +[2026-03-02 04:42:19] (step=0030727) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.011935042066132, LR: 0.0003 +[2026-03-02 04:42:26] (step=0030728) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.012130698493445, LR: 0.0003 +[2026-03-02 04:42:34] (step=0030729) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.012326354920759, LR: 0.0003 +[2026-03-02 04:42:42] (step=0030730) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.012522011348072, LR: 0.0003 +[2026-03-02 04:42:50] (step=0030731) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.012717667775386, LR: 0.0003 +[2026-03-02 04:42:58] (step=0030732) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.0129133242027, LR: 0.0003 +[2026-03-02 04:43:06] (step=0030733) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.0131089806300135, LR: 0.0003 +[2026-03-02 04:43:14] (step=0030734) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.0133046370573275, LR: 0.0003 +[2026-03-02 04:43:21] (step=0030735) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 6.013500293484641, LR: 0.0003 +[2026-03-02 04:43:29] (step=0030736) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.013695949911955, LR: 0.0003 +[2026-03-02 04:43:37] (step=0030737) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.013891606339269, LR: 0.0003 +[2026-03-02 04:43:45] (step=0030738) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.014087262766582, LR: 0.0003 +[2026-03-02 04:43:53] (step=0030739) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.014282919193896, LR: 0.0003 +[2026-03-02 04:44:01] (step=0030740) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.014478575621209, LR: 0.0003 +[2026-03-02 04:44:08] (step=0030741) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.014674232048523, LR: 0.0003 +[2026-03-02 04:44:16] (step=0030742) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.014869888475836, LR: 0.0003 +[2026-03-02 04:44:24] (step=0030743) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.01506554490315, LR: 0.0003 +[2026-03-02 04:44:32] (step=0030744) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.015261201330464, LR: 0.0003 +[2026-03-02 04:44:40] (step=0030745) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.015456857757777, LR: 0.0003 +[2026-03-02 04:44:48] (step=0030746) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.015652514185091, LR: 0.0003 +[2026-03-02 04:44:55] (step=0030747) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.015848170612404, LR: 0.0003 +[2026-03-02 04:45:03] (step=0030748) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.016043827039718, LR: 0.0003 +[2026-03-02 04:45:11] (step=0030749) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.016239483467032, LR: 0.0003 +[2026-03-02 04:45:19] (step=0030750) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.016435139894345, LR: 0.0003 +[2026-03-02 04:45:27] (step=0030751) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.016630796321659, LR: 0.0003 +[2026-03-02 04:45:35] (step=0030752) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.0168264527489725, LR: 0.0003 +[2026-03-02 04:45:43] (step=0030753) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.0170221091762865, LR: 0.0003 +[2026-03-02 04:45:50] (step=0030754) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.0172177656036, LR: 0.0003 +[2026-03-02 04:45:58] (step=0030755) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.017413422030914, LR: 0.0003 +[2026-03-02 04:46:06] (step=0030756) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.017609078458228, LR: 0.0003 +[2026-03-02 04:46:14] (step=0030757) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.017804734885541, LR: 0.0003 +[2026-03-02 04:46:22] (step=0030758) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.018000391312855, LR: 0.0003 +[2026-03-02 04:46:30] (step=0030759) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.018196047740168, LR: 0.0003 +[2026-03-02 04:46:37] (step=0030760) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.018391704167482, LR: 0.0003 +[2026-03-02 04:46:45] (step=0030761) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.018587360594796, LR: 0.0003 +[2026-03-02 04:46:53] (step=0030762) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.018783017022109, LR: 0.0003 +[2026-03-02 04:47:01] (step=0030763) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.018978673449423, LR: 0.0003 +[2026-03-02 04:47:09] (step=0030764) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 6.019174329876736, LR: 0.0003 +[2026-03-02 04:47:17] (step=0030765) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.01936998630405, LR: 0.0003 +[2026-03-02 04:47:25] (step=0030766) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.019565642731363, LR: 0.0003 +[2026-03-02 04:47:33] (step=0030767) Train Loss: 0.4455, Train Steps/Sec: 0.12, Epoch: 6.019761299158677, LR: 0.0003 +[2026-03-02 04:47:40] (step=0030768) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.019956955585991, LR: 0.0003 +[2026-03-02 04:47:48] (step=0030769) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.020152612013304, LR: 0.0003 +[2026-03-02 04:47:56] (step=0030770) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.020348268440618, LR: 0.0003 +[2026-03-02 04:48:04] (step=0030771) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.0205439248679316, LR: 0.0003 +[2026-03-02 04:48:12] (step=0030772) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 6.020739581295246, LR: 0.0003 +[2026-03-02 04:48:20] (step=0030773) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.02093523772256, LR: 0.0003 +[2026-03-02 04:48:27] (step=0030774) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.021130894149873, LR: 0.0003 +[2026-03-02 04:48:35] (step=0030775) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.021326550577187, LR: 0.0003 +[2026-03-02 04:48:43] (step=0030776) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.0215222070045, LR: 0.0003 +[2026-03-02 04:48:51] (step=0030777) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.021717863431814, LR: 0.0003 +[2026-03-02 04:48:59] (step=0030778) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.021913519859127, LR: 0.0003 +[2026-03-02 04:49:07] (step=0030779) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.022109176286441, LR: 0.0003 +[2026-03-02 04:49:15] (step=0030780) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.022304832713755, LR: 0.0003 +[2026-03-02 04:49:22] (step=0030781) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.022500489141068, LR: 0.0003 +[2026-03-02 04:49:30] (step=0030782) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.022696145568382, LR: 0.0003 +[2026-03-02 04:49:38] (step=0030783) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.022891801995695, LR: 0.0003 +[2026-03-02 04:49:46] (step=0030784) Train Loss: 0.4395, Train Steps/Sec: 0.12, Epoch: 6.023087458423009, LR: 0.0003 +[2026-03-02 04:49:54] (step=0030785) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.023283114850323, LR: 0.0003 +[2026-03-02 04:50:02] (step=0030786) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.023478771277636, LR: 0.0003 +[2026-03-02 04:50:10] (step=0030787) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.02367442770495, LR: 0.0003 +[2026-03-02 04:50:18] (step=0030788) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.023870084132263, LR: 0.0003 +[2026-03-02 04:50:26] (step=0030789) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.024065740559577, LR: 0.0003 +[2026-03-02 04:50:33] (step=0030790) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.0242613969868914, LR: 0.0003 +[2026-03-02 04:50:41] (step=0030791) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.024457053414205, LR: 0.0003 +[2026-03-02 04:50:49] (step=0030792) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.024652709841519, LR: 0.0003 +[2026-03-02 04:50:57] (step=0030793) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.024848366268832, LR: 0.0003 +[2026-03-02 04:51:05] (step=0030794) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.025044022696146, LR: 0.0003 +[2026-03-02 04:51:13] (step=0030795) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.025239679123459, LR: 0.0003 +[2026-03-02 04:51:20] (step=0030796) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 6.025435335550773, LR: 0.0003 +[2026-03-02 04:51:28] (step=0030797) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.025630991978087, LR: 0.0003 +[2026-03-02 04:51:36] (step=0030798) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.0258266484054, LR: 0.0003 +[2026-03-02 04:51:44] (step=0030799) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.026022304832714, LR: 0.0003 +[2026-03-02 04:51:52] (step=0030800) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.026217961260027, LR: 0.0003 +[2026-03-02 04:52:00] (step=0030801) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.026413617687341, LR: 0.0003 +[2026-03-02 04:52:08] (step=0030802) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.026609274114655, LR: 0.0003 +[2026-03-02 04:52:15] (step=0030803) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.026804930541968, LR: 0.0003 +[2026-03-02 04:52:23] (step=0030804) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.027000586969282, LR: 0.0003 +[2026-03-02 04:52:31] (step=0030805) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.027196243396595, LR: 0.0003 +[2026-03-02 04:52:39] (step=0030806) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.027391899823909, LR: 0.0003 +[2026-03-02 04:52:47] (step=0030807) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.0275875562512224, LR: 0.0003 +[2026-03-02 04:52:55] (step=0030808) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.0277832126785365, LR: 0.0003 +[2026-03-02 04:53:02] (step=0030809) Train Loss: 0.4240, Train Steps/Sec: 0.13, Epoch: 6.0279788691058505, LR: 0.0003 +[2026-03-02 04:53:10] (step=0030810) Train Loss: 0.4479, Train Steps/Sec: 0.12, Epoch: 6.028174525533164, LR: 0.0003 +[2026-03-02 04:53:18] (step=0030811) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.028370181960478, LR: 0.0003 +[2026-03-02 04:53:26] (step=0030812) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.028565838387791, LR: 0.0003 +[2026-03-02 04:53:34] (step=0030813) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.028761494815105, LR: 0.0003 +[2026-03-02 04:53:42] (step=0030814) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.028957151242419, LR: 0.0003 +[2026-03-02 04:53:50] (step=0030815) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.029152807669732, LR: 0.0003 +[2026-03-02 04:53:58] (step=0030816) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.029348464097046, LR: 0.0003 +[2026-03-02 04:54:05] (step=0030817) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.029544120524359, LR: 0.0003 +[2026-03-02 04:54:13] (step=0030818) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.029739776951673, LR: 0.0003 +[2026-03-02 04:54:21] (step=0030819) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.029935433378986, LR: 0.0003 +[2026-03-02 04:54:29] (step=0030820) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.0301310898063, LR: 0.0003 +[2026-03-02 04:54:37] (step=0030821) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 6.030326746233614, LR: 0.0003 +[2026-03-02 04:54:45] (step=0030822) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.030522402660927, LR: 0.0003 +[2026-03-02 04:54:53] (step=0030823) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 6.030718059088241, LR: 0.0003 +[2026-03-02 04:55:00] (step=0030824) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.030913715515554, LR: 0.0003 +[2026-03-02 04:55:08] (step=0030825) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.031109371942868, LR: 0.0003 +[2026-03-02 04:55:16] (step=0030826) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.031305028370182, LR: 0.0003 +[2026-03-02 04:55:24] (step=0030827) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 6.0315006847974955, LR: 0.0003 +[2026-03-02 04:55:32] (step=0030828) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.0316963412248095, LR: 0.0003 +[2026-03-02 04:55:40] (step=0030829) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.031891997652123, LR: 0.0003 +[2026-03-02 04:55:48] (step=0030830) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.032087654079437, LR: 0.0003 +[2026-03-02 04:55:55] (step=0030831) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.03228331050675, LR: 0.0003 +[2026-03-02 04:56:03] (step=0030832) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.032478966934064, LR: 0.0003 +[2026-03-02 04:56:11] (step=0030833) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.032674623361378, LR: 0.0003 +[2026-03-02 04:56:19] (step=0030834) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.032870279788691, LR: 0.0003 +[2026-03-02 04:56:27] (step=0030835) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.033065936216005, LR: 0.0003 +[2026-03-02 04:56:35] (step=0030836) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.033261592643318, LR: 0.0003 +[2026-03-02 04:56:42] (step=0030837) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.033457249070632, LR: 0.0003 +[2026-03-02 04:56:50] (step=0030838) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.033652905497946, LR: 0.0003 +[2026-03-02 04:56:58] (step=0030839) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.033848561925259, LR: 0.0003 +[2026-03-02 04:57:06] (step=0030840) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.034044218352573, LR: 0.0003 +[2026-03-02 04:57:14] (step=0030841) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.034239874779886, LR: 0.0003 +[2026-03-02 04:57:22] (step=0030842) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.0344355312072, LR: 0.0003 +[2026-03-02 04:57:30] (step=0030843) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.034631187634514, LR: 0.0003 +[2026-03-02 04:57:37] (step=0030844) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.034826844061827, LR: 0.0003 +[2026-03-02 04:57:45] (step=0030845) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.035022500489141, LR: 0.0003 +[2026-03-02 04:57:53] (step=0030846) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.0352181569164545, LR: 0.0003 +[2026-03-02 04:58:01] (step=0030847) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.0354138133437685, LR: 0.0003 +[2026-03-02 04:58:09] (step=0030848) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.035609469771082, LR: 0.0003 +[2026-03-02 04:58:17] (step=0030849) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.035805126198396, LR: 0.0003 +[2026-03-02 04:58:24] (step=0030850) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.03600078262571, LR: 0.0003 +[2026-03-02 04:58:32] (step=0030851) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.036196439053023, LR: 0.0003 +[2026-03-02 04:58:40] (step=0030852) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.036392095480337, LR: 0.0003 +[2026-03-02 04:58:48] (step=0030853) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.03658775190765, LR: 0.0003 +[2026-03-02 04:58:56] (step=0030854) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.036783408334964, LR: 0.0003 +[2026-03-02 04:59:04] (step=0030855) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 6.036979064762278, LR: 0.0003 +[2026-03-02 04:59:11] (step=0030856) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.037174721189591, LR: 0.0003 +[2026-03-02 04:59:19] (step=0030857) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.037370377616905, LR: 0.0003 +[2026-03-02 04:59:27] (step=0030858) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 6.037566034044218, LR: 0.0003 +[2026-03-02 04:59:35] (step=0030859) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 6.037761690471532, LR: 0.0003 +[2026-03-02 04:59:43] (step=0030860) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.037957346898845, LR: 0.0003 +[2026-03-02 04:59:51] (step=0030861) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.038153003326159, LR: 0.0003 +[2026-03-02 04:59:59] (step=0030862) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.038348659753473, LR: 0.0003 +[2026-03-02 05:00:07] (step=0030863) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.038544316180786, LR: 0.0003 +[2026-03-02 05:00:15] (step=0030864) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.0387399726081, LR: 0.0003 +[2026-03-02 05:00:22] (step=0030865) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.0389356290354135, LR: 0.0003 +[2026-03-02 05:00:30] (step=0030866) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.0391312854627275, LR: 0.0003 +[2026-03-02 05:00:38] (step=0030867) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.0393269418900415, LR: 0.0003 +[2026-03-02 05:00:46] (step=0030868) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.039522598317355, LR: 0.0003 +[2026-03-02 05:00:54] (step=0030869) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.039718254744669, LR: 0.0003 +[2026-03-02 05:01:02] (step=0030870) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.039913911171982, LR: 0.0003 +[2026-03-02 05:01:09] (step=0030871) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.040109567599296, LR: 0.0003 +[2026-03-02 05:01:17] (step=0030872) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.040305224026609, LR: 0.0003 +[2026-03-02 05:01:25] (step=0030873) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.040500880453923, LR: 0.0003 +[2026-03-02 05:01:33] (step=0030874) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.040696536881237, LR: 0.0003 +[2026-03-02 05:01:41] (step=0030875) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.04089219330855, LR: 0.0003 +[2026-03-02 05:01:49] (step=0030876) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 6.041087849735864, LR: 0.0003 +[2026-03-02 05:01:56] (step=0030877) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.041283506163177, LR: 0.0003 +[2026-03-02 05:02:04] (step=0030878) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 6.041479162590491, LR: 0.0003 +[2026-03-02 05:02:12] (step=0030879) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.041674819017805, LR: 0.0003 +[2026-03-02 05:02:20] (step=0030880) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.041870475445118, LR: 0.0003 +[2026-03-02 05:02:28] (step=0030881) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.042066131872432, LR: 0.0003 +[2026-03-02 05:02:36] (step=0030882) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.042261788299745, LR: 0.0003 +[2026-03-02 05:02:44] (step=0030883) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.042457444727059, LR: 0.0003 +[2026-03-02 05:02:51] (step=0030884) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.0426531011543725, LR: 0.0003 +[2026-03-02 05:02:59] (step=0030885) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.0428487575816865, LR: 0.0003 +[2026-03-02 05:03:07] (step=0030886) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 6.0430444140090005, LR: 0.0003 +[2026-03-02 05:03:15] (step=0030887) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 6.043240070436314, LR: 0.0003 +[2026-03-02 05:03:23] (step=0030888) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.043435726863628, LR: 0.0003 +[2026-03-02 05:03:31] (step=0030889) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.043631383290941, LR: 0.0003 +[2026-03-02 05:03:39] (step=0030890) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.043827039718255, LR: 0.0003 +[2026-03-02 05:03:46] (step=0030891) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.044022696145569, LR: 0.0003 +[2026-03-02 05:03:54] (step=0030892) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.044218352572882, LR: 0.0003 +[2026-03-02 05:04:02] (step=0030893) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.044414009000196, LR: 0.0003 +[2026-03-02 05:04:10] (step=0030894) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.044609665427509, LR: 0.0003 +[2026-03-02 05:04:18] (step=0030895) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.044805321854823, LR: 0.0003 +[2026-03-02 05:04:26] (step=0030896) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.045000978282137, LR: 0.0003 +[2026-03-02 05:04:34] (step=0030897) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.04519663470945, LR: 0.0003 +[2026-03-02 05:04:42] (step=0030898) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.045392291136764, LR: 0.0003 +[2026-03-02 05:04:49] (step=0030899) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.045587947564077, LR: 0.0003 +[2026-03-02 05:04:57] (step=0030900) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.045783603991391, LR: 0.0003 +[2026-03-02 05:05:05] (step=0030901) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.045979260418704, LR: 0.0003 +[2026-03-02 05:05:13] (step=0030902) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.046174916846018, LR: 0.0003 +[2026-03-02 05:05:21] (step=0030903) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.046370573273332, LR: 0.0003 +[2026-03-02 05:05:29] (step=0030904) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.0465662297006455, LR: 0.0003 +[2026-03-02 05:05:37] (step=0030905) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.0467618861279595, LR: 0.0003 +[2026-03-02 05:05:44] (step=0030906) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.046957542555273, LR: 0.0003 +[2026-03-02 05:05:52] (step=0030907) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.047153198982587, LR: 0.0003 +[2026-03-02 05:06:00] (step=0030908) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.047348855409901, LR: 0.0003 +[2026-03-02 05:06:08] (step=0030909) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.047544511837214, LR: 0.0003 +[2026-03-02 05:06:16] (step=0030910) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.047740168264528, LR: 0.0003 +[2026-03-02 05:06:24] (step=0030911) Train Loss: 0.4374, Train Steps/Sec: 0.12, Epoch: 6.047935824691841, LR: 0.0003 +[2026-03-02 05:06:32] (step=0030912) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.048131481119155, LR: 0.0003 +[2026-03-02 05:06:40] (step=0030913) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.048327137546468, LR: 0.0003 +[2026-03-02 05:06:48] (step=0030914) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.048522793973782, LR: 0.0003 +[2026-03-02 05:06:56] (step=0030915) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 6.048718450401096, LR: 0.0003 +[2026-03-02 05:07:03] (step=0030916) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.048914106828409, LR: 0.0003 +[2026-03-02 05:07:11] (step=0030917) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.049109763255723, LR: 0.0003 +[2026-03-02 05:07:19] (step=0030918) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.049305419683036, LR: 0.0003 +[2026-03-02 05:07:27] (step=0030919) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.04950107611035, LR: 0.0003 +[2026-03-02 05:07:35] (step=0030920) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.049696732537664, LR: 0.0003 +[2026-03-02 05:07:43] (step=0030921) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.049892388964977, LR: 0.0003 +[2026-03-02 05:07:50] (step=0030922) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.050088045392291, LR: 0.0003 +[2026-03-02 05:07:58] (step=0030923) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.0502837018196045, LR: 0.0003 +[2026-03-02 05:08:06] (step=0030924) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.0504793582469185, LR: 0.0003 +[2026-03-02 05:08:14] (step=0030925) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.050675014674232, LR: 0.0003 +[2026-03-02 05:08:22] (step=0030926) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.050870671101546, LR: 0.0003 +[2026-03-02 05:08:30] (step=0030927) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.05106632752886, LR: 0.0003 +[2026-03-02 05:08:38] (step=0030928) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 6.051261983956173, LR: 0.0003 +[2026-03-02 05:08:45] (step=0030929) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.051457640383487, LR: 0.0003 +[2026-03-02 05:08:53] (step=0030930) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.0516532968108, LR: 0.0003 +[2026-03-02 05:09:01] (step=0030931) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 6.051848953238114, LR: 0.0003 +[2026-03-02 05:09:09] (step=0030932) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.052044609665428, LR: 0.0003 +[2026-03-02 05:09:17] (step=0030933) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 6.052240266092741, LR: 0.0003 +[2026-03-02 05:09:25] (step=0030934) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.052435922520055, LR: 0.0003 +[2026-03-02 05:09:32] (step=0030935) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.052631578947368, LR: 0.0003 +[2026-03-02 05:09:40] (step=0030936) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.052827235374682, LR: 0.0003 +[2026-03-02 05:09:48] (step=0030937) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.053022891801995, LR: 0.0003 +[2026-03-02 05:09:56] (step=0030938) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.053218548229309, LR: 0.0003 +[2026-03-02 05:10:04] (step=0030939) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.053414204656623, LR: 0.0003 +[2026-03-02 05:10:12] (step=0030940) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.053609861083936, LR: 0.0003 +[2026-03-02 05:10:20] (step=0030941) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.05380551751125, LR: 0.0003 +[2026-03-02 05:10:28] (step=0030942) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.0540011739385635, LR: 0.0003 +[2026-03-02 05:10:35] (step=0030943) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.0541968303658775, LR: 0.0003 +[2026-03-02 05:10:43] (step=0030944) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.0543924867931915, LR: 0.0003 +[2026-03-02 05:10:51] (step=0030945) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.054588143220505, LR: 0.0003 +[2026-03-02 05:10:59] (step=0030946) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.054783799647819, LR: 0.0003 +[2026-03-02 05:11:07] (step=0030947) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.054979456075132, LR: 0.0003 +[2026-03-02 05:11:15] (step=0030948) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.055175112502446, LR: 0.0003 +[2026-03-02 05:11:22] (step=0030949) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.05537076892976, LR: 0.0003 +[2026-03-02 05:11:30] (step=0030950) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.055566425357073, LR: 0.0003 +[2026-03-02 05:11:38] (step=0030951) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.055762081784387, LR: 0.0003 +[2026-03-02 05:11:46] (step=0030952) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.0559577382117, LR: 0.0003 +[2026-03-02 05:11:54] (step=0030953) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.056153394639014, LR: 0.0003 +[2026-03-02 05:12:02] (step=0030954) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.056349051066327, LR: 0.0003 +[2026-03-02 05:12:10] (step=0030955) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.056544707493641, LR: 0.0003 +[2026-03-02 05:12:18] (step=0030956) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.056740363920955, LR: 0.0003 +[2026-03-02 05:12:25] (step=0030957) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.056936020348268, LR: 0.0003 +[2026-03-02 05:12:33] (step=0030958) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.057131676775582, LR: 0.0003 +[2026-03-02 05:12:41] (step=0030959) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.057327333202895, LR: 0.0003 +[2026-03-02 05:12:49] (step=0030960) Train Loss: 0.4576, Train Steps/Sec: 0.12, Epoch: 6.057522989630209, LR: 0.0003 +[2026-03-02 05:12:57] (step=0030961) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.057718646057523, LR: 0.0003 +[2026-03-02 05:13:05] (step=0030962) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.0579143024848365, LR: 0.0003 +[2026-03-02 05:13:13] (step=0030963) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.0581099589121505, LR: 0.0003 +[2026-03-02 05:13:21] (step=0030964) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.058305615339464, LR: 0.0003 +[2026-03-02 05:13:28] (step=0030965) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.058501271766778, LR: 0.0003 +[2026-03-02 05:13:36] (step=0030966) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 6.058696928194091, LR: 0.0003 +[2026-03-02 05:13:44] (step=0030967) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.058892584621405, LR: 0.0003 +[2026-03-02 05:13:52] (step=0030968) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.059088241048719, LR: 0.0003 +[2026-03-02 05:14:00] (step=0030969) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.059283897476032, LR: 0.0003 +[2026-03-02 05:14:08] (step=0030970) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.059479553903346, LR: 0.0003 +[2026-03-02 05:14:15] (step=0030971) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.059675210330659, LR: 0.0003 +[2026-03-02 05:14:23] (step=0030972) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 6.059870866757973, LR: 0.0003 +[2026-03-02 05:14:31] (step=0030973) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.060066523185287, LR: 0.0003 +[2026-03-02 05:14:39] (step=0030974) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.0602621796126, LR: 0.0003 +[2026-03-02 05:14:47] (step=0030975) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.060457836039914, LR: 0.0003 +[2026-03-02 05:14:55] (step=0030976) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.060653492467227, LR: 0.0003 +[2026-03-02 05:15:02] (step=0030977) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.060849148894541, LR: 0.0003 +[2026-03-02 05:15:10] (step=0030978) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.061044805321854, LR: 0.0003 +[2026-03-02 05:15:18] (step=0030979) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.061240461749168, LR: 0.0003 +[2026-03-02 05:15:26] (step=0030980) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.061436118176482, LR: 0.0003 +[2026-03-02 05:15:34] (step=0030981) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.0616317746037955, LR: 0.0003 +[2026-03-02 05:15:42] (step=0030982) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.0618274310311095, LR: 0.0003 +[2026-03-02 05:15:49] (step=0030983) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.062023087458423, LR: 0.0003 +[2026-03-02 05:15:57] (step=0030984) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.062218743885737, LR: 0.0003 +[2026-03-02 05:16:05] (step=0030985) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.062414400313051, LR: 0.0003 +[2026-03-02 05:16:13] (step=0030986) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.062610056740364, LR: 0.0003 +[2026-03-02 05:16:21] (step=0030987) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.062805713167678, LR: 0.0003 +[2026-03-02 05:16:29] (step=0030988) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.063001369594991, LR: 0.0003 +[2026-03-02 05:16:37] (step=0030989) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.063197026022305, LR: 0.0003 +[2026-03-02 05:16:44] (step=0030990) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.063392682449618, LR: 0.0003 +[2026-03-02 05:16:52] (step=0030991) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.063588338876932, LR: 0.0003 +[2026-03-02 05:17:00] (step=0030992) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.063783995304246, LR: 0.0003 +[2026-03-02 05:17:08] (step=0030993) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.063979651731559, LR: 0.0003 +[2026-03-02 05:17:16] (step=0030994) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.064175308158873, LR: 0.0003 +[2026-03-02 05:17:24] (step=0030995) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.064370964586186, LR: 0.0003 +[2026-03-02 05:17:32] (step=0030996) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.0645666210135, LR: 0.0003 +[2026-03-02 05:17:40] (step=0030997) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.064762277440814, LR: 0.0003 +[2026-03-02 05:17:47] (step=0030998) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.064957933868127, LR: 0.0003 +[2026-03-02 05:17:55] (step=0030999) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.065153590295441, LR: 0.0003 +[2026-03-02 05:18:03] (step=0031000) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.0653492467227546, LR: 0.0003 +[2026-03-02 05:18:03] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0031000/ +[2026-03-02 05:18:11] (step=0031001) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.065544903150069, LR: 0.0003 +[2026-03-02 05:18:19] (step=0031002) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.065740559577382, LR: 0.0003 +[2026-03-02 05:18:27] (step=0031003) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.065936216004696, LR: 0.0003 +[2026-03-02 05:18:35] (step=0031004) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.06613187243201, LR: 0.0003 +[2026-03-02 05:18:43] (step=0031005) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.066327528859323, LR: 0.0003 +[2026-03-02 05:18:50] (step=0031006) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.066523185286637, LR: 0.0003 +[2026-03-02 05:18:58] (step=0031007) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.06671884171395, LR: 0.0003 +[2026-03-02 05:19:06] (step=0031008) Train Loss: 0.4456, Train Steps/Sec: 0.12, Epoch: 6.066914498141264, LR: 0.0003 +[2026-03-02 05:19:14] (step=0031009) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.067110154568578, LR: 0.0003 +[2026-03-02 05:19:22] (step=0031010) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.067305810995891, LR: 0.0003 +[2026-03-02 05:19:30] (step=0031011) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.067501467423205, LR: 0.0003 +[2026-03-02 05:19:38] (step=0031012) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.067697123850518, LR: 0.0003 +[2026-03-02 05:19:45] (step=0031013) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.067892780277832, LR: 0.0003 +[2026-03-02 05:19:53] (step=0031014) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.068088436705146, LR: 0.0003 +[2026-03-02 05:20:01] (step=0031015) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.068284093132459, LR: 0.0003 +[2026-03-02 05:20:09] (step=0031016) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.068479749559773, LR: 0.0003 +[2026-03-02 05:20:17] (step=0031017) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.068675405987086, LR: 0.0003 +[2026-03-02 05:20:25] (step=0031018) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.0688710624144, LR: 0.0003 +[2026-03-02 05:20:33] (step=0031019) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.069066718841714, LR: 0.0003 +[2026-03-02 05:20:40] (step=0031020) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.069262375269028, LR: 0.0003 +[2026-03-02 05:20:48] (step=0031021) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.069458031696342, LR: 0.0003 +[2026-03-02 05:20:56] (step=0031022) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 6.069653688123655, LR: 0.0003 +[2026-03-02 05:21:04] (step=0031023) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.069849344550969, LR: 0.0003 +[2026-03-02 05:21:12] (step=0031024) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.070045000978282, LR: 0.0003 +[2026-03-02 05:21:20] (step=0031025) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 6.070240657405596, LR: 0.0003 +[2026-03-02 05:21:28] (step=0031026) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.07043631383291, LR: 0.0003 +[2026-03-02 05:21:35] (step=0031027) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 6.070631970260223, LR: 0.0003 +[2026-03-02 05:21:43] (step=0031028) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.070827626687537, LR: 0.0003 +[2026-03-02 05:21:51] (step=0031029) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.07102328311485, LR: 0.0003 +[2026-03-02 05:21:59] (step=0031030) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.071218939542164, LR: 0.0003 +[2026-03-02 05:22:07] (step=0031031) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.071414595969477, LR: 0.0003 +[2026-03-02 05:22:15] (step=0031032) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.071610252396791, LR: 0.0003 +[2026-03-02 05:22:22] (step=0031033) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.071805908824105, LR: 0.0003 +[2026-03-02 05:22:30] (step=0031034) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.072001565251418, LR: 0.0003 +[2026-03-02 05:22:38] (step=0031035) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.072197221678732, LR: 0.0003 +[2026-03-02 05:22:46] (step=0031036) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.072392878106045, LR: 0.0003 +[2026-03-02 05:22:54] (step=0031037) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.0725885345333595, LR: 0.0003 +[2026-03-02 05:23:02] (step=0031038) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.0727841909606735, LR: 0.0003 +[2026-03-02 05:23:10] (step=0031039) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.072979847387987, LR: 0.0003 +[2026-03-02 05:23:18] (step=0031040) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.073175503815301, LR: 0.0003 +[2026-03-02 05:23:25] (step=0031041) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 6.073371160242614, LR: 0.0003 +[2026-03-02 05:23:33] (step=0031042) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.073566816669928, LR: 0.0003 +[2026-03-02 05:23:41] (step=0031043) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.073762473097241, LR: 0.0003 +[2026-03-02 05:23:49] (step=0031044) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 6.073958129524555, LR: 0.0003 +[2026-03-02 05:23:57] (step=0031045) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 6.074153785951869, LR: 0.0003 +[2026-03-02 05:24:05] (step=0031046) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.074349442379182, LR: 0.0003 +[2026-03-02 05:24:13] (step=0031047) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.074545098806496, LR: 0.0003 +[2026-03-02 05:24:20] (step=0031048) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.074740755233809, LR: 0.0003 +[2026-03-02 05:24:28] (step=0031049) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.074936411661123, LR: 0.0003 +[2026-03-02 05:24:36] (step=0031050) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.075132068088437, LR: 0.0003 +[2026-03-02 05:24:44] (step=0031051) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.07532772451575, LR: 0.0003 +[2026-03-02 05:24:52] (step=0031052) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.075523380943064, LR: 0.0003 +[2026-03-02 05:25:00] (step=0031053) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.075719037370377, LR: 0.0003 +[2026-03-02 05:25:08] (step=0031054) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.075914693797691, LR: 0.0003 +[2026-03-02 05:25:15] (step=0031055) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.0761103502250045, LR: 0.0003 +[2026-03-02 05:25:23] (step=0031056) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 6.0763060066523185, LR: 0.0003 +[2026-03-02 05:25:31] (step=0031057) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.0765016630796325, LR: 0.0003 +[2026-03-02 05:25:39] (step=0031058) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.076697319506946, LR: 0.0003 +[2026-03-02 05:25:47] (step=0031059) Train Loss: 0.4515, Train Steps/Sec: 0.12, Epoch: 6.07689297593426, LR: 0.0003 +[2026-03-02 05:25:55] (step=0031060) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.077088632361573, LR: 0.0003 +[2026-03-02 05:26:03] (step=0031061) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.077284288788887, LR: 0.0003 +[2026-03-02 05:26:11] (step=0031062) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.077479945216201, LR: 0.0003 +[2026-03-02 05:26:18] (step=0031063) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.077675601643514, LR: 0.0003 +[2026-03-02 05:26:26] (step=0031064) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.077871258070828, LR: 0.0003 +[2026-03-02 05:26:34] (step=0031065) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.078066914498141, LR: 0.0003 +[2026-03-02 05:26:42] (step=0031066) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.078262570925455, LR: 0.0003 +[2026-03-02 05:26:50] (step=0031067) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.078458227352769, LR: 0.0003 +[2026-03-02 05:26:58] (step=0031068) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 6.078653883780082, LR: 0.0003 +[2026-03-02 05:27:06] (step=0031069) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 6.078849540207396, LR: 0.0003 +[2026-03-02 05:27:13] (step=0031070) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.079045196634709, LR: 0.0003 +[2026-03-02 05:27:21] (step=0031071) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.079240853062023, LR: 0.0003 +[2026-03-02 05:27:29] (step=0031072) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.079436509489336, LR: 0.0003 +[2026-03-02 05:27:37] (step=0031073) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.07963216591665, LR: 0.0003 +[2026-03-02 05:27:45] (step=0031074) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.079827822343964, LR: 0.0003 +[2026-03-02 05:27:53] (step=0031075) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.0800234787712775, LR: 0.0003 +[2026-03-02 05:28:01] (step=0031076) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.0802191351985915, LR: 0.0003 +[2026-03-02 05:28:08] (step=0031077) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.080414791625905, LR: 0.0003 +[2026-03-02 05:28:16] (step=0031078) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.080610448053219, LR: 0.0003 +[2026-03-02 05:28:24] (step=0031079) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.080806104480533, LR: 0.0003 +[2026-03-02 05:28:32] (step=0031080) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.081001760907846, LR: 0.0003 +[2026-03-02 05:28:40] (step=0031081) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.08119741733516, LR: 0.0003 +[2026-03-02 05:28:48] (step=0031082) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.081393073762473, LR: 0.0003 +[2026-03-02 05:28:55] (step=0031083) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.081588730189787, LR: 0.0003 +[2026-03-02 05:29:03] (step=0031084) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.0817843866171, LR: 0.0003 +[2026-03-02 05:29:11] (step=0031085) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.081980043044414, LR: 0.0003 +[2026-03-02 05:29:19] (step=0031086) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.082175699471728, LR: 0.0003 +[2026-03-02 05:29:27] (step=0031087) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.082371355899041, LR: 0.0003 +[2026-03-02 05:29:35] (step=0031088) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.082567012326355, LR: 0.0003 +[2026-03-02 05:29:43] (step=0031089) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.082762668753668, LR: 0.0003 +[2026-03-02 05:29:50] (step=0031090) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.082958325180982, LR: 0.0003 +[2026-03-02 05:29:58] (step=0031091) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.083153981608296, LR: 0.0003 +[2026-03-02 05:30:06] (step=0031092) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.083349638035609, LR: 0.0003 +[2026-03-02 05:30:14] (step=0031093) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.083545294462923, LR: 0.0003 +[2026-03-02 05:30:22] (step=0031094) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.0837409508902365, LR: 0.0003 +[2026-03-02 05:30:30] (step=0031095) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.0839366073175505, LR: 0.0003 +[2026-03-02 05:30:38] (step=0031096) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.084132263744864, LR: 0.0003 +[2026-03-02 05:30:46] (step=0031097) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.084327920172178, LR: 0.0003 +[2026-03-02 05:30:53] (step=0031098) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.084523576599492, LR: 0.0003 +[2026-03-02 05:31:01] (step=0031099) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.084719233026805, LR: 0.0003 +[2026-03-02 05:31:09] (step=0031100) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.084914889454119, LR: 0.0003 +[2026-03-02 05:31:17] (step=0031101) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.085110545881432, LR: 0.0003 +[2026-03-02 05:31:25] (step=0031102) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 6.085306202308746, LR: 0.0003 +[2026-03-02 05:31:33] (step=0031103) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.08550185873606, LR: 0.0003 +[2026-03-02 05:31:41] (step=0031104) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.085697515163373, LR: 0.0003 +[2026-03-02 05:31:48] (step=0031105) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 6.085893171590687, LR: 0.0003 +[2026-03-02 05:31:56] (step=0031106) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.086088828018, LR: 0.0003 +[2026-03-02 05:32:04] (step=0031107) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.086284484445314, LR: 0.0003 +[2026-03-02 05:32:12] (step=0031108) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.086480140872627, LR: 0.0003 +[2026-03-02 05:32:20] (step=0031109) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.086675797299941, LR: 0.0003 +[2026-03-02 05:32:28] (step=0031110) Train Loss: 0.4527, Train Steps/Sec: 0.12, Epoch: 6.086871453727255, LR: 0.0003 +[2026-03-02 05:32:36] (step=0031111) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 6.087067110154568, LR: 0.0003 +[2026-03-02 05:32:44] (step=0031112) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.087262766581882, LR: 0.0003 +[2026-03-02 05:32:51] (step=0031113) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.0874584230091955, LR: 0.0003 +[2026-03-02 05:32:59] (step=0031114) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 6.0876540794365095, LR: 0.0003 +[2026-03-02 05:33:07] (step=0031115) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.0878497358638235, LR: 0.0003 +[2026-03-02 05:33:15] (step=0031116) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.088045392291137, LR: 0.0003 +[2026-03-02 05:33:23] (step=0031117) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.088241048718451, LR: 0.0003 +[2026-03-02 05:33:31] (step=0031118) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.088436705145764, LR: 0.0003 +[2026-03-02 05:33:38] (step=0031119) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.088632361573078, LR: 0.0003 +[2026-03-02 05:33:46] (step=0031120) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.088828018000392, LR: 0.0003 +[2026-03-02 05:33:54] (step=0031121) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.089023674427705, LR: 0.0003 +[2026-03-02 05:34:02] (step=0031122) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 6.089219330855019, LR: 0.0003 +[2026-03-02 05:34:10] (step=0031123) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.089414987282332, LR: 0.0003 +[2026-03-02 05:34:18] (step=0031124) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 6.089610643709646, LR: 0.0003 +[2026-03-02 05:34:26] (step=0031125) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.089806300136959, LR: 0.0003 +[2026-03-02 05:34:33] (step=0031126) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.090001956564273, LR: 0.0003 +[2026-03-02 05:34:41] (step=0031127) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.090197612991587, LR: 0.0003 +[2026-03-02 05:34:49] (step=0031128) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.0903932694189, LR: 0.0003 +[2026-03-02 05:34:57] (step=0031129) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.090588925846214, LR: 0.0003 +[2026-03-02 05:35:05] (step=0031130) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.090784582273527, LR: 0.0003 +[2026-03-02 05:35:13] (step=0031131) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.090980238700841, LR: 0.0003 +[2026-03-02 05:35:21] (step=0031132) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.091175895128155, LR: 0.0003 +[2026-03-02 05:35:29] (step=0031133) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.0913715515554685, LR: 0.0003 +[2026-03-02 05:35:37] (step=0031134) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.0915672079827825, LR: 0.0003 +[2026-03-02 05:35:44] (step=0031135) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.091762864410096, LR: 0.0003 +[2026-03-02 05:35:52] (step=0031136) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.09195852083741, LR: 0.0003 +[2026-03-02 05:36:00] (step=0031137) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 6.092154177264723, LR: 0.0003 +[2026-03-02 05:36:08] (step=0031138) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 6.092349833692037, LR: 0.0003 +[2026-03-02 05:36:16] (step=0031139) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.092545490119351, LR: 0.0003 +[2026-03-02 05:36:24] (step=0031140) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.092741146546664, LR: 0.0003 +[2026-03-02 05:36:31] (step=0031141) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.092936802973978, LR: 0.0003 +[2026-03-02 05:36:39] (step=0031142) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.093132459401291, LR: 0.0003 +[2026-03-02 05:36:47] (step=0031143) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.093328115828605, LR: 0.0003 +[2026-03-02 05:36:55] (step=0031144) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.093523772255919, LR: 0.0003 +[2026-03-02 05:37:03] (step=0031145) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.093719428683232, LR: 0.0003 +[2026-03-02 05:37:11] (step=0031146) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.093915085110546, LR: 0.0003 +[2026-03-02 05:37:19] (step=0031147) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.094110741537859, LR: 0.0003 +[2026-03-02 05:37:27] (step=0031148) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.094306397965173, LR: 0.0003 +[2026-03-02 05:37:34] (step=0031149) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.094502054392486, LR: 0.0003 +[2026-03-02 05:37:42] (step=0031150) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.0946977108198, LR: 0.0003 +[2026-03-02 05:37:50] (step=0031151) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 6.094893367247114, LR: 0.0003 +[2026-03-02 05:37:58] (step=0031152) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.0950890236744275, LR: 0.0003 +[2026-03-02 05:38:06] (step=0031153) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.0952846801017415, LR: 0.0003 +[2026-03-02 05:38:14] (step=0031154) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.095480336529055, LR: 0.0003 +[2026-03-02 05:38:21] (step=0031155) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.095675992956369, LR: 0.0003 +[2026-03-02 05:38:29] (step=0031156) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.095871649383683, LR: 0.0003 +[2026-03-02 05:38:37] (step=0031157) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.096067305810996, LR: 0.0003 +[2026-03-02 05:38:45] (step=0031158) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.09626296223831, LR: 0.0003 +[2026-03-02 05:38:53] (step=0031159) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 6.096458618665623, LR: 0.0003 +[2026-03-02 05:39:01] (step=0031160) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.096654275092937, LR: 0.0003 +[2026-03-02 05:39:09] (step=0031161) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.09684993152025, LR: 0.0003 +[2026-03-02 05:39:17] (step=0031162) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.097045587947564, LR: 0.0003 +[2026-03-02 05:39:24] (step=0031163) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.097241244374878, LR: 0.0003 +[2026-03-02 05:39:32] (step=0031164) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.097436900802191, LR: 0.0003 +[2026-03-02 05:39:40] (step=0031165) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.097632557229505, LR: 0.0003 +[2026-03-02 05:39:48] (step=0031166) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.097828213656818, LR: 0.0003 +[2026-03-02 05:39:56] (step=0031167) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.098023870084132, LR: 0.0003 +[2026-03-02 05:40:04] (step=0031168) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.098219526511446, LR: 0.0003 +[2026-03-02 05:40:12] (step=0031169) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.098415182938759, LR: 0.0003 +[2026-03-02 05:40:19] (step=0031170) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.098610839366073, LR: 0.0003 +[2026-03-02 05:40:27] (step=0031171) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.0988064957933865, LR: 0.0003 +[2026-03-02 05:40:35] (step=0031172) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.0990021522207005, LR: 0.0003 +[2026-03-02 05:40:43] (step=0031173) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.0991978086480145, LR: 0.0003 +[2026-03-02 05:40:51] (step=0031174) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.099393465075328, LR: 0.0003 +[2026-03-02 05:40:59] (step=0031175) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.099589121502642, LR: 0.0003 +[2026-03-02 05:41:07] (step=0031176) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.099784777929955, LR: 0.0003 +[2026-03-02 05:41:14] (step=0031177) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.099980434357269, LR: 0.0003 +[2026-03-02 05:41:22] (step=0031178) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.100176090784582, LR: 0.0003 +[2026-03-02 05:41:30] (step=0031179) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.100371747211896, LR: 0.0003 +[2026-03-02 05:41:38] (step=0031180) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.10056740363921, LR: 0.0003 +[2026-03-02 05:41:46] (step=0031181) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.100763060066523, LR: 0.0003 +[2026-03-02 05:41:54] (step=0031182) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 6.100958716493837, LR: 0.0003 +[2026-03-02 05:42:02] (step=0031183) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.10115437292115, LR: 0.0003 +[2026-03-02 05:42:09] (step=0031184) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 6.101350029348464, LR: 0.0003 +[2026-03-02 05:42:17] (step=0031185) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.101545685775778, LR: 0.0003 +[2026-03-02 05:42:25] (step=0031186) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.101741342203091, LR: 0.0003 +[2026-03-02 05:42:33] (step=0031187) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.101936998630405, LR: 0.0003 +[2026-03-02 05:42:41] (step=0031188) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.102132655057718, LR: 0.0003 +[2026-03-02 05:42:49] (step=0031189) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 6.102328311485032, LR: 0.0003 +[2026-03-02 05:42:57] (step=0031190) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.1025239679123455, LR: 0.0003 +[2026-03-02 05:43:04] (step=0031191) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 6.1027196243396595, LR: 0.0003 +[2026-03-02 05:43:12] (step=0031192) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 6.1029152807669735, LR: 0.0003 +[2026-03-02 05:43:20] (step=0031193) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.103110937194287, LR: 0.0003 +[2026-03-02 05:43:28] (step=0031194) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.103306593621601, LR: 0.0003 +[2026-03-02 05:43:36] (step=0031195) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.103502250048914, LR: 0.0003 +[2026-03-02 05:43:44] (step=0031196) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.103697906476228, LR: 0.0003 +[2026-03-02 05:43:52] (step=0031197) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.103893562903542, LR: 0.0003 +[2026-03-02 05:43:59] (step=0031198) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.104089219330855, LR: 0.0003 +[2026-03-02 05:44:07] (step=0031199) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.104284875758169, LR: 0.0003 +[2026-03-02 05:44:15] (step=0031200) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 6.104480532185482, LR: 0.0003 +[2026-03-02 05:44:23] (step=0031201) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 6.104676188612796, LR: 0.0003 +[2026-03-02 05:44:31] (step=0031202) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.104871845040109, LR: 0.0003 +[2026-03-02 05:44:39] (step=0031203) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.105067501467423, LR: 0.0003 +[2026-03-02 05:44:47] (step=0031204) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.105263157894737, LR: 0.0003 +[2026-03-02 05:44:54] (step=0031205) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.10545881432205, LR: 0.0003 +[2026-03-02 05:45:02] (step=0031206) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.105654470749364, LR: 0.0003 +[2026-03-02 05:45:10] (step=0031207) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.105850127176677, LR: 0.0003 +[2026-03-02 05:45:18] (step=0031208) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.106045783603991, LR: 0.0003 +[2026-03-02 05:45:26] (step=0031209) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.106241440031305, LR: 0.0003 +[2026-03-02 05:45:34] (step=0031210) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.1064370964586185, LR: 0.0003 +[2026-03-02 05:45:42] (step=0031211) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.1066327528859325, LR: 0.0003 +[2026-03-02 05:45:50] (step=0031212) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.106828409313246, LR: 0.0003 +[2026-03-02 05:45:57] (step=0031213) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.10702406574056, LR: 0.0003 +[2026-03-02 05:46:05] (step=0031214) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.107219722167873, LR: 0.0003 +[2026-03-02 05:46:13] (step=0031215) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.107415378595187, LR: 0.0003 +[2026-03-02 05:46:21] (step=0031216) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 6.107611035022501, LR: 0.0003 +[2026-03-02 05:46:29] (step=0031217) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.107806691449814, LR: 0.0003 +[2026-03-02 05:46:37] (step=0031218) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 6.108002347877128, LR: 0.0003 +[2026-03-02 05:46:44] (step=0031219) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.108198004304441, LR: 0.0003 +[2026-03-02 05:46:52] (step=0031220) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.108393660731755, LR: 0.0003 +[2026-03-02 05:47:00] (step=0031221) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.108589317159069, LR: 0.0003 +[2026-03-02 05:47:08] (step=0031222) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.108784973586382, LR: 0.0003 +[2026-03-02 05:47:16] (step=0031223) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.108980630013696, LR: 0.0003 +[2026-03-02 05:47:24] (step=0031224) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.109176286441009, LR: 0.0003 +[2026-03-02 05:47:32] (step=0031225) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.109371942868323, LR: 0.0003 +[2026-03-02 05:47:39] (step=0031226) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.109567599295636, LR: 0.0003 +[2026-03-02 05:47:47] (step=0031227) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.10976325572295, LR: 0.0003 +[2026-03-02 05:47:55] (step=0031228) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.109958912150264, LR: 0.0003 +[2026-03-02 05:48:03] (step=0031229) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.1101545685775775, LR: 0.0003 +[2026-03-02 05:48:11] (step=0031230) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.1103502250048916, LR: 0.0003 +[2026-03-02 05:48:19] (step=0031231) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.110545881432205, LR: 0.0003 +[2026-03-02 05:48:27] (step=0031232) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.110741537859519, LR: 0.0003 +[2026-03-02 05:48:34] (step=0031233) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.110937194286833, LR: 0.0003 +[2026-03-02 05:48:42] (step=0031234) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.111132850714146, LR: 0.0003 +[2026-03-02 05:48:50] (step=0031235) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.11132850714146, LR: 0.0003 +[2026-03-02 05:48:58] (step=0031236) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.111524163568773, LR: 0.0003 +[2026-03-02 05:49:06] (step=0031237) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.111719819996087, LR: 0.0003 +[2026-03-02 05:49:14] (step=0031238) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.111915476423401, LR: 0.0003 +[2026-03-02 05:49:22] (step=0031239) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.112111132850714, LR: 0.0003 +[2026-03-02 05:49:29] (step=0031240) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 6.112306789278028, LR: 0.0003 +[2026-03-02 05:49:37] (step=0031241) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.112502445705341, LR: 0.0003 +[2026-03-02 05:49:45] (step=0031242) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 6.112698102132655, LR: 0.0003 +[2026-03-02 05:49:53] (step=0031243) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.112893758559968, LR: 0.0003 +[2026-03-02 05:50:01] (step=0031244) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.113089414987282, LR: 0.0003 +[2026-03-02 05:50:09] (step=0031245) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.113285071414596, LR: 0.0003 +[2026-03-02 05:50:17] (step=0031246) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.113480727841909, LR: 0.0003 +[2026-03-02 05:50:24] (step=0031247) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.113676384269223, LR: 0.0003 +[2026-03-02 05:50:32] (step=0031248) Train Loss: 0.4583, Train Steps/Sec: 0.12, Epoch: 6.113872040696537, LR: 0.0003 +[2026-03-02 05:50:40] (step=0031249) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 6.114067697123851, LR: 0.0003 +[2026-03-02 05:50:48] (step=0031250) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.114263353551165, LR: 0.0003 +[2026-03-02 05:50:56] (step=0031251) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.114459009978478, LR: 0.0003 +[2026-03-02 05:51:04] (step=0031252) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.114654666405792, LR: 0.0003 +[2026-03-02 05:51:12] (step=0031253) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.114850322833105, LR: 0.0003 +[2026-03-02 05:51:19] (step=0031254) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 6.115045979260419, LR: 0.0003 +[2026-03-02 05:51:27] (step=0031255) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.115241635687732, LR: 0.0003 +[2026-03-02 05:51:35] (step=0031256) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.115437292115046, LR: 0.0003 +[2026-03-02 05:51:43] (step=0031257) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 6.11563294854236, LR: 0.0003 +[2026-03-02 05:51:51] (step=0031258) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 6.115828604969673, LR: 0.0003 +[2026-03-02 05:51:59] (step=0031259) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.116024261396987, LR: 0.0003 +[2026-03-02 05:52:07] (step=0031260) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.1162199178243, LR: 0.0003 +[2026-03-02 05:52:14] (step=0031261) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.116415574251614, LR: 0.0003 +[2026-03-02 05:52:22] (step=0031262) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.116611230678928, LR: 0.0003 +[2026-03-02 05:52:30] (step=0031263) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.116806887106241, LR: 0.0003 +[2026-03-02 05:52:38] (step=0031264) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.117002543533555, LR: 0.0003 +[2026-03-02 05:52:46] (step=0031265) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.117198199960868, LR: 0.0003 +[2026-03-02 05:52:54] (step=0031266) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.1173938563881824, LR: 0.0003 +[2026-03-02 05:53:02] (step=0031267) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.117589512815496, LR: 0.0003 +[2026-03-02 05:53:10] (step=0031268) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 6.11778516924281, LR: 0.0003 +[2026-03-02 05:53:17] (step=0031269) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 6.117980825670124, LR: 0.0003 +[2026-03-02 05:53:25] (step=0031270) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.118176482097437, LR: 0.0003 +[2026-03-02 05:53:33] (step=0031271) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.118372138524751, LR: 0.0003 +[2026-03-02 05:53:41] (step=0031272) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 6.118567794952064, LR: 0.0003 +[2026-03-02 05:53:49] (step=0031273) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 6.118763451379378, LR: 0.0003 +[2026-03-02 05:53:57] (step=0031274) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.118959107806692, LR: 0.0003 +[2026-03-02 05:54:05] (step=0031275) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.119154764234005, LR: 0.0003 +[2026-03-02 05:54:12] (step=0031276) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.119350420661319, LR: 0.0003 +[2026-03-02 05:54:20] (step=0031277) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 6.119546077088632, LR: 0.0003 +[2026-03-02 05:54:28] (step=0031278) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 6.119741733515946, LR: 0.0003 +[2026-03-02 05:54:36] (step=0031279) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.119937389943259, LR: 0.0003 +[2026-03-02 05:54:44] (step=0031280) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.120133046370573, LR: 0.0003 +[2026-03-02 05:54:52] (step=0031281) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.120328702797887, LR: 0.0003 +[2026-03-02 05:55:00] (step=0031282) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.1205243592252, LR: 0.0003 +[2026-03-02 05:55:07] (step=0031283) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.120720015652514, LR: 0.0003 +[2026-03-02 05:55:15] (step=0031284) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.1209156720798275, LR: 0.0003 +[2026-03-02 05:55:23] (step=0031285) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.1211113285071415, LR: 0.0003 +[2026-03-02 05:55:31] (step=0031286) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.1213069849344555, LR: 0.0003 +[2026-03-02 05:55:39] (step=0031287) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.121502641361769, LR: 0.0003 +[2026-03-02 05:55:47] (step=0031288) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.121698297789083, LR: 0.0003 +[2026-03-02 05:55:54] (step=0031289) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.121893954216396, LR: 0.0003 +[2026-03-02 05:56:02] (step=0031290) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.12208961064371, LR: 0.0003 +[2026-03-02 05:56:10] (step=0031291) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.122285267071024, LR: 0.0003 +[2026-03-02 05:56:18] (step=0031292) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.122480923498337, LR: 0.0003 +[2026-03-02 05:56:26] (step=0031293) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.122676579925651, LR: 0.0003 +[2026-03-02 05:56:34] (step=0031294) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 6.122872236352964, LR: 0.0003 +[2026-03-02 05:56:42] (step=0031295) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.123067892780278, LR: 0.0003 +[2026-03-02 05:56:49] (step=0031296) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.123263549207591, LR: 0.0003 +[2026-03-02 05:56:57] (step=0031297) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.123459205634905, LR: 0.0003 +[2026-03-02 05:57:05] (step=0031298) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 6.123654862062219, LR: 0.0003 +[2026-03-02 05:57:13] (step=0031299) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 6.123850518489532, LR: 0.0003 +[2026-03-02 05:57:21] (step=0031300) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 6.124046174916846, LR: 0.0003 +[2026-03-02 05:57:29] (step=0031301) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.124241831344159, LR: 0.0003 +[2026-03-02 05:57:37] (step=0031302) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.124437487771473, LR: 0.0003 +[2026-03-02 05:57:45] (step=0031303) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.124633144198787, LR: 0.0003 +[2026-03-02 05:57:52] (step=0031304) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.1248288006261005, LR: 0.0003 +[2026-03-02 05:58:00] (step=0031305) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 6.1250244570534145, LR: 0.0003 +[2026-03-02 05:58:08] (step=0031306) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.125220113480728, LR: 0.0003 +[2026-03-02 05:58:16] (step=0031307) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.125415769908042, LR: 0.0003 +[2026-03-02 05:58:24] (step=0031308) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.125611426335355, LR: 0.0003 +[2026-03-02 05:58:32] (step=0031309) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.125807082762669, LR: 0.0003 +[2026-03-02 05:58:40] (step=0031310) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 6.126002739189983, LR: 0.0003 +[2026-03-02 05:58:47] (step=0031311) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 6.126198395617296, LR: 0.0003 +[2026-03-02 05:58:55] (step=0031312) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.12639405204461, LR: 0.0003 +[2026-03-02 05:59:03] (step=0031313) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.126589708471923, LR: 0.0003 +[2026-03-02 05:59:11] (step=0031314) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.126785364899237, LR: 0.0003 +[2026-03-02 05:59:19] (step=0031315) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.126981021326551, LR: 0.0003 +[2026-03-02 05:59:27] (step=0031316) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.127176677753864, LR: 0.0003 +[2026-03-02 05:59:35] (step=0031317) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.127372334181178, LR: 0.0003 +[2026-03-02 05:59:42] (step=0031318) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.127567990608491, LR: 0.0003 +[2026-03-02 05:59:50] (step=0031319) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.127763647035805, LR: 0.0003 +[2026-03-02 05:59:58] (step=0031320) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 6.127959303463118, LR: 0.0003 +[2026-03-02 06:00:06] (step=0031321) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.128154959890432, LR: 0.0003 +[2026-03-02 06:00:14] (step=0031322) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.128350616317746, LR: 0.0003 +[2026-03-02 06:00:22] (step=0031323) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.1285462727450595, LR: 0.0003 +[2026-03-02 06:00:30] (step=0031324) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.1287419291723735, LR: 0.0003 +[2026-03-02 06:00:37] (step=0031325) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.128937585599687, LR: 0.0003 +[2026-03-02 06:00:45] (step=0031326) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.129133242027001, LR: 0.0003 +[2026-03-02 06:00:53] (step=0031327) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.129328898454315, LR: 0.0003 +[2026-03-02 06:01:01] (step=0031328) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.129524554881628, LR: 0.0003 +[2026-03-02 06:01:09] (step=0031329) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.129720211308942, LR: 0.0003 +[2026-03-02 06:01:17] (step=0031330) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.129915867736255, LR: 0.0003 +[2026-03-02 06:01:25] (step=0031331) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.130111524163569, LR: 0.0003 +[2026-03-02 06:01:32] (step=0031332) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.130307180590882, LR: 0.0003 +[2026-03-02 06:01:40] (step=0031333) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.130502837018196, LR: 0.0003 +[2026-03-02 06:01:48] (step=0031334) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.13069849344551, LR: 0.0003 +[2026-03-02 06:01:56] (step=0031335) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.130894149872823, LR: 0.0003 +[2026-03-02 06:02:04] (step=0031336) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.131089806300137, LR: 0.0003 +[2026-03-02 06:02:12] (step=0031337) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.13128546272745, LR: 0.0003 +[2026-03-02 06:02:20] (step=0031338) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.131481119154764, LR: 0.0003 +[2026-03-02 06:02:27] (step=0031339) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.131676775582078, LR: 0.0003 +[2026-03-02 06:02:35] (step=0031340) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.131872432009391, LR: 0.0003 +[2026-03-02 06:02:43] (step=0031341) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.132068088436705, LR: 0.0003 +[2026-03-02 06:02:51] (step=0031342) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.1322637448640185, LR: 0.0003 +[2026-03-02 06:02:59] (step=0031343) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.1324594012913325, LR: 0.0003 +[2026-03-02 06:03:07] (step=0031344) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 6.1326550577186465, LR: 0.0003 +[2026-03-02 06:03:14] (step=0031345) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.13285071414596, LR: 0.0003 +[2026-03-02 06:03:22] (step=0031346) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.133046370573274, LR: 0.0003 +[2026-03-02 06:03:30] (step=0031347) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.133242027000587, LR: 0.0003 +[2026-03-02 06:03:38] (step=0031348) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.133437683427901, LR: 0.0003 +[2026-03-02 06:03:46] (step=0031349) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.133633339855214, LR: 0.0003 +[2026-03-02 06:03:54] (step=0031350) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.133828996282528, LR: 0.0003 +[2026-03-02 06:04:02] (step=0031351) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.134024652709842, LR: 0.0003 +[2026-03-02 06:04:10] (step=0031352) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 6.134220309137155, LR: 0.0003 +[2026-03-02 06:04:17] (step=0031353) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.134415965564469, LR: 0.0003 +[2026-03-02 06:04:25] (step=0031354) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.134611621991782, LR: 0.0003 +[2026-03-02 06:04:33] (step=0031355) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.134807278419096, LR: 0.0003 +[2026-03-02 06:04:41] (step=0031356) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.13500293484641, LR: 0.0003 +[2026-03-02 06:04:49] (step=0031357) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.135198591273723, LR: 0.0003 +[2026-03-02 06:04:57] (step=0031358) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.135394247701037, LR: 0.0003 +[2026-03-02 06:05:05] (step=0031359) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.13558990412835, LR: 0.0003 +[2026-03-02 06:05:12] (step=0031360) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.135785560555664, LR: 0.0003 +[2026-03-02 06:05:20] (step=0031361) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.1359812169829775, LR: 0.0003 +[2026-03-02 06:05:28] (step=0031362) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.1361768734102915, LR: 0.0003 +[2026-03-02 06:05:36] (step=0031363) Train Loss: 0.4435, Train Steps/Sec: 0.12, Epoch: 6.1363725298376055, LR: 0.0003 +[2026-03-02 06:05:44] (step=0031364) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.136568186264919, LR: 0.0003 +[2026-03-02 06:05:52] (step=0031365) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.136763842692233, LR: 0.0003 +[2026-03-02 06:06:00] (step=0031366) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.136959499119546, LR: 0.0003 +[2026-03-02 06:06:08] (step=0031367) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.13715515554686, LR: 0.0003 +[2026-03-02 06:06:15] (step=0031368) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.137350811974174, LR: 0.0003 +[2026-03-02 06:06:23] (step=0031369) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.137546468401487, LR: 0.0003 +[2026-03-02 06:06:31] (step=0031370) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.137742124828801, LR: 0.0003 +[2026-03-02 06:06:39] (step=0031371) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.137937781256114, LR: 0.0003 +[2026-03-02 06:06:47] (step=0031372) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.138133437683428, LR: 0.0003 +[2026-03-02 06:06:55] (step=0031373) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.138329094110741, LR: 0.0003 +[2026-03-02 06:07:03] (step=0031374) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.138524750538055, LR: 0.0003 +[2026-03-02 06:07:10] (step=0031375) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.138720406965369, LR: 0.0003 +[2026-03-02 06:07:18] (step=0031376) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.138916063392682, LR: 0.0003 +[2026-03-02 06:07:26] (step=0031377) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.139111719819996, LR: 0.0003 +[2026-03-02 06:07:34] (step=0031378) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 6.139307376247309, LR: 0.0003 +[2026-03-02 06:07:42] (step=0031379) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 6.139503032674623, LR: 0.0003 +[2026-03-02 06:07:50] (step=0031380) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 6.139698689101937, LR: 0.0003 +[2026-03-02 06:07:58] (step=0031381) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.1398943455292505, LR: 0.0003 +[2026-03-02 06:08:05] (step=0031382) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.1400900019565645, LR: 0.0003 +[2026-03-02 06:08:13] (step=0031383) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.140285658383878, LR: 0.0003 +[2026-03-02 06:08:21] (step=0031384) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.140481314811192, LR: 0.0003 +[2026-03-02 06:08:29] (step=0031385) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.140676971238505, LR: 0.0003 +[2026-03-02 06:08:37] (step=0031386) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.140872627665819, LR: 0.0003 +[2026-03-02 06:08:45] (step=0031387) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.141068284093133, LR: 0.0003 +[2026-03-02 06:08:53] (step=0031388) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.141263940520446, LR: 0.0003 +[2026-03-02 06:09:01] (step=0031389) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.14145959694776, LR: 0.0003 +[2026-03-02 06:09:08] (step=0031390) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.141655253375073, LR: 0.0003 +[2026-03-02 06:09:16] (step=0031391) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.141850909802387, LR: 0.0003 +[2026-03-02 06:09:24] (step=0031392) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.142046566229701, LR: 0.0003 +[2026-03-02 06:09:32] (step=0031393) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.142242222657014, LR: 0.0003 +[2026-03-02 06:09:40] (step=0031394) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.142437879084328, LR: 0.0003 +[2026-03-02 06:09:48] (step=0031395) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 6.142633535511641, LR: 0.0003 +[2026-03-02 06:09:56] (step=0031396) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.142829191938955, LR: 0.0003 +[2026-03-02 06:10:04] (step=0031397) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.143024848366269, LR: 0.0003 +[2026-03-02 06:10:11] (step=0031398) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.143220504793582, LR: 0.0003 +[2026-03-02 06:10:19] (step=0031399) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.143416161220896, LR: 0.0003 +[2026-03-02 06:10:27] (step=0031400) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 6.1436118176482095, LR: 0.0003 +[2026-03-02 06:10:35] (step=0031401) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.1438074740755235, LR: 0.0003 +[2026-03-02 06:10:43] (step=0031402) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.144003130502837, LR: 0.0003 +[2026-03-02 06:10:51] (step=0031403) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.144198786930151, LR: 0.0003 +[2026-03-02 06:10:59] (step=0031404) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 6.144394443357465, LR: 0.0003 +[2026-03-02 06:11:06] (step=0031405) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.144590099784778, LR: 0.0003 +[2026-03-02 06:11:14] (step=0031406) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 6.144785756212092, LR: 0.0003 +[2026-03-02 06:11:22] (step=0031407) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 6.144981412639405, LR: 0.0003 +[2026-03-02 06:11:30] (step=0031408) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.145177069066719, LR: 0.0003 +[2026-03-02 06:11:38] (step=0031409) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.145372725494033, LR: 0.0003 +[2026-03-02 06:11:46] (step=0031410) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 6.145568381921346, LR: 0.0003 +[2026-03-02 06:11:54] (step=0031411) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.14576403834866, LR: 0.0003 +[2026-03-02 06:12:02] (step=0031412) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 6.145959694775973, LR: 0.0003 +[2026-03-02 06:12:09] (step=0031413) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.146155351203287, LR: 0.0003 +[2026-03-02 06:12:17] (step=0031414) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 6.1463510076306, LR: 0.0003 +[2026-03-02 06:12:25] (step=0031415) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.146546664057914, LR: 0.0003 +[2026-03-02 06:12:33] (step=0031416) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.146742320485228, LR: 0.0003 +[2026-03-02 06:12:41] (step=0031417) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.146937976912541, LR: 0.0003 +[2026-03-02 06:12:49] (step=0031418) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.147133633339855, LR: 0.0003 +[2026-03-02 06:12:56] (step=0031419) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.1473292897671685, LR: 0.0003 +[2026-03-02 06:13:04] (step=0031420) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.1475249461944825, LR: 0.0003 +[2026-03-02 06:13:12] (step=0031421) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.1477206026217965, LR: 0.0003 +[2026-03-02 06:13:20] (step=0031422) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.14791625904911, LR: 0.0003 +[2026-03-02 06:13:28] (step=0031423) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.148111915476424, LR: 0.0003 +[2026-03-02 06:13:36] (step=0031424) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.148307571903737, LR: 0.0003 +[2026-03-02 06:13:44] (step=0031425) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.148503228331051, LR: 0.0003 +[2026-03-02 06:13:51] (step=0031426) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.148698884758364, LR: 0.0003 +[2026-03-02 06:13:59] (step=0031427) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.148894541185678, LR: 0.0003 +[2026-03-02 06:14:07] (step=0031428) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.149090197612992, LR: 0.0003 +[2026-03-02 06:14:15] (step=0031429) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.149285854040305, LR: 0.0003 +[2026-03-02 06:14:23] (step=0031430) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.149481510467619, LR: 0.0003 +[2026-03-02 06:14:31] (step=0031431) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.149677166894932, LR: 0.0003 +[2026-03-02 06:14:39] (step=0031432) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 6.149872823322246, LR: 0.0003 +[2026-03-02 06:14:46] (step=0031433) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.15006847974956, LR: 0.0003 +[2026-03-02 06:14:54] (step=0031434) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.150264136176873, LR: 0.0003 +[2026-03-02 06:15:02] (step=0031435) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.150459792604187, LR: 0.0003 +[2026-03-02 06:15:10] (step=0031436) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.1506554490315, LR: 0.0003 +[2026-03-02 06:15:18] (step=0031437) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.150851105458814, LR: 0.0003 +[2026-03-02 06:15:26] (step=0031438) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.1510467618861275, LR: 0.0003 +[2026-03-02 06:15:34] (step=0031439) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.1512424183134415, LR: 0.0003 +[2026-03-02 06:15:41] (step=0031440) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.1514380747407555, LR: 0.0003 +[2026-03-02 06:15:49] (step=0031441) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.151633731168069, LR: 0.0003 +[2026-03-02 06:15:57] (step=0031442) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.151829387595383, LR: 0.0003 +[2026-03-02 06:16:05] (step=0031443) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.152025044022696, LR: 0.0003 +[2026-03-02 06:16:13] (step=0031444) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.15222070045001, LR: 0.0003 +[2026-03-02 06:16:21] (step=0031445) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.152416356877324, LR: 0.0003 +[2026-03-02 06:16:29] (step=0031446) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.152612013304637, LR: 0.0003 +[2026-03-02 06:16:36] (step=0031447) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.152807669731951, LR: 0.0003 +[2026-03-02 06:16:44] (step=0031448) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.153003326159264, LR: 0.0003 +[2026-03-02 06:16:52] (step=0031449) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.153198982586578, LR: 0.0003 +[2026-03-02 06:17:00] (step=0031450) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.153394639013891, LR: 0.0003 +[2026-03-02 06:17:08] (step=0031451) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.153590295441205, LR: 0.0003 +[2026-03-02 06:17:16] (step=0031452) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.153785951868519, LR: 0.0003 +[2026-03-02 06:17:24] (step=0031453) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 6.153981608295832, LR: 0.0003 +[2026-03-02 06:17:31] (step=0031454) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.154177264723146, LR: 0.0003 +[2026-03-02 06:17:39] (step=0031455) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.154372921150459, LR: 0.0003 +[2026-03-02 06:17:47] (step=0031456) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.154568577577773, LR: 0.0003 +[2026-03-02 06:17:55] (step=0031457) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.154764234005087, LR: 0.0003 +[2026-03-02 06:18:03] (step=0031458) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.1549598904324005, LR: 0.0003 +[2026-03-02 06:18:11] (step=0031459) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 6.1551555468597146, LR: 0.0003 +[2026-03-02 06:18:18] (step=0031460) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.155351203287028, LR: 0.0003 +[2026-03-02 06:18:26] (step=0031461) Train Loss: 0.4382, Train Steps/Sec: 0.12, Epoch: 6.155546859714342, LR: 0.0003 +[2026-03-02 06:18:34] (step=0031462) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.155742516141656, LR: 0.0003 +[2026-03-02 06:18:42] (step=0031463) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.155938172568969, LR: 0.0003 +[2026-03-02 06:18:50] (step=0031464) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.156133828996283, LR: 0.0003 +[2026-03-02 06:18:58] (step=0031465) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.156329485423596, LR: 0.0003 +[2026-03-02 06:19:06] (step=0031466) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.15652514185091, LR: 0.0003 +[2026-03-02 06:19:14] (step=0031467) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.156720798278223, LR: 0.0003 +[2026-03-02 06:19:21] (step=0031468) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.156916454705537, LR: 0.0003 +[2026-03-02 06:19:29] (step=0031469) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.157112111132851, LR: 0.0003 +[2026-03-02 06:19:37] (step=0031470) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.157307767560164, LR: 0.0003 +[2026-03-02 06:19:45] (step=0031471) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.157503423987478, LR: 0.0003 +[2026-03-02 06:19:53] (step=0031472) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.157699080414791, LR: 0.0003 +[2026-03-02 06:20:01] (step=0031473) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.157894736842105, LR: 0.0003 +[2026-03-02 06:20:09] (step=0031474) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 6.158090393269419, LR: 0.0003 +[2026-03-02 06:20:16] (step=0031475) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.158286049696732, LR: 0.0003 +[2026-03-02 06:20:24] (step=0031476) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.158481706124046, LR: 0.0003 +[2026-03-02 06:20:32] (step=0031477) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.15867736255136, LR: 0.0003 +[2026-03-02 06:20:40] (step=0031478) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.158873018978674, LR: 0.0003 +[2026-03-02 06:20:48] (step=0031479) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.159068675405987, LR: 0.0003 +[2026-03-02 06:20:56] (step=0031480) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.159264331833301, LR: 0.0003 +[2026-03-02 06:21:04] (step=0031481) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 6.159459988260615, LR: 0.0003 +[2026-03-02 06:21:11] (step=0031482) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.159655644687928, LR: 0.0003 +[2026-03-02 06:21:19] (step=0031483) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.159851301115242, LR: 0.0003 +[2026-03-02 06:21:27] (step=0031484) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.160046957542555, LR: 0.0003 +[2026-03-02 06:21:35] (step=0031485) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.160242613969869, LR: 0.0003 +[2026-03-02 06:21:43] (step=0031486) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.160438270397183, LR: 0.0003 +[2026-03-02 06:21:51] (step=0031487) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 6.160633926824496, LR: 0.0003 +[2026-03-02 06:21:59] (step=0031488) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.16082958325181, LR: 0.0003 +[2026-03-02 06:22:07] (step=0031489) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.161025239679123, LR: 0.0003 +[2026-03-02 06:22:14] (step=0031490) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 6.161220896106437, LR: 0.0003 +[2026-03-02 06:22:22] (step=0031491) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.16141655253375, LR: 0.0003 +[2026-03-02 06:22:30] (step=0031492) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.161612208961064, LR: 0.0003 +[2026-03-02 06:22:38] (step=0031493) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.161807865388378, LR: 0.0003 +[2026-03-02 06:22:46] (step=0031494) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.162003521815691, LR: 0.0003 +[2026-03-02 06:22:54] (step=0031495) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.1621991782430054, LR: 0.0003 +[2026-03-02 06:23:02] (step=0031496) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.162394834670319, LR: 0.0003 +[2026-03-02 06:23:09] (step=0031497) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.162590491097633, LR: 0.0003 +[2026-03-02 06:23:17] (step=0031498) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.162786147524947, LR: 0.0003 +[2026-03-02 06:23:25] (step=0031499) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 6.16298180395226, LR: 0.0003 +[2026-03-02 06:23:33] (step=0031500) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.163177460379574, LR: 0.0003 +[2026-03-02 06:23:33] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0031500/ +[2026-03-02 06:23:41] (step=0031501) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.163373116806887, LR: 0.0003 +[2026-03-02 06:23:49] (step=0031502) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.163568773234201, LR: 0.0003 +[2026-03-02 06:23:57] (step=0031503) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.163764429661514, LR: 0.0003 +[2026-03-02 06:24:05] (step=0031504) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 6.163960086088828, LR: 0.0003 +[2026-03-02 06:24:12] (step=0031505) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 6.164155742516142, LR: 0.0003 +[2026-03-02 06:24:20] (step=0031506) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.164351398943455, LR: 0.0003 +[2026-03-02 06:24:28] (step=0031507) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.164547055370769, LR: 0.0003 +[2026-03-02 06:24:36] (step=0031508) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 6.164742711798082, LR: 0.0003 +[2026-03-02 06:24:44] (step=0031509) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.164938368225396, LR: 0.0003 +[2026-03-02 06:24:52] (step=0031510) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.16513402465271, LR: 0.0003 +[2026-03-02 06:25:00] (step=0031511) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.165329681080023, LR: 0.0003 +[2026-03-02 06:25:08] (step=0031512) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.165525337507337, LR: 0.0003 +[2026-03-02 06:25:15] (step=0031513) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.1657209939346505, LR: 0.0003 +[2026-03-02 06:25:23] (step=0031514) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.1659166503619645, LR: 0.0003 +[2026-03-02 06:25:31] (step=0031515) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 6.1661123067892785, LR: 0.0003 +[2026-03-02 06:25:39] (step=0031516) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 6.166307963216592, LR: 0.0003 +[2026-03-02 06:25:47] (step=0031517) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.166503619643906, LR: 0.0003 +[2026-03-02 06:25:55] (step=0031518) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.166699276071219, LR: 0.0003 +[2026-03-02 06:26:03] (step=0031519) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.166894932498533, LR: 0.0003 +[2026-03-02 06:26:10] (step=0031520) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.167090588925846, LR: 0.0003 +[2026-03-02 06:26:18] (step=0031521) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.16728624535316, LR: 0.0003 +[2026-03-02 06:26:26] (step=0031522) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.167481901780474, LR: 0.0003 +[2026-03-02 06:26:34] (step=0031523) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.167677558207787, LR: 0.0003 +[2026-03-02 06:26:42] (step=0031524) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.167873214635101, LR: 0.0003 +[2026-03-02 06:26:50] (step=0031525) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.168068871062414, LR: 0.0003 +[2026-03-02 06:26:58] (step=0031526) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.168264527489728, LR: 0.0003 +[2026-03-02 06:27:05] (step=0031527) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.168460183917042, LR: 0.0003 +[2026-03-02 06:27:13] (step=0031528) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.168655840344355, LR: 0.0003 +[2026-03-02 06:27:21] (step=0031529) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.168851496771669, LR: 0.0003 +[2026-03-02 06:27:29] (step=0031530) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.169047153198982, LR: 0.0003 +[2026-03-02 06:27:37] (step=0031531) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.169242809626296, LR: 0.0003 +[2026-03-02 06:27:45] (step=0031532) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.1694384660536095, LR: 0.0003 +[2026-03-02 06:27:53] (step=0031533) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.1696341224809235, LR: 0.0003 +[2026-03-02 06:28:01] (step=0031534) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.1698297789082375, LR: 0.0003 +[2026-03-02 06:28:08] (step=0031535) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.170025435335551, LR: 0.0003 +[2026-03-02 06:28:16] (step=0031536) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.170221091762865, LR: 0.0003 +[2026-03-02 06:28:24] (step=0031537) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.170416748190178, LR: 0.0003 +[2026-03-02 06:28:32] (step=0031538) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.170612404617492, LR: 0.0003 +[2026-03-02 06:28:40] (step=0031539) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.170808061044806, LR: 0.0003 +[2026-03-02 06:28:48] (step=0031540) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 6.171003717472119, LR: 0.0003 +[2026-03-02 06:28:56] (step=0031541) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.171199373899433, LR: 0.0003 +[2026-03-02 06:29:04] (step=0031542) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.171395030326746, LR: 0.0003 +[2026-03-02 06:29:11] (step=0031543) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.17159068675406, LR: 0.0003 +[2026-03-02 06:29:19] (step=0031544) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 6.171786343181373, LR: 0.0003 +[2026-03-02 06:29:27] (step=0031545) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.171981999608687, LR: 0.0003 +[2026-03-02 06:29:35] (step=0031546) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.172177656036001, LR: 0.0003 +[2026-03-02 06:29:43] (step=0031547) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.172373312463314, LR: 0.0003 +[2026-03-02 06:29:51] (step=0031548) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.172568968890628, LR: 0.0003 +[2026-03-02 06:29:59] (step=0031549) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.172764625317941, LR: 0.0003 +[2026-03-02 06:30:06] (step=0031550) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.172960281745255, LR: 0.0003 +[2026-03-02 06:30:14] (step=0031551) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.173155938172569, LR: 0.0003 +[2026-03-02 06:30:22] (step=0031552) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.1733515945998825, LR: 0.0003 +[2026-03-02 06:30:30] (step=0031553) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.1735472510271965, LR: 0.0003 +[2026-03-02 06:30:38] (step=0031554) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.17374290745451, LR: 0.0003 +[2026-03-02 06:30:46] (step=0031555) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.173938563881824, LR: 0.0003 +[2026-03-02 06:30:54] (step=0031556) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.174134220309137, LR: 0.0003 +[2026-03-02 06:31:01] (step=0031557) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.174329876736451, LR: 0.0003 +[2026-03-02 06:31:09] (step=0031558) Train Loss: 0.4564, Train Steps/Sec: 0.12, Epoch: 6.174525533163765, LR: 0.0003 +[2026-03-02 06:31:17] (step=0031559) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.174721189591078, LR: 0.0003 +[2026-03-02 06:31:25] (step=0031560) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.174916846018392, LR: 0.0003 +[2026-03-02 06:31:33] (step=0031561) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.175112502445705, LR: 0.0003 +[2026-03-02 06:31:41] (step=0031562) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.175308158873019, LR: 0.0003 +[2026-03-02 06:31:49] (step=0031563) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.175503815300333, LR: 0.0003 +[2026-03-02 06:31:57] (step=0031564) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.175699471727646, LR: 0.0003 +[2026-03-02 06:32:05] (step=0031565) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.17589512815496, LR: 0.0003 +[2026-03-02 06:32:12] (step=0031566) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.176090784582273, LR: 0.0003 +[2026-03-02 06:32:20] (step=0031567) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.176286441009587, LR: 0.0003 +[2026-03-02 06:32:28] (step=0031568) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.176482097436901, LR: 0.0003 +[2026-03-02 06:32:36] (step=0031569) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.176677753864214, LR: 0.0003 +[2026-03-02 06:32:44] (step=0031570) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.176873410291528, LR: 0.0003 +[2026-03-02 06:32:52] (step=0031571) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.1770690667188415, LR: 0.0003 +[2026-03-02 06:33:00] (step=0031572) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.1772647231461555, LR: 0.0003 +[2026-03-02 06:33:07] (step=0031573) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.177460379573469, LR: 0.0003 +[2026-03-02 06:33:15] (step=0031574) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.177656036000783, LR: 0.0003 +[2026-03-02 06:33:23] (step=0031575) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.177851692428097, LR: 0.0003 +[2026-03-02 06:33:31] (step=0031576) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.17804734885541, LR: 0.0003 +[2026-03-02 06:33:39] (step=0031577) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.178243005282724, LR: 0.0003 +[2026-03-02 06:33:47] (step=0031578) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.178438661710037, LR: 0.0003 +[2026-03-02 06:33:55] (step=0031579) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.178634318137351, LR: 0.0003 +[2026-03-02 06:34:02] (step=0031580) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 6.178829974564665, LR: 0.0003 +[2026-03-02 06:34:10] (step=0031581) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.179025630991978, LR: 0.0003 +[2026-03-02 06:34:18] (step=0031582) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.179221287419292, LR: 0.0003 +[2026-03-02 06:34:26] (step=0031583) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 6.179416943846605, LR: 0.0003 +[2026-03-02 06:34:34] (step=0031584) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.179612600273919, LR: 0.0003 +[2026-03-02 06:34:42] (step=0031585) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.179808256701232, LR: 0.0003 +[2026-03-02 06:34:50] (step=0031586) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.180003913128546, LR: 0.0003 +[2026-03-02 06:34:57] (step=0031587) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.18019956955586, LR: 0.0003 +[2026-03-02 06:35:05] (step=0031588) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.180395225983173, LR: 0.0003 +[2026-03-02 06:35:13] (step=0031589) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.180590882410487, LR: 0.0003 +[2026-03-02 06:35:21] (step=0031590) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 6.1807865388378005, LR: 0.0003 +[2026-03-02 06:35:29] (step=0031591) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.1809821952651145, LR: 0.0003 +[2026-03-02 06:35:37] (step=0031592) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.1811778516924285, LR: 0.0003 +[2026-03-02 06:35:45] (step=0031593) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.181373508119742, LR: 0.0003 +[2026-03-02 06:35:53] (step=0031594) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.181569164547056, LR: 0.0003 +[2026-03-02 06:36:00] (step=0031595) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.181764820974369, LR: 0.0003 +[2026-03-02 06:36:08] (step=0031596) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.181960477401683, LR: 0.0003 +[2026-03-02 06:36:16] (step=0031597) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.182156133828996, LR: 0.0003 +[2026-03-02 06:36:24] (step=0031598) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.18235179025631, LR: 0.0003 +[2026-03-02 06:36:32] (step=0031599) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.182547446683624, LR: 0.0003 +[2026-03-02 06:36:40] (step=0031600) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.182743103110937, LR: 0.0003 +[2026-03-02 06:36:48] (step=0031601) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.182938759538251, LR: 0.0003 +[2026-03-02 06:36:55] (step=0031602) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.183134415965564, LR: 0.0003 +[2026-03-02 06:37:03] (step=0031603) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.183330072392878, LR: 0.0003 +[2026-03-02 06:37:11] (step=0031604) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.183525728820192, LR: 0.0003 +[2026-03-02 06:37:19] (step=0031605) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.183721385247505, LR: 0.0003 +[2026-03-02 06:37:27] (step=0031606) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.183917041674819, LR: 0.0003 +[2026-03-02 06:37:35] (step=0031607) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.184112698102132, LR: 0.0003 +[2026-03-02 06:37:43] (step=0031608) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 6.184308354529446, LR: 0.0003 +[2026-03-02 06:37:51] (step=0031609) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.1845040109567595, LR: 0.0003 +[2026-03-02 06:37:58] (step=0031610) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.1846996673840735, LR: 0.0003 +[2026-03-02 06:38:06] (step=0031611) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.1848953238113875, LR: 0.0003 +[2026-03-02 06:38:14] (step=0031612) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.185090980238701, LR: 0.0003 +[2026-03-02 06:38:22] (step=0031613) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.185286636666015, LR: 0.0003 +[2026-03-02 06:38:30] (step=0031614) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.185482293093328, LR: 0.0003 +[2026-03-02 06:38:38] (step=0031615) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.185677949520642, LR: 0.0003 +[2026-03-02 06:38:46] (step=0031616) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.185873605947956, LR: 0.0003 +[2026-03-02 06:38:53] (step=0031617) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.186069262375269, LR: 0.0003 +[2026-03-02 06:39:01] (step=0031618) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 6.186264918802583, LR: 0.0003 +[2026-03-02 06:39:09] (step=0031619) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.186460575229896, LR: 0.0003 +[2026-03-02 06:39:17] (step=0031620) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.18665623165721, LR: 0.0003 +[2026-03-02 06:39:25] (step=0031621) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.186851888084524, LR: 0.0003 +[2026-03-02 06:39:33] (step=0031622) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 6.187047544511837, LR: 0.0003 +[2026-03-02 06:39:41] (step=0031623) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 6.187243200939151, LR: 0.0003 +[2026-03-02 06:39:49] (step=0031624) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.187438857366464, LR: 0.0003 +[2026-03-02 06:39:56] (step=0031625) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.187634513793778, LR: 0.0003 +[2026-03-02 06:40:04] (step=0031626) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.187830170221091, LR: 0.0003 +[2026-03-02 06:40:12] (step=0031627) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.188025826648405, LR: 0.0003 +[2026-03-02 06:40:20] (step=0031628) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.188221483075719, LR: 0.0003 +[2026-03-02 06:40:28] (step=0031629) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.1884171395030325, LR: 0.0003 +[2026-03-02 06:40:36] (step=0031630) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.1886127959303465, LR: 0.0003 +[2026-03-02 06:40:43] (step=0031631) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.18880845235766, LR: 0.0003 +[2026-03-02 06:40:51] (step=0031632) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.189004108784974, LR: 0.0003 +[2026-03-02 06:40:59] (step=0031633) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.189199765212288, LR: 0.0003 +[2026-03-02 06:41:07] (step=0031634) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.189395421639601, LR: 0.0003 +[2026-03-02 06:41:15] (step=0031635) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.189591078066915, LR: 0.0003 +[2026-03-02 06:41:23] (step=0031636) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.189786734494228, LR: 0.0003 +[2026-03-02 06:41:31] (step=0031637) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.189982390921542, LR: 0.0003 +[2026-03-02 06:41:39] (step=0031638) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 6.190178047348855, LR: 0.0003 +[2026-03-02 06:41:46] (step=0031639) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.190373703776169, LR: 0.0003 +[2026-03-02 06:41:54] (step=0031640) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.190569360203483, LR: 0.0003 +[2026-03-02 06:42:02] (step=0031641) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.190765016630796, LR: 0.0003 +[2026-03-02 06:42:10] (step=0031642) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.19096067305811, LR: 0.0003 +[2026-03-02 06:42:18] (step=0031643) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.191156329485423, LR: 0.0003 +[2026-03-02 06:42:26] (step=0031644) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.191351985912737, LR: 0.0003 +[2026-03-02 06:42:34] (step=0031645) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.191547642340051, LR: 0.0003 +[2026-03-02 06:42:42] (step=0031646) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.191743298767364, LR: 0.0003 +[2026-03-02 06:42:49] (step=0031647) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.191938955194678, LR: 0.0003 +[2026-03-02 06:42:57] (step=0031648) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.1921346116219915, LR: 0.0003 +[2026-03-02 06:43:05] (step=0031649) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.1923302680493055, LR: 0.0003 +[2026-03-02 06:43:13] (step=0031650) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 6.192525924476619, LR: 0.0003 +[2026-03-02 06:43:21] (step=0031651) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.192721580903933, LR: 0.0003 +[2026-03-02 06:43:29] (step=0031652) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.192917237331247, LR: 0.0003 +[2026-03-02 06:43:37] (step=0031653) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.19311289375856, LR: 0.0003 +[2026-03-02 06:43:45] (step=0031654) Train Loss: 0.4408, Train Steps/Sec: 0.12, Epoch: 6.193308550185874, LR: 0.0003 +[2026-03-02 06:43:52] (step=0031655) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.193504206613187, LR: 0.0003 +[2026-03-02 06:44:00] (step=0031656) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.193699863040501, LR: 0.0003 +[2026-03-02 06:44:08] (step=0031657) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.193895519467815, LR: 0.0003 +[2026-03-02 06:44:16] (step=0031658) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 6.194091175895128, LR: 0.0003 +[2026-03-02 06:44:24] (step=0031659) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.194286832322442, LR: 0.0003 +[2026-03-02 06:44:32] (step=0031660) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.194482488749755, LR: 0.0003 +[2026-03-02 06:44:40] (step=0031661) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.194678145177069, LR: 0.0003 +[2026-03-02 06:44:47] (step=0031662) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.194873801604382, LR: 0.0003 +[2026-03-02 06:44:55] (step=0031663) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.195069458031696, LR: 0.0003 +[2026-03-02 06:45:03] (step=0031664) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.19526511445901, LR: 0.0003 +[2026-03-02 06:45:11] (step=0031665) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.195460770886323, LR: 0.0003 +[2026-03-02 06:45:19] (step=0031666) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 6.195656427313637, LR: 0.0003 +[2026-03-02 06:45:27] (step=0031667) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.1958520837409505, LR: 0.0003 +[2026-03-02 06:45:35] (step=0031668) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.1960477401682645, LR: 0.0003 +[2026-03-02 06:45:43] (step=0031669) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.1962433965955785, LR: 0.0003 +[2026-03-02 06:45:50] (step=0031670) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.196439053022892, LR: 0.0003 +[2026-03-02 06:45:58] (step=0031671) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.196634709450206, LR: 0.0003 +[2026-03-02 06:46:06] (step=0031672) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.196830365877519, LR: 0.0003 +[2026-03-02 06:46:14] (step=0031673) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 6.197026022304833, LR: 0.0003 +[2026-03-02 06:46:22] (step=0031674) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.197221678732146, LR: 0.0003 +[2026-03-02 06:46:30] (step=0031675) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 6.19741733515946, LR: 0.0003 +[2026-03-02 06:46:38] (step=0031676) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.197612991586774, LR: 0.0003 +[2026-03-02 06:46:45] (step=0031677) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.197808648014087, LR: 0.0003 +[2026-03-02 06:46:53] (step=0031678) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.198004304441401, LR: 0.0003 +[2026-03-02 06:47:01] (step=0031679) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.198199960868714, LR: 0.0003 +[2026-03-02 06:47:09] (step=0031680) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.198395617296028, LR: 0.0003 +[2026-03-02 06:47:17] (step=0031681) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.198591273723342, LR: 0.0003 +[2026-03-02 06:47:25] (step=0031682) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 6.198786930150655, LR: 0.0003 +[2026-03-02 06:47:33] (step=0031683) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.198982586577969, LR: 0.0003 +[2026-03-02 06:47:40] (step=0031684) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.199178243005282, LR: 0.0003 +[2026-03-02 06:47:48] (step=0031685) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.199373899432596, LR: 0.0003 +[2026-03-02 06:47:56] (step=0031686) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.19956955585991, LR: 0.0003 +[2026-03-02 06:48:04] (step=0031687) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.1997652122872235, LR: 0.0003 +[2026-03-02 06:48:12] (step=0031688) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.1999608687145376, LR: 0.0003 +[2026-03-02 06:48:20] (step=0031689) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.200156525141851, LR: 0.0003 +[2026-03-02 06:48:28] (step=0031690) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.200352181569165, LR: 0.0003 +[2026-03-02 06:48:35] (step=0031691) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 6.200547837996478, LR: 0.0003 +[2026-03-02 06:48:43] (step=0031692) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.200743494423792, LR: 0.0003 +[2026-03-02 06:48:51] (step=0031693) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.200939150851106, LR: 0.0003 +[2026-03-02 06:48:59] (step=0031694) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.201134807278419, LR: 0.0003 +[2026-03-02 06:49:07] (step=0031695) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 6.201330463705733, LR: 0.0003 +[2026-03-02 06:49:15] (step=0031696) Train Loss: 0.4500, Train Steps/Sec: 0.12, Epoch: 6.201526120133046, LR: 0.0003 +[2026-03-02 06:49:23] (step=0031697) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.20172177656036, LR: 0.0003 +[2026-03-02 06:49:31] (step=0031698) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.201917432987674, LR: 0.0003 +[2026-03-02 06:49:38] (step=0031699) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.202113089414987, LR: 0.0003 +[2026-03-02 06:49:46] (step=0031700) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.202308745842301, LR: 0.0003 +[2026-03-02 06:49:54] (step=0031701) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.202504402269614, LR: 0.0003 +[2026-03-02 06:50:02] (step=0031702) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.202700058696928, LR: 0.0003 +[2026-03-02 06:50:10] (step=0031703) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.202895715124241, LR: 0.0003 +[2026-03-02 06:50:18] (step=0031704) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.203091371551555, LR: 0.0003 +[2026-03-02 06:50:25] (step=0031705) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.203287027978869, LR: 0.0003 +[2026-03-02 06:50:33] (step=0031706) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 6.203482684406183, LR: 0.0003 +[2026-03-02 06:50:41] (step=0031707) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.203678340833497, LR: 0.0003 +[2026-03-02 06:50:49] (step=0031708) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.20387399726081, LR: 0.0003 +[2026-03-02 06:50:57] (step=0031709) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.204069653688124, LR: 0.0003 +[2026-03-02 06:51:05] (step=0031710) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.204265310115438, LR: 0.0003 +[2026-03-02 06:51:13] (step=0031711) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.204460966542751, LR: 0.0003 +[2026-03-02 06:51:20] (step=0031712) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.204656622970065, LR: 0.0003 +[2026-03-02 06:51:28] (step=0031713) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.204852279397378, LR: 0.0003 +[2026-03-02 06:51:36] (step=0031714) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.205047935824692, LR: 0.0003 +[2026-03-02 06:51:44] (step=0031715) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.205243592252005, LR: 0.0003 +[2026-03-02 06:51:52] (step=0031716) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.205439248679319, LR: 0.0003 +[2026-03-02 06:52:00] (step=0031717) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.205634905106633, LR: 0.0003 +[2026-03-02 06:52:07] (step=0031718) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.205830561533946, LR: 0.0003 +[2026-03-02 06:52:15] (step=0031719) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.20602621796126, LR: 0.0003 +[2026-03-02 06:52:23] (step=0031720) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.206221874388573, LR: 0.0003 +[2026-03-02 06:52:31] (step=0031721) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.206417530815887, LR: 0.0003 +[2026-03-02 06:52:39] (step=0031722) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.206613187243201, LR: 0.0003 +[2026-03-02 06:52:47] (step=0031723) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.206808843670514, LR: 0.0003 +[2026-03-02 06:52:54] (step=0031724) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.2070045000978284, LR: 0.0003 +[2026-03-02 06:53:02] (step=0031725) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.207200156525142, LR: 0.0003 +[2026-03-02 06:53:10] (step=0031726) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 6.207395812952456, LR: 0.0003 +[2026-03-02 06:53:18] (step=0031727) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.207591469379769, LR: 0.0003 +[2026-03-02 06:53:26] (step=0031728) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.207787125807083, LR: 0.0003 +[2026-03-02 06:53:34] (step=0031729) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 6.207982782234397, LR: 0.0003 +[2026-03-02 06:53:42] (step=0031730) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.20817843866171, LR: 0.0003 +[2026-03-02 06:53:49] (step=0031731) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.208374095089024, LR: 0.0003 +[2026-03-02 06:53:57] (step=0031732) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.208569751516337, LR: 0.0003 +[2026-03-02 06:54:05] (step=0031733) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.208765407943651, LR: 0.0003 +[2026-03-02 06:54:13] (step=0031734) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.208961064370965, LR: 0.0003 +[2026-03-02 06:54:21] (step=0031735) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.209156720798278, LR: 0.0003 +[2026-03-02 06:54:29] (step=0031736) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.209352377225592, LR: 0.0003 +[2026-03-02 06:54:36] (step=0031737) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.209548033652905, LR: 0.0003 +[2026-03-02 06:54:44] (step=0031738) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.209743690080219, LR: 0.0003 +[2026-03-02 06:54:52] (step=0031739) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.209939346507533, LR: 0.0003 +[2026-03-02 06:55:00] (step=0031740) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 6.210135002934846, LR: 0.0003 +[2026-03-02 06:55:08] (step=0031741) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.21033065936216, LR: 0.0003 +[2026-03-02 06:55:16] (step=0031742) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.2105263157894735, LR: 0.0003 +[2026-03-02 06:55:24] (step=0031743) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 6.2107219722167875, LR: 0.0003 +[2026-03-02 06:55:32] (step=0031744) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.210917628644101, LR: 0.0003 +[2026-03-02 06:55:39] (step=0031745) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.211113285071415, LR: 0.0003 +[2026-03-02 06:55:47] (step=0031746) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.211308941498729, LR: 0.0003 +[2026-03-02 06:55:55] (step=0031747) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.211504597926042, LR: 0.0003 +[2026-03-02 06:56:03] (step=0031748) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.211700254353356, LR: 0.0003 +[2026-03-02 06:56:11] (step=0031749) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.211895910780669, LR: 0.0003 +[2026-03-02 06:56:19] (step=0031750) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.212091567207983, LR: 0.0003 +[2026-03-02 06:56:26] (step=0031751) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.212287223635297, LR: 0.0003 +[2026-03-02 06:56:34] (step=0031752) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.21248288006261, LR: 0.0003 +[2026-03-02 06:56:42] (step=0031753) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.212678536489924, LR: 0.0003 +[2026-03-02 06:56:50] (step=0031754) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.212874192917237, LR: 0.0003 +[2026-03-02 06:56:58] (step=0031755) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.213069849344551, LR: 0.0003 +[2026-03-02 06:57:06] (step=0031756) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.213265505771864, LR: 0.0003 +[2026-03-02 06:57:14] (step=0031757) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.213461162199178, LR: 0.0003 +[2026-03-02 06:57:21] (step=0031758) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.213656818626492, LR: 0.0003 +[2026-03-02 06:57:29] (step=0031759) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.213852475053805, LR: 0.0003 +[2026-03-02 06:57:37] (step=0031760) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.214048131481119, LR: 0.0003 +[2026-03-02 06:57:45] (step=0031761) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.2142437879084325, LR: 0.0003 +[2026-03-02 06:57:53] (step=0031762) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.2144394443357465, LR: 0.0003 +[2026-03-02 06:58:00] (step=0031763) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.2146351007630605, LR: 0.0003 +[2026-03-02 06:58:08] (step=0031764) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.214830757190374, LR: 0.0003 +[2026-03-02 06:58:16] (step=0031765) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.215026413617688, LR: 0.0003 +[2026-03-02 06:58:24] (step=0031766) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.215222070045001, LR: 0.0003 +[2026-03-02 06:58:32] (step=0031767) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.215417726472315, LR: 0.0003 +[2026-03-02 06:58:40] (step=0031768) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.215613382899628, LR: 0.0003 +[2026-03-02 06:58:48] (step=0031769) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.215809039326942, LR: 0.0003 +[2026-03-02 06:58:55] (step=0031770) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.216004695754256, LR: 0.0003 +[2026-03-02 06:59:03] (step=0031771) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.216200352181569, LR: 0.0003 +[2026-03-02 06:59:11] (step=0031772) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.216396008608883, LR: 0.0003 +[2026-03-02 06:59:19] (step=0031773) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 6.216591665036196, LR: 0.0003 +[2026-03-02 06:59:27] (step=0031774) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.21678732146351, LR: 0.0003 +[2026-03-02 06:59:35] (step=0031775) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.216982977890824, LR: 0.0003 +[2026-03-02 06:59:42] (step=0031776) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 6.217178634318137, LR: 0.0003 +[2026-03-02 06:59:50] (step=0031777) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.217374290745451, LR: 0.0003 +[2026-03-02 06:59:58] (step=0031778) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.217569947172764, LR: 0.0003 +[2026-03-02 07:00:06] (step=0031779) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.217765603600078, LR: 0.0003 +[2026-03-02 07:00:14] (step=0031780) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.2179612600273915, LR: 0.0003 +[2026-03-02 07:00:22] (step=0031781) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.2181569164547055, LR: 0.0003 +[2026-03-02 07:00:30] (step=0031782) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.2183525728820195, LR: 0.0003 +[2026-03-02 07:00:37] (step=0031783) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 6.218548229309333, LR: 0.0003 +[2026-03-02 07:00:45] (step=0031784) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.218743885736647, LR: 0.0003 +[2026-03-02 07:00:53] (step=0031785) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.21893954216396, LR: 0.0003 +[2026-03-02 07:01:01] (step=0031786) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.219135198591274, LR: 0.0003 +[2026-03-02 07:01:09] (step=0031787) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.219330855018588, LR: 0.0003 +[2026-03-02 07:01:17] (step=0031788) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.219526511445901, LR: 0.0003 +[2026-03-02 07:01:25] (step=0031789) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.219722167873215, LR: 0.0003 +[2026-03-02 07:01:32] (step=0031790) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.219917824300528, LR: 0.0003 +[2026-03-02 07:01:40] (step=0031791) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.220113480727842, LR: 0.0003 +[2026-03-02 07:01:48] (step=0031792) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.220309137155156, LR: 0.0003 +[2026-03-02 07:01:56] (step=0031793) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.220504793582469, LR: 0.0003 +[2026-03-02 07:02:04] (step=0031794) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.220700450009783, LR: 0.0003 +[2026-03-02 07:02:12] (step=0031795) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.220896106437096, LR: 0.0003 +[2026-03-02 07:02:19] (step=0031796) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 6.22109176286441, LR: 0.0003 +[2026-03-02 07:02:27] (step=0031797) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.221287419291723, LR: 0.0003 +[2026-03-02 07:02:35] (step=0031798) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 6.221483075719037, LR: 0.0003 +[2026-03-02 07:02:43] (step=0031799) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 6.221678732146351, LR: 0.0003 +[2026-03-02 07:02:51] (step=0031800) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.2218743885736645, LR: 0.0003 +[2026-03-02 07:02:59] (step=0031801) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.2220700450009785, LR: 0.0003 +[2026-03-02 07:03:06] (step=0031802) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.222265701428292, LR: 0.0003 +[2026-03-02 07:03:14] (step=0031803) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.222461357855606, LR: 0.0003 +[2026-03-02 07:03:22] (step=0031804) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.22265701428292, LR: 0.0003 +[2026-03-02 07:03:30] (step=0031805) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.222852670710233, LR: 0.0003 +[2026-03-02 07:03:38] (step=0031806) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.223048327137547, LR: 0.0003 +[2026-03-02 07:03:46] (step=0031807) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.22324398356486, LR: 0.0003 +[2026-03-02 07:03:53] (step=0031808) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.223439639992174, LR: 0.0003 +[2026-03-02 07:04:01] (step=0031809) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.223635296419487, LR: 0.0003 +[2026-03-02 07:04:09] (step=0031810) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.223830952846801, LR: 0.0003 +[2026-03-02 07:04:17] (step=0031811) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.224026609274115, LR: 0.0003 +[2026-03-02 07:04:25] (step=0031812) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.224222265701428, LR: 0.0003 +[2026-03-02 07:04:33] (step=0031813) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.224417922128742, LR: 0.0003 +[2026-03-02 07:04:41] (step=0031814) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.224613578556055, LR: 0.0003 +[2026-03-02 07:04:48] (step=0031815) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.224809234983369, LR: 0.0003 +[2026-03-02 07:04:56] (step=0031816) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.225004891410683, LR: 0.0003 +[2026-03-02 07:05:04] (step=0031817) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.225200547837996, LR: 0.0003 +[2026-03-02 07:05:12] (step=0031818) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.22539620426531, LR: 0.0003 +[2026-03-02 07:05:20] (step=0031819) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 6.2255918606926235, LR: 0.0003 +[2026-03-02 07:05:28] (step=0031820) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 6.2257875171199375, LR: 0.0003 +[2026-03-02 07:05:35] (step=0031821) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.225983173547251, LR: 0.0003 +[2026-03-02 07:05:43] (step=0031822) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 6.226178829974565, LR: 0.0003 +[2026-03-02 07:05:51] (step=0031823) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.226374486401879, LR: 0.0003 +[2026-03-02 07:05:59] (step=0031824) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.226570142829192, LR: 0.0003 +[2026-03-02 07:06:07] (step=0031825) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.226765799256506, LR: 0.0003 +[2026-03-02 07:06:15] (step=0031826) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.226961455683819, LR: 0.0003 +[2026-03-02 07:06:22] (step=0031827) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.227157112111133, LR: 0.0003 +[2026-03-02 07:06:30] (step=0031828) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.227352768538447, LR: 0.0003 +[2026-03-02 07:06:38] (step=0031829) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.22754842496576, LR: 0.0003 +[2026-03-02 07:06:46] (step=0031830) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.227744081393074, LR: 0.0003 +[2026-03-02 07:06:54] (step=0031831) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.227939737820387, LR: 0.0003 +[2026-03-02 07:07:02] (step=0031832) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.228135394247701, LR: 0.0003 +[2026-03-02 07:07:09] (step=0031833) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.228331050675014, LR: 0.0003 +[2026-03-02 07:07:17] (step=0031834) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.228526707102328, LR: 0.0003 +[2026-03-02 07:07:25] (step=0031835) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.228722363529642, LR: 0.0003 +[2026-03-02 07:07:33] (step=0031836) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 6.228918019956955, LR: 0.0003 +[2026-03-02 07:07:41] (step=0031837) Train Loss: 0.4654, Train Steps/Sec: 0.13, Epoch: 6.229113676384269, LR: 0.0003 +[2026-03-02 07:07:49] (step=0031838) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.2293093328115825, LR: 0.0003 +[2026-03-02 07:07:57] (step=0031839) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.2295049892388965, LR: 0.0003 +[2026-03-02 07:08:05] (step=0031840) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 6.2297006456662105, LR: 0.0003 +[2026-03-02 07:08:12] (step=0031841) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.229896302093524, LR: 0.0003 +[2026-03-02 07:08:20] (step=0031842) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.230091958520838, LR: 0.0003 +[2026-03-02 07:08:28] (step=0031843) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.230287614948151, LR: 0.0003 +[2026-03-02 07:08:36] (step=0031844) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.230483271375465, LR: 0.0003 +[2026-03-02 07:08:44] (step=0031845) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.230678927802778, LR: 0.0003 +[2026-03-02 07:08:52] (step=0031846) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.230874584230092, LR: 0.0003 +[2026-03-02 07:08:59] (step=0031847) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 6.231070240657406, LR: 0.0003 +[2026-03-02 07:09:07] (step=0031848) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.231265897084719, LR: 0.0003 +[2026-03-02 07:09:15] (step=0031849) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.231461553512033, LR: 0.0003 +[2026-03-02 07:09:23] (step=0031850) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.231657209939346, LR: 0.0003 +[2026-03-02 07:09:31] (step=0031851) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.23185286636666, LR: 0.0003 +[2026-03-02 07:09:39] (step=0031852) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.232048522793974, LR: 0.0003 +[2026-03-02 07:09:47] (step=0031853) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.232244179221287, LR: 0.0003 +[2026-03-02 07:09:54] (step=0031854) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.232439835648601, LR: 0.0003 +[2026-03-02 07:10:02] (step=0031855) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.232635492075914, LR: 0.0003 +[2026-03-02 07:10:10] (step=0031856) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.232831148503228, LR: 0.0003 +[2026-03-02 07:10:18] (step=0031857) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.233026804930542, LR: 0.0003 +[2026-03-02 07:10:26] (step=0031858) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.2332224613578555, LR: 0.0003 +[2026-03-02 07:10:34] (step=0031859) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.2334181177851695, LR: 0.0003 +[2026-03-02 07:10:41] (step=0031860) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.233613774212483, LR: 0.0003 +[2026-03-02 07:10:49] (step=0031861) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.233809430639797, LR: 0.0003 +[2026-03-02 07:10:57] (step=0031862) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.23400508706711, LR: 0.0003 +[2026-03-02 07:11:05] (step=0031863) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.234200743494424, LR: 0.0003 +[2026-03-02 07:11:13] (step=0031864) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.234396399921738, LR: 0.0003 +[2026-03-02 07:11:21] (step=0031865) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.234592056349051, LR: 0.0003 +[2026-03-02 07:11:28] (step=0031866) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.234787712776365, LR: 0.0003 +[2026-03-02 07:11:36] (step=0031867) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 6.234983369203678, LR: 0.0003 +[2026-03-02 07:11:44] (step=0031868) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 6.235179025630992, LR: 0.0003 +[2026-03-02 07:11:52] (step=0031869) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.235374682058306, LR: 0.0003 +[2026-03-02 07:12:00] (step=0031870) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.235570338485619, LR: 0.0003 +[2026-03-02 07:12:08] (step=0031871) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.235765994912933, LR: 0.0003 +[2026-03-02 07:12:15] (step=0031872) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.235961651340246, LR: 0.0003 +[2026-03-02 07:12:23] (step=0031873) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.23615730776756, LR: 0.0003 +[2026-03-02 07:12:31] (step=0031874) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.236352964194873, LR: 0.0003 +[2026-03-02 07:12:39] (step=0031875) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.236548620622187, LR: 0.0003 +[2026-03-02 07:12:47] (step=0031876) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.236744277049501, LR: 0.0003 +[2026-03-02 07:12:55] (step=0031877) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.2369399334768145, LR: 0.0003 +[2026-03-02 07:13:03] (step=0031878) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.2371355899041285, LR: 0.0003 +[2026-03-02 07:13:10] (step=0031879) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.237331246331442, LR: 0.0003 +[2026-03-02 07:13:18] (step=0031880) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.237526902758756, LR: 0.0003 +[2026-03-02 07:13:26] (step=0031881) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.23772255918607, LR: 0.0003 +[2026-03-02 07:13:34] (step=0031882) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.237918215613383, LR: 0.0003 +[2026-03-02 07:13:42] (step=0031883) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.238113872040697, LR: 0.0003 +[2026-03-02 07:13:50] (step=0031884) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.23830952846801, LR: 0.0003 +[2026-03-02 07:13:58] (step=0031885) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.238505184895324, LR: 0.0003 +[2026-03-02 07:14:06] (step=0031886) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.238700841322637, LR: 0.0003 +[2026-03-02 07:14:13] (step=0031887) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 6.238896497749951, LR: 0.0003 +[2026-03-02 07:14:21] (step=0031888) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.239092154177265, LR: 0.0003 +[2026-03-02 07:14:29] (step=0031889) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.239287810604578, LR: 0.0003 +[2026-03-02 07:14:37] (step=0031890) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.239483467031892, LR: 0.0003 +[2026-03-02 07:14:45] (step=0031891) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.239679123459205, LR: 0.0003 +[2026-03-02 07:14:53] (step=0031892) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.239874779886519, LR: 0.0003 +[2026-03-02 07:15:00] (step=0031893) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.240070436313833, LR: 0.0003 +[2026-03-02 07:15:08] (step=0031894) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.240266092741146, LR: 0.0003 +[2026-03-02 07:15:16] (step=0031895) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 6.24046174916846, LR: 0.0003 +[2026-03-02 07:15:24] (step=0031896) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.2406574055957735, LR: 0.0003 +[2026-03-02 07:15:32] (step=0031897) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.2408530620230875, LR: 0.0003 +[2026-03-02 07:15:40] (step=0031898) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 6.241048718450401, LR: 0.0003 +[2026-03-02 07:15:47] (step=0031899) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.241244374877715, LR: 0.0003 +[2026-03-02 07:15:55] (step=0031900) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.241440031305029, LR: 0.0003 +[2026-03-02 07:16:03] (step=0031901) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.241635687732342, LR: 0.0003 +[2026-03-02 07:16:11] (step=0031902) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 6.241831344159656, LR: 0.0003 +[2026-03-02 07:16:19] (step=0031903) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.242027000586969, LR: 0.0003 +[2026-03-02 07:16:27] (step=0031904) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.242222657014283, LR: 0.0003 +[2026-03-02 07:16:34] (step=0031905) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.242418313441597, LR: 0.0003 +[2026-03-02 07:16:42] (step=0031906) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.24261396986891, LR: 0.0003 +[2026-03-02 07:16:50] (step=0031907) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.242809626296224, LR: 0.0003 +[2026-03-02 07:16:58] (step=0031908) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.243005282723537, LR: 0.0003 +[2026-03-02 07:17:06] (step=0031909) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 6.243200939150851, LR: 0.0003 +[2026-03-02 07:17:14] (step=0031910) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 6.243396595578165, LR: 0.0003 +[2026-03-02 07:17:21] (step=0031911) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.243592252005478, LR: 0.0003 +[2026-03-02 07:17:29] (step=0031912) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.243787908432792, LR: 0.0003 +[2026-03-02 07:17:37] (step=0031913) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.243983564860105, LR: 0.0003 +[2026-03-02 07:17:45] (step=0031914) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 6.244179221287419, LR: 0.0003 +[2026-03-02 07:17:53] (step=0031915) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.2443748777147325, LR: 0.0003 +[2026-03-02 07:18:01] (step=0031916) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.2445705341420465, LR: 0.0003 +[2026-03-02 07:18:08] (step=0031917) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 6.2447661905693606, LR: 0.0003 +[2026-03-02 07:18:16] (step=0031918) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.244961846996674, LR: 0.0003 +[2026-03-02 07:18:24] (step=0031919) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.245157503423988, LR: 0.0003 +[2026-03-02 07:18:32] (step=0031920) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.245353159851301, LR: 0.0003 +[2026-03-02 07:18:40] (step=0031921) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.245548816278615, LR: 0.0003 +[2026-03-02 07:18:48] (step=0031922) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.245744472705929, LR: 0.0003 +[2026-03-02 07:18:55] (step=0031923) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.245940129133242, LR: 0.0003 +[2026-03-02 07:19:03] (step=0031924) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.246135785560556, LR: 0.0003 +[2026-03-02 07:19:11] (step=0031925) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.246331441987869, LR: 0.0003 +[2026-03-02 07:19:19] (step=0031926) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.246527098415183, LR: 0.0003 +[2026-03-02 07:19:27] (step=0031927) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.246722754842496, LR: 0.0003 +[2026-03-02 07:19:35] (step=0031928) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.24691841126981, LR: 0.0003 +[2026-03-02 07:19:43] (step=0031929) Train Loss: 0.4365, Train Steps/Sec: 0.12, Epoch: 6.247114067697124, LR: 0.0003 +[2026-03-02 07:19:51] (step=0031930) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.247309724124437, LR: 0.0003 +[2026-03-02 07:19:59] (step=0031931) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.247505380551751, LR: 0.0003 +[2026-03-02 07:20:06] (step=0031932) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.247701036979064, LR: 0.0003 +[2026-03-02 07:20:14] (step=0031933) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.247896693406378, LR: 0.0003 +[2026-03-02 07:20:22] (step=0031934) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.248092349833692, LR: 0.0003 +[2026-03-02 07:20:30] (step=0031935) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.248288006261006, LR: 0.0003 +[2026-03-02 07:20:38] (step=0031936) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.24848366268832, LR: 0.0003 +[2026-03-02 07:20:46] (step=0031937) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.248679319115633, LR: 0.0003 +[2026-03-02 07:20:53] (step=0031938) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.248874975542947, LR: 0.0003 +[2026-03-02 07:21:01] (step=0031939) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.24907063197026, LR: 0.0003 +[2026-03-02 07:21:09] (step=0031940) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.249266288397574, LR: 0.0003 +[2026-03-02 07:21:17] (step=0031941) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.249461944824888, LR: 0.0003 +[2026-03-02 07:21:25] (step=0031942) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.249657601252201, LR: 0.0003 +[2026-03-02 07:21:33] (step=0031943) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.249853257679515, LR: 0.0003 +[2026-03-02 07:21:40] (step=0031944) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.250048914106828, LR: 0.0003 +[2026-03-02 07:21:48] (step=0031945) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.250244570534142, LR: 0.0003 +[2026-03-02 07:21:56] (step=0031946) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.250440226961456, LR: 0.0003 +[2026-03-02 07:22:04] (step=0031947) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.250635883388769, LR: 0.0003 +[2026-03-02 07:22:12] (step=0031948) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 6.250831539816083, LR: 0.0003 +[2026-03-02 07:22:20] (step=0031949) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.251027196243396, LR: 0.0003 +[2026-03-02 07:22:27] (step=0031950) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.25122285267071, LR: 0.0003 +[2026-03-02 07:22:35] (step=0031951) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.251418509098023, LR: 0.0003 +[2026-03-02 07:22:43] (step=0031952) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.251614165525337, LR: 0.0003 +[2026-03-02 07:22:51] (step=0031953) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.2518098219526514, LR: 0.0003 +[2026-03-02 07:22:59] (step=0031954) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.252005478379965, LR: 0.0003 +[2026-03-02 07:23:07] (step=0031955) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.252201134807279, LR: 0.0003 +[2026-03-02 07:23:14] (step=0031956) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.252396791234592, LR: 0.0003 +[2026-03-02 07:23:22] (step=0031957) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.252592447661906, LR: 0.0003 +[2026-03-02 07:23:30] (step=0031958) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.25278810408922, LR: 0.0003 +[2026-03-02 07:23:38] (step=0031959) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.252983760516533, LR: 0.0003 +[2026-03-02 07:23:46] (step=0031960) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.253179416943847, LR: 0.0003 +[2026-03-02 07:23:54] (step=0031961) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.25337507337116, LR: 0.0003 +[2026-03-02 07:24:01] (step=0031962) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.253570729798474, LR: 0.0003 +[2026-03-02 07:24:09] (step=0031963) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.253766386225788, LR: 0.0003 +[2026-03-02 07:24:17] (step=0031964) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.253962042653101, LR: 0.0003 +[2026-03-02 07:24:25] (step=0031965) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.254157699080415, LR: 0.0003 +[2026-03-02 07:24:33] (step=0031966) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.254353355507728, LR: 0.0003 +[2026-03-02 07:24:41] (step=0031967) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.254549011935042, LR: 0.0003 +[2026-03-02 07:24:48] (step=0031968) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.254744668362355, LR: 0.0003 +[2026-03-02 07:24:56] (step=0031969) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.254940324789669, LR: 0.0003 +[2026-03-02 07:25:04] (step=0031970) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.255135981216983, LR: 0.0003 +[2026-03-02 07:25:12] (step=0031971) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.2553316376442964, LR: 0.0003 +[2026-03-02 07:25:20] (step=0031972) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.2555272940716105, LR: 0.0003 +[2026-03-02 07:25:28] (step=0031973) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.255722950498924, LR: 0.0003 +[2026-03-02 07:25:36] (step=0031974) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.255918606926238, LR: 0.0003 +[2026-03-02 07:25:44] (step=0031975) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.256114263353552, LR: 0.0003 +[2026-03-02 07:25:52] (step=0031976) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.256309919780865, LR: 0.0003 +[2026-03-02 07:25:59] (step=0031977) Train Loss: 0.4266, Train Steps/Sec: 0.13, Epoch: 6.256505576208179, LR: 0.0003 +[2026-03-02 07:26:07] (step=0031978) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.256701232635492, LR: 0.0003 +[2026-03-02 07:26:15] (step=0031979) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.256896889062806, LR: 0.0003 +[2026-03-02 07:26:23] (step=0031980) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.257092545490119, LR: 0.0003 +[2026-03-02 07:26:31] (step=0031981) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.257288201917433, LR: 0.0003 +[2026-03-02 07:26:39] (step=0031982) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.257483858344747, LR: 0.0003 +[2026-03-02 07:26:46] (step=0031983) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.25767951477206, LR: 0.0003 +[2026-03-02 07:26:54] (step=0031984) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.257875171199374, LR: 0.0003 +[2026-03-02 07:27:02] (step=0031985) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.258070827626687, LR: 0.0003 +[2026-03-02 07:27:10] (step=0031986) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.258266484054001, LR: 0.0003 +[2026-03-02 07:27:18] (step=0031987) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.258462140481315, LR: 0.0003 +[2026-03-02 07:27:26] (step=0031988) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.258657796908628, LR: 0.0003 +[2026-03-02 07:27:33] (step=0031989) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 6.258853453335942, LR: 0.0003 +[2026-03-02 07:27:41] (step=0031990) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.2590491097632555, LR: 0.0003 +[2026-03-02 07:27:49] (step=0031991) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.2592447661905695, LR: 0.0003 +[2026-03-02 07:27:57] (step=0031992) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.259440422617883, LR: 0.0003 +[2026-03-02 07:28:05] (step=0031993) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.259636079045197, LR: 0.0003 +[2026-03-02 07:28:13] (step=0031994) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.259831735472511, LR: 0.0003 +[2026-03-02 07:28:20] (step=0031995) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.260027391899824, LR: 0.0003 +[2026-03-02 07:28:28] (step=0031996) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.260223048327138, LR: 0.0003 +[2026-03-02 07:28:36] (step=0031997) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.260418704754451, LR: 0.0003 +[2026-03-02 07:28:44] (step=0031998) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.260614361181765, LR: 0.0003 +[2026-03-02 07:28:52] (step=0031999) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.260810017609079, LR: 0.0003 +[2026-03-02 07:29:00] (step=0032000) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.261005674036392, LR: 0.0003 +[2026-03-02 07:29:00] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0032000/ +[2026-03-02 07:29:07] (step=0032001) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.261201330463706, LR: 0.0003 +[2026-03-02 07:29:15] (step=0032002) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.261396986891019, LR: 0.0003 +[2026-03-02 07:29:23] (step=0032003) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 6.261592643318333, LR: 0.0003 +[2026-03-02 07:29:31] (step=0032004) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.261788299745646, LR: 0.0003 +[2026-03-02 07:29:39] (step=0032005) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.26198395617296, LR: 0.0003 +[2026-03-02 07:29:47] (step=0032006) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.262179612600274, LR: 0.0003 +[2026-03-02 07:29:54] (step=0032007) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.262375269027587, LR: 0.0003 +[2026-03-02 07:30:02] (step=0032008) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.262570925454901, LR: 0.0003 +[2026-03-02 07:30:10] (step=0032009) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 6.2627665818822145, LR: 0.0003 +[2026-03-02 07:30:18] (step=0032010) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.2629622383095285, LR: 0.0003 +[2026-03-02 07:30:26] (step=0032011) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.2631578947368425, LR: 0.0003 +[2026-03-02 07:30:34] (step=0032012) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 6.263353551164156, LR: 0.0003 +[2026-03-02 07:30:41] (step=0032013) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.26354920759147, LR: 0.0003 +[2026-03-02 07:30:49] (step=0032014) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.263744864018783, LR: 0.0003 +[2026-03-02 07:30:57] (step=0032015) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.263940520446097, LR: 0.0003 +[2026-03-02 07:31:05] (step=0032016) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.264136176873411, LR: 0.0003 +[2026-03-02 07:31:13] (step=0032017) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.264331833300724, LR: 0.0003 +[2026-03-02 07:31:21] (step=0032018) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.264527489728038, LR: 0.0003 +[2026-03-02 07:31:29] (step=0032019) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.264723146155351, LR: 0.0003 +[2026-03-02 07:31:36] (step=0032020) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.264918802582665, LR: 0.0003 +[2026-03-02 07:31:44] (step=0032021) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.265114459009978, LR: 0.0003 +[2026-03-02 07:31:52] (step=0032022) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.265310115437292, LR: 0.0003 +[2026-03-02 07:32:00] (step=0032023) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.265505771864606, LR: 0.0003 +[2026-03-02 07:32:08] (step=0032024) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.265701428291919, LR: 0.0003 +[2026-03-02 07:32:16] (step=0032025) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.265897084719233, LR: 0.0003 +[2026-03-02 07:32:24] (step=0032026) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.266092741146546, LR: 0.0003 +[2026-03-02 07:32:31] (step=0032027) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.26628839757386, LR: 0.0003 +[2026-03-02 07:32:39] (step=0032028) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.266484054001174, LR: 0.0003 +[2026-03-02 07:32:47] (step=0032029) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.2666797104284875, LR: 0.0003 +[2026-03-02 07:32:55] (step=0032030) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.2668753668558015, LR: 0.0003 +[2026-03-02 07:33:03] (step=0032031) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 6.267071023283115, LR: 0.0003 +[2026-03-02 07:33:11] (step=0032032) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.267266679710429, LR: 0.0003 +[2026-03-02 07:33:19] (step=0032033) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.267462336137742, LR: 0.0003 +[2026-03-02 07:33:26] (step=0032034) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.267657992565056, LR: 0.0003 +[2026-03-02 07:33:34] (step=0032035) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.26785364899237, LR: 0.0003 +[2026-03-02 07:33:42] (step=0032036) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.268049305419683, LR: 0.0003 +[2026-03-02 07:33:50] (step=0032037) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.268244961846997, LR: 0.0003 +[2026-03-02 07:33:58] (step=0032038) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.26844061827431, LR: 0.0003 +[2026-03-02 07:34:06] (step=0032039) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.268636274701624, LR: 0.0003 +[2026-03-02 07:34:13] (step=0032040) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.268831931128938, LR: 0.0003 +[2026-03-02 07:34:21] (step=0032041) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.269027587556251, LR: 0.0003 +[2026-03-02 07:34:29] (step=0032042) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.269223243983565, LR: 0.0003 +[2026-03-02 07:34:37] (step=0032043) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.269418900410878, LR: 0.0003 +[2026-03-02 07:34:45] (step=0032044) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.269614556838192, LR: 0.0003 +[2026-03-02 07:34:53] (step=0032045) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.269810213265505, LR: 0.0003 +[2026-03-02 07:35:00] (step=0032046) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.270005869692819, LR: 0.0003 +[2026-03-02 07:35:08] (step=0032047) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.270201526120133, LR: 0.0003 +[2026-03-02 07:35:16] (step=0032048) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.2703971825474465, LR: 0.0003 +[2026-03-02 07:35:24] (step=0032049) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.2705928389747605, LR: 0.0003 +[2026-03-02 07:35:32] (step=0032050) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.270788495402074, LR: 0.0003 +[2026-03-02 07:35:40] (step=0032051) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.270984151829388, LR: 0.0003 +[2026-03-02 07:35:48] (step=0032052) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.271179808256702, LR: 0.0003 +[2026-03-02 07:35:55] (step=0032053) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.271375464684015, LR: 0.0003 +[2026-03-02 07:36:03] (step=0032054) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.271571121111329, LR: 0.0003 +[2026-03-02 07:36:11] (step=0032055) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.271766777538642, LR: 0.0003 +[2026-03-02 07:36:19] (step=0032056) Train Loss: 0.4660, Train Steps/Sec: 0.13, Epoch: 6.271962433965956, LR: 0.0003 +[2026-03-02 07:36:27] (step=0032057) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.272158090393269, LR: 0.0003 +[2026-03-02 07:36:35] (step=0032058) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.272353746820583, LR: 0.0003 +[2026-03-02 07:36:42] (step=0032059) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.272549403247897, LR: 0.0003 +[2026-03-02 07:36:50] (step=0032060) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 6.27274505967521, LR: 0.0003 +[2026-03-02 07:36:58] (step=0032061) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.272940716102524, LR: 0.0003 +[2026-03-02 07:37:06] (step=0032062) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.273136372529837, LR: 0.0003 +[2026-03-02 07:37:14] (step=0032063) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.273332028957151, LR: 0.0003 +[2026-03-02 07:37:22] (step=0032064) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.273527685384465, LR: 0.0003 +[2026-03-02 07:37:29] (step=0032065) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.273723341811778, LR: 0.0003 +[2026-03-02 07:37:37] (step=0032066) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.273918998239092, LR: 0.0003 +[2026-03-02 07:37:45] (step=0032067) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.2741146546664055, LR: 0.0003 +[2026-03-02 07:37:53] (step=0032068) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 6.2743103110937195, LR: 0.0003 +[2026-03-02 07:38:01] (step=0032069) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.274505967521033, LR: 0.0003 +[2026-03-02 07:38:09] (step=0032070) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.274701623948347, LR: 0.0003 +[2026-03-02 07:38:17] (step=0032071) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.274897280375661, LR: 0.0003 +[2026-03-02 07:38:24] (step=0032072) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.275092936802974, LR: 0.0003 +[2026-03-02 07:38:32] (step=0032073) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.275288593230288, LR: 0.0003 +[2026-03-02 07:38:40] (step=0032074) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.275484249657601, LR: 0.0003 +[2026-03-02 07:38:48] (step=0032075) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.275679906084915, LR: 0.0003 +[2026-03-02 07:38:56] (step=0032076) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.275875562512229, LR: 0.0003 +[2026-03-02 07:39:04] (step=0032077) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.276071218939542, LR: 0.0003 +[2026-03-02 07:39:11] (step=0032078) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.276266875366856, LR: 0.0003 +[2026-03-02 07:39:19] (step=0032079) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 6.276462531794169, LR: 0.0003 +[2026-03-02 07:39:27] (step=0032080) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 6.276658188221483, LR: 0.0003 +[2026-03-02 07:39:35] (step=0032081) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.276853844648797, LR: 0.0003 +[2026-03-02 07:39:43] (step=0032082) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.27704950107611, LR: 0.0003 +[2026-03-02 07:39:51] (step=0032083) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.277245157503424, LR: 0.0003 +[2026-03-02 07:39:58] (step=0032084) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.277440813930737, LR: 0.0003 +[2026-03-02 07:40:06] (step=0032085) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.277636470358051, LR: 0.0003 +[2026-03-02 07:40:14] (step=0032086) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 6.2778321267853645, LR: 0.0003 +[2026-03-02 07:40:22] (step=0032087) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.2780277832126785, LR: 0.0003 +[2026-03-02 07:40:30] (step=0032088) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.2782234396399925, LR: 0.0003 +[2026-03-02 07:40:38] (step=0032089) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.278419096067306, LR: 0.0003 +[2026-03-02 07:40:45] (step=0032090) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.27861475249462, LR: 0.0003 +[2026-03-02 07:40:53] (step=0032091) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.278810408921933, LR: 0.0003 +[2026-03-02 07:41:01] (step=0032092) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.279006065349247, LR: 0.0003 +[2026-03-02 07:41:09] (step=0032093) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 6.279201721776561, LR: 0.0003 +[2026-03-02 07:41:17] (step=0032094) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.279397378203874, LR: 0.0003 +[2026-03-02 07:41:25] (step=0032095) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 6.279593034631188, LR: 0.0003 +[2026-03-02 07:41:32] (step=0032096) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.279788691058501, LR: 0.0003 +[2026-03-02 07:41:40] (step=0032097) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.279984347485815, LR: 0.0003 +[2026-03-02 07:41:48] (step=0032098) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.280180003913128, LR: 0.0003 +[2026-03-02 07:41:56] (step=0032099) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.280375660340442, LR: 0.0003 +[2026-03-02 07:42:04] (step=0032100) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.280571316767756, LR: 0.0003 +[2026-03-02 07:42:12] (step=0032101) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.280766973195069, LR: 0.0003 +[2026-03-02 07:42:19] (step=0032102) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.280962629622383, LR: 0.0003 +[2026-03-02 07:42:27] (step=0032103) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.281158286049696, LR: 0.0003 +[2026-03-02 07:42:35] (step=0032104) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.28135394247701, LR: 0.0003 +[2026-03-02 07:42:43] (step=0032105) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.281549598904324, LR: 0.0003 +[2026-03-02 07:42:51] (step=0032106) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.2817452553316375, LR: 0.0003 +[2026-03-02 07:42:59] (step=0032107) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.2819409117589515, LR: 0.0003 +[2026-03-02 07:43:06] (step=0032108) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.282136568186265, LR: 0.0003 +[2026-03-02 07:43:14] (step=0032109) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.282332224613579, LR: 0.0003 +[2026-03-02 07:43:22] (step=0032110) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.282527881040892, LR: 0.0003 +[2026-03-02 07:43:30] (step=0032111) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.282723537468206, LR: 0.0003 +[2026-03-02 07:43:38] (step=0032112) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.28291919389552, LR: 0.0003 +[2026-03-02 07:43:46] (step=0032113) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.283114850322833, LR: 0.0003 +[2026-03-02 07:43:53] (step=0032114) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.283310506750147, LR: 0.0003 +[2026-03-02 07:44:01] (step=0032115) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.28350616317746, LR: 0.0003 +[2026-03-02 07:44:09] (step=0032116) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 6.283701819604774, LR: 0.0003 +[2026-03-02 07:44:17] (step=0032117) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.283897476032088, LR: 0.0003 +[2026-03-02 07:44:25] (step=0032118) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.284093132459401, LR: 0.0003 +[2026-03-02 07:44:33] (step=0032119) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 6.284288788886715, LR: 0.0003 +[2026-03-02 07:44:41] (step=0032120) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.284484445314028, LR: 0.0003 +[2026-03-02 07:44:48] (step=0032121) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.284680101741342, LR: 0.0003 +[2026-03-02 07:44:56] (step=0032122) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.284875758168655, LR: 0.0003 +[2026-03-02 07:45:04] (step=0032123) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.285071414595969, LR: 0.0003 +[2026-03-02 07:45:12] (step=0032124) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.285267071023283, LR: 0.0003 +[2026-03-02 07:45:20] (step=0032125) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.2854627274505965, LR: 0.0003 +[2026-03-02 07:45:28] (step=0032126) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 6.2856583838779105, LR: 0.0003 +[2026-03-02 07:45:35] (step=0032127) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.285854040305224, LR: 0.0003 +[2026-03-02 07:45:43] (step=0032128) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 6.286049696732538, LR: 0.0003 +[2026-03-02 07:45:51] (step=0032129) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.286245353159852, LR: 0.0003 +[2026-03-02 07:45:59] (step=0032130) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.286441009587165, LR: 0.0003 +[2026-03-02 07:46:07] (step=0032131) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.286636666014479, LR: 0.0003 +[2026-03-02 07:46:15] (step=0032132) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.286832322441792, LR: 0.0003 +[2026-03-02 07:46:23] (step=0032133) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.287027978869106, LR: 0.0003 +[2026-03-02 07:46:30] (step=0032134) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.28722363529642, LR: 0.0003 +[2026-03-02 07:46:38] (step=0032135) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.287419291723733, LR: 0.0003 +[2026-03-02 07:46:46] (step=0032136) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 6.287614948151047, LR: 0.0003 +[2026-03-02 07:46:54] (step=0032137) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 6.28781060457836, LR: 0.0003 +[2026-03-02 07:47:02] (step=0032138) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.288006261005674, LR: 0.0003 +[2026-03-02 07:47:10] (step=0032139) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.288201917432987, LR: 0.0003 +[2026-03-02 07:47:17] (step=0032140) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.288397573860301, LR: 0.0003 +[2026-03-02 07:47:25] (step=0032141) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.288593230287615, LR: 0.0003 +[2026-03-02 07:47:33] (step=0032142) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.288788886714928, LR: 0.0003 +[2026-03-02 07:47:41] (step=0032143) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.288984543142242, LR: 0.0003 +[2026-03-02 07:47:49] (step=0032144) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.2891801995695555, LR: 0.0003 +[2026-03-02 07:47:57] (step=0032145) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.2893758559968695, LR: 0.0003 +[2026-03-02 07:48:04] (step=0032146) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.2895715124241836, LR: 0.0003 +[2026-03-02 07:48:12] (step=0032147) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 6.289767168851497, LR: 0.0003 +[2026-03-02 07:48:20] (step=0032148) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.289962825278811, LR: 0.0003 +[2026-03-02 07:48:28] (step=0032149) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.290158481706124, LR: 0.0003 +[2026-03-02 07:48:36] (step=0032150) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 6.290354138133438, LR: 0.0003 +[2026-03-02 07:48:44] (step=0032151) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.290549794560751, LR: 0.0003 +[2026-03-02 07:48:51] (step=0032152) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.290745450988065, LR: 0.0003 +[2026-03-02 07:48:59] (step=0032153) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.290941107415379, LR: 0.0003 +[2026-03-02 07:49:07] (step=0032154) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.291136763842692, LR: 0.0003 +[2026-03-02 07:49:15] (step=0032155) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.291332420270006, LR: 0.0003 +[2026-03-02 07:49:23] (step=0032156) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.291528076697319, LR: 0.0003 +[2026-03-02 07:49:31] (step=0032157) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.291723733124633, LR: 0.0003 +[2026-03-02 07:49:38] (step=0032158) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.291919389551947, LR: 0.0003 +[2026-03-02 07:49:46] (step=0032159) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.29211504597926, LR: 0.0003 +[2026-03-02 07:49:54] (step=0032160) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.292310702406574, LR: 0.0003 +[2026-03-02 07:50:02] (step=0032161) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.292506358833887, LR: 0.0003 +[2026-03-02 07:50:10] (step=0032162) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.292702015261201, LR: 0.0003 +[2026-03-02 07:50:18] (step=0032163) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.2928976716885145, LR: 0.0003 +[2026-03-02 07:50:25] (step=0032164) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.2930933281158286, LR: 0.0003 +[2026-03-02 07:50:33] (step=0032165) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.293288984543143, LR: 0.0003 +[2026-03-02 07:50:41] (step=0032166) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.293484640970456, LR: 0.0003 +[2026-03-02 07:50:49] (step=0032167) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.29368029739777, LR: 0.0003 +[2026-03-02 07:50:57] (step=0032168) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.293875953825083, LR: 0.0003 +[2026-03-02 07:51:05] (step=0032169) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.294071610252397, LR: 0.0003 +[2026-03-02 07:51:13] (step=0032170) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.294267266679711, LR: 0.0003 +[2026-03-02 07:51:20] (step=0032171) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.294462923107024, LR: 0.0003 +[2026-03-02 07:51:28] (step=0032172) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.294658579534338, LR: 0.0003 +[2026-03-02 07:51:36] (step=0032173) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 6.294854235961651, LR: 0.0003 +[2026-03-02 07:51:44] (step=0032174) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.295049892388965, LR: 0.0003 +[2026-03-02 07:51:52] (step=0032175) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.295245548816278, LR: 0.0003 +[2026-03-02 07:52:00] (step=0032176) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.295441205243592, LR: 0.0003 +[2026-03-02 07:52:07] (step=0032177) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.295636861670906, LR: 0.0003 +[2026-03-02 07:52:15] (step=0032178) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.295832518098219, LR: 0.0003 +[2026-03-02 07:52:23] (step=0032179) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.296028174525533, LR: 0.0003 +[2026-03-02 07:52:31] (step=0032180) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.296223830952846, LR: 0.0003 +[2026-03-02 07:52:39] (step=0032181) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.29641948738016, LR: 0.0003 +[2026-03-02 07:52:47] (step=0032182) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.2966151438074744, LR: 0.0003 +[2026-03-02 07:52:55] (step=0032183) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.296810800234788, LR: 0.0003 +[2026-03-02 07:53:02] (step=0032184) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.297006456662102, LR: 0.0003 +[2026-03-02 07:53:10] (step=0032185) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.297202113089415, LR: 0.0003 +[2026-03-02 07:53:18] (step=0032186) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.297397769516729, LR: 0.0003 +[2026-03-02 07:53:26] (step=0032187) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.297593425944043, LR: 0.0003 +[2026-03-02 07:53:34] (step=0032188) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 6.297789082371356, LR: 0.0003 +[2026-03-02 07:53:41] (step=0032189) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.29798473879867, LR: 0.0003 +[2026-03-02 07:53:49] (step=0032190) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.298180395225983, LR: 0.0003 +[2026-03-02 07:53:57] (step=0032191) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.298376051653297, LR: 0.0003 +[2026-03-02 07:54:05] (step=0032192) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.29857170808061, LR: 0.0003 +[2026-03-02 07:54:13] (step=0032193) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 6.298767364507924, LR: 0.0003 +[2026-03-02 07:54:21] (step=0032194) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.298963020935238, LR: 0.0003 +[2026-03-02 07:54:28] (step=0032195) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.299158677362551, LR: 0.0003 +[2026-03-02 07:54:36] (step=0032196) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.299354333789865, LR: 0.0003 +[2026-03-02 07:54:44] (step=0032197) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.299549990217178, LR: 0.0003 +[2026-03-02 07:54:52] (step=0032198) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.299745646644492, LR: 0.0003 +[2026-03-02 07:55:00] (step=0032199) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.299941303071806, LR: 0.0003 +[2026-03-02 07:55:08] (step=0032200) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.3001369594991194, LR: 0.0003 +[2026-03-02 07:55:15] (step=0032201) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.3003326159264335, LR: 0.0003 +[2026-03-02 07:55:23] (step=0032202) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.300528272353747, LR: 0.0003 +[2026-03-02 07:55:31] (step=0032203) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.300723928781061, LR: 0.0003 +[2026-03-02 07:55:39] (step=0032204) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.300919585208374, LR: 0.0003 +[2026-03-02 07:55:47] (step=0032205) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.301115241635688, LR: 0.0003 +[2026-03-02 07:55:55] (step=0032206) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.301310898063002, LR: 0.0003 +[2026-03-02 07:56:02] (step=0032207) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.301506554490315, LR: 0.0003 +[2026-03-02 07:56:10] (step=0032208) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.301702210917629, LR: 0.0003 +[2026-03-02 07:56:18] (step=0032209) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.301897867344942, LR: 0.0003 +[2026-03-02 07:56:26] (step=0032210) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.302093523772256, LR: 0.0003 +[2026-03-02 07:56:34] (step=0032211) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.30228918019957, LR: 0.0003 +[2026-03-02 07:56:42] (step=0032212) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.302484836626883, LR: 0.0003 +[2026-03-02 07:56:49] (step=0032213) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.302680493054197, LR: 0.0003 +[2026-03-02 07:56:57] (step=0032214) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.30287614948151, LR: 0.0003 +[2026-03-02 07:57:05] (step=0032215) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.303071805908824, LR: 0.0003 +[2026-03-02 07:57:13] (step=0032216) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 6.303267462336137, LR: 0.0003 +[2026-03-02 07:57:21] (step=0032217) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.303463118763451, LR: 0.0003 +[2026-03-02 07:57:29] (step=0032218) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.303658775190765, LR: 0.0003 +[2026-03-02 07:57:36] (step=0032219) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.3038544316180785, LR: 0.0003 +[2026-03-02 07:57:44] (step=0032220) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.3040500880453925, LR: 0.0003 +[2026-03-02 07:57:52] (step=0032221) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 6.304245744472706, LR: 0.0003 +[2026-03-02 07:58:00] (step=0032222) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 6.30444140090002, LR: 0.0003 +[2026-03-02 07:58:08] (step=0032223) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.304637057327334, LR: 0.0003 +[2026-03-02 07:58:16] (step=0032224) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.304832713754647, LR: 0.0003 +[2026-03-02 07:58:24] (step=0032225) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.305028370181961, LR: 0.0003 +[2026-03-02 07:58:31] (step=0032226) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.305224026609274, LR: 0.0003 +[2026-03-02 07:58:39] (step=0032227) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.305419683036588, LR: 0.0003 +[2026-03-02 07:58:47] (step=0032228) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.305615339463901, LR: 0.0003 +[2026-03-02 07:58:55] (step=0032229) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.305810995891215, LR: 0.0003 +[2026-03-02 07:59:03] (step=0032230) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.306006652318529, LR: 0.0003 +[2026-03-02 07:59:11] (step=0032231) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.306202308745842, LR: 0.0003 +[2026-03-02 07:59:19] (step=0032232) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.306397965173156, LR: 0.0003 +[2026-03-02 07:59:26] (step=0032233) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.306593621600469, LR: 0.0003 +[2026-03-02 07:59:34] (step=0032234) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.306789278027783, LR: 0.0003 +[2026-03-02 07:59:42] (step=0032235) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.306984934455097, LR: 0.0003 +[2026-03-02 07:59:50] (step=0032236) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.30718059088241, LR: 0.0003 +[2026-03-02 07:59:58] (step=0032237) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.307376247309724, LR: 0.0003 +[2026-03-02 08:00:06] (step=0032238) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.3075719037370375, LR: 0.0003 +[2026-03-02 08:00:13] (step=0032239) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.3077675601643515, LR: 0.0003 +[2026-03-02 08:00:21] (step=0032240) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.3079632165916655, LR: 0.0003 +[2026-03-02 08:00:29] (step=0032241) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.308158873018979, LR: 0.0003 +[2026-03-02 08:00:37] (step=0032242) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.308354529446293, LR: 0.0003 +[2026-03-02 08:00:45] (step=0032243) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.308550185873606, LR: 0.0003 +[2026-03-02 08:00:53] (step=0032244) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.30874584230092, LR: 0.0003 +[2026-03-02 08:01:00] (step=0032245) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.308941498728233, LR: 0.0003 +[2026-03-02 08:01:08] (step=0032246) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.309137155155547, LR: 0.0003 +[2026-03-02 08:01:16] (step=0032247) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.309332811582861, LR: 0.0003 +[2026-03-02 08:01:24] (step=0032248) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.309528468010174, LR: 0.0003 +[2026-03-02 08:01:32] (step=0032249) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.309724124437488, LR: 0.0003 +[2026-03-02 08:01:40] (step=0032250) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.309919780864801, LR: 0.0003 +[2026-03-02 08:01:48] (step=0032251) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.310115437292115, LR: 0.0003 +[2026-03-02 08:01:55] (step=0032252) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.310311093719429, LR: 0.0003 +[2026-03-02 08:02:03] (step=0032253) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.310506750146742, LR: 0.0003 +[2026-03-02 08:02:11] (step=0032254) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.310702406574056, LR: 0.0003 +[2026-03-02 08:02:19] (step=0032255) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.310898063001369, LR: 0.0003 +[2026-03-02 08:02:27] (step=0032256) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 6.311093719428683, LR: 0.0003 +[2026-03-02 08:02:35] (step=0032257) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.3112893758559965, LR: 0.0003 +[2026-03-02 08:02:42] (step=0032258) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.3114850322833105, LR: 0.0003 +[2026-03-02 08:02:50] (step=0032259) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 6.3116806887106245, LR: 0.0003 +[2026-03-02 08:02:58] (step=0032260) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.311876345137938, LR: 0.0003 +[2026-03-02 08:03:06] (step=0032261) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.312072001565252, LR: 0.0003 +[2026-03-02 08:03:14] (step=0032262) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.312267657992565, LR: 0.0003 +[2026-03-02 08:03:22] (step=0032263) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.312463314419879, LR: 0.0003 +[2026-03-02 08:03:29] (step=0032264) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.312658970847193, LR: 0.0003 +[2026-03-02 08:03:37] (step=0032265) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.312854627274506, LR: 0.0003 +[2026-03-02 08:03:45] (step=0032266) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.31305028370182, LR: 0.0003 +[2026-03-02 08:03:53] (step=0032267) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.313245940129133, LR: 0.0003 +[2026-03-02 08:04:01] (step=0032268) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 6.313441596556447, LR: 0.0003 +[2026-03-02 08:04:09] (step=0032269) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.31363725298376, LR: 0.0003 +[2026-03-02 08:04:16] (step=0032270) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.313832909411074, LR: 0.0003 +[2026-03-02 08:04:24] (step=0032271) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.314028565838388, LR: 0.0003 +[2026-03-02 08:04:32] (step=0032272) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 6.314224222265701, LR: 0.0003 +[2026-03-02 08:04:40] (step=0032273) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.314419878693015, LR: 0.0003 +[2026-03-02 08:04:48] (step=0032274) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.314615535120328, LR: 0.0003 +[2026-03-02 08:04:56] (step=0032275) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.314811191547642, LR: 0.0003 +[2026-03-02 08:05:04] (step=0032276) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 6.315006847974956, LR: 0.0003 +[2026-03-02 08:05:11] (step=0032277) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.3152025044022695, LR: 0.0003 +[2026-03-02 08:05:19] (step=0032278) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.3153981608295835, LR: 0.0003 +[2026-03-02 08:05:27] (step=0032279) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.315593817256897, LR: 0.0003 +[2026-03-02 08:05:35] (step=0032280) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.315789473684211, LR: 0.0003 +[2026-03-02 08:05:43] (step=0032281) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.315985130111524, LR: 0.0003 +[2026-03-02 08:05:51] (step=0032282) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.316180786538838, LR: 0.0003 +[2026-03-02 08:05:59] (step=0032283) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.316376442966152, LR: 0.0003 +[2026-03-02 08:06:06] (step=0032284) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.316572099393465, LR: 0.0003 +[2026-03-02 08:06:14] (step=0032285) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.316767755820779, LR: 0.0003 +[2026-03-02 08:06:22] (step=0032286) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 6.316963412248092, LR: 0.0003 +[2026-03-02 08:06:30] (step=0032287) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.317159068675406, LR: 0.0003 +[2026-03-02 08:06:38] (step=0032288) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.31735472510272, LR: 0.0003 +[2026-03-02 08:06:46] (step=0032289) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.317550381530033, LR: 0.0003 +[2026-03-02 08:06:53] (step=0032290) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 6.317746037957347, LR: 0.0003 +[2026-03-02 08:07:01] (step=0032291) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.31794169438466, LR: 0.0003 +[2026-03-02 08:07:09] (step=0032292) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.318137350811974, LR: 0.0003 +[2026-03-02 08:07:17] (step=0032293) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.318333007239287, LR: 0.0003 +[2026-03-02 08:07:25] (step=0032294) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 6.318528663666601, LR: 0.0003 +[2026-03-02 08:07:33] (step=0032295) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.318724320093915, LR: 0.0003 +[2026-03-02 08:07:40] (step=0032296) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.3189199765212285, LR: 0.0003 +[2026-03-02 08:07:48] (step=0032297) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.3191156329485425, LR: 0.0003 +[2026-03-02 08:07:56] (step=0032298) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 6.319311289375856, LR: 0.0003 +[2026-03-02 08:08:04] (step=0032299) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.31950694580317, LR: 0.0003 +[2026-03-02 08:08:12] (step=0032300) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.319702602230484, LR: 0.0003 +[2026-03-02 08:08:20] (step=0032301) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.319898258657797, LR: 0.0003 +[2026-03-02 08:08:27] (step=0032302) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.320093915085111, LR: 0.0003 +[2026-03-02 08:08:35] (step=0032303) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 6.320289571512424, LR: 0.0003 +[2026-03-02 08:08:43] (step=0032304) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.320485227939738, LR: 0.0003 +[2026-03-02 08:08:51] (step=0032305) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.320680884367052, LR: 0.0003 +[2026-03-02 08:08:59] (step=0032306) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.320876540794365, LR: 0.0003 +[2026-03-02 08:09:07] (step=0032307) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 6.321072197221679, LR: 0.0003 +[2026-03-02 08:09:14] (step=0032308) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.321267853648992, LR: 0.0003 +[2026-03-02 08:09:22] (step=0032309) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 6.321463510076306, LR: 0.0003 +[2026-03-02 08:09:30] (step=0032310) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.321659166503619, LR: 0.0003 +[2026-03-02 08:09:38] (step=0032311) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.321854822930933, LR: 0.0003 +[2026-03-02 08:09:46] (step=0032312) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.322050479358247, LR: 0.0003 +[2026-03-02 08:09:54] (step=0032313) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.32224613578556, LR: 0.0003 +[2026-03-02 08:10:01] (step=0032314) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.322441792212874, LR: 0.0003 +[2026-03-02 08:10:09] (step=0032315) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.3226374486401875, LR: 0.0003 +[2026-03-02 08:10:17] (step=0032316) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.3228331050675015, LR: 0.0003 +[2026-03-02 08:10:25] (step=0032317) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.3230287614948155, LR: 0.0003 +[2026-03-02 08:10:33] (step=0032318) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.323224417922129, LR: 0.0003 +[2026-03-02 08:10:41] (step=0032319) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.323420074349443, LR: 0.0003 +[2026-03-02 08:10:49] (step=0032320) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 6.323615730776756, LR: 0.0003 +[2026-03-02 08:10:56] (step=0032321) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.32381138720407, LR: 0.0003 +[2026-03-02 08:11:04] (step=0032322) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.324007043631383, LR: 0.0003 +[2026-03-02 08:11:12] (step=0032323) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.324202700058697, LR: 0.0003 +[2026-03-02 08:11:20] (step=0032324) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.324398356486011, LR: 0.0003 +[2026-03-02 08:11:28] (step=0032325) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.324594012913324, LR: 0.0003 +[2026-03-02 08:11:36] (step=0032326) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.324789669340638, LR: 0.0003 +[2026-03-02 08:11:43] (step=0032327) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.324985325767951, LR: 0.0003 +[2026-03-02 08:11:51] (step=0032328) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.325180982195265, LR: 0.0003 +[2026-03-02 08:11:59] (step=0032329) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.325376638622579, LR: 0.0003 +[2026-03-02 08:12:07] (step=0032330) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.325572295049892, LR: 0.0003 +[2026-03-02 08:12:15] (step=0032331) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.325767951477206, LR: 0.0003 +[2026-03-02 08:12:23] (step=0032332) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.325963607904519, LR: 0.0003 +[2026-03-02 08:12:30] (step=0032333) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.326159264331833, LR: 0.0003 +[2026-03-02 08:12:38] (step=0032334) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.3263549207591465, LR: 0.0003 +[2026-03-02 08:12:46] (step=0032335) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.3265505771864605, LR: 0.0003 +[2026-03-02 08:12:54] (step=0032336) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.3267462336137745, LR: 0.0003 +[2026-03-02 08:13:02] (step=0032337) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.326941890041088, LR: 0.0003 +[2026-03-02 08:13:10] (step=0032338) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 6.327137546468402, LR: 0.0003 +[2026-03-02 08:13:18] (step=0032339) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 6.327333202895715, LR: 0.0003 +[2026-03-02 08:13:25] (step=0032340) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.327528859323029, LR: 0.0003 +[2026-03-02 08:13:33] (step=0032341) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.327724515750343, LR: 0.0003 +[2026-03-02 08:13:41] (step=0032342) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.327920172177656, LR: 0.0003 +[2026-03-02 08:13:49] (step=0032343) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.32811582860497, LR: 0.0003 +[2026-03-02 08:13:57] (step=0032344) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 6.328311485032283, LR: 0.0003 +[2026-03-02 08:14:05] (step=0032345) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.328507141459597, LR: 0.0003 +[2026-03-02 08:14:12] (step=0032346) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.32870279788691, LR: 0.0003 +[2026-03-02 08:14:20] (step=0032347) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.328898454314224, LR: 0.0003 +[2026-03-02 08:14:28] (step=0032348) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.329094110741538, LR: 0.0003 +[2026-03-02 08:14:36] (step=0032349) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.329289767168851, LR: 0.0003 +[2026-03-02 08:14:44] (step=0032350) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.329485423596165, LR: 0.0003 +[2026-03-02 08:14:52] (step=0032351) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.329681080023478, LR: 0.0003 +[2026-03-02 08:14:59] (step=0032352) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.329876736450792, LR: 0.0003 +[2026-03-02 08:15:07] (step=0032353) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.330072392878106, LR: 0.0003 +[2026-03-02 08:15:15] (step=0032354) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.3302680493054195, LR: 0.0003 +[2026-03-02 08:15:23] (step=0032355) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.3304637057327335, LR: 0.0003 +[2026-03-02 08:15:31] (step=0032356) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.330659362160047, LR: 0.0003 +[2026-03-02 08:15:39] (step=0032357) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.330855018587361, LR: 0.0003 +[2026-03-02 08:15:46] (step=0032358) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.331050675014675, LR: 0.0003 +[2026-03-02 08:15:54] (step=0032359) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.331246331441988, LR: 0.0003 +[2026-03-02 08:16:02] (step=0032360) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.331441987869302, LR: 0.0003 +[2026-03-02 08:16:10] (step=0032361) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.331637644296615, LR: 0.0003 +[2026-03-02 08:16:18] (step=0032362) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.331833300723929, LR: 0.0003 +[2026-03-02 08:16:26] (step=0032363) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 6.332028957151242, LR: 0.0003 +[2026-03-02 08:16:34] (step=0032364) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.332224613578556, LR: 0.0003 +[2026-03-02 08:16:41] (step=0032365) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.33242027000587, LR: 0.0003 +[2026-03-02 08:16:49] (step=0032366) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.332615926433183, LR: 0.0003 +[2026-03-02 08:16:57] (step=0032367) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.332811582860497, LR: 0.0003 +[2026-03-02 08:17:05] (step=0032368) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.33300723928781, LR: 0.0003 +[2026-03-02 08:17:13] (step=0032369) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.333202895715124, LR: 0.0003 +[2026-03-02 08:17:21] (step=0032370) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 6.333398552142438, LR: 0.0003 +[2026-03-02 08:17:29] (step=0032371) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.333594208569751, LR: 0.0003 +[2026-03-02 08:17:36] (step=0032372) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.333789864997065, LR: 0.0003 +[2026-03-02 08:17:44] (step=0032373) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.3339855214243785, LR: 0.0003 +[2026-03-02 08:17:52] (step=0032374) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.3341811778516925, LR: 0.0003 +[2026-03-02 08:18:00] (step=0032375) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.334376834279006, LR: 0.0003 +[2026-03-02 08:18:08] (step=0032376) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.33457249070632, LR: 0.0003 +[2026-03-02 08:18:16] (step=0032377) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.334768147133634, LR: 0.0003 +[2026-03-02 08:18:24] (step=0032378) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.334963803560947, LR: 0.0003 +[2026-03-02 08:18:31] (step=0032379) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.335159459988261, LR: 0.0003 +[2026-03-02 08:18:39] (step=0032380) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 6.335355116415574, LR: 0.0003 +[2026-03-02 08:18:47] (step=0032381) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 6.335550772842888, LR: 0.0003 +[2026-03-02 08:18:55] (step=0032382) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.335746429270202, LR: 0.0003 +[2026-03-02 08:19:03] (step=0032383) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.335942085697515, LR: 0.0003 +[2026-03-02 08:19:11] (step=0032384) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.336137742124829, LR: 0.0003 +[2026-03-02 08:19:18] (step=0032385) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.336333398552142, LR: 0.0003 +[2026-03-02 08:19:26] (step=0032386) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.336529054979456, LR: 0.0003 +[2026-03-02 08:19:34] (step=0032387) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.336724711406769, LR: 0.0003 +[2026-03-02 08:19:42] (step=0032388) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.336920367834083, LR: 0.0003 +[2026-03-02 08:19:50] (step=0032389) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.337116024261397, LR: 0.0003 +[2026-03-02 08:19:58] (step=0032390) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.33731168068871, LR: 0.0003 +[2026-03-02 08:20:06] (step=0032391) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.337507337116024, LR: 0.0003 +[2026-03-02 08:20:13] (step=0032392) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.3377029935433375, LR: 0.0003 +[2026-03-02 08:20:21] (step=0032393) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.3378986499706516, LR: 0.0003 +[2026-03-02 08:20:29] (step=0032394) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 6.338094306397966, LR: 0.0003 +[2026-03-02 08:20:37] (step=0032395) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.338289962825279, LR: 0.0003 +[2026-03-02 08:20:45] (step=0032396) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.338485619252593, LR: 0.0003 +[2026-03-02 08:20:53] (step=0032397) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.338681275679906, LR: 0.0003 +[2026-03-02 08:21:00] (step=0032398) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.33887693210722, LR: 0.0003 +[2026-03-02 08:21:08] (step=0032399) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.339072588534533, LR: 0.0003 +[2026-03-02 08:21:16] (step=0032400) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.339268244961847, LR: 0.0003 +[2026-03-02 08:21:24] (step=0032401) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.339463901389161, LR: 0.0003 +[2026-03-02 08:21:32] (step=0032402) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.339659557816474, LR: 0.0003 +[2026-03-02 08:21:40] (step=0032403) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.339855214243788, LR: 0.0003 +[2026-03-02 08:21:47] (step=0032404) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.340050870671101, LR: 0.0003 +[2026-03-02 08:21:55] (step=0032405) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.340246527098415, LR: 0.0003 +[2026-03-02 08:22:03] (step=0032406) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.340442183525729, LR: 0.0003 +[2026-03-02 08:22:11] (step=0032407) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.340637839953042, LR: 0.0003 +[2026-03-02 08:22:19] (step=0032408) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.340833496380356, LR: 0.0003 +[2026-03-02 08:22:27] (step=0032409) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.341029152807669, LR: 0.0003 +[2026-03-02 08:22:35] (step=0032410) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.341224809234983, LR: 0.0003 +[2026-03-02 08:22:42] (step=0032411) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.3414204656622974, LR: 0.0003 +[2026-03-02 08:22:50] (step=0032412) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.341616122089611, LR: 0.0003 +[2026-03-02 08:22:58] (step=0032413) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.341811778516925, LR: 0.0003 +[2026-03-02 08:23:06] (step=0032414) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.342007434944238, LR: 0.0003 +[2026-03-02 08:23:14] (step=0032415) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.342203091371552, LR: 0.0003 +[2026-03-02 08:23:22] (step=0032416) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.342398747798865, LR: 0.0003 +[2026-03-02 08:23:29] (step=0032417) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.342594404226179, LR: 0.0003 +[2026-03-02 08:23:37] (step=0032418) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.342790060653493, LR: 0.0003 +[2026-03-02 08:23:45] (step=0032419) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.342985717080806, LR: 0.0003 +[2026-03-02 08:23:53] (step=0032420) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 6.34318137350812, LR: 0.0003 +[2026-03-02 08:24:01] (step=0032421) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 6.343377029935433, LR: 0.0003 +[2026-03-02 08:24:09] (step=0032422) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.343572686362747, LR: 0.0003 +[2026-03-02 08:24:16] (step=0032423) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.343768342790061, LR: 0.0003 +[2026-03-02 08:24:24] (step=0032424) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.343963999217374, LR: 0.0003 +[2026-03-02 08:24:32] (step=0032425) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 6.344159655644688, LR: 0.0003 +[2026-03-02 08:24:40] (step=0032426) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.344355312072001, LR: 0.0003 +[2026-03-02 08:24:48] (step=0032427) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.344550968499315, LR: 0.0003 +[2026-03-02 08:24:56] (step=0032428) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.344746624926628, LR: 0.0003 +[2026-03-02 08:25:04] (step=0032429) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.3449422813539424, LR: 0.0003 +[2026-03-02 08:25:11] (step=0032430) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.3451379377812565, LR: 0.0003 +[2026-03-02 08:25:19] (step=0032431) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.34533359420857, LR: 0.0003 +[2026-03-02 08:25:27] (step=0032432) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.345529250635884, LR: 0.0003 +[2026-03-02 08:25:35] (step=0032433) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.345724907063197, LR: 0.0003 +[2026-03-02 08:25:43] (step=0032434) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.345920563490511, LR: 0.0003 +[2026-03-02 08:25:51] (step=0032435) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.346116219917825, LR: 0.0003 +[2026-03-02 08:25:58] (step=0032436) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.346311876345138, LR: 0.0003 +[2026-03-02 08:26:06] (step=0032437) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.346507532772452, LR: 0.0003 +[2026-03-02 08:26:14] (step=0032438) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.346703189199765, LR: 0.0003 +[2026-03-02 08:26:22] (step=0032439) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.346898845627079, LR: 0.0003 +[2026-03-02 08:26:30] (step=0032440) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.347094502054392, LR: 0.0003 +[2026-03-02 08:26:38] (step=0032441) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.347290158481706, LR: 0.0003 +[2026-03-02 08:26:45] (step=0032442) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.34748581490902, LR: 0.0003 +[2026-03-02 08:26:53] (step=0032443) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.347681471336333, LR: 0.0003 +[2026-03-02 08:27:01] (step=0032444) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.347877127763647, LR: 0.0003 +[2026-03-02 08:27:09] (step=0032445) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.34807278419096, LR: 0.0003 +[2026-03-02 08:27:17] (step=0032446) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.348268440618274, LR: 0.0003 +[2026-03-02 08:27:25] (step=0032447) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.348464097045588, LR: 0.0003 +[2026-03-02 08:27:32] (step=0032448) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 6.3486597534729015, LR: 0.0003 +[2026-03-02 08:27:40] (step=0032449) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.3488554099002155, LR: 0.0003 +[2026-03-02 08:27:48] (step=0032450) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.349051066327529, LR: 0.0003 +[2026-03-02 08:27:56] (step=0032451) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.349246722754843, LR: 0.0003 +[2026-03-02 08:28:04] (step=0032452) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.349442379182156, LR: 0.0003 +[2026-03-02 08:28:12] (step=0032453) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 6.34963803560947, LR: 0.0003 +[2026-03-02 08:28:19] (step=0032454) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.349833692036784, LR: 0.0003 +[2026-03-02 08:28:27] (step=0032455) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.350029348464097, LR: 0.0003 +[2026-03-02 08:28:35] (step=0032456) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.350225004891411, LR: 0.0003 +[2026-03-02 08:28:43] (step=0032457) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.350420661318724, LR: 0.0003 +[2026-03-02 08:28:51] (step=0032458) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 6.350616317746038, LR: 0.0003 +[2026-03-02 08:28:59] (step=0032459) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.350811974173352, LR: 0.0003 +[2026-03-02 08:29:06] (step=0032460) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.351007630600665, LR: 0.0003 +[2026-03-02 08:29:14] (step=0032461) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 6.351203287027979, LR: 0.0003 +[2026-03-02 08:29:22] (step=0032462) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.351398943455292, LR: 0.0003 +[2026-03-02 08:29:30] (step=0032463) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.351594599882606, LR: 0.0003 +[2026-03-02 08:29:38] (step=0032464) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.35179025630992, LR: 0.0003 +[2026-03-02 08:29:46] (step=0032465) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.351985912737233, LR: 0.0003 +[2026-03-02 08:29:53] (step=0032466) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.352181569164547, LR: 0.0003 +[2026-03-02 08:30:01] (step=0032467) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.3523772255918605, LR: 0.0003 +[2026-03-02 08:30:09] (step=0032468) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.3525728820191745, LR: 0.0003 +[2026-03-02 08:30:17] (step=0032469) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.352768538446488, LR: 0.0003 +[2026-03-02 08:30:25] (step=0032470) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.352964194873802, LR: 0.0003 +[2026-03-02 08:30:33] (step=0032471) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.353159851301116, LR: 0.0003 +[2026-03-02 08:30:40] (step=0032472) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.353355507728429, LR: 0.0003 +[2026-03-02 08:30:48] (step=0032473) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.353551164155743, LR: 0.0003 +[2026-03-02 08:30:56] (step=0032474) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.353746820583056, LR: 0.0003 +[2026-03-02 08:31:04] (step=0032475) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.35394247701037, LR: 0.0003 +[2026-03-02 08:31:12] (step=0032476) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.354138133437684, LR: 0.0003 +[2026-03-02 08:31:20] (step=0032477) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.354333789864997, LR: 0.0003 +[2026-03-02 08:31:27] (step=0032478) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.354529446292311, LR: 0.0003 +[2026-03-02 08:31:35] (step=0032479) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.354725102719624, LR: 0.0003 +[2026-03-02 08:31:43] (step=0032480) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.354920759146938, LR: 0.0003 +[2026-03-02 08:31:51] (step=0032481) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.355116415574251, LR: 0.0003 +[2026-03-02 08:31:59] (step=0032482) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.355312072001565, LR: 0.0003 +[2026-03-02 08:32:07] (step=0032483) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.355507728428879, LR: 0.0003 +[2026-03-02 08:32:14] (step=0032484) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.355703384856192, LR: 0.0003 +[2026-03-02 08:32:22] (step=0032485) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.355899041283506, LR: 0.0003 +[2026-03-02 08:32:30] (step=0032486) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.3560946977108195, LR: 0.0003 +[2026-03-02 08:32:38] (step=0032487) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.3562903541381335, LR: 0.0003 +[2026-03-02 08:32:46] (step=0032488) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.3564860105654475, LR: 0.0003 +[2026-03-02 08:32:54] (step=0032489) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.356681666992761, LR: 0.0003 +[2026-03-02 08:33:02] (step=0032490) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.356877323420075, LR: 0.0003 +[2026-03-02 08:33:09] (step=0032491) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.357072979847388, LR: 0.0003 +[2026-03-02 08:33:17] (step=0032492) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 6.357268636274702, LR: 0.0003 +[2026-03-02 08:33:25] (step=0032493) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.357464292702015, LR: 0.0003 +[2026-03-02 08:33:33] (step=0032494) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.357659949129329, LR: 0.0003 +[2026-03-02 08:33:41] (step=0032495) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.357855605556643, LR: 0.0003 +[2026-03-02 08:33:49] (step=0032496) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.358051261983956, LR: 0.0003 +[2026-03-02 08:33:57] (step=0032497) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.35824691841127, LR: 0.0003 +[2026-03-02 08:34:04] (step=0032498) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.358442574838583, LR: 0.0003 +[2026-03-02 08:34:12] (step=0032499) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.358638231265897, LR: 0.0003 +[2026-03-02 08:34:20] (step=0032500) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.358833887693211, LR: 0.0003 +[2026-03-02 08:34:20] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0032500/ +[2026-03-02 08:34:28] (step=0032501) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.359029544120524, LR: 0.0003 +[2026-03-02 08:34:36] (step=0032502) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.359225200547838, LR: 0.0003 +[2026-03-02 08:34:44] (step=0032503) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.359420856975151, LR: 0.0003 +[2026-03-02 08:34:51] (step=0032504) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.359616513402465, LR: 0.0003 +[2026-03-02 08:34:59] (step=0032505) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.3598121698297785, LR: 0.0003 +[2026-03-02 08:35:07] (step=0032506) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.3600078262570925, LR: 0.0003 +[2026-03-02 08:35:15] (step=0032507) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.3602034826844065, LR: 0.0003 +[2026-03-02 08:35:23] (step=0032508) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.36039913911172, LR: 0.0003 +[2026-03-02 08:35:31] (step=0032509) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.360594795539034, LR: 0.0003 +[2026-03-02 08:35:39] (step=0032510) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.360790451966347, LR: 0.0003 +[2026-03-02 08:35:46] (step=0032511) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.360986108393661, LR: 0.0003 +[2026-03-02 08:35:54] (step=0032512) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.361181764820975, LR: 0.0003 +[2026-03-02 08:36:02] (step=0032513) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.361377421248288, LR: 0.0003 +[2026-03-02 08:36:10] (step=0032514) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.361573077675602, LR: 0.0003 +[2026-03-02 08:36:18] (step=0032515) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.361768734102915, LR: 0.0003 +[2026-03-02 08:36:26] (step=0032516) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.361964390530229, LR: 0.0003 +[2026-03-02 08:36:33] (step=0032517) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.362160046957542, LR: 0.0003 +[2026-03-02 08:36:41] (step=0032518) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.362355703384856, LR: 0.0003 +[2026-03-02 08:36:49] (step=0032519) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.36255135981217, LR: 0.0003 +[2026-03-02 08:36:57] (step=0032520) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 6.362747016239483, LR: 0.0003 +[2026-03-02 08:37:05] (step=0032521) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.362942672666797, LR: 0.0003 +[2026-03-02 08:37:13] (step=0032522) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.36313832909411, LR: 0.0003 +[2026-03-02 08:37:20] (step=0032523) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.363333985521424, LR: 0.0003 +[2026-03-02 08:37:28] (step=0032524) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.363529641948738, LR: 0.0003 +[2026-03-02 08:37:36] (step=0032525) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.3637252983760515, LR: 0.0003 +[2026-03-02 08:37:44] (step=0032526) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 6.3639209548033655, LR: 0.0003 +[2026-03-02 08:37:52] (step=0032527) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 6.364116611230679, LR: 0.0003 +[2026-03-02 08:38:00] (step=0032528) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.364312267657993, LR: 0.0003 +[2026-03-02 08:38:07] (step=0032529) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.364507924085307, LR: 0.0003 +[2026-03-02 08:38:15] (step=0032530) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.36470358051262, LR: 0.0003 +[2026-03-02 08:38:23] (step=0032531) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.364899236939934, LR: 0.0003 +[2026-03-02 08:38:31] (step=0032532) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.365094893367247, LR: 0.0003 +[2026-03-02 08:38:39] (step=0032533) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.365290549794561, LR: 0.0003 +[2026-03-02 08:38:47] (step=0032534) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.365486206221874, LR: 0.0003 +[2026-03-02 08:38:55] (step=0032535) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.365681862649188, LR: 0.0003 +[2026-03-02 08:39:02] (step=0032536) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.365877519076502, LR: 0.0003 +[2026-03-02 08:39:10] (step=0032537) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.366073175503815, LR: 0.0003 +[2026-03-02 08:39:18] (step=0032538) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.366268831931129, LR: 0.0003 +[2026-03-02 08:39:26] (step=0032539) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.366464488358442, LR: 0.0003 +[2026-03-02 08:39:34] (step=0032540) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.366660144785756, LR: 0.0003 +[2026-03-02 08:39:42] (step=0032541) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.36685580121307, LR: 0.0003 +[2026-03-02 08:39:50] (step=0032542) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.367051457640383, LR: 0.0003 +[2026-03-02 08:39:57] (step=0032543) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.367247114067697, LR: 0.0003 +[2026-03-02 08:40:05] (step=0032544) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.3674427704950105, LR: 0.0003 +[2026-03-02 08:40:13] (step=0032545) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.3676384269223245, LR: 0.0003 +[2026-03-02 08:40:21] (step=0032546) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.367834083349638, LR: 0.0003 +[2026-03-02 08:40:29] (step=0032547) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.368029739776952, LR: 0.0003 +[2026-03-02 08:40:37] (step=0032548) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.368225396204266, LR: 0.0003 +[2026-03-02 08:40:44] (step=0032549) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.368421052631579, LR: 0.0003 +[2026-03-02 08:40:52] (step=0032550) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.368616709058893, LR: 0.0003 +[2026-03-02 08:41:00] (step=0032551) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.368812365486206, LR: 0.0003 +[2026-03-02 08:41:08] (step=0032552) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.36900802191352, LR: 0.0003 +[2026-03-02 08:41:16] (step=0032553) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.369203678340834, LR: 0.0003 +[2026-03-02 08:41:24] (step=0032554) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.369399334768147, LR: 0.0003 +[2026-03-02 08:41:31] (step=0032555) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.369594991195461, LR: 0.0003 +[2026-03-02 08:41:39] (step=0032556) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.369790647622774, LR: 0.0003 +[2026-03-02 08:41:47] (step=0032557) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.369986304050088, LR: 0.0003 +[2026-03-02 08:41:55] (step=0032558) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.370181960477401, LR: 0.0003 +[2026-03-02 08:42:03] (step=0032559) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.370377616904715, LR: 0.0003 +[2026-03-02 08:42:11] (step=0032560) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 6.370573273332029, LR: 0.0003 +[2026-03-02 08:42:18] (step=0032561) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.370768929759342, LR: 0.0003 +[2026-03-02 08:42:26] (step=0032562) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.370964586186656, LR: 0.0003 +[2026-03-02 08:42:34] (step=0032563) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.3711602426139695, LR: 0.0003 +[2026-03-02 08:42:42] (step=0032564) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.3713558990412835, LR: 0.0003 +[2026-03-02 08:42:50] (step=0032565) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.3715515554685975, LR: 0.0003 +[2026-03-02 08:42:58] (step=0032566) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.371747211895911, LR: 0.0003 +[2026-03-02 08:43:05] (step=0032567) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.371942868323225, LR: 0.0003 +[2026-03-02 08:43:13] (step=0032568) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.372138524750538, LR: 0.0003 +[2026-03-02 08:43:21] (step=0032569) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 6.372334181177852, LR: 0.0003 +[2026-03-02 08:43:29] (step=0032570) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.372529837605165, LR: 0.0003 +[2026-03-02 08:43:37] (step=0032571) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.372725494032479, LR: 0.0003 +[2026-03-02 08:43:45] (step=0032572) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.372921150459793, LR: 0.0003 +[2026-03-02 08:43:52] (step=0032573) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 6.373116806887106, LR: 0.0003 +[2026-03-02 08:44:00] (step=0032574) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.37331246331442, LR: 0.0003 +[2026-03-02 08:44:08] (step=0032575) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 6.373508119741733, LR: 0.0003 +[2026-03-02 08:44:16] (step=0032576) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.373703776169047, LR: 0.0003 +[2026-03-02 08:44:24] (step=0032577) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.373899432596361, LR: 0.0003 +[2026-03-02 08:44:32] (step=0032578) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.374095089023674, LR: 0.0003 +[2026-03-02 08:44:39] (step=0032579) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.374290745450988, LR: 0.0003 +[2026-03-02 08:44:47] (step=0032580) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.374486401878301, LR: 0.0003 +[2026-03-02 08:44:55] (step=0032581) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 6.374682058305615, LR: 0.0003 +[2026-03-02 08:45:03] (step=0032582) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 6.374877714732929, LR: 0.0003 +[2026-03-02 08:45:11] (step=0032583) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.3750733711602425, LR: 0.0003 +[2026-03-02 08:45:19] (step=0032584) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.3752690275875565, LR: 0.0003 +[2026-03-02 08:45:27] (step=0032585) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.37546468401487, LR: 0.0003 +[2026-03-02 08:45:34] (step=0032586) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.375660340442184, LR: 0.0003 +[2026-03-02 08:45:42] (step=0032587) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.375855996869497, LR: 0.0003 +[2026-03-02 08:45:50] (step=0032588) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.376051653296811, LR: 0.0003 +[2026-03-02 08:45:58] (step=0032589) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.376247309724125, LR: 0.0003 +[2026-03-02 08:46:06] (step=0032590) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.376442966151438, LR: 0.0003 +[2026-03-02 08:46:14] (step=0032591) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.376638622578752, LR: 0.0003 +[2026-03-02 08:46:21] (step=0032592) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.376834279006065, LR: 0.0003 +[2026-03-02 08:46:29] (step=0032593) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.377029935433379, LR: 0.0003 +[2026-03-02 08:46:37] (step=0032594) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.377225591860693, LR: 0.0003 +[2026-03-02 08:46:45] (step=0032595) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.377421248288006, LR: 0.0003 +[2026-03-02 08:46:53] (step=0032596) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 6.37761690471532, LR: 0.0003 +[2026-03-02 08:47:01] (step=0032597) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.377812561142633, LR: 0.0003 +[2026-03-02 08:47:08] (step=0032598) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.378008217569947, LR: 0.0003 +[2026-03-02 08:47:16] (step=0032599) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.37820387399726, LR: 0.0003 +[2026-03-02 08:47:24] (step=0032600) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.378399530424574, LR: 0.0003 +[2026-03-02 08:47:32] (step=0032601) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.378595186851888, LR: 0.0003 +[2026-03-02 08:47:40] (step=0032602) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 6.3787908432792015, LR: 0.0003 +[2026-03-02 08:47:48] (step=0032603) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.3789864997065155, LR: 0.0003 +[2026-03-02 08:47:55] (step=0032604) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.379182156133829, LR: 0.0003 +[2026-03-02 08:48:03] (step=0032605) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 6.379377812561143, LR: 0.0003 +[2026-03-02 08:48:11] (step=0032606) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.379573468988457, LR: 0.0003 +[2026-03-02 08:48:19] (step=0032607) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.37976912541577, LR: 0.0003 +[2026-03-02 08:48:27] (step=0032608) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.379964781843084, LR: 0.0003 +[2026-03-02 08:48:35] (step=0032609) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.380160438270397, LR: 0.0003 +[2026-03-02 08:48:43] (step=0032610) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.380356094697711, LR: 0.0003 +[2026-03-02 08:48:50] (step=0032611) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.380551751125024, LR: 0.0003 +[2026-03-02 08:48:58] (step=0032612) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.380747407552338, LR: 0.0003 +[2026-03-02 08:49:06] (step=0032613) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.380943063979652, LR: 0.0003 +[2026-03-02 08:49:14] (step=0032614) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.381138720406965, LR: 0.0003 +[2026-03-02 08:49:22] (step=0032615) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.381334376834279, LR: 0.0003 +[2026-03-02 08:49:30] (step=0032616) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.381530033261592, LR: 0.0003 +[2026-03-02 08:49:37] (step=0032617) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.381725689688906, LR: 0.0003 +[2026-03-02 08:49:45] (step=0032618) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.38192134611622, LR: 0.0003 +[2026-03-02 08:49:53] (step=0032619) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 6.382117002543533, LR: 0.0003 +[2026-03-02 08:50:01] (step=0032620) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.382312658970847, LR: 0.0003 +[2026-03-02 08:50:09] (step=0032621) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.3825083153981605, LR: 0.0003 +[2026-03-02 08:50:17] (step=0032622) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.3827039718254746, LR: 0.0003 +[2026-03-02 08:50:24] (step=0032623) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.382899628252788, LR: 0.0003 +[2026-03-02 08:50:32] (step=0032624) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.383095284680102, LR: 0.0003 +[2026-03-02 08:50:40] (step=0032625) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.383290941107416, LR: 0.0003 +[2026-03-02 08:50:48] (step=0032626) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 6.383486597534729, LR: 0.0003 +[2026-03-02 08:50:56] (step=0032627) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.383682253962043, LR: 0.0003 +[2026-03-02 08:51:04] (step=0032628) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 6.383877910389356, LR: 0.0003 +[2026-03-02 08:51:12] (step=0032629) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 6.38407356681667, LR: 0.0003 +[2026-03-02 08:51:19] (step=0032630) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.384269223243984, LR: 0.0003 +[2026-03-02 08:51:27] (step=0032631) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.384464879671297, LR: 0.0003 +[2026-03-02 08:51:35] (step=0032632) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.384660536098611, LR: 0.0003 +[2026-03-02 08:51:43] (step=0032633) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.384856192525924, LR: 0.0003 +[2026-03-02 08:51:51] (step=0032634) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.385051848953238, LR: 0.0003 +[2026-03-02 08:51:59] (step=0032635) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.385247505380552, LR: 0.0003 +[2026-03-02 08:52:06] (step=0032636) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.385443161807865, LR: 0.0003 +[2026-03-02 08:52:14] (step=0032637) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.385638818235179, LR: 0.0003 +[2026-03-02 08:52:22] (step=0032638) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.385834474662492, LR: 0.0003 +[2026-03-02 08:52:30] (step=0032639) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 6.386030131089806, LR: 0.0003 +[2026-03-02 08:52:38] (step=0032640) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.38622578751712, LR: 0.0003 +[2026-03-02 08:52:46] (step=0032641) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.386421443944434, LR: 0.0003 +[2026-03-02 08:52:53] (step=0032642) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.386617100371748, LR: 0.0003 +[2026-03-02 08:53:01] (step=0032643) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.386812756799061, LR: 0.0003 +[2026-03-02 08:53:09] (step=0032644) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.387008413226375, LR: 0.0003 +[2026-03-02 08:53:17] (step=0032645) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.387204069653688, LR: 0.0003 +[2026-03-02 08:53:25] (step=0032646) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.387399726081002, LR: 0.0003 +[2026-03-02 08:53:33] (step=0032647) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.387595382508316, LR: 0.0003 +[2026-03-02 08:53:40] (step=0032648) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.387791038935629, LR: 0.0003 +[2026-03-02 08:53:48] (step=0032649) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.387986695362943, LR: 0.0003 +[2026-03-02 08:53:56] (step=0032650) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 6.388182351790256, LR: 0.0003 +[2026-03-02 08:54:04] (step=0032651) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.38837800821757, LR: 0.0003 +[2026-03-02 08:54:12] (step=0032652) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.388573664644883, LR: 0.0003 +[2026-03-02 08:54:20] (step=0032653) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.388769321072197, LR: 0.0003 +[2026-03-02 08:54:27] (step=0032654) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.388964977499511, LR: 0.0003 +[2026-03-02 08:54:35] (step=0032655) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.389160633926824, LR: 0.0003 +[2026-03-02 08:54:43] (step=0032656) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.389356290354138, LR: 0.0003 +[2026-03-02 08:54:51] (step=0032657) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.389551946781451, LR: 0.0003 +[2026-03-02 08:54:59] (step=0032658) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.3897476032087654, LR: 0.0003 +[2026-03-02 08:55:07] (step=0032659) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.3899432596360795, LR: 0.0003 +[2026-03-02 08:55:14] (step=0032660) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.390138916063393, LR: 0.0003 +[2026-03-02 08:55:22] (step=0032661) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.390334572490707, LR: 0.0003 +[2026-03-02 08:55:30] (step=0032662) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.39053022891802, LR: 0.0003 +[2026-03-02 08:55:38] (step=0032663) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.390725885345334, LR: 0.0003 +[2026-03-02 08:55:46] (step=0032664) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.390921541772647, LR: 0.0003 +[2026-03-02 08:55:54] (step=0032665) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.391117198199961, LR: 0.0003 +[2026-03-02 08:56:02] (step=0032666) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.391312854627275, LR: 0.0003 +[2026-03-02 08:56:09] (step=0032667) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.391508511054588, LR: 0.0003 +[2026-03-02 08:56:17] (step=0032668) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.391704167481902, LR: 0.0003 +[2026-03-02 08:56:25] (step=0032669) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.391899823909215, LR: 0.0003 +[2026-03-02 08:56:33] (step=0032670) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.392095480336529, LR: 0.0003 +[2026-03-02 08:56:41] (step=0032671) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.392291136763843, LR: 0.0003 +[2026-03-02 08:56:49] (step=0032672) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.392486793191156, LR: 0.0003 +[2026-03-02 08:56:56] (step=0032673) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.39268244961847, LR: 0.0003 +[2026-03-02 08:57:04] (step=0032674) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 6.392878106045783, LR: 0.0003 +[2026-03-02 08:57:12] (step=0032675) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.393073762473097, LR: 0.0003 +[2026-03-02 08:57:20] (step=0032676) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.3932694189004105, LR: 0.0003 +[2026-03-02 08:57:28] (step=0032677) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.3934650753277245, LR: 0.0003 +[2026-03-02 08:57:36] (step=0032678) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.3936607317550385, LR: 0.0003 +[2026-03-02 08:57:43] (step=0032679) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.393856388182352, LR: 0.0003 +[2026-03-02 08:57:51] (step=0032680) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.394052044609666, LR: 0.0003 +[2026-03-02 08:57:59] (step=0032681) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 6.394247701036979, LR: 0.0003 +[2026-03-02 08:58:07] (step=0032682) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.394443357464293, LR: 0.0003 +[2026-03-02 08:58:15] (step=0032683) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.394639013891607, LR: 0.0003 +[2026-03-02 08:58:23] (step=0032684) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.39483467031892, LR: 0.0003 +[2026-03-02 08:58:31] (step=0032685) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.395030326746234, LR: 0.0003 +[2026-03-02 08:58:38] (step=0032686) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.395225983173547, LR: 0.0003 +[2026-03-02 08:58:46] (step=0032687) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.395421639600861, LR: 0.0003 +[2026-03-02 08:58:54] (step=0032688) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 6.395617296028175, LR: 0.0003 +[2026-03-02 08:59:02] (step=0032689) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.395812952455488, LR: 0.0003 +[2026-03-02 08:59:10] (step=0032690) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.396008608882802, LR: 0.0003 +[2026-03-02 08:59:18] (step=0032691) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.396204265310115, LR: 0.0003 +[2026-03-02 08:59:25] (step=0032692) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.396399921737429, LR: 0.0003 +[2026-03-02 08:59:33] (step=0032693) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.396595578164742, LR: 0.0003 +[2026-03-02 08:59:41] (step=0032694) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.396791234592056, LR: 0.0003 +[2026-03-02 08:59:49] (step=0032695) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.39698689101937, LR: 0.0003 +[2026-03-02 08:59:57] (step=0032696) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.3971825474466835, LR: 0.0003 +[2026-03-02 09:00:05] (step=0032697) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.3973782038739975, LR: 0.0003 +[2026-03-02 09:00:12] (step=0032698) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.397573860301311, LR: 0.0003 +[2026-03-02 09:00:20] (step=0032699) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.397769516728625, LR: 0.0003 +[2026-03-02 09:00:28] (step=0032700) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 6.397965173155939, LR: 0.0003 +[2026-03-02 09:00:36] (step=0032701) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.398160829583252, LR: 0.0003 +[2026-03-02 09:00:44] (step=0032702) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.398356486010566, LR: 0.0003 +[2026-03-02 09:00:52] (step=0032703) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 6.398552142437879, LR: 0.0003 +[2026-03-02 09:01:00] (step=0032704) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.398747798865193, LR: 0.0003 +[2026-03-02 09:01:07] (step=0032705) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.398943455292506, LR: 0.0003 +[2026-03-02 09:01:15] (step=0032706) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.39913911171982, LR: 0.0003 +[2026-03-02 09:01:23] (step=0032707) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.399334768147134, LR: 0.0003 +[2026-03-02 09:01:31] (step=0032708) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.399530424574447, LR: 0.0003 +[2026-03-02 09:01:39] (step=0032709) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.399726081001761, LR: 0.0003 +[2026-03-02 09:01:47] (step=0032710) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.399921737429074, LR: 0.0003 +[2026-03-02 09:01:55] (step=0032711) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 6.400117393856388, LR: 0.0003 +[2026-03-02 09:02:02] (step=0032712) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.400313050283702, LR: 0.0003 +[2026-03-02 09:02:10] (step=0032713) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.400508706711015, LR: 0.0003 +[2026-03-02 09:02:18] (step=0032714) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.400704363138329, LR: 0.0003 +[2026-03-02 09:02:26] (step=0032715) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.4009000195656425, LR: 0.0003 +[2026-03-02 09:02:34] (step=0032716) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.4010956759929565, LR: 0.0003 +[2026-03-02 09:02:42] (step=0032717) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.40129133242027, LR: 0.0003 +[2026-03-02 09:02:49] (step=0032718) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.401486988847584, LR: 0.0003 +[2026-03-02 09:02:57] (step=0032719) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 6.401682645274898, LR: 0.0003 +[2026-03-02 09:03:05] (step=0032720) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 6.401878301702211, LR: 0.0003 +[2026-03-02 09:03:13] (step=0032721) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.402073958129525, LR: 0.0003 +[2026-03-02 09:03:21] (step=0032722) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.402269614556838, LR: 0.0003 +[2026-03-02 09:03:29] (step=0032723) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.402465270984152, LR: 0.0003 +[2026-03-02 09:03:37] (step=0032724) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.402660927411466, LR: 0.0003 +[2026-03-02 09:03:44] (step=0032725) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.402856583838779, LR: 0.0003 +[2026-03-02 09:03:52] (step=0032726) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 6.403052240266093, LR: 0.0003 +[2026-03-02 09:04:00] (step=0032727) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.403247896693406, LR: 0.0003 +[2026-03-02 09:04:08] (step=0032728) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.40344355312072, LR: 0.0003 +[2026-03-02 09:04:16] (step=0032729) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.403639209548033, LR: 0.0003 +[2026-03-02 09:04:24] (step=0032730) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.403834865975347, LR: 0.0003 +[2026-03-02 09:04:31] (step=0032731) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.404030522402661, LR: 0.0003 +[2026-03-02 09:04:39] (step=0032732) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.404226178829974, LR: 0.0003 +[2026-03-02 09:04:47] (step=0032733) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.404421835257288, LR: 0.0003 +[2026-03-02 09:04:55] (step=0032734) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.4046174916846015, LR: 0.0003 +[2026-03-02 09:05:03] (step=0032735) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.4048131481119155, LR: 0.0003 +[2026-03-02 09:05:11] (step=0032736) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.4050088045392295, LR: 0.0003 +[2026-03-02 09:05:18] (step=0032737) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.405204460966543, LR: 0.0003 +[2026-03-02 09:05:26] (step=0032738) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.405400117393857, LR: 0.0003 +[2026-03-02 09:05:34] (step=0032739) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.40559577382117, LR: 0.0003 +[2026-03-02 09:05:42] (step=0032740) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.405791430248484, LR: 0.0003 +[2026-03-02 09:05:50] (step=0032741) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.405987086675797, LR: 0.0003 +[2026-03-02 09:05:58] (step=0032742) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.406182743103111, LR: 0.0003 +[2026-03-02 09:06:05] (step=0032743) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.406378399530425, LR: 0.0003 +[2026-03-02 09:06:13] (step=0032744) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.406574055957738, LR: 0.0003 +[2026-03-02 09:06:21] (step=0032745) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.406769712385052, LR: 0.0003 +[2026-03-02 09:06:29] (step=0032746) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.406965368812365, LR: 0.0003 +[2026-03-02 09:06:37] (step=0032747) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.407161025239679, LR: 0.0003 +[2026-03-02 09:06:45] (step=0032748) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.407356681666993, LR: 0.0003 +[2026-03-02 09:06:52] (step=0032749) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 6.407552338094306, LR: 0.0003 +[2026-03-02 09:07:00] (step=0032750) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.40774799452162, LR: 0.0003 +[2026-03-02 09:07:08] (step=0032751) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.407943650948933, LR: 0.0003 +[2026-03-02 09:07:16] (step=0032752) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.408139307376247, LR: 0.0003 +[2026-03-02 09:07:24] (step=0032753) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.408334963803561, LR: 0.0003 +[2026-03-02 09:07:32] (step=0032754) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.4085306202308745, LR: 0.0003 +[2026-03-02 09:07:39] (step=0032755) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.4087262766581885, LR: 0.0003 +[2026-03-02 09:07:47] (step=0032756) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.408921933085502, LR: 0.0003 +[2026-03-02 09:07:55] (step=0032757) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.409117589512816, LR: 0.0003 +[2026-03-02 09:08:03] (step=0032758) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.409313245940129, LR: 0.0003 +[2026-03-02 09:08:11] (step=0032759) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 6.409508902367443, LR: 0.0003 +[2026-03-02 09:08:19] (step=0032760) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.409704558794757, LR: 0.0003 +[2026-03-02 09:08:27] (step=0032761) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.40990021522207, LR: 0.0003 +[2026-03-02 09:08:34] (step=0032762) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.410095871649384, LR: 0.0003 +[2026-03-02 09:08:42] (step=0032763) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.410291528076697, LR: 0.0003 +[2026-03-02 09:08:50] (step=0032764) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.410487184504011, LR: 0.0003 +[2026-03-02 09:08:58] (step=0032765) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.410682840931325, LR: 0.0003 +[2026-03-02 09:09:06] (step=0032766) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.410878497358638, LR: 0.0003 +[2026-03-02 09:09:14] (step=0032767) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.411074153785952, LR: 0.0003 +[2026-03-02 09:09:21] (step=0032768) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.411269810213265, LR: 0.0003 +[2026-03-02 09:09:29] (step=0032769) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 6.411465466640579, LR: 0.0003 +[2026-03-02 09:09:37] (step=0032770) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.411661123067892, LR: 0.0003 +[2026-03-02 09:09:45] (step=0032771) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.411856779495206, LR: 0.0003 +[2026-03-02 09:09:53] (step=0032772) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.41205243592252, LR: 0.0003 +[2026-03-02 09:10:01] (step=0032773) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.4122480923498335, LR: 0.0003 +[2026-03-02 09:10:08] (step=0032774) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.4124437487771475, LR: 0.0003 +[2026-03-02 09:10:16] (step=0032775) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.412639405204461, LR: 0.0003 +[2026-03-02 09:10:24] (step=0032776) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.412835061631775, LR: 0.0003 +[2026-03-02 09:10:32] (step=0032777) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 6.413030718059089, LR: 0.0003 +[2026-03-02 09:10:40] (step=0032778) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.413226374486402, LR: 0.0003 +[2026-03-02 09:10:48] (step=0032779) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.413422030913716, LR: 0.0003 +[2026-03-02 09:10:56] (step=0032780) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.413617687341029, LR: 0.0003 +[2026-03-02 09:11:03] (step=0032781) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.413813343768343, LR: 0.0003 +[2026-03-02 09:11:11] (step=0032782) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.414009000195656, LR: 0.0003 +[2026-03-02 09:11:19] (step=0032783) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.41420465662297, LR: 0.0003 +[2026-03-02 09:11:27] (step=0032784) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.414400313050284, LR: 0.0003 +[2026-03-02 09:11:35] (step=0032785) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.414595969477597, LR: 0.0003 +[2026-03-02 09:11:43] (step=0032786) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.414791625904911, LR: 0.0003 +[2026-03-02 09:11:50] (step=0032787) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.414987282332224, LR: 0.0003 +[2026-03-02 09:11:58] (step=0032788) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.415182938759538, LR: 0.0003 +[2026-03-02 09:12:06] (step=0032789) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.415378595186852, LR: 0.0003 +[2026-03-02 09:12:14] (step=0032790) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.415574251614165, LR: 0.0003 +[2026-03-02 09:12:22] (step=0032791) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.415769908041479, LR: 0.0003 +[2026-03-02 09:12:29] (step=0032792) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.4159655644687925, LR: 0.0003 +[2026-03-02 09:12:37] (step=0032793) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.4161612208961065, LR: 0.0003 +[2026-03-02 09:12:45] (step=0032794) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 6.41635687732342, LR: 0.0003 +[2026-03-02 09:12:53] (step=0032795) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.416552533750734, LR: 0.0003 +[2026-03-02 09:13:01] (step=0032796) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.416748190178048, LR: 0.0003 +[2026-03-02 09:13:09] (step=0032797) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.416943846605361, LR: 0.0003 +[2026-03-02 09:13:16] (step=0032798) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.417139503032675, LR: 0.0003 +[2026-03-02 09:13:24] (step=0032799) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.417335159459988, LR: 0.0003 +[2026-03-02 09:13:32] (step=0032800) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.417530815887302, LR: 0.0003 +[2026-03-02 09:13:40] (step=0032801) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.417726472314616, LR: 0.0003 +[2026-03-02 09:13:48] (step=0032802) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.417922128741929, LR: 0.0003 +[2026-03-02 09:13:56] (step=0032803) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.418117785169243, LR: 0.0003 +[2026-03-02 09:14:04] (step=0032804) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 6.418313441596556, LR: 0.0003 +[2026-03-02 09:14:11] (step=0032805) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.41850909802387, LR: 0.0003 +[2026-03-02 09:14:19] (step=0032806) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 6.418704754451184, LR: 0.0003 +[2026-03-02 09:14:27] (step=0032807) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.418900410878497, LR: 0.0003 +[2026-03-02 09:14:35] (step=0032808) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.419096067305811, LR: 0.0003 +[2026-03-02 09:14:43] (step=0032809) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.419291723733124, LR: 0.0003 +[2026-03-02 09:14:51] (step=0032810) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.419487380160438, LR: 0.0003 +[2026-03-02 09:14:58] (step=0032811) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.4196830365877515, LR: 0.0003 +[2026-03-02 09:15:06] (step=0032812) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.4198786930150655, LR: 0.0003 +[2026-03-02 09:15:14] (step=0032813) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.4200743494423795, LR: 0.0003 +[2026-03-02 09:15:22] (step=0032814) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.420270005869693, LR: 0.0003 +[2026-03-02 09:15:30] (step=0032815) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.420465662297007, LR: 0.0003 +[2026-03-02 09:15:38] (step=0032816) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.42066131872432, LR: 0.0003 +[2026-03-02 09:15:45] (step=0032817) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.420856975151634, LR: 0.0003 +[2026-03-02 09:15:53] (step=0032818) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.421052631578948, LR: 0.0003 +[2026-03-02 09:16:01] (step=0032819) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.421248288006261, LR: 0.0003 +[2026-03-02 09:16:09] (step=0032820) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 6.421443944433575, LR: 0.0003 +[2026-03-02 09:16:17] (step=0032821) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 6.421639600860888, LR: 0.0003 +[2026-03-02 09:16:25] (step=0032822) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.421835257288202, LR: 0.0003 +[2026-03-02 09:16:32] (step=0032823) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.422030913715515, LR: 0.0003 +[2026-03-02 09:16:40] (step=0032824) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.422226570142829, LR: 0.0003 +[2026-03-02 09:16:48] (step=0032825) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.422422226570143, LR: 0.0003 +[2026-03-02 09:16:56] (step=0032826) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.422617882997456, LR: 0.0003 +[2026-03-02 09:17:04] (step=0032827) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 6.42281353942477, LR: 0.0003 +[2026-03-02 09:17:12] (step=0032828) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.423009195852083, LR: 0.0003 +[2026-03-02 09:17:20] (step=0032829) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.423204852279397, LR: 0.0003 +[2026-03-02 09:17:27] (step=0032830) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.423400508706711, LR: 0.0003 +[2026-03-02 09:17:35] (step=0032831) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.4235961651340245, LR: 0.0003 +[2026-03-02 09:17:43] (step=0032832) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.4237918215613385, LR: 0.0003 +[2026-03-02 09:17:51] (step=0032833) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 6.423987477988652, LR: 0.0003 +[2026-03-02 09:17:59] (step=0032834) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.424183134415966, LR: 0.0003 +[2026-03-02 09:18:07] (step=0032835) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.424378790843279, LR: 0.0003 +[2026-03-02 09:18:14] (step=0032836) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.424574447270593, LR: 0.0003 +[2026-03-02 09:18:22] (step=0032837) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.424770103697907, LR: 0.0003 +[2026-03-02 09:18:30] (step=0032838) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.42496576012522, LR: 0.0003 +[2026-03-02 09:18:38] (step=0032839) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.425161416552534, LR: 0.0003 +[2026-03-02 09:18:46] (step=0032840) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.425357072979847, LR: 0.0003 +[2026-03-02 09:18:54] (step=0032841) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.425552729407161, LR: 0.0003 +[2026-03-02 09:19:01] (step=0032842) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.425748385834475, LR: 0.0003 +[2026-03-02 09:19:09] (step=0032843) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.425944042261788, LR: 0.0003 +[2026-03-02 09:19:17] (step=0032844) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.426139698689102, LR: 0.0003 +[2026-03-02 09:19:25] (step=0032845) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.426335355116415, LR: 0.0003 +[2026-03-02 09:19:33] (step=0032846) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.426531011543729, LR: 0.0003 +[2026-03-02 09:19:41] (step=0032847) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 6.426726667971042, LR: 0.0003 +[2026-03-02 09:19:48] (step=0032848) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.426922324398356, LR: 0.0003 +[2026-03-02 09:19:56] (step=0032849) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.42711798082567, LR: 0.0003 +[2026-03-02 09:20:04] (step=0032850) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.4273136372529835, LR: 0.0003 +[2026-03-02 09:20:12] (step=0032851) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.4275092936802976, LR: 0.0003 +[2026-03-02 09:20:20] (step=0032852) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.427704950107611, LR: 0.0003 +[2026-03-02 09:20:28] (step=0032853) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.427900606534925, LR: 0.0003 +[2026-03-02 09:20:36] (step=0032854) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 6.428096262962239, LR: 0.0003 +[2026-03-02 09:20:44] (step=0032855) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.428291919389552, LR: 0.0003 +[2026-03-02 09:20:51] (step=0032856) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.428487575816866, LR: 0.0003 +[2026-03-02 09:20:59] (step=0032857) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.428683232244179, LR: 0.0003 +[2026-03-02 09:21:07] (step=0032858) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.428878888671493, LR: 0.0003 +[2026-03-02 09:21:15] (step=0032859) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.429074545098807, LR: 0.0003 +[2026-03-02 09:21:23] (step=0032860) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.42927020152612, LR: 0.0003 +[2026-03-02 09:21:31] (step=0032861) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.429465857953434, LR: 0.0003 +[2026-03-02 09:21:38] (step=0032862) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.429661514380747, LR: 0.0003 +[2026-03-02 09:21:46] (step=0032863) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.429857170808061, LR: 0.0003 +[2026-03-02 09:21:54] (step=0032864) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.430052827235374, LR: 0.0003 +[2026-03-02 09:22:02] (step=0032865) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.430248483662688, LR: 0.0003 +[2026-03-02 09:22:10] (step=0032866) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.430444140090002, LR: 0.0003 +[2026-03-02 09:22:18] (step=0032867) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.430639796517315, LR: 0.0003 +[2026-03-02 09:22:25] (step=0032868) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.430835452944629, LR: 0.0003 +[2026-03-02 09:22:33] (step=0032869) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.4310311093719426, LR: 0.0003 +[2026-03-02 09:22:41] (step=0032870) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.431226765799257, LR: 0.0003 +[2026-03-02 09:22:49] (step=0032871) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.431422422226571, LR: 0.0003 +[2026-03-02 09:22:57] (step=0032872) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.431618078653884, LR: 0.0003 +[2026-03-02 09:23:05] (step=0032873) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.431813735081198, LR: 0.0003 +[2026-03-02 09:23:12] (step=0032874) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.432009391508511, LR: 0.0003 +[2026-03-02 09:23:20] (step=0032875) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.432205047935825, LR: 0.0003 +[2026-03-02 09:23:28] (step=0032876) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 6.432400704363138, LR: 0.0003 +[2026-03-02 09:23:36] (step=0032877) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.432596360790452, LR: 0.0003 +[2026-03-02 09:23:44] (step=0032878) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.432792017217766, LR: 0.0003 +[2026-03-02 09:23:52] (step=0032879) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.432987673645079, LR: 0.0003 +[2026-03-02 09:24:00] (step=0032880) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 6.433183330072393, LR: 0.0003 +[2026-03-02 09:24:07] (step=0032881) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 6.433378986499706, LR: 0.0003 +[2026-03-02 09:24:15] (step=0032882) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.43357464292702, LR: 0.0003 +[2026-03-02 09:24:23] (step=0032883) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.433770299354334, LR: 0.0003 +[2026-03-02 09:24:31] (step=0032884) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.433965955781647, LR: 0.0003 +[2026-03-02 09:24:39] (step=0032885) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.434161612208961, LR: 0.0003 +[2026-03-02 09:24:47] (step=0032886) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 6.434357268636274, LR: 0.0003 +[2026-03-02 09:24:54] (step=0032887) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.4345529250635884, LR: 0.0003 +[2026-03-02 09:25:02] (step=0032888) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.434748581490902, LR: 0.0003 +[2026-03-02 09:25:10] (step=0032889) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.434944237918216, LR: 0.0003 +[2026-03-02 09:25:18] (step=0032890) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 6.43513989434553, LR: 0.0003 +[2026-03-02 09:25:26] (step=0032891) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.435335550772843, LR: 0.0003 +[2026-03-02 09:25:34] (step=0032892) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.435531207200157, LR: 0.0003 +[2026-03-02 09:25:41] (step=0032893) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.43572686362747, LR: 0.0003 +[2026-03-02 09:25:49] (step=0032894) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.435922520054784, LR: 0.0003 +[2026-03-02 09:25:57] (step=0032895) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.436118176482098, LR: 0.0003 +[2026-03-02 09:26:05] (step=0032896) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.436313832909411, LR: 0.0003 +[2026-03-02 09:26:13] (step=0032897) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.436509489336725, LR: 0.0003 +[2026-03-02 09:26:21] (step=0032898) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.436705145764038, LR: 0.0003 +[2026-03-02 09:26:28] (step=0032899) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.436900802191352, LR: 0.0003 +[2026-03-02 09:26:36] (step=0032900) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.437096458618665, LR: 0.0003 +[2026-03-02 09:26:44] (step=0032901) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.437292115045979, LR: 0.0003 +[2026-03-02 09:26:52] (step=0032902) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.437487771473293, LR: 0.0003 +[2026-03-02 09:27:00] (step=0032903) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.437683427900606, LR: 0.0003 +[2026-03-02 09:27:08] (step=0032904) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.43787908432792, LR: 0.0003 +[2026-03-02 09:27:16] (step=0032905) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.4380747407552334, LR: 0.0003 +[2026-03-02 09:27:23] (step=0032906) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.4382703971825475, LR: 0.0003 +[2026-03-02 09:27:31] (step=0032907) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.4384660536098615, LR: 0.0003 +[2026-03-02 09:27:39] (step=0032908) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.438661710037175, LR: 0.0003 +[2026-03-02 09:27:47] (step=0032909) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.438857366464489, LR: 0.0003 +[2026-03-02 09:27:55] (step=0032910) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.439053022891802, LR: 0.0003 +[2026-03-02 09:28:03] (step=0032911) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.439248679319116, LR: 0.0003 +[2026-03-02 09:28:10] (step=0032912) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.439444335746429, LR: 0.0003 +[2026-03-02 09:28:18] (step=0032913) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.439639992173743, LR: 0.0003 +[2026-03-02 09:28:26] (step=0032914) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.439835648601057, LR: 0.0003 +[2026-03-02 09:28:34] (step=0032915) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.44003130502837, LR: 0.0003 +[2026-03-02 09:28:42] (step=0032916) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.440226961455684, LR: 0.0003 +[2026-03-02 09:28:50] (step=0032917) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.440422617882997, LR: 0.0003 +[2026-03-02 09:28:57] (step=0032918) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.440618274310311, LR: 0.0003 +[2026-03-02 09:29:05] (step=0032919) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.440813930737625, LR: 0.0003 +[2026-03-02 09:29:13] (step=0032920) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.441009587164938, LR: 0.0003 +[2026-03-02 09:29:21] (step=0032921) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.441205243592252, LR: 0.0003 +[2026-03-02 09:29:29] (step=0032922) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.441400900019565, LR: 0.0003 +[2026-03-02 09:29:37] (step=0032923) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.441596556446879, LR: 0.0003 +[2026-03-02 09:29:44] (step=0032924) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 6.441792212874193, LR: 0.0003 +[2026-03-02 09:29:52] (step=0032925) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.4419878693015065, LR: 0.0003 +[2026-03-02 09:30:00] (step=0032926) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.4421835257288205, LR: 0.0003 +[2026-03-02 09:30:08] (step=0032927) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.442379182156134, LR: 0.0003 +[2026-03-02 09:30:16] (step=0032928) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.442574838583448, LR: 0.0003 +[2026-03-02 09:30:24] (step=0032929) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.442770495010761, LR: 0.0003 +[2026-03-02 09:30:32] (step=0032930) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.442966151438075, LR: 0.0003 +[2026-03-02 09:30:39] (step=0032931) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.443161807865389, LR: 0.0003 +[2026-03-02 09:30:47] (step=0032932) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.443357464292702, LR: 0.0003 +[2026-03-02 09:30:55] (step=0032933) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.443553120720016, LR: 0.0003 +[2026-03-02 09:31:03] (step=0032934) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.443748777147329, LR: 0.0003 +[2026-03-02 09:31:11] (step=0032935) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.443944433574643, LR: 0.0003 +[2026-03-02 09:31:19] (step=0032936) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 6.444140090001957, LR: 0.0003 +[2026-03-02 09:31:26] (step=0032937) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.44433574642927, LR: 0.0003 +[2026-03-02 09:31:34] (step=0032938) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.444531402856584, LR: 0.0003 +[2026-03-02 09:31:42] (step=0032939) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.444727059283897, LR: 0.0003 +[2026-03-02 09:31:50] (step=0032940) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.444922715711211, LR: 0.0003 +[2026-03-02 09:31:58] (step=0032941) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.445118372138524, LR: 0.0003 +[2026-03-02 09:32:06] (step=0032942) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 6.445314028565838, LR: 0.0003 +[2026-03-02 09:32:14] (step=0032943) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.445509684993152, LR: 0.0003 +[2026-03-02 09:32:21] (step=0032944) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.4457053414204655, LR: 0.0003 +[2026-03-02 09:32:29] (step=0032945) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.4459009978477795, LR: 0.0003 +[2026-03-02 09:32:37] (step=0032946) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.446096654275093, LR: 0.0003 +[2026-03-02 09:32:45] (step=0032947) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.446292310702407, LR: 0.0003 +[2026-03-02 09:32:53] (step=0032948) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.446487967129721, LR: 0.0003 +[2026-03-02 09:33:01] (step=0032949) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.446683623557034, LR: 0.0003 +[2026-03-02 09:33:08] (step=0032950) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.446879279984348, LR: 0.0003 +[2026-03-02 09:33:16] (step=0032951) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.447074936411661, LR: 0.0003 +[2026-03-02 09:33:24] (step=0032952) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 6.447270592838975, LR: 0.0003 +[2026-03-02 09:33:32] (step=0032953) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.447466249266288, LR: 0.0003 +[2026-03-02 09:33:40] (step=0032954) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 6.447661905693602, LR: 0.0003 +[2026-03-02 09:33:48] (step=0032955) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.447857562120916, LR: 0.0003 +[2026-03-02 09:33:56] (step=0032956) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.448053218548229, LR: 0.0003 +[2026-03-02 09:34:03] (step=0032957) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 6.448248874975543, LR: 0.0003 +[2026-03-02 09:34:11] (step=0032958) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 6.448444531402856, LR: 0.0003 +[2026-03-02 09:34:19] (step=0032959) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.44864018783017, LR: 0.0003 +[2026-03-02 09:34:27] (step=0032960) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 6.448835844257484, LR: 0.0003 +[2026-03-02 09:34:35] (step=0032961) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.449031500684797, LR: 0.0003 +[2026-03-02 09:34:43] (step=0032962) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.449227157112111, LR: 0.0003 +[2026-03-02 09:34:50] (step=0032963) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.4494228135394245, LR: 0.0003 +[2026-03-02 09:34:58] (step=0032964) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.4496184699667385, LR: 0.0003 +[2026-03-02 09:35:06] (step=0032965) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.449814126394052, LR: 0.0003 +[2026-03-02 09:35:14] (step=0032966) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.450009782821366, LR: 0.0003 +[2026-03-02 09:35:22] (step=0032967) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.45020543924868, LR: 0.0003 +[2026-03-02 09:35:30] (step=0032968) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.450401095675993, LR: 0.0003 +[2026-03-02 09:35:37] (step=0032969) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.450596752103307, LR: 0.0003 +[2026-03-02 09:35:45] (step=0032970) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.45079240853062, LR: 0.0003 +[2026-03-02 09:35:53] (step=0032971) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.450988064957934, LR: 0.0003 +[2026-03-02 09:36:01] (step=0032972) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.451183721385248, LR: 0.0003 +[2026-03-02 09:36:09] (step=0032973) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.451379377812561, LR: 0.0003 +[2026-03-02 09:36:17] (step=0032974) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.451575034239875, LR: 0.0003 +[2026-03-02 09:36:25] (step=0032975) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.451770690667188, LR: 0.0003 +[2026-03-02 09:36:32] (step=0032976) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.451966347094502, LR: 0.0003 +[2026-03-02 09:36:40] (step=0032977) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.452162003521816, LR: 0.0003 +[2026-03-02 09:36:48] (step=0032978) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.452357659949129, LR: 0.0003 +[2026-03-02 09:36:56] (step=0032979) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.452553316376443, LR: 0.0003 +[2026-03-02 09:37:04] (step=0032980) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 6.452748972803756, LR: 0.0003 +[2026-03-02 09:37:12] (step=0032981) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.45294462923107, LR: 0.0003 +[2026-03-02 09:37:19] (step=0032982) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.4531402856583835, LR: 0.0003 +[2026-03-02 09:37:27] (step=0032983) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.4533359420856975, LR: 0.0003 +[2026-03-02 09:37:35] (step=0032984) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.4535315985130115, LR: 0.0003 +[2026-03-02 09:37:43] (step=0032985) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 6.453727254940325, LR: 0.0003 +[2026-03-02 09:37:51] (step=0032986) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.453922911367639, LR: 0.0003 +[2026-03-02 09:37:59] (step=0032987) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.454118567794952, LR: 0.0003 +[2026-03-02 09:38:06] (step=0032988) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.454314224222266, LR: 0.0003 +[2026-03-02 09:38:14] (step=0032989) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.45450988064958, LR: 0.0003 +[2026-03-02 09:38:22] (step=0032990) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.454705537076893, LR: 0.0003 +[2026-03-02 09:38:30] (step=0032991) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.454901193504207, LR: 0.0003 +[2026-03-02 09:38:38] (step=0032992) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.45509684993152, LR: 0.0003 +[2026-03-02 09:38:46] (step=0032993) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.455292506358834, LR: 0.0003 +[2026-03-02 09:38:54] (step=0032994) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.455488162786147, LR: 0.0003 +[2026-03-02 09:39:01] (step=0032995) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.455683819213461, LR: 0.0003 +[2026-03-02 09:39:09] (step=0032996) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.455879475640775, LR: 0.0003 +[2026-03-02 09:39:17] (step=0032997) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.456075132068088, LR: 0.0003 +[2026-03-02 09:39:25] (step=0032998) Train Loss: 0.4227, Train Steps/Sec: 0.13, Epoch: 6.456270788495402, LR: 0.0003 +[2026-03-02 09:39:33] (step=0032999) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.456466444922715, LR: 0.0003 +[2026-03-02 09:39:41] (step=0033000) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.456662101350029, LR: 0.0003 +[2026-03-02 09:39:41] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0033000/ +[2026-03-02 09:39:49] (step=0033001) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.456857757777343, LR: 0.0003 +[2026-03-02 09:39:56] (step=0033002) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.4570534142046565, LR: 0.0003 +[2026-03-02 09:40:04] (step=0033003) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.4572490706319705, LR: 0.0003 +[2026-03-02 09:40:12] (step=0033004) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.457444727059284, LR: 0.0003 +[2026-03-02 09:40:20] (step=0033005) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.457640383486598, LR: 0.0003 +[2026-03-02 09:40:28] (step=0033006) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 6.457836039913911, LR: 0.0003 +[2026-03-02 09:40:36] (step=0033007) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.458031696341225, LR: 0.0003 +[2026-03-02 09:40:43] (step=0033008) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 6.458227352768539, LR: 0.0003 +[2026-03-02 09:40:51] (step=0033009) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.458423009195852, LR: 0.0003 +[2026-03-02 09:40:59] (step=0033010) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.458618665623166, LR: 0.0003 +[2026-03-02 09:41:07] (step=0033011) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.458814322050479, LR: 0.0003 +[2026-03-02 09:41:15] (step=0033012) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.459009978477793, LR: 0.0003 +[2026-03-02 09:41:23] (step=0033013) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.459205634905107, LR: 0.0003 +[2026-03-02 09:41:30] (step=0033014) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.45940129133242, LR: 0.0003 +[2026-03-02 09:41:38] (step=0033015) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.459596947759734, LR: 0.0003 +[2026-03-02 09:41:46] (step=0033016) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 6.459792604187047, LR: 0.0003 +[2026-03-02 09:41:54] (step=0033017) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.459988260614361, LR: 0.0003 +[2026-03-02 09:42:02] (step=0033018) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.460183917041674, LR: 0.0003 +[2026-03-02 09:42:10] (step=0033019) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.460379573468988, LR: 0.0003 +[2026-03-02 09:42:18] (step=0033020) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 6.460575229896302, LR: 0.0003 +[2026-03-02 09:42:26] (step=0033021) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.4607708863236155, LR: 0.0003 +[2026-03-02 09:42:33] (step=0033022) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.4609665427509295, LR: 0.0003 +[2026-03-02 09:42:41] (step=0033023) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.461162199178243, LR: 0.0003 +[2026-03-02 09:42:49] (step=0033024) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.461357855605557, LR: 0.0003 +[2026-03-02 09:42:57] (step=0033025) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.461553512032871, LR: 0.0003 +[2026-03-02 09:43:05] (step=0033026) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.461749168460184, LR: 0.0003 +[2026-03-02 09:43:13] (step=0033027) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.461944824887498, LR: 0.0003 +[2026-03-02 09:43:20] (step=0033028) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.462140481314811, LR: 0.0003 +[2026-03-02 09:43:28] (step=0033029) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.462336137742125, LR: 0.0003 +[2026-03-02 09:43:36] (step=0033030) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.462531794169439, LR: 0.0003 +[2026-03-02 09:43:44] (step=0033031) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.462727450596752, LR: 0.0003 +[2026-03-02 09:43:52] (step=0033032) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.462923107024066, LR: 0.0003 +[2026-03-02 09:44:00] (step=0033033) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 6.463118763451379, LR: 0.0003 +[2026-03-02 09:44:07] (step=0033034) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.463314419878693, LR: 0.0003 +[2026-03-02 09:44:15] (step=0033035) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.463510076306006, LR: 0.0003 +[2026-03-02 09:44:23] (step=0033036) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.46370573273332, LR: 0.0003 +[2026-03-02 09:44:31] (step=0033037) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.463901389160634, LR: 0.0003 +[2026-03-02 09:44:39] (step=0033038) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.464097045587947, LR: 0.0003 +[2026-03-02 09:44:47] (step=0033039) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.464292702015261, LR: 0.0003 +[2026-03-02 09:44:54] (step=0033040) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.4644883584425745, LR: 0.0003 +[2026-03-02 09:45:02] (step=0033041) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.4646840148698885, LR: 0.0003 +[2026-03-02 09:45:10] (step=0033042) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.4648796712972025, LR: 0.0003 +[2026-03-02 09:45:18] (step=0033043) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.465075327724516, LR: 0.0003 +[2026-03-02 09:45:26] (step=0033044) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.46527098415183, LR: 0.0003 +[2026-03-02 09:45:34] (step=0033045) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.465466640579143, LR: 0.0003 +[2026-03-02 09:45:41] (step=0033046) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.465662297006457, LR: 0.0003 +[2026-03-02 09:45:49] (step=0033047) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.46585795343377, LR: 0.0003 +[2026-03-02 09:45:57] (step=0033048) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.466053609861084, LR: 0.0003 +[2026-03-02 09:46:05] (step=0033049) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.466249266288398, LR: 0.0003 +[2026-03-02 09:46:13] (step=0033050) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.466444922715711, LR: 0.0003 +[2026-03-02 09:46:21] (step=0033051) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 6.466640579143025, LR: 0.0003 +[2026-03-02 09:46:29] (step=0033052) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.466836235570338, LR: 0.0003 +[2026-03-02 09:46:36] (step=0033053) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.467031891997652, LR: 0.0003 +[2026-03-02 09:46:44] (step=0033054) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.467227548424966, LR: 0.0003 +[2026-03-02 09:46:52] (step=0033055) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.467423204852279, LR: 0.0003 +[2026-03-02 09:47:00] (step=0033056) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.467618861279593, LR: 0.0003 +[2026-03-02 09:47:08] (step=0033057) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.467814517706906, LR: 0.0003 +[2026-03-02 09:47:16] (step=0033058) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.46801017413422, LR: 0.0003 +[2026-03-02 09:47:23] (step=0033059) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.4682058305615335, LR: 0.0003 +[2026-03-02 09:47:31] (step=0033060) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.4684014869888475, LR: 0.0003 +[2026-03-02 09:47:39] (step=0033061) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.4685971434161615, LR: 0.0003 +[2026-03-02 09:47:47] (step=0033062) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.468792799843475, LR: 0.0003 +[2026-03-02 09:47:55] (step=0033063) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.468988456270789, LR: 0.0003 +[2026-03-02 09:48:03] (step=0033064) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.469184112698102, LR: 0.0003 +[2026-03-02 09:48:10] (step=0033065) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.469379769125416, LR: 0.0003 +[2026-03-02 09:48:18] (step=0033066) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.46957542555273, LR: 0.0003 +[2026-03-02 09:48:26] (step=0033067) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.469771081980043, LR: 0.0003 +[2026-03-02 09:48:34] (step=0033068) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.469966738407357, LR: 0.0003 +[2026-03-02 09:48:42] (step=0033069) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.47016239483467, LR: 0.0003 +[2026-03-02 09:48:50] (step=0033070) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.470358051261984, LR: 0.0003 +[2026-03-02 09:48:58] (step=0033071) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 6.470553707689297, LR: 0.0003 +[2026-03-02 09:49:05] (step=0033072) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.470749364116611, LR: 0.0003 +[2026-03-02 09:49:13] (step=0033073) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.470945020543925, LR: 0.0003 +[2026-03-02 09:49:21] (step=0033074) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 6.471140676971238, LR: 0.0003 +[2026-03-02 09:49:29] (step=0033075) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.471336333398552, LR: 0.0003 +[2026-03-02 09:49:37] (step=0033076) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.471531989825865, LR: 0.0003 +[2026-03-02 09:49:45] (step=0033077) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.471727646253179, LR: 0.0003 +[2026-03-02 09:49:52] (step=0033078) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.471923302680493, LR: 0.0003 +[2026-03-02 09:50:00] (step=0033079) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.4721189591078065, LR: 0.0003 +[2026-03-02 09:50:08] (step=0033080) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.4723146155351206, LR: 0.0003 +[2026-03-02 09:50:16] (step=0033081) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.472510271962434, LR: 0.0003 +[2026-03-02 09:50:24] (step=0033082) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.472705928389748, LR: 0.0003 +[2026-03-02 09:50:32] (step=0033083) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.472901584817062, LR: 0.0003 +[2026-03-02 09:50:39] (step=0033084) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.473097241244375, LR: 0.0003 +[2026-03-02 09:50:47] (step=0033085) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.473292897671689, LR: 0.0003 +[2026-03-02 09:50:55] (step=0033086) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.473488554099002, LR: 0.0003 +[2026-03-02 09:51:03] (step=0033087) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.473684210526316, LR: 0.0003 +[2026-03-02 09:51:11] (step=0033088) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.473879866953629, LR: 0.0003 +[2026-03-02 09:51:19] (step=0033089) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.474075523380943, LR: 0.0003 +[2026-03-02 09:51:26] (step=0033090) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 6.474271179808257, LR: 0.0003 +[2026-03-02 09:51:34] (step=0033091) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.47446683623557, LR: 0.0003 +[2026-03-02 09:51:42] (step=0033092) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.474662492662884, LR: 0.0003 +[2026-03-02 09:51:50] (step=0033093) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.474858149090197, LR: 0.0003 +[2026-03-02 09:51:58] (step=0033094) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.475053805517511, LR: 0.0003 +[2026-03-02 09:52:06] (step=0033095) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.475249461944825, LR: 0.0003 +[2026-03-02 09:52:13] (step=0033096) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.475445118372138, LR: 0.0003 +[2026-03-02 09:52:21] (step=0033097) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.475640774799452, LR: 0.0003 +[2026-03-02 09:52:29] (step=0033098) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 6.4758364312267656, LR: 0.0003 +[2026-03-02 09:52:37] (step=0033099) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.47603208765408, LR: 0.0003 +[2026-03-02 09:52:45] (step=0033100) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.476227744081393, LR: 0.0003 +[2026-03-02 09:52:53] (step=0033101) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.476423400508707, LR: 0.0003 +[2026-03-02 09:53:01] (step=0033102) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.476619056936021, LR: 0.0003 +[2026-03-02 09:53:08] (step=0033103) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.476814713363334, LR: 0.0003 +[2026-03-02 09:53:16] (step=0033104) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.477010369790648, LR: 0.0003 +[2026-03-02 09:53:24] (step=0033105) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.477206026217961, LR: 0.0003 +[2026-03-02 09:53:32] (step=0033106) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.477401682645275, LR: 0.0003 +[2026-03-02 09:53:40] (step=0033107) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.477597339072589, LR: 0.0003 +[2026-03-02 09:53:48] (step=0033108) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.477792995499902, LR: 0.0003 +[2026-03-02 09:53:55] (step=0033109) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.477988651927216, LR: 0.0003 +[2026-03-02 09:54:03] (step=0033110) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.478184308354529, LR: 0.0003 +[2026-03-02 09:54:11] (step=0033111) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.478379964781843, LR: 0.0003 +[2026-03-02 09:54:19] (step=0033112) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.478575621209156, LR: 0.0003 +[2026-03-02 09:54:27] (step=0033113) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.47877127763647, LR: 0.0003 +[2026-03-02 09:54:35] (step=0033114) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.478966934063784, LR: 0.0003 +[2026-03-02 09:54:42] (step=0033115) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.479162590491097, LR: 0.0003 +[2026-03-02 09:54:50] (step=0033116) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.4793582469184114, LR: 0.0003 +[2026-03-02 09:54:58] (step=0033117) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.479553903345725, LR: 0.0003 +[2026-03-02 09:55:06] (step=0033118) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.479749559773039, LR: 0.0003 +[2026-03-02 09:55:14] (step=0033119) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.479945216200353, LR: 0.0003 +[2026-03-02 09:55:22] (step=0033120) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.480140872627666, LR: 0.0003 +[2026-03-02 09:55:29] (step=0033121) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.48033652905498, LR: 0.0003 +[2026-03-02 09:55:37] (step=0033122) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.480532185482293, LR: 0.0003 +[2026-03-02 09:55:45] (step=0033123) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.480727841909607, LR: 0.0003 +[2026-03-02 09:55:53] (step=0033124) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 6.48092349833692, LR: 0.0003 +[2026-03-02 09:56:01] (step=0033125) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.481119154764234, LR: 0.0003 +[2026-03-02 09:56:09] (step=0033126) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.481314811191548, LR: 0.0003 +[2026-03-02 09:56:16] (step=0033127) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 6.481510467618861, LR: 0.0003 +[2026-03-02 09:56:24] (step=0033128) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.481706124046175, LR: 0.0003 +[2026-03-02 09:56:32] (step=0033129) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.481901780473488, LR: 0.0003 +[2026-03-02 09:56:40] (step=0033130) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.482097436900802, LR: 0.0003 +[2026-03-02 09:56:48] (step=0033131) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.482293093328116, LR: 0.0003 +[2026-03-02 09:56:56] (step=0033132) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.482488749755429, LR: 0.0003 +[2026-03-02 09:57:03] (step=0033133) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.482684406182743, LR: 0.0003 +[2026-03-02 09:57:11] (step=0033134) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.4828800626100564, LR: 0.0003 +[2026-03-02 09:57:19] (step=0033135) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.4830757190373705, LR: 0.0003 +[2026-03-02 09:57:27] (step=0033136) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.483271375464684, LR: 0.0003 +[2026-03-02 09:57:35] (step=0033137) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.483467031891998, LR: 0.0003 +[2026-03-02 09:57:43] (step=0033138) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.483662688319312, LR: 0.0003 +[2026-03-02 09:57:50] (step=0033139) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.483858344746625, LR: 0.0003 +[2026-03-02 09:57:58] (step=0033140) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.484054001173939, LR: 0.0003 +[2026-03-02 09:58:06] (step=0033141) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.484249657601252, LR: 0.0003 +[2026-03-02 09:58:14] (step=0033142) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.484445314028566, LR: 0.0003 +[2026-03-02 09:58:22] (step=0033143) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.48464097045588, LR: 0.0003 +[2026-03-02 09:58:30] (step=0033144) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.484836626883193, LR: 0.0003 +[2026-03-02 09:58:37] (step=0033145) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.485032283310507, LR: 0.0003 +[2026-03-02 09:58:45] (step=0033146) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.48522793973782, LR: 0.0003 +[2026-03-02 09:58:53] (step=0033147) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.485423596165134, LR: 0.0003 +[2026-03-02 09:59:01] (step=0033148) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.485619252592448, LR: 0.0003 +[2026-03-02 09:59:09] (step=0033149) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.485814909019761, LR: 0.0003 +[2026-03-02 09:59:17] (step=0033150) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.486010565447075, LR: 0.0003 +[2026-03-02 09:59:24] (step=0033151) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 6.486206221874388, LR: 0.0003 +[2026-03-02 09:59:32] (step=0033152) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.486401878301702, LR: 0.0003 +[2026-03-02 09:59:40] (step=0033153) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.4865975347290155, LR: 0.0003 +[2026-03-02 09:59:48] (step=0033154) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.4867931911563295, LR: 0.0003 +[2026-03-02 09:59:56] (step=0033155) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.4869888475836435, LR: 0.0003 +[2026-03-02 10:00:04] (step=0033156) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 6.487184504010957, LR: 0.0003 +[2026-03-02 10:00:11] (step=0033157) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 6.487380160438271, LR: 0.0003 +[2026-03-02 10:00:19] (step=0033158) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.487575816865584, LR: 0.0003 +[2026-03-02 10:00:27] (step=0033159) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.487771473292898, LR: 0.0003 +[2026-03-02 10:00:35] (step=0033160) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 6.487967129720212, LR: 0.0003 +[2026-03-02 10:00:43] (step=0033161) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.488162786147525, LR: 0.0003 +[2026-03-02 10:00:51] (step=0033162) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.488358442574839, LR: 0.0003 +[2026-03-02 10:00:58] (step=0033163) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.488554099002152, LR: 0.0003 +[2026-03-02 10:01:06] (step=0033164) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.488749755429466, LR: 0.0003 +[2026-03-02 10:01:14] (step=0033165) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.488945411856779, LR: 0.0003 +[2026-03-02 10:01:22] (step=0033166) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.489141068284093, LR: 0.0003 +[2026-03-02 10:01:30] (step=0033167) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.489336724711407, LR: 0.0003 +[2026-03-02 10:01:38] (step=0033168) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.48953238113872, LR: 0.0003 +[2026-03-02 10:01:46] (step=0033169) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.489728037566034, LR: 0.0003 +[2026-03-02 10:01:54] (step=0033170) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.489923693993347, LR: 0.0003 +[2026-03-02 10:02:01] (step=0033171) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.490119350420661, LR: 0.0003 +[2026-03-02 10:02:09] (step=0033172) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.490315006847975, LR: 0.0003 +[2026-03-02 10:02:17] (step=0033173) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.4905106632752885, LR: 0.0003 +[2026-03-02 10:02:25] (step=0033174) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.4907063197026025, LR: 0.0003 +[2026-03-02 10:02:33] (step=0033175) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.490901976129916, LR: 0.0003 +[2026-03-02 10:02:41] (step=0033176) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.49109763255723, LR: 0.0003 +[2026-03-02 10:02:48] (step=0033177) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.491293288984543, LR: 0.0003 +[2026-03-02 10:02:56] (step=0033178) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.491488945411857, LR: 0.0003 +[2026-03-02 10:03:04] (step=0033179) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.491684601839171, LR: 0.0003 +[2026-03-02 10:03:12] (step=0033180) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.491880258266484, LR: 0.0003 +[2026-03-02 10:03:20] (step=0033181) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.492075914693798, LR: 0.0003 +[2026-03-02 10:03:28] (step=0033182) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.492271571121111, LR: 0.0003 +[2026-03-02 10:03:35] (step=0033183) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 6.492467227548425, LR: 0.0003 +[2026-03-02 10:03:43] (step=0033184) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.492662883975739, LR: 0.0003 +[2026-03-02 10:03:51] (step=0033185) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.492858540403052, LR: 0.0003 +[2026-03-02 10:03:59] (step=0033186) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.493054196830366, LR: 0.0003 +[2026-03-02 10:04:07] (step=0033187) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.493249853257679, LR: 0.0003 +[2026-03-02 10:04:15] (step=0033188) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.493445509684993, LR: 0.0003 +[2026-03-02 10:04:22] (step=0033189) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.493641166112306, LR: 0.0003 +[2026-03-02 10:04:30] (step=0033190) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.49383682253962, LR: 0.0003 +[2026-03-02 10:04:38] (step=0033191) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.494032478966934, LR: 0.0003 +[2026-03-02 10:04:46] (step=0033192) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.4942281353942475, LR: 0.0003 +[2026-03-02 10:04:54] (step=0033193) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.4944237918215615, LR: 0.0003 +[2026-03-02 10:05:02] (step=0033194) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.494619448248875, LR: 0.0003 +[2026-03-02 10:05:10] (step=0033195) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.494815104676189, LR: 0.0003 +[2026-03-02 10:05:17] (step=0033196) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.495010761103503, LR: 0.0003 +[2026-03-02 10:05:25] (step=0033197) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.495206417530816, LR: 0.0003 +[2026-03-02 10:05:33] (step=0033198) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 6.49540207395813, LR: 0.0003 +[2026-03-02 10:05:41] (step=0033199) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.495597730385443, LR: 0.0003 +[2026-03-02 10:05:49] (step=0033200) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.495793386812757, LR: 0.0003 +[2026-03-02 10:05:57] (step=0033201) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.495989043240071, LR: 0.0003 +[2026-03-02 10:06:04] (step=0033202) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.496184699667384, LR: 0.0003 +[2026-03-02 10:06:12] (step=0033203) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.496380356094698, LR: 0.0003 +[2026-03-02 10:06:20] (step=0033204) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 6.496576012522011, LR: 0.0003 +[2026-03-02 10:06:28] (step=0033205) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.496771668949325, LR: 0.0003 +[2026-03-02 10:06:36] (step=0033206) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.496967325376638, LR: 0.0003 +[2026-03-02 10:06:44] (step=0033207) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.497162981803952, LR: 0.0003 +[2026-03-02 10:06:51] (step=0033208) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.497358638231266, LR: 0.0003 +[2026-03-02 10:06:59] (step=0033209) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.497554294658579, LR: 0.0003 +[2026-03-02 10:07:07] (step=0033210) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.497749951085893, LR: 0.0003 +[2026-03-02 10:07:15] (step=0033211) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.4979456075132065, LR: 0.0003 +[2026-03-02 10:07:23] (step=0033212) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.4981412639405205, LR: 0.0003 +[2026-03-02 10:07:31] (step=0033213) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.4983369203678345, LR: 0.0003 +[2026-03-02 10:07:38] (step=0033214) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.498532576795148, LR: 0.0003 +[2026-03-02 10:07:46] (step=0033215) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.498728233222462, LR: 0.0003 +[2026-03-02 10:07:54] (step=0033216) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.498923889649775, LR: 0.0003 +[2026-03-02 10:08:02] (step=0033217) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.499119546077089, LR: 0.0003 +[2026-03-02 10:08:10] (step=0033218) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.499315202504402, LR: 0.0003 +[2026-03-02 10:08:18] (step=0033219) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.499510858931716, LR: 0.0003 +[2026-03-02 10:08:26] (step=0033220) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.49970651535903, LR: 0.0003 +[2026-03-02 10:08:33] (step=0033221) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.499902171786343, LR: 0.0003 +[2026-03-02 10:08:41] (step=0033222) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.500097828213657, LR: 0.0003 +[2026-03-02 10:08:49] (step=0033223) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.50029348464097, LR: 0.0003 +[2026-03-02 10:08:57] (step=0033224) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.500489141068284, LR: 0.0003 +[2026-03-02 10:09:05] (step=0033225) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.500684797495598, LR: 0.0003 +[2026-03-02 10:09:13] (step=0033226) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.500880453922911, LR: 0.0003 +[2026-03-02 10:09:20] (step=0033227) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.501076110350225, LR: 0.0003 +[2026-03-02 10:09:28] (step=0033228) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.501271766777538, LR: 0.0003 +[2026-03-02 10:09:36] (step=0033229) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.501467423204852, LR: 0.0003 +[2026-03-02 10:09:44] (step=0033230) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.5016630796321655, LR: 0.0003 +[2026-03-02 10:09:52] (step=0033231) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 6.5018587360594795, LR: 0.0003 +[2026-03-02 10:10:00] (step=0033232) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.5020543924867935, LR: 0.0003 +[2026-03-02 10:10:07] (step=0033233) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.502250048914107, LR: 0.0003 +[2026-03-02 10:10:15] (step=0033234) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.502445705341421, LR: 0.0003 +[2026-03-02 10:10:23] (step=0033235) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.502641361768734, LR: 0.0003 +[2026-03-02 10:10:31] (step=0033236) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.502837018196048, LR: 0.0003 +[2026-03-02 10:10:39] (step=0033237) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.503032674623362, LR: 0.0003 +[2026-03-02 10:10:47] (step=0033238) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.503228331050675, LR: 0.0003 +[2026-03-02 10:10:54] (step=0033239) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.503423987477989, LR: 0.0003 +[2026-03-02 10:11:02] (step=0033240) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.503619643905302, LR: 0.0003 +[2026-03-02 10:11:10] (step=0033241) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.503815300332616, LR: 0.0003 +[2026-03-02 10:11:18] (step=0033242) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.504010956759929, LR: 0.0003 +[2026-03-02 10:11:26] (step=0033243) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.504206613187243, LR: 0.0003 +[2026-03-02 10:11:34] (step=0033244) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 6.504402269614557, LR: 0.0003 +[2026-03-02 10:11:42] (step=0033245) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 6.50459792604187, LR: 0.0003 +[2026-03-02 10:11:49] (step=0033246) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.504793582469184, LR: 0.0003 +[2026-03-02 10:11:57] (step=0033247) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.504989238896497, LR: 0.0003 +[2026-03-02 10:12:05] (step=0033248) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.505184895323811, LR: 0.0003 +[2026-03-02 10:12:13] (step=0033249) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.505380551751125, LR: 0.0003 +[2026-03-02 10:12:21] (step=0033250) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.5055762081784385, LR: 0.0003 +[2026-03-02 10:12:29] (step=0033251) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.5057718646057525, LR: 0.0003 +[2026-03-02 10:12:36] (step=0033252) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.505967521033066, LR: 0.0003 +[2026-03-02 10:12:44] (step=0033253) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 6.50616317746038, LR: 0.0003 +[2026-03-02 10:12:52] (step=0033254) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.506358833887694, LR: 0.0003 +[2026-03-02 10:13:00] (step=0033255) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.506554490315007, LR: 0.0003 +[2026-03-02 10:13:08] (step=0033256) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.506750146742321, LR: 0.0003 +[2026-03-02 10:13:16] (step=0033257) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 6.506945803169634, LR: 0.0003 +[2026-03-02 10:13:23] (step=0033258) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.507141459596948, LR: 0.0003 +[2026-03-02 10:13:31] (step=0033259) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.507337116024261, LR: 0.0003 +[2026-03-02 10:13:39] (step=0033260) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.507532772451575, LR: 0.0003 +[2026-03-02 10:13:47] (step=0033261) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 6.507728428878889, LR: 0.0003 +[2026-03-02 10:13:55] (step=0033262) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.507924085306202, LR: 0.0003 +[2026-03-02 10:14:03] (step=0033263) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.508119741733516, LR: 0.0003 +[2026-03-02 10:14:10] (step=0033264) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.508315398160829, LR: 0.0003 +[2026-03-02 10:14:18] (step=0033265) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.508511054588143, LR: 0.0003 +[2026-03-02 10:14:26] (step=0033266) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 6.508706711015457, LR: 0.0003 +[2026-03-02 10:14:34] (step=0033267) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.50890236744277, LR: 0.0003 +[2026-03-02 10:14:42] (step=0033268) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.509098023870084, LR: 0.0003 +[2026-03-02 10:14:50] (step=0033269) Train Loss: 0.4361, Train Steps/Sec: 0.12, Epoch: 6.5092936802973975, LR: 0.0003 +[2026-03-02 10:14:58] (step=0033270) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.5094893367247115, LR: 0.0003 +[2026-03-02 10:15:06] (step=0033271) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.509684993152025, LR: 0.0003 +[2026-03-02 10:15:13] (step=0033272) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.509880649579339, LR: 0.0003 +[2026-03-02 10:15:21] (step=0033273) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.510076306006653, LR: 0.0003 +[2026-03-02 10:15:29] (step=0033274) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.510271962433966, LR: 0.0003 +[2026-03-02 10:15:37] (step=0033275) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.51046761886128, LR: 0.0003 +[2026-03-02 10:15:45] (step=0033276) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.510663275288593, LR: 0.0003 +[2026-03-02 10:15:53] (step=0033277) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.510858931715907, LR: 0.0003 +[2026-03-02 10:16:00] (step=0033278) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.511054588143221, LR: 0.0003 +[2026-03-02 10:16:08] (step=0033279) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.511250244570534, LR: 0.0003 +[2026-03-02 10:16:16] (step=0033280) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.511445900997848, LR: 0.0003 +[2026-03-02 10:16:24] (step=0033281) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.511641557425161, LR: 0.0003 +[2026-03-02 10:16:32] (step=0033282) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.511837213852475, LR: 0.0003 +[2026-03-02 10:16:40] (step=0033283) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.512032870279788, LR: 0.0003 +[2026-03-02 10:16:47] (step=0033284) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.512228526707102, LR: 0.0003 +[2026-03-02 10:16:55] (step=0033285) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.512424183134416, LR: 0.0003 +[2026-03-02 10:17:03] (step=0033286) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 6.512619839561729, LR: 0.0003 +[2026-03-02 10:17:11] (step=0033287) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.512815495989043, LR: 0.0003 +[2026-03-02 10:17:19] (step=0033288) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 6.5130111524163565, LR: 0.0003 +[2026-03-02 10:17:27] (step=0033289) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.5132068088436705, LR: 0.0003 +[2026-03-02 10:17:34] (step=0033290) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.5134024652709845, LR: 0.0003 +[2026-03-02 10:17:42] (step=0033291) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.513598121698298, LR: 0.0003 +[2026-03-02 10:17:50] (step=0033292) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.513793778125612, LR: 0.0003 +[2026-03-02 10:17:58] (step=0033293) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.513989434552925, LR: 0.0003 +[2026-03-02 10:18:06] (step=0033294) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.514185090980239, LR: 0.0003 +[2026-03-02 10:18:14] (step=0033295) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.514380747407552, LR: 0.0003 +[2026-03-02 10:18:22] (step=0033296) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.514576403834866, LR: 0.0003 +[2026-03-02 10:18:29] (step=0033297) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.51477206026218, LR: 0.0003 +[2026-03-02 10:18:37] (step=0033298) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.514967716689493, LR: 0.0003 +[2026-03-02 10:18:45] (step=0033299) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.515163373116807, LR: 0.0003 +[2026-03-02 10:18:53] (step=0033300) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.51535902954412, LR: 0.0003 +[2026-03-02 10:19:01] (step=0033301) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.515554685971434, LR: 0.0003 +[2026-03-02 10:19:09] (step=0033302) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.515750342398748, LR: 0.0003 +[2026-03-02 10:19:16] (step=0033303) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.515945998826061, LR: 0.0003 +[2026-03-02 10:19:24] (step=0033304) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.516141655253375, LR: 0.0003 +[2026-03-02 10:19:32] (step=0033305) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.516337311680688, LR: 0.0003 +[2026-03-02 10:19:40] (step=0033306) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.516532968108002, LR: 0.0003 +[2026-03-02 10:19:48] (step=0033307) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 6.516728624535316, LR: 0.0003 +[2026-03-02 10:19:56] (step=0033308) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.5169242809626295, LR: 0.0003 +[2026-03-02 10:20:04] (step=0033309) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.5171199373899436, LR: 0.0003 +[2026-03-02 10:20:11] (step=0033310) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.517315593817257, LR: 0.0003 +[2026-03-02 10:20:19] (step=0033311) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.517511250244571, LR: 0.0003 +[2026-03-02 10:20:27] (step=0033312) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.517706906671884, LR: 0.0003 +[2026-03-02 10:20:35] (step=0033313) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.517902563099198, LR: 0.0003 +[2026-03-02 10:20:43] (step=0033314) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.518098219526512, LR: 0.0003 +[2026-03-02 10:20:51] (step=0033315) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.518293875953825, LR: 0.0003 +[2026-03-02 10:20:59] (step=0033316) Train Loss: 0.4447, Train Steps/Sec: 0.12, Epoch: 6.518489532381139, LR: 0.0003 +[2026-03-02 10:21:06] (step=0033317) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.518685188808452, LR: 0.0003 +[2026-03-02 10:21:14] (step=0033318) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.518880845235766, LR: 0.0003 +[2026-03-02 10:21:22] (step=0033319) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.51907650166308, LR: 0.0003 +[2026-03-02 10:21:30] (step=0033320) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.519272158090393, LR: 0.0003 +[2026-03-02 10:21:38] (step=0033321) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.519467814517707, LR: 0.0003 +[2026-03-02 10:21:46] (step=0033322) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.51966347094502, LR: 0.0003 +[2026-03-02 10:21:53] (step=0033323) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.519859127372334, LR: 0.0003 +[2026-03-02 10:22:01] (step=0033324) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 6.520054783799647, LR: 0.0003 +[2026-03-02 10:22:09] (step=0033325) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.520250440226961, LR: 0.0003 +[2026-03-02 10:22:17] (step=0033326) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.520446096654275, LR: 0.0003 +[2026-03-02 10:22:25] (step=0033327) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.5206417530815886, LR: 0.0003 +[2026-03-02 10:22:33] (step=0033328) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.520837409508903, LR: 0.0003 +[2026-03-02 10:22:41] (step=0033329) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.521033065936216, LR: 0.0003 +[2026-03-02 10:22:48] (step=0033330) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.52122872236353, LR: 0.0003 +[2026-03-02 10:22:56] (step=0033331) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 6.521424378790844, LR: 0.0003 +[2026-03-02 10:23:04] (step=0033332) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.521620035218157, LR: 0.0003 +[2026-03-02 10:23:12] (step=0033333) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.521815691645471, LR: 0.0003 +[2026-03-02 10:23:20] (step=0033334) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.522011348072784, LR: 0.0003 +[2026-03-02 10:23:28] (step=0033335) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.522207004500098, LR: 0.0003 +[2026-03-02 10:23:35] (step=0033336) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.522402660927411, LR: 0.0003 +[2026-03-02 10:23:43] (step=0033337) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 6.522598317354725, LR: 0.0003 +[2026-03-02 10:23:51] (step=0033338) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.522793973782039, LR: 0.0003 +[2026-03-02 10:23:59] (step=0033339) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.522989630209352, LR: 0.0003 +[2026-03-02 10:24:07] (step=0033340) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.523185286636666, LR: 0.0003 +[2026-03-02 10:24:15] (step=0033341) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.523380943063979, LR: 0.0003 +[2026-03-02 10:24:22] (step=0033342) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.523576599491293, LR: 0.0003 +[2026-03-02 10:24:30] (step=0033343) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 6.523772255918607, LR: 0.0003 +[2026-03-02 10:24:38] (step=0033344) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 6.52396791234592, LR: 0.0003 +[2026-03-02 10:24:46] (step=0033345) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.5241635687732344, LR: 0.0003 +[2026-03-02 10:24:54] (step=0033346) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.524359225200548, LR: 0.0003 +[2026-03-02 10:25:02] (step=0033347) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.524554881627862, LR: 0.0003 +[2026-03-02 10:25:10] (step=0033348) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.524750538055175, LR: 0.0003 +[2026-03-02 10:25:17] (step=0033349) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 6.524946194482489, LR: 0.0003 +[2026-03-02 10:25:25] (step=0033350) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 6.525141850909803, LR: 0.0003 +[2026-03-02 10:25:33] (step=0033351) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.525337507337116, LR: 0.0003 +[2026-03-02 10:25:41] (step=0033352) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.52553316376443, LR: 0.0003 +[2026-03-02 10:25:49] (step=0033353) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.525728820191743, LR: 0.0003 +[2026-03-02 10:25:57] (step=0033354) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.525924476619057, LR: 0.0003 +[2026-03-02 10:26:05] (step=0033355) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.526120133046371, LR: 0.0003 +[2026-03-02 10:26:12] (step=0033356) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.526315789473684, LR: 0.0003 +[2026-03-02 10:26:20] (step=0033357) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.526511445900998, LR: 0.0003 +[2026-03-02 10:26:28] (step=0033358) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.526707102328311, LR: 0.0003 +[2026-03-02 10:26:36] (step=0033359) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.526902758755625, LR: 0.0003 +[2026-03-02 10:26:44] (step=0033360) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 6.527098415182938, LR: 0.0003 +[2026-03-02 10:26:52] (step=0033361) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.527294071610252, LR: 0.0003 +[2026-03-02 10:26:59] (step=0033362) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.527489728037566, LR: 0.0003 +[2026-03-02 10:27:07] (step=0033363) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.5276853844648794, LR: 0.0003 +[2026-03-02 10:27:15] (step=0033364) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.5278810408921935, LR: 0.0003 +[2026-03-02 10:27:23] (step=0033365) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.528076697319507, LR: 0.0003 +[2026-03-02 10:27:31] (step=0033366) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.528272353746821, LR: 0.0003 +[2026-03-02 10:27:39] (step=0033367) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.528468010174135, LR: 0.0003 +[2026-03-02 10:27:47] (step=0033368) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.528663666601448, LR: 0.0003 +[2026-03-02 10:27:54] (step=0033369) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.528859323028762, LR: 0.0003 +[2026-03-02 10:28:02] (step=0033370) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.529054979456075, LR: 0.0003 +[2026-03-02 10:28:10] (step=0033371) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.529250635883389, LR: 0.0003 +[2026-03-02 10:28:18] (step=0033372) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.529446292310703, LR: 0.0003 +[2026-03-02 10:28:26] (step=0033373) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.529641948738016, LR: 0.0003 +[2026-03-02 10:28:34] (step=0033374) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.52983760516533, LR: 0.0003 +[2026-03-02 10:28:41] (step=0033375) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.530033261592643, LR: 0.0003 +[2026-03-02 10:28:49] (step=0033376) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.530228918019957, LR: 0.0003 +[2026-03-02 10:28:57] (step=0033377) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.53042457444727, LR: 0.0003 +[2026-03-02 10:29:05] (step=0033378) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.530620230874584, LR: 0.0003 +[2026-03-02 10:29:13] (step=0033379) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.530815887301898, LR: 0.0003 +[2026-03-02 10:29:21] (step=0033380) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.531011543729211, LR: 0.0003 +[2026-03-02 10:29:28] (step=0033381) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.531207200156525, LR: 0.0003 +[2026-03-02 10:29:36] (step=0033382) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.5314028565838385, LR: 0.0003 +[2026-03-02 10:29:44] (step=0033383) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.5315985130111525, LR: 0.0003 +[2026-03-02 10:29:52] (step=0033384) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.5317941694384665, LR: 0.0003 +[2026-03-02 10:30:00] (step=0033385) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.53198982586578, LR: 0.0003 +[2026-03-02 10:30:08] (step=0033386) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.532185482293094, LR: 0.0003 +[2026-03-02 10:30:15] (step=0033387) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.532381138720407, LR: 0.0003 +[2026-03-02 10:30:23] (step=0033388) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.532576795147721, LR: 0.0003 +[2026-03-02 10:30:31] (step=0033389) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.532772451575034, LR: 0.0003 +[2026-03-02 10:30:39] (step=0033390) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.532968108002348, LR: 0.0003 +[2026-03-02 10:30:47] (step=0033391) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.533163764429662, LR: 0.0003 +[2026-03-02 10:30:55] (step=0033392) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.533359420856975, LR: 0.0003 +[2026-03-02 10:31:02] (step=0033393) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.533555077284289, LR: 0.0003 +[2026-03-02 10:31:10] (step=0033394) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.533750733711602, LR: 0.0003 +[2026-03-02 10:31:18] (step=0033395) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.533946390138916, LR: 0.0003 +[2026-03-02 10:31:26] (step=0033396) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 6.53414204656623, LR: 0.0003 +[2026-03-02 10:31:34] (step=0033397) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.534337702993543, LR: 0.0003 +[2026-03-02 10:31:42] (step=0033398) Train Loss: 0.4493, Train Steps/Sec: 0.12, Epoch: 6.534533359420857, LR: 0.0003 +[2026-03-02 10:31:50] (step=0033399) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.53472901584817, LR: 0.0003 +[2026-03-02 10:31:58] (step=0033400) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.534924672275484, LR: 0.0003 +[2026-03-02 10:32:05] (step=0033401) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.5351203287027975, LR: 0.0003 +[2026-03-02 10:32:13] (step=0033402) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.5353159851301115, LR: 0.0003 +[2026-03-02 10:32:21] (step=0033403) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.5355116415574255, LR: 0.0003 +[2026-03-02 10:32:29] (step=0033404) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.535707297984739, LR: 0.0003 +[2026-03-02 10:32:37] (step=0033405) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.535902954412053, LR: 0.0003 +[2026-03-02 10:32:45] (step=0033406) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.536098610839366, LR: 0.0003 +[2026-03-02 10:32:52] (step=0033407) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.53629426726668, LR: 0.0003 +[2026-03-02 10:33:00] (step=0033408) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.536489923693994, LR: 0.0003 +[2026-03-02 10:33:08] (step=0033409) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.536685580121307, LR: 0.0003 +[2026-03-02 10:33:16] (step=0033410) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.536881236548621, LR: 0.0003 +[2026-03-02 10:33:24] (step=0033411) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.537076892975934, LR: 0.0003 +[2026-03-02 10:33:32] (step=0033412) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.537272549403248, LR: 0.0003 +[2026-03-02 10:33:40] (step=0033413) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.537468205830561, LR: 0.0003 +[2026-03-02 10:33:47] (step=0033414) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.537663862257875, LR: 0.0003 +[2026-03-02 10:33:55] (step=0033415) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.537859518685189, LR: 0.0003 +[2026-03-02 10:34:03] (step=0033416) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.538055175112502, LR: 0.0003 +[2026-03-02 10:34:11] (step=0033417) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.538250831539816, LR: 0.0003 +[2026-03-02 10:34:19] (step=0033418) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.538446487967129, LR: 0.0003 +[2026-03-02 10:34:27] (step=0033419) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.538642144394443, LR: 0.0003 +[2026-03-02 10:34:35] (step=0033420) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.538837800821757, LR: 0.0003 +[2026-03-02 10:34:42] (step=0033421) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.5390334572490705, LR: 0.0003 +[2026-03-02 10:34:50] (step=0033422) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.5392291136763845, LR: 0.0003 +[2026-03-02 10:34:58] (step=0033423) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.539424770103698, LR: 0.0003 +[2026-03-02 10:35:06] (step=0033424) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.539620426531012, LR: 0.0003 +[2026-03-02 10:35:14] (step=0033425) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.539816082958326, LR: 0.0003 +[2026-03-02 10:35:22] (step=0033426) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.540011739385639, LR: 0.0003 +[2026-03-02 10:35:29] (step=0033427) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.540207395812953, LR: 0.0003 +[2026-03-02 10:35:37] (step=0033428) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.540403052240266, LR: 0.0003 +[2026-03-02 10:35:45] (step=0033429) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.54059870866758, LR: 0.0003 +[2026-03-02 10:35:53] (step=0033430) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.540794365094893, LR: 0.0003 +[2026-03-02 10:36:01] (step=0033431) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.540990021522207, LR: 0.0003 +[2026-03-02 10:36:09] (step=0033432) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.541185677949521, LR: 0.0003 +[2026-03-02 10:36:16] (step=0033433) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.541381334376834, LR: 0.0003 +[2026-03-02 10:36:24] (step=0033434) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.541576990804148, LR: 0.0003 +[2026-03-02 10:36:32] (step=0033435) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 6.541772647231461, LR: 0.0003 +[2026-03-02 10:36:40] (step=0033436) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.541968303658775, LR: 0.0003 +[2026-03-02 10:36:48] (step=0033437) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.542163960086089, LR: 0.0003 +[2026-03-02 10:36:56] (step=0033438) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.542359616513402, LR: 0.0003 +[2026-03-02 10:37:03] (step=0033439) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.542555272940716, LR: 0.0003 +[2026-03-02 10:37:11] (step=0033440) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.5427509293680295, LR: 0.0003 +[2026-03-02 10:37:19] (step=0033441) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.5429465857953435, LR: 0.0003 +[2026-03-02 10:37:27] (step=0033442) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.543142242222657, LR: 0.0003 +[2026-03-02 10:37:35] (step=0033443) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.543337898649971, LR: 0.0003 +[2026-03-02 10:37:43] (step=0033444) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.543533555077285, LR: 0.0003 +[2026-03-02 10:37:51] (step=0033445) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.543729211504598, LR: 0.0003 +[2026-03-02 10:37:58] (step=0033446) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.543924867931912, LR: 0.0003 +[2026-03-02 10:38:06] (step=0033447) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 6.544120524359225, LR: 0.0003 +[2026-03-02 10:38:14] (step=0033448) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.544316180786539, LR: 0.0003 +[2026-03-02 10:38:22] (step=0033449) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.544511837213853, LR: 0.0003 +[2026-03-02 10:38:30] (step=0033450) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.544707493641166, LR: 0.0003 +[2026-03-02 10:38:38] (step=0033451) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.54490315006848, LR: 0.0003 +[2026-03-02 10:38:45] (step=0033452) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.545098806495793, LR: 0.0003 +[2026-03-02 10:38:53] (step=0033453) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.545294462923107, LR: 0.0003 +[2026-03-02 10:39:01] (step=0033454) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.54549011935042, LR: 0.0003 +[2026-03-02 10:39:09] (step=0033455) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.545685775777734, LR: 0.0003 +[2026-03-02 10:39:17] (step=0033456) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.545881432205048, LR: 0.0003 +[2026-03-02 10:39:25] (step=0033457) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.546077088632361, LR: 0.0003 +[2026-03-02 10:39:32] (step=0033458) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.546272745059675, LR: 0.0003 +[2026-03-02 10:39:40] (step=0033459) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.5464684014869885, LR: 0.0003 +[2026-03-02 10:39:48] (step=0033460) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.5466640579143025, LR: 0.0003 +[2026-03-02 10:39:56] (step=0033461) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.5468597143416165, LR: 0.0003 +[2026-03-02 10:40:04] (step=0033462) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.54705537076893, LR: 0.0003 +[2026-03-02 10:40:12] (step=0033463) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.547251027196244, LR: 0.0003 +[2026-03-02 10:40:19] (step=0033464) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.547446683623557, LR: 0.0003 +[2026-03-02 10:40:27] (step=0033465) Train Loss: 0.4665, Train Steps/Sec: 0.12, Epoch: 6.547642340050871, LR: 0.0003 +[2026-03-02 10:40:35] (step=0033466) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.547837996478184, LR: 0.0003 +[2026-03-02 10:40:43] (step=0033467) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.548033652905498, LR: 0.0003 +[2026-03-02 10:40:51] (step=0033468) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.548229309332812, LR: 0.0003 +[2026-03-02 10:40:59] (step=0033469) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.548424965760125, LR: 0.0003 +[2026-03-02 10:41:07] (step=0033470) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.548620622187439, LR: 0.0003 +[2026-03-02 10:41:14] (step=0033471) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.548816278614752, LR: 0.0003 +[2026-03-02 10:41:22] (step=0033472) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.549011935042066, LR: 0.0003 +[2026-03-02 10:41:30] (step=0033473) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.54920759146938, LR: 0.0003 +[2026-03-02 10:41:38] (step=0033474) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.549403247896693, LR: 0.0003 +[2026-03-02 10:41:46] (step=0033475) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.549598904324007, LR: 0.0003 +[2026-03-02 10:41:54] (step=0033476) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.54979456075132, LR: 0.0003 +[2026-03-02 10:42:01] (step=0033477) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.549990217178634, LR: 0.0003 +[2026-03-02 10:42:09] (step=0033478) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.550185873605948, LR: 0.0003 +[2026-03-02 10:42:17] (step=0033479) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.5503815300332615, LR: 0.0003 +[2026-03-02 10:42:25] (step=0033480) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.5505771864605755, LR: 0.0003 +[2026-03-02 10:42:33] (step=0033481) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.550772842887889, LR: 0.0003 +[2026-03-02 10:42:41] (step=0033482) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 6.550968499315203, LR: 0.0003 +[2026-03-02 10:42:49] (step=0033483) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.551164155742516, LR: 0.0003 +[2026-03-02 10:42:56] (step=0033484) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.55135981216983, LR: 0.0003 +[2026-03-02 10:43:04] (step=0033485) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.551555468597144, LR: 0.0003 +[2026-03-02 10:43:12] (step=0033486) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.551751125024457, LR: 0.0003 +[2026-03-02 10:43:20] (step=0033487) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.551946781451771, LR: 0.0003 +[2026-03-02 10:43:28] (step=0033488) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.552142437879084, LR: 0.0003 +[2026-03-02 10:43:36] (step=0033489) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.552338094306398, LR: 0.0003 +[2026-03-02 10:43:43] (step=0033490) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.552533750733712, LR: 0.0003 +[2026-03-02 10:43:51] (step=0033491) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.552729407161025, LR: 0.0003 +[2026-03-02 10:43:59] (step=0033492) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.552925063588339, LR: 0.0003 +[2026-03-02 10:44:07] (step=0033493) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.553120720015652, LR: 0.0003 +[2026-03-02 10:44:15] (step=0033494) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.553316376442966, LR: 0.0003 +[2026-03-02 10:44:23] (step=0033495) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 6.553512032870279, LR: 0.0003 +[2026-03-02 10:44:31] (step=0033496) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 6.553707689297593, LR: 0.0003 +[2026-03-02 10:44:38] (step=0033497) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.553903345724907, LR: 0.0003 +[2026-03-02 10:44:46] (step=0033498) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 6.5540990021522205, LR: 0.0003 +[2026-03-02 10:44:54] (step=0033499) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 6.5542946585795345, LR: 0.0003 +[2026-03-02 10:45:02] (step=0033500) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.554490315006848, LR: 0.0003 +[2026-03-02 10:45:02] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0033500/ +[2026-03-02 10:45:10] (step=0033501) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.554685971434162, LR: 0.0003 +[2026-03-02 10:45:18] (step=0033502) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 6.554881627861476, LR: 0.0003 +[2026-03-02 10:45:25] (step=0033503) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.555077284288789, LR: 0.0003 +[2026-03-02 10:45:33] (step=0033504) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 6.555272940716103, LR: 0.0003 +[2026-03-02 10:45:41] (step=0033505) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.555468597143416, LR: 0.0003 +[2026-03-02 10:45:49] (step=0033506) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.55566425357073, LR: 0.0003 +[2026-03-02 10:45:57] (step=0033507) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.555859909998043, LR: 0.0003 +[2026-03-02 10:46:05] (step=0033508) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 6.556055566425357, LR: 0.0003 +[2026-03-02 10:46:12] (step=0033509) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.556251222852671, LR: 0.0003 +[2026-03-02 10:46:20] (step=0033510) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.556446879279984, LR: 0.0003 +[2026-03-02 10:46:28] (step=0033511) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.556642535707298, LR: 0.0003 +[2026-03-02 10:46:36] (step=0033512) Train Loss: 0.4546, Train Steps/Sec: 0.12, Epoch: 6.556838192134611, LR: 0.0003 +[2026-03-02 10:46:44] (step=0033513) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.557033848561925, LR: 0.0003 +[2026-03-02 10:46:52] (step=0033514) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.557229504989239, LR: 0.0003 +[2026-03-02 10:47:00] (step=0033515) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.557425161416552, LR: 0.0003 +[2026-03-02 10:47:08] (step=0033516) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.557620817843866, LR: 0.0003 +[2026-03-02 10:47:15] (step=0033517) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.5578164742711795, LR: 0.0003 +[2026-03-02 10:47:23] (step=0033518) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.5580121306984935, LR: 0.0003 +[2026-03-02 10:47:31] (step=0033519) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.558207787125807, LR: 0.0003 +[2026-03-02 10:47:39] (step=0033520) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.558403443553121, LR: 0.0003 +[2026-03-02 10:47:47] (step=0033521) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.558599099980435, LR: 0.0003 +[2026-03-02 10:47:55] (step=0033522) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.558794756407748, LR: 0.0003 +[2026-03-02 10:48:02] (step=0033523) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.558990412835062, LR: 0.0003 +[2026-03-02 10:48:10] (step=0033524) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.559186069262375, LR: 0.0003 +[2026-03-02 10:48:18] (step=0033525) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 6.559381725689689, LR: 0.0003 +[2026-03-02 10:48:26] (step=0033526) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.559577382117003, LR: 0.0003 +[2026-03-02 10:48:34] (step=0033527) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.559773038544316, LR: 0.0003 +[2026-03-02 10:48:42] (step=0033528) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.55996869497163, LR: 0.0003 +[2026-03-02 10:48:49] (step=0033529) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.560164351398943, LR: 0.0003 +[2026-03-02 10:48:57] (step=0033530) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.560360007826257, LR: 0.0003 +[2026-03-02 10:49:05] (step=0033531) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.560555664253571, LR: 0.0003 +[2026-03-02 10:49:13] (step=0033532) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.560751320680884, LR: 0.0003 +[2026-03-02 10:49:21] (step=0033533) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.560946977108198, LR: 0.0003 +[2026-03-02 10:49:29] (step=0033534) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.561142633535511, LR: 0.0003 +[2026-03-02 10:49:36] (step=0033535) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.561338289962825, LR: 0.0003 +[2026-03-02 10:49:44] (step=0033536) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.5615339463901385, LR: 0.0003 +[2026-03-02 10:49:52] (step=0033537) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.5617296028174525, LR: 0.0003 +[2026-03-02 10:50:00] (step=0033538) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.5619252592447666, LR: 0.0003 +[2026-03-02 10:50:08] (step=0033539) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.56212091567208, LR: 0.0003 +[2026-03-02 10:50:16] (step=0033540) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.562316572099394, LR: 0.0003 +[2026-03-02 10:50:24] (step=0033541) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.562512228526707, LR: 0.0003 +[2026-03-02 10:50:31] (step=0033542) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 6.562707884954021, LR: 0.0003 +[2026-03-02 10:50:39] (step=0033543) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.562903541381335, LR: 0.0003 +[2026-03-02 10:50:47] (step=0033544) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.563099197808648, LR: 0.0003 +[2026-03-02 10:50:55] (step=0033545) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.563294854235962, LR: 0.0003 +[2026-03-02 10:51:03] (step=0033546) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.563490510663275, LR: 0.0003 +[2026-03-02 10:51:11] (step=0033547) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 6.563686167090589, LR: 0.0003 +[2026-03-02 10:51:18] (step=0033548) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.563881823517902, LR: 0.0003 +[2026-03-02 10:51:26] (step=0033549) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.564077479945216, LR: 0.0003 +[2026-03-02 10:51:34] (step=0033550) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.56427313637253, LR: 0.0003 +[2026-03-02 10:51:42] (step=0033551) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.564468792799843, LR: 0.0003 +[2026-03-02 10:51:50] (step=0033552) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.564664449227157, LR: 0.0003 +[2026-03-02 10:51:58] (step=0033553) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.56486010565447, LR: 0.0003 +[2026-03-02 10:52:05] (step=0033554) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.565055762081784, LR: 0.0003 +[2026-03-02 10:52:13] (step=0033555) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.565251418509098, LR: 0.0003 +[2026-03-02 10:52:21] (step=0033556) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.5654470749364116, LR: 0.0003 +[2026-03-02 10:52:29] (step=0033557) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.565642731363726, LR: 0.0003 +[2026-03-02 10:52:37] (step=0033558) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.565838387791039, LR: 0.0003 +[2026-03-02 10:52:45] (step=0033559) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.566034044218353, LR: 0.0003 +[2026-03-02 10:52:53] (step=0033560) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.566229700645666, LR: 0.0003 +[2026-03-02 10:53:00] (step=0033561) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.56642535707298, LR: 0.0003 +[2026-03-02 10:53:08] (step=0033562) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.566621013500294, LR: 0.0003 +[2026-03-02 10:53:16] (step=0033563) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.566816669927607, LR: 0.0003 +[2026-03-02 10:53:24] (step=0033564) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.567012326354921, LR: 0.0003 +[2026-03-02 10:53:32] (step=0033565) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 6.567207982782234, LR: 0.0003 +[2026-03-02 10:53:40] (step=0033566) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.567403639209548, LR: 0.0003 +[2026-03-02 10:53:47] (step=0033567) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.567599295636862, LR: 0.0003 +[2026-03-02 10:53:55] (step=0033568) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.567794952064175, LR: 0.0003 +[2026-03-02 10:54:03] (step=0033569) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.567990608491489, LR: 0.0003 +[2026-03-02 10:54:11] (step=0033570) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.568186264918802, LR: 0.0003 +[2026-03-02 10:54:19] (step=0033571) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.568381921346116, LR: 0.0003 +[2026-03-02 10:54:27] (step=0033572) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.568577577773429, LR: 0.0003 +[2026-03-02 10:54:35] (step=0033573) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.568773234200743, LR: 0.0003 +[2026-03-02 10:54:42] (step=0033574) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.5689688906280574, LR: 0.0003 +[2026-03-02 10:54:50] (step=0033575) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 6.569164547055371, LR: 0.0003 +[2026-03-02 10:54:58] (step=0033576) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.569360203482685, LR: 0.0003 +[2026-03-02 10:55:06] (step=0033577) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.569555859909998, LR: 0.0003 +[2026-03-02 10:55:14] (step=0033578) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.569751516337312, LR: 0.0003 +[2026-03-02 10:55:22] (step=0033579) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.569947172764626, LR: 0.0003 +[2026-03-02 10:55:29] (step=0033580) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 6.570142829191939, LR: 0.0003 +[2026-03-02 10:55:37] (step=0033581) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.570338485619253, LR: 0.0003 +[2026-03-02 10:55:45] (step=0033582) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.570534142046566, LR: 0.0003 +[2026-03-02 10:55:53] (step=0033583) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.57072979847388, LR: 0.0003 +[2026-03-02 10:56:01] (step=0033584) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.570925454901193, LR: 0.0003 +[2026-03-02 10:56:09] (step=0033585) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.571121111328507, LR: 0.0003 +[2026-03-02 10:56:16] (step=0033586) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.571316767755821, LR: 0.0003 +[2026-03-02 10:56:24] (step=0033587) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.571512424183134, LR: 0.0003 +[2026-03-02 10:56:32] (step=0033588) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.571708080610448, LR: 0.0003 +[2026-03-02 10:56:40] (step=0033589) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.571903737037761, LR: 0.0003 +[2026-03-02 10:56:48] (step=0033590) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.572099393465075, LR: 0.0003 +[2026-03-02 10:56:56] (step=0033591) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.572295049892389, LR: 0.0003 +[2026-03-02 10:57:04] (step=0033592) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.5724907063197024, LR: 0.0003 +[2026-03-02 10:57:11] (step=0033593) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.5726863627470165, LR: 0.0003 +[2026-03-02 10:57:19] (step=0033594) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.57288201917433, LR: 0.0003 +[2026-03-02 10:57:27] (step=0033595) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 6.573077675601644, LR: 0.0003 +[2026-03-02 10:57:35] (step=0033596) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.573273332028958, LR: 0.0003 +[2026-03-02 10:57:43] (step=0033597) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 6.573468988456271, LR: 0.0003 +[2026-03-02 10:57:51] (step=0033598) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.573664644883585, LR: 0.0003 +[2026-03-02 10:57:58] (step=0033599) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.573860301310898, LR: 0.0003 +[2026-03-02 10:58:06] (step=0033600) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.574055957738212, LR: 0.0003 +[2026-03-02 10:58:14] (step=0033601) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.574251614165525, LR: 0.0003 +[2026-03-02 10:58:22] (step=0033602) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.574447270592839, LR: 0.0003 +[2026-03-02 10:58:30] (step=0033603) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.574642927020153, LR: 0.0003 +[2026-03-02 10:58:38] (step=0033604) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.574838583447466, LR: 0.0003 +[2026-03-02 10:58:46] (step=0033605) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.57503423987478, LR: 0.0003 +[2026-03-02 10:58:54] (step=0033606) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.575229896302093, LR: 0.0003 +[2026-03-02 10:59:01] (step=0033607) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 6.575425552729407, LR: 0.0003 +[2026-03-02 10:59:09] (step=0033608) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.575621209156721, LR: 0.0003 +[2026-03-02 10:59:17] (step=0033609) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 6.575816865584034, LR: 0.0003 +[2026-03-02 10:59:25] (step=0033610) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.576012522011348, LR: 0.0003 +[2026-03-02 10:59:33] (step=0033611) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.5762081784386615, LR: 0.0003 +[2026-03-02 10:59:41] (step=0033612) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.5764038348659755, LR: 0.0003 +[2026-03-02 10:59:48] (step=0033613) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.576599491293289, LR: 0.0003 +[2026-03-02 10:59:56] (step=0033614) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.576795147720603, LR: 0.0003 +[2026-03-02 11:00:04] (step=0033615) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.576990804147917, LR: 0.0003 +[2026-03-02 11:00:12] (step=0033616) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.57718646057523, LR: 0.0003 +[2026-03-02 11:00:20] (step=0033617) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.577382117002544, LR: 0.0003 +[2026-03-02 11:00:28] (step=0033618) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.577577773429857, LR: 0.0003 +[2026-03-02 11:00:35] (step=0033619) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.577773429857171, LR: 0.0003 +[2026-03-02 11:00:43] (step=0033620) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.577969086284485, LR: 0.0003 +[2026-03-02 11:00:51] (step=0033621) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.578164742711798, LR: 0.0003 +[2026-03-02 11:00:59] (step=0033622) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.578360399139112, LR: 0.0003 +[2026-03-02 11:01:07] (step=0033623) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.578556055566425, LR: 0.0003 +[2026-03-02 11:01:15] (step=0033624) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.578751711993739, LR: 0.0003 +[2026-03-02 11:01:22] (step=0033625) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.578947368421052, LR: 0.0003 +[2026-03-02 11:01:30] (step=0033626) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.579143024848366, LR: 0.0003 +[2026-03-02 11:01:38] (step=0033627) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.57933868127568, LR: 0.0003 +[2026-03-02 11:01:46] (step=0033628) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.579534337702993, LR: 0.0003 +[2026-03-02 11:01:54] (step=0033629) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.579729994130307, LR: 0.0003 +[2026-03-02 11:02:02] (step=0033630) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.5799256505576205, LR: 0.0003 +[2026-03-02 11:02:10] (step=0033631) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 6.5801213069849345, LR: 0.0003 +[2026-03-02 11:02:17] (step=0033632) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.5803169634122485, LR: 0.0003 +[2026-03-02 11:02:25] (step=0033633) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.580512619839562, LR: 0.0003 +[2026-03-02 11:02:33] (step=0033634) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.580708276266876, LR: 0.0003 +[2026-03-02 11:02:41] (step=0033635) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 6.580903932694189, LR: 0.0003 +[2026-03-02 11:02:49] (step=0033636) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 6.581099589121503, LR: 0.0003 +[2026-03-02 11:02:57] (step=0033637) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.581295245548816, LR: 0.0003 +[2026-03-02 11:03:04] (step=0033638) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.58149090197613, LR: 0.0003 +[2026-03-02 11:03:12] (step=0033639) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.581686558403444, LR: 0.0003 +[2026-03-02 11:03:20] (step=0033640) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 6.581882214830757, LR: 0.0003 +[2026-03-02 11:03:28] (step=0033641) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.582077871258071, LR: 0.0003 +[2026-03-02 11:03:36] (step=0033642) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.582273527685384, LR: 0.0003 +[2026-03-02 11:03:44] (step=0033643) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.582469184112698, LR: 0.0003 +[2026-03-02 11:03:52] (step=0033644) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.582664840540012, LR: 0.0003 +[2026-03-02 11:03:59] (step=0033645) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.582860496967325, LR: 0.0003 +[2026-03-02 11:04:07] (step=0033646) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.583056153394639, LR: 0.0003 +[2026-03-02 11:04:15] (step=0033647) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.583251809821952, LR: 0.0003 +[2026-03-02 11:04:23] (step=0033648) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.583447466249266, LR: 0.0003 +[2026-03-02 11:04:31] (step=0033649) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.58364312267658, LR: 0.0003 +[2026-03-02 11:04:39] (step=0033650) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.5838387791038935, LR: 0.0003 +[2026-03-02 11:04:46] (step=0033651) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.5840344355312075, LR: 0.0003 +[2026-03-02 11:04:54] (step=0033652) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.584230091958521, LR: 0.0003 +[2026-03-02 11:05:02] (step=0033653) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.584425748385835, LR: 0.0003 +[2026-03-02 11:05:10] (step=0033654) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.584621404813148, LR: 0.0003 +[2026-03-02 11:05:18] (step=0033655) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.584817061240462, LR: 0.0003 +[2026-03-02 11:05:26] (step=0033656) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.585012717667776, LR: 0.0003 +[2026-03-02 11:05:34] (step=0033657) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.585208374095089, LR: 0.0003 +[2026-03-02 11:05:42] (step=0033658) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.585404030522403, LR: 0.0003 +[2026-03-02 11:05:49] (step=0033659) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.585599686949716, LR: 0.0003 +[2026-03-02 11:05:57] (step=0033660) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.58579534337703, LR: 0.0003 +[2026-03-02 11:06:05] (step=0033661) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.585990999804344, LR: 0.0003 +[2026-03-02 11:06:13] (step=0033662) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.586186656231657, LR: 0.0003 +[2026-03-02 11:06:21] (step=0033663) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.586382312658971, LR: 0.0003 +[2026-03-02 11:06:29] (step=0033664) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.586577969086284, LR: 0.0003 +[2026-03-02 11:06:36] (step=0033665) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.586773625513598, LR: 0.0003 +[2026-03-02 11:06:44] (step=0033666) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.586969281940911, LR: 0.0003 +[2026-03-02 11:06:52] (step=0033667) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.587164938368225, LR: 0.0003 +[2026-03-02 11:07:00] (step=0033668) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.587360594795539, LR: 0.0003 +[2026-03-02 11:07:08] (step=0033669) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.5875562512228525, LR: 0.0003 +[2026-03-02 11:07:16] (step=0033670) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.5877519076501665, LR: 0.0003 +[2026-03-02 11:07:23] (step=0033671) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.58794756407748, LR: 0.0003 +[2026-03-02 11:07:31] (step=0033672) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.588143220504794, LR: 0.0003 +[2026-03-02 11:07:39] (step=0033673) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.588338876932108, LR: 0.0003 +[2026-03-02 11:07:47] (step=0033674) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.588534533359421, LR: 0.0003 +[2026-03-02 11:07:55] (step=0033675) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.588730189786735, LR: 0.0003 +[2026-03-02 11:08:03] (step=0033676) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.588925846214048, LR: 0.0003 +[2026-03-02 11:08:10] (step=0033677) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.589121502641362, LR: 0.0003 +[2026-03-02 11:08:18] (step=0033678) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.589317159068675, LR: 0.0003 +[2026-03-02 11:08:26] (step=0033679) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.589512815495989, LR: 0.0003 +[2026-03-02 11:08:34] (step=0033680) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.589708471923303, LR: 0.0003 +[2026-03-02 11:08:42] (step=0033681) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.589904128350616, LR: 0.0003 +[2026-03-02 11:08:50] (step=0033682) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.59009978477793, LR: 0.0003 +[2026-03-02 11:08:57] (step=0033683) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.590295441205243, LR: 0.0003 +[2026-03-02 11:09:05] (step=0033684) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.590491097632557, LR: 0.0003 +[2026-03-02 11:09:13] (step=0033685) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.590686754059871, LR: 0.0003 +[2026-03-02 11:09:21] (step=0033686) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.590882410487184, LR: 0.0003 +[2026-03-02 11:09:29] (step=0033687) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.591078066914498, LR: 0.0003 +[2026-03-02 11:09:37] (step=0033688) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.5912737233418115, LR: 0.0003 +[2026-03-02 11:09:44] (step=0033689) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.5914693797691255, LR: 0.0003 +[2026-03-02 11:09:52] (step=0033690) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.591665036196439, LR: 0.0003 +[2026-03-02 11:10:00] (step=0033691) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.591860692623753, LR: 0.0003 +[2026-03-02 11:10:08] (step=0033692) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.592056349051067, LR: 0.0003 +[2026-03-02 11:10:16] (step=0033693) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.59225200547838, LR: 0.0003 +[2026-03-02 11:10:24] (step=0033694) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.592447661905694, LR: 0.0003 +[2026-03-02 11:10:32] (step=0033695) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.592643318333007, LR: 0.0003 +[2026-03-02 11:10:39] (step=0033696) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.592838974760321, LR: 0.0003 +[2026-03-02 11:10:47] (step=0033697) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.593034631187635, LR: 0.0003 +[2026-03-02 11:10:55] (step=0033698) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.593230287614948, LR: 0.0003 +[2026-03-02 11:11:03] (step=0033699) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.593425944042262, LR: 0.0003 +[2026-03-02 11:11:11] (step=0033700) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.593621600469575, LR: 0.0003 +[2026-03-02 11:11:19] (step=0033701) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.593817256896889, LR: 0.0003 +[2026-03-02 11:11:27] (step=0033702) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.594012913324203, LR: 0.0003 +[2026-03-02 11:11:34] (step=0033703) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.594208569751516, LR: 0.0003 +[2026-03-02 11:11:42] (step=0033704) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.59440422617883, LR: 0.0003 +[2026-03-02 11:11:50] (step=0033705) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.594599882606143, LR: 0.0003 +[2026-03-02 11:11:58] (step=0033706) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.594795539033457, LR: 0.0003 +[2026-03-02 11:12:06] (step=0033707) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.5949911954607705, LR: 0.0003 +[2026-03-02 11:12:14] (step=0033708) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.5951868518880845, LR: 0.0003 +[2026-03-02 11:12:21] (step=0033709) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.5953825083153985, LR: 0.0003 +[2026-03-02 11:12:29] (step=0033710) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.595578164742712, LR: 0.0003 +[2026-03-02 11:12:37] (step=0033711) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.595773821170026, LR: 0.0003 +[2026-03-02 11:12:45] (step=0033712) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.595969477597339, LR: 0.0003 +[2026-03-02 11:12:53] (step=0033713) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.596165134024653, LR: 0.0003 +[2026-03-02 11:13:01] (step=0033714) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.596360790451967, LR: 0.0003 +[2026-03-02 11:13:09] (step=0033715) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 6.59655644687928, LR: 0.0003 +[2026-03-02 11:13:16] (step=0033716) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.596752103306594, LR: 0.0003 +[2026-03-02 11:13:24] (step=0033717) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.596947759733907, LR: 0.0003 +[2026-03-02 11:13:32] (step=0033718) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 6.597143416161221, LR: 0.0003 +[2026-03-02 11:13:40] (step=0033719) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.597339072588534, LR: 0.0003 +[2026-03-02 11:13:48] (step=0033720) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.597534729015848, LR: 0.0003 +[2026-03-02 11:13:56] (step=0033721) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.597730385443162, LR: 0.0003 +[2026-03-02 11:14:03] (step=0033722) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.597926041870475, LR: 0.0003 +[2026-03-02 11:14:11] (step=0033723) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.598121698297789, LR: 0.0003 +[2026-03-02 11:14:19] (step=0033724) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.598317354725102, LR: 0.0003 +[2026-03-02 11:14:27] (step=0033725) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.598513011152416, LR: 0.0003 +[2026-03-02 11:14:35] (step=0033726) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.59870866757973, LR: 0.0003 +[2026-03-02 11:14:43] (step=0033727) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.5989043240070435, LR: 0.0003 +[2026-03-02 11:14:50] (step=0033728) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.5990999804343575, LR: 0.0003 +[2026-03-02 11:14:58] (step=0033729) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.599295636861671, LR: 0.0003 +[2026-03-02 11:15:06] (step=0033730) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.599491293288985, LR: 0.0003 +[2026-03-02 11:15:14] (step=0033731) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 6.599686949716298, LR: 0.0003 +[2026-03-02 11:15:22] (step=0033732) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.599882606143612, LR: 0.0003 +[2026-03-02 11:15:30] (step=0033733) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.600078262570926, LR: 0.0003 +[2026-03-02 11:15:37] (step=0033734) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.600273918998239, LR: 0.0003 +[2026-03-02 11:15:45] (step=0033735) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.600469575425553, LR: 0.0003 +[2026-03-02 11:15:53] (step=0033736) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.600665231852866, LR: 0.0003 +[2026-03-02 11:16:01] (step=0033737) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.60086088828018, LR: 0.0003 +[2026-03-02 11:16:09] (step=0033738) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.601056544707494, LR: 0.0003 +[2026-03-02 11:16:17] (step=0033739) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.601252201134807, LR: 0.0003 +[2026-03-02 11:16:24] (step=0033740) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.601447857562121, LR: 0.0003 +[2026-03-02 11:16:32] (step=0033741) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 6.601643513989434, LR: 0.0003 +[2026-03-02 11:16:40] (step=0033742) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.601839170416748, LR: 0.0003 +[2026-03-02 11:16:48] (step=0033743) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.602034826844061, LR: 0.0003 +[2026-03-02 11:16:56] (step=0033744) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.602230483271375, LR: 0.0003 +[2026-03-02 11:17:04] (step=0033745) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.602426139698689, LR: 0.0003 +[2026-03-02 11:17:12] (step=0033746) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.6026217961260025, LR: 0.0003 +[2026-03-02 11:17:20] (step=0033747) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.6028174525533165, LR: 0.0003 +[2026-03-02 11:17:27] (step=0033748) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 6.60301310898063, LR: 0.0003 +[2026-03-02 11:17:35] (step=0033749) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.603208765407944, LR: 0.0003 +[2026-03-02 11:17:43] (step=0033750) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.603404421835258, LR: 0.0003 +[2026-03-02 11:17:51] (step=0033751) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.603600078262571, LR: 0.0003 +[2026-03-02 11:17:59] (step=0033752) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.603795734689885, LR: 0.0003 +[2026-03-02 11:18:07] (step=0033753) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.603991391117198, LR: 0.0003 +[2026-03-02 11:18:14] (step=0033754) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.604187047544512, LR: 0.0003 +[2026-03-02 11:18:22] (step=0033755) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.604382703971825, LR: 0.0003 +[2026-03-02 11:18:30] (step=0033756) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.604578360399139, LR: 0.0003 +[2026-03-02 11:18:38] (step=0033757) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.604774016826453, LR: 0.0003 +[2026-03-02 11:18:46] (step=0033758) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.604969673253766, LR: 0.0003 +[2026-03-02 11:18:54] (step=0033759) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.60516532968108, LR: 0.0003 +[2026-03-02 11:19:02] (step=0033760) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.605360986108393, LR: 0.0003 +[2026-03-02 11:19:09] (step=0033761) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.605556642535707, LR: 0.0003 +[2026-03-02 11:19:17] (step=0033762) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.605752298963021, LR: 0.0003 +[2026-03-02 11:19:25] (step=0033763) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.605947955390334, LR: 0.0003 +[2026-03-02 11:19:33] (step=0033764) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.606143611817648, LR: 0.0003 +[2026-03-02 11:19:41] (step=0033765) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.6063392682449615, LR: 0.0003 +[2026-03-02 11:19:49] (step=0033766) Train Loss: 0.4769, Train Steps/Sec: 0.13, Epoch: 6.6065349246722755, LR: 0.0003 +[2026-03-02 11:19:56] (step=0033767) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.6067305810995895, LR: 0.0003 +[2026-03-02 11:20:04] (step=0033768) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.606926237526903, LR: 0.0003 +[2026-03-02 11:20:12] (step=0033769) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.607121893954217, LR: 0.0003 +[2026-03-02 11:20:20] (step=0033770) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.60731755038153, LR: 0.0003 +[2026-03-02 11:20:28] (step=0033771) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.607513206808844, LR: 0.0003 +[2026-03-02 11:20:36] (step=0033772) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.607708863236157, LR: 0.0003 +[2026-03-02 11:20:43] (step=0033773) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.607904519663471, LR: 0.0003 +[2026-03-02 11:20:51] (step=0033774) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.608100176090785, LR: 0.0003 +[2026-03-02 11:20:59] (step=0033775) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 6.608295832518098, LR: 0.0003 +[2026-03-02 11:21:07] (step=0033776) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.608491488945412, LR: 0.0003 +[2026-03-02 11:21:15] (step=0033777) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.608687145372725, LR: 0.0003 +[2026-03-02 11:21:23] (step=0033778) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.608882801800039, LR: 0.0003 +[2026-03-02 11:21:30] (step=0033779) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.609078458227353, LR: 0.0003 +[2026-03-02 11:21:38] (step=0033780) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.609274114654666, LR: 0.0003 +[2026-03-02 11:21:46] (step=0033781) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.60946977108198, LR: 0.0003 +[2026-03-02 11:21:54] (step=0033782) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 6.609665427509293, LR: 0.0003 +[2026-03-02 11:22:02] (step=0033783) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.609861083936607, LR: 0.0003 +[2026-03-02 11:22:10] (step=0033784) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.6100567403639205, LR: 0.0003 +[2026-03-02 11:22:18] (step=0033785) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.6102523967912346, LR: 0.0003 +[2026-03-02 11:22:25] (step=0033786) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.610448053218549, LR: 0.0003 +[2026-03-02 11:22:33] (step=0033787) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.610643709645862, LR: 0.0003 +[2026-03-02 11:22:41] (step=0033788) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.610839366073176, LR: 0.0003 +[2026-03-02 11:22:49] (step=0033789) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 6.611035022500489, LR: 0.0003 +[2026-03-02 11:22:57] (step=0033790) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.611230678927803, LR: 0.0003 +[2026-03-02 11:23:05] (step=0033791) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.611426335355117, LR: 0.0003 +[2026-03-02 11:23:13] (step=0033792) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 6.61162199178243, LR: 0.0003 +[2026-03-02 11:23:20] (step=0033793) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.611817648209744, LR: 0.0003 +[2026-03-02 11:23:28] (step=0033794) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.612013304637057, LR: 0.0003 +[2026-03-02 11:23:36] (step=0033795) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.612208961064371, LR: 0.0003 +[2026-03-02 11:23:44] (step=0033796) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.612404617491684, LR: 0.0003 +[2026-03-02 11:23:52] (step=0033797) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.612600273918998, LR: 0.0003 +[2026-03-02 11:24:00] (step=0033798) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.612795930346312, LR: 0.0003 +[2026-03-02 11:24:08] (step=0033799) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.612991586773625, LR: 0.0003 +[2026-03-02 11:24:16] (step=0033800) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.613187243200939, LR: 0.0003 +[2026-03-02 11:24:23] (step=0033801) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.613382899628252, LR: 0.0003 +[2026-03-02 11:24:31] (step=0033802) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.613578556055566, LR: 0.0003 +[2026-03-02 11:24:39] (step=0033803) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.61377421248288, LR: 0.0003 +[2026-03-02 11:24:47] (step=0033804) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 6.613969868910194, LR: 0.0003 +[2026-03-02 11:24:55] (step=0033805) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.614165525337508, LR: 0.0003 +[2026-03-02 11:25:03] (step=0033806) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.614361181764821, LR: 0.0003 +[2026-03-02 11:25:11] (step=0033807) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.614556838192135, LR: 0.0003 +[2026-03-02 11:25:18] (step=0033808) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.614752494619448, LR: 0.0003 +[2026-03-02 11:25:26] (step=0033809) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.614948151046762, LR: 0.0003 +[2026-03-02 11:25:34] (step=0033810) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.615143807474076, LR: 0.0003 +[2026-03-02 11:25:42] (step=0033811) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.615339463901389, LR: 0.0003 +[2026-03-02 11:25:50] (step=0033812) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.615535120328703, LR: 0.0003 +[2026-03-02 11:25:58] (step=0033813) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.615730776756016, LR: 0.0003 +[2026-03-02 11:26:05] (step=0033814) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.61592643318333, LR: 0.0003 +[2026-03-02 11:26:13] (step=0033815) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.616122089610644, LR: 0.0003 +[2026-03-02 11:26:21] (step=0033816) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.616317746037957, LR: 0.0003 +[2026-03-02 11:26:29] (step=0033817) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.616513402465271, LR: 0.0003 +[2026-03-02 11:26:37] (step=0033818) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.616709058892584, LR: 0.0003 +[2026-03-02 11:26:45] (step=0033819) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.616904715319898, LR: 0.0003 +[2026-03-02 11:26:52] (step=0033820) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.617100371747212, LR: 0.0003 +[2026-03-02 11:27:00] (step=0033821) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.6172960281745254, LR: 0.0003 +[2026-03-02 11:27:08] (step=0033822) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.6174916846018395, LR: 0.0003 +[2026-03-02 11:27:16] (step=0033823) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.617687341029153, LR: 0.0003 +[2026-03-02 11:27:24] (step=0033824) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.617882997456467, LR: 0.0003 +[2026-03-02 11:27:32] (step=0033825) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.61807865388378, LR: 0.0003 +[2026-03-02 11:27:40] (step=0033826) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.618274310311094, LR: 0.0003 +[2026-03-02 11:27:47] (step=0033827) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.618469966738408, LR: 0.0003 +[2026-03-02 11:27:55] (step=0033828) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 6.618665623165721, LR: 0.0003 +[2026-03-02 11:28:03] (step=0033829) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 6.618861279593035, LR: 0.0003 +[2026-03-02 11:28:11] (step=0033830) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.619056936020348, LR: 0.0003 +[2026-03-02 11:28:19] (step=0033831) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 6.619252592447662, LR: 0.0003 +[2026-03-02 11:28:27] (step=0033832) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.619448248874976, LR: 0.0003 +[2026-03-02 11:28:34] (step=0033833) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.619643905302289, LR: 0.0003 +[2026-03-02 11:28:42] (step=0033834) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.619839561729603, LR: 0.0003 +[2026-03-02 11:28:50] (step=0033835) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.620035218156916, LR: 0.0003 +[2026-03-02 11:28:58] (step=0033836) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.62023087458423, LR: 0.0003 +[2026-03-02 11:29:06] (step=0033837) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 6.620426531011543, LR: 0.0003 +[2026-03-02 11:29:14] (step=0033838) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.620622187438857, LR: 0.0003 +[2026-03-02 11:29:22] (step=0033839) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.620817843866171, LR: 0.0003 +[2026-03-02 11:29:29] (step=0033840) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.6210135002934845, LR: 0.0003 +[2026-03-02 11:29:37] (step=0033841) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.6212091567207985, LR: 0.0003 +[2026-03-02 11:29:45] (step=0033842) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.621404813148112, LR: 0.0003 +[2026-03-02 11:29:53] (step=0033843) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 6.621600469575426, LR: 0.0003 +[2026-03-02 11:30:01] (step=0033844) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.62179612600274, LR: 0.0003 +[2026-03-02 11:30:09] (step=0033845) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.621991782430053, LR: 0.0003 +[2026-03-02 11:30:17] (step=0033846) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 6.622187438857367, LR: 0.0003 +[2026-03-02 11:30:24] (step=0033847) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 6.62238309528468, LR: 0.0003 +[2026-03-02 11:30:32] (step=0033848) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.622578751711994, LR: 0.0003 +[2026-03-02 11:30:40] (step=0033849) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.622774408139307, LR: 0.0003 +[2026-03-02 11:30:48] (step=0033850) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.622970064566621, LR: 0.0003 +[2026-03-02 11:30:56] (step=0033851) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.623165720993935, LR: 0.0003 +[2026-03-02 11:31:04] (step=0033852) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.623361377421248, LR: 0.0003 +[2026-03-02 11:31:11] (step=0033853) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 6.623557033848562, LR: 0.0003 +[2026-03-02 11:31:19] (step=0033854) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 6.623752690275875, LR: 0.0003 +[2026-03-02 11:31:27] (step=0033855) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 6.623948346703189, LR: 0.0003 +[2026-03-02 11:31:35] (step=0033856) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.624144003130503, LR: 0.0003 +[2026-03-02 11:31:43] (step=0033857) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.624339659557816, LR: 0.0003 +[2026-03-02 11:31:51] (step=0033858) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.62453531598513, LR: 0.0003 +[2026-03-02 11:31:58] (step=0033859) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 6.6247309724124435, LR: 0.0003 +[2026-03-02 11:32:06] (step=0033860) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.6249266288397575, LR: 0.0003 +[2026-03-02 11:32:14] (step=0033861) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.625122285267071, LR: 0.0003 +[2026-03-02 11:32:22] (step=0033862) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.625317941694385, LR: 0.0003 +[2026-03-02 11:32:30] (step=0033863) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.625513598121699, LR: 0.0003 +[2026-03-02 11:32:38] (step=0033864) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.625709254549012, LR: 0.0003 +[2026-03-02 11:32:46] (step=0033865) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.625904910976326, LR: 0.0003 +[2026-03-02 11:32:53] (step=0033866) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.626100567403639, LR: 0.0003 +[2026-03-02 11:33:01] (step=0033867) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.626296223830953, LR: 0.0003 +[2026-03-02 11:33:09] (step=0033868) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.626491880258267, LR: 0.0003 +[2026-03-02 11:33:17] (step=0033869) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.62668753668558, LR: 0.0003 +[2026-03-02 11:33:25] (step=0033870) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.626883193112894, LR: 0.0003 +[2026-03-02 11:33:33] (step=0033871) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.627078849540207, LR: 0.0003 +[2026-03-02 11:33:40] (step=0033872) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.627274505967521, LR: 0.0003 +[2026-03-02 11:33:48] (step=0033873) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.627470162394835, LR: 0.0003 +[2026-03-02 11:33:56] (step=0033874) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.627665818822148, LR: 0.0003 +[2026-03-02 11:34:04] (step=0033875) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 6.627861475249462, LR: 0.0003 +[2026-03-02 11:34:12] (step=0033876) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 6.628057131676775, LR: 0.0003 +[2026-03-02 11:34:20] (step=0033877) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 6.628252788104089, LR: 0.0003 +[2026-03-02 11:34:28] (step=0033878) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.6284484445314025, LR: 0.0003 +[2026-03-02 11:34:35] (step=0033879) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.6286441009587165, LR: 0.0003 +[2026-03-02 11:34:43] (step=0033880) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.6288397573860305, LR: 0.0003 +[2026-03-02 11:34:51] (step=0033881) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.629035413813344, LR: 0.0003 +[2026-03-02 11:34:59] (step=0033882) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.629231070240658, LR: 0.0003 +[2026-03-02 11:35:07] (step=0033883) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.629426726667971, LR: 0.0003 +[2026-03-02 11:35:15] (step=0033884) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.629622383095285, LR: 0.0003 +[2026-03-02 11:35:22] (step=0033885) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.629818039522599, LR: 0.0003 +[2026-03-02 11:35:30] (step=0033886) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.630013695949912, LR: 0.0003 +[2026-03-02 11:35:38] (step=0033887) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.630209352377226, LR: 0.0003 +[2026-03-02 11:35:46] (step=0033888) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.630405008804539, LR: 0.0003 +[2026-03-02 11:35:54] (step=0033889) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 6.630600665231853, LR: 0.0003 +[2026-03-02 11:36:02] (step=0033890) Train Loss: 0.4552, Train Steps/Sec: 0.12, Epoch: 6.630796321659166, LR: 0.0003 +[2026-03-02 11:36:10] (step=0033891) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.63099197808648, LR: 0.0003 +[2026-03-02 11:36:18] (step=0033892) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 6.631187634513794, LR: 0.0003 +[2026-03-02 11:36:25] (step=0033893) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.631383290941107, LR: 0.0003 +[2026-03-02 11:36:33] (step=0033894) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.631578947368421, LR: 0.0003 +[2026-03-02 11:36:41] (step=0033895) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.631774603795734, LR: 0.0003 +[2026-03-02 11:36:49] (step=0033896) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.631970260223048, LR: 0.0003 +[2026-03-02 11:36:57] (step=0033897) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.632165916650362, LR: 0.0003 +[2026-03-02 11:37:05] (step=0033898) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.6323615730776755, LR: 0.0003 +[2026-03-02 11:37:12] (step=0033899) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.6325572295049895, LR: 0.0003 +[2026-03-02 11:37:20] (step=0033900) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.632752885932303, LR: 0.0003 +[2026-03-02 11:37:28] (step=0033901) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.632948542359617, LR: 0.0003 +[2026-03-02 11:37:36] (step=0033902) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.63314419878693, LR: 0.0003 +[2026-03-02 11:37:44] (step=0033903) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.633339855214244, LR: 0.0003 +[2026-03-02 11:37:52] (step=0033904) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.633535511641558, LR: 0.0003 +[2026-03-02 11:37:59] (step=0033905) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.633731168068871, LR: 0.0003 +[2026-03-02 11:38:07] (step=0033906) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.633926824496185, LR: 0.0003 +[2026-03-02 11:38:15] (step=0033907) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.634122480923498, LR: 0.0003 +[2026-03-02 11:38:23] (step=0033908) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.634318137350812, LR: 0.0003 +[2026-03-02 11:38:31] (step=0033909) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.634513793778126, LR: 0.0003 +[2026-03-02 11:38:39] (step=0033910) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.634709450205439, LR: 0.0003 +[2026-03-02 11:38:46] (step=0033911) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.634905106632753, LR: 0.0003 +[2026-03-02 11:38:54] (step=0033912) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.635100763060066, LR: 0.0003 +[2026-03-02 11:39:02] (step=0033913) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 6.63529641948738, LR: 0.0003 +[2026-03-02 11:39:10] (step=0033914) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.635492075914693, LR: 0.0003 +[2026-03-02 11:39:18] (step=0033915) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.635687732342007, LR: 0.0003 +[2026-03-02 11:39:26] (step=0033916) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.635883388769321, LR: 0.0003 +[2026-03-02 11:39:34] (step=0033917) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.6360790451966345, LR: 0.0003 +[2026-03-02 11:39:41] (step=0033918) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.6362747016239485, LR: 0.0003 +[2026-03-02 11:39:49] (step=0033919) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.636470358051262, LR: 0.0003 +[2026-03-02 11:39:57] (step=0033920) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.636666014478576, LR: 0.0003 +[2026-03-02 11:40:05] (step=0033921) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.63686167090589, LR: 0.0003 +[2026-03-02 11:40:13] (step=0033922) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.637057327333203, LR: 0.0003 +[2026-03-02 11:40:21] (step=0033923) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 6.637252983760517, LR: 0.0003 +[2026-03-02 11:40:28] (step=0033924) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.63744864018783, LR: 0.0003 +[2026-03-02 11:40:36] (step=0033925) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.637644296615144, LR: 0.0003 +[2026-03-02 11:40:44] (step=0033926) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.637839953042458, LR: 0.0003 +[2026-03-02 11:40:52] (step=0033927) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.638035609469771, LR: 0.0003 +[2026-03-02 11:41:00] (step=0033928) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.638231265897085, LR: 0.0003 +[2026-03-02 11:41:08] (step=0033929) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.638426922324398, LR: 0.0003 +[2026-03-02 11:41:15] (step=0033930) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.638622578751712, LR: 0.0003 +[2026-03-02 11:41:23] (step=0033931) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.638818235179025, LR: 0.0003 +[2026-03-02 11:41:31] (step=0033932) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.639013891606339, LR: 0.0003 +[2026-03-02 11:41:39] (step=0033933) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.639209548033653, LR: 0.0003 +[2026-03-02 11:41:47] (step=0033934) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 6.639405204460966, LR: 0.0003 +[2026-03-02 11:41:55] (step=0033935) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 6.63960086088828, LR: 0.0003 +[2026-03-02 11:42:02] (step=0033936) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.6397965173155935, LR: 0.0003 +[2026-03-02 11:42:10] (step=0033937) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.6399921737429075, LR: 0.0003 +[2026-03-02 11:42:18] (step=0033938) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.6401878301702215, LR: 0.0003 +[2026-03-02 11:42:26] (step=0033939) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 6.640383486597535, LR: 0.0003 +[2026-03-02 11:42:34] (step=0033940) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.640579143024849, LR: 0.0003 +[2026-03-02 11:42:42] (step=0033941) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.640774799452162, LR: 0.0003 +[2026-03-02 11:42:49] (step=0033942) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.640970455879476, LR: 0.0003 +[2026-03-02 11:42:57] (step=0033943) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.641166112306789, LR: 0.0003 +[2026-03-02 11:43:05] (step=0033944) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.641361768734103, LR: 0.0003 +[2026-03-02 11:43:13] (step=0033945) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.641557425161417, LR: 0.0003 +[2026-03-02 11:43:21] (step=0033946) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.64175308158873, LR: 0.0003 +[2026-03-02 11:43:29] (step=0033947) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.641948738016044, LR: 0.0003 +[2026-03-02 11:43:37] (step=0033948) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.642144394443357, LR: 0.0003 +[2026-03-02 11:43:44] (step=0033949) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.642340050870671, LR: 0.0003 +[2026-03-02 11:43:52] (step=0033950) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 6.642535707297985, LR: 0.0003 +[2026-03-02 11:44:00] (step=0033951) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.642731363725298, LR: 0.0003 +[2026-03-02 11:44:08] (step=0033952) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.642927020152612, LR: 0.0003 +[2026-03-02 11:44:16] (step=0033953) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.643122676579925, LR: 0.0003 +[2026-03-02 11:44:24] (step=0033954) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 6.643318333007239, LR: 0.0003 +[2026-03-02 11:44:31] (step=0033955) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.6435139894345525, LR: 0.0003 +[2026-03-02 11:44:39] (step=0033956) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.6437096458618665, LR: 0.0003 +[2026-03-02 11:44:47] (step=0033957) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.6439053022891805, LR: 0.0003 +[2026-03-02 11:44:55] (step=0033958) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 6.644100958716494, LR: 0.0003 +[2026-03-02 11:45:03] (step=0033959) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.644296615143808, LR: 0.0003 +[2026-03-02 11:45:11] (step=0033960) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.644492271571121, LR: 0.0003 +[2026-03-02 11:45:18] (step=0033961) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.644687927998435, LR: 0.0003 +[2026-03-02 11:45:26] (step=0033962) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.644883584425749, LR: 0.0003 +[2026-03-02 11:45:34] (step=0033963) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.645079240853062, LR: 0.0003 +[2026-03-02 11:45:42] (step=0033964) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.645274897280376, LR: 0.0003 +[2026-03-02 11:45:50] (step=0033965) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.645470553707689, LR: 0.0003 +[2026-03-02 11:45:58] (step=0033966) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.645666210135003, LR: 0.0003 +[2026-03-02 11:46:06] (step=0033967) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.645861866562316, LR: 0.0003 +[2026-03-02 11:46:13] (step=0033968) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.64605752298963, LR: 0.0003 +[2026-03-02 11:46:21] (step=0033969) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.646253179416944, LR: 0.0003 +[2026-03-02 11:46:29] (step=0033970) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.646448835844257, LR: 0.0003 +[2026-03-02 11:46:37] (step=0033971) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.646644492271571, LR: 0.0003 +[2026-03-02 11:46:45] (step=0033972) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.646840148698884, LR: 0.0003 +[2026-03-02 11:46:52] (step=0033973) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.647035805126198, LR: 0.0003 +[2026-03-02 11:47:00] (step=0033974) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.647231461553512, LR: 0.0003 +[2026-03-02 11:47:08] (step=0033975) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.6474271179808255, LR: 0.0003 +[2026-03-02 11:47:16] (step=0033976) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.6476227744081395, LR: 0.0003 +[2026-03-02 11:47:24] (step=0033977) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.647818430835453, LR: 0.0003 +[2026-03-02 11:47:32] (step=0033978) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.648014087262767, LR: 0.0003 +[2026-03-02 11:47:39] (step=0033979) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.64820974369008, LR: 0.0003 +[2026-03-02 11:47:47] (step=0033980) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.648405400117394, LR: 0.0003 +[2026-03-02 11:47:55] (step=0033981) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.648601056544708, LR: 0.0003 +[2026-03-02 11:48:03] (step=0033982) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.648796712972021, LR: 0.0003 +[2026-03-02 11:48:11] (step=0033983) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.648992369399335, LR: 0.0003 +[2026-03-02 11:48:19] (step=0033984) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.649188025826648, LR: 0.0003 +[2026-03-02 11:48:26] (step=0033985) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.649383682253962, LR: 0.0003 +[2026-03-02 11:48:34] (step=0033986) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.649579338681276, LR: 0.0003 +[2026-03-02 11:48:42] (step=0033987) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.649774995108589, LR: 0.0003 +[2026-03-02 11:48:50] (step=0033988) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.649970651535903, LR: 0.0003 +[2026-03-02 11:48:58] (step=0033989) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.650166307963216, LR: 0.0003 +[2026-03-02 11:49:06] (step=0033990) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.65036196439053, LR: 0.0003 +[2026-03-02 11:49:14] (step=0033991) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.650557620817844, LR: 0.0003 +[2026-03-02 11:49:21] (step=0033992) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.650753277245157, LR: 0.0003 +[2026-03-02 11:49:29] (step=0033993) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.650948933672471, LR: 0.0003 +[2026-03-02 11:49:37] (step=0033994) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.6511445900997845, LR: 0.0003 +[2026-03-02 11:49:45] (step=0033995) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.6513402465270985, LR: 0.0003 +[2026-03-02 11:49:53] (step=0033996) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.651535902954412, LR: 0.0003 +[2026-03-02 11:50:01] (step=0033997) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 6.651731559381726, LR: 0.0003 +[2026-03-02 11:50:09] (step=0033998) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.65192721580904, LR: 0.0003 +[2026-03-02 11:50:17] (step=0033999) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.652122872236353, LR: 0.0003 +[2026-03-02 11:50:24] (step=0034000) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.652318528663667, LR: 0.0003 +[2026-03-02 11:50:24] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0034000/ +[2026-03-02 11:50:32] (step=0034001) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.65251418509098, LR: 0.0003 +[2026-03-02 11:50:40] (step=0034002) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 6.652709841518294, LR: 0.0003 +[2026-03-02 11:50:48] (step=0034003) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.652905497945608, LR: 0.0003 +[2026-03-02 11:50:56] (step=0034004) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.653101154372921, LR: 0.0003 +[2026-03-02 11:51:04] (step=0034005) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.653296810800235, LR: 0.0003 +[2026-03-02 11:51:11] (step=0034006) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.653492467227548, LR: 0.0003 +[2026-03-02 11:51:19] (step=0034007) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.653688123654862, LR: 0.0003 +[2026-03-02 11:51:27] (step=0034008) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.653883780082175, LR: 0.0003 +[2026-03-02 11:51:35] (step=0034009) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.654079436509489, LR: 0.0003 +[2026-03-02 11:51:43] (step=0034010) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.654275092936803, LR: 0.0003 +[2026-03-02 11:51:51] (step=0034011) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.654470749364116, LR: 0.0003 +[2026-03-02 11:51:58] (step=0034012) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.65466640579143, LR: 0.0003 +[2026-03-02 11:52:06] (step=0034013) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.6548620622187435, LR: 0.0003 +[2026-03-02 11:52:14] (step=0034014) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.6550577186460576, LR: 0.0003 +[2026-03-02 11:52:22] (step=0034015) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 6.655253375073372, LR: 0.0003 +[2026-03-02 11:52:30] (step=0034016) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.655449031500685, LR: 0.0003 +[2026-03-02 11:52:38] (step=0034017) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.655644687927999, LR: 0.0003 +[2026-03-02 11:52:45] (step=0034018) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.655840344355312, LR: 0.0003 +[2026-03-02 11:52:53] (step=0034019) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.656036000782626, LR: 0.0003 +[2026-03-02 11:53:01] (step=0034020) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.656231657209939, LR: 0.0003 +[2026-03-02 11:53:09] (step=0034021) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.656427313637253, LR: 0.0003 +[2026-03-02 11:53:17] (step=0034022) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.656622970064567, LR: 0.0003 +[2026-03-02 11:53:25] (step=0034023) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.65681862649188, LR: 0.0003 +[2026-03-02 11:53:32] (step=0034024) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 6.657014282919194, LR: 0.0003 +[2026-03-02 11:53:40] (step=0034025) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.657209939346507, LR: 0.0003 +[2026-03-02 11:53:48] (step=0034026) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.657405595773821, LR: 0.0003 +[2026-03-02 11:53:56] (step=0034027) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.657601252201135, LR: 0.0003 +[2026-03-02 11:54:04] (step=0034028) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.657796908628448, LR: 0.0003 +[2026-03-02 11:54:12] (step=0034029) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.657992565055762, LR: 0.0003 +[2026-03-02 11:54:19] (step=0034030) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 6.658188221483075, LR: 0.0003 +[2026-03-02 11:54:27] (step=0034031) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.658383877910389, LR: 0.0003 +[2026-03-02 11:54:35] (step=0034032) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.6585795343377026, LR: 0.0003 +[2026-03-02 11:54:43] (step=0034033) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.658775190765017, LR: 0.0003 +[2026-03-02 11:54:51] (step=0034034) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.658970847192331, LR: 0.0003 +[2026-03-02 11:54:59] (step=0034035) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.659166503619644, LR: 0.0003 +[2026-03-02 11:55:06] (step=0034036) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.659362160046958, LR: 0.0003 +[2026-03-02 11:55:14] (step=0034037) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.659557816474271, LR: 0.0003 +[2026-03-02 11:55:22] (step=0034038) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.659753472901585, LR: 0.0003 +[2026-03-02 11:55:30] (step=0034039) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.659949129328899, LR: 0.0003 +[2026-03-02 11:55:38] (step=0034040) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.660144785756212, LR: 0.0003 +[2026-03-02 11:55:46] (step=0034041) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.660340442183526, LR: 0.0003 +[2026-03-02 11:55:53] (step=0034042) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.660536098610839, LR: 0.0003 +[2026-03-02 11:56:01] (step=0034043) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.660731755038153, LR: 0.0003 +[2026-03-02 11:56:09] (step=0034044) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.660927411465467, LR: 0.0003 +[2026-03-02 11:56:17] (step=0034045) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.66112306789278, LR: 0.0003 +[2026-03-02 11:56:25] (step=0034046) Train Loss: 0.4438, Train Steps/Sec: 0.12, Epoch: 6.661318724320094, LR: 0.0003 +[2026-03-02 11:56:33] (step=0034047) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.661514380747407, LR: 0.0003 +[2026-03-02 11:56:41] (step=0034048) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.661710037174721, LR: 0.0003 +[2026-03-02 11:56:49] (step=0034049) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.661905693602034, LR: 0.0003 +[2026-03-02 11:56:57] (step=0034050) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.6621013500293484, LR: 0.0003 +[2026-03-02 11:57:04] (step=0034051) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.6622970064566625, LR: 0.0003 +[2026-03-02 11:57:12] (step=0034052) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.662492662883976, LR: 0.0003 +[2026-03-02 11:57:20] (step=0034053) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.66268831931129, LR: 0.0003 +[2026-03-02 11:57:28] (step=0034054) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.662883975738603, LR: 0.0003 +[2026-03-02 11:57:36] (step=0034055) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.663079632165917, LR: 0.0003 +[2026-03-02 11:57:44] (step=0034056) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.663275288593231, LR: 0.0003 +[2026-03-02 11:57:51] (step=0034057) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.663470945020544, LR: 0.0003 +[2026-03-02 11:57:59] (step=0034058) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.663666601447858, LR: 0.0003 +[2026-03-02 11:58:07] (step=0034059) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.663862257875171, LR: 0.0003 +[2026-03-02 11:58:15] (step=0034060) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.664057914302485, LR: 0.0003 +[2026-03-02 11:58:23] (step=0034061) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.664253570729798, LR: 0.0003 +[2026-03-02 11:58:31] (step=0034062) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.664449227157112, LR: 0.0003 +[2026-03-02 11:58:38] (step=0034063) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.664644883584426, LR: 0.0003 +[2026-03-02 11:58:46] (step=0034064) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 6.664840540011739, LR: 0.0003 +[2026-03-02 11:58:54] (step=0034065) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.665036196439053, LR: 0.0003 +[2026-03-02 11:59:02] (step=0034066) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.665231852866366, LR: 0.0003 +[2026-03-02 11:59:10] (step=0034067) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.66542750929368, LR: 0.0003 +[2026-03-02 11:59:18] (step=0034068) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.665623165720994, LR: 0.0003 +[2026-03-02 11:59:25] (step=0034069) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.6658188221483075, LR: 0.0003 +[2026-03-02 11:59:33] (step=0034070) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.6660144785756215, LR: 0.0003 +[2026-03-02 11:59:41] (step=0034071) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.666210135002935, LR: 0.0003 +[2026-03-02 11:59:49] (step=0034072) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.666405791430249, LR: 0.0003 +[2026-03-02 11:59:57] (step=0034073) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.666601447857562, LR: 0.0003 +[2026-03-02 12:00:05] (step=0034074) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.666797104284876, LR: 0.0003 +[2026-03-02 12:00:12] (step=0034075) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.66699276071219, LR: 0.0003 +[2026-03-02 12:00:20] (step=0034076) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.667188417139503, LR: 0.0003 +[2026-03-02 12:00:28] (step=0034077) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.667384073566817, LR: 0.0003 +[2026-03-02 12:00:36] (step=0034078) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.66757972999413, LR: 0.0003 +[2026-03-02 12:00:44] (step=0034079) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.667775386421444, LR: 0.0003 +[2026-03-02 12:00:52] (step=0034080) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.667971042848758, LR: 0.0003 +[2026-03-02 12:00:59] (step=0034081) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 6.668166699276071, LR: 0.0003 +[2026-03-02 12:01:07] (step=0034082) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.668362355703385, LR: 0.0003 +[2026-03-02 12:01:15] (step=0034083) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.668558012130698, LR: 0.0003 +[2026-03-02 12:01:23] (step=0034084) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.668753668558012, LR: 0.0003 +[2026-03-02 12:01:31] (step=0034085) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.668949324985325, LR: 0.0003 +[2026-03-02 12:01:39] (step=0034086) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.669144981412639, LR: 0.0003 +[2026-03-02 12:01:46] (step=0034087) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.669340637839953, LR: 0.0003 +[2026-03-02 12:01:54] (step=0034088) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.6695362942672665, LR: 0.0003 +[2026-03-02 12:02:02] (step=0034089) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.6697319506945805, LR: 0.0003 +[2026-03-02 12:02:10] (step=0034090) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.669927607121894, LR: 0.0003 +[2026-03-02 12:02:18] (step=0034091) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.670123263549208, LR: 0.0003 +[2026-03-02 12:02:26] (step=0034092) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.670318919976522, LR: 0.0003 +[2026-03-02 12:02:33] (step=0034093) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 6.670514576403835, LR: 0.0003 +[2026-03-02 12:02:41] (step=0034094) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.670710232831149, LR: 0.0003 +[2026-03-02 12:02:49] (step=0034095) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.670905889258462, LR: 0.0003 +[2026-03-02 12:02:57] (step=0034096) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.671101545685776, LR: 0.0003 +[2026-03-02 12:03:05] (step=0034097) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.67129720211309, LR: 0.0003 +[2026-03-02 12:03:13] (step=0034098) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.671492858540403, LR: 0.0003 +[2026-03-02 12:03:21] (step=0034099) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 6.671688514967717, LR: 0.0003 +[2026-03-02 12:03:29] (step=0034100) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.67188417139503, LR: 0.0003 +[2026-03-02 12:03:36] (step=0034101) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 6.672079827822344, LR: 0.0003 +[2026-03-02 12:03:44] (step=0034102) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.672275484249657, LR: 0.0003 +[2026-03-02 12:03:52] (step=0034103) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.672471140676971, LR: 0.0003 +[2026-03-02 12:04:00] (step=0034104) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.672666797104285, LR: 0.0003 +[2026-03-02 12:04:08] (step=0034105) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.672862453531598, LR: 0.0003 +[2026-03-02 12:04:16] (step=0034106) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.673058109958912, LR: 0.0003 +[2026-03-02 12:04:24] (step=0034107) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.6732537663862255, LR: 0.0003 +[2026-03-02 12:04:31] (step=0034108) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.6734494228135395, LR: 0.0003 +[2026-03-02 12:04:39] (step=0034109) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.6736450792408535, LR: 0.0003 +[2026-03-02 12:04:47] (step=0034110) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.673840735668167, LR: 0.0003 +[2026-03-02 12:04:55] (step=0034111) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.674036392095481, LR: 0.0003 +[2026-03-02 12:05:03] (step=0034112) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.674232048522794, LR: 0.0003 +[2026-03-02 12:05:11] (step=0034113) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.674427704950108, LR: 0.0003 +[2026-03-02 12:05:18] (step=0034114) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 6.674623361377421, LR: 0.0003 +[2026-03-02 12:05:26] (step=0034115) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.674819017804735, LR: 0.0003 +[2026-03-02 12:05:34] (step=0034116) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.675014674232049, LR: 0.0003 +[2026-03-02 12:05:42] (step=0034117) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.675210330659362, LR: 0.0003 +[2026-03-02 12:05:50] (step=0034118) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.675405987086676, LR: 0.0003 +[2026-03-02 12:05:58] (step=0034119) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.675601643513989, LR: 0.0003 +[2026-03-02 12:06:05] (step=0034120) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 6.675797299941303, LR: 0.0003 +[2026-03-02 12:06:13] (step=0034121) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.675992956368617, LR: 0.0003 +[2026-03-02 12:06:21] (step=0034122) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.67618861279593, LR: 0.0003 +[2026-03-02 12:06:29] (step=0034123) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.676384269223244, LR: 0.0003 +[2026-03-02 12:06:37] (step=0034124) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.676579925650557, LR: 0.0003 +[2026-03-02 12:06:45] (step=0034125) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.676775582077871, LR: 0.0003 +[2026-03-02 12:06:53] (step=0034126) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.6769712385051845, LR: 0.0003 +[2026-03-02 12:07:00] (step=0034127) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.6771668949324985, LR: 0.0003 +[2026-03-02 12:07:08] (step=0034128) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.6773625513598125, LR: 0.0003 +[2026-03-02 12:07:16] (step=0034129) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.677558207787126, LR: 0.0003 +[2026-03-02 12:07:24] (step=0034130) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.67775386421444, LR: 0.0003 +[2026-03-02 12:07:32] (step=0034131) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.677949520641753, LR: 0.0003 +[2026-03-02 12:07:40] (step=0034132) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.678145177069067, LR: 0.0003 +[2026-03-02 12:07:47] (step=0034133) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.678340833496381, LR: 0.0003 +[2026-03-02 12:07:55] (step=0034134) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.678536489923694, LR: 0.0003 +[2026-03-02 12:08:03] (step=0034135) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.678732146351008, LR: 0.0003 +[2026-03-02 12:08:11] (step=0034136) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.678927802778321, LR: 0.0003 +[2026-03-02 12:08:19] (step=0034137) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.679123459205635, LR: 0.0003 +[2026-03-02 12:08:27] (step=0034138) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.679319115632948, LR: 0.0003 +[2026-03-02 12:08:35] (step=0034139) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.679514772060262, LR: 0.0003 +[2026-03-02 12:08:42] (step=0034140) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.679710428487576, LR: 0.0003 +[2026-03-02 12:08:50] (step=0034141) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.679906084914889, LR: 0.0003 +[2026-03-02 12:08:58] (step=0034142) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 6.680101741342203, LR: 0.0003 +[2026-03-02 12:09:06] (step=0034143) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.680297397769516, LR: 0.0003 +[2026-03-02 12:09:14] (step=0034144) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.68049305419683, LR: 0.0003 +[2026-03-02 12:09:22] (step=0034145) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.680688710624144, LR: 0.0003 +[2026-03-02 12:09:30] (step=0034146) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.6808843670514575, LR: 0.0003 +[2026-03-02 12:09:38] (step=0034147) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.6810800234787715, LR: 0.0003 +[2026-03-02 12:09:45] (step=0034148) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.681275679906085, LR: 0.0003 +[2026-03-02 12:09:53] (step=0034149) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.681471336333399, LR: 0.0003 +[2026-03-02 12:10:01] (step=0034150) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 6.681666992760713, LR: 0.0003 +[2026-03-02 12:10:09] (step=0034151) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.681862649188026, LR: 0.0003 +[2026-03-02 12:10:17] (step=0034152) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.68205830561534, LR: 0.0003 +[2026-03-02 12:10:25] (step=0034153) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.682253962042653, LR: 0.0003 +[2026-03-02 12:10:33] (step=0034154) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.682449618469967, LR: 0.0003 +[2026-03-02 12:10:40] (step=0034155) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.68264527489728, LR: 0.0003 +[2026-03-02 12:10:48] (step=0034156) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.682840931324594, LR: 0.0003 +[2026-03-02 12:10:56] (step=0034157) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.683036587751908, LR: 0.0003 +[2026-03-02 12:11:04] (step=0034158) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.683232244179221, LR: 0.0003 +[2026-03-02 12:11:12] (step=0034159) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.683427900606535, LR: 0.0003 +[2026-03-02 12:11:20] (step=0034160) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.683623557033848, LR: 0.0003 +[2026-03-02 12:11:27] (step=0034161) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.683819213461162, LR: 0.0003 +[2026-03-02 12:11:35] (step=0034162) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.684014869888476, LR: 0.0003 +[2026-03-02 12:11:43] (step=0034163) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.684210526315789, LR: 0.0003 +[2026-03-02 12:11:51] (step=0034164) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.684406182743103, LR: 0.0003 +[2026-03-02 12:11:59] (step=0034165) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.6846018391704165, LR: 0.0003 +[2026-03-02 12:12:07] (step=0034166) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.6847974955977305, LR: 0.0003 +[2026-03-02 12:12:14] (step=0034167) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.684993152025044, LR: 0.0003 +[2026-03-02 12:12:22] (step=0034168) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.685188808452358, LR: 0.0003 +[2026-03-02 12:12:30] (step=0034169) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.685384464879672, LR: 0.0003 +[2026-03-02 12:12:38] (step=0034170) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.685580121306985, LR: 0.0003 +[2026-03-02 12:12:46] (step=0034171) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.685775777734299, LR: 0.0003 +[2026-03-02 12:12:54] (step=0034172) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.685971434161612, LR: 0.0003 +[2026-03-02 12:13:01] (step=0034173) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.686167090588926, LR: 0.0003 +[2026-03-02 12:13:09] (step=0034174) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.68636274701624, LR: 0.0003 +[2026-03-02 12:13:17] (step=0034175) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 6.686558403443553, LR: 0.0003 +[2026-03-02 12:13:25] (step=0034176) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 6.686754059870867, LR: 0.0003 +[2026-03-02 12:13:33] (step=0034177) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.68694971629818, LR: 0.0003 +[2026-03-02 12:13:41] (step=0034178) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 6.687145372725494, LR: 0.0003 +[2026-03-02 12:13:48] (step=0034179) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.687341029152807, LR: 0.0003 +[2026-03-02 12:13:56] (step=0034180) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.687536685580121, LR: 0.0003 +[2026-03-02 12:14:04] (step=0034181) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.687732342007435, LR: 0.0003 +[2026-03-02 12:14:12] (step=0034182) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.687927998434748, LR: 0.0003 +[2026-03-02 12:14:20] (step=0034183) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.688123654862062, LR: 0.0003 +[2026-03-02 12:14:28] (step=0034184) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.6883193112893755, LR: 0.0003 +[2026-03-02 12:14:35] (step=0034185) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.6885149677166895, LR: 0.0003 +[2026-03-02 12:14:43] (step=0034186) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.6887106241440035, LR: 0.0003 +[2026-03-02 12:14:51] (step=0034187) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.688906280571317, LR: 0.0003 +[2026-03-02 12:14:59] (step=0034188) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.689101936998631, LR: 0.0003 +[2026-03-02 12:15:07] (step=0034189) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 6.689297593425944, LR: 0.0003 +[2026-03-02 12:15:15] (step=0034190) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.689493249853258, LR: 0.0003 +[2026-03-02 12:15:23] (step=0034191) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.689688906280571, LR: 0.0003 +[2026-03-02 12:15:30] (step=0034192) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 6.689884562707885, LR: 0.0003 +[2026-03-02 12:15:38] (step=0034193) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.690080219135199, LR: 0.0003 +[2026-03-02 12:15:46] (step=0034194) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.690275875562512, LR: 0.0003 +[2026-03-02 12:15:54] (step=0034195) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.690471531989826, LR: 0.0003 +[2026-03-02 12:16:02] (step=0034196) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.690667188417139, LR: 0.0003 +[2026-03-02 12:16:10] (step=0034197) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.690862844844453, LR: 0.0003 +[2026-03-02 12:16:18] (step=0034198) Train Loss: 0.4511, Train Steps/Sec: 0.12, Epoch: 6.691058501271767, LR: 0.0003 +[2026-03-02 12:16:25] (step=0034199) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 6.69125415769908, LR: 0.0003 +[2026-03-02 12:16:33] (step=0034200) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.691449814126394, LR: 0.0003 +[2026-03-02 12:16:41] (step=0034201) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.691645470553707, LR: 0.0003 +[2026-03-02 12:16:49] (step=0034202) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.691841126981021, LR: 0.0003 +[2026-03-02 12:16:57] (step=0034203) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 6.6920367834083345, LR: 0.0003 +[2026-03-02 12:17:05] (step=0034204) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.6922324398356485, LR: 0.0003 +[2026-03-02 12:17:12] (step=0034205) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.6924280962629625, LR: 0.0003 +[2026-03-02 12:17:20] (step=0034206) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.692623752690276, LR: 0.0003 +[2026-03-02 12:17:28] (step=0034207) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.69281940911759, LR: 0.0003 +[2026-03-02 12:17:36] (step=0034208) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.693015065544903, LR: 0.0003 +[2026-03-02 12:17:44] (step=0034209) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 6.693210721972217, LR: 0.0003 +[2026-03-02 12:17:52] (step=0034210) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.693406378399531, LR: 0.0003 +[2026-03-02 12:17:59] (step=0034211) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.693602034826844, LR: 0.0003 +[2026-03-02 12:18:07] (step=0034212) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.693797691254158, LR: 0.0003 +[2026-03-02 12:18:15] (step=0034213) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.693993347681471, LR: 0.0003 +[2026-03-02 12:18:23] (step=0034214) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.694189004108785, LR: 0.0003 +[2026-03-02 12:18:31] (step=0034215) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.694384660536099, LR: 0.0003 +[2026-03-02 12:18:39] (step=0034216) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.694580316963412, LR: 0.0003 +[2026-03-02 12:18:47] (step=0034217) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.694775973390726, LR: 0.0003 +[2026-03-02 12:18:54] (step=0034218) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.694971629818039, LR: 0.0003 +[2026-03-02 12:19:02] (step=0034219) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.695167286245353, LR: 0.0003 +[2026-03-02 12:19:10] (step=0034220) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 6.695362942672666, LR: 0.0003 +[2026-03-02 12:19:18] (step=0034221) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.69555859909998, LR: 0.0003 +[2026-03-02 12:19:26] (step=0034222) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.695754255527294, LR: 0.0003 +[2026-03-02 12:19:34] (step=0034223) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.6959499119546075, LR: 0.0003 +[2026-03-02 12:19:41] (step=0034224) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.6961455683819215, LR: 0.0003 +[2026-03-02 12:19:49] (step=0034225) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.696341224809235, LR: 0.0003 +[2026-03-02 12:19:57] (step=0034226) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.696536881236549, LR: 0.0003 +[2026-03-02 12:20:05] (step=0034227) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.696732537663863, LR: 0.0003 +[2026-03-02 12:20:13] (step=0034228) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.696928194091176, LR: 0.0003 +[2026-03-02 12:20:21] (step=0034229) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.69712385051849, LR: 0.0003 +[2026-03-02 12:20:28] (step=0034230) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.697319506945803, LR: 0.0003 +[2026-03-02 12:20:36] (step=0034231) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 6.697515163373117, LR: 0.0003 +[2026-03-02 12:20:44] (step=0034232) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.69771081980043, LR: 0.0003 +[2026-03-02 12:20:52] (step=0034233) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.697906476227744, LR: 0.0003 +[2026-03-02 12:21:00] (step=0034234) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.698102132655058, LR: 0.0003 +[2026-03-02 12:21:08] (step=0034235) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 6.698297789082371, LR: 0.0003 +[2026-03-02 12:21:15] (step=0034236) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.698493445509685, LR: 0.0003 +[2026-03-02 12:21:23] (step=0034237) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.698689101936998, LR: 0.0003 +[2026-03-02 12:21:31] (step=0034238) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.698884758364312, LR: 0.0003 +[2026-03-02 12:21:39] (step=0034239) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.699080414791626, LR: 0.0003 +[2026-03-02 12:21:47] (step=0034240) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.699276071218939, LR: 0.0003 +[2026-03-02 12:21:55] (step=0034241) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.699471727646253, LR: 0.0003 +[2026-03-02 12:22:03] (step=0034242) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.6996673840735665, LR: 0.0003 +[2026-03-02 12:22:10] (step=0034243) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.6998630405008806, LR: 0.0003 +[2026-03-02 12:22:18] (step=0034244) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.700058696928194, LR: 0.0003 +[2026-03-02 12:22:26] (step=0034245) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.700254353355508, LR: 0.0003 +[2026-03-02 12:22:34] (step=0034246) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.700450009782822, LR: 0.0003 +[2026-03-02 12:22:42] (step=0034247) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.700645666210135, LR: 0.0003 +[2026-03-02 12:22:50] (step=0034248) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.700841322637449, LR: 0.0003 +[2026-03-02 12:22:58] (step=0034249) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.701036979064762, LR: 0.0003 +[2026-03-02 12:23:05] (step=0034250) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.701232635492076, LR: 0.0003 +[2026-03-02 12:23:13] (step=0034251) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.70142829191939, LR: 0.0003 +[2026-03-02 12:23:21] (step=0034252) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.701623948346703, LR: 0.0003 +[2026-03-02 12:23:29] (step=0034253) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.701819604774017, LR: 0.0003 +[2026-03-02 12:23:37] (step=0034254) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.70201526120133, LR: 0.0003 +[2026-03-02 12:23:45] (step=0034255) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.702210917628644, LR: 0.0003 +[2026-03-02 12:23:52] (step=0034256) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 6.702406574055957, LR: 0.0003 +[2026-03-02 12:24:00] (step=0034257) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.702602230483271, LR: 0.0003 +[2026-03-02 12:24:08] (step=0034258) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.702797886910585, LR: 0.0003 +[2026-03-02 12:24:16] (step=0034259) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.702993543337898, LR: 0.0003 +[2026-03-02 12:24:24] (step=0034260) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.703189199765212, LR: 0.0003 +[2026-03-02 12:24:32] (step=0034261) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 6.7033848561925256, LR: 0.0003 +[2026-03-02 12:24:40] (step=0034262) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.70358051261984, LR: 0.0003 +[2026-03-02 12:24:47] (step=0034263) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.703776169047154, LR: 0.0003 +[2026-03-02 12:24:55] (step=0034264) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.703971825474467, LR: 0.0003 +[2026-03-02 12:25:03] (step=0034265) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 6.704167481901781, LR: 0.0003 +[2026-03-02 12:25:11] (step=0034266) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 6.704363138329094, LR: 0.0003 +[2026-03-02 12:25:19] (step=0034267) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.704558794756408, LR: 0.0003 +[2026-03-02 12:25:27] (step=0034268) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.704754451183722, LR: 0.0003 +[2026-03-02 12:25:35] (step=0034269) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.704950107611035, LR: 0.0003 +[2026-03-02 12:25:42] (step=0034270) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 6.705145764038349, LR: 0.0003 +[2026-03-02 12:25:50] (step=0034271) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.705341420465662, LR: 0.0003 +[2026-03-02 12:25:58] (step=0034272) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.705537076892976, LR: 0.0003 +[2026-03-02 12:26:06] (step=0034273) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.705732733320289, LR: 0.0003 +[2026-03-02 12:26:14] (step=0034274) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.705928389747603, LR: 0.0003 +[2026-03-02 12:26:22] (step=0034275) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.706124046174917, LR: 0.0003 +[2026-03-02 12:26:29] (step=0034276) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.70631970260223, LR: 0.0003 +[2026-03-02 12:26:37] (step=0034277) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.706515359029544, LR: 0.0003 +[2026-03-02 12:26:45] (step=0034278) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 6.706711015456857, LR: 0.0003 +[2026-03-02 12:26:53] (step=0034279) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.7069066718841714, LR: 0.0003 +[2026-03-02 12:27:01] (step=0034280) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.7071023283114855, LR: 0.0003 +[2026-03-02 12:27:09] (step=0034281) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.707297984738799, LR: 0.0003 +[2026-03-02 12:27:17] (step=0034282) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.707493641166113, LR: 0.0003 +[2026-03-02 12:27:24] (step=0034283) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.707689297593426, LR: 0.0003 +[2026-03-02 12:27:32] (step=0034284) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.70788495402074, LR: 0.0003 +[2026-03-02 12:27:40] (step=0034285) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.708080610448053, LR: 0.0003 +[2026-03-02 12:27:48] (step=0034286) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.708276266875367, LR: 0.0003 +[2026-03-02 12:27:56] (step=0034287) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.708471923302681, LR: 0.0003 +[2026-03-02 12:28:04] (step=0034288) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.708667579729994, LR: 0.0003 +[2026-03-02 12:28:11] (step=0034289) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.708863236157308, LR: 0.0003 +[2026-03-02 12:28:19] (step=0034290) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.709058892584621, LR: 0.0003 +[2026-03-02 12:28:27] (step=0034291) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.709254549011935, LR: 0.0003 +[2026-03-02 12:28:35] (step=0034292) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.709450205439249, LR: 0.0003 +[2026-03-02 12:28:43] (step=0034293) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.709645861866562, LR: 0.0003 +[2026-03-02 12:28:51] (step=0034294) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.709841518293876, LR: 0.0003 +[2026-03-02 12:28:59] (step=0034295) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.710037174721189, LR: 0.0003 +[2026-03-02 12:29:07] (step=0034296) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.710232831148503, LR: 0.0003 +[2026-03-02 12:29:14] (step=0034297) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.7104284875758164, LR: 0.0003 +[2026-03-02 12:29:22] (step=0034298) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.7106241440031305, LR: 0.0003 +[2026-03-02 12:29:30] (step=0034299) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 6.7108198004304445, LR: 0.0003 +[2026-03-02 12:29:38] (step=0034300) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.711015456857758, LR: 0.0003 +[2026-03-02 12:29:46] (step=0034301) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.711211113285072, LR: 0.0003 +[2026-03-02 12:29:54] (step=0034302) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.711406769712385, LR: 0.0003 +[2026-03-02 12:30:01] (step=0034303) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.711602426139699, LR: 0.0003 +[2026-03-02 12:30:09] (step=0034304) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.711798082567013, LR: 0.0003 +[2026-03-02 12:30:17] (step=0034305) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.711993738994326, LR: 0.0003 +[2026-03-02 12:30:25] (step=0034306) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.71218939542164, LR: 0.0003 +[2026-03-02 12:30:33] (step=0034307) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.712385051848953, LR: 0.0003 +[2026-03-02 12:30:41] (step=0034308) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.712580708276267, LR: 0.0003 +[2026-03-02 12:30:49] (step=0034309) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.71277636470358, LR: 0.0003 +[2026-03-02 12:30:56] (step=0034310) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.712972021130894, LR: 0.0003 +[2026-03-02 12:31:04] (step=0034311) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.713167677558208, LR: 0.0003 +[2026-03-02 12:31:12] (step=0034312) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.713363333985521, LR: 0.0003 +[2026-03-02 12:31:20] (step=0034313) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.713558990412835, LR: 0.0003 +[2026-03-02 12:31:28] (step=0034314) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.713754646840148, LR: 0.0003 +[2026-03-02 12:31:36] (step=0034315) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.713950303267462, LR: 0.0003 +[2026-03-02 12:31:43] (step=0034316) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 6.714145959694776, LR: 0.0003 +[2026-03-02 12:31:51] (step=0034317) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.7143416161220895, LR: 0.0003 +[2026-03-02 12:31:59] (step=0034318) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.7145372725494035, LR: 0.0003 +[2026-03-02 12:32:07] (step=0034319) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.714732928976717, LR: 0.0003 +[2026-03-02 12:32:15] (step=0034320) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.714928585404031, LR: 0.0003 +[2026-03-02 12:32:23] (step=0034321) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.715124241831345, LR: 0.0003 +[2026-03-02 12:32:30] (step=0034322) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.715319898258658, LR: 0.0003 +[2026-03-02 12:32:38] (step=0034323) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.715515554685972, LR: 0.0003 +[2026-03-02 12:32:46] (step=0034324) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.715711211113285, LR: 0.0003 +[2026-03-02 12:32:54] (step=0034325) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 6.715906867540599, LR: 0.0003 +[2026-03-02 12:33:02] (step=0034326) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.716102523967912, LR: 0.0003 +[2026-03-02 12:33:10] (step=0034327) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.716298180395226, LR: 0.0003 +[2026-03-02 12:33:18] (step=0034328) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.71649383682254, LR: 0.0003 +[2026-03-02 12:33:25] (step=0034329) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.716689493249853, LR: 0.0003 +[2026-03-02 12:33:33] (step=0034330) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.716885149677167, LR: 0.0003 +[2026-03-02 12:33:41] (step=0034331) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.71708080610448, LR: 0.0003 +[2026-03-02 12:33:49] (step=0034332) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.717276462531794, LR: 0.0003 +[2026-03-02 12:33:57] (step=0034333) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 6.717472118959108, LR: 0.0003 +[2026-03-02 12:34:05] (step=0034334) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.717667775386421, LR: 0.0003 +[2026-03-02 12:34:12] (step=0034335) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.717863431813735, LR: 0.0003 +[2026-03-02 12:34:20] (step=0034336) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.7180590882410485, LR: 0.0003 +[2026-03-02 12:34:28] (step=0034337) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.7182547446683625, LR: 0.0003 +[2026-03-02 12:34:36] (step=0034338) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.718450401095676, LR: 0.0003 +[2026-03-02 12:34:44] (step=0034339) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.71864605752299, LR: 0.0003 +[2026-03-02 12:34:52] (step=0034340) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.718841713950304, LR: 0.0003 +[2026-03-02 12:35:00] (step=0034341) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.719037370377617, LR: 0.0003 +[2026-03-02 12:35:07] (step=0034342) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.719233026804931, LR: 0.0003 +[2026-03-02 12:35:15] (step=0034343) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.719428683232244, LR: 0.0003 +[2026-03-02 12:35:23] (step=0034344) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 6.719624339659558, LR: 0.0003 +[2026-03-02 12:35:31] (step=0034345) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.719819996086872, LR: 0.0003 +[2026-03-02 12:35:39] (step=0034346) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.720015652514185, LR: 0.0003 +[2026-03-02 12:35:47] (step=0034347) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.720211308941499, LR: 0.0003 +[2026-03-02 12:35:55] (step=0034348) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.720406965368812, LR: 0.0003 +[2026-03-02 12:36:03] (step=0034349) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.720602621796126, LR: 0.0003 +[2026-03-02 12:36:10] (step=0034350) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.720798278223439, LR: 0.0003 +[2026-03-02 12:36:18] (step=0034351) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 6.720993934650753, LR: 0.0003 +[2026-03-02 12:36:26] (step=0034352) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.721189591078067, LR: 0.0003 +[2026-03-02 12:36:34] (step=0034353) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 6.72138524750538, LR: 0.0003 +[2026-03-02 12:36:42] (step=0034354) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.721580903932694, LR: 0.0003 +[2026-03-02 12:36:50] (step=0034355) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 6.7217765603600075, LR: 0.0003 +[2026-03-02 12:36:57] (step=0034356) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.7219722167873215, LR: 0.0003 +[2026-03-02 12:37:05] (step=0034357) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.7221678732146355, LR: 0.0003 +[2026-03-02 12:37:13] (step=0034358) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.722363529641949, LR: 0.0003 +[2026-03-02 12:37:21] (step=0034359) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.722559186069263, LR: 0.0003 +[2026-03-02 12:37:29] (step=0034360) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.722754842496576, LR: 0.0003 +[2026-03-02 12:37:37] (step=0034361) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.72295049892389, LR: 0.0003 +[2026-03-02 12:37:44] (step=0034362) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.723146155351203, LR: 0.0003 +[2026-03-02 12:37:52] (step=0034363) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.723341811778517, LR: 0.0003 +[2026-03-02 12:38:00] (step=0034364) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.723537468205831, LR: 0.0003 +[2026-03-02 12:38:08] (step=0034365) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.723733124633144, LR: 0.0003 +[2026-03-02 12:38:16] (step=0034366) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.723928781060458, LR: 0.0003 +[2026-03-02 12:38:24] (step=0034367) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.724124437487771, LR: 0.0003 +[2026-03-02 12:38:31] (step=0034368) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.724320093915085, LR: 0.0003 +[2026-03-02 12:38:39] (step=0034369) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.724515750342399, LR: 0.0003 +[2026-03-02 12:38:47] (step=0034370) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.724711406769712, LR: 0.0003 +[2026-03-02 12:38:55] (step=0034371) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.724907063197026, LR: 0.0003 +[2026-03-02 12:39:03] (step=0034372) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.725102719624339, LR: 0.0003 +[2026-03-02 12:39:11] (step=0034373) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.725298376051653, LR: 0.0003 +[2026-03-02 12:39:18] (step=0034374) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.725494032478967, LR: 0.0003 +[2026-03-02 12:39:26] (step=0034375) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.7256896889062805, LR: 0.0003 +[2026-03-02 12:39:34] (step=0034376) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.7258853453335945, LR: 0.0003 +[2026-03-02 12:39:42] (step=0034377) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.726081001760908, LR: 0.0003 +[2026-03-02 12:39:50] (step=0034378) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.726276658188222, LR: 0.0003 +[2026-03-02 12:39:58] (step=0034379) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.726472314615535, LR: 0.0003 +[2026-03-02 12:40:06] (step=0034380) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.726667971042849, LR: 0.0003 +[2026-03-02 12:40:13] (step=0034381) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.726863627470163, LR: 0.0003 +[2026-03-02 12:40:21] (step=0034382) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.727059283897476, LR: 0.0003 +[2026-03-02 12:40:29] (step=0034383) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.72725494032479, LR: 0.0003 +[2026-03-02 12:40:37] (step=0034384) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.727450596752103, LR: 0.0003 +[2026-03-02 12:40:45] (step=0034385) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.727646253179417, LR: 0.0003 +[2026-03-02 12:40:53] (step=0034386) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.727841909606731, LR: 0.0003 +[2026-03-02 12:41:00] (step=0034387) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.728037566034044, LR: 0.0003 +[2026-03-02 12:41:08] (step=0034388) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.728233222461358, LR: 0.0003 +[2026-03-02 12:41:16] (step=0034389) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.728428878888671, LR: 0.0003 +[2026-03-02 12:41:24] (step=0034390) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.728624535315985, LR: 0.0003 +[2026-03-02 12:41:32] (step=0034391) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.728820191743298, LR: 0.0003 +[2026-03-02 12:41:40] (step=0034392) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.729015848170612, LR: 0.0003 +[2026-03-02 12:41:48] (step=0034393) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.729211504597926, LR: 0.0003 +[2026-03-02 12:41:56] (step=0034394) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.7294071610252395, LR: 0.0003 +[2026-03-02 12:42:04] (step=0034395) Train Loss: 0.4335, Train Steps/Sec: 0.12, Epoch: 6.7296028174525535, LR: 0.0003 +[2026-03-02 12:42:11] (step=0034396) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.729798473879867, LR: 0.0003 +[2026-03-02 12:42:19] (step=0034397) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.729994130307181, LR: 0.0003 +[2026-03-02 12:42:27] (step=0034398) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.730189786734495, LR: 0.0003 +[2026-03-02 12:42:35] (step=0034399) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.730385443161808, LR: 0.0003 +[2026-03-02 12:42:43] (step=0034400) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.730581099589122, LR: 0.0003 +[2026-03-02 12:42:51] (step=0034401) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.730776756016435, LR: 0.0003 +[2026-03-02 12:42:58] (step=0034402) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 6.730972412443749, LR: 0.0003 +[2026-03-02 12:43:06] (step=0034403) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.731168068871062, LR: 0.0003 +[2026-03-02 12:43:14] (step=0034404) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.731363725298376, LR: 0.0003 +[2026-03-02 12:43:22] (step=0034405) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.73155938172569, LR: 0.0003 +[2026-03-02 12:43:30] (step=0034406) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.731755038153003, LR: 0.0003 +[2026-03-02 12:43:38] (step=0034407) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.731950694580317, LR: 0.0003 +[2026-03-02 12:43:45] (step=0034408) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.73214635100763, LR: 0.0003 +[2026-03-02 12:43:53] (step=0034409) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.732342007434944, LR: 0.0003 +[2026-03-02 12:44:01] (step=0034410) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.732537663862258, LR: 0.0003 +[2026-03-02 12:44:09] (step=0034411) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.732733320289571, LR: 0.0003 +[2026-03-02 12:44:17] (step=0034412) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.732928976716885, LR: 0.0003 +[2026-03-02 12:44:25] (step=0034413) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.7331246331441985, LR: 0.0003 +[2026-03-02 12:44:32] (step=0034414) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.7333202895715125, LR: 0.0003 +[2026-03-02 12:44:40] (step=0034415) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.733515945998826, LR: 0.0003 +[2026-03-02 12:44:48] (step=0034416) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.73371160242614, LR: 0.0003 +[2026-03-02 12:44:56] (step=0034417) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.733907258853454, LR: 0.0003 +[2026-03-02 12:45:04] (step=0034418) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.734102915280767, LR: 0.0003 +[2026-03-02 12:45:12] (step=0034419) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.734298571708081, LR: 0.0003 +[2026-03-02 12:45:19] (step=0034420) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.734494228135394, LR: 0.0003 +[2026-03-02 12:45:27] (step=0034421) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.734689884562708, LR: 0.0003 +[2026-03-02 12:45:35] (step=0034422) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.734885540990022, LR: 0.0003 +[2026-03-02 12:45:43] (step=0034423) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.735081197417335, LR: 0.0003 +[2026-03-02 12:45:51] (step=0034424) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.735276853844649, LR: 0.0003 +[2026-03-02 12:45:59] (step=0034425) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.735472510271962, LR: 0.0003 +[2026-03-02 12:46:07] (step=0034426) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.735668166699276, LR: 0.0003 +[2026-03-02 12:46:14] (step=0034427) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.735863823126589, LR: 0.0003 +[2026-03-02 12:46:22] (step=0034428) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.736059479553903, LR: 0.0003 +[2026-03-02 12:46:30] (step=0034429) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.736255135981217, LR: 0.0003 +[2026-03-02 12:46:38] (step=0034430) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.73645079240853, LR: 0.0003 +[2026-03-02 12:46:46] (step=0034431) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.736646448835844, LR: 0.0003 +[2026-03-02 12:46:54] (step=0034432) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.7368421052631575, LR: 0.0003 +[2026-03-02 12:47:01] (step=0034433) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 6.7370377616904715, LR: 0.0003 +[2026-03-02 12:47:09] (step=0034434) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.7372334181177855, LR: 0.0003 +[2026-03-02 12:47:17] (step=0034435) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.737429074545099, LR: 0.0003 +[2026-03-02 12:47:25] (step=0034436) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.737624730972413, LR: 0.0003 +[2026-03-02 12:47:33] (step=0034437) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.737820387399726, LR: 0.0003 +[2026-03-02 12:47:41] (step=0034438) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 6.73801604382704, LR: 0.0003 +[2026-03-02 12:47:49] (step=0034439) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.738211700254354, LR: 0.0003 +[2026-03-02 12:47:56] (step=0034440) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.738407356681667, LR: 0.0003 +[2026-03-02 12:48:04] (step=0034441) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.738603013108981, LR: 0.0003 +[2026-03-02 12:48:12] (step=0034442) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.738798669536294, LR: 0.0003 +[2026-03-02 12:48:20] (step=0034443) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.738994325963608, LR: 0.0003 +[2026-03-02 12:48:28] (step=0034444) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.739189982390921, LR: 0.0003 +[2026-03-02 12:48:36] (step=0034445) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.739385638818235, LR: 0.0003 +[2026-03-02 12:48:44] (step=0034446) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.739581295245549, LR: 0.0003 +[2026-03-02 12:48:52] (step=0034447) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.739776951672862, LR: 0.0003 +[2026-03-02 12:48:59] (step=0034448) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 6.739972608100176, LR: 0.0003 +[2026-03-02 12:49:07] (step=0034449) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.740168264527489, LR: 0.0003 +[2026-03-02 12:49:15] (step=0034450) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.740363920954803, LR: 0.0003 +[2026-03-02 12:49:23] (step=0034451) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.740559577382117, LR: 0.0003 +[2026-03-02 12:49:31] (step=0034452) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.7407552338094305, LR: 0.0003 +[2026-03-02 12:49:39] (step=0034453) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.7409508902367445, LR: 0.0003 +[2026-03-02 12:49:46] (step=0034454) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.741146546664058, LR: 0.0003 +[2026-03-02 12:49:54] (step=0034455) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.741342203091372, LR: 0.0003 +[2026-03-02 12:50:02] (step=0034456) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.741537859518685, LR: 0.0003 +[2026-03-02 12:50:10] (step=0034457) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 6.741733515945999, LR: 0.0003 +[2026-03-02 12:50:18] (step=0034458) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.741929172373313, LR: 0.0003 +[2026-03-02 12:50:26] (step=0034459) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.742124828800626, LR: 0.0003 +[2026-03-02 12:50:33] (step=0034460) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.74232048522794, LR: 0.0003 +[2026-03-02 12:50:41] (step=0034461) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.742516141655253, LR: 0.0003 +[2026-03-02 12:50:49] (step=0034462) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.742711798082567, LR: 0.0003 +[2026-03-02 12:50:57] (step=0034463) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.742907454509881, LR: 0.0003 +[2026-03-02 12:51:05] (step=0034464) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.743103110937194, LR: 0.0003 +[2026-03-02 12:51:13] (step=0034465) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.743298767364508, LR: 0.0003 +[2026-03-02 12:51:21] (step=0034466) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.743494423791821, LR: 0.0003 +[2026-03-02 12:51:28] (step=0034467) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.743690080219135, LR: 0.0003 +[2026-03-02 12:51:36] (step=0034468) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.743885736646448, LR: 0.0003 +[2026-03-02 12:51:44] (step=0034469) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.744081393073762, LR: 0.0003 +[2026-03-02 12:51:52] (step=0034470) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.744277049501076, LR: 0.0003 +[2026-03-02 12:52:00] (step=0034471) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.7444727059283895, LR: 0.0003 +[2026-03-02 12:52:08] (step=0034472) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.7446683623557036, LR: 0.0003 +[2026-03-02 12:52:15] (step=0034473) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.744864018783017, LR: 0.0003 +[2026-03-02 12:52:23] (step=0034474) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.745059675210331, LR: 0.0003 +[2026-03-02 12:52:31] (step=0034475) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.745255331637645, LR: 0.0003 +[2026-03-02 12:52:39] (step=0034476) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.745450988064958, LR: 0.0003 +[2026-03-02 12:52:47] (step=0034477) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.745646644492272, LR: 0.0003 +[2026-03-02 12:52:55] (step=0034478) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.745842300919585, LR: 0.0003 +[2026-03-02 12:53:02] (step=0034479) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.746037957346899, LR: 0.0003 +[2026-03-02 12:53:10] (step=0034480) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.746233613774212, LR: 0.0003 +[2026-03-02 12:53:18] (step=0034481) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.746429270201526, LR: 0.0003 +[2026-03-02 12:53:26] (step=0034482) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.74662492662884, LR: 0.0003 +[2026-03-02 12:53:34] (step=0034483) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.746820583056153, LR: 0.0003 +[2026-03-02 12:53:42] (step=0034484) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 6.747016239483467, LR: 0.0003 +[2026-03-02 12:53:50] (step=0034485) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.74721189591078, LR: 0.0003 +[2026-03-02 12:53:57] (step=0034486) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.747407552338094, LR: 0.0003 +[2026-03-02 12:54:05] (step=0034487) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.747603208765408, LR: 0.0003 +[2026-03-02 12:54:13] (step=0034488) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 6.747798865192721, LR: 0.0003 +[2026-03-02 12:54:21] (step=0034489) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.747994521620035, LR: 0.0003 +[2026-03-02 12:54:29] (step=0034490) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.7481901780473486, LR: 0.0003 +[2026-03-02 12:54:37] (step=0034491) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.748385834474663, LR: 0.0003 +[2026-03-02 12:54:45] (step=0034492) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.748581490901977, LR: 0.0003 +[2026-03-02 12:54:52] (step=0034493) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.74877714732929, LR: 0.0003 +[2026-03-02 12:55:00] (step=0034494) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.748972803756604, LR: 0.0003 +[2026-03-02 12:55:08] (step=0034495) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.749168460183917, LR: 0.0003 +[2026-03-02 12:55:16] (step=0034496) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.749364116611231, LR: 0.0003 +[2026-03-02 12:55:24] (step=0034497) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.749559773038544, LR: 0.0003 +[2026-03-02 12:55:32] (step=0034498) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.749755429465858, LR: 0.0003 +[2026-03-02 12:55:40] (step=0034499) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.749951085893172, LR: 0.0003 +[2026-03-02 12:55:48] (step=0034500) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.750146742320485, LR: 0.0003 +[2026-03-02 12:55:48] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0034500/ +[2026-03-02 12:55:55] (step=0034501) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.750342398747799, LR: 0.0003 +[2026-03-02 12:56:03] (step=0034502) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.750538055175112, LR: 0.0003 +[2026-03-02 12:56:11] (step=0034503) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.750733711602426, LR: 0.0003 +[2026-03-02 12:56:19] (step=0034504) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.75092936802974, LR: 0.0003 +[2026-03-02 12:56:27] (step=0034505) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 6.751125024457053, LR: 0.0003 +[2026-03-02 12:56:35] (step=0034506) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.751320680884367, LR: 0.0003 +[2026-03-02 12:56:42] (step=0034507) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.75151633731168, LR: 0.0003 +[2026-03-02 12:56:50] (step=0034508) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.751711993738994, LR: 0.0003 +[2026-03-02 12:56:58] (step=0034509) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 6.751907650166308, LR: 0.0003 +[2026-03-02 12:57:06] (step=0034510) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 6.752103306593622, LR: 0.0003 +[2026-03-02 12:57:14] (step=0034511) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.752298963020936, LR: 0.0003 +[2026-03-02 12:57:22] (step=0034512) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.752494619448249, LR: 0.0003 +[2026-03-02 12:57:30] (step=0034513) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.752690275875563, LR: 0.0003 +[2026-03-02 12:57:37] (step=0034514) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.752885932302876, LR: 0.0003 +[2026-03-02 12:57:45] (step=0034515) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.75308158873019, LR: 0.0003 +[2026-03-02 12:57:53] (step=0034516) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.753277245157504, LR: 0.0003 +[2026-03-02 12:58:01] (step=0034517) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.753472901584817, LR: 0.0003 +[2026-03-02 12:58:09] (step=0034518) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.753668558012131, LR: 0.0003 +[2026-03-02 12:58:17] (step=0034519) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.753864214439444, LR: 0.0003 +[2026-03-02 12:58:24] (step=0034520) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.754059870866758, LR: 0.0003 +[2026-03-02 12:58:32] (step=0034521) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.754255527294071, LR: 0.0003 +[2026-03-02 12:58:40] (step=0034522) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.754451183721385, LR: 0.0003 +[2026-03-02 12:58:48] (step=0034523) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.754646840148699, LR: 0.0003 +[2026-03-02 12:58:56] (step=0034524) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.754842496576012, LR: 0.0003 +[2026-03-02 12:59:04] (step=0034525) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.755038153003326, LR: 0.0003 +[2026-03-02 12:59:11] (step=0034526) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 6.7552338094306394, LR: 0.0003 +[2026-03-02 12:59:19] (step=0034527) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.7554294658579535, LR: 0.0003 +[2026-03-02 12:59:27] (step=0034528) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.7556251222852675, LR: 0.0003 +[2026-03-02 12:59:35] (step=0034529) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.755820778712581, LR: 0.0003 +[2026-03-02 12:59:43] (step=0034530) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.756016435139895, LR: 0.0003 +[2026-03-02 12:59:51] (step=0034531) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 6.756212091567208, LR: 0.0003 +[2026-03-02 12:59:59] (step=0034532) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 6.756407747994522, LR: 0.0003 +[2026-03-02 13:00:06] (step=0034533) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.756603404421835, LR: 0.0003 +[2026-03-02 13:00:14] (step=0034534) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.756799060849149, LR: 0.0003 +[2026-03-02 13:00:22] (step=0034535) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.756994717276463, LR: 0.0003 +[2026-03-02 13:00:30] (step=0034536) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.757190373703776, LR: 0.0003 +[2026-03-02 13:00:38] (step=0034537) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.75738603013109, LR: 0.0003 +[2026-03-02 13:00:46] (step=0034538) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.757581686558403, LR: 0.0003 +[2026-03-02 13:00:54] (step=0034539) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.757777342985717, LR: 0.0003 +[2026-03-02 13:01:01] (step=0034540) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.757972999413031, LR: 0.0003 +[2026-03-02 13:01:09] (step=0034541) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.758168655840344, LR: 0.0003 +[2026-03-02 13:01:17] (step=0034542) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.758364312267658, LR: 0.0003 +[2026-03-02 13:01:25] (step=0034543) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.758559968694971, LR: 0.0003 +[2026-03-02 13:01:33] (step=0034544) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.758755625122285, LR: 0.0003 +[2026-03-02 13:01:41] (step=0034545) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.758951281549599, LR: 0.0003 +[2026-03-02 13:01:48] (step=0034546) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.7591469379769125, LR: 0.0003 +[2026-03-02 13:01:57] (step=0034547) Train Loss: 0.4476, Train Steps/Sec: 0.12, Epoch: 6.7593425944042265, LR: 0.0003 +[2026-03-02 13:02:04] (step=0034548) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.75953825083154, LR: 0.0003 +[2026-03-02 13:02:12] (step=0034549) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.759733907258854, LR: 0.0003 +[2026-03-02 13:02:20] (step=0034550) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.759929563686167, LR: 0.0003 +[2026-03-02 13:02:28] (step=0034551) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.760125220113481, LR: 0.0003 +[2026-03-02 13:02:36] (step=0034552) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.760320876540795, LR: 0.0003 +[2026-03-02 13:02:44] (step=0034553) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.760516532968108, LR: 0.0003 +[2026-03-02 13:02:51] (step=0034554) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 6.760712189395422, LR: 0.0003 +[2026-03-02 13:02:59] (step=0034555) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.760907845822735, LR: 0.0003 +[2026-03-02 13:03:07] (step=0034556) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.761103502250049, LR: 0.0003 +[2026-03-02 13:03:15] (step=0034557) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.761299158677363, LR: 0.0003 +[2026-03-02 13:03:23] (step=0034558) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.761494815104676, LR: 0.0003 +[2026-03-02 13:03:31] (step=0034559) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.76169047153199, LR: 0.0003 +[2026-03-02 13:03:39] (step=0034560) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.761886127959303, LR: 0.0003 +[2026-03-02 13:03:46] (step=0034561) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.762081784386617, LR: 0.0003 +[2026-03-02 13:03:54] (step=0034562) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.76227744081393, LR: 0.0003 +[2026-03-02 13:04:02] (step=0034563) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.762473097241244, LR: 0.0003 +[2026-03-02 13:04:10] (step=0034564) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.762668753668558, LR: 0.0003 +[2026-03-02 13:04:18] (step=0034565) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.7628644100958715, LR: 0.0003 +[2026-03-02 13:04:26] (step=0034566) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 6.7630600665231855, LR: 0.0003 +[2026-03-02 13:04:33] (step=0034567) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.763255722950499, LR: 0.0003 +[2026-03-02 13:04:41] (step=0034568) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.763451379377813, LR: 0.0003 +[2026-03-02 13:04:49] (step=0034569) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.763647035805127, LR: 0.0003 +[2026-03-02 13:04:57] (step=0034570) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.76384269223244, LR: 0.0003 +[2026-03-02 13:05:05] (step=0034571) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.764038348659754, LR: 0.0003 +[2026-03-02 13:05:13] (step=0034572) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.764234005087067, LR: 0.0003 +[2026-03-02 13:05:20] (step=0034573) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.764429661514381, LR: 0.0003 +[2026-03-02 13:05:28] (step=0034574) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.764625317941694, LR: 0.0003 +[2026-03-02 13:05:36] (step=0034575) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.764820974369008, LR: 0.0003 +[2026-03-02 13:05:44] (step=0034576) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.765016630796322, LR: 0.0003 +[2026-03-02 13:05:52] (step=0034577) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.765212287223635, LR: 0.0003 +[2026-03-02 13:06:00] (step=0034578) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 6.765407943650949, LR: 0.0003 +[2026-03-02 13:06:08] (step=0034579) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.765603600078262, LR: 0.0003 +[2026-03-02 13:06:15] (step=0034580) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.765799256505576, LR: 0.0003 +[2026-03-02 13:06:23] (step=0034581) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.76599491293289, LR: 0.0003 +[2026-03-02 13:06:31] (step=0034582) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.766190569360203, LR: 0.0003 +[2026-03-02 13:06:39] (step=0034583) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 6.766386225787517, LR: 0.0003 +[2026-03-02 13:06:47] (step=0034584) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 6.7665818822148305, LR: 0.0003 +[2026-03-02 13:06:55] (step=0034585) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.7667775386421445, LR: 0.0003 +[2026-03-02 13:07:02] (step=0034586) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.766973195069458, LR: 0.0003 +[2026-03-02 13:07:10] (step=0034587) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.767168851496772, LR: 0.0003 +[2026-03-02 13:07:18] (step=0034588) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 6.767364507924086, LR: 0.0003 +[2026-03-02 13:07:26] (step=0034589) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.767560164351399, LR: 0.0003 +[2026-03-02 13:07:34] (step=0034590) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.767755820778713, LR: 0.0003 +[2026-03-02 13:07:42] (step=0034591) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.767951477206026, LR: 0.0003 +[2026-03-02 13:07:49] (step=0034592) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.76814713363334, LR: 0.0003 +[2026-03-02 13:07:57] (step=0034593) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.768342790060654, LR: 0.0003 +[2026-03-02 13:08:05] (step=0034594) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.768538446487967, LR: 0.0003 +[2026-03-02 13:08:13] (step=0034595) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 6.768734102915281, LR: 0.0003 +[2026-03-02 13:08:21] (step=0034596) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.768929759342594, LR: 0.0003 +[2026-03-02 13:08:29] (step=0034597) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.769125415769908, LR: 0.0003 +[2026-03-02 13:08:37] (step=0034598) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.769321072197222, LR: 0.0003 +[2026-03-02 13:08:44] (step=0034599) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.769516728624535, LR: 0.0003 +[2026-03-02 13:08:52] (step=0034600) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 6.769712385051849, LR: 0.0003 +[2026-03-02 13:09:00] (step=0034601) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.769908041479162, LR: 0.0003 +[2026-03-02 13:09:08] (step=0034602) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.770103697906476, LR: 0.0003 +[2026-03-02 13:09:16] (step=0034603) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.7702993543337895, LR: 0.0003 +[2026-03-02 13:09:24] (step=0034604) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.7704950107611035, LR: 0.0003 +[2026-03-02 13:09:32] (step=0034605) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.7706906671884175, LR: 0.0003 +[2026-03-02 13:09:39] (step=0034606) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 6.770886323615731, LR: 0.0003 +[2026-03-02 13:09:47] (step=0034607) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.771081980043045, LR: 0.0003 +[2026-03-02 13:09:55] (step=0034608) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.771277636470358, LR: 0.0003 +[2026-03-02 13:10:03] (step=0034609) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.771473292897672, LR: 0.0003 +[2026-03-02 13:10:11] (step=0034610) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.771668949324986, LR: 0.0003 +[2026-03-02 13:10:19] (step=0034611) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.771864605752299, LR: 0.0003 +[2026-03-02 13:10:27] (step=0034612) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.772060262179613, LR: 0.0003 +[2026-03-02 13:10:34] (step=0034613) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.772255918606926, LR: 0.0003 +[2026-03-02 13:10:42] (step=0034614) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.77245157503424, LR: 0.0003 +[2026-03-02 13:10:50] (step=0034615) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.772647231461553, LR: 0.0003 +[2026-03-02 13:10:58] (step=0034616) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.772842887888867, LR: 0.0003 +[2026-03-02 13:11:06] (step=0034617) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.773038544316181, LR: 0.0003 +[2026-03-02 13:11:14] (step=0034618) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.773234200743494, LR: 0.0003 +[2026-03-02 13:11:21] (step=0034619) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.773429857170808, LR: 0.0003 +[2026-03-02 13:11:29] (step=0034620) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.773625513598121, LR: 0.0003 +[2026-03-02 13:11:37] (step=0034621) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.773821170025435, LR: 0.0003 +[2026-03-02 13:11:45] (step=0034622) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 6.774016826452749, LR: 0.0003 +[2026-03-02 13:11:53] (step=0034623) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.7742124828800625, LR: 0.0003 +[2026-03-02 13:12:01] (step=0034624) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 6.7744081393073765, LR: 0.0003 +[2026-03-02 13:12:08] (step=0034625) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 6.77460379573469, LR: 0.0003 +[2026-03-02 13:12:16] (step=0034626) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.774799452162004, LR: 0.0003 +[2026-03-02 13:12:24] (step=0034627) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.774995108589317, LR: 0.0003 +[2026-03-02 13:12:32] (step=0034628) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.775190765016631, LR: 0.0003 +[2026-03-02 13:12:40] (step=0034629) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.775386421443945, LR: 0.0003 +[2026-03-02 13:12:48] (step=0034630) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.775582077871258, LR: 0.0003 +[2026-03-02 13:12:56] (step=0034631) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.775777734298572, LR: 0.0003 +[2026-03-02 13:13:03] (step=0034632) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.775973390725885, LR: 0.0003 +[2026-03-02 13:13:11] (step=0034633) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.776169047153199, LR: 0.0003 +[2026-03-02 13:13:19] (step=0034634) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.776364703580513, LR: 0.0003 +[2026-03-02 13:13:27] (step=0034635) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.776560360007826, LR: 0.0003 +[2026-03-02 13:13:35] (step=0034636) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.77675601643514, LR: 0.0003 +[2026-03-02 13:13:43] (step=0034637) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.776951672862453, LR: 0.0003 +[2026-03-02 13:13:50] (step=0034638) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.777147329289767, LR: 0.0003 +[2026-03-02 13:13:58] (step=0034639) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.77734298571708, LR: 0.0003 +[2026-03-02 13:14:06] (step=0034640) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.777538642144394, LR: 0.0003 +[2026-03-02 13:14:14] (step=0034641) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.777734298571708, LR: 0.0003 +[2026-03-02 13:14:22] (step=0034642) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.7779299549990215, LR: 0.0003 +[2026-03-02 13:14:30] (step=0034643) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.7781256114263355, LR: 0.0003 +[2026-03-02 13:14:38] (step=0034644) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.778321267853649, LR: 0.0003 +[2026-03-02 13:14:46] (step=0034645) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.778516924280963, LR: 0.0003 +[2026-03-02 13:14:53] (step=0034646) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 6.778712580708277, LR: 0.0003 +[2026-03-02 13:15:01] (step=0034647) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.77890823713559, LR: 0.0003 +[2026-03-02 13:15:09] (step=0034648) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.779103893562904, LR: 0.0003 +[2026-03-02 13:15:17] (step=0034649) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.779299549990217, LR: 0.0003 +[2026-03-02 13:15:25] (step=0034650) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.779495206417531, LR: 0.0003 +[2026-03-02 13:15:33] (step=0034651) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.779690862844844, LR: 0.0003 +[2026-03-02 13:15:40] (step=0034652) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.779886519272158, LR: 0.0003 +[2026-03-02 13:15:48] (step=0034653) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.780082175699472, LR: 0.0003 +[2026-03-02 13:15:56] (step=0034654) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.780277832126785, LR: 0.0003 +[2026-03-02 13:16:04] (step=0034655) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.780473488554099, LR: 0.0003 +[2026-03-02 13:16:12] (step=0034656) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.780669144981412, LR: 0.0003 +[2026-03-02 13:16:20] (step=0034657) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.780864801408726, LR: 0.0003 +[2026-03-02 13:16:28] (step=0034658) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.78106045783604, LR: 0.0003 +[2026-03-02 13:16:35] (step=0034659) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.781256114263353, LR: 0.0003 +[2026-03-02 13:16:43] (step=0034660) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.781451770690667, LR: 0.0003 +[2026-03-02 13:16:51] (step=0034661) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.7816474271179805, LR: 0.0003 +[2026-03-02 13:16:59] (step=0034662) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 6.7818430835452945, LR: 0.0003 +[2026-03-02 13:17:07] (step=0034663) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.7820387399726085, LR: 0.0003 +[2026-03-02 13:17:15] (step=0034664) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 6.782234396399922, LR: 0.0003 +[2026-03-02 13:17:23] (step=0034665) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.782430052827236, LR: 0.0003 +[2026-03-02 13:17:30] (step=0034666) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.782625709254549, LR: 0.0003 +[2026-03-02 13:17:38] (step=0034667) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.782821365681863, LR: 0.0003 +[2026-03-02 13:17:46] (step=0034668) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.783017022109176, LR: 0.0003 +[2026-03-02 13:17:54] (step=0034669) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.78321267853649, LR: 0.0003 +[2026-03-02 13:18:02] (step=0034670) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.783408334963804, LR: 0.0003 +[2026-03-02 13:18:10] (step=0034671) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.783603991391117, LR: 0.0003 +[2026-03-02 13:18:18] (step=0034672) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.783799647818431, LR: 0.0003 +[2026-03-02 13:18:25] (step=0034673) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.783995304245744, LR: 0.0003 +[2026-03-02 13:18:33] (step=0034674) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.784190960673058, LR: 0.0003 +[2026-03-02 13:18:41] (step=0034675) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.784386617100372, LR: 0.0003 +[2026-03-02 13:18:49] (step=0034676) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 6.784582273527685, LR: 0.0003 +[2026-03-02 13:18:57] (step=0034677) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.784777929954999, LR: 0.0003 +[2026-03-02 13:19:05] (step=0034678) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.784973586382312, LR: 0.0003 +[2026-03-02 13:19:12] (step=0034679) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.785169242809626, LR: 0.0003 +[2026-03-02 13:19:20] (step=0034680) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.7853648992369395, LR: 0.0003 +[2026-03-02 13:19:28] (step=0034681) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.7855605556642535, LR: 0.0003 +[2026-03-02 13:19:36] (step=0034682) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.7857562120915675, LR: 0.0003 +[2026-03-02 13:19:44] (step=0034683) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.785951868518881, LR: 0.0003 +[2026-03-02 13:19:52] (step=0034684) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.786147524946195, LR: 0.0003 +[2026-03-02 13:19:59] (step=0034685) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.786343181373508, LR: 0.0003 +[2026-03-02 13:20:07] (step=0034686) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.786538837800822, LR: 0.0003 +[2026-03-02 13:20:15] (step=0034687) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.786734494228136, LR: 0.0003 +[2026-03-02 13:20:23] (step=0034688) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.786930150655449, LR: 0.0003 +[2026-03-02 13:20:31] (step=0034689) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.787125807082763, LR: 0.0003 +[2026-03-02 13:20:39] (step=0034690) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.787321463510076, LR: 0.0003 +[2026-03-02 13:20:47] (step=0034691) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.78751711993739, LR: 0.0003 +[2026-03-02 13:20:55] (step=0034692) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.787712776364703, LR: 0.0003 +[2026-03-02 13:21:02] (step=0034693) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.787908432792017, LR: 0.0003 +[2026-03-02 13:21:10] (step=0034694) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.788104089219331, LR: 0.0003 +[2026-03-02 13:21:18] (step=0034695) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.788299745646644, LR: 0.0003 +[2026-03-02 13:21:26] (step=0034696) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.788495402073958, LR: 0.0003 +[2026-03-02 13:21:34] (step=0034697) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.788691058501271, LR: 0.0003 +[2026-03-02 13:21:42] (step=0034698) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.788886714928585, LR: 0.0003 +[2026-03-02 13:21:50] (step=0034699) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.789082371355899, LR: 0.0003 +[2026-03-02 13:21:57] (step=0034700) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.7892780277832125, LR: 0.0003 +[2026-03-02 13:22:05] (step=0034701) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.7894736842105265, LR: 0.0003 +[2026-03-02 13:22:13] (step=0034702) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.78966934063784, LR: 0.0003 +[2026-03-02 13:22:21] (step=0034703) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.789864997065154, LR: 0.0003 +[2026-03-02 13:22:29] (step=0034704) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.790060653492467, LR: 0.0003 +[2026-03-02 13:22:37] (step=0034705) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.790256309919781, LR: 0.0003 +[2026-03-02 13:22:44] (step=0034706) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.790451966347095, LR: 0.0003 +[2026-03-02 13:22:52] (step=0034707) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.790647622774408, LR: 0.0003 +[2026-03-02 13:23:00] (step=0034708) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.790843279201722, LR: 0.0003 +[2026-03-02 13:23:08] (step=0034709) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.791038935629035, LR: 0.0003 +[2026-03-02 13:23:16] (step=0034710) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.791234592056349, LR: 0.0003 +[2026-03-02 13:23:24] (step=0034711) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.791430248483663, LR: 0.0003 +[2026-03-02 13:23:31] (step=0034712) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.791625904910976, LR: 0.0003 +[2026-03-02 13:23:39] (step=0034713) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 6.79182156133829, LR: 0.0003 +[2026-03-02 13:23:47] (step=0034714) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.792017217765603, LR: 0.0003 +[2026-03-02 13:23:55] (step=0034715) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.792212874192917, LR: 0.0003 +[2026-03-02 13:24:03] (step=0034716) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.792408530620231, LR: 0.0003 +[2026-03-02 13:24:11] (step=0034717) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.792604187047544, LR: 0.0003 +[2026-03-02 13:24:19] (step=0034718) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.792799843474858, LR: 0.0003 +[2026-03-02 13:24:26] (step=0034719) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 6.7929954999021716, LR: 0.0003 +[2026-03-02 13:24:34] (step=0034720) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.793191156329486, LR: 0.0003 +[2026-03-02 13:24:42] (step=0034721) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.793386812756799, LR: 0.0003 +[2026-03-02 13:24:50] (step=0034722) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.793582469184113, LR: 0.0003 +[2026-03-02 13:24:58] (step=0034723) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.793778125611427, LR: 0.0003 +[2026-03-02 13:25:06] (step=0034724) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.79397378203874, LR: 0.0003 +[2026-03-02 13:25:13] (step=0034725) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.794169438466054, LR: 0.0003 +[2026-03-02 13:25:21] (step=0034726) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.794365094893367, LR: 0.0003 +[2026-03-02 13:25:29] (step=0034727) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.794560751320681, LR: 0.0003 +[2026-03-02 13:25:37] (step=0034728) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.794756407747995, LR: 0.0003 +[2026-03-02 13:25:45] (step=0034729) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.794952064175308, LR: 0.0003 +[2026-03-02 13:25:53] (step=0034730) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.795147720602622, LR: 0.0003 +[2026-03-02 13:26:00] (step=0034731) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.795343377029935, LR: 0.0003 +[2026-03-02 13:26:08] (step=0034732) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.795539033457249, LR: 0.0003 +[2026-03-02 13:26:16] (step=0034733) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.795734689884562, LR: 0.0003 +[2026-03-02 13:26:24] (step=0034734) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.795930346311876, LR: 0.0003 +[2026-03-02 13:26:32] (step=0034735) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.79612600273919, LR: 0.0003 +[2026-03-02 13:26:40] (step=0034736) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.796321659166503, LR: 0.0003 +[2026-03-02 13:26:48] (step=0034737) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 6.796517315593817, LR: 0.0003 +[2026-03-02 13:26:55] (step=0034738) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.796712972021131, LR: 0.0003 +[2026-03-02 13:27:03] (step=0034739) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.796908628448445, LR: 0.0003 +[2026-03-02 13:27:11] (step=0034740) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.797104284875759, LR: 0.0003 +[2026-03-02 13:27:19] (step=0034741) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.797299941303072, LR: 0.0003 +[2026-03-02 13:27:27] (step=0034742) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.797495597730386, LR: 0.0003 +[2026-03-02 13:27:35] (step=0034743) Train Loss: 0.4362, Train Steps/Sec: 0.12, Epoch: 6.797691254157699, LR: 0.0003 +[2026-03-02 13:27:43] (step=0034744) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.797886910585013, LR: 0.0003 +[2026-03-02 13:27:51] (step=0034745) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 6.798082567012326, LR: 0.0003 +[2026-03-02 13:27:58] (step=0034746) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 6.79827822343964, LR: 0.0003 +[2026-03-02 13:28:06] (step=0034747) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.798473879866954, LR: 0.0003 +[2026-03-02 13:28:14] (step=0034748) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 6.798669536294267, LR: 0.0003 +[2026-03-02 13:28:22] (step=0034749) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.798865192721581, LR: 0.0003 +[2026-03-02 13:28:30] (step=0034750) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.799060849148894, LR: 0.0003 +[2026-03-02 13:28:38] (step=0034751) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.799256505576208, LR: 0.0003 +[2026-03-02 13:28:45] (step=0034752) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.799452162003522, LR: 0.0003 +[2026-03-02 13:28:53] (step=0034753) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.799647818430835, LR: 0.0003 +[2026-03-02 13:29:01] (step=0034754) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.799843474858149, LR: 0.0003 +[2026-03-02 13:29:09] (step=0034755) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.8000391312854624, LR: 0.0003 +[2026-03-02 13:29:17] (step=0034756) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.8002347877127765, LR: 0.0003 +[2026-03-02 13:29:25] (step=0034757) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.80043044414009, LR: 0.0003 +[2026-03-02 13:29:32] (step=0034758) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.800626100567404, LR: 0.0003 +[2026-03-02 13:29:41] (step=0034759) Train Loss: 0.4440, Train Steps/Sec: 0.12, Epoch: 6.800821756994718, LR: 0.0003 +[2026-03-02 13:29:48] (step=0034760) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.801017413422031, LR: 0.0003 +[2026-03-02 13:29:56] (step=0034761) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.801213069849345, LR: 0.0003 +[2026-03-02 13:30:04] (step=0034762) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.801408726276658, LR: 0.0003 +[2026-03-02 13:30:12] (step=0034763) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.801604382703972, LR: 0.0003 +[2026-03-02 13:30:20] (step=0034764) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.801800039131286, LR: 0.0003 +[2026-03-02 13:30:28] (step=0034765) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.801995695558599, LR: 0.0003 +[2026-03-02 13:30:36] (step=0034766) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.802191351985913, LR: 0.0003 +[2026-03-02 13:30:44] (step=0034767) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.802387008413226, LR: 0.0003 +[2026-03-02 13:30:51] (step=0034768) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.80258266484054, LR: 0.0003 +[2026-03-02 13:30:59] (step=0034769) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.802778321267854, LR: 0.0003 +[2026-03-02 13:31:07] (step=0034770) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.802973977695167, LR: 0.0003 +[2026-03-02 13:31:15] (step=0034771) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.803169634122481, LR: 0.0003 +[2026-03-02 13:31:23] (step=0034772) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.803365290549794, LR: 0.0003 +[2026-03-02 13:31:31] (step=0034773) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.803560946977108, LR: 0.0003 +[2026-03-02 13:31:38] (step=0034774) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.8037566034044215, LR: 0.0003 +[2026-03-02 13:31:46] (step=0034775) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.8039522598317355, LR: 0.0003 +[2026-03-02 13:31:54] (step=0034776) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.8041479162590495, LR: 0.0003 +[2026-03-02 13:32:02] (step=0034777) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.804343572686363, LR: 0.0003 +[2026-03-02 13:32:10] (step=0034778) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.804539229113677, LR: 0.0003 +[2026-03-02 13:32:18] (step=0034779) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.80473488554099, LR: 0.0003 +[2026-03-02 13:32:26] (step=0034780) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.804930541968304, LR: 0.0003 +[2026-03-02 13:32:34] (step=0034781) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.805126198395618, LR: 0.0003 +[2026-03-02 13:32:41] (step=0034782) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 6.805321854822931, LR: 0.0003 +[2026-03-02 13:32:49] (step=0034783) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.805517511250245, LR: 0.0003 +[2026-03-02 13:32:57] (step=0034784) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 6.805713167677558, LR: 0.0003 +[2026-03-02 13:33:05] (step=0034785) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.805908824104872, LR: 0.0003 +[2026-03-02 13:33:13] (step=0034786) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 6.806104480532185, LR: 0.0003 +[2026-03-02 13:33:21] (step=0034787) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.806300136959499, LR: 0.0003 +[2026-03-02 13:33:29] (step=0034788) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.806495793386813, LR: 0.0003 +[2026-03-02 13:33:37] (step=0034789) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.806691449814126, LR: 0.0003 +[2026-03-02 13:33:44] (step=0034790) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 6.80688710624144, LR: 0.0003 +[2026-03-02 13:33:52] (step=0034791) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.807082762668753, LR: 0.0003 +[2026-03-02 13:34:00] (step=0034792) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.807278419096067, LR: 0.0003 +[2026-03-02 13:34:08] (step=0034793) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.807474075523381, LR: 0.0003 +[2026-03-02 13:34:16] (step=0034794) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.8076697319506945, LR: 0.0003 +[2026-03-02 13:34:24] (step=0034795) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.8078653883780085, LR: 0.0003 +[2026-03-02 13:34:32] (step=0034796) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.808061044805322, LR: 0.0003 +[2026-03-02 13:34:40] (step=0034797) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.808256701232636, LR: 0.0003 +[2026-03-02 13:34:47] (step=0034798) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 6.808452357659949, LR: 0.0003 +[2026-03-02 13:34:55] (step=0034799) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.808648014087263, LR: 0.0003 +[2026-03-02 13:35:03] (step=0034800) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.808843670514577, LR: 0.0003 +[2026-03-02 13:35:11] (step=0034801) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 6.80903932694189, LR: 0.0003 +[2026-03-02 13:35:19] (step=0034802) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.809234983369204, LR: 0.0003 +[2026-03-02 13:35:27] (step=0034803) Train Loss: 0.4415, Train Steps/Sec: 0.12, Epoch: 6.809430639796517, LR: 0.0003 +[2026-03-02 13:35:35] (step=0034804) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 6.809626296223831, LR: 0.0003 +[2026-03-02 13:35:43] (step=0034805) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.809821952651145, LR: 0.0003 +[2026-03-02 13:35:50] (step=0034806) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.810017609078458, LR: 0.0003 +[2026-03-02 13:35:58] (step=0034807) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.810213265505772, LR: 0.0003 +[2026-03-02 13:36:06] (step=0034808) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.810408921933085, LR: 0.0003 +[2026-03-02 13:36:14] (step=0034809) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.810604578360399, LR: 0.0003 +[2026-03-02 13:36:22] (step=0034810) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.810800234787712, LR: 0.0003 +[2026-03-02 13:36:30] (step=0034811) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.810995891215026, LR: 0.0003 +[2026-03-02 13:36:37] (step=0034812) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.81119154764234, LR: 0.0003 +[2026-03-02 13:36:45] (step=0034813) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.8113872040696535, LR: 0.0003 +[2026-03-02 13:36:53] (step=0034814) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.8115828604969675, LR: 0.0003 +[2026-03-02 13:37:01] (step=0034815) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 6.811778516924281, LR: 0.0003 +[2026-03-02 13:37:09] (step=0034816) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.811974173351595, LR: 0.0003 +[2026-03-02 13:37:17] (step=0034817) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.812169829778909, LR: 0.0003 +[2026-03-02 13:37:25] (step=0034818) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.812365486206222, LR: 0.0003 +[2026-03-02 13:37:33] (step=0034819) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.812561142633536, LR: 0.0003 +[2026-03-02 13:37:40] (step=0034820) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.812756799060849, LR: 0.0003 +[2026-03-02 13:37:48] (step=0034821) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.812952455488163, LR: 0.0003 +[2026-03-02 13:37:56] (step=0034822) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.813148111915476, LR: 0.0003 +[2026-03-02 13:38:04] (step=0034823) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.81334376834279, LR: 0.0003 +[2026-03-02 13:38:12] (step=0034824) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.813539424770104, LR: 0.0003 +[2026-03-02 13:38:20] (step=0034825) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.813735081197417, LR: 0.0003 +[2026-03-02 13:38:28] (step=0034826) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.813930737624731, LR: 0.0003 +[2026-03-02 13:38:35] (step=0034827) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.814126394052044, LR: 0.0003 +[2026-03-02 13:38:43] (step=0034828) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.814322050479358, LR: 0.0003 +[2026-03-02 13:38:51] (step=0034829) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.814517706906672, LR: 0.0003 +[2026-03-02 13:38:59] (step=0034830) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.814713363333985, LR: 0.0003 +[2026-03-02 13:39:07] (step=0034831) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 6.814909019761299, LR: 0.0003 +[2026-03-02 13:39:15] (step=0034832) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.8151046761886125, LR: 0.0003 +[2026-03-02 13:39:23] (step=0034833) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 6.8153003326159265, LR: 0.0003 +[2026-03-02 13:39:31] (step=0034834) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.8154959890432405, LR: 0.0003 +[2026-03-02 13:39:38] (step=0034835) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.815691645470554, LR: 0.0003 +[2026-03-02 13:39:46] (step=0034836) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.815887301897868, LR: 0.0003 +[2026-03-02 13:39:54] (step=0034837) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.816082958325181, LR: 0.0003 +[2026-03-02 13:40:02] (step=0034838) Train Loss: 0.4571, Train Steps/Sec: 0.12, Epoch: 6.816278614752495, LR: 0.0003 +[2026-03-02 13:40:10] (step=0034839) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.816474271179808, LR: 0.0003 +[2026-03-02 13:40:18] (step=0034840) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.816669927607122, LR: 0.0003 +[2026-03-02 13:40:26] (step=0034841) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.816865584034436, LR: 0.0003 +[2026-03-02 13:40:34] (step=0034842) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.817061240461749, LR: 0.0003 +[2026-03-02 13:40:41] (step=0034843) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.817256896889063, LR: 0.0003 +[2026-03-02 13:40:49] (step=0034844) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 6.817452553316376, LR: 0.0003 +[2026-03-02 13:40:57] (step=0034845) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.81764820974369, LR: 0.0003 +[2026-03-02 13:41:05] (step=0034846) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.817843866171004, LR: 0.0003 +[2026-03-02 13:41:13] (step=0034847) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.818039522598317, LR: 0.0003 +[2026-03-02 13:41:21] (step=0034848) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.818235179025631, LR: 0.0003 +[2026-03-02 13:41:29] (step=0034849) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.818430835452944, LR: 0.0003 +[2026-03-02 13:41:36] (step=0034850) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.818626491880258, LR: 0.0003 +[2026-03-02 13:41:44] (step=0034851) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.8188221483075715, LR: 0.0003 +[2026-03-02 13:41:52] (step=0034852) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.8190178047348855, LR: 0.0003 +[2026-03-02 13:42:00] (step=0034853) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.8192134611621995, LR: 0.0003 +[2026-03-02 13:42:08] (step=0034854) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.819409117589513, LR: 0.0003 +[2026-03-02 13:42:16] (step=0034855) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.819604774016827, LR: 0.0003 +[2026-03-02 13:42:24] (step=0034856) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.81980043044414, LR: 0.0003 +[2026-03-02 13:42:31] (step=0034857) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.819996086871454, LR: 0.0003 +[2026-03-02 13:42:39] (step=0034858) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.820191743298768, LR: 0.0003 +[2026-03-02 13:42:47] (step=0034859) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.820387399726081, LR: 0.0003 +[2026-03-02 13:42:55] (step=0034860) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.820583056153395, LR: 0.0003 +[2026-03-02 13:43:03] (step=0034861) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.820778712580708, LR: 0.0003 +[2026-03-02 13:43:11] (step=0034862) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.820974369008022, LR: 0.0003 +[2026-03-02 13:43:19] (step=0034863) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.821170025435335, LR: 0.0003 +[2026-03-02 13:43:26] (step=0034864) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.821365681862649, LR: 0.0003 +[2026-03-02 13:43:34] (step=0034865) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.821561338289963, LR: 0.0003 +[2026-03-02 13:43:42] (step=0034866) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.821756994717276, LR: 0.0003 +[2026-03-02 13:43:50] (step=0034867) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.82195265114459, LR: 0.0003 +[2026-03-02 13:43:58] (step=0034868) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.822148307571903, LR: 0.0003 +[2026-03-02 13:44:06] (step=0034869) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.822343963999217, LR: 0.0003 +[2026-03-02 13:44:14] (step=0034870) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.822539620426531, LR: 0.0003 +[2026-03-02 13:44:21] (step=0034871) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.8227352768538445, LR: 0.0003 +[2026-03-02 13:44:29] (step=0034872) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.8229309332811585, LR: 0.0003 +[2026-03-02 13:44:37] (step=0034873) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.823126589708472, LR: 0.0003 +[2026-03-02 13:44:45] (step=0034874) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.823322246135786, LR: 0.0003 +[2026-03-02 13:44:53] (step=0034875) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 6.823517902563099, LR: 0.0003 +[2026-03-02 13:45:01] (step=0034876) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.823713558990413, LR: 0.0003 +[2026-03-02 13:45:09] (step=0034877) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.823909215417727, LR: 0.0003 +[2026-03-02 13:45:16] (step=0034878) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.82410487184504, LR: 0.0003 +[2026-03-02 13:45:24] (step=0034879) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 6.824300528272354, LR: 0.0003 +[2026-03-02 13:45:32] (step=0034880) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 6.824496184699667, LR: 0.0003 +[2026-03-02 13:45:40] (step=0034881) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.824691841126981, LR: 0.0003 +[2026-03-02 13:45:48] (step=0034882) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 6.824887497554295, LR: 0.0003 +[2026-03-02 13:45:56] (step=0034883) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.825083153981608, LR: 0.0003 +[2026-03-02 13:46:04] (step=0034884) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.825278810408922, LR: 0.0003 +[2026-03-02 13:46:11] (step=0034885) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.825474466836235, LR: 0.0003 +[2026-03-02 13:46:19] (step=0034886) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 6.825670123263549, LR: 0.0003 +[2026-03-02 13:46:27] (step=0034887) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 6.825865779690863, LR: 0.0003 +[2026-03-02 13:46:35] (step=0034888) Train Loss: 0.4358, Train Steps/Sec: 0.12, Epoch: 6.826061436118176, LR: 0.0003 +[2026-03-02 13:46:43] (step=0034889) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 6.82625709254549, LR: 0.0003 +[2026-03-02 13:46:51] (step=0034890) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.8264527489728035, LR: 0.0003 +[2026-03-02 13:46:59] (step=0034891) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.8266484054001175, LR: 0.0003 +[2026-03-02 13:47:07] (step=0034892) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.826844061827431, LR: 0.0003 +[2026-03-02 13:47:15] (step=0034893) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.827039718254745, LR: 0.0003 +[2026-03-02 13:47:22] (step=0034894) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.827235374682059, LR: 0.0003 +[2026-03-02 13:47:30] (step=0034895) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 6.827431031109372, LR: 0.0003 +[2026-03-02 13:47:38] (step=0034896) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.827626687536686, LR: 0.0003 +[2026-03-02 13:47:46] (step=0034897) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.827822343963999, LR: 0.0003 +[2026-03-02 13:47:54] (step=0034898) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.828018000391313, LR: 0.0003 +[2026-03-02 13:48:02] (step=0034899) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.828213656818627, LR: 0.0003 +[2026-03-02 13:48:10] (step=0034900) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.82840931324594, LR: 0.0003 +[2026-03-02 13:48:17] (step=0034901) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.828604969673254, LR: 0.0003 +[2026-03-02 13:48:25] (step=0034902) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.828800626100567, LR: 0.0003 +[2026-03-02 13:48:33] (step=0034903) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.828996282527881, LR: 0.0003 +[2026-03-02 13:48:41] (step=0034904) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 6.829191938955194, LR: 0.0003 +[2026-03-02 13:48:49] (step=0034905) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.829387595382508, LR: 0.0003 +[2026-03-02 13:48:57] (step=0034906) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.829583251809822, LR: 0.0003 +[2026-03-02 13:49:05] (step=0034907) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.829778908237135, LR: 0.0003 +[2026-03-02 13:49:12] (step=0034908) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.829974564664449, LR: 0.0003 +[2026-03-02 13:49:20] (step=0034909) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.8301702210917625, LR: 0.0003 +[2026-03-02 13:49:28] (step=0034910) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.8303658775190765, LR: 0.0003 +[2026-03-02 13:49:36] (step=0034911) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.8305615339463905, LR: 0.0003 +[2026-03-02 13:49:44] (step=0034912) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 6.830757190373704, LR: 0.0003 +[2026-03-02 13:49:52] (step=0034913) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 6.830952846801018, LR: 0.0003 +[2026-03-02 13:50:00] (step=0034914) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.831148503228331, LR: 0.0003 +[2026-03-02 13:50:08] (step=0034915) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.831344159655645, LR: 0.0003 +[2026-03-02 13:50:15] (step=0034916) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.831539816082958, LR: 0.0003 +[2026-03-02 13:50:23] (step=0034917) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.831735472510272, LR: 0.0003 +[2026-03-02 13:50:31] (step=0034918) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.831931128937586, LR: 0.0003 +[2026-03-02 13:50:39] (step=0034919) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.832126785364899, LR: 0.0003 +[2026-03-02 13:50:47] (step=0034920) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.832322441792213, LR: 0.0003 +[2026-03-02 13:50:55] (step=0034921) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 6.832518098219526, LR: 0.0003 +[2026-03-02 13:51:03] (step=0034922) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.83271375464684, LR: 0.0003 +[2026-03-02 13:51:10] (step=0034923) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.832909411074154, LR: 0.0003 +[2026-03-02 13:51:18] (step=0034924) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.833105067501467, LR: 0.0003 +[2026-03-02 13:51:26] (step=0034925) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 6.833300723928781, LR: 0.0003 +[2026-03-02 13:51:34] (step=0034926) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.833496380356094, LR: 0.0003 +[2026-03-02 13:51:42] (step=0034927) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.833692036783408, LR: 0.0003 +[2026-03-02 13:51:50] (step=0034928) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.8338876932107215, LR: 0.0003 +[2026-03-02 13:51:57] (step=0034929) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.8340833496380355, LR: 0.0003 +[2026-03-02 13:52:05] (step=0034930) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.8342790060653495, LR: 0.0003 +[2026-03-02 13:52:13] (step=0034931) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.834474662492663, LR: 0.0003 +[2026-03-02 13:52:21] (step=0034932) Train Loss: 0.4556, Train Steps/Sec: 0.12, Epoch: 6.834670318919977, LR: 0.0003 +[2026-03-02 13:52:29] (step=0034933) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.83486597534729, LR: 0.0003 +[2026-03-02 13:52:37] (step=0034934) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.835061631774604, LR: 0.0003 +[2026-03-02 13:52:45] (step=0034935) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.835257288201918, LR: 0.0003 +[2026-03-02 13:52:53] (step=0034936) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.835452944629231, LR: 0.0003 +[2026-03-02 13:53:01] (step=0034937) Train Loss: 0.4519, Train Steps/Sec: 0.12, Epoch: 6.835648601056545, LR: 0.0003 +[2026-03-02 13:53:09] (step=0034938) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.835844257483858, LR: 0.0003 +[2026-03-02 13:53:16] (step=0034939) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 6.836039913911172, LR: 0.0003 +[2026-03-02 13:53:24] (step=0034940) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.836235570338486, LR: 0.0003 +[2026-03-02 13:53:32] (step=0034941) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.836431226765799, LR: 0.0003 +[2026-03-02 13:53:40] (step=0034942) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.836626883193113, LR: 0.0003 +[2026-03-02 13:53:48] (step=0034943) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.836822539620426, LR: 0.0003 +[2026-03-02 13:53:56] (step=0034944) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.83701819604774, LR: 0.0003 +[2026-03-02 13:54:03] (step=0034945) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 6.837213852475053, LR: 0.0003 +[2026-03-02 13:54:11] (step=0034946) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.837409508902367, LR: 0.0003 +[2026-03-02 13:54:19] (step=0034947) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.837605165329681, LR: 0.0003 +[2026-03-02 13:54:27] (step=0034948) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.8378008217569946, LR: 0.0003 +[2026-03-02 13:54:35] (step=0034949) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.837996478184309, LR: 0.0003 +[2026-03-02 13:54:43] (step=0034950) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.838192134611622, LR: 0.0003 +[2026-03-02 13:54:51] (step=0034951) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.838387791038936, LR: 0.0003 +[2026-03-02 13:54:58] (step=0034952) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 6.83858344746625, LR: 0.0003 +[2026-03-02 13:55:06] (step=0034953) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 6.838779103893563, LR: 0.0003 +[2026-03-02 13:55:14] (step=0034954) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.838974760320877, LR: 0.0003 +[2026-03-02 13:55:22] (step=0034955) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.83917041674819, LR: 0.0003 +[2026-03-02 13:55:30] (step=0034956) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.839366073175504, LR: 0.0003 +[2026-03-02 13:55:38] (step=0034957) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.839561729602817, LR: 0.0003 +[2026-03-02 13:55:46] (step=0034958) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.839757386030131, LR: 0.0003 +[2026-03-02 13:55:53] (step=0034959) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.839953042457445, LR: 0.0003 +[2026-03-02 13:56:01] (step=0034960) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 6.840148698884758, LR: 0.0003 +[2026-03-02 13:56:09] (step=0034961) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.840344355312072, LR: 0.0003 +[2026-03-02 13:56:17] (step=0034962) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 6.840540011739385, LR: 0.0003 +[2026-03-02 13:56:25] (step=0034963) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.840735668166699, LR: 0.0003 +[2026-03-02 13:56:33] (step=0034964) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.840931324594013, LR: 0.0003 +[2026-03-02 13:56:40] (step=0034965) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.841126981021326, LR: 0.0003 +[2026-03-02 13:56:48] (step=0034966) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.84132263744864, LR: 0.0003 +[2026-03-02 13:56:56] (step=0034967) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.841518293875954, LR: 0.0003 +[2026-03-02 13:57:04] (step=0034968) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 6.841713950303268, LR: 0.0003 +[2026-03-02 13:57:12] (step=0034969) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.841909606730581, LR: 0.0003 +[2026-03-02 13:57:20] (step=0034970) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 6.842105263157895, LR: 0.0003 +[2026-03-02 13:57:28] (step=0034971) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.842300919585209, LR: 0.0003 +[2026-03-02 13:57:35] (step=0034972) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.842496576012522, LR: 0.0003 +[2026-03-02 13:57:43] (step=0034973) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.842692232439836, LR: 0.0003 +[2026-03-02 13:57:51] (step=0034974) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.842887888867149, LR: 0.0003 +[2026-03-02 13:57:59] (step=0034975) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.843083545294463, LR: 0.0003 +[2026-03-02 13:58:07] (step=0034976) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 6.843279201721777, LR: 0.0003 +[2026-03-02 13:58:15] (step=0034977) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.84347485814909, LR: 0.0003 +[2026-03-02 13:58:23] (step=0034978) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 6.843670514576404, LR: 0.0003 +[2026-03-02 13:58:30] (step=0034979) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.843866171003717, LR: 0.0003 +[2026-03-02 13:58:38] (step=0034980) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.844061827431031, LR: 0.0003 +[2026-03-02 13:58:46] (step=0034981) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.844257483858344, LR: 0.0003 +[2026-03-02 13:58:54] (step=0034982) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.844453140285658, LR: 0.0003 +[2026-03-02 13:59:02] (step=0034983) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.844648796712972, LR: 0.0003 +[2026-03-02 13:59:10] (step=0034984) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.8448444531402854, LR: 0.0003 +[2026-03-02 13:59:18] (step=0034985) Train Loss: 0.4477, Train Steps/Sec: 0.12, Epoch: 6.8450401095675995, LR: 0.0003 +[2026-03-02 13:59:26] (step=0034986) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.845235765994913, LR: 0.0003 +[2026-03-02 13:59:33] (step=0034987) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.845431422422227, LR: 0.0003 +[2026-03-02 13:59:41] (step=0034988) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.845627078849541, LR: 0.0003 +[2026-03-02 13:59:49] (step=0034989) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.845822735276854, LR: 0.0003 +[2026-03-02 13:59:57] (step=0034990) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 6.846018391704168, LR: 0.0003 +[2026-03-02 14:00:05] (step=0034991) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.846214048131481, LR: 0.0003 +[2026-03-02 14:00:13] (step=0034992) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.846409704558795, LR: 0.0003 +[2026-03-02 14:00:21] (step=0034993) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.846605360986109, LR: 0.0003 +[2026-03-02 14:00:28] (step=0034994) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.846801017413422, LR: 0.0003 +[2026-03-02 14:00:36] (step=0034995) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.846996673840736, LR: 0.0003 +[2026-03-02 14:00:44] (step=0034996) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.847192330268049, LR: 0.0003 +[2026-03-02 14:00:52] (step=0034997) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.847387986695363, LR: 0.0003 +[2026-03-02 14:01:00] (step=0034998) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.847583643122676, LR: 0.0003 +[2026-03-02 14:01:08] (step=0034999) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.84777929954999, LR: 0.0003 +[2026-03-02 14:01:15] (step=0035000) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.847974955977304, LR: 0.0003 +[2026-03-02 14:01:16] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0035000/ +[2026-03-02 14:01:23] (step=0035001) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.848170612404617, LR: 0.0003 +[2026-03-02 14:01:31] (step=0035002) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.848366268831931, LR: 0.0003 +[2026-03-02 14:01:39] (step=0035003) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.8485619252592445, LR: 0.0003 +[2026-03-02 14:01:47] (step=0035004) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.8487575816865585, LR: 0.0003 +[2026-03-02 14:01:55] (step=0035005) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.8489532381138725, LR: 0.0003 +[2026-03-02 14:02:03] (step=0035006) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.849148894541186, LR: 0.0003 +[2026-03-02 14:02:11] (step=0035007) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.8493445509685, LR: 0.0003 +[2026-03-02 14:02:18] (step=0035008) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.849540207395813, LR: 0.0003 +[2026-03-02 14:02:26] (step=0035009) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.849735863823127, LR: 0.0003 +[2026-03-02 14:02:34] (step=0035010) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.84993152025044, LR: 0.0003 +[2026-03-02 14:02:42] (step=0035011) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.850127176677754, LR: 0.0003 +[2026-03-02 14:02:50] (step=0035012) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.850322833105068, LR: 0.0003 +[2026-03-02 14:02:58] (step=0035013) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.850518489532381, LR: 0.0003 +[2026-03-02 14:03:06] (step=0035014) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.850714145959695, LR: 0.0003 +[2026-03-02 14:03:13] (step=0035015) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 6.850909802387008, LR: 0.0003 +[2026-03-02 14:03:21] (step=0035016) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 6.851105458814322, LR: 0.0003 +[2026-03-02 14:03:29] (step=0035017) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.851301115241636, LR: 0.0003 +[2026-03-02 14:03:37] (step=0035018) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.851496771668949, LR: 0.0003 +[2026-03-02 14:03:45] (step=0035019) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.851692428096263, LR: 0.0003 +[2026-03-02 14:03:53] (step=0035020) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.851888084523576, LR: 0.0003 +[2026-03-02 14:04:00] (step=0035021) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.85208374095089, LR: 0.0003 +[2026-03-02 14:04:08] (step=0035022) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.8522793973782035, LR: 0.0003 +[2026-03-02 14:04:16] (step=0035023) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.8524750538055175, LR: 0.0003 +[2026-03-02 14:04:24] (step=0035024) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.8526707102328315, LR: 0.0003 +[2026-03-02 14:04:32] (step=0035025) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.852866366660145, LR: 0.0003 +[2026-03-02 14:04:40] (step=0035026) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.853062023087459, LR: 0.0003 +[2026-03-02 14:04:48] (step=0035027) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 6.853257679514772, LR: 0.0003 +[2026-03-02 14:04:55] (step=0035028) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 6.853453335942086, LR: 0.0003 +[2026-03-02 14:05:03] (step=0035029) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.8536489923694, LR: 0.0003 +[2026-03-02 14:05:11] (step=0035030) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.853844648796713, LR: 0.0003 +[2026-03-02 14:05:19] (step=0035031) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.854040305224027, LR: 0.0003 +[2026-03-02 14:05:27] (step=0035032) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 6.85423596165134, LR: 0.0003 +[2026-03-02 14:05:35] (step=0035033) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.854431618078654, LR: 0.0003 +[2026-03-02 14:05:43] (step=0035034) Train Loss: 0.4404, Train Steps/Sec: 0.12, Epoch: 6.854627274505967, LR: 0.0003 +[2026-03-02 14:05:51] (step=0035035) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.854822930933281, LR: 0.0003 +[2026-03-02 14:05:58] (step=0035036) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.855018587360595, LR: 0.0003 +[2026-03-02 14:06:06] (step=0035037) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.855214243787908, LR: 0.0003 +[2026-03-02 14:06:14] (step=0035038) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.855409900215222, LR: 0.0003 +[2026-03-02 14:06:22] (step=0035039) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 6.855605556642535, LR: 0.0003 +[2026-03-02 14:06:30] (step=0035040) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.855801213069849, LR: 0.0003 +[2026-03-02 14:06:38] (step=0035041) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 6.855996869497163, LR: 0.0003 +[2026-03-02 14:06:45] (step=0035042) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.8561925259244765, LR: 0.0003 +[2026-03-02 14:06:53] (step=0035043) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.8563881823517905, LR: 0.0003 +[2026-03-02 14:07:01] (step=0035044) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.856583838779104, LR: 0.0003 +[2026-03-02 14:07:09] (step=0035045) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.856779495206418, LR: 0.0003 +[2026-03-02 14:07:17] (step=0035046) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.856975151633731, LR: 0.0003 +[2026-03-02 14:07:25] (step=0035047) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 6.857170808061045, LR: 0.0003 +[2026-03-02 14:07:33] (step=0035048) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.857366464488359, LR: 0.0003 +[2026-03-02 14:07:40] (step=0035049) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 6.857562120915672, LR: 0.0003 +[2026-03-02 14:07:48] (step=0035050) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.857757777342986, LR: 0.0003 +[2026-03-02 14:07:56] (step=0035051) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 6.857953433770299, LR: 0.0003 +[2026-03-02 14:08:04] (step=0035052) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 6.858149090197613, LR: 0.0003 +[2026-03-02 14:08:12] (step=0035053) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.858344746624927, LR: 0.0003 +[2026-03-02 14:08:20] (step=0035054) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.85854040305224, LR: 0.0003 +[2026-03-02 14:08:28] (step=0035055) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.858736059479554, LR: 0.0003 +[2026-03-02 14:08:35] (step=0035056) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.858931715906867, LR: 0.0003 +[2026-03-02 14:08:43] (step=0035057) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.859127372334181, LR: 0.0003 +[2026-03-02 14:08:51] (step=0035058) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.859323028761495, LR: 0.0003 +[2026-03-02 14:08:59] (step=0035059) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.859518685188808, LR: 0.0003 +[2026-03-02 14:09:07] (step=0035060) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.859714341616122, LR: 0.0003 +[2026-03-02 14:09:15] (step=0035061) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.8599099980434355, LR: 0.0003 +[2026-03-02 14:09:22] (step=0035062) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 6.8601056544707495, LR: 0.0003 +[2026-03-02 14:09:30] (step=0035063) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.860301310898063, LR: 0.0003 +[2026-03-02 14:09:38] (step=0035064) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.860496967325377, LR: 0.0003 +[2026-03-02 14:09:46] (step=0035065) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.860692623752691, LR: 0.0003 +[2026-03-02 14:09:54] (step=0035066) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 6.860888280180004, LR: 0.0003 +[2026-03-02 14:10:02] (step=0035067) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 6.861083936607318, LR: 0.0003 +[2026-03-02 14:10:10] (step=0035068) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.861279593034631, LR: 0.0003 +[2026-03-02 14:10:18] (step=0035069) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 6.861475249461945, LR: 0.0003 +[2026-03-02 14:10:25] (step=0035070) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.861670905889259, LR: 0.0003 +[2026-03-02 14:10:33] (step=0035071) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.861866562316572, LR: 0.0003 +[2026-03-02 14:10:41] (step=0035072) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.862062218743886, LR: 0.0003 +[2026-03-02 14:10:49] (step=0035073) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.862257875171199, LR: 0.0003 +[2026-03-02 14:10:57] (step=0035074) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.862453531598513, LR: 0.0003 +[2026-03-02 14:11:05] (step=0035075) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.862649188025826, LR: 0.0003 +[2026-03-02 14:11:12] (step=0035076) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 6.86284484445314, LR: 0.0003 +[2026-03-02 14:11:20] (step=0035077) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.863040500880454, LR: 0.0003 +[2026-03-02 14:11:28] (step=0035078) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 6.863236157307767, LR: 0.0003 +[2026-03-02 14:11:36] (step=0035079) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 6.863431813735081, LR: 0.0003 +[2026-03-02 14:11:44] (step=0035080) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.8636274701623945, LR: 0.0003 +[2026-03-02 14:11:52] (step=0035081) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.8638231265897085, LR: 0.0003 +[2026-03-02 14:12:00] (step=0035082) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.8640187830170225, LR: 0.0003 +[2026-03-02 14:12:08] (step=0035083) Train Loss: 0.4368, Train Steps/Sec: 0.12, Epoch: 6.864214439444336, LR: 0.0003 +[2026-03-02 14:12:15] (step=0035084) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.86441009587165, LR: 0.0003 +[2026-03-02 14:12:23] (step=0035085) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.864605752298963, LR: 0.0003 +[2026-03-02 14:12:31] (step=0035086) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.864801408726277, LR: 0.0003 +[2026-03-02 14:12:39] (step=0035087) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.86499706515359, LR: 0.0003 +[2026-03-02 14:12:47] (step=0035088) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.865192721580904, LR: 0.0003 +[2026-03-02 14:12:55] (step=0035089) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.865388378008218, LR: 0.0003 +[2026-03-02 14:13:03] (step=0035090) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.865584034435531, LR: 0.0003 +[2026-03-02 14:13:10] (step=0035091) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.865779690862845, LR: 0.0003 +[2026-03-02 14:13:18] (step=0035092) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 6.865975347290158, LR: 0.0003 +[2026-03-02 14:13:26] (step=0035093) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.866171003717472, LR: 0.0003 +[2026-03-02 14:13:34] (step=0035094) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 6.866366660144786, LR: 0.0003 +[2026-03-02 14:13:42] (step=0035095) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.866562316572099, LR: 0.0003 +[2026-03-02 14:13:50] (step=0035096) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.866757972999413, LR: 0.0003 +[2026-03-02 14:13:58] (step=0035097) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.866953629426726, LR: 0.0003 +[2026-03-02 14:14:05] (step=0035098) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.86714928585404, LR: 0.0003 +[2026-03-02 14:14:13] (step=0035099) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.8673449422813535, LR: 0.0003 +[2026-03-02 14:14:21] (step=0035100) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.8675405987086675, LR: 0.0003 +[2026-03-02 14:14:29] (step=0035101) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.8677362551359815, LR: 0.0003 +[2026-03-02 14:14:37] (step=0035102) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.867931911563295, LR: 0.0003 +[2026-03-02 14:14:45] (step=0035103) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 6.868127567990609, LR: 0.0003 +[2026-03-02 14:14:53] (step=0035104) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.868323224417922, LR: 0.0003 +[2026-03-02 14:15:00] (step=0035105) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.868518880845236, LR: 0.0003 +[2026-03-02 14:15:08] (step=0035106) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.86871453727255, LR: 0.0003 +[2026-03-02 14:15:16] (step=0035107) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.868910193699863, LR: 0.0003 +[2026-03-02 14:15:24] (step=0035108) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.869105850127177, LR: 0.0003 +[2026-03-02 14:15:32] (step=0035109) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 6.86930150655449, LR: 0.0003 +[2026-03-02 14:15:40] (step=0035110) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.869497162981804, LR: 0.0003 +[2026-03-02 14:15:47] (step=0035111) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 6.869692819409118, LR: 0.0003 +[2026-03-02 14:15:55] (step=0035112) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.869888475836431, LR: 0.0003 +[2026-03-02 14:16:03] (step=0035113) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.870084132263745, LR: 0.0003 +[2026-03-02 14:16:11] (step=0035114) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 6.870279788691058, LR: 0.0003 +[2026-03-02 14:16:19] (step=0035115) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.870475445118372, LR: 0.0003 +[2026-03-02 14:16:27] (step=0035116) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.870671101545685, LR: 0.0003 +[2026-03-02 14:16:35] (step=0035117) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.870866757972999, LR: 0.0003 +[2026-03-02 14:16:42] (step=0035118) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.871062414400313, LR: 0.0003 +[2026-03-02 14:16:50] (step=0035119) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.8712580708276265, LR: 0.0003 +[2026-03-02 14:16:58] (step=0035120) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.8714537272549405, LR: 0.0003 +[2026-03-02 14:17:06] (step=0035121) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.871649383682254, LR: 0.0003 +[2026-03-02 14:17:14] (step=0035122) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.871845040109568, LR: 0.0003 +[2026-03-02 14:17:22] (step=0035123) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.872040696536882, LR: 0.0003 +[2026-03-02 14:17:29] (step=0035124) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.872236352964195, LR: 0.0003 +[2026-03-02 14:17:37] (step=0035125) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.872432009391509, LR: 0.0003 +[2026-03-02 14:17:45] (step=0035126) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 6.872627665818822, LR: 0.0003 +[2026-03-02 14:17:53] (step=0035127) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.872823322246136, LR: 0.0003 +[2026-03-02 14:18:01] (step=0035128) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 6.873018978673449, LR: 0.0003 +[2026-03-02 14:18:09] (step=0035129) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.873214635100763, LR: 0.0003 +[2026-03-02 14:18:17] (step=0035130) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.873410291528077, LR: 0.0003 +[2026-03-02 14:18:24] (step=0035131) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.87360594795539, LR: 0.0003 +[2026-03-02 14:18:32] (step=0035132) Train Loss: 0.4556, Train Steps/Sec: 0.12, Epoch: 6.873801604382704, LR: 0.0003 +[2026-03-02 14:18:40] (step=0035133) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.873997260810017, LR: 0.0003 +[2026-03-02 14:18:48] (step=0035134) Train Loss: 0.4342, Train Steps/Sec: 0.12, Epoch: 6.874192917237331, LR: 0.0003 +[2026-03-02 14:18:56] (step=0035135) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.874388573664645, LR: 0.0003 +[2026-03-02 14:19:04] (step=0035136) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.874584230091958, LR: 0.0003 +[2026-03-02 14:19:12] (step=0035137) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.874779886519272, LR: 0.0003 +[2026-03-02 14:19:20] (step=0035138) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.8749755429465855, LR: 0.0003 +[2026-03-02 14:19:28] (step=0035139) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.8751711993738995, LR: 0.0003 +[2026-03-02 14:19:35] (step=0035140) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.875366855801213, LR: 0.0003 +[2026-03-02 14:19:43] (step=0035141) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.875562512228527, LR: 0.0003 +[2026-03-02 14:19:51] (step=0035142) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.875758168655841, LR: 0.0003 +[2026-03-02 14:19:59] (step=0035143) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.875953825083154, LR: 0.0003 +[2026-03-02 14:20:07] (step=0035144) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.876149481510468, LR: 0.0003 +[2026-03-02 14:20:15] (step=0035145) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 6.876345137937781, LR: 0.0003 +[2026-03-02 14:20:23] (step=0035146) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.876540794365095, LR: 0.0003 +[2026-03-02 14:20:30] (step=0035147) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.876736450792409, LR: 0.0003 +[2026-03-02 14:20:38] (step=0035148) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.876932107219722, LR: 0.0003 +[2026-03-02 14:20:46] (step=0035149) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 6.877127763647036, LR: 0.0003 +[2026-03-02 14:20:54] (step=0035150) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.877323420074349, LR: 0.0003 +[2026-03-02 14:21:02] (step=0035151) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.877519076501663, LR: 0.0003 +[2026-03-02 14:21:10] (step=0035152) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.877714732928976, LR: 0.0003 +[2026-03-02 14:21:17] (step=0035153) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.87791038935629, LR: 0.0003 +[2026-03-02 14:21:25] (step=0035154) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.878106045783604, LR: 0.0003 +[2026-03-02 14:21:33] (step=0035155) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 6.878301702210917, LR: 0.0003 +[2026-03-02 14:21:41] (step=0035156) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.878497358638231, LR: 0.0003 +[2026-03-02 14:21:49] (step=0035157) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 6.8786930150655445, LR: 0.0003 +[2026-03-02 14:21:57] (step=0035158) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.8788886714928585, LR: 0.0003 +[2026-03-02 14:22:05] (step=0035159) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.8790843279201725, LR: 0.0003 +[2026-03-02 14:22:12] (step=0035160) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.879279984347486, LR: 0.0003 +[2026-03-02 14:22:20] (step=0035161) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.8794756407748, LR: 0.0003 +[2026-03-02 14:22:28] (step=0035162) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.879671297202113, LR: 0.0003 +[2026-03-02 14:22:36] (step=0035163) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.879866953629427, LR: 0.0003 +[2026-03-02 14:22:44] (step=0035164) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.880062610056741, LR: 0.0003 +[2026-03-02 14:22:52] (step=0035165) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.880258266484054, LR: 0.0003 +[2026-03-02 14:23:00] (step=0035166) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.880453922911368, LR: 0.0003 +[2026-03-02 14:23:08] (step=0035167) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.880649579338681, LR: 0.0003 +[2026-03-02 14:23:15] (step=0035168) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 6.880845235765995, LR: 0.0003 +[2026-03-02 14:23:23] (step=0035169) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 6.881040892193308, LR: 0.0003 +[2026-03-02 14:23:31] (step=0035170) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.881236548620622, LR: 0.0003 +[2026-03-02 14:23:39] (step=0035171) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 6.881432205047936, LR: 0.0003 +[2026-03-02 14:23:47] (step=0035172) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.881627861475249, LR: 0.0003 +[2026-03-02 14:23:55] (step=0035173) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.881823517902563, LR: 0.0003 +[2026-03-02 14:24:03] (step=0035174) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.882019174329876, LR: 0.0003 +[2026-03-02 14:24:10] (step=0035175) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 6.88221483075719, LR: 0.0003 +[2026-03-02 14:24:18] (step=0035176) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.882410487184504, LR: 0.0003 +[2026-03-02 14:24:26] (step=0035177) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.8826061436118176, LR: 0.0003 +[2026-03-02 14:24:34] (step=0035178) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.882801800039132, LR: 0.0003 +[2026-03-02 14:24:42] (step=0035179) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.882997456466445, LR: 0.0003 +[2026-03-02 14:24:50] (step=0035180) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.883193112893759, LR: 0.0003 +[2026-03-02 14:24:58] (step=0035181) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 6.883388769321072, LR: 0.0003 +[2026-03-02 14:25:05] (step=0035182) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.883584425748386, LR: 0.0003 +[2026-03-02 14:25:13] (step=0035183) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.8837800821757, LR: 0.0003 +[2026-03-02 14:25:21] (step=0035184) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.883975738603013, LR: 0.0003 +[2026-03-02 14:25:29] (step=0035185) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 6.884171395030327, LR: 0.0003 +[2026-03-02 14:25:37] (step=0035186) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 6.88436705145764, LR: 0.0003 +[2026-03-02 14:25:45] (step=0035187) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.884562707884954, LR: 0.0003 +[2026-03-02 14:25:53] (step=0035188) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.884758364312268, LR: 0.0003 +[2026-03-02 14:26:01] (step=0035189) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.884954020739581, LR: 0.0003 +[2026-03-02 14:26:09] (step=0035190) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.885149677166895, LR: 0.0003 +[2026-03-02 14:26:16] (step=0035191) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.885345333594208, LR: 0.0003 +[2026-03-02 14:26:24] (step=0035192) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.885540990021522, LR: 0.0003 +[2026-03-02 14:26:32] (step=0035193) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.885736646448835, LR: 0.0003 +[2026-03-02 14:26:40] (step=0035194) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 6.885932302876149, LR: 0.0003 +[2026-03-02 14:26:48] (step=0035195) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.886127959303463, LR: 0.0003 +[2026-03-02 14:26:56] (step=0035196) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 6.886323615730777, LR: 0.0003 +[2026-03-02 14:27:03] (step=0035197) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.886519272158091, LR: 0.0003 +[2026-03-02 14:27:11] (step=0035198) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.886714928585404, LR: 0.0003 +[2026-03-02 14:27:19] (step=0035199) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.886910585012718, LR: 0.0003 +[2026-03-02 14:27:27] (step=0035200) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.887106241440032, LR: 0.0003 +[2026-03-02 14:27:35] (step=0035201) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.887301897867345, LR: 0.0003 +[2026-03-02 14:27:43] (step=0035202) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 6.887497554294659, LR: 0.0003 +[2026-03-02 14:27:51] (step=0035203) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.887693210721972, LR: 0.0003 +[2026-03-02 14:27:59] (step=0035204) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.887888867149286, LR: 0.0003 +[2026-03-02 14:28:06] (step=0035205) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.888084523576599, LR: 0.0003 +[2026-03-02 14:28:14] (step=0035206) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.888280180003913, LR: 0.0003 +[2026-03-02 14:28:22] (step=0035207) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.888475836431227, LR: 0.0003 +[2026-03-02 14:28:30] (step=0035208) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.88867149285854, LR: 0.0003 +[2026-03-02 14:28:38] (step=0035209) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.888867149285854, LR: 0.0003 +[2026-03-02 14:28:46] (step=0035210) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 6.889062805713167, LR: 0.0003 +[2026-03-02 14:28:54] (step=0035211) Train Loss: 0.4439, Train Steps/Sec: 0.12, Epoch: 6.889258462140481, LR: 0.0003 +[2026-03-02 14:29:02] (step=0035212) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.889454118567795, LR: 0.0003 +[2026-03-02 14:29:09] (step=0035213) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.8896497749951084, LR: 0.0003 +[2026-03-02 14:29:17] (step=0035214) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.8898454314224225, LR: 0.0003 +[2026-03-02 14:29:25] (step=0035215) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.890041087849736, LR: 0.0003 +[2026-03-02 14:29:33] (step=0035216) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.89023674427705, LR: 0.0003 +[2026-03-02 14:29:41] (step=0035217) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.890432400704364, LR: 0.0003 +[2026-03-02 14:29:49] (step=0035218) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.890628057131677, LR: 0.0003 +[2026-03-02 14:29:56] (step=0035219) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.890823713558991, LR: 0.0003 +[2026-03-02 14:30:04] (step=0035220) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 6.891019369986304, LR: 0.0003 +[2026-03-02 14:30:12] (step=0035221) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.891215026413618, LR: 0.0003 +[2026-03-02 14:30:20] (step=0035222) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.891410682840931, LR: 0.0003 +[2026-03-02 14:30:28] (step=0035223) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 6.891606339268245, LR: 0.0003 +[2026-03-02 14:30:36] (step=0035224) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.891801995695559, LR: 0.0003 +[2026-03-02 14:30:44] (step=0035225) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.891997652122872, LR: 0.0003 +[2026-03-02 14:30:51] (step=0035226) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.892193308550186, LR: 0.0003 +[2026-03-02 14:30:59] (step=0035227) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 6.892388964977499, LR: 0.0003 +[2026-03-02 14:31:07] (step=0035228) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 6.892584621404813, LR: 0.0003 +[2026-03-02 14:31:15] (step=0035229) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.892780277832127, LR: 0.0003 +[2026-03-02 14:31:23] (step=0035230) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.89297593425944, LR: 0.0003 +[2026-03-02 14:31:31] (step=0035231) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.893171590686754, LR: 0.0003 +[2026-03-02 14:31:39] (step=0035232) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.8933672471140675, LR: 0.0003 +[2026-03-02 14:31:47] (step=0035233) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.8935629035413815, LR: 0.0003 +[2026-03-02 14:31:54] (step=0035234) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 6.893758559968695, LR: 0.0003 +[2026-03-02 14:32:02] (step=0035235) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.893954216396009, LR: 0.0003 +[2026-03-02 14:32:10] (step=0035236) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 6.894149872823323, LR: 0.0003 +[2026-03-02 14:32:18] (step=0035237) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.894345529250636, LR: 0.0003 +[2026-03-02 14:32:26] (step=0035238) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.89454118567795, LR: 0.0003 +[2026-03-02 14:32:34] (step=0035239) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.894736842105263, LR: 0.0003 +[2026-03-02 14:32:42] (step=0035240) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.894932498532577, LR: 0.0003 +[2026-03-02 14:32:49] (step=0035241) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.895128154959891, LR: 0.0003 +[2026-03-02 14:32:57] (step=0035242) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 6.895323811387204, LR: 0.0003 +[2026-03-02 14:33:05] (step=0035243) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.895519467814518, LR: 0.0003 +[2026-03-02 14:33:13] (step=0035244) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.895715124241831, LR: 0.0003 +[2026-03-02 14:33:21] (step=0035245) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.895910780669145, LR: 0.0003 +[2026-03-02 14:33:29] (step=0035246) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.896106437096458, LR: 0.0003 +[2026-03-02 14:33:36] (step=0035247) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.896302093523772, LR: 0.0003 +[2026-03-02 14:33:44] (step=0035248) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.896497749951086, LR: 0.0003 +[2026-03-02 14:33:52] (step=0035249) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.896693406378399, LR: 0.0003 +[2026-03-02 14:34:00] (step=0035250) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.896889062805713, LR: 0.0003 +[2026-03-02 14:34:08] (step=0035251) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.8970847192330265, LR: 0.0003 +[2026-03-02 14:34:16] (step=0035252) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 6.8972803756603405, LR: 0.0003 +[2026-03-02 14:34:24] (step=0035253) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.8974760320876545, LR: 0.0003 +[2026-03-02 14:34:31] (step=0035254) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.897671688514968, LR: 0.0003 +[2026-03-02 14:34:39] (step=0035255) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.897867344942282, LR: 0.0003 +[2026-03-02 14:34:47] (step=0035256) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 6.898063001369595, LR: 0.0003 +[2026-03-02 14:34:55] (step=0035257) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.898258657796909, LR: 0.0003 +[2026-03-02 14:35:03] (step=0035258) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.898454314224222, LR: 0.0003 +[2026-03-02 14:35:11] (step=0035259) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 6.898649970651536, LR: 0.0003 +[2026-03-02 14:35:18] (step=0035260) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 6.89884562707885, LR: 0.0003 +[2026-03-02 14:35:26] (step=0035261) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.899041283506163, LR: 0.0003 +[2026-03-02 14:35:34] (step=0035262) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.899236939933477, LR: 0.0003 +[2026-03-02 14:35:42] (step=0035263) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 6.89943259636079, LR: 0.0003 +[2026-03-02 14:35:50] (step=0035264) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.899628252788104, LR: 0.0003 +[2026-03-02 14:35:58] (step=0035265) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.899823909215418, LR: 0.0003 +[2026-03-02 14:36:06] (step=0035266) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.900019565642731, LR: 0.0003 +[2026-03-02 14:36:13] (step=0035267) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.900215222070045, LR: 0.0003 +[2026-03-02 14:36:21] (step=0035268) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 6.900410878497358, LR: 0.0003 +[2026-03-02 14:36:29] (step=0035269) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.900606534924672, LR: 0.0003 +[2026-03-02 14:36:37] (step=0035270) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.9008021913519855, LR: 0.0003 +[2026-03-02 14:36:45] (step=0035271) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.9009978477792995, LR: 0.0003 +[2026-03-02 14:36:53] (step=0035272) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.9011935042066135, LR: 0.0003 +[2026-03-02 14:37:01] (step=0035273) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.901389160633927, LR: 0.0003 +[2026-03-02 14:37:08] (step=0035274) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 6.901584817061241, LR: 0.0003 +[2026-03-02 14:37:16] (step=0035275) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.901780473488554, LR: 0.0003 +[2026-03-02 14:37:24] (step=0035276) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.901976129915868, LR: 0.0003 +[2026-03-02 14:37:32] (step=0035277) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.902171786343182, LR: 0.0003 +[2026-03-02 14:37:40] (step=0035278) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 6.902367442770495, LR: 0.0003 +[2026-03-02 14:37:48] (step=0035279) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.902563099197809, LR: 0.0003 +[2026-03-02 14:37:56] (step=0035280) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.902758755625122, LR: 0.0003 +[2026-03-02 14:38:04] (step=0035281) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.902954412052436, LR: 0.0003 +[2026-03-02 14:38:11] (step=0035282) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.90315006847975, LR: 0.0003 +[2026-03-02 14:38:19] (step=0035283) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 6.903345724907063, LR: 0.0003 +[2026-03-02 14:38:27] (step=0035284) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.903541381334377, LR: 0.0003 +[2026-03-02 14:38:35] (step=0035285) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.90373703776169, LR: 0.0003 +[2026-03-02 14:38:43] (step=0035286) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.903932694189004, LR: 0.0003 +[2026-03-02 14:38:51] (step=0035287) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 6.904128350616317, LR: 0.0003 +[2026-03-02 14:38:59] (step=0035288) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 6.904324007043631, LR: 0.0003 +[2026-03-02 14:39:06] (step=0035289) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.904519663470945, LR: 0.0003 +[2026-03-02 14:39:14] (step=0035290) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.9047153198982585, LR: 0.0003 +[2026-03-02 14:39:22] (step=0035291) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 6.9049109763255725, LR: 0.0003 +[2026-03-02 14:39:30] (step=0035292) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 6.905106632752886, LR: 0.0003 +[2026-03-02 14:39:38] (step=0035293) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 6.9053022891802, LR: 0.0003 +[2026-03-02 14:39:46] (step=0035294) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.905497945607514, LR: 0.0003 +[2026-03-02 14:39:54] (step=0035295) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.905693602034827, LR: 0.0003 +[2026-03-02 14:40:01] (step=0035296) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.905889258462141, LR: 0.0003 +[2026-03-02 14:40:09] (step=0035297) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.906084914889454, LR: 0.0003 +[2026-03-02 14:40:17] (step=0035298) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.906280571316768, LR: 0.0003 +[2026-03-02 14:40:25] (step=0035299) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.906476227744081, LR: 0.0003 +[2026-03-02 14:40:33] (step=0035300) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.906671884171395, LR: 0.0003 +[2026-03-02 14:40:41] (step=0035301) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.906867540598709, LR: 0.0003 +[2026-03-02 14:40:48] (step=0035302) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.907063197026022, LR: 0.0003 +[2026-03-02 14:40:56] (step=0035303) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.907258853453336, LR: 0.0003 +[2026-03-02 14:41:04] (step=0035304) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 6.907454509880649, LR: 0.0003 +[2026-03-02 14:41:12] (step=0035305) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.907650166307963, LR: 0.0003 +[2026-03-02 14:41:20] (step=0035306) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.907845822735277, LR: 0.0003 +[2026-03-02 14:41:28] (step=0035307) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.90804147916259, LR: 0.0003 +[2026-03-02 14:41:35] (step=0035308) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.908237135589904, LR: 0.0003 +[2026-03-02 14:41:43] (step=0035309) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.9084327920172175, LR: 0.0003 +[2026-03-02 14:41:51] (step=0035310) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 6.9086284484445315, LR: 0.0003 +[2026-03-02 14:41:59] (step=0035311) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.908824104871845, LR: 0.0003 +[2026-03-02 14:42:07] (step=0035312) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.909019761299159, LR: 0.0003 +[2026-03-02 14:42:15] (step=0035313) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.909215417726473, LR: 0.0003 +[2026-03-02 14:42:23] (step=0035314) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.909411074153786, LR: 0.0003 +[2026-03-02 14:42:30] (step=0035315) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.9096067305811, LR: 0.0003 +[2026-03-02 14:42:38] (step=0035316) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.909802387008413, LR: 0.0003 +[2026-03-02 14:42:46] (step=0035317) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.909998043435727, LR: 0.0003 +[2026-03-02 14:42:54] (step=0035318) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.910193699863041, LR: 0.0003 +[2026-03-02 14:43:02] (step=0035319) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 6.910389356290354, LR: 0.0003 +[2026-03-02 14:43:10] (step=0035320) Train Loss: 0.4425, Train Steps/Sec: 0.12, Epoch: 6.910585012717668, LR: 0.0003 +[2026-03-02 14:43:18] (step=0035321) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 6.910780669144981, LR: 0.0003 +[2026-03-02 14:43:25] (step=0035322) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 6.910976325572295, LR: 0.0003 +[2026-03-02 14:43:33] (step=0035323) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.911171981999608, LR: 0.0003 +[2026-03-02 14:43:41] (step=0035324) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.911367638426922, LR: 0.0003 +[2026-03-02 14:43:49] (step=0035325) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 6.911563294854236, LR: 0.0003 +[2026-03-02 14:43:57] (step=0035326) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.911758951281549, LR: 0.0003 +[2026-03-02 14:44:05] (step=0035327) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 6.911954607708863, LR: 0.0003 +[2026-03-02 14:44:13] (step=0035328) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.9121502641361765, LR: 0.0003 +[2026-03-02 14:44:21] (step=0035329) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.9123459205634905, LR: 0.0003 +[2026-03-02 14:44:29] (step=0035330) Train Loss: 0.4457, Train Steps/Sec: 0.12, Epoch: 6.9125415769908045, LR: 0.0003 +[2026-03-02 14:44:37] (step=0035331) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.912737233418118, LR: 0.0003 +[2026-03-02 14:44:44] (step=0035332) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 6.912932889845432, LR: 0.0003 +[2026-03-02 14:44:52] (step=0035333) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.913128546272745, LR: 0.0003 +[2026-03-02 14:45:00] (step=0035334) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 6.913324202700059, LR: 0.0003 +[2026-03-02 14:45:08] (step=0035335) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.913519859127373, LR: 0.0003 +[2026-03-02 14:45:16] (step=0035336) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.913715515554686, LR: 0.0003 +[2026-03-02 14:45:24] (step=0035337) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.913911171982, LR: 0.0003 +[2026-03-02 14:45:32] (step=0035338) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 6.914106828409313, LR: 0.0003 +[2026-03-02 14:45:39] (step=0035339) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.914302484836627, LR: 0.0003 +[2026-03-02 14:45:47] (step=0035340) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 6.91449814126394, LR: 0.0003 +[2026-03-02 14:45:55] (step=0035341) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.914693797691254, LR: 0.0003 +[2026-03-02 14:46:03] (step=0035342) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 6.914889454118568, LR: 0.0003 +[2026-03-02 14:46:11] (step=0035343) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.915085110545881, LR: 0.0003 +[2026-03-02 14:46:19] (step=0035344) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.915280766973195, LR: 0.0003 +[2026-03-02 14:46:27] (step=0035345) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.915476423400508, LR: 0.0003 +[2026-03-02 14:46:34] (step=0035346) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 6.915672079827822, LR: 0.0003 +[2026-03-02 14:46:42] (step=0035347) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.915867736255136, LR: 0.0003 +[2026-03-02 14:46:50] (step=0035348) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.9160633926824495, LR: 0.0003 +[2026-03-02 14:46:58] (step=0035349) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.9162590491097635, LR: 0.0003 +[2026-03-02 14:47:06] (step=0035350) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.916454705537077, LR: 0.0003 +[2026-03-02 14:47:14] (step=0035351) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.916650361964391, LR: 0.0003 +[2026-03-02 14:47:22] (step=0035352) Train Loss: 0.4246, Train Steps/Sec: 0.13, Epoch: 6.916846018391704, LR: 0.0003 +[2026-03-02 14:47:29] (step=0035353) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.917041674819018, LR: 0.0003 +[2026-03-02 14:47:37] (step=0035354) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.917237331246332, LR: 0.0003 +[2026-03-02 14:47:45] (step=0035355) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.917432987673645, LR: 0.0003 +[2026-03-02 14:47:53] (step=0035356) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.917628644100959, LR: 0.0003 +[2026-03-02 14:48:01] (step=0035357) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 6.917824300528272, LR: 0.0003 +[2026-03-02 14:48:09] (step=0035358) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.918019956955586, LR: 0.0003 +[2026-03-02 14:48:17] (step=0035359) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.9182156133829, LR: 0.0003 +[2026-03-02 14:48:24] (step=0035360) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.918411269810213, LR: 0.0003 +[2026-03-02 14:48:32] (step=0035361) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 6.918606926237527, LR: 0.0003 +[2026-03-02 14:48:40] (step=0035362) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 6.91880258266484, LR: 0.0003 +[2026-03-02 14:48:48] (step=0035363) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.918998239092154, LR: 0.0003 +[2026-03-02 14:48:56] (step=0035364) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 6.919193895519467, LR: 0.0003 +[2026-03-02 14:49:04] (step=0035365) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.919389551946781, LR: 0.0003 +[2026-03-02 14:49:12] (step=0035366) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.919585208374095, LR: 0.0003 +[2026-03-02 14:49:19] (step=0035367) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 6.9197808648014085, LR: 0.0003 +[2026-03-02 14:49:27] (step=0035368) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.9199765212287225, LR: 0.0003 +[2026-03-02 14:49:35] (step=0035369) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.920172177656036, LR: 0.0003 +[2026-03-02 14:49:43] (step=0035370) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 6.92036783408335, LR: 0.0003 +[2026-03-02 14:49:51] (step=0035371) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.920563490510664, LR: 0.0003 +[2026-03-02 14:49:59] (step=0035372) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 6.920759146937977, LR: 0.0003 +[2026-03-02 14:50:07] (step=0035373) Train Loss: 0.4364, Train Steps/Sec: 0.12, Epoch: 6.920954803365291, LR: 0.0003 +[2026-03-02 14:50:14] (step=0035374) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.921150459792604, LR: 0.0003 +[2026-03-02 14:50:22] (step=0035375) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.921346116219918, LR: 0.0003 +[2026-03-02 14:50:30] (step=0035376) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 6.921541772647231, LR: 0.0003 +[2026-03-02 14:50:38] (step=0035377) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.921737429074545, LR: 0.0003 +[2026-03-02 14:50:46] (step=0035378) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.921933085501859, LR: 0.0003 +[2026-03-02 14:50:54] (step=0035379) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.922128741929172, LR: 0.0003 +[2026-03-02 14:51:02] (step=0035380) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.922324398356486, LR: 0.0003 +[2026-03-02 14:51:10] (step=0035381) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.922520054783799, LR: 0.0003 +[2026-03-02 14:51:18] (step=0035382) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 6.922715711211113, LR: 0.0003 +[2026-03-02 14:51:25] (step=0035383) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.922911367638427, LR: 0.0003 +[2026-03-02 14:51:33] (step=0035384) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.92310702406574, LR: 0.0003 +[2026-03-02 14:51:41] (step=0035385) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.923302680493054, LR: 0.0003 +[2026-03-02 14:51:49] (step=0035386) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.9234983369203675, LR: 0.0003 +[2026-03-02 14:51:57] (step=0035387) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 6.9236939933476815, LR: 0.0003 +[2026-03-02 14:52:05] (step=0035388) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.9238896497749955, LR: 0.0003 +[2026-03-02 14:52:13] (step=0035389) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 6.924085306202309, LR: 0.0003 +[2026-03-02 14:52:20] (step=0035390) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.924280962629623, LR: 0.0003 +[2026-03-02 14:52:28] (step=0035391) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 6.924476619056936, LR: 0.0003 +[2026-03-02 14:52:36] (step=0035392) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.92467227548425, LR: 0.0003 +[2026-03-02 14:52:44] (step=0035393) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.924867931911563, LR: 0.0003 +[2026-03-02 14:52:52] (step=0035394) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.925063588338877, LR: 0.0003 +[2026-03-02 14:53:00] (step=0035395) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.925259244766191, LR: 0.0003 +[2026-03-02 14:53:08] (step=0035396) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 6.925454901193504, LR: 0.0003 +[2026-03-02 14:53:15] (step=0035397) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.925650557620818, LR: 0.0003 +[2026-03-02 14:53:23] (step=0035398) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.925846214048131, LR: 0.0003 +[2026-03-02 14:53:31] (step=0035399) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 6.926041870475445, LR: 0.0003 +[2026-03-02 14:53:39] (step=0035400) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 6.926237526902759, LR: 0.0003 +[2026-03-02 14:53:47] (step=0035401) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.926433183330072, LR: 0.0003 +[2026-03-02 14:53:55] (step=0035402) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.926628839757386, LR: 0.0003 +[2026-03-02 14:54:03] (step=0035403) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.926824496184699, LR: 0.0003 +[2026-03-02 14:54:10] (step=0035404) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.927020152612013, LR: 0.0003 +[2026-03-02 14:54:18] (step=0035405) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 6.9272158090393265, LR: 0.0003 +[2026-03-02 14:54:26] (step=0035406) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 6.9274114654666405, LR: 0.0003 +[2026-03-02 14:54:34] (step=0035407) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 6.927607121893955, LR: 0.0003 +[2026-03-02 14:54:42] (step=0035408) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.927802778321268, LR: 0.0003 +[2026-03-02 14:54:50] (step=0035409) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.927998434748582, LR: 0.0003 +[2026-03-02 14:54:58] (step=0035410) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.928194091175895, LR: 0.0003 +[2026-03-02 14:55:05] (step=0035411) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.928389747603209, LR: 0.0003 +[2026-03-02 14:55:13] (step=0035412) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.928585404030523, LR: 0.0003 +[2026-03-02 14:55:21] (step=0035413) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.928781060457836, LR: 0.0003 +[2026-03-02 14:55:29] (step=0035414) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.92897671688515, LR: 0.0003 +[2026-03-02 14:55:37] (step=0035415) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.929172373312463, LR: 0.0003 +[2026-03-02 14:55:45] (step=0035416) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.929368029739777, LR: 0.0003 +[2026-03-02 14:55:53] (step=0035417) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.92956368616709, LR: 0.0003 +[2026-03-02 14:56:01] (step=0035418) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.929759342594404, LR: 0.0003 +[2026-03-02 14:56:08] (step=0035419) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 6.929954999021718, LR: 0.0003 +[2026-03-02 14:56:16] (step=0035420) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 6.930150655449031, LR: 0.0003 +[2026-03-02 14:56:24] (step=0035421) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.930346311876345, LR: 0.0003 +[2026-03-02 14:56:32] (step=0035422) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 6.930541968303658, LR: 0.0003 +[2026-03-02 14:56:40] (step=0035423) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.930737624730972, LR: 0.0003 +[2026-03-02 14:56:48] (step=0035424) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 6.930933281158286, LR: 0.0003 +[2026-03-02 14:56:56] (step=0035425) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.9311289375856, LR: 0.0003 +[2026-03-02 14:57:04] (step=0035426) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.931324594012914, LR: 0.0003 +[2026-03-02 14:57:12] (step=0035427) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.931520250440227, LR: 0.0003 +[2026-03-02 14:57:19] (step=0035428) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.931715906867541, LR: 0.0003 +[2026-03-02 14:57:27] (step=0035429) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.931911563294854, LR: 0.0003 +[2026-03-02 14:57:35] (step=0035430) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.932107219722168, LR: 0.0003 +[2026-03-02 14:57:43] (step=0035431) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.932302876149482, LR: 0.0003 +[2026-03-02 14:57:51] (step=0035432) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.932498532576795, LR: 0.0003 +[2026-03-02 14:57:59] (step=0035433) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 6.932694189004109, LR: 0.0003 +[2026-03-02 14:58:07] (step=0035434) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.932889845431422, LR: 0.0003 +[2026-03-02 14:58:15] (step=0035435) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.933085501858736, LR: 0.0003 +[2026-03-02 14:58:22] (step=0035436) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.93328115828605, LR: 0.0003 +[2026-03-02 14:58:30] (step=0035437) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.933476814713363, LR: 0.0003 +[2026-03-02 14:58:38] (step=0035438) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.933672471140677, LR: 0.0003 +[2026-03-02 14:58:46] (step=0035439) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 6.93386812756799, LR: 0.0003 +[2026-03-02 14:58:54] (step=0035440) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.934063783995304, LR: 0.0003 +[2026-03-02 14:59:02] (step=0035441) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.934259440422618, LR: 0.0003 +[2026-03-02 14:59:10] (step=0035442) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.934455096849931, LR: 0.0003 +[2026-03-02 14:59:17] (step=0035443) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.9346507532772454, LR: 0.0003 +[2026-03-02 14:59:25] (step=0035444) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 6.934846409704559, LR: 0.0003 +[2026-03-02 14:59:33] (step=0035445) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.935042066131873, LR: 0.0003 +[2026-03-02 14:59:41] (step=0035446) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.935237722559186, LR: 0.0003 +[2026-03-02 14:59:49] (step=0035447) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.9354333789865, LR: 0.0003 +[2026-03-02 14:59:57] (step=0035448) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.935629035413814, LR: 0.0003 +[2026-03-02 15:00:05] (step=0035449) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.935824691841127, LR: 0.0003 +[2026-03-02 15:00:12] (step=0035450) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.936020348268441, LR: 0.0003 +[2026-03-02 15:00:20] (step=0035451) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 6.936216004695754, LR: 0.0003 +[2026-03-02 15:00:28] (step=0035452) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.936411661123068, LR: 0.0003 +[2026-03-02 15:00:36] (step=0035453) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.936607317550382, LR: 0.0003 +[2026-03-02 15:00:44] (step=0035454) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.936802973977695, LR: 0.0003 +[2026-03-02 15:00:52] (step=0035455) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.936998630405009, LR: 0.0003 +[2026-03-02 15:01:00] (step=0035456) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 6.937194286832322, LR: 0.0003 +[2026-03-02 15:01:07] (step=0035457) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 6.937389943259636, LR: 0.0003 +[2026-03-02 15:01:15] (step=0035458) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.937585599686949, LR: 0.0003 +[2026-03-02 15:01:23] (step=0035459) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.937781256114263, LR: 0.0003 +[2026-03-02 15:01:31] (step=0035460) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.937976912541577, LR: 0.0003 +[2026-03-02 15:01:39] (step=0035461) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.9381725689688905, LR: 0.0003 +[2026-03-02 15:01:47] (step=0035462) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.9383682253962045, LR: 0.0003 +[2026-03-02 15:01:55] (step=0035463) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.938563881823518, LR: 0.0003 +[2026-03-02 15:02:03] (step=0035464) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 6.938759538250832, LR: 0.0003 +[2026-03-02 15:02:10] (step=0035465) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.938955194678146, LR: 0.0003 +[2026-03-02 15:02:18] (step=0035466) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 6.939150851105459, LR: 0.0003 +[2026-03-02 15:02:26] (step=0035467) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 6.939346507532773, LR: 0.0003 +[2026-03-02 15:02:34] (step=0035468) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.939542163960086, LR: 0.0003 +[2026-03-02 15:02:42] (step=0035469) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.9397378203874, LR: 0.0003 +[2026-03-02 15:02:50] (step=0035470) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.939933476814713, LR: 0.0003 +[2026-03-02 15:02:58] (step=0035471) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 6.940129133242027, LR: 0.0003 +[2026-03-02 15:03:05] (step=0035472) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.940324789669341, LR: 0.0003 +[2026-03-02 15:03:13] (step=0035473) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 6.940520446096654, LR: 0.0003 +[2026-03-02 15:03:21] (step=0035474) Train Loss: 0.4511, Train Steps/Sec: 0.12, Epoch: 6.940716102523968, LR: 0.0003 +[2026-03-02 15:03:29] (step=0035475) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.940911758951281, LR: 0.0003 +[2026-03-02 15:03:37] (step=0035476) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 6.941107415378595, LR: 0.0003 +[2026-03-02 15:03:45] (step=0035477) Train Loss: 0.4348, Train Steps/Sec: 0.12, Epoch: 6.941303071805909, LR: 0.0003 +[2026-03-02 15:03:53] (step=0035478) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.941498728233222, LR: 0.0003 +[2026-03-02 15:04:01] (step=0035479) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 6.941694384660536, LR: 0.0003 +[2026-03-02 15:04:09] (step=0035480) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.9418900410878495, LR: 0.0003 +[2026-03-02 15:04:17] (step=0035481) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.9420856975151635, LR: 0.0003 +[2026-03-02 15:04:24] (step=0035482) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 6.942281353942477, LR: 0.0003 +[2026-03-02 15:04:32] (step=0035483) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.942477010369791, LR: 0.0003 +[2026-03-02 15:04:40] (step=0035484) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.942672666797105, LR: 0.0003 +[2026-03-02 15:04:48] (step=0035485) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.942868323224418, LR: 0.0003 +[2026-03-02 15:04:56] (step=0035486) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.943063979651732, LR: 0.0003 +[2026-03-02 15:05:04] (step=0035487) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.943259636079045, LR: 0.0003 +[2026-03-02 15:05:12] (step=0035488) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.943455292506359, LR: 0.0003 +[2026-03-02 15:05:19] (step=0035489) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.943650948933673, LR: 0.0003 +[2026-03-02 15:05:27] (step=0035490) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 6.943846605360986, LR: 0.0003 +[2026-03-02 15:05:35] (step=0035491) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 6.9440422617883, LR: 0.0003 +[2026-03-02 15:05:43] (step=0035492) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.944237918215613, LR: 0.0003 +[2026-03-02 15:05:51] (step=0035493) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.944433574642927, LR: 0.0003 +[2026-03-02 15:05:59] (step=0035494) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.94462923107024, LR: 0.0003 +[2026-03-02 15:06:07] (step=0035495) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.944824887497554, LR: 0.0003 +[2026-03-02 15:06:14] (step=0035496) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 6.945020543924868, LR: 0.0003 +[2026-03-02 15:06:22] (step=0035497) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.945216200352181, LR: 0.0003 +[2026-03-02 15:06:30] (step=0035498) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.945411856779495, LR: 0.0003 +[2026-03-02 15:06:38] (step=0035499) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 6.9456075132068085, LR: 0.0003 +[2026-03-02 15:06:46] (step=0035500) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.9458031696341225, LR: 0.0003 +[2026-03-02 15:06:46] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0035500/ +[2026-03-02 15:06:54] (step=0035501) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 6.9459988260614365, LR: 0.0003 +[2026-03-02 15:07:02] (step=0035502) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.94619448248875, LR: 0.0003 +[2026-03-02 15:07:10] (step=0035503) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.946390138916064, LR: 0.0003 +[2026-03-02 15:07:17] (step=0035504) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.946585795343377, LR: 0.0003 +[2026-03-02 15:07:25] (step=0035505) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.946781451770691, LR: 0.0003 +[2026-03-02 15:07:33] (step=0035506) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 6.946977108198005, LR: 0.0003 +[2026-03-02 15:07:41] (step=0035507) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.947172764625318, LR: 0.0003 +[2026-03-02 15:07:49] (step=0035508) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.947368421052632, LR: 0.0003 +[2026-03-02 15:07:57] (step=0035509) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 6.947564077479945, LR: 0.0003 +[2026-03-02 15:08:05] (step=0035510) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 6.947759733907259, LR: 0.0003 +[2026-03-02 15:08:12] (step=0035511) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.947955390334572, LR: 0.0003 +[2026-03-02 15:08:20] (step=0035512) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 6.948151046761886, LR: 0.0003 +[2026-03-02 15:08:28] (step=0035513) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.9483467031892, LR: 0.0003 +[2026-03-02 15:08:36] (step=0035514) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 6.948542359616513, LR: 0.0003 +[2026-03-02 15:08:44] (step=0035515) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.948738016043827, LR: 0.0003 +[2026-03-02 15:08:52] (step=0035516) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 6.94893367247114, LR: 0.0003 +[2026-03-02 15:09:00] (step=0035517) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 6.949129328898454, LR: 0.0003 +[2026-03-02 15:09:08] (step=0035518) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 6.949324985325768, LR: 0.0003 +[2026-03-02 15:09:15] (step=0035519) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.9495206417530815, LR: 0.0003 +[2026-03-02 15:09:23] (step=0035520) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.9497162981803955, LR: 0.0003 +[2026-03-02 15:09:31] (step=0035521) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.949911954607709, LR: 0.0003 +[2026-03-02 15:09:39] (step=0035522) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 6.950107611035023, LR: 0.0003 +[2026-03-02 15:09:47] (step=0035523) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.950303267462336, LR: 0.0003 +[2026-03-02 15:09:55] (step=0035524) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.95049892388965, LR: 0.0003 +[2026-03-02 15:10:03] (step=0035525) Train Loss: 0.4445, Train Steps/Sec: 0.12, Epoch: 6.950694580316964, LR: 0.0003 +[2026-03-02 15:10:11] (step=0035526) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 6.950890236744277, LR: 0.0003 +[2026-03-02 15:10:19] (step=0035527) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.951085893171591, LR: 0.0003 +[2026-03-02 15:10:26] (step=0035528) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 6.951281549598904, LR: 0.0003 +[2026-03-02 15:10:34] (step=0035529) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.951477206026218, LR: 0.0003 +[2026-03-02 15:10:42] (step=0035530) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.951672862453532, LR: 0.0003 +[2026-03-02 15:10:50] (step=0035531) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.951868518880845, LR: 0.0003 +[2026-03-02 15:10:58] (step=0035532) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.952064175308159, LR: 0.0003 +[2026-03-02 15:11:06] (step=0035533) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.952259831735472, LR: 0.0003 +[2026-03-02 15:11:14] (step=0035534) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 6.952455488162786, LR: 0.0003 +[2026-03-02 15:11:22] (step=0035535) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 6.952651144590099, LR: 0.0003 +[2026-03-02 15:11:29] (step=0035536) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.952846801017413, LR: 0.0003 +[2026-03-02 15:11:37] (step=0035537) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.953042457444727, LR: 0.0003 +[2026-03-02 15:11:45] (step=0035538) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 6.9532381138720405, LR: 0.0003 +[2026-03-02 15:11:53] (step=0035539) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 6.9534337702993545, LR: 0.0003 +[2026-03-02 15:12:01] (step=0035540) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 6.953629426726668, LR: 0.0003 +[2026-03-02 15:12:09] (step=0035541) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.953825083153982, LR: 0.0003 +[2026-03-02 15:12:17] (step=0035542) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 6.954020739581296, LR: 0.0003 +[2026-03-02 15:12:24] (step=0035543) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.954216396008609, LR: 0.0003 +[2026-03-02 15:12:32] (step=0035544) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.954412052435923, LR: 0.0003 +[2026-03-02 15:12:40] (step=0035545) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 6.954607708863236, LR: 0.0003 +[2026-03-02 15:12:48] (step=0035546) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.95480336529055, LR: 0.0003 +[2026-03-02 15:12:56] (step=0035547) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 6.954999021717863, LR: 0.0003 +[2026-03-02 15:13:04] (step=0035548) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 6.955194678145177, LR: 0.0003 +[2026-03-02 15:13:12] (step=0035549) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.955390334572491, LR: 0.0003 +[2026-03-02 15:13:19] (step=0035550) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 6.955585990999804, LR: 0.0003 +[2026-03-02 15:13:27] (step=0035551) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.955781647427118, LR: 0.0003 +[2026-03-02 15:13:35] (step=0035552) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.955977303854431, LR: 0.0003 +[2026-03-02 15:13:43] (step=0035553) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 6.956172960281745, LR: 0.0003 +[2026-03-02 15:13:51] (step=0035554) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.956368616709059, LR: 0.0003 +[2026-03-02 15:13:59] (step=0035555) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.956564273136372, LR: 0.0003 +[2026-03-02 15:14:07] (step=0035556) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.956759929563686, LR: 0.0003 +[2026-03-02 15:14:14] (step=0035557) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.9569555859909995, LR: 0.0003 +[2026-03-02 15:14:22] (step=0035558) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.9571512424183135, LR: 0.0003 +[2026-03-02 15:14:30] (step=0035559) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.9573468988456275, LR: 0.0003 +[2026-03-02 15:14:38] (step=0035560) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.957542555272941, LR: 0.0003 +[2026-03-02 15:14:46] (step=0035561) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 6.957738211700255, LR: 0.0003 +[2026-03-02 15:14:54] (step=0035562) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.957933868127568, LR: 0.0003 +[2026-03-02 15:15:02] (step=0035563) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.958129524554882, LR: 0.0003 +[2026-03-02 15:15:09] (step=0035564) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.958325180982195, LR: 0.0003 +[2026-03-02 15:15:17] (step=0035565) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.958520837409509, LR: 0.0003 +[2026-03-02 15:15:25] (step=0035566) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 6.958716493836823, LR: 0.0003 +[2026-03-02 15:15:33] (step=0035567) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.958912150264136, LR: 0.0003 +[2026-03-02 15:15:41] (step=0035568) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 6.95910780669145, LR: 0.0003 +[2026-03-02 15:15:49] (step=0035569) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 6.959303463118763, LR: 0.0003 +[2026-03-02 15:15:57] (step=0035570) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 6.959499119546077, LR: 0.0003 +[2026-03-02 15:16:05] (step=0035571) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.959694775973391, LR: 0.0003 +[2026-03-02 15:16:13] (step=0035572) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.959890432400704, LR: 0.0003 +[2026-03-02 15:16:20] (step=0035573) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.960086088828018, LR: 0.0003 +[2026-03-02 15:16:28] (step=0035574) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.960281745255331, LR: 0.0003 +[2026-03-02 15:16:36] (step=0035575) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 6.960477401682645, LR: 0.0003 +[2026-03-02 15:16:44] (step=0035576) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 6.9606730581099585, LR: 0.0003 +[2026-03-02 15:16:52] (step=0035577) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 6.9608687145372725, LR: 0.0003 +[2026-03-02 15:17:00] (step=0035578) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 6.9610643709645865, LR: 0.0003 +[2026-03-02 15:17:08] (step=0035579) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 6.9612600273919, LR: 0.0003 +[2026-03-02 15:17:16] (step=0035580) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.961455683819214, LR: 0.0003 +[2026-03-02 15:17:23] (step=0035581) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.961651340246527, LR: 0.0003 +[2026-03-02 15:17:31] (step=0035582) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.961846996673841, LR: 0.0003 +[2026-03-02 15:17:39] (step=0035583) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 6.962042653101155, LR: 0.0003 +[2026-03-02 15:17:47] (step=0035584) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.962238309528468, LR: 0.0003 +[2026-03-02 15:17:55] (step=0035585) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 6.962433965955782, LR: 0.0003 +[2026-03-02 15:18:03] (step=0035586) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.962629622383095, LR: 0.0003 +[2026-03-02 15:18:11] (step=0035587) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 6.962825278810409, LR: 0.0003 +[2026-03-02 15:18:18] (step=0035588) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.963020935237722, LR: 0.0003 +[2026-03-02 15:18:26] (step=0035589) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 6.963216591665036, LR: 0.0003 +[2026-03-02 15:18:34] (step=0035590) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.96341224809235, LR: 0.0003 +[2026-03-02 15:18:42] (step=0035591) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.963607904519663, LR: 0.0003 +[2026-03-02 15:18:50] (step=0035592) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 6.963803560946977, LR: 0.0003 +[2026-03-02 15:18:58] (step=0035593) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 6.96399921737429, LR: 0.0003 +[2026-03-02 15:19:06] (step=0035594) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.964194873801604, LR: 0.0003 +[2026-03-02 15:19:13] (step=0035595) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 6.964390530228918, LR: 0.0003 +[2026-03-02 15:19:21] (step=0035596) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.9645861866562315, LR: 0.0003 +[2026-03-02 15:19:29] (step=0035597) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 6.9647818430835455, LR: 0.0003 +[2026-03-02 15:19:37] (step=0035598) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.964977499510859, LR: 0.0003 +[2026-03-02 15:19:45] (step=0035599) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 6.965173155938173, LR: 0.0003 +[2026-03-02 15:19:53] (step=0035600) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.965368812365486, LR: 0.0003 +[2026-03-02 15:20:01] (step=0035601) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 6.9655644687928, LR: 0.0003 +[2026-03-02 15:20:08] (step=0035602) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 6.965760125220114, LR: 0.0003 +[2026-03-02 15:20:16] (step=0035603) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 6.965955781647427, LR: 0.0003 +[2026-03-02 15:20:24] (step=0035604) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.966151438074741, LR: 0.0003 +[2026-03-02 15:20:32] (step=0035605) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 6.966347094502054, LR: 0.0003 +[2026-03-02 15:20:40] (step=0035606) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.966542750929368, LR: 0.0003 +[2026-03-02 15:20:48] (step=0035607) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 6.966738407356682, LR: 0.0003 +[2026-03-02 15:20:56] (step=0035608) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 6.966934063783995, LR: 0.0003 +[2026-03-02 15:21:04] (step=0035609) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 6.967129720211309, LR: 0.0003 +[2026-03-02 15:21:11] (step=0035610) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 6.967325376638622, LR: 0.0003 +[2026-03-02 15:21:19] (step=0035611) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 6.967521033065936, LR: 0.0003 +[2026-03-02 15:21:27] (step=0035612) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 6.96771668949325, LR: 0.0003 +[2026-03-02 15:21:35] (step=0035613) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.967912345920563, LR: 0.0003 +[2026-03-02 15:21:43] (step=0035614) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.968108002347877, LR: 0.0003 +[2026-03-02 15:21:51] (step=0035615) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.9683036587751905, LR: 0.0003 +[2026-03-02 15:21:59] (step=0035616) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.9684993152025045, LR: 0.0003 +[2026-03-02 15:22:06] (step=0035617) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 6.968694971629818, LR: 0.0003 +[2026-03-02 15:22:14] (step=0035618) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 6.968890628057132, LR: 0.0003 +[2026-03-02 15:22:22] (step=0035619) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 6.969086284484446, LR: 0.0003 +[2026-03-02 15:22:30] (step=0035620) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.969281940911759, LR: 0.0003 +[2026-03-02 15:22:38] (step=0035621) Train Loss: 0.4344, Train Steps/Sec: 0.12, Epoch: 6.969477597339073, LR: 0.0003 +[2026-03-02 15:22:46] (step=0035622) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 6.969673253766386, LR: 0.0003 +[2026-03-02 15:22:54] (step=0035623) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.9698689101937, LR: 0.0003 +[2026-03-02 15:23:02] (step=0035624) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 6.970064566621014, LR: 0.0003 +[2026-03-02 15:23:10] (step=0035625) Train Loss: 0.4425, Train Steps/Sec: 0.12, Epoch: 6.970260223048327, LR: 0.0003 +[2026-03-02 15:23:18] (step=0035626) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.970455879475641, LR: 0.0003 +[2026-03-02 15:23:25] (step=0035627) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.970651535902954, LR: 0.0003 +[2026-03-02 15:23:33] (step=0035628) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 6.970847192330268, LR: 0.0003 +[2026-03-02 15:23:41] (step=0035629) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.971042848757581, LR: 0.0003 +[2026-03-02 15:23:49] (step=0035630) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 6.971238505184895, LR: 0.0003 +[2026-03-02 15:23:57] (step=0035631) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 6.971434161612209, LR: 0.0003 +[2026-03-02 15:24:05] (step=0035632) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.971629818039522, LR: 0.0003 +[2026-03-02 15:24:13] (step=0035633) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 6.971825474466836, LR: 0.0003 +[2026-03-02 15:24:20] (step=0035634) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 6.9720211308941495, LR: 0.0003 +[2026-03-02 15:24:28] (step=0035635) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 6.9722167873214635, LR: 0.0003 +[2026-03-02 15:24:36] (step=0035636) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 6.9724124437487776, LR: 0.0003 +[2026-03-02 15:24:44] (step=0035637) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.972608100176091, LR: 0.0003 +[2026-03-02 15:24:52] (step=0035638) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 6.972803756603405, LR: 0.0003 +[2026-03-02 15:25:00] (step=0035639) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.972999413030718, LR: 0.0003 +[2026-03-02 15:25:08] (step=0035640) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 6.973195069458032, LR: 0.0003 +[2026-03-02 15:25:15] (step=0035641) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.973390725885345, LR: 0.0003 +[2026-03-02 15:25:23] (step=0035642) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 6.973586382312659, LR: 0.0003 +[2026-03-02 15:25:31] (step=0035643) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.973782038739973, LR: 0.0003 +[2026-03-02 15:25:39] (step=0035644) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 6.973977695167286, LR: 0.0003 +[2026-03-02 15:25:47] (step=0035645) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.9741733515946, LR: 0.0003 +[2026-03-02 15:25:55] (step=0035646) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.974369008021913, LR: 0.0003 +[2026-03-02 15:26:03] (step=0035647) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 6.974564664449227, LR: 0.0003 +[2026-03-02 15:26:10] (step=0035648) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 6.974760320876541, LR: 0.0003 +[2026-03-02 15:26:18] (step=0035649) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.974955977303854, LR: 0.0003 +[2026-03-02 15:26:26] (step=0035650) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.975151633731168, LR: 0.0003 +[2026-03-02 15:26:34] (step=0035651) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 6.975347290158481, LR: 0.0003 +[2026-03-02 15:26:42] (step=0035652) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 6.975542946585795, LR: 0.0003 +[2026-03-02 15:26:50] (step=0035653) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 6.9757386030131086, LR: 0.0003 +[2026-03-02 15:26:58] (step=0035654) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 6.975934259440423, LR: 0.0003 +[2026-03-02 15:27:05] (step=0035655) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 6.976129915867737, LR: 0.0003 +[2026-03-02 15:27:13] (step=0035656) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 6.97632557229505, LR: 0.0003 +[2026-03-02 15:27:21] (step=0035657) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 6.976521228722364, LR: 0.0003 +[2026-03-02 15:27:29] (step=0035658) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.976716885149677, LR: 0.0003 +[2026-03-02 15:27:37] (step=0035659) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 6.976912541576991, LR: 0.0003 +[2026-03-02 15:27:45] (step=0035660) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 6.977108198004305, LR: 0.0003 +[2026-03-02 15:27:53] (step=0035661) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.977303854431618, LR: 0.0003 +[2026-03-02 15:28:00] (step=0035662) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.977499510858932, LR: 0.0003 +[2026-03-02 15:28:09] (step=0035663) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 6.977695167286245, LR: 0.0003 +[2026-03-02 15:28:16] (step=0035664) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.977890823713559, LR: 0.0003 +[2026-03-02 15:28:24] (step=0035665) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.978086480140873, LR: 0.0003 +[2026-03-02 15:28:32] (step=0035666) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 6.978282136568186, LR: 0.0003 +[2026-03-02 15:28:40] (step=0035667) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 6.9784777929955, LR: 0.0003 +[2026-03-02 15:28:48] (step=0035668) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 6.978673449422813, LR: 0.0003 +[2026-03-02 15:28:56] (step=0035669) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 6.978869105850127, LR: 0.0003 +[2026-03-02 15:29:04] (step=0035670) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 6.97906476227744, LR: 0.0003 +[2026-03-02 15:29:11] (step=0035671) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 6.979260418704754, LR: 0.0003 +[2026-03-02 15:29:19] (step=0035672) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 6.9794560751320684, LR: 0.0003 +[2026-03-02 15:29:27] (step=0035673) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 6.979651731559382, LR: 0.0003 +[2026-03-02 15:29:35] (step=0035674) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 6.979847387986696, LR: 0.0003 +[2026-03-02 15:29:43] (step=0035675) Train Loss: 0.4317, Train Steps/Sec: 0.12, Epoch: 6.980043044414009, LR: 0.0003 +[2026-03-02 15:29:51] (step=0035676) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 6.980238700841323, LR: 0.0003 +[2026-03-02 15:29:59] (step=0035677) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.980434357268637, LR: 0.0003 +[2026-03-02 15:30:07] (step=0035678) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.98063001369595, LR: 0.0003 +[2026-03-02 15:30:15] (step=0035679) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 6.980825670123264, LR: 0.0003 +[2026-03-02 15:30:22] (step=0035680) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 6.981021326550577, LR: 0.0003 +[2026-03-02 15:30:30] (step=0035681) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 6.981216982977891, LR: 0.0003 +[2026-03-02 15:30:38] (step=0035682) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.981412639405204, LR: 0.0003 +[2026-03-02 15:30:46] (step=0035683) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 6.981608295832518, LR: 0.0003 +[2026-03-02 15:30:54] (step=0035684) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 6.981803952259832, LR: 0.0003 +[2026-03-02 15:31:02] (step=0035685) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 6.981999608687145, LR: 0.0003 +[2026-03-02 15:31:10] (step=0035686) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 6.982195265114459, LR: 0.0003 +[2026-03-02 15:31:18] (step=0035687) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 6.982390921541772, LR: 0.0003 +[2026-03-02 15:31:25] (step=0035688) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.982586577969086, LR: 0.0003 +[2026-03-02 15:31:33] (step=0035689) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 6.9827822343964, LR: 0.0003 +[2026-03-02 15:31:41] (step=0035690) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.9829778908237135, LR: 0.0003 +[2026-03-02 15:31:49] (step=0035691) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 6.9831735472510275, LR: 0.0003 +[2026-03-02 15:31:57] (step=0035692) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 6.983369203678341, LR: 0.0003 +[2026-03-02 15:32:05] (step=0035693) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 6.983564860105655, LR: 0.0003 +[2026-03-02 15:32:13] (step=0035694) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 6.983760516532968, LR: 0.0003 +[2026-03-02 15:32:20] (step=0035695) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 6.983956172960282, LR: 0.0003 +[2026-03-02 15:32:28] (step=0035696) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 6.984151829387596, LR: 0.0003 +[2026-03-02 15:32:36] (step=0035697) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.984347485814909, LR: 0.0003 +[2026-03-02 15:32:44] (step=0035698) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 6.984543142242223, LR: 0.0003 +[2026-03-02 15:32:52] (step=0035699) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 6.984738798669536, LR: 0.0003 +[2026-03-02 15:33:00] (step=0035700) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 6.98493445509685, LR: 0.0003 +[2026-03-02 15:33:08] (step=0035701) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 6.985130111524164, LR: 0.0003 +[2026-03-02 15:33:15] (step=0035702) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 6.985325767951477, LR: 0.0003 +[2026-03-02 15:33:23] (step=0035703) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 6.985521424378791, LR: 0.0003 +[2026-03-02 15:33:31] (step=0035704) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 6.985717080806104, LR: 0.0003 +[2026-03-02 15:33:39] (step=0035705) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 6.985912737233418, LR: 0.0003 +[2026-03-02 15:33:47] (step=0035706) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 6.986108393660731, LR: 0.0003 +[2026-03-02 15:33:55] (step=0035707) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 6.986304050088045, LR: 0.0003 +[2026-03-02 15:34:03] (step=0035708) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.986499706515359, LR: 0.0003 +[2026-03-02 15:34:10] (step=0035709) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 6.9866953629426725, LR: 0.0003 +[2026-03-02 15:34:18] (step=0035710) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 6.9868910193699865, LR: 0.0003 +[2026-03-02 15:34:26] (step=0035711) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.9870866757973, LR: 0.0003 +[2026-03-02 15:34:34] (step=0035712) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 6.987282332224614, LR: 0.0003 +[2026-03-02 15:34:42] (step=0035713) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 6.987477988651928, LR: 0.0003 +[2026-03-02 15:34:50] (step=0035714) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 6.987673645079241, LR: 0.0003 +[2026-03-02 15:34:58] (step=0035715) Train Loss: 0.4646, Train Steps/Sec: 0.12, Epoch: 6.987869301506555, LR: 0.0003 +[2026-03-02 15:35:06] (step=0035716) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 6.988064957933868, LR: 0.0003 +[2026-03-02 15:35:14] (step=0035717) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 6.988260614361182, LR: 0.0003 +[2026-03-02 15:35:21] (step=0035718) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 6.988456270788495, LR: 0.0003 +[2026-03-02 15:35:29] (step=0035719) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 6.988651927215809, LR: 0.0003 +[2026-03-02 15:35:37] (step=0035720) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 6.988847583643123, LR: 0.0003 +[2026-03-02 15:35:45] (step=0035721) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 6.989043240070436, LR: 0.0003 +[2026-03-02 15:35:53] (step=0035722) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 6.98923889649775, LR: 0.0003 +[2026-03-02 15:36:01] (step=0035723) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 6.989434552925063, LR: 0.0003 +[2026-03-02 15:36:09] (step=0035724) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 6.989630209352377, LR: 0.0003 +[2026-03-02 15:36:16] (step=0035725) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 6.989825865779691, LR: 0.0003 +[2026-03-02 15:36:24] (step=0035726) Train Loss: 0.4536, Train Steps/Sec: 0.12, Epoch: 6.990021522207004, LR: 0.0003 +[2026-03-02 15:36:32] (step=0035727) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 6.990217178634318, LR: 0.0003 +[2026-03-02 15:36:40] (step=0035728) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.9904128350616315, LR: 0.0003 +[2026-03-02 15:36:48] (step=0035729) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 6.9906084914889455, LR: 0.0003 +[2026-03-02 15:36:56] (step=0035730) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 6.9908041479162595, LR: 0.0003 +[2026-03-02 15:37:04] (step=0035731) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 6.990999804343573, LR: 0.0003 +[2026-03-02 15:37:12] (step=0035732) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 6.991195460770887, LR: 0.0003 +[2026-03-02 15:37:20] (step=0035733) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 6.9913911171982, LR: 0.0003 +[2026-03-02 15:37:27] (step=0035734) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 6.991586773625514, LR: 0.0003 +[2026-03-02 15:37:35] (step=0035735) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 6.991782430052827, LR: 0.0003 +[2026-03-02 15:37:43] (step=0035736) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 6.991978086480141, LR: 0.0003 +[2026-03-02 15:37:51] (step=0035737) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.992173742907455, LR: 0.0003 +[2026-03-02 15:37:59] (step=0035738) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 6.992369399334768, LR: 0.0003 +[2026-03-02 15:38:07] (step=0035739) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 6.992565055762082, LR: 0.0003 +[2026-03-02 15:38:15] (step=0035740) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.992760712189395, LR: 0.0003 +[2026-03-02 15:38:22] (step=0035741) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 6.992956368616709, LR: 0.0003 +[2026-03-02 15:38:30] (step=0035742) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 6.993152025044023, LR: 0.0003 +[2026-03-02 15:38:38] (step=0035743) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.993347681471336, LR: 0.0003 +[2026-03-02 15:38:46] (step=0035744) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 6.99354333789865, LR: 0.0003 +[2026-03-02 15:38:54] (step=0035745) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 6.993738994325963, LR: 0.0003 +[2026-03-02 15:39:02] (step=0035746) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 6.993934650753277, LR: 0.0003 +[2026-03-02 15:39:10] (step=0035747) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 6.9941303071805905, LR: 0.0003 +[2026-03-02 15:39:17] (step=0035748) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 6.9943259636079045, LR: 0.0003 +[2026-03-02 15:39:25] (step=0035749) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 6.9945216200352185, LR: 0.0003 +[2026-03-02 15:39:33] (step=0035750) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 6.994717276462532, LR: 0.0003 +[2026-03-02 15:39:41] (step=0035751) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 6.994912932889846, LR: 0.0003 +[2026-03-02 15:39:49] (step=0035752) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 6.995108589317159, LR: 0.0003 +[2026-03-02 15:39:57] (step=0035753) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 6.995304245744473, LR: 0.0003 +[2026-03-02 15:40:05] (step=0035754) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.995499902171787, LR: 0.0003 +[2026-03-02 15:40:13] (step=0035755) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 6.9956955585991, LR: 0.0003 +[2026-03-02 15:40:20] (step=0035756) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 6.995891215026414, LR: 0.0003 +[2026-03-02 15:40:28] (step=0035757) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 6.996086871453727, LR: 0.0003 +[2026-03-02 15:40:36] (step=0035758) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 6.996282527881041, LR: 0.0003 +[2026-03-02 15:40:44] (step=0035759) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 6.996478184308354, LR: 0.0003 +[2026-03-02 15:40:52] (step=0035760) Train Loss: 0.4550, Train Steps/Sec: 0.12, Epoch: 6.996673840735668, LR: 0.0003 +[2026-03-02 15:41:00] (step=0035761) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 6.996869497162982, LR: 0.0003 +[2026-03-02 15:41:08] (step=0035762) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 6.997065153590295, LR: 0.0003 +[2026-03-02 15:41:16] (step=0035763) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 6.997260810017609, LR: 0.0003 +[2026-03-02 15:41:23] (step=0035764) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 6.997456466444922, LR: 0.0003 +[2026-03-02 15:41:31] (step=0035765) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 6.997652122872236, LR: 0.0003 +[2026-03-02 15:41:39] (step=0035766) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 6.99784777929955, LR: 0.0003 +[2026-03-02 15:41:47] (step=0035767) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 6.9980434357268635, LR: 0.0003 +[2026-03-02 15:41:55] (step=0035768) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 6.9982390921541775, LR: 0.0003 +[2026-03-02 15:42:03] (step=0035769) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 6.998434748581491, LR: 0.0003 +[2026-03-02 15:42:11] (step=0035770) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 6.998630405008805, LR: 0.0003 +[2026-03-02 15:42:19] (step=0035771) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 6.998826061436118, LR: 0.0003 +[2026-03-02 15:42:26] (step=0035772) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 6.999021717863432, LR: 0.0003 +[2026-03-02 15:42:34] (step=0035773) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 6.999217374290746, LR: 0.0003 +[2026-03-02 15:42:42] (step=0035774) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 6.999413030718059, LR: 0.0003 +[2026-03-02 15:42:50] (step=0035775) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 6.999608687145373, LR: 0.0003 +[2026-03-02 15:42:58] (step=0035776) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 6.999804343572686, LR: 0.0003 +[2026-03-02 15:43:06] (step=0035777) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 7.0, LR: 0.0003 +[2026-03-02 15:43:06] Beginning epoch 7... +[2026-03-02 15:43:16] (step=0035778) Train Loss: 0.4503, Train Steps/Sec: 0.10, Epoch: 7.000195656427314, LR: 0.0003 +[2026-03-02 15:43:24] (step=0035779) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.000391312854627, LR: 0.0003 +[2026-03-02 15:43:32] (step=0035780) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.000586969281941, LR: 0.0003 +[2026-03-02 15:43:40] (step=0035781) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.000782625709254, LR: 0.0003 +[2026-03-02 15:43:47] (step=0035782) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.000978282136568, LR: 0.0003 +[2026-03-02 15:43:55] (step=0035783) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.001173938563882, LR: 0.0003 +[2026-03-02 15:44:03] (step=0035784) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.001369594991195, LR: 0.0003 +[2026-03-02 15:44:11] (step=0035785) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.001565251418509, LR: 0.0003 +[2026-03-02 15:44:19] (step=0035786) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.0017609078458225, LR: 0.0003 +[2026-03-02 15:44:27] (step=0035787) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.0019565642731365, LR: 0.0003 +[2026-03-02 15:44:35] (step=0035788) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.00215222070045, LR: 0.0003 +[2026-03-02 15:44:42] (step=0035789) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.002347877127764, LR: 0.0003 +[2026-03-02 15:44:50] (step=0035790) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 7.002543533555078, LR: 0.0003 +[2026-03-02 15:44:58] (step=0035791) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.002739189982391, LR: 0.0003 +[2026-03-02 15:45:06] (step=0035792) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.002934846409705, LR: 0.0003 +[2026-03-02 15:45:14] (step=0035793) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.003130502837018, LR: 0.0003 +[2026-03-02 15:45:22] (step=0035794) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 7.003326159264332, LR: 0.0003 +[2026-03-02 15:45:30] (step=0035795) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.003521815691646, LR: 0.0003 +[2026-03-02 15:45:37] (step=0035796) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.003717472118959, LR: 0.0003 +[2026-03-02 15:45:45] (step=0035797) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 7.003913128546273, LR: 0.0003 +[2026-03-02 15:45:53] (step=0035798) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.004108784973586, LR: 0.0003 +[2026-03-02 15:46:01] (step=0035799) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.0043044414009, LR: 0.0003 +[2026-03-02 15:46:09] (step=0035800) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.004500097828213, LR: 0.0003 +[2026-03-02 15:46:17] (step=0035801) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.004695754255527, LR: 0.0003 +[2026-03-02 15:46:25] (step=0035802) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.004891410682841, LR: 0.0003 +[2026-03-02 15:46:32] (step=0035803) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.005087067110154, LR: 0.0003 +[2026-03-02 15:46:40] (step=0035804) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.005282723537468, LR: 0.0003 +[2026-03-02 15:46:48] (step=0035805) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.0054783799647815, LR: 0.0003 +[2026-03-02 15:46:56] (step=0035806) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.0056740363920955, LR: 0.0003 +[2026-03-02 15:47:04] (step=0035807) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.0058696928194095, LR: 0.0003 +[2026-03-02 15:47:12] (step=0035808) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.006065349246723, LR: 0.0003 +[2026-03-02 15:47:20] (step=0035809) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.006261005674037, LR: 0.0003 +[2026-03-02 15:47:28] (step=0035810) Train Loss: 0.4290, Train Steps/Sec: 0.12, Epoch: 7.00645666210135, LR: 0.0003 +[2026-03-02 15:47:36] (step=0035811) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.006652318528664, LR: 0.0003 +[2026-03-02 15:47:43] (step=0035812) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.006847974955977, LR: 0.0003 +[2026-03-02 15:47:51] (step=0035813) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.007043631383291, LR: 0.0003 +[2026-03-02 15:47:59] (step=0035814) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.007239287810605, LR: 0.0003 +[2026-03-02 15:48:07] (step=0035815) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.007434944237918, LR: 0.0003 +[2026-03-02 15:48:15] (step=0035816) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.007630600665232, LR: 0.0003 +[2026-03-02 15:48:23] (step=0035817) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.007826257092545, LR: 0.0003 +[2026-03-02 15:48:31] (step=0035818) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.008021913519859, LR: 0.0003 +[2026-03-02 15:48:38] (step=0035819) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.008217569947173, LR: 0.0003 +[2026-03-02 15:48:46] (step=0035820) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.008413226374486, LR: 0.0003 +[2026-03-02 15:48:54] (step=0035821) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.0086088828018, LR: 0.0003 +[2026-03-02 15:49:02] (step=0035822) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.008804539229113, LR: 0.0003 +[2026-03-02 15:49:10] (step=0035823) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.009000195656427, LR: 0.0003 +[2026-03-02 15:49:18] (step=0035824) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.0091958520837405, LR: 0.0003 +[2026-03-02 15:49:26] (step=0035825) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.0093915085110545, LR: 0.0003 +[2026-03-02 15:49:33] (step=0035826) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.0095871649383685, LR: 0.0003 +[2026-03-02 15:49:41] (step=0035827) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.009782821365682, LR: 0.0003 +[2026-03-02 15:49:49] (step=0035828) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.009978477792996, LR: 0.0003 +[2026-03-02 15:49:57] (step=0035829) Train Loss: 0.4448, Train Steps/Sec: 0.12, Epoch: 7.010174134220309, LR: 0.0003 +[2026-03-02 15:50:05] (step=0035830) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.010369790647623, LR: 0.0003 +[2026-03-02 15:50:13] (step=0035831) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.010565447074937, LR: 0.0003 +[2026-03-02 15:50:21] (step=0035832) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.01076110350225, LR: 0.0003 +[2026-03-02 15:50:29] (step=0035833) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.010956759929564, LR: 0.0003 +[2026-03-02 15:50:36] (step=0035834) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.011152416356877, LR: 0.0003 +[2026-03-02 15:50:44] (step=0035835) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 7.011348072784191, LR: 0.0003 +[2026-03-02 15:50:52] (step=0035836) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.011543729211505, LR: 0.0003 +[2026-03-02 15:51:00] (step=0035837) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.011739385638818, LR: 0.0003 +[2026-03-02 15:51:08] (step=0035838) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.011935042066132, LR: 0.0003 +[2026-03-02 15:51:16] (step=0035839) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.012130698493445, LR: 0.0003 +[2026-03-02 15:51:24] (step=0035840) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 7.012326354920759, LR: 0.0003 +[2026-03-02 15:51:31] (step=0035841) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.012522011348072, LR: 0.0003 +[2026-03-02 15:51:39] (step=0035842) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 7.012717667775386, LR: 0.0003 +[2026-03-02 15:51:47] (step=0035843) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.0129133242027, LR: 0.0003 +[2026-03-02 15:51:55] (step=0035844) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.0131089806300135, LR: 0.0003 +[2026-03-02 15:52:03] (step=0035845) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.0133046370573275, LR: 0.0003 +[2026-03-02 15:52:11] (step=0035846) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.013500293484641, LR: 0.0003 +[2026-03-02 15:52:19] (step=0035847) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.013695949911955, LR: 0.0003 +[2026-03-02 15:52:27] (step=0035848) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.013891606339269, LR: 0.0003 +[2026-03-02 15:52:34] (step=0035849) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.014087262766582, LR: 0.0003 +[2026-03-02 15:52:42] (step=0035850) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.014282919193896, LR: 0.0003 +[2026-03-02 15:52:50] (step=0035851) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.014478575621209, LR: 0.0003 +[2026-03-02 15:52:58] (step=0035852) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.014674232048523, LR: 0.0003 +[2026-03-02 15:53:06] (step=0035853) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.014869888475836, LR: 0.0003 +[2026-03-02 15:53:14] (step=0035854) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.01506554490315, LR: 0.0003 +[2026-03-02 15:53:22] (step=0035855) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 7.015261201330464, LR: 0.0003 +[2026-03-02 15:53:29] (step=0035856) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.015456857757777, LR: 0.0003 +[2026-03-02 15:53:37] (step=0035857) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.015652514185091, LR: 0.0003 +[2026-03-02 15:53:45] (step=0035858) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.015848170612404, LR: 0.0003 +[2026-03-02 15:53:53] (step=0035859) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.016043827039718, LR: 0.0003 +[2026-03-02 15:54:01] (step=0035860) Train Loss: 0.4420, Train Steps/Sec: 0.12, Epoch: 7.016239483467032, LR: 0.0003 +[2026-03-02 15:54:09] (step=0035861) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.016435139894345, LR: 0.0003 +[2026-03-02 15:54:17] (step=0035862) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.016630796321659, LR: 0.0003 +[2026-03-02 15:54:25] (step=0035863) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 7.0168264527489725, LR: 0.0003 +[2026-03-02 15:54:33] (step=0035864) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.0170221091762865, LR: 0.0003 +[2026-03-02 15:54:40] (step=0035865) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.0172177656036, LR: 0.0003 +[2026-03-02 15:54:48] (step=0035866) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.017413422030914, LR: 0.0003 +[2026-03-02 15:54:56] (step=0035867) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.017609078458228, LR: 0.0003 +[2026-03-02 15:55:04] (step=0035868) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.017804734885541, LR: 0.0003 +[2026-03-02 15:55:12] (step=0035869) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.018000391312855, LR: 0.0003 +[2026-03-02 15:55:20] (step=0035870) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.018196047740168, LR: 0.0003 +[2026-03-02 15:55:28] (step=0035871) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 7.018391704167482, LR: 0.0003 +[2026-03-02 15:55:35] (step=0035872) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.018587360594796, LR: 0.0003 +[2026-03-02 15:55:43] (step=0035873) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.018783017022109, LR: 0.0003 +[2026-03-02 15:55:51] (step=0035874) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 7.018978673449423, LR: 0.0003 +[2026-03-02 15:55:59] (step=0035875) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.019174329876736, LR: 0.0003 +[2026-03-02 15:56:07] (step=0035876) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.01936998630405, LR: 0.0003 +[2026-03-02 15:56:15] (step=0035877) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.019565642731363, LR: 0.0003 +[2026-03-02 15:56:23] (step=0035878) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 7.019761299158677, LR: 0.0003 +[2026-03-02 15:56:31] (step=0035879) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.019956955585991, LR: 0.0003 +[2026-03-02 15:56:38] (step=0035880) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.020152612013304, LR: 0.0003 +[2026-03-02 15:56:46] (step=0035881) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.020348268440618, LR: 0.0003 +[2026-03-02 15:56:54] (step=0035882) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.0205439248679316, LR: 0.0003 +[2026-03-02 15:57:02] (step=0035883) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.020739581295246, LR: 0.0003 +[2026-03-02 15:57:10] (step=0035884) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.02093523772256, LR: 0.0003 +[2026-03-02 15:57:18] (step=0035885) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.021130894149873, LR: 0.0003 +[2026-03-02 15:57:26] (step=0035886) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.021326550577187, LR: 0.0003 +[2026-03-02 15:57:34] (step=0035887) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.0215222070045, LR: 0.0003 +[2026-03-02 15:57:41] (step=0035888) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.021717863431814, LR: 0.0003 +[2026-03-02 15:57:49] (step=0035889) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.021913519859127, LR: 0.0003 +[2026-03-02 15:57:57] (step=0035890) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.022109176286441, LR: 0.0003 +[2026-03-02 15:58:05] (step=0035891) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 7.022304832713755, LR: 0.0003 +[2026-03-02 15:58:13] (step=0035892) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.022500489141068, LR: 0.0003 +[2026-03-02 15:58:21] (step=0035893) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.022696145568382, LR: 0.0003 +[2026-03-02 15:58:29] (step=0035894) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.022891801995695, LR: 0.0003 +[2026-03-02 15:58:36] (step=0035895) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 7.023087458423009, LR: 0.0003 +[2026-03-02 15:58:44] (step=0035896) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.023283114850323, LR: 0.0003 +[2026-03-02 15:58:52] (step=0035897) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.023478771277636, LR: 0.0003 +[2026-03-02 15:59:00] (step=0035898) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.02367442770495, LR: 0.0003 +[2026-03-02 15:59:08] (step=0035899) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.023870084132263, LR: 0.0003 +[2026-03-02 15:59:16] (step=0035900) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.024065740559577, LR: 0.0003 +[2026-03-02 15:59:24] (step=0035901) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.0242613969868914, LR: 0.0003 +[2026-03-02 15:59:31] (step=0035902) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.024457053414205, LR: 0.0003 +[2026-03-02 15:59:39] (step=0035903) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.024652709841519, LR: 0.0003 +[2026-03-02 15:59:47] (step=0035904) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 7.024848366268832, LR: 0.0003 +[2026-03-02 15:59:55] (step=0035905) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.025044022696146, LR: 0.0003 +[2026-03-02 16:00:03] (step=0035906) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.025239679123459, LR: 0.0003 +[2026-03-02 16:00:11] (step=0035907) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.025435335550773, LR: 0.0003 +[2026-03-02 16:00:19] (step=0035908) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.025630991978087, LR: 0.0003 +[2026-03-02 16:00:27] (step=0035909) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.0258266484054, LR: 0.0003 +[2026-03-02 16:00:34] (step=0035910) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.026022304832714, LR: 0.0003 +[2026-03-02 16:00:42] (step=0035911) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.026217961260027, LR: 0.0003 +[2026-03-02 16:00:50] (step=0035912) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.026413617687341, LR: 0.0003 +[2026-03-02 16:00:58] (step=0035913) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.026609274114655, LR: 0.0003 +[2026-03-02 16:01:06] (step=0035914) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.026804930541968, LR: 0.0003 +[2026-03-02 16:01:14] (step=0035915) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 7.027000586969282, LR: 0.0003 +[2026-03-02 16:01:22] (step=0035916) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.027196243396595, LR: 0.0003 +[2026-03-02 16:01:30] (step=0035917) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.027391899823909, LR: 0.0003 +[2026-03-02 16:01:37] (step=0035918) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.0275875562512224, LR: 0.0003 +[2026-03-02 16:01:45] (step=0035919) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.0277832126785365, LR: 0.0003 +[2026-03-02 16:01:53] (step=0035920) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.0279788691058505, LR: 0.0003 +[2026-03-02 16:02:01] (step=0035921) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.028174525533164, LR: 0.0003 +[2026-03-02 16:02:09] (step=0035922) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.028370181960478, LR: 0.0003 +[2026-03-02 16:02:17] (step=0035923) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.028565838387791, LR: 0.0003 +[2026-03-02 16:02:25] (step=0035924) Train Loss: 0.4513, Train Steps/Sec: 0.12, Epoch: 7.028761494815105, LR: 0.0003 +[2026-03-02 16:02:33] (step=0035925) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.028957151242419, LR: 0.0003 +[2026-03-02 16:02:40] (step=0035926) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.029152807669732, LR: 0.0003 +[2026-03-02 16:02:48] (step=0035927) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.029348464097046, LR: 0.0003 +[2026-03-02 16:02:56] (step=0035928) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.029544120524359, LR: 0.0003 +[2026-03-02 16:03:04] (step=0035929) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.029739776951673, LR: 0.0003 +[2026-03-02 16:03:12] (step=0035930) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.029935433378986, LR: 0.0003 +[2026-03-02 16:03:20] (step=0035931) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.0301310898063, LR: 0.0003 +[2026-03-02 16:03:28] (step=0035932) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.030326746233614, LR: 0.0003 +[2026-03-02 16:03:36] (step=0035933) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 7.030522402660927, LR: 0.0003 +[2026-03-02 16:03:43] (step=0035934) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.030718059088241, LR: 0.0003 +[2026-03-02 16:03:51] (step=0035935) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.030913715515554, LR: 0.0003 +[2026-03-02 16:03:59] (step=0035936) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.031109371942868, LR: 0.0003 +[2026-03-02 16:04:07] (step=0035937) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.031305028370182, LR: 0.0003 +[2026-03-02 16:04:15] (step=0035938) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.0315006847974955, LR: 0.0003 +[2026-03-02 16:04:23] (step=0035939) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.0316963412248095, LR: 0.0003 +[2026-03-02 16:04:31] (step=0035940) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.031891997652123, LR: 0.0003 +[2026-03-02 16:04:39] (step=0035941) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.032087654079437, LR: 0.0003 +[2026-03-02 16:04:46] (step=0035942) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.03228331050675, LR: 0.0003 +[2026-03-02 16:04:54] (step=0035943) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 7.032478966934064, LR: 0.0003 +[2026-03-02 16:05:02] (step=0035944) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.032674623361378, LR: 0.0003 +[2026-03-02 16:05:10] (step=0035945) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.032870279788691, LR: 0.0003 +[2026-03-02 16:05:18] (step=0035946) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.033065936216005, LR: 0.0003 +[2026-03-02 16:05:26] (step=0035947) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.033261592643318, LR: 0.0003 +[2026-03-02 16:05:34] (step=0035948) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.033457249070632, LR: 0.0003 +[2026-03-02 16:05:41] (step=0035949) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.033652905497946, LR: 0.0003 +[2026-03-02 16:05:49] (step=0035950) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.033848561925259, LR: 0.0003 +[2026-03-02 16:05:57] (step=0035951) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 7.034044218352573, LR: 0.0003 +[2026-03-02 16:06:05] (step=0035952) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.034239874779886, LR: 0.0003 +[2026-03-02 16:06:13] (step=0035953) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.0344355312072, LR: 0.0003 +[2026-03-02 16:06:21] (step=0035954) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.034631187634514, LR: 0.0003 +[2026-03-02 16:06:29] (step=0035955) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.034826844061827, LR: 0.0003 +[2026-03-02 16:06:37] (step=0035956) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 7.035022500489141, LR: 0.0003 +[2026-03-02 16:06:44] (step=0035957) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.0352181569164545, LR: 0.0003 +[2026-03-02 16:06:52] (step=0035958) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.0354138133437685, LR: 0.0003 +[2026-03-02 16:07:00] (step=0035959) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.035609469771082, LR: 0.0003 +[2026-03-02 16:07:08] (step=0035960) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.035805126198396, LR: 0.0003 +[2026-03-02 16:07:16] (step=0035961) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.03600078262571, LR: 0.0003 +[2026-03-02 16:07:24] (step=0035962) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.036196439053023, LR: 0.0003 +[2026-03-02 16:07:32] (step=0035963) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.036392095480337, LR: 0.0003 +[2026-03-02 16:07:39] (step=0035964) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 7.03658775190765, LR: 0.0003 +[2026-03-02 16:07:47] (step=0035965) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.036783408334964, LR: 0.0003 +[2026-03-02 16:07:55] (step=0035966) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.036979064762278, LR: 0.0003 +[2026-03-02 16:08:03] (step=0035967) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.037174721189591, LR: 0.0003 +[2026-03-02 16:08:11] (step=0035968) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.037370377616905, LR: 0.0003 +[2026-03-02 16:08:19] (step=0035969) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.037566034044218, LR: 0.0003 +[2026-03-02 16:08:27] (step=0035970) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.037761690471532, LR: 0.0003 +[2026-03-02 16:08:35] (step=0035971) Train Loss: 0.4459, Train Steps/Sec: 0.12, Epoch: 7.037957346898845, LR: 0.0003 +[2026-03-02 16:08:43] (step=0035972) Train Loss: 0.4253, Train Steps/Sec: 0.13, Epoch: 7.038153003326159, LR: 0.0003 +[2026-03-02 16:08:50] (step=0035973) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.038348659753473, LR: 0.0003 +[2026-03-02 16:08:58] (step=0035974) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.038544316180786, LR: 0.0003 +[2026-03-02 16:09:06] (step=0035975) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.0387399726081, LR: 0.0003 +[2026-03-02 16:09:14] (step=0035976) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.0389356290354135, LR: 0.0003 +[2026-03-02 16:09:22] (step=0035977) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 7.0391312854627275, LR: 0.0003 +[2026-03-02 16:09:30] (step=0035978) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.0393269418900415, LR: 0.0003 +[2026-03-02 16:09:38] (step=0035979) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 7.039522598317355, LR: 0.0003 +[2026-03-02 16:09:45] (step=0035980) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.039718254744669, LR: 0.0003 +[2026-03-02 16:09:53] (step=0035981) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.039913911171982, LR: 0.0003 +[2026-03-02 16:10:01] (step=0035982) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.040109567599296, LR: 0.0003 +[2026-03-02 16:10:09] (step=0035983) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.040305224026609, LR: 0.0003 +[2026-03-02 16:10:17] (step=0035984) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.040500880453923, LR: 0.0003 +[2026-03-02 16:10:25] (step=0035985) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.040696536881237, LR: 0.0003 +[2026-03-02 16:10:33] (step=0035986) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.04089219330855, LR: 0.0003 +[2026-03-02 16:10:40] (step=0035987) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.041087849735864, LR: 0.0003 +[2026-03-02 16:10:48] (step=0035988) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.041283506163177, LR: 0.0003 +[2026-03-02 16:10:56] (step=0035989) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.041479162590491, LR: 0.0003 +[2026-03-02 16:11:04] (step=0035990) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.041674819017805, LR: 0.0003 +[2026-03-02 16:11:12] (step=0035991) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.041870475445118, LR: 0.0003 +[2026-03-02 16:11:20] (step=0035992) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.042066131872432, LR: 0.0003 +[2026-03-02 16:11:28] (step=0035993) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.042261788299745, LR: 0.0003 +[2026-03-02 16:11:35] (step=0035994) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.042457444727059, LR: 0.0003 +[2026-03-02 16:11:43] (step=0035995) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.0426531011543725, LR: 0.0003 +[2026-03-02 16:11:51] (step=0035996) Train Loss: 0.4439, Train Steps/Sec: 0.12, Epoch: 7.0428487575816865, LR: 0.0003 +[2026-03-02 16:11:59] (step=0035997) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.0430444140090005, LR: 0.0003 +[2026-03-02 16:12:07] (step=0035998) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.043240070436314, LR: 0.0003 +[2026-03-02 16:12:15] (step=0035999) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.043435726863628, LR: 0.0003 +[2026-03-02 16:12:23] (step=0036000) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.043631383290941, LR: 0.0003 +[2026-03-02 16:12:23] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0036000/ +[2026-03-02 16:12:31] (step=0036001) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.043827039718255, LR: 0.0003 +[2026-03-02 16:12:39] (step=0036002) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.044022696145569, LR: 0.0003 +[2026-03-02 16:12:46] (step=0036003) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.044218352572882, LR: 0.0003 +[2026-03-02 16:12:54] (step=0036004) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.044414009000196, LR: 0.0003 +[2026-03-02 16:13:02] (step=0036005) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.044609665427509, LR: 0.0003 +[2026-03-02 16:13:10] (step=0036006) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.044805321854823, LR: 0.0003 +[2026-03-02 16:13:18] (step=0036007) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.045000978282137, LR: 0.0003 +[2026-03-02 16:13:26] (step=0036008) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.04519663470945, LR: 0.0003 +[2026-03-02 16:13:34] (step=0036009) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 7.045392291136764, LR: 0.0003 +[2026-03-02 16:13:41] (step=0036010) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 7.045587947564077, LR: 0.0003 +[2026-03-02 16:13:49] (step=0036011) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 7.045783603991391, LR: 0.0003 +[2026-03-02 16:13:57] (step=0036012) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.045979260418704, LR: 0.0003 +[2026-03-02 16:14:05] (step=0036013) Train Loss: 0.4690, Train Steps/Sec: 0.13, Epoch: 7.046174916846018, LR: 0.0003 +[2026-03-02 16:14:13] (step=0036014) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.046370573273332, LR: 0.0003 +[2026-03-02 16:14:21] (step=0036015) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.0465662297006455, LR: 0.0003 +[2026-03-02 16:14:29] (step=0036016) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.0467618861279595, LR: 0.0003 +[2026-03-02 16:14:37] (step=0036017) Train Loss: 0.4468, Train Steps/Sec: 0.12, Epoch: 7.046957542555273, LR: 0.0003 +[2026-03-02 16:14:44] (step=0036018) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.047153198982587, LR: 0.0003 +[2026-03-02 16:14:52] (step=0036019) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.047348855409901, LR: 0.0003 +[2026-03-02 16:15:00] (step=0036020) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 7.047544511837214, LR: 0.0003 +[2026-03-02 16:15:08] (step=0036021) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.047740168264528, LR: 0.0003 +[2026-03-02 16:15:16] (step=0036022) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.047935824691841, LR: 0.0003 +[2026-03-02 16:15:24] (step=0036023) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 7.048131481119155, LR: 0.0003 +[2026-03-02 16:15:32] (step=0036024) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.048327137546468, LR: 0.0003 +[2026-03-02 16:15:39] (step=0036025) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.048522793973782, LR: 0.0003 +[2026-03-02 16:15:47] (step=0036026) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.048718450401096, LR: 0.0003 +[2026-03-02 16:15:55] (step=0036027) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.048914106828409, LR: 0.0003 +[2026-03-02 16:16:03] (step=0036028) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.049109763255723, LR: 0.0003 +[2026-03-02 16:16:11] (step=0036029) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.049305419683036, LR: 0.0003 +[2026-03-02 16:16:19] (step=0036030) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 7.04950107611035, LR: 0.0003 +[2026-03-02 16:16:27] (step=0036031) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.049696732537664, LR: 0.0003 +[2026-03-02 16:16:34] (step=0036032) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.049892388964977, LR: 0.0003 +[2026-03-02 16:16:42] (step=0036033) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.050088045392291, LR: 0.0003 +[2026-03-02 16:16:50] (step=0036034) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.0502837018196045, LR: 0.0003 +[2026-03-02 16:16:58] (step=0036035) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.0504793582469185, LR: 0.0003 +[2026-03-02 16:17:06] (step=0036036) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.050675014674232, LR: 0.0003 +[2026-03-02 16:17:14] (step=0036037) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.050870671101546, LR: 0.0003 +[2026-03-02 16:17:22] (step=0036038) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.05106632752886, LR: 0.0003 +[2026-03-02 16:17:29] (step=0036039) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.051261983956173, LR: 0.0003 +[2026-03-02 16:17:37] (step=0036040) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.051457640383487, LR: 0.0003 +[2026-03-02 16:17:45] (step=0036041) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.0516532968108, LR: 0.0003 +[2026-03-02 16:17:53] (step=0036042) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.051848953238114, LR: 0.0003 +[2026-03-02 16:18:01] (step=0036043) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.052044609665428, LR: 0.0003 +[2026-03-02 16:18:09] (step=0036044) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.052240266092741, LR: 0.0003 +[2026-03-02 16:18:17] (step=0036045) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.052435922520055, LR: 0.0003 +[2026-03-02 16:18:25] (step=0036046) Train Loss: 0.4440, Train Steps/Sec: 0.12, Epoch: 7.052631578947368, LR: 0.0003 +[2026-03-02 16:18:32] (step=0036047) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.052827235374682, LR: 0.0003 +[2026-03-02 16:18:40] (step=0036048) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.053022891801995, LR: 0.0003 +[2026-03-02 16:18:48] (step=0036049) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.053218548229309, LR: 0.0003 +[2026-03-02 16:18:56] (step=0036050) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.053414204656623, LR: 0.0003 +[2026-03-02 16:19:04] (step=0036051) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.053609861083936, LR: 0.0003 +[2026-03-02 16:19:12] (step=0036052) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 7.05380551751125, LR: 0.0003 +[2026-03-02 16:19:20] (step=0036053) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.0540011739385635, LR: 0.0003 +[2026-03-02 16:19:28] (step=0036054) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.0541968303658775, LR: 0.0003 +[2026-03-02 16:19:35] (step=0036055) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.0543924867931915, LR: 0.0003 +[2026-03-02 16:19:43] (step=0036056) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.054588143220505, LR: 0.0003 +[2026-03-02 16:19:51] (step=0036057) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.054783799647819, LR: 0.0003 +[2026-03-02 16:19:59] (step=0036058) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 7.054979456075132, LR: 0.0003 +[2026-03-02 16:20:07] (step=0036059) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.055175112502446, LR: 0.0003 +[2026-03-02 16:20:15] (step=0036060) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.05537076892976, LR: 0.0003 +[2026-03-02 16:20:23] (step=0036061) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.055566425357073, LR: 0.0003 +[2026-03-02 16:20:31] (step=0036062) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.055762081784387, LR: 0.0003 +[2026-03-02 16:20:38] (step=0036063) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.0559577382117, LR: 0.0003 +[2026-03-02 16:20:46] (step=0036064) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.056153394639014, LR: 0.0003 +[2026-03-02 16:20:54] (step=0036065) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 7.056349051066327, LR: 0.0003 +[2026-03-02 16:21:02] (step=0036066) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.056544707493641, LR: 0.0003 +[2026-03-02 16:21:10] (step=0036067) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.056740363920955, LR: 0.0003 +[2026-03-02 16:21:18] (step=0036068) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.056936020348268, LR: 0.0003 +[2026-03-02 16:21:26] (step=0036069) Train Loss: 0.4434, Train Steps/Sec: 0.12, Epoch: 7.057131676775582, LR: 0.0003 +[2026-03-02 16:21:34] (step=0036070) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.057327333202895, LR: 0.0003 +[2026-03-02 16:21:41] (step=0036071) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.057522989630209, LR: 0.0003 +[2026-03-02 16:21:49] (step=0036072) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.057718646057523, LR: 0.0003 +[2026-03-02 16:21:57] (step=0036073) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.0579143024848365, LR: 0.0003 +[2026-03-02 16:22:05] (step=0036074) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.0581099589121505, LR: 0.0003 +[2026-03-02 16:22:13] (step=0036075) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.058305615339464, LR: 0.0003 +[2026-03-02 16:22:21] (step=0036076) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 7.058501271766778, LR: 0.0003 +[2026-03-02 16:22:29] (step=0036077) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.058696928194091, LR: 0.0003 +[2026-03-02 16:22:36] (step=0036078) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 7.058892584621405, LR: 0.0003 +[2026-03-02 16:22:44] (step=0036079) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.059088241048719, LR: 0.0003 +[2026-03-02 16:22:52] (step=0036080) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.059283897476032, LR: 0.0003 +[2026-03-02 16:23:00] (step=0036081) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.059479553903346, LR: 0.0003 +[2026-03-02 16:23:08] (step=0036082) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.059675210330659, LR: 0.0003 +[2026-03-02 16:23:16] (step=0036083) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.059870866757973, LR: 0.0003 +[2026-03-02 16:23:24] (step=0036084) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 7.060066523185287, LR: 0.0003 +[2026-03-02 16:23:31] (step=0036085) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.0602621796126, LR: 0.0003 +[2026-03-02 16:23:39] (step=0036086) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.060457836039914, LR: 0.0003 +[2026-03-02 16:23:47] (step=0036087) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.060653492467227, LR: 0.0003 +[2026-03-02 16:23:55] (step=0036088) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.060849148894541, LR: 0.0003 +[2026-03-02 16:24:03] (step=0036089) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.061044805321854, LR: 0.0003 +[2026-03-02 16:24:11] (step=0036090) Train Loss: 0.4515, Train Steps/Sec: 0.12, Epoch: 7.061240461749168, LR: 0.0003 +[2026-03-02 16:24:19] (step=0036091) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.061436118176482, LR: 0.0003 +[2026-03-02 16:24:27] (step=0036092) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.0616317746037955, LR: 0.0003 +[2026-03-02 16:24:35] (step=0036093) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.0618274310311095, LR: 0.0003 +[2026-03-02 16:24:42] (step=0036094) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.062023087458423, LR: 0.0003 +[2026-03-02 16:24:50] (step=0036095) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 7.062218743885737, LR: 0.0003 +[2026-03-02 16:24:58] (step=0036096) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.062414400313051, LR: 0.0003 +[2026-03-02 16:25:06] (step=0036097) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 7.062610056740364, LR: 0.0003 +[2026-03-02 16:25:14] (step=0036098) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 7.062805713167678, LR: 0.0003 +[2026-03-02 16:25:22] (step=0036099) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.063001369594991, LR: 0.0003 +[2026-03-02 16:25:30] (step=0036100) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.063197026022305, LR: 0.0003 +[2026-03-02 16:25:37] (step=0036101) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.063392682449618, LR: 0.0003 +[2026-03-02 16:25:45] (step=0036102) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.063588338876932, LR: 0.0003 +[2026-03-02 16:25:53] (step=0036103) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 7.063783995304246, LR: 0.0003 +[2026-03-02 16:26:01] (step=0036104) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.063979651731559, LR: 0.0003 +[2026-03-02 16:26:09] (step=0036105) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.064175308158873, LR: 0.0003 +[2026-03-02 16:26:17] (step=0036106) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.064370964586186, LR: 0.0003 +[2026-03-02 16:26:25] (step=0036107) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.0645666210135, LR: 0.0003 +[2026-03-02 16:26:33] (step=0036108) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.064762277440814, LR: 0.0003 +[2026-03-02 16:26:40] (step=0036109) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.064957933868127, LR: 0.0003 +[2026-03-02 16:26:48] (step=0036110) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.065153590295441, LR: 0.0003 +[2026-03-02 16:26:56] (step=0036111) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.0653492467227546, LR: 0.0003 +[2026-03-02 16:27:04] (step=0036112) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.065544903150069, LR: 0.0003 +[2026-03-02 16:27:12] (step=0036113) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.065740559577382, LR: 0.0003 +[2026-03-02 16:27:20] (step=0036114) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.065936216004696, LR: 0.0003 +[2026-03-02 16:27:28] (step=0036115) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 7.06613187243201, LR: 0.0003 +[2026-03-02 16:27:35] (step=0036116) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.066327528859323, LR: 0.0003 +[2026-03-02 16:27:43] (step=0036117) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.066523185286637, LR: 0.0003 +[2026-03-02 16:27:51] (step=0036118) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.06671884171395, LR: 0.0003 +[2026-03-02 16:27:59] (step=0036119) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.066914498141264, LR: 0.0003 +[2026-03-02 16:28:07] (step=0036120) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.067110154568578, LR: 0.0003 +[2026-03-02 16:28:15] (step=0036121) Train Loss: 0.4396, Train Steps/Sec: 0.12, Epoch: 7.067305810995891, LR: 0.0003 +[2026-03-02 16:28:23] (step=0036122) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.067501467423205, LR: 0.0003 +[2026-03-02 16:28:31] (step=0036123) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.067697123850518, LR: 0.0003 +[2026-03-02 16:28:38] (step=0036124) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.067892780277832, LR: 0.0003 +[2026-03-02 16:28:46] (step=0036125) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.068088436705146, LR: 0.0003 +[2026-03-02 16:28:54] (step=0036126) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.068284093132459, LR: 0.0003 +[2026-03-02 16:29:02] (step=0036127) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.068479749559773, LR: 0.0003 +[2026-03-02 16:29:10] (step=0036128) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.068675405987086, LR: 0.0003 +[2026-03-02 16:29:18] (step=0036129) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.0688710624144, LR: 0.0003 +[2026-03-02 16:29:26] (step=0036130) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.069066718841714, LR: 0.0003 +[2026-03-02 16:29:34] (step=0036131) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.069262375269028, LR: 0.0003 +[2026-03-02 16:29:41] (step=0036132) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.069458031696342, LR: 0.0003 +[2026-03-02 16:29:49] (step=0036133) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.069653688123655, LR: 0.0003 +[2026-03-02 16:29:57] (step=0036134) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.069849344550969, LR: 0.0003 +[2026-03-02 16:30:05] (step=0036135) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.070045000978282, LR: 0.0003 +[2026-03-02 16:30:13] (step=0036136) Train Loss: 0.4479, Train Steps/Sec: 0.12, Epoch: 7.070240657405596, LR: 0.0003 +[2026-03-02 16:30:21] (step=0036137) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.07043631383291, LR: 0.0003 +[2026-03-02 16:30:29] (step=0036138) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.070631970260223, LR: 0.0003 +[2026-03-02 16:30:37] (step=0036139) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.070827626687537, LR: 0.0003 +[2026-03-02 16:30:44] (step=0036140) Train Loss: 0.4687, Train Steps/Sec: 0.13, Epoch: 7.07102328311485, LR: 0.0003 +[2026-03-02 16:30:52] (step=0036141) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.071218939542164, LR: 0.0003 +[2026-03-02 16:31:00] (step=0036142) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.071414595969477, LR: 0.0003 +[2026-03-02 16:31:08] (step=0036143) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.071610252396791, LR: 0.0003 +[2026-03-02 16:31:16] (step=0036144) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 7.071805908824105, LR: 0.0003 +[2026-03-02 16:31:24] (step=0036145) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.072001565251418, LR: 0.0003 +[2026-03-02 16:31:32] (step=0036146) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.072197221678732, LR: 0.0003 +[2026-03-02 16:31:39] (step=0036147) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.072392878106045, LR: 0.0003 +[2026-03-02 16:31:47] (step=0036148) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.0725885345333595, LR: 0.0003 +[2026-03-02 16:31:55] (step=0036149) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.0727841909606735, LR: 0.0003 +[2026-03-02 16:32:03] (step=0036150) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.072979847387987, LR: 0.0003 +[2026-03-02 16:32:11] (step=0036151) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.073175503815301, LR: 0.0003 +[2026-03-02 16:32:19] (step=0036152) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.073371160242614, LR: 0.0003 +[2026-03-02 16:32:27] (step=0036153) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.073566816669928, LR: 0.0003 +[2026-03-02 16:32:34] (step=0036154) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.073762473097241, LR: 0.0003 +[2026-03-02 16:32:42] (step=0036155) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 7.073958129524555, LR: 0.0003 +[2026-03-02 16:32:50] (step=0036156) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.074153785951869, LR: 0.0003 +[2026-03-02 16:32:58] (step=0036157) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.074349442379182, LR: 0.0003 +[2026-03-02 16:33:06] (step=0036158) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.074545098806496, LR: 0.0003 +[2026-03-02 16:33:14] (step=0036159) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.074740755233809, LR: 0.0003 +[2026-03-02 16:33:22] (step=0036160) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 7.074936411661123, LR: 0.0003 +[2026-03-02 16:33:29] (step=0036161) Train Loss: 0.4655, Train Steps/Sec: 0.13, Epoch: 7.075132068088437, LR: 0.0003 +[2026-03-02 16:33:37] (step=0036162) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.07532772451575, LR: 0.0003 +[2026-03-02 16:33:45] (step=0036163) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.075523380943064, LR: 0.0003 +[2026-03-02 16:33:53] (step=0036164) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.075719037370377, LR: 0.0003 +[2026-03-02 16:34:01] (step=0036165) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 7.075914693797691, LR: 0.0003 +[2026-03-02 16:34:09] (step=0036166) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.0761103502250045, LR: 0.0003 +[2026-03-02 16:34:17] (step=0036167) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.0763060066523185, LR: 0.0003 +[2026-03-02 16:34:25] (step=0036168) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.0765016630796325, LR: 0.0003 +[2026-03-02 16:34:33] (step=0036169) Train Loss: 0.4364, Train Steps/Sec: 0.12, Epoch: 7.076697319506946, LR: 0.0003 +[2026-03-02 16:34:40] (step=0036170) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.07689297593426, LR: 0.0003 +[2026-03-02 16:34:48] (step=0036171) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.077088632361573, LR: 0.0003 +[2026-03-02 16:34:56] (step=0036172) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.077284288788887, LR: 0.0003 +[2026-03-02 16:35:04] (step=0036173) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.077479945216201, LR: 0.0003 +[2026-03-02 16:35:12] (step=0036174) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.077675601643514, LR: 0.0003 +[2026-03-02 16:35:20] (step=0036175) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.077871258070828, LR: 0.0003 +[2026-03-02 16:35:28] (step=0036176) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.078066914498141, LR: 0.0003 +[2026-03-02 16:35:35] (step=0036177) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.078262570925455, LR: 0.0003 +[2026-03-02 16:35:43] (step=0036178) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.078458227352769, LR: 0.0003 +[2026-03-02 16:35:51] (step=0036179) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.078653883780082, LR: 0.0003 +[2026-03-02 16:35:59] (step=0036180) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.078849540207396, LR: 0.0003 +[2026-03-02 16:36:07] (step=0036181) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.079045196634709, LR: 0.0003 +[2026-03-02 16:36:15] (step=0036182) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.079240853062023, LR: 0.0003 +[2026-03-02 16:36:23] (step=0036183) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.079436509489336, LR: 0.0003 +[2026-03-02 16:36:31] (step=0036184) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.07963216591665, LR: 0.0003 +[2026-03-02 16:36:38] (step=0036185) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.079827822343964, LR: 0.0003 +[2026-03-02 16:36:46] (step=0036186) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 7.0800234787712775, LR: 0.0003 +[2026-03-02 16:36:54] (step=0036187) Train Loss: 0.4526, Train Steps/Sec: 0.12, Epoch: 7.0802191351985915, LR: 0.0003 +[2026-03-02 16:37:02] (step=0036188) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.080414791625905, LR: 0.0003 +[2026-03-02 16:37:10] (step=0036189) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 7.080610448053219, LR: 0.0003 +[2026-03-02 16:37:18] (step=0036190) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.080806104480533, LR: 0.0003 +[2026-03-02 16:37:26] (step=0036191) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.081001760907846, LR: 0.0003 +[2026-03-02 16:37:34] (step=0036192) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.08119741733516, LR: 0.0003 +[2026-03-02 16:37:41] (step=0036193) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.081393073762473, LR: 0.0003 +[2026-03-02 16:37:49] (step=0036194) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.081588730189787, LR: 0.0003 +[2026-03-02 16:37:57] (step=0036195) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.0817843866171, LR: 0.0003 +[2026-03-02 16:38:05] (step=0036196) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.081980043044414, LR: 0.0003 +[2026-03-02 16:38:13] (step=0036197) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.082175699471728, LR: 0.0003 +[2026-03-02 16:38:21] (step=0036198) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.082371355899041, LR: 0.0003 +[2026-03-02 16:38:29] (step=0036199) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.082567012326355, LR: 0.0003 +[2026-03-02 16:38:37] (step=0036200) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.082762668753668, LR: 0.0003 +[2026-03-02 16:38:44] (step=0036201) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.082958325180982, LR: 0.0003 +[2026-03-02 16:38:52] (step=0036202) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.083153981608296, LR: 0.0003 +[2026-03-02 16:39:00] (step=0036203) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.083349638035609, LR: 0.0003 +[2026-03-02 16:39:08] (step=0036204) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.083545294462923, LR: 0.0003 +[2026-03-02 16:39:16] (step=0036205) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.0837409508902365, LR: 0.0003 +[2026-03-02 16:39:24] (step=0036206) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 7.0839366073175505, LR: 0.0003 +[2026-03-02 16:39:32] (step=0036207) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.084132263744864, LR: 0.0003 +[2026-03-02 16:39:39] (step=0036208) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.084327920172178, LR: 0.0003 +[2026-03-02 16:39:47] (step=0036209) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.084523576599492, LR: 0.0003 +[2026-03-02 16:39:55] (step=0036210) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.084719233026805, LR: 0.0003 +[2026-03-02 16:40:03] (step=0036211) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.084914889454119, LR: 0.0003 +[2026-03-02 16:40:11] (step=0036212) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.085110545881432, LR: 0.0003 +[2026-03-02 16:40:19] (step=0036213) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 7.085306202308746, LR: 0.0003 +[2026-03-02 16:40:27] (step=0036214) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.08550185873606, LR: 0.0003 +[2026-03-02 16:40:34] (step=0036215) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.085697515163373, LR: 0.0003 +[2026-03-02 16:40:42] (step=0036216) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.085893171590687, LR: 0.0003 +[2026-03-02 16:40:50] (step=0036217) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 7.086088828018, LR: 0.0003 +[2026-03-02 16:40:58] (step=0036218) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.086284484445314, LR: 0.0003 +[2026-03-02 16:41:06] (step=0036219) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.086480140872627, LR: 0.0003 +[2026-03-02 16:41:14] (step=0036220) Train Loss: 0.4450, Train Steps/Sec: 0.12, Epoch: 7.086675797299941, LR: 0.0003 +[2026-03-02 16:41:22] (step=0036221) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.086871453727255, LR: 0.0003 +[2026-03-02 16:41:30] (step=0036222) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 7.087067110154568, LR: 0.0003 +[2026-03-02 16:41:38] (step=0036223) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.087262766581882, LR: 0.0003 +[2026-03-02 16:41:45] (step=0036224) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.0874584230091955, LR: 0.0003 +[2026-03-02 16:41:53] (step=0036225) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.0876540794365095, LR: 0.0003 +[2026-03-02 16:42:01] (step=0036226) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.0878497358638235, LR: 0.0003 +[2026-03-02 16:42:09] (step=0036227) Train Loss: 0.4240, Train Steps/Sec: 0.13, Epoch: 7.088045392291137, LR: 0.0003 +[2026-03-02 16:42:17] (step=0036228) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.088241048718451, LR: 0.0003 +[2026-03-02 16:42:25] (step=0036229) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.088436705145764, LR: 0.0003 +[2026-03-02 16:42:33] (step=0036230) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.088632361573078, LR: 0.0003 +[2026-03-02 16:42:40] (step=0036231) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.088828018000392, LR: 0.0003 +[2026-03-02 16:42:48] (step=0036232) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.089023674427705, LR: 0.0003 +[2026-03-02 16:42:56] (step=0036233) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.089219330855019, LR: 0.0003 +[2026-03-02 16:43:04] (step=0036234) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.089414987282332, LR: 0.0003 +[2026-03-02 16:43:12] (step=0036235) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.089610643709646, LR: 0.0003 +[2026-03-02 16:43:20] (step=0036236) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 7.089806300136959, LR: 0.0003 +[2026-03-02 16:43:28] (step=0036237) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.090001956564273, LR: 0.0003 +[2026-03-02 16:43:36] (step=0036238) Train Loss: 0.4532, Train Steps/Sec: 0.12, Epoch: 7.090197612991587, LR: 0.0003 +[2026-03-02 16:43:44] (step=0036239) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.0903932694189, LR: 0.0003 +[2026-03-02 16:43:51] (step=0036240) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.090588925846214, LR: 0.0003 +[2026-03-02 16:43:59] (step=0036241) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 7.090784582273527, LR: 0.0003 +[2026-03-02 16:44:07] (step=0036242) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.090980238700841, LR: 0.0003 +[2026-03-02 16:44:15] (step=0036243) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 7.091175895128155, LR: 0.0003 +[2026-03-02 16:44:23] (step=0036244) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.0913715515554685, LR: 0.0003 +[2026-03-02 16:44:31] (step=0036245) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.0915672079827825, LR: 0.0003 +[2026-03-02 16:44:39] (step=0036246) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.091762864410096, LR: 0.0003 +[2026-03-02 16:44:46] (step=0036247) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.09195852083741, LR: 0.0003 +[2026-03-02 16:44:54] (step=0036248) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.092154177264723, LR: 0.0003 +[2026-03-02 16:45:02] (step=0036249) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.092349833692037, LR: 0.0003 +[2026-03-02 16:45:10] (step=0036250) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.092545490119351, LR: 0.0003 +[2026-03-02 16:45:18] (step=0036251) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.092741146546664, LR: 0.0003 +[2026-03-02 16:45:26] (step=0036252) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 7.092936802973978, LR: 0.0003 +[2026-03-02 16:45:34] (step=0036253) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 7.093132459401291, LR: 0.0003 +[2026-03-02 16:45:42] (step=0036254) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 7.093328115828605, LR: 0.0003 +[2026-03-02 16:45:49] (step=0036255) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.093523772255919, LR: 0.0003 +[2026-03-02 16:45:57] (step=0036256) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.093719428683232, LR: 0.0003 +[2026-03-02 16:46:05] (step=0036257) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.093915085110546, LR: 0.0003 +[2026-03-02 16:46:13] (step=0036258) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.094110741537859, LR: 0.0003 +[2026-03-02 16:46:21] (step=0036259) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.094306397965173, LR: 0.0003 +[2026-03-02 16:46:29] (step=0036260) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.094502054392486, LR: 0.0003 +[2026-03-02 16:46:36] (step=0036261) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.0946977108198, LR: 0.0003 +[2026-03-02 16:46:44] (step=0036262) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.094893367247114, LR: 0.0003 +[2026-03-02 16:46:52] (step=0036263) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.0950890236744275, LR: 0.0003 +[2026-03-02 16:47:00] (step=0036264) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.0952846801017415, LR: 0.0003 +[2026-03-02 16:47:08] (step=0036265) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 7.095480336529055, LR: 0.0003 +[2026-03-02 16:47:16] (step=0036266) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.095675992956369, LR: 0.0003 +[2026-03-02 16:47:24] (step=0036267) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.095871649383683, LR: 0.0003 +[2026-03-02 16:47:31] (step=0036268) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.096067305810996, LR: 0.0003 +[2026-03-02 16:47:40] (step=0036269) Train Loss: 0.4509, Train Steps/Sec: 0.12, Epoch: 7.09626296223831, LR: 0.0003 +[2026-03-02 16:47:47] (step=0036270) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.096458618665623, LR: 0.0003 +[2026-03-02 16:47:55] (step=0036271) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.096654275092937, LR: 0.0003 +[2026-03-02 16:48:03] (step=0036272) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.09684993152025, LR: 0.0003 +[2026-03-02 16:48:11] (step=0036273) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.097045587947564, LR: 0.0003 +[2026-03-02 16:48:19] (step=0036274) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.097241244374878, LR: 0.0003 +[2026-03-02 16:48:27] (step=0036275) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.097436900802191, LR: 0.0003 +[2026-03-02 16:48:35] (step=0036276) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.097632557229505, LR: 0.0003 +[2026-03-02 16:48:42] (step=0036277) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.097828213656818, LR: 0.0003 +[2026-03-02 16:48:50] (step=0036278) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.098023870084132, LR: 0.0003 +[2026-03-02 16:48:58] (step=0036279) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.098219526511446, LR: 0.0003 +[2026-03-02 16:49:06] (step=0036280) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.098415182938759, LR: 0.0003 +[2026-03-02 16:49:14] (step=0036281) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 7.098610839366073, LR: 0.0003 +[2026-03-02 16:49:22] (step=0036282) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.0988064957933865, LR: 0.0003 +[2026-03-02 16:49:30] (step=0036283) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.0990021522207005, LR: 0.0003 +[2026-03-02 16:49:37] (step=0036284) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.0991978086480145, LR: 0.0003 +[2026-03-02 16:49:45] (step=0036285) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.099393465075328, LR: 0.0003 +[2026-03-02 16:49:53] (step=0036286) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.099589121502642, LR: 0.0003 +[2026-03-02 16:50:01] (step=0036287) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 7.099784777929955, LR: 0.0003 +[2026-03-02 16:50:09] (step=0036288) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.099980434357269, LR: 0.0003 +[2026-03-02 16:50:17] (step=0036289) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.100176090784582, LR: 0.0003 +[2026-03-02 16:50:25] (step=0036290) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 7.100371747211896, LR: 0.0003 +[2026-03-02 16:50:33] (step=0036291) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.10056740363921, LR: 0.0003 +[2026-03-02 16:50:40] (step=0036292) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.100763060066523, LR: 0.0003 +[2026-03-02 16:50:48] (step=0036293) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.100958716493837, LR: 0.0003 +[2026-03-02 16:50:56] (step=0036294) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.10115437292115, LR: 0.0003 +[2026-03-02 16:51:04] (step=0036295) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.101350029348464, LR: 0.0003 +[2026-03-02 16:51:12] (step=0036296) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.101545685775778, LR: 0.0003 +[2026-03-02 16:51:20] (step=0036297) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.101741342203091, LR: 0.0003 +[2026-03-02 16:51:28] (step=0036298) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 7.101936998630405, LR: 0.0003 +[2026-03-02 16:51:36] (step=0036299) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.102132655057718, LR: 0.0003 +[2026-03-02 16:51:43] (step=0036300) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.102328311485032, LR: 0.0003 +[2026-03-02 16:51:51] (step=0036301) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.1025239679123455, LR: 0.0003 +[2026-03-02 16:51:59] (step=0036302) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.1027196243396595, LR: 0.0003 +[2026-03-02 16:52:07] (step=0036303) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.1029152807669735, LR: 0.0003 +[2026-03-02 16:52:15] (step=0036304) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.103110937194287, LR: 0.0003 +[2026-03-02 16:52:23] (step=0036305) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.103306593621601, LR: 0.0003 +[2026-03-02 16:52:31] (step=0036306) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.103502250048914, LR: 0.0003 +[2026-03-02 16:52:38] (step=0036307) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.103697906476228, LR: 0.0003 +[2026-03-02 16:52:46] (step=0036308) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.103893562903542, LR: 0.0003 +[2026-03-02 16:52:54] (step=0036309) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.104089219330855, LR: 0.0003 +[2026-03-02 16:53:02] (step=0036310) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 7.104284875758169, LR: 0.0003 +[2026-03-02 16:53:10] (step=0036311) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.104480532185482, LR: 0.0003 +[2026-03-02 16:53:18] (step=0036312) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.104676188612796, LR: 0.0003 +[2026-03-02 16:53:26] (step=0036313) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.104871845040109, LR: 0.0003 +[2026-03-02 16:53:34] (step=0036314) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.105067501467423, LR: 0.0003 +[2026-03-02 16:53:42] (step=0036315) Train Loss: 0.4414, Train Steps/Sec: 0.12, Epoch: 7.105263157894737, LR: 0.0003 +[2026-03-02 16:53:50] (step=0036316) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.10545881432205, LR: 0.0003 +[2026-03-02 16:53:57] (step=0036317) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.105654470749364, LR: 0.0003 +[2026-03-02 16:54:05] (step=0036318) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.105850127176677, LR: 0.0003 +[2026-03-02 16:54:13] (step=0036319) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.106045783603991, LR: 0.0003 +[2026-03-02 16:54:21] (step=0036320) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.106241440031305, LR: 0.0003 +[2026-03-02 16:54:29] (step=0036321) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.1064370964586185, LR: 0.0003 +[2026-03-02 16:54:37] (step=0036322) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.1066327528859325, LR: 0.0003 +[2026-03-02 16:54:45] (step=0036323) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.106828409313246, LR: 0.0003 +[2026-03-02 16:54:52] (step=0036324) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.10702406574056, LR: 0.0003 +[2026-03-02 16:55:00] (step=0036325) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.107219722167873, LR: 0.0003 +[2026-03-02 16:55:08] (step=0036326) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 7.107415378595187, LR: 0.0003 +[2026-03-02 16:55:16] (step=0036327) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 7.107611035022501, LR: 0.0003 +[2026-03-02 16:55:24] (step=0036328) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.107806691449814, LR: 0.0003 +[2026-03-02 16:55:32] (step=0036329) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.108002347877128, LR: 0.0003 +[2026-03-02 16:55:40] (step=0036330) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.108198004304441, LR: 0.0003 +[2026-03-02 16:55:48] (step=0036331) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.108393660731755, LR: 0.0003 +[2026-03-02 16:55:55] (step=0036332) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.108589317159069, LR: 0.0003 +[2026-03-02 16:56:03] (step=0036333) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.108784973586382, LR: 0.0003 +[2026-03-02 16:56:11] (step=0036334) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.108980630013696, LR: 0.0003 +[2026-03-02 16:56:19] (step=0036335) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 7.109176286441009, LR: 0.0003 +[2026-03-02 16:56:27] (step=0036336) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.109371942868323, LR: 0.0003 +[2026-03-02 16:56:35] (step=0036337) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.109567599295636, LR: 0.0003 +[2026-03-02 16:56:43] (step=0036338) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.10976325572295, LR: 0.0003 +[2026-03-02 16:56:51] (step=0036339) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.109958912150264, LR: 0.0003 +[2026-03-02 16:56:58] (step=0036340) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 7.1101545685775775, LR: 0.0003 +[2026-03-02 16:57:06] (step=0036341) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.1103502250048916, LR: 0.0003 +[2026-03-02 16:57:14] (step=0036342) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.110545881432205, LR: 0.0003 +[2026-03-02 16:57:22] (step=0036343) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.110741537859519, LR: 0.0003 +[2026-03-02 16:57:30] (step=0036344) Train Loss: 0.4611, Train Steps/Sec: 0.12, Epoch: 7.110937194286833, LR: 0.0003 +[2026-03-02 16:57:38] (step=0036345) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.111132850714146, LR: 0.0003 +[2026-03-02 16:57:46] (step=0036346) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.11132850714146, LR: 0.0003 +[2026-03-02 16:57:54] (step=0036347) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.111524163568773, LR: 0.0003 +[2026-03-02 16:58:02] (step=0036348) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.111719819996087, LR: 0.0003 +[2026-03-02 16:58:09] (step=0036349) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.111915476423401, LR: 0.0003 +[2026-03-02 16:58:17] (step=0036350) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 7.112111132850714, LR: 0.0003 +[2026-03-02 16:58:25] (step=0036351) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.112306789278028, LR: 0.0003 +[2026-03-02 16:58:33] (step=0036352) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.112502445705341, LR: 0.0003 +[2026-03-02 16:58:41] (step=0036353) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.112698102132655, LR: 0.0003 +[2026-03-02 16:58:49] (step=0036354) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.112893758559968, LR: 0.0003 +[2026-03-02 16:58:57] (step=0036355) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 7.113089414987282, LR: 0.0003 +[2026-03-02 16:59:05] (step=0036356) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.113285071414596, LR: 0.0003 +[2026-03-02 16:59:12] (step=0036357) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.113480727841909, LR: 0.0003 +[2026-03-02 16:59:20] (step=0036358) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.113676384269223, LR: 0.0003 +[2026-03-02 16:59:28] (step=0036359) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 7.113872040696537, LR: 0.0003 +[2026-03-02 16:59:36] (step=0036360) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.114067697123851, LR: 0.0003 +[2026-03-02 16:59:44] (step=0036361) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 7.114263353551165, LR: 0.0003 +[2026-03-02 16:59:52] (step=0036362) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.114459009978478, LR: 0.0003 +[2026-03-02 17:00:00] (step=0036363) Train Loss: 0.4507, Train Steps/Sec: 0.12, Epoch: 7.114654666405792, LR: 0.0003 +[2026-03-02 17:00:08] (step=0036364) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 7.114850322833105, LR: 0.0003 +[2026-03-02 17:00:16] (step=0036365) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.115045979260419, LR: 0.0003 +[2026-03-02 17:00:23] (step=0036366) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 7.115241635687732, LR: 0.0003 +[2026-03-02 17:00:31] (step=0036367) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.115437292115046, LR: 0.0003 +[2026-03-02 17:00:39] (step=0036368) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.11563294854236, LR: 0.0003 +[2026-03-02 17:00:47] (step=0036369) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.115828604969673, LR: 0.0003 +[2026-03-02 17:00:55] (step=0036370) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.116024261396987, LR: 0.0003 +[2026-03-02 17:01:03] (step=0036371) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.1162199178243, LR: 0.0003 +[2026-03-02 17:01:11] (step=0036372) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.116415574251614, LR: 0.0003 +[2026-03-02 17:01:18] (step=0036373) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.116611230678928, LR: 0.0003 +[2026-03-02 17:01:26] (step=0036374) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.116806887106241, LR: 0.0003 +[2026-03-02 17:01:34] (step=0036375) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.117002543533555, LR: 0.0003 +[2026-03-02 17:01:42] (step=0036376) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.117198199960868, LR: 0.0003 +[2026-03-02 17:01:50] (step=0036377) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.1173938563881824, LR: 0.0003 +[2026-03-02 17:01:58] (step=0036378) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.117589512815496, LR: 0.0003 +[2026-03-02 17:02:06] (step=0036379) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.11778516924281, LR: 0.0003 +[2026-03-02 17:02:13] (step=0036380) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.117980825670124, LR: 0.0003 +[2026-03-02 17:02:21] (step=0036381) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.118176482097437, LR: 0.0003 +[2026-03-02 17:02:29] (step=0036382) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.118372138524751, LR: 0.0003 +[2026-03-02 17:02:37] (step=0036383) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.118567794952064, LR: 0.0003 +[2026-03-02 17:02:45] (step=0036384) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.118763451379378, LR: 0.0003 +[2026-03-02 17:02:53] (step=0036385) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.118959107806692, LR: 0.0003 +[2026-03-02 17:03:01] (step=0036386) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.119154764234005, LR: 0.0003 +[2026-03-02 17:03:08] (step=0036387) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.119350420661319, LR: 0.0003 +[2026-03-02 17:03:16] (step=0036388) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.119546077088632, LR: 0.0003 +[2026-03-02 17:03:24] (step=0036389) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.119741733515946, LR: 0.0003 +[2026-03-02 17:03:32] (step=0036390) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.119937389943259, LR: 0.0003 +[2026-03-02 17:03:40] (step=0036391) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.120133046370573, LR: 0.0003 +[2026-03-02 17:03:48] (step=0036392) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.120328702797887, LR: 0.0003 +[2026-03-02 17:03:56] (step=0036393) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.1205243592252, LR: 0.0003 +[2026-03-02 17:04:04] (step=0036394) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.120720015652514, LR: 0.0003 +[2026-03-02 17:04:12] (step=0036395) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 7.1209156720798275, LR: 0.0003 +[2026-03-02 17:04:19] (step=0036396) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.1211113285071415, LR: 0.0003 +[2026-03-02 17:04:27] (step=0036397) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.1213069849344555, LR: 0.0003 +[2026-03-02 17:04:35] (step=0036398) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.121502641361769, LR: 0.0003 +[2026-03-02 17:04:43] (step=0036399) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.121698297789083, LR: 0.0003 +[2026-03-02 17:04:51] (step=0036400) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.121893954216396, LR: 0.0003 +[2026-03-02 17:04:59] (step=0036401) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.12208961064371, LR: 0.0003 +[2026-03-02 17:05:07] (step=0036402) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.122285267071024, LR: 0.0003 +[2026-03-02 17:05:14] (step=0036403) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.122480923498337, LR: 0.0003 +[2026-03-02 17:05:22] (step=0036404) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.122676579925651, LR: 0.0003 +[2026-03-02 17:05:30] (step=0036405) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.122872236352964, LR: 0.0003 +[2026-03-02 17:05:38] (step=0036406) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.123067892780278, LR: 0.0003 +[2026-03-02 17:05:46] (step=0036407) Train Loss: 0.4504, Train Steps/Sec: 0.12, Epoch: 7.123263549207591, LR: 0.0003 +[2026-03-02 17:05:54] (step=0036408) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.123459205634905, LR: 0.0003 +[2026-03-02 17:06:02] (step=0036409) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.123654862062219, LR: 0.0003 +[2026-03-02 17:06:10] (step=0036410) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.123850518489532, LR: 0.0003 +[2026-03-02 17:06:18] (step=0036411) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 7.124046174916846, LR: 0.0003 +[2026-03-02 17:06:25] (step=0036412) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 7.124241831344159, LR: 0.0003 +[2026-03-02 17:06:33] (step=0036413) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.124437487771473, LR: 0.0003 +[2026-03-02 17:06:41] (step=0036414) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.124633144198787, LR: 0.0003 +[2026-03-02 17:06:49] (step=0036415) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.1248288006261005, LR: 0.0003 +[2026-03-02 17:06:57] (step=0036416) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.1250244570534145, LR: 0.0003 +[2026-03-02 17:07:05] (step=0036417) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.125220113480728, LR: 0.0003 +[2026-03-02 17:07:13] (step=0036418) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.125415769908042, LR: 0.0003 +[2026-03-02 17:07:21] (step=0036419) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.125611426335355, LR: 0.0003 +[2026-03-02 17:07:28] (step=0036420) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.125807082762669, LR: 0.0003 +[2026-03-02 17:07:36] (step=0036421) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.126002739189983, LR: 0.0003 +[2026-03-02 17:07:44] (step=0036422) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.126198395617296, LR: 0.0003 +[2026-03-02 17:07:52] (step=0036423) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.12639405204461, LR: 0.0003 +[2026-03-02 17:08:00] (step=0036424) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 7.126589708471923, LR: 0.0003 +[2026-03-02 17:08:08] (step=0036425) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.126785364899237, LR: 0.0003 +[2026-03-02 17:08:16] (step=0036426) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.126981021326551, LR: 0.0003 +[2026-03-02 17:08:23] (step=0036427) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.127176677753864, LR: 0.0003 +[2026-03-02 17:08:31] (step=0036428) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.127372334181178, LR: 0.0003 +[2026-03-02 17:08:39] (step=0036429) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.127567990608491, LR: 0.0003 +[2026-03-02 17:08:47] (step=0036430) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.127763647035805, LR: 0.0003 +[2026-03-02 17:08:55] (step=0036431) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.127959303463118, LR: 0.0003 +[2026-03-02 17:09:03] (step=0036432) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.128154959890432, LR: 0.0003 +[2026-03-02 17:09:11] (step=0036433) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.128350616317746, LR: 0.0003 +[2026-03-02 17:09:19] (step=0036434) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.1285462727450595, LR: 0.0003 +[2026-03-02 17:09:26] (step=0036435) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.1287419291723735, LR: 0.0003 +[2026-03-02 17:09:34] (step=0036436) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.128937585599687, LR: 0.0003 +[2026-03-02 17:09:42] (step=0036437) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.129133242027001, LR: 0.0003 +[2026-03-02 17:09:50] (step=0036438) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.129328898454315, LR: 0.0003 +[2026-03-02 17:09:58] (step=0036439) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.129524554881628, LR: 0.0003 +[2026-03-02 17:10:06] (step=0036440) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.129720211308942, LR: 0.0003 +[2026-03-02 17:10:14] (step=0036441) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.129915867736255, LR: 0.0003 +[2026-03-02 17:10:22] (step=0036442) Train Loss: 0.4488, Train Steps/Sec: 0.12, Epoch: 7.130111524163569, LR: 0.0003 +[2026-03-02 17:10:30] (step=0036443) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.130307180590882, LR: 0.0003 +[2026-03-02 17:10:37] (step=0036444) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.130502837018196, LR: 0.0003 +[2026-03-02 17:10:45] (step=0036445) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.13069849344551, LR: 0.0003 +[2026-03-02 17:10:53] (step=0036446) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.130894149872823, LR: 0.0003 +[2026-03-02 17:11:01] (step=0036447) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.131089806300137, LR: 0.0003 +[2026-03-02 17:11:09] (step=0036448) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 7.13128546272745, LR: 0.0003 +[2026-03-02 17:11:17] (step=0036449) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 7.131481119154764, LR: 0.0003 +[2026-03-02 17:11:25] (step=0036450) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.131676775582078, LR: 0.0003 +[2026-03-02 17:11:32] (step=0036451) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.131872432009391, LR: 0.0003 +[2026-03-02 17:11:41] (step=0036452) Train Loss: 0.4468, Train Steps/Sec: 0.12, Epoch: 7.132068088436705, LR: 0.0003 +[2026-03-02 17:11:49] (step=0036453) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.1322637448640185, LR: 0.0003 +[2026-03-02 17:11:56] (step=0036454) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.1324594012913325, LR: 0.0003 +[2026-03-02 17:12:04] (step=0036455) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.1326550577186465, LR: 0.0003 +[2026-03-02 17:12:12] (step=0036456) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.13285071414596, LR: 0.0003 +[2026-03-02 17:12:20] (step=0036457) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 7.133046370573274, LR: 0.0003 +[2026-03-02 17:12:28] (step=0036458) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.133242027000587, LR: 0.0003 +[2026-03-02 17:12:36] (step=0036459) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.133437683427901, LR: 0.0003 +[2026-03-02 17:12:44] (step=0036460) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.133633339855214, LR: 0.0003 +[2026-03-02 17:12:52] (step=0036461) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 7.133828996282528, LR: 0.0003 +[2026-03-02 17:12:59] (step=0036462) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.134024652709842, LR: 0.0003 +[2026-03-02 17:13:07] (step=0036463) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.134220309137155, LR: 0.0003 +[2026-03-02 17:13:15] (step=0036464) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.134415965564469, LR: 0.0003 +[2026-03-02 17:13:23] (step=0036465) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.134611621991782, LR: 0.0003 +[2026-03-02 17:13:31] (step=0036466) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.134807278419096, LR: 0.0003 +[2026-03-02 17:13:39] (step=0036467) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 7.13500293484641, LR: 0.0003 +[2026-03-02 17:13:47] (step=0036468) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.135198591273723, LR: 0.0003 +[2026-03-02 17:13:55] (step=0036469) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.135394247701037, LR: 0.0003 +[2026-03-02 17:14:02] (step=0036470) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.13558990412835, LR: 0.0003 +[2026-03-02 17:14:10] (step=0036471) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.135785560555664, LR: 0.0003 +[2026-03-02 17:14:18] (step=0036472) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 7.1359812169829775, LR: 0.0003 +[2026-03-02 17:14:26] (step=0036473) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.1361768734102915, LR: 0.0003 +[2026-03-02 17:14:34] (step=0036474) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.1363725298376055, LR: 0.0003 +[2026-03-02 17:14:42] (step=0036475) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.136568186264919, LR: 0.0003 +[2026-03-02 17:14:50] (step=0036476) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.136763842692233, LR: 0.0003 +[2026-03-02 17:14:57] (step=0036477) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.136959499119546, LR: 0.0003 +[2026-03-02 17:15:05] (step=0036478) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.13715515554686, LR: 0.0003 +[2026-03-02 17:15:13] (step=0036479) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 7.137350811974174, LR: 0.0003 +[2026-03-02 17:15:21] (step=0036480) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.137546468401487, LR: 0.0003 +[2026-03-02 17:15:29] (step=0036481) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.137742124828801, LR: 0.0003 +[2026-03-02 17:15:37] (step=0036482) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.137937781256114, LR: 0.0003 +[2026-03-02 17:15:45] (step=0036483) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.138133437683428, LR: 0.0003 +[2026-03-02 17:15:53] (step=0036484) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.138329094110741, LR: 0.0003 +[2026-03-02 17:16:00] (step=0036485) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.138524750538055, LR: 0.0003 +[2026-03-02 17:16:08] (step=0036486) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.138720406965369, LR: 0.0003 +[2026-03-02 17:16:16] (step=0036487) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.138916063392682, LR: 0.0003 +[2026-03-02 17:16:24] (step=0036488) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 7.139111719819996, LR: 0.0003 +[2026-03-02 17:16:32] (step=0036489) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.139307376247309, LR: 0.0003 +[2026-03-02 17:16:40] (step=0036490) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.139503032674623, LR: 0.0003 +[2026-03-02 17:16:48] (step=0036491) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.139698689101937, LR: 0.0003 +[2026-03-02 17:16:56] (step=0036492) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.1398943455292505, LR: 0.0003 +[2026-03-02 17:17:03] (step=0036493) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.1400900019565645, LR: 0.0003 +[2026-03-02 17:17:11] (step=0036494) Train Loss: 0.4552, Train Steps/Sec: 0.12, Epoch: 7.140285658383878, LR: 0.0003 +[2026-03-02 17:17:19] (step=0036495) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.140481314811192, LR: 0.0003 +[2026-03-02 17:17:27] (step=0036496) Train Loss: 0.4478, Train Steps/Sec: 0.12, Epoch: 7.140676971238505, LR: 0.0003 +[2026-03-02 17:17:35] (step=0036497) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.140872627665819, LR: 0.0003 +[2026-03-02 17:17:43] (step=0036498) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.141068284093133, LR: 0.0003 +[2026-03-02 17:17:51] (step=0036499) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.141263940520446, LR: 0.0003 +[2026-03-02 17:17:59] (step=0036500) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.14145959694776, LR: 0.0003 +[2026-03-02 17:17:59] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0036500/ +[2026-03-02 17:18:07] (step=0036501) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.141655253375073, LR: 0.0003 +[2026-03-02 17:18:15] (step=0036502) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.141850909802387, LR: 0.0003 +[2026-03-02 17:18:22] (step=0036503) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.142046566229701, LR: 0.0003 +[2026-03-02 17:18:30] (step=0036504) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.142242222657014, LR: 0.0003 +[2026-03-02 17:18:38] (step=0036505) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.142437879084328, LR: 0.0003 +[2026-03-02 17:18:46] (step=0036506) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 7.142633535511641, LR: 0.0003 +[2026-03-02 17:18:54] (step=0036507) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 7.142829191938955, LR: 0.0003 +[2026-03-02 17:19:02] (step=0036508) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.143024848366269, LR: 0.0003 +[2026-03-02 17:19:10] (step=0036509) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.143220504793582, LR: 0.0003 +[2026-03-02 17:19:17] (step=0036510) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.143416161220896, LR: 0.0003 +[2026-03-02 17:19:25] (step=0036511) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.1436118176482095, LR: 0.0003 +[2026-03-02 17:19:33] (step=0036512) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 7.1438074740755235, LR: 0.0003 +[2026-03-02 17:19:41] (step=0036513) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.144003130502837, LR: 0.0003 +[2026-03-02 17:19:49] (step=0036514) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.144198786930151, LR: 0.0003 +[2026-03-02 17:19:57] (step=0036515) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.144394443357465, LR: 0.0003 +[2026-03-02 17:20:05] (step=0036516) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.144590099784778, LR: 0.0003 +[2026-03-02 17:20:13] (step=0036517) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.144785756212092, LR: 0.0003 +[2026-03-02 17:20:20] (step=0036518) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.144981412639405, LR: 0.0003 +[2026-03-02 17:20:28] (step=0036519) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.145177069066719, LR: 0.0003 +[2026-03-02 17:20:36] (step=0036520) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.145372725494033, LR: 0.0003 +[2026-03-02 17:20:44] (step=0036521) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.145568381921346, LR: 0.0003 +[2026-03-02 17:20:52] (step=0036522) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.14576403834866, LR: 0.0003 +[2026-03-02 17:21:00] (step=0036523) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.145959694775973, LR: 0.0003 +[2026-03-02 17:21:08] (step=0036524) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.146155351203287, LR: 0.0003 +[2026-03-02 17:21:15] (step=0036525) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.1463510076306, LR: 0.0003 +[2026-03-02 17:21:23] (step=0036526) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.146546664057914, LR: 0.0003 +[2026-03-02 17:21:31] (step=0036527) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.146742320485228, LR: 0.0003 +[2026-03-02 17:21:39] (step=0036528) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.146937976912541, LR: 0.0003 +[2026-03-02 17:21:47] (step=0036529) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.147133633339855, LR: 0.0003 +[2026-03-02 17:21:55] (step=0036530) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.1473292897671685, LR: 0.0003 +[2026-03-02 17:22:03] (step=0036531) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 7.1475249461944825, LR: 0.0003 +[2026-03-02 17:22:11] (step=0036532) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.1477206026217965, LR: 0.0003 +[2026-03-02 17:22:18] (step=0036533) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.14791625904911, LR: 0.0003 +[2026-03-02 17:22:26] (step=0036534) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.148111915476424, LR: 0.0003 +[2026-03-02 17:22:34] (step=0036535) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.148307571903737, LR: 0.0003 +[2026-03-02 17:22:42] (step=0036536) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.148503228331051, LR: 0.0003 +[2026-03-02 17:22:50] (step=0036537) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.148698884758364, LR: 0.0003 +[2026-03-02 17:22:58] (step=0036538) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.148894541185678, LR: 0.0003 +[2026-03-02 17:23:06] (step=0036539) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 7.149090197612992, LR: 0.0003 +[2026-03-02 17:23:13] (step=0036540) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.149285854040305, LR: 0.0003 +[2026-03-02 17:23:21] (step=0036541) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.149481510467619, LR: 0.0003 +[2026-03-02 17:23:29] (step=0036542) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 7.149677166894932, LR: 0.0003 +[2026-03-02 17:23:37] (step=0036543) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.149872823322246, LR: 0.0003 +[2026-03-02 17:23:45] (step=0036544) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.15006847974956, LR: 0.0003 +[2026-03-02 17:23:53] (step=0036545) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.150264136176873, LR: 0.0003 +[2026-03-02 17:24:01] (step=0036546) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.150459792604187, LR: 0.0003 +[2026-03-02 17:24:09] (step=0036547) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 7.1506554490315, LR: 0.0003 +[2026-03-02 17:24:17] (step=0036548) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.150851105458814, LR: 0.0003 +[2026-03-02 17:24:25] (step=0036549) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 7.1510467618861275, LR: 0.0003 +[2026-03-02 17:24:32] (step=0036550) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.1512424183134415, LR: 0.0003 +[2026-03-02 17:24:40] (step=0036551) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.1514380747407555, LR: 0.0003 +[2026-03-02 17:24:48] (step=0036552) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.151633731168069, LR: 0.0003 +[2026-03-02 17:24:56] (step=0036553) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.151829387595383, LR: 0.0003 +[2026-03-02 17:25:04] (step=0036554) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 7.152025044022696, LR: 0.0003 +[2026-03-02 17:25:12] (step=0036555) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.15222070045001, LR: 0.0003 +[2026-03-02 17:25:20] (step=0036556) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.152416356877324, LR: 0.0003 +[2026-03-02 17:25:28] (step=0036557) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.152612013304637, LR: 0.0003 +[2026-03-02 17:25:35] (step=0036558) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.152807669731951, LR: 0.0003 +[2026-03-02 17:25:43] (step=0036559) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 7.153003326159264, LR: 0.0003 +[2026-03-02 17:25:51] (step=0036560) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.153198982586578, LR: 0.0003 +[2026-03-02 17:25:59] (step=0036561) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.153394639013891, LR: 0.0003 +[2026-03-02 17:26:07] (step=0036562) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.153590295441205, LR: 0.0003 +[2026-03-02 17:26:15] (step=0036563) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.153785951868519, LR: 0.0003 +[2026-03-02 17:26:23] (step=0036564) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.153981608295832, LR: 0.0003 +[2026-03-02 17:26:30] (step=0036565) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.154177264723146, LR: 0.0003 +[2026-03-02 17:26:38] (step=0036566) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.154372921150459, LR: 0.0003 +[2026-03-02 17:26:46] (step=0036567) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.154568577577773, LR: 0.0003 +[2026-03-02 17:26:54] (step=0036568) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.154764234005087, LR: 0.0003 +[2026-03-02 17:27:02] (step=0036569) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.1549598904324005, LR: 0.0003 +[2026-03-02 17:27:10] (step=0036570) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.1551555468597146, LR: 0.0003 +[2026-03-02 17:27:18] (step=0036571) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 7.155351203287028, LR: 0.0003 +[2026-03-02 17:27:25] (step=0036572) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 7.155546859714342, LR: 0.0003 +[2026-03-02 17:27:33] (step=0036573) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.155742516141656, LR: 0.0003 +[2026-03-02 17:27:41] (step=0036574) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.155938172568969, LR: 0.0003 +[2026-03-02 17:27:49] (step=0036575) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.156133828996283, LR: 0.0003 +[2026-03-02 17:27:57] (step=0036576) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.156329485423596, LR: 0.0003 +[2026-03-02 17:28:05] (step=0036577) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.15652514185091, LR: 0.0003 +[2026-03-02 17:28:13] (step=0036578) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.156720798278223, LR: 0.0003 +[2026-03-02 17:28:21] (step=0036579) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.156916454705537, LR: 0.0003 +[2026-03-02 17:28:28] (step=0036580) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.157112111132851, LR: 0.0003 +[2026-03-02 17:28:36] (step=0036581) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.157307767560164, LR: 0.0003 +[2026-03-02 17:28:44] (step=0036582) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 7.157503423987478, LR: 0.0003 +[2026-03-02 17:28:52] (step=0036583) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.157699080414791, LR: 0.0003 +[2026-03-02 17:29:00] (step=0036584) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.157894736842105, LR: 0.0003 +[2026-03-02 17:29:08] (step=0036585) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.158090393269419, LR: 0.0003 +[2026-03-02 17:29:16] (step=0036586) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.158286049696732, LR: 0.0003 +[2026-03-02 17:29:23] (step=0036587) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.158481706124046, LR: 0.0003 +[2026-03-02 17:29:31] (step=0036588) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.15867736255136, LR: 0.0003 +[2026-03-02 17:29:39] (step=0036589) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.158873018978674, LR: 0.0003 +[2026-03-02 17:29:47] (step=0036590) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 7.159068675405987, LR: 0.0003 +[2026-03-02 17:29:55] (step=0036591) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.159264331833301, LR: 0.0003 +[2026-03-02 17:30:03] (step=0036592) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.159459988260615, LR: 0.0003 +[2026-03-02 17:30:11] (step=0036593) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.159655644687928, LR: 0.0003 +[2026-03-02 17:30:18] (step=0036594) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.159851301115242, LR: 0.0003 +[2026-03-02 17:30:26] (step=0036595) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.160046957542555, LR: 0.0003 +[2026-03-02 17:30:34] (step=0036596) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.160242613969869, LR: 0.0003 +[2026-03-02 17:30:42] (step=0036597) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.160438270397183, LR: 0.0003 +[2026-03-02 17:30:50] (step=0036598) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.160633926824496, LR: 0.0003 +[2026-03-02 17:30:58] (step=0036599) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.16082958325181, LR: 0.0003 +[2026-03-02 17:31:06] (step=0036600) Train Loss: 0.4391, Train Steps/Sec: 0.12, Epoch: 7.161025239679123, LR: 0.0003 +[2026-03-02 17:31:14] (step=0036601) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 7.161220896106437, LR: 0.0003 +[2026-03-02 17:31:22] (step=0036602) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 7.16141655253375, LR: 0.0003 +[2026-03-02 17:31:29] (step=0036603) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.161612208961064, LR: 0.0003 +[2026-03-02 17:31:37] (step=0036604) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.161807865388378, LR: 0.0003 +[2026-03-02 17:31:45] (step=0036605) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.162003521815691, LR: 0.0003 +[2026-03-02 17:31:53] (step=0036606) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 7.1621991782430054, LR: 0.0003 +[2026-03-02 17:32:01] (step=0036607) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.162394834670319, LR: 0.0003 +[2026-03-02 17:32:09] (step=0036608) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.162590491097633, LR: 0.0003 +[2026-03-02 17:32:17] (step=0036609) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.162786147524947, LR: 0.0003 +[2026-03-02 17:32:24] (step=0036610) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.16298180395226, LR: 0.0003 +[2026-03-02 17:32:32] (step=0036611) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.163177460379574, LR: 0.0003 +[2026-03-02 17:32:40] (step=0036612) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.163373116806887, LR: 0.0003 +[2026-03-02 17:32:48] (step=0036613) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.163568773234201, LR: 0.0003 +[2026-03-02 17:32:56] (step=0036614) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.163764429661514, LR: 0.0003 +[2026-03-02 17:33:04] (step=0036615) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.163960086088828, LR: 0.0003 +[2026-03-02 17:33:12] (step=0036616) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 7.164155742516142, LR: 0.0003 +[2026-03-02 17:33:19] (step=0036617) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.164351398943455, LR: 0.0003 +[2026-03-02 17:33:27] (step=0036618) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.164547055370769, LR: 0.0003 +[2026-03-02 17:33:35] (step=0036619) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.164742711798082, LR: 0.0003 +[2026-03-02 17:33:43] (step=0036620) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.164938368225396, LR: 0.0003 +[2026-03-02 17:33:51] (step=0036621) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.16513402465271, LR: 0.0003 +[2026-03-02 17:33:59] (step=0036622) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.165329681080023, LR: 0.0003 +[2026-03-02 17:34:07] (step=0036623) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.165525337507337, LR: 0.0003 +[2026-03-02 17:34:15] (step=0036624) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.1657209939346505, LR: 0.0003 +[2026-03-02 17:34:22] (step=0036625) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.1659166503619645, LR: 0.0003 +[2026-03-02 17:34:30] (step=0036626) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 7.1661123067892785, LR: 0.0003 +[2026-03-02 17:34:38] (step=0036627) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.166307963216592, LR: 0.0003 +[2026-03-02 17:34:46] (step=0036628) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.166503619643906, LR: 0.0003 +[2026-03-02 17:34:54] (step=0036629) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.166699276071219, LR: 0.0003 +[2026-03-02 17:35:02] (step=0036630) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 7.166894932498533, LR: 0.0003 +[2026-03-02 17:35:09] (step=0036631) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 7.167090588925846, LR: 0.0003 +[2026-03-02 17:35:17] (step=0036632) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.16728624535316, LR: 0.0003 +[2026-03-02 17:35:25] (step=0036633) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.167481901780474, LR: 0.0003 +[2026-03-02 17:35:33] (step=0036634) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.167677558207787, LR: 0.0003 +[2026-03-02 17:35:41] (step=0036635) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.167873214635101, LR: 0.0003 +[2026-03-02 17:35:49] (step=0036636) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.168068871062414, LR: 0.0003 +[2026-03-02 17:35:57] (step=0036637) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.168264527489728, LR: 0.0003 +[2026-03-02 17:36:04] (step=0036638) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.168460183917042, LR: 0.0003 +[2026-03-02 17:36:12] (step=0036639) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.168655840344355, LR: 0.0003 +[2026-03-02 17:36:20] (step=0036640) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 7.168851496771669, LR: 0.0003 +[2026-03-02 17:36:28] (step=0036641) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.169047153198982, LR: 0.0003 +[2026-03-02 17:36:36] (step=0036642) Train Loss: 0.4343, Train Steps/Sec: 0.12, Epoch: 7.169242809626296, LR: 0.0003 +[2026-03-02 17:36:44] (step=0036643) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.1694384660536095, LR: 0.0003 +[2026-03-02 17:36:52] (step=0036644) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.1696341224809235, LR: 0.0003 +[2026-03-02 17:37:00] (step=0036645) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.1698297789082375, LR: 0.0003 +[2026-03-02 17:37:08] (step=0036646) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.170025435335551, LR: 0.0003 +[2026-03-02 17:37:15] (step=0036647) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 7.170221091762865, LR: 0.0003 +[2026-03-02 17:37:23] (step=0036648) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 7.170416748190178, LR: 0.0003 +[2026-03-02 17:37:31] (step=0036649) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.170612404617492, LR: 0.0003 +[2026-03-02 17:37:39] (step=0036650) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 7.170808061044806, LR: 0.0003 +[2026-03-02 17:37:47] (step=0036651) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 7.171003717472119, LR: 0.0003 +[2026-03-02 17:37:55] (step=0036652) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 7.171199373899433, LR: 0.0003 +[2026-03-02 17:38:03] (step=0036653) Train Loss: 0.4476, Train Steps/Sec: 0.12, Epoch: 7.171395030326746, LR: 0.0003 +[2026-03-02 17:38:11] (step=0036654) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.17159068675406, LR: 0.0003 +[2026-03-02 17:38:18] (step=0036655) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.171786343181373, LR: 0.0003 +[2026-03-02 17:38:26] (step=0036656) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.171981999608687, LR: 0.0003 +[2026-03-02 17:38:34] (step=0036657) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.172177656036001, LR: 0.0003 +[2026-03-02 17:38:42] (step=0036658) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.172373312463314, LR: 0.0003 +[2026-03-02 17:38:50] (step=0036659) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.172568968890628, LR: 0.0003 +[2026-03-02 17:38:58] (step=0036660) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.172764625317941, LR: 0.0003 +[2026-03-02 17:39:06] (step=0036661) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.172960281745255, LR: 0.0003 +[2026-03-02 17:39:14] (step=0036662) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.173155938172569, LR: 0.0003 +[2026-03-02 17:39:21] (step=0036663) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.1733515945998825, LR: 0.0003 +[2026-03-02 17:39:29] (step=0036664) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.1735472510271965, LR: 0.0003 +[2026-03-02 17:39:37] (step=0036665) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.17374290745451, LR: 0.0003 +[2026-03-02 17:39:45] (step=0036666) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.173938563881824, LR: 0.0003 +[2026-03-02 17:39:53] (step=0036667) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.174134220309137, LR: 0.0003 +[2026-03-02 17:40:01] (step=0036668) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 7.174329876736451, LR: 0.0003 +[2026-03-02 17:40:09] (step=0036669) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.174525533163765, LR: 0.0003 +[2026-03-02 17:40:16] (step=0036670) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.174721189591078, LR: 0.0003 +[2026-03-02 17:40:24] (step=0036671) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.174916846018392, LR: 0.0003 +[2026-03-02 17:40:32] (step=0036672) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 7.175112502445705, LR: 0.0003 +[2026-03-02 17:40:40] (step=0036673) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.175308158873019, LR: 0.0003 +[2026-03-02 17:40:48] (step=0036674) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.175503815300333, LR: 0.0003 +[2026-03-02 17:40:56] (step=0036675) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 7.175699471727646, LR: 0.0003 +[2026-03-02 17:41:04] (step=0036676) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.17589512815496, LR: 0.0003 +[2026-03-02 17:41:11] (step=0036677) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.176090784582273, LR: 0.0003 +[2026-03-02 17:41:19] (step=0036678) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.176286441009587, LR: 0.0003 +[2026-03-02 17:41:27] (step=0036679) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 7.176482097436901, LR: 0.0003 +[2026-03-02 17:41:35] (step=0036680) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.176677753864214, LR: 0.0003 +[2026-03-02 17:41:43] (step=0036681) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.176873410291528, LR: 0.0003 +[2026-03-02 17:41:51] (step=0036682) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 7.1770690667188415, LR: 0.0003 +[2026-03-02 17:41:59] (step=0036683) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.1772647231461555, LR: 0.0003 +[2026-03-02 17:42:06] (step=0036684) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.177460379573469, LR: 0.0003 +[2026-03-02 17:42:14] (step=0036685) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.177656036000783, LR: 0.0003 +[2026-03-02 17:42:22] (step=0036686) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.177851692428097, LR: 0.0003 +[2026-03-02 17:42:30] (step=0036687) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.17804734885541, LR: 0.0003 +[2026-03-02 17:42:38] (step=0036688) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.178243005282724, LR: 0.0003 +[2026-03-02 17:42:46] (step=0036689) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.178438661710037, LR: 0.0003 +[2026-03-02 17:42:54] (step=0036690) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.178634318137351, LR: 0.0003 +[2026-03-02 17:43:02] (step=0036691) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.178829974564665, LR: 0.0003 +[2026-03-02 17:43:09] (step=0036692) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.179025630991978, LR: 0.0003 +[2026-03-02 17:43:17] (step=0036693) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.179221287419292, LR: 0.0003 +[2026-03-02 17:43:25] (step=0036694) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.179416943846605, LR: 0.0003 +[2026-03-02 17:43:33] (step=0036695) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.179612600273919, LR: 0.0003 +[2026-03-02 17:43:41] (step=0036696) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.179808256701232, LR: 0.0003 +[2026-03-02 17:43:49] (step=0036697) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.180003913128546, LR: 0.0003 +[2026-03-02 17:43:57] (step=0036698) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.18019956955586, LR: 0.0003 +[2026-03-02 17:44:05] (step=0036699) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.180395225983173, LR: 0.0003 +[2026-03-02 17:44:12] (step=0036700) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.180590882410487, LR: 0.0003 +[2026-03-02 17:44:21] (step=0036701) Train Loss: 0.4406, Train Steps/Sec: 0.12, Epoch: 7.1807865388378005, LR: 0.0003 +[2026-03-02 17:44:28] (step=0036702) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.1809821952651145, LR: 0.0003 +[2026-03-02 17:44:36] (step=0036703) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 7.1811778516924285, LR: 0.0003 +[2026-03-02 17:44:44] (step=0036704) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.181373508119742, LR: 0.0003 +[2026-03-02 17:44:52] (step=0036705) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.181569164547056, LR: 0.0003 +[2026-03-02 17:45:00] (step=0036706) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.181764820974369, LR: 0.0003 +[2026-03-02 17:45:08] (step=0036707) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.181960477401683, LR: 0.0003 +[2026-03-02 17:45:16] (step=0036708) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.182156133828996, LR: 0.0003 +[2026-03-02 17:45:24] (step=0036709) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.18235179025631, LR: 0.0003 +[2026-03-02 17:45:31] (step=0036710) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.182547446683624, LR: 0.0003 +[2026-03-02 17:45:39] (step=0036711) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.182743103110937, LR: 0.0003 +[2026-03-02 17:45:47] (step=0036712) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.182938759538251, LR: 0.0003 +[2026-03-02 17:45:55] (step=0036713) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 7.183134415965564, LR: 0.0003 +[2026-03-02 17:46:03] (step=0036714) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.183330072392878, LR: 0.0003 +[2026-03-02 17:46:11] (step=0036715) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.183525728820192, LR: 0.0003 +[2026-03-02 17:46:19] (step=0036716) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.183721385247505, LR: 0.0003 +[2026-03-02 17:46:27] (step=0036717) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.183917041674819, LR: 0.0003 +[2026-03-02 17:46:34] (step=0036718) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.184112698102132, LR: 0.0003 +[2026-03-02 17:46:42] (step=0036719) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 7.184308354529446, LR: 0.0003 +[2026-03-02 17:46:50] (step=0036720) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.1845040109567595, LR: 0.0003 +[2026-03-02 17:46:58] (step=0036721) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.1846996673840735, LR: 0.0003 +[2026-03-02 17:47:06] (step=0036722) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.1848953238113875, LR: 0.0003 +[2026-03-02 17:47:14] (step=0036723) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.185090980238701, LR: 0.0003 +[2026-03-02 17:47:22] (step=0036724) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 7.185286636666015, LR: 0.0003 +[2026-03-02 17:47:30] (step=0036725) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.185482293093328, LR: 0.0003 +[2026-03-02 17:47:37] (step=0036726) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.185677949520642, LR: 0.0003 +[2026-03-02 17:47:45] (step=0036727) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.185873605947956, LR: 0.0003 +[2026-03-02 17:47:53] (step=0036728) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.186069262375269, LR: 0.0003 +[2026-03-02 17:48:01] (step=0036729) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.186264918802583, LR: 0.0003 +[2026-03-02 17:48:09] (step=0036730) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.186460575229896, LR: 0.0003 +[2026-03-02 17:48:17] (step=0036731) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.18665623165721, LR: 0.0003 +[2026-03-02 17:48:25] (step=0036732) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.186851888084524, LR: 0.0003 +[2026-03-02 17:48:33] (step=0036733) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.187047544511837, LR: 0.0003 +[2026-03-02 17:48:40] (step=0036734) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.187243200939151, LR: 0.0003 +[2026-03-02 17:48:48] (step=0036735) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.187438857366464, LR: 0.0003 +[2026-03-02 17:48:56] (step=0036736) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.187634513793778, LR: 0.0003 +[2026-03-02 17:49:04] (step=0036737) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.187830170221091, LR: 0.0003 +[2026-03-02 17:49:12] (step=0036738) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.188025826648405, LR: 0.0003 +[2026-03-02 17:49:20] (step=0036739) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.188221483075719, LR: 0.0003 +[2026-03-02 17:49:28] (step=0036740) Train Loss: 0.4591, Train Steps/Sec: 0.12, Epoch: 7.1884171395030325, LR: 0.0003 +[2026-03-02 17:49:36] (step=0036741) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.1886127959303465, LR: 0.0003 +[2026-03-02 17:49:43] (step=0036742) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 7.18880845235766, LR: 0.0003 +[2026-03-02 17:49:51] (step=0036743) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.189004108784974, LR: 0.0003 +[2026-03-02 17:49:59] (step=0036744) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.189199765212288, LR: 0.0003 +[2026-03-02 17:50:07] (step=0036745) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.189395421639601, LR: 0.0003 +[2026-03-02 17:50:15] (step=0036746) Train Loss: 0.4403, Train Steps/Sec: 0.12, Epoch: 7.189591078066915, LR: 0.0003 +[2026-03-02 17:50:23] (step=0036747) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.189786734494228, LR: 0.0003 +[2026-03-02 17:50:31] (step=0036748) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.189982390921542, LR: 0.0003 +[2026-03-02 17:50:39] (step=0036749) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.190178047348855, LR: 0.0003 +[2026-03-02 17:50:47] (step=0036750) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.190373703776169, LR: 0.0003 +[2026-03-02 17:50:55] (step=0036751) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 7.190569360203483, LR: 0.0003 +[2026-03-02 17:51:02] (step=0036752) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.190765016630796, LR: 0.0003 +[2026-03-02 17:51:10] (step=0036753) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 7.19096067305811, LR: 0.0003 +[2026-03-02 17:51:18] (step=0036754) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.191156329485423, LR: 0.0003 +[2026-03-02 17:51:26] (step=0036755) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.191351985912737, LR: 0.0003 +[2026-03-02 17:51:34] (step=0036756) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.191547642340051, LR: 0.0003 +[2026-03-02 17:51:42] (step=0036757) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.191743298767364, LR: 0.0003 +[2026-03-02 17:51:50] (step=0036758) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.191938955194678, LR: 0.0003 +[2026-03-02 17:51:57] (step=0036759) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.1921346116219915, LR: 0.0003 +[2026-03-02 17:52:05] (step=0036760) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.1923302680493055, LR: 0.0003 +[2026-03-02 17:52:13] (step=0036761) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.192525924476619, LR: 0.0003 +[2026-03-02 17:52:21] (step=0036762) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.192721580903933, LR: 0.0003 +[2026-03-02 17:52:29] (step=0036763) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 7.192917237331247, LR: 0.0003 +[2026-03-02 17:52:37] (step=0036764) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 7.19311289375856, LR: 0.0003 +[2026-03-02 17:52:45] (step=0036765) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.193308550185874, LR: 0.0003 +[2026-03-02 17:52:53] (step=0036766) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.193504206613187, LR: 0.0003 +[2026-03-02 17:53:00] (step=0036767) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.193699863040501, LR: 0.0003 +[2026-03-02 17:53:08] (step=0036768) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.193895519467815, LR: 0.0003 +[2026-03-02 17:53:16] (step=0036769) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.194091175895128, LR: 0.0003 +[2026-03-02 17:53:24] (step=0036770) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.194286832322442, LR: 0.0003 +[2026-03-02 17:53:32] (step=0036771) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.194482488749755, LR: 0.0003 +[2026-03-02 17:53:40] (step=0036772) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.194678145177069, LR: 0.0003 +[2026-03-02 17:53:48] (step=0036773) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.194873801604382, LR: 0.0003 +[2026-03-02 17:53:56] (step=0036774) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.195069458031696, LR: 0.0003 +[2026-03-02 17:54:03] (step=0036775) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.19526511445901, LR: 0.0003 +[2026-03-02 17:54:11] (step=0036776) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.195460770886323, LR: 0.0003 +[2026-03-02 17:54:19] (step=0036777) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.195656427313637, LR: 0.0003 +[2026-03-02 17:54:27] (step=0036778) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.1958520837409505, LR: 0.0003 +[2026-03-02 17:54:35] (step=0036779) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.1960477401682645, LR: 0.0003 +[2026-03-02 17:54:43] (step=0036780) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.1962433965955785, LR: 0.0003 +[2026-03-02 17:54:51] (step=0036781) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.196439053022892, LR: 0.0003 +[2026-03-02 17:54:58] (step=0036782) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.196634709450206, LR: 0.0003 +[2026-03-02 17:55:06] (step=0036783) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.196830365877519, LR: 0.0003 +[2026-03-02 17:55:14] (step=0036784) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 7.197026022304833, LR: 0.0003 +[2026-03-02 17:55:22] (step=0036785) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.197221678732146, LR: 0.0003 +[2026-03-02 17:55:30] (step=0036786) Train Loss: 0.4506, Train Steps/Sec: 0.12, Epoch: 7.19741733515946, LR: 0.0003 +[2026-03-02 17:55:38] (step=0036787) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.197612991586774, LR: 0.0003 +[2026-03-02 17:55:46] (step=0036788) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.197808648014087, LR: 0.0003 +[2026-03-02 17:55:54] (step=0036789) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.198004304441401, LR: 0.0003 +[2026-03-02 17:56:02] (step=0036790) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.198199960868714, LR: 0.0003 +[2026-03-02 17:56:10] (step=0036791) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.198395617296028, LR: 0.0003 +[2026-03-02 17:56:17] (step=0036792) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.198591273723342, LR: 0.0003 +[2026-03-02 17:56:25] (step=0036793) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.198786930150655, LR: 0.0003 +[2026-03-02 17:56:33] (step=0036794) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.198982586577969, LR: 0.0003 +[2026-03-02 17:56:41] (step=0036795) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.199178243005282, LR: 0.0003 +[2026-03-02 17:56:49] (step=0036796) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.199373899432596, LR: 0.0003 +[2026-03-02 17:56:57] (step=0036797) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.19956955585991, LR: 0.0003 +[2026-03-02 17:57:05] (step=0036798) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.1997652122872235, LR: 0.0003 +[2026-03-02 17:57:13] (step=0036799) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.1999608687145376, LR: 0.0003 +[2026-03-02 17:57:20] (step=0036800) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.200156525141851, LR: 0.0003 +[2026-03-02 17:57:28] (step=0036801) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.200352181569165, LR: 0.0003 +[2026-03-02 17:57:36] (step=0036802) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.200547837996478, LR: 0.0003 +[2026-03-02 17:57:44] (step=0036803) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.200743494423792, LR: 0.0003 +[2026-03-02 17:57:52] (step=0036804) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.200939150851106, LR: 0.0003 +[2026-03-02 17:58:00] (step=0036805) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.201134807278419, LR: 0.0003 +[2026-03-02 17:58:08] (step=0036806) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.201330463705733, LR: 0.0003 +[2026-03-02 17:58:15] (step=0036807) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.201526120133046, LR: 0.0003 +[2026-03-02 17:58:23] (step=0036808) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.20172177656036, LR: 0.0003 +[2026-03-02 17:58:31] (step=0036809) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.201917432987674, LR: 0.0003 +[2026-03-02 17:58:39] (step=0036810) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.202113089414987, LR: 0.0003 +[2026-03-02 17:58:47] (step=0036811) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.202308745842301, LR: 0.0003 +[2026-03-02 17:58:55] (step=0036812) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.202504402269614, LR: 0.0003 +[2026-03-02 17:59:03] (step=0036813) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.202700058696928, LR: 0.0003 +[2026-03-02 17:59:11] (step=0036814) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.202895715124241, LR: 0.0003 +[2026-03-02 17:59:18] (step=0036815) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 7.203091371551555, LR: 0.0003 +[2026-03-02 17:59:26] (step=0036816) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.203287027978869, LR: 0.0003 +[2026-03-02 17:59:34] (step=0036817) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.203482684406183, LR: 0.0003 +[2026-03-02 17:59:42] (step=0036818) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.203678340833497, LR: 0.0003 +[2026-03-02 17:59:50] (step=0036819) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.20387399726081, LR: 0.0003 +[2026-03-02 17:59:58] (step=0036820) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.204069653688124, LR: 0.0003 +[2026-03-02 18:00:06] (step=0036821) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.204265310115438, LR: 0.0003 +[2026-03-02 18:00:13] (step=0036822) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.204460966542751, LR: 0.0003 +[2026-03-02 18:00:21] (step=0036823) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.204656622970065, LR: 0.0003 +[2026-03-02 18:00:29] (step=0036824) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.204852279397378, LR: 0.0003 +[2026-03-02 18:00:37] (step=0036825) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.205047935824692, LR: 0.0003 +[2026-03-02 18:00:45] (step=0036826) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.205243592252005, LR: 0.0003 +[2026-03-02 18:00:53] (step=0036827) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.205439248679319, LR: 0.0003 +[2026-03-02 18:01:01] (step=0036828) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.205634905106633, LR: 0.0003 +[2026-03-02 18:01:08] (step=0036829) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.205830561533946, LR: 0.0003 +[2026-03-02 18:01:16] (step=0036830) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.20602621796126, LR: 0.0003 +[2026-03-02 18:01:24] (step=0036831) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.206221874388573, LR: 0.0003 +[2026-03-02 18:01:32] (step=0036832) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.206417530815887, LR: 0.0003 +[2026-03-02 18:01:40] (step=0036833) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.206613187243201, LR: 0.0003 +[2026-03-02 18:01:48] (step=0036834) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.206808843670514, LR: 0.0003 +[2026-03-02 18:01:56] (step=0036835) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.2070045000978284, LR: 0.0003 +[2026-03-02 18:02:04] (step=0036836) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.207200156525142, LR: 0.0003 +[2026-03-02 18:02:11] (step=0036837) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.207395812952456, LR: 0.0003 +[2026-03-02 18:02:19] (step=0036838) Train Loss: 0.4504, Train Steps/Sec: 0.12, Epoch: 7.207591469379769, LR: 0.0003 +[2026-03-02 18:02:27] (step=0036839) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.207787125807083, LR: 0.0003 +[2026-03-02 18:02:35] (step=0036840) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.207982782234397, LR: 0.0003 +[2026-03-02 18:02:43] (step=0036841) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.20817843866171, LR: 0.0003 +[2026-03-02 18:02:51] (step=0036842) Train Loss: 0.4560, Train Steps/Sec: 0.12, Epoch: 7.208374095089024, LR: 0.0003 +[2026-03-02 18:02:59] (step=0036843) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.208569751516337, LR: 0.0003 +[2026-03-02 18:03:07] (step=0036844) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.208765407943651, LR: 0.0003 +[2026-03-02 18:03:15] (step=0036845) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.208961064370965, LR: 0.0003 +[2026-03-02 18:03:23] (step=0036846) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.209156720798278, LR: 0.0003 +[2026-03-02 18:03:30] (step=0036847) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.209352377225592, LR: 0.0003 +[2026-03-02 18:03:38] (step=0036848) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.209548033652905, LR: 0.0003 +[2026-03-02 18:03:46] (step=0036849) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.209743690080219, LR: 0.0003 +[2026-03-02 18:03:54] (step=0036850) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.209939346507533, LR: 0.0003 +[2026-03-02 18:04:02] (step=0036851) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.210135002934846, LR: 0.0003 +[2026-03-02 18:04:10] (step=0036852) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.21033065936216, LR: 0.0003 +[2026-03-02 18:04:18] (step=0036853) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.2105263157894735, LR: 0.0003 +[2026-03-02 18:04:26] (step=0036854) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.2107219722167875, LR: 0.0003 +[2026-03-02 18:04:33] (step=0036855) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.210917628644101, LR: 0.0003 +[2026-03-02 18:04:41] (step=0036856) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.211113285071415, LR: 0.0003 +[2026-03-02 18:04:49] (step=0036857) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.211308941498729, LR: 0.0003 +[2026-03-02 18:04:57] (step=0036858) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 7.211504597926042, LR: 0.0003 +[2026-03-02 18:05:05] (step=0036859) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.211700254353356, LR: 0.0003 +[2026-03-02 18:05:13] (step=0036860) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 7.211895910780669, LR: 0.0003 +[2026-03-02 18:05:21] (step=0036861) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.212091567207983, LR: 0.0003 +[2026-03-02 18:05:28] (step=0036862) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.212287223635297, LR: 0.0003 +[2026-03-02 18:05:36] (step=0036863) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.21248288006261, LR: 0.0003 +[2026-03-02 18:05:44] (step=0036864) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.212678536489924, LR: 0.0003 +[2026-03-02 18:05:52] (step=0036865) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.212874192917237, LR: 0.0003 +[2026-03-02 18:06:00] (step=0036866) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.213069849344551, LR: 0.0003 +[2026-03-02 18:06:08] (step=0036867) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.213265505771864, LR: 0.0003 +[2026-03-02 18:06:16] (step=0036868) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.213461162199178, LR: 0.0003 +[2026-03-02 18:06:24] (step=0036869) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.213656818626492, LR: 0.0003 +[2026-03-02 18:06:32] (step=0036870) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.213852475053805, LR: 0.0003 +[2026-03-02 18:06:39] (step=0036871) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.214048131481119, LR: 0.0003 +[2026-03-02 18:06:47] (step=0036872) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.2142437879084325, LR: 0.0003 +[2026-03-02 18:06:55] (step=0036873) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.2144394443357465, LR: 0.0003 +[2026-03-02 18:07:03] (step=0036874) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 7.2146351007630605, LR: 0.0003 +[2026-03-02 18:07:11] (step=0036875) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.214830757190374, LR: 0.0003 +[2026-03-02 18:07:19] (step=0036876) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.215026413617688, LR: 0.0003 +[2026-03-02 18:07:27] (step=0036877) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.215222070045001, LR: 0.0003 +[2026-03-02 18:07:34] (step=0036878) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 7.215417726472315, LR: 0.0003 +[2026-03-02 18:07:42] (step=0036879) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.215613382899628, LR: 0.0003 +[2026-03-02 18:07:50] (step=0036880) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.215809039326942, LR: 0.0003 +[2026-03-02 18:07:58] (step=0036881) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.216004695754256, LR: 0.0003 +[2026-03-02 18:08:06] (step=0036882) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.216200352181569, LR: 0.0003 +[2026-03-02 18:08:14] (step=0036883) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.216396008608883, LR: 0.0003 +[2026-03-02 18:08:22] (step=0036884) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.216591665036196, LR: 0.0003 +[2026-03-02 18:08:30] (step=0036885) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 7.21678732146351, LR: 0.0003 +[2026-03-02 18:08:38] (step=0036886) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.216982977890824, LR: 0.0003 +[2026-03-02 18:08:45] (step=0036887) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.217178634318137, LR: 0.0003 +[2026-03-02 18:08:53] (step=0036888) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.217374290745451, LR: 0.0003 +[2026-03-02 18:09:01] (step=0036889) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.217569947172764, LR: 0.0003 +[2026-03-02 18:09:09] (step=0036890) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.217765603600078, LR: 0.0003 +[2026-03-02 18:09:17] (step=0036891) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 7.2179612600273915, LR: 0.0003 +[2026-03-02 18:09:25] (step=0036892) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 7.2181569164547055, LR: 0.0003 +[2026-03-02 18:09:33] (step=0036893) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 7.2183525728820195, LR: 0.0003 +[2026-03-02 18:09:41] (step=0036894) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.218548229309333, LR: 0.0003 +[2026-03-02 18:09:49] (step=0036895) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.218743885736647, LR: 0.0003 +[2026-03-02 18:09:56] (step=0036896) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.21893954216396, LR: 0.0003 +[2026-03-02 18:10:04] (step=0036897) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 7.219135198591274, LR: 0.0003 +[2026-03-02 18:10:12] (step=0036898) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.219330855018588, LR: 0.0003 +[2026-03-02 18:10:20] (step=0036899) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.219526511445901, LR: 0.0003 +[2026-03-02 18:10:28] (step=0036900) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.219722167873215, LR: 0.0003 +[2026-03-02 18:10:36] (step=0036901) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.219917824300528, LR: 0.0003 +[2026-03-02 18:10:44] (step=0036902) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.220113480727842, LR: 0.0003 +[2026-03-02 18:10:52] (step=0036903) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.220309137155156, LR: 0.0003 +[2026-03-02 18:10:59] (step=0036904) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 7.220504793582469, LR: 0.0003 +[2026-03-02 18:11:07] (step=0036905) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.220700450009783, LR: 0.0003 +[2026-03-02 18:11:15] (step=0036906) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.220896106437096, LR: 0.0003 +[2026-03-02 18:11:23] (step=0036907) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.22109176286441, LR: 0.0003 +[2026-03-02 18:11:31] (step=0036908) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.221287419291723, LR: 0.0003 +[2026-03-02 18:11:39] (step=0036909) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.221483075719037, LR: 0.0003 +[2026-03-02 18:11:47] (step=0036910) Train Loss: 0.4260, Train Steps/Sec: 0.13, Epoch: 7.221678732146351, LR: 0.0003 +[2026-03-02 18:11:54] (step=0036911) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.2218743885736645, LR: 0.0003 +[2026-03-02 18:12:02] (step=0036912) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.2220700450009785, LR: 0.0003 +[2026-03-02 18:12:10] (step=0036913) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.222265701428292, LR: 0.0003 +[2026-03-02 18:12:18] (step=0036914) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 7.222461357855606, LR: 0.0003 +[2026-03-02 18:12:26] (step=0036915) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.22265701428292, LR: 0.0003 +[2026-03-02 18:12:34] (step=0036916) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.222852670710233, LR: 0.0003 +[2026-03-02 18:12:42] (step=0036917) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.223048327137547, LR: 0.0003 +[2026-03-02 18:12:50] (step=0036918) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 7.22324398356486, LR: 0.0003 +[2026-03-02 18:12:57] (step=0036919) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.223439639992174, LR: 0.0003 +[2026-03-02 18:13:05] (step=0036920) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.223635296419487, LR: 0.0003 +[2026-03-02 18:13:13] (step=0036921) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.223830952846801, LR: 0.0003 +[2026-03-02 18:13:21] (step=0036922) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.224026609274115, LR: 0.0003 +[2026-03-02 18:13:29] (step=0036923) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.224222265701428, LR: 0.0003 +[2026-03-02 18:13:37] (step=0036924) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.224417922128742, LR: 0.0003 +[2026-03-02 18:13:45] (step=0036925) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.224613578556055, LR: 0.0003 +[2026-03-02 18:13:52] (step=0036926) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.224809234983369, LR: 0.0003 +[2026-03-02 18:14:00] (step=0036927) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.225004891410683, LR: 0.0003 +[2026-03-02 18:14:08] (step=0036928) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.225200547837996, LR: 0.0003 +[2026-03-02 18:14:16] (step=0036929) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.22539620426531, LR: 0.0003 +[2026-03-02 18:14:24] (step=0036930) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.2255918606926235, LR: 0.0003 +[2026-03-02 18:14:32] (step=0036931) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.2257875171199375, LR: 0.0003 +[2026-03-02 18:14:40] (step=0036932) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.225983173547251, LR: 0.0003 +[2026-03-02 18:14:48] (step=0036933) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.226178829974565, LR: 0.0003 +[2026-03-02 18:14:55] (step=0036934) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 7.226374486401879, LR: 0.0003 +[2026-03-02 18:15:03] (step=0036935) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.226570142829192, LR: 0.0003 +[2026-03-02 18:15:11] (step=0036936) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.226765799256506, LR: 0.0003 +[2026-03-02 18:15:19] (step=0036937) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 7.226961455683819, LR: 0.0003 +[2026-03-02 18:15:27] (step=0036938) Train Loss: 0.4479, Train Steps/Sec: 0.12, Epoch: 7.227157112111133, LR: 0.0003 +[2026-03-02 18:15:35] (step=0036939) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.227352768538447, LR: 0.0003 +[2026-03-02 18:15:43] (step=0036940) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.22754842496576, LR: 0.0003 +[2026-03-02 18:15:50] (step=0036941) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.227744081393074, LR: 0.0003 +[2026-03-02 18:15:58] (step=0036942) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.227939737820387, LR: 0.0003 +[2026-03-02 18:16:06] (step=0036943) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.228135394247701, LR: 0.0003 +[2026-03-02 18:16:14] (step=0036944) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.228331050675014, LR: 0.0003 +[2026-03-02 18:16:22] (step=0036945) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 7.228526707102328, LR: 0.0003 +[2026-03-02 18:16:30] (step=0036946) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.228722363529642, LR: 0.0003 +[2026-03-02 18:16:38] (step=0036947) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 7.228918019956955, LR: 0.0003 +[2026-03-02 18:16:45] (step=0036948) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.229113676384269, LR: 0.0003 +[2026-03-02 18:16:53] (step=0036949) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.2293093328115825, LR: 0.0003 +[2026-03-02 18:17:01] (step=0036950) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.2295049892388965, LR: 0.0003 +[2026-03-02 18:17:09] (step=0036951) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 7.2297006456662105, LR: 0.0003 +[2026-03-02 18:17:17] (step=0036952) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.229896302093524, LR: 0.0003 +[2026-03-02 18:17:25] (step=0036953) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.230091958520838, LR: 0.0003 +[2026-03-02 18:17:33] (step=0036954) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.230287614948151, LR: 0.0003 +[2026-03-02 18:17:40] (step=0036955) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.230483271375465, LR: 0.0003 +[2026-03-02 18:17:48] (step=0036956) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.230678927802778, LR: 0.0003 +[2026-03-02 18:17:56] (step=0036957) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 7.230874584230092, LR: 0.0003 +[2026-03-02 18:18:04] (step=0036958) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.231070240657406, LR: 0.0003 +[2026-03-02 18:18:12] (step=0036959) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.231265897084719, LR: 0.0003 +[2026-03-02 18:18:20] (step=0036960) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.231461553512033, LR: 0.0003 +[2026-03-02 18:18:28] (step=0036961) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.231657209939346, LR: 0.0003 +[2026-03-02 18:18:35] (step=0036962) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.23185286636666, LR: 0.0003 +[2026-03-02 18:18:43] (step=0036963) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.232048522793974, LR: 0.0003 +[2026-03-02 18:18:51] (step=0036964) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.232244179221287, LR: 0.0003 +[2026-03-02 18:18:59] (step=0036965) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.232439835648601, LR: 0.0003 +[2026-03-02 18:19:07] (step=0036966) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.232635492075914, LR: 0.0003 +[2026-03-02 18:19:15] (step=0036967) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.232831148503228, LR: 0.0003 +[2026-03-02 18:19:23] (step=0036968) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.233026804930542, LR: 0.0003 +[2026-03-02 18:19:30] (step=0036969) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.2332224613578555, LR: 0.0003 +[2026-03-02 18:19:38] (step=0036970) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.2334181177851695, LR: 0.0003 +[2026-03-02 18:19:46] (step=0036971) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.233613774212483, LR: 0.0003 +[2026-03-02 18:19:54] (step=0036972) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.233809430639797, LR: 0.0003 +[2026-03-02 18:20:02] (step=0036973) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.23400508706711, LR: 0.0003 +[2026-03-02 18:20:10] (step=0036974) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.234200743494424, LR: 0.0003 +[2026-03-02 18:20:18] (step=0036975) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.234396399921738, LR: 0.0003 +[2026-03-02 18:20:25] (step=0036976) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.234592056349051, LR: 0.0003 +[2026-03-02 18:20:33] (step=0036977) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.234787712776365, LR: 0.0003 +[2026-03-02 18:20:41] (step=0036978) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.234983369203678, LR: 0.0003 +[2026-03-02 18:20:49] (step=0036979) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.235179025630992, LR: 0.0003 +[2026-03-02 18:20:57] (step=0036980) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.235374682058306, LR: 0.0003 +[2026-03-02 18:21:05] (step=0036981) Train Loss: 0.4432, Train Steps/Sec: 0.12, Epoch: 7.235570338485619, LR: 0.0003 +[2026-03-02 18:21:13] (step=0036982) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.235765994912933, LR: 0.0003 +[2026-03-02 18:21:21] (step=0036983) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.235961651340246, LR: 0.0003 +[2026-03-02 18:21:29] (step=0036984) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.23615730776756, LR: 0.0003 +[2026-03-02 18:21:36] (step=0036985) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 7.236352964194873, LR: 0.0003 +[2026-03-02 18:21:44] (step=0036986) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.236548620622187, LR: 0.0003 +[2026-03-02 18:21:52] (step=0036987) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 7.236744277049501, LR: 0.0003 +[2026-03-02 18:22:00] (step=0036988) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.2369399334768145, LR: 0.0003 +[2026-03-02 18:22:08] (step=0036989) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.2371355899041285, LR: 0.0003 +[2026-03-02 18:22:16] (step=0036990) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.237331246331442, LR: 0.0003 +[2026-03-02 18:22:24] (step=0036991) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.237526902758756, LR: 0.0003 +[2026-03-02 18:22:31] (step=0036992) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.23772255918607, LR: 0.0003 +[2026-03-02 18:22:39] (step=0036993) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.237918215613383, LR: 0.0003 +[2026-03-02 18:22:47] (step=0036994) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.238113872040697, LR: 0.0003 +[2026-03-02 18:22:55] (step=0036995) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.23830952846801, LR: 0.0003 +[2026-03-02 18:23:03] (step=0036996) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.238505184895324, LR: 0.0003 +[2026-03-02 18:23:11] (step=0036997) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.238700841322637, LR: 0.0003 +[2026-03-02 18:23:19] (step=0036998) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.238896497749951, LR: 0.0003 +[2026-03-02 18:23:26] (step=0036999) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.239092154177265, LR: 0.0003 +[2026-03-02 18:23:34] (step=0037000) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.239287810604578, LR: 0.0003 +[2026-03-02 18:23:34] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0037000/ +[2026-03-02 18:23:42] (step=0037001) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.239483467031892, LR: 0.0003 +[2026-03-02 18:23:50] (step=0037002) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.239679123459205, LR: 0.0003 +[2026-03-02 18:23:58] (step=0037003) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.239874779886519, LR: 0.0003 +[2026-03-02 18:24:06] (step=0037004) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.240070436313833, LR: 0.0003 +[2026-03-02 18:24:13] (step=0037005) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.240266092741146, LR: 0.0003 +[2026-03-02 18:24:21] (step=0037006) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.24046174916846, LR: 0.0003 +[2026-03-02 18:24:29] (step=0037007) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.2406574055957735, LR: 0.0003 +[2026-03-02 18:24:37] (step=0037008) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 7.2408530620230875, LR: 0.0003 +[2026-03-02 18:24:45] (step=0037009) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.241048718450401, LR: 0.0003 +[2026-03-02 18:24:53] (step=0037010) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.241244374877715, LR: 0.0003 +[2026-03-02 18:25:01] (step=0037011) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.241440031305029, LR: 0.0003 +[2026-03-02 18:25:08] (step=0037012) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.241635687732342, LR: 0.0003 +[2026-03-02 18:25:16] (step=0037013) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.241831344159656, LR: 0.0003 +[2026-03-02 18:25:24] (step=0037014) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.242027000586969, LR: 0.0003 +[2026-03-02 18:25:32] (step=0037015) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 7.242222657014283, LR: 0.0003 +[2026-03-02 18:25:40] (step=0037016) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.242418313441597, LR: 0.0003 +[2026-03-02 18:25:48] (step=0037017) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 7.24261396986891, LR: 0.0003 +[2026-03-02 18:25:56] (step=0037018) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.242809626296224, LR: 0.0003 +[2026-03-02 18:26:03] (step=0037019) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.243005282723537, LR: 0.0003 +[2026-03-02 18:26:11] (step=0037020) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.243200939150851, LR: 0.0003 +[2026-03-02 18:26:19] (step=0037021) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.243396595578165, LR: 0.0003 +[2026-03-02 18:26:27] (step=0037022) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.243592252005478, LR: 0.0003 +[2026-03-02 18:26:35] (step=0037023) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.243787908432792, LR: 0.0003 +[2026-03-02 18:26:43] (step=0037024) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.243983564860105, LR: 0.0003 +[2026-03-02 18:26:51] (step=0037025) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.244179221287419, LR: 0.0003 +[2026-03-02 18:26:58] (step=0037026) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.2443748777147325, LR: 0.0003 +[2026-03-02 18:27:06] (step=0037027) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.2445705341420465, LR: 0.0003 +[2026-03-02 18:27:14] (step=0037028) Train Loss: 0.4541, Train Steps/Sec: 0.12, Epoch: 7.2447661905693606, LR: 0.0003 +[2026-03-02 18:27:22] (step=0037029) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 7.244961846996674, LR: 0.0003 +[2026-03-02 18:27:30] (step=0037030) Train Loss: 0.4415, Train Steps/Sec: 0.12, Epoch: 7.245157503423988, LR: 0.0003 +[2026-03-02 18:27:38] (step=0037031) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.245353159851301, LR: 0.0003 +[2026-03-02 18:27:46] (step=0037032) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.245548816278615, LR: 0.0003 +[2026-03-02 18:27:54] (step=0037033) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.245744472705929, LR: 0.0003 +[2026-03-02 18:28:02] (step=0037034) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.245940129133242, LR: 0.0003 +[2026-03-02 18:28:09] (step=0037035) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.246135785560556, LR: 0.0003 +[2026-03-02 18:28:17] (step=0037036) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.246331441987869, LR: 0.0003 +[2026-03-02 18:28:25] (step=0037037) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 7.246527098415183, LR: 0.0003 +[2026-03-02 18:28:33] (step=0037038) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.246722754842496, LR: 0.0003 +[2026-03-02 18:28:41] (step=0037039) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 7.24691841126981, LR: 0.0003 +[2026-03-02 18:28:49] (step=0037040) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 7.247114067697124, LR: 0.0003 +[2026-03-02 18:28:56] (step=0037041) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.247309724124437, LR: 0.0003 +[2026-03-02 18:29:04] (step=0037042) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.247505380551751, LR: 0.0003 +[2026-03-02 18:29:12] (step=0037043) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.247701036979064, LR: 0.0003 +[2026-03-02 18:29:20] (step=0037044) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 7.247896693406378, LR: 0.0003 +[2026-03-02 18:29:28] (step=0037045) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 7.248092349833692, LR: 0.0003 +[2026-03-02 18:29:36] (step=0037046) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.248288006261006, LR: 0.0003 +[2026-03-02 18:29:44] (step=0037047) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.24848366268832, LR: 0.0003 +[2026-03-02 18:29:51] (step=0037048) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.248679319115633, LR: 0.0003 +[2026-03-02 18:29:59] (step=0037049) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.248874975542947, LR: 0.0003 +[2026-03-02 18:30:07] (step=0037050) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.24907063197026, LR: 0.0003 +[2026-03-02 18:30:15] (step=0037051) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.249266288397574, LR: 0.0003 +[2026-03-02 18:30:23] (step=0037052) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.249461944824888, LR: 0.0003 +[2026-03-02 18:30:31] (step=0037053) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.249657601252201, LR: 0.0003 +[2026-03-02 18:30:39] (step=0037054) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.249853257679515, LR: 0.0003 +[2026-03-02 18:30:46] (step=0037055) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 7.250048914106828, LR: 0.0003 +[2026-03-02 18:30:54] (step=0037056) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.250244570534142, LR: 0.0003 +[2026-03-02 18:31:02] (step=0037057) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.250440226961456, LR: 0.0003 +[2026-03-02 18:31:10] (step=0037058) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.250635883388769, LR: 0.0003 +[2026-03-02 18:31:18] (step=0037059) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 7.250831539816083, LR: 0.0003 +[2026-03-02 18:31:26] (step=0037060) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.251027196243396, LR: 0.0003 +[2026-03-02 18:31:34] (step=0037061) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.25122285267071, LR: 0.0003 +[2026-03-02 18:31:41] (step=0037062) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.251418509098023, LR: 0.0003 +[2026-03-02 18:31:49] (step=0037063) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 7.251614165525337, LR: 0.0003 +[2026-03-02 18:31:57] (step=0037064) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.2518098219526514, LR: 0.0003 +[2026-03-02 18:32:05] (step=0037065) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 7.252005478379965, LR: 0.0003 +[2026-03-02 18:32:13] (step=0037066) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.252201134807279, LR: 0.0003 +[2026-03-02 18:32:21] (step=0037067) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.252396791234592, LR: 0.0003 +[2026-03-02 18:32:29] (step=0037068) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.252592447661906, LR: 0.0003 +[2026-03-02 18:32:36] (step=0037069) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.25278810408922, LR: 0.0003 +[2026-03-02 18:32:44] (step=0037070) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.252983760516533, LR: 0.0003 +[2026-03-02 18:32:52] (step=0037071) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.253179416943847, LR: 0.0003 +[2026-03-02 18:33:00] (step=0037072) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.25337507337116, LR: 0.0003 +[2026-03-02 18:33:08] (step=0037073) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.253570729798474, LR: 0.0003 +[2026-03-02 18:33:16] (step=0037074) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.253766386225788, LR: 0.0003 +[2026-03-02 18:33:23] (step=0037075) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.253962042653101, LR: 0.0003 +[2026-03-02 18:33:32] (step=0037076) Train Loss: 0.4379, Train Steps/Sec: 0.12, Epoch: 7.254157699080415, LR: 0.0003 +[2026-03-02 18:33:40] (step=0037077) Train Loss: 0.4386, Train Steps/Sec: 0.12, Epoch: 7.254353355507728, LR: 0.0003 +[2026-03-02 18:33:47] (step=0037078) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.254549011935042, LR: 0.0003 +[2026-03-02 18:33:55] (step=0037079) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.254744668362355, LR: 0.0003 +[2026-03-02 18:34:03] (step=0037080) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.254940324789669, LR: 0.0003 +[2026-03-02 18:34:11] (step=0037081) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.255135981216983, LR: 0.0003 +[2026-03-02 18:34:19] (step=0037082) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.2553316376442964, LR: 0.0003 +[2026-03-02 18:34:27] (step=0037083) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.2555272940716105, LR: 0.0003 +[2026-03-02 18:34:35] (step=0037084) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.255722950498924, LR: 0.0003 +[2026-03-02 18:34:42] (step=0037085) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.255918606926238, LR: 0.0003 +[2026-03-02 18:34:50] (step=0037086) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.256114263353552, LR: 0.0003 +[2026-03-02 18:34:58] (step=0037087) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.256309919780865, LR: 0.0003 +[2026-03-02 18:35:06] (step=0037088) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.256505576208179, LR: 0.0003 +[2026-03-02 18:35:14] (step=0037089) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.256701232635492, LR: 0.0003 +[2026-03-02 18:35:22] (step=0037090) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.256896889062806, LR: 0.0003 +[2026-03-02 18:35:29] (step=0037091) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.257092545490119, LR: 0.0003 +[2026-03-02 18:35:37] (step=0037092) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.257288201917433, LR: 0.0003 +[2026-03-02 18:35:45] (step=0037093) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.257483858344747, LR: 0.0003 +[2026-03-02 18:35:53] (step=0037094) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.25767951477206, LR: 0.0003 +[2026-03-02 18:36:01] (step=0037095) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.257875171199374, LR: 0.0003 +[2026-03-02 18:36:09] (step=0037096) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 7.258070827626687, LR: 0.0003 +[2026-03-02 18:36:17] (step=0037097) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.258266484054001, LR: 0.0003 +[2026-03-02 18:36:24] (step=0037098) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.258462140481315, LR: 0.0003 +[2026-03-02 18:36:32] (step=0037099) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.258657796908628, LR: 0.0003 +[2026-03-02 18:36:40] (step=0037100) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 7.258853453335942, LR: 0.0003 +[2026-03-02 18:36:48] (step=0037101) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.2590491097632555, LR: 0.0003 +[2026-03-02 18:36:56] (step=0037102) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 7.2592447661905695, LR: 0.0003 +[2026-03-02 18:37:04] (step=0037103) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.259440422617883, LR: 0.0003 +[2026-03-02 18:37:12] (step=0037104) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.259636079045197, LR: 0.0003 +[2026-03-02 18:37:19] (step=0037105) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.259831735472511, LR: 0.0003 +[2026-03-02 18:37:27] (step=0037106) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.260027391899824, LR: 0.0003 +[2026-03-02 18:37:35] (step=0037107) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.260223048327138, LR: 0.0003 +[2026-03-02 18:37:43] (step=0037108) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.260418704754451, LR: 0.0003 +[2026-03-02 18:37:51] (step=0037109) Train Loss: 0.4683, Train Steps/Sec: 0.13, Epoch: 7.260614361181765, LR: 0.0003 +[2026-03-02 18:37:59] (step=0037110) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.260810017609079, LR: 0.0003 +[2026-03-02 18:38:07] (step=0037111) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.261005674036392, LR: 0.0003 +[2026-03-02 18:38:14] (step=0037112) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.261201330463706, LR: 0.0003 +[2026-03-02 18:38:22] (step=0037113) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.261396986891019, LR: 0.0003 +[2026-03-02 18:38:30] (step=0037114) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.261592643318333, LR: 0.0003 +[2026-03-02 18:38:38] (step=0037115) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.261788299745646, LR: 0.0003 +[2026-03-02 18:38:46] (step=0037116) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 7.26198395617296, LR: 0.0003 +[2026-03-02 18:38:54] (step=0037117) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.262179612600274, LR: 0.0003 +[2026-03-02 18:39:02] (step=0037118) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.262375269027587, LR: 0.0003 +[2026-03-02 18:39:09] (step=0037119) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.262570925454901, LR: 0.0003 +[2026-03-02 18:39:17] (step=0037120) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.2627665818822145, LR: 0.0003 +[2026-03-02 18:39:25] (step=0037121) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.2629622383095285, LR: 0.0003 +[2026-03-02 18:39:33] (step=0037122) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.2631578947368425, LR: 0.0003 +[2026-03-02 18:39:41] (step=0037123) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.263353551164156, LR: 0.0003 +[2026-03-02 18:39:49] (step=0037124) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 7.26354920759147, LR: 0.0003 +[2026-03-02 18:39:57] (step=0037125) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.263744864018783, LR: 0.0003 +[2026-03-02 18:40:04] (step=0037126) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.263940520446097, LR: 0.0003 +[2026-03-02 18:40:12] (step=0037127) Train Loss: 0.4388, Train Steps/Sec: 0.12, Epoch: 7.264136176873411, LR: 0.0003 +[2026-03-02 18:40:20] (step=0037128) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.264331833300724, LR: 0.0003 +[2026-03-02 18:40:28] (step=0037129) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.264527489728038, LR: 0.0003 +[2026-03-02 18:40:36] (step=0037130) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.264723146155351, LR: 0.0003 +[2026-03-02 18:40:44] (step=0037131) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.264918802582665, LR: 0.0003 +[2026-03-02 18:40:52] (step=0037132) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.265114459009978, LR: 0.0003 +[2026-03-02 18:41:00] (step=0037133) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.265310115437292, LR: 0.0003 +[2026-03-02 18:41:07] (step=0037134) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.265505771864606, LR: 0.0003 +[2026-03-02 18:41:15] (step=0037135) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 7.265701428291919, LR: 0.0003 +[2026-03-02 18:41:23] (step=0037136) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.265897084719233, LR: 0.0003 +[2026-03-02 18:41:31] (step=0037137) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.266092741146546, LR: 0.0003 +[2026-03-02 18:41:39] (step=0037138) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.26628839757386, LR: 0.0003 +[2026-03-02 18:41:47] (step=0037139) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.266484054001174, LR: 0.0003 +[2026-03-02 18:41:55] (step=0037140) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.2666797104284875, LR: 0.0003 +[2026-03-02 18:42:02] (step=0037141) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.2668753668558015, LR: 0.0003 +[2026-03-02 18:42:10] (step=0037142) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.267071023283115, LR: 0.0003 +[2026-03-02 18:42:18] (step=0037143) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 7.267266679710429, LR: 0.0003 +[2026-03-02 18:42:26] (step=0037144) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.267462336137742, LR: 0.0003 +[2026-03-02 18:42:34] (step=0037145) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.267657992565056, LR: 0.0003 +[2026-03-02 18:42:42] (step=0037146) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.26785364899237, LR: 0.0003 +[2026-03-02 18:42:50] (step=0037147) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.268049305419683, LR: 0.0003 +[2026-03-02 18:42:57] (step=0037148) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.268244961846997, LR: 0.0003 +[2026-03-02 18:43:05] (step=0037149) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.26844061827431, LR: 0.0003 +[2026-03-02 18:43:13] (step=0037150) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.268636274701624, LR: 0.0003 +[2026-03-02 18:43:21] (step=0037151) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.268831931128938, LR: 0.0003 +[2026-03-02 18:43:29] (step=0037152) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.269027587556251, LR: 0.0003 +[2026-03-02 18:43:37] (step=0037153) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.269223243983565, LR: 0.0003 +[2026-03-02 18:43:45] (step=0037154) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.269418900410878, LR: 0.0003 +[2026-03-02 18:43:52] (step=0037155) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.269614556838192, LR: 0.0003 +[2026-03-02 18:44:00] (step=0037156) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.269810213265505, LR: 0.0003 +[2026-03-02 18:44:08] (step=0037157) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.270005869692819, LR: 0.0003 +[2026-03-02 18:44:16] (step=0037158) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.270201526120133, LR: 0.0003 +[2026-03-02 18:44:24] (step=0037159) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.2703971825474465, LR: 0.0003 +[2026-03-02 18:44:32] (step=0037160) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.2705928389747605, LR: 0.0003 +[2026-03-02 18:44:40] (step=0037161) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.270788495402074, LR: 0.0003 +[2026-03-02 18:44:47] (step=0037162) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.270984151829388, LR: 0.0003 +[2026-03-02 18:44:55] (step=0037163) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.271179808256702, LR: 0.0003 +[2026-03-02 18:45:03] (step=0037164) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.271375464684015, LR: 0.0003 +[2026-03-02 18:45:11] (step=0037165) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.271571121111329, LR: 0.0003 +[2026-03-02 18:45:19] (step=0037166) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.271766777538642, LR: 0.0003 +[2026-03-02 18:45:27] (step=0037167) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.271962433965956, LR: 0.0003 +[2026-03-02 18:45:35] (step=0037168) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.272158090393269, LR: 0.0003 +[2026-03-02 18:45:43] (step=0037169) Train Loss: 0.4393, Train Steps/Sec: 0.12, Epoch: 7.272353746820583, LR: 0.0003 +[2026-03-02 18:45:50] (step=0037170) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.272549403247897, LR: 0.0003 +[2026-03-02 18:45:58] (step=0037171) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.27274505967521, LR: 0.0003 +[2026-03-02 18:46:06] (step=0037172) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.272940716102524, LR: 0.0003 +[2026-03-02 18:46:14] (step=0037173) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.273136372529837, LR: 0.0003 +[2026-03-02 18:46:22] (step=0037174) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.273332028957151, LR: 0.0003 +[2026-03-02 18:46:30] (step=0037175) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.273527685384465, LR: 0.0003 +[2026-03-02 18:46:38] (step=0037176) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 7.273723341811778, LR: 0.0003 +[2026-03-02 18:46:45] (step=0037177) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.273918998239092, LR: 0.0003 +[2026-03-02 18:46:53] (step=0037178) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.2741146546664055, LR: 0.0003 +[2026-03-02 18:47:01] (step=0037179) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.2743103110937195, LR: 0.0003 +[2026-03-02 18:47:09] (step=0037180) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.274505967521033, LR: 0.0003 +[2026-03-02 18:47:17] (step=0037181) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.274701623948347, LR: 0.0003 +[2026-03-02 18:47:25] (step=0037182) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 7.274897280375661, LR: 0.0003 +[2026-03-02 18:47:33] (step=0037183) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.275092936802974, LR: 0.0003 +[2026-03-02 18:47:41] (step=0037184) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.275288593230288, LR: 0.0003 +[2026-03-02 18:47:48] (step=0037185) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.275484249657601, LR: 0.0003 +[2026-03-02 18:47:56] (step=0037186) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.275679906084915, LR: 0.0003 +[2026-03-02 18:48:04] (step=0037187) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.275875562512229, LR: 0.0003 +[2026-03-02 18:48:12] (step=0037188) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.276071218939542, LR: 0.0003 +[2026-03-02 18:48:20] (step=0037189) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.276266875366856, LR: 0.0003 +[2026-03-02 18:48:28] (step=0037190) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.276462531794169, LR: 0.0003 +[2026-03-02 18:48:36] (step=0037191) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.276658188221483, LR: 0.0003 +[2026-03-02 18:48:43] (step=0037192) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.276853844648797, LR: 0.0003 +[2026-03-02 18:48:51] (step=0037193) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 7.27704950107611, LR: 0.0003 +[2026-03-02 18:48:59] (step=0037194) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.277245157503424, LR: 0.0003 +[2026-03-02 18:49:07] (step=0037195) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.277440813930737, LR: 0.0003 +[2026-03-02 18:49:15] (step=0037196) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.277636470358051, LR: 0.0003 +[2026-03-02 18:49:23] (step=0037197) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.2778321267853645, LR: 0.0003 +[2026-03-02 18:49:30] (step=0037198) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 7.2780277832126785, LR: 0.0003 +[2026-03-02 18:49:38] (step=0037199) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.2782234396399925, LR: 0.0003 +[2026-03-02 18:49:46] (step=0037200) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.278419096067306, LR: 0.0003 +[2026-03-02 18:49:54] (step=0037201) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.27861475249462, LR: 0.0003 +[2026-03-02 18:50:02] (step=0037202) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 7.278810408921933, LR: 0.0003 +[2026-03-02 18:50:10] (step=0037203) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.279006065349247, LR: 0.0003 +[2026-03-02 18:50:18] (step=0037204) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.279201721776561, LR: 0.0003 +[2026-03-02 18:50:25] (step=0037205) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.279397378203874, LR: 0.0003 +[2026-03-02 18:50:33] (step=0037206) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.279593034631188, LR: 0.0003 +[2026-03-02 18:50:41] (step=0037207) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.279788691058501, LR: 0.0003 +[2026-03-02 18:50:49] (step=0037208) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.279984347485815, LR: 0.0003 +[2026-03-02 18:50:57] (step=0037209) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.280180003913128, LR: 0.0003 +[2026-03-02 18:51:05] (step=0037210) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.280375660340442, LR: 0.0003 +[2026-03-02 18:51:13] (step=0037211) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.280571316767756, LR: 0.0003 +[2026-03-02 18:51:20] (step=0037212) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.280766973195069, LR: 0.0003 +[2026-03-02 18:51:28] (step=0037213) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.280962629622383, LR: 0.0003 +[2026-03-02 18:51:36] (step=0037214) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.281158286049696, LR: 0.0003 +[2026-03-02 18:51:44] (step=0037215) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.28135394247701, LR: 0.0003 +[2026-03-02 18:51:52] (step=0037216) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 7.281549598904324, LR: 0.0003 +[2026-03-02 18:52:00] (step=0037217) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.2817452553316375, LR: 0.0003 +[2026-03-02 18:52:08] (step=0037218) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 7.2819409117589515, LR: 0.0003 +[2026-03-02 18:52:16] (step=0037219) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.282136568186265, LR: 0.0003 +[2026-03-02 18:52:23] (step=0037220) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.282332224613579, LR: 0.0003 +[2026-03-02 18:52:31] (step=0037221) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.282527881040892, LR: 0.0003 +[2026-03-02 18:52:39] (step=0037222) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.282723537468206, LR: 0.0003 +[2026-03-02 18:52:47] (step=0037223) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.28291919389552, LR: 0.0003 +[2026-03-02 18:52:55] (step=0037224) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.283114850322833, LR: 0.0003 +[2026-03-02 18:53:03] (step=0037225) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.283310506750147, LR: 0.0003 +[2026-03-02 18:53:11] (step=0037226) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.28350616317746, LR: 0.0003 +[2026-03-02 18:53:18] (step=0037227) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.283701819604774, LR: 0.0003 +[2026-03-02 18:53:26] (step=0037228) Train Loss: 0.4356, Train Steps/Sec: 0.12, Epoch: 7.283897476032088, LR: 0.0003 +[2026-03-02 18:53:34] (step=0037229) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.284093132459401, LR: 0.0003 +[2026-03-02 18:53:42] (step=0037230) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.284288788886715, LR: 0.0003 +[2026-03-02 18:53:50] (step=0037231) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 7.284484445314028, LR: 0.0003 +[2026-03-02 18:53:58] (step=0037232) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.284680101741342, LR: 0.0003 +[2026-03-02 18:54:06] (step=0037233) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.284875758168655, LR: 0.0003 +[2026-03-02 18:54:14] (step=0037234) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.285071414595969, LR: 0.0003 +[2026-03-02 18:54:21] (step=0037235) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.285267071023283, LR: 0.0003 +[2026-03-02 18:54:29] (step=0037236) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.2854627274505965, LR: 0.0003 +[2026-03-02 18:54:37] (step=0037237) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.2856583838779105, LR: 0.0003 +[2026-03-02 18:54:45] (step=0037238) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 7.285854040305224, LR: 0.0003 +[2026-03-02 18:54:53] (step=0037239) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 7.286049696732538, LR: 0.0003 +[2026-03-02 18:55:01] (step=0037240) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.286245353159852, LR: 0.0003 +[2026-03-02 18:55:09] (step=0037241) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.286441009587165, LR: 0.0003 +[2026-03-02 18:55:16] (step=0037242) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.286636666014479, LR: 0.0003 +[2026-03-02 18:55:24] (step=0037243) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.286832322441792, LR: 0.0003 +[2026-03-02 18:55:32] (step=0037244) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.287027978869106, LR: 0.0003 +[2026-03-02 18:55:40] (step=0037245) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.28722363529642, LR: 0.0003 +[2026-03-02 18:55:48] (step=0037246) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.287419291723733, LR: 0.0003 +[2026-03-02 18:55:56] (step=0037247) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.287614948151047, LR: 0.0003 +[2026-03-02 18:56:04] (step=0037248) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.28781060457836, LR: 0.0003 +[2026-03-02 18:56:11] (step=0037249) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 7.288006261005674, LR: 0.0003 +[2026-03-02 18:56:19] (step=0037250) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.288201917432987, LR: 0.0003 +[2026-03-02 18:56:27] (step=0037251) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.288397573860301, LR: 0.0003 +[2026-03-02 18:56:35] (step=0037252) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.288593230287615, LR: 0.0003 +[2026-03-02 18:56:43] (step=0037253) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.288788886714928, LR: 0.0003 +[2026-03-02 18:56:51] (step=0037254) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.288984543142242, LR: 0.0003 +[2026-03-02 18:56:59] (step=0037255) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.2891801995695555, LR: 0.0003 +[2026-03-02 18:57:06] (step=0037256) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.2893758559968695, LR: 0.0003 +[2026-03-02 18:57:14] (step=0037257) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.2895715124241836, LR: 0.0003 +[2026-03-02 18:57:22] (step=0037258) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.289767168851497, LR: 0.0003 +[2026-03-02 18:57:30] (step=0037259) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.289962825278811, LR: 0.0003 +[2026-03-02 18:57:38] (step=0037260) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.290158481706124, LR: 0.0003 +[2026-03-02 18:57:46] (step=0037261) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.290354138133438, LR: 0.0003 +[2026-03-02 18:57:54] (step=0037262) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.290549794560751, LR: 0.0003 +[2026-03-02 18:58:02] (step=0037263) Train Loss: 0.4514, Train Steps/Sec: 0.12, Epoch: 7.290745450988065, LR: 0.0003 +[2026-03-02 18:58:09] (step=0037264) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.290941107415379, LR: 0.0003 +[2026-03-02 18:58:17] (step=0037265) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.291136763842692, LR: 0.0003 +[2026-03-02 18:58:25] (step=0037266) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 7.291332420270006, LR: 0.0003 +[2026-03-02 18:58:33] (step=0037267) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.291528076697319, LR: 0.0003 +[2026-03-02 18:58:41] (step=0037268) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.291723733124633, LR: 0.0003 +[2026-03-02 18:58:49] (step=0037269) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 7.291919389551947, LR: 0.0003 +[2026-03-02 18:58:57] (step=0037270) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.29211504597926, LR: 0.0003 +[2026-03-02 18:59:04] (step=0037271) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.292310702406574, LR: 0.0003 +[2026-03-02 18:59:12] (step=0037272) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 7.292506358833887, LR: 0.0003 +[2026-03-02 18:59:20] (step=0037273) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.292702015261201, LR: 0.0003 +[2026-03-02 18:59:28] (step=0037274) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.2928976716885145, LR: 0.0003 +[2026-03-02 18:59:36] (step=0037275) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.2930933281158286, LR: 0.0003 +[2026-03-02 18:59:44] (step=0037276) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.293288984543143, LR: 0.0003 +[2026-03-02 18:59:52] (step=0037277) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.293484640970456, LR: 0.0003 +[2026-03-02 19:00:00] (step=0037278) Train Loss: 0.4367, Train Steps/Sec: 0.12, Epoch: 7.29368029739777, LR: 0.0003 +[2026-03-02 19:00:07] (step=0037279) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.293875953825083, LR: 0.0003 +[2026-03-02 19:00:15] (step=0037280) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.294071610252397, LR: 0.0003 +[2026-03-02 19:00:23] (step=0037281) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.294267266679711, LR: 0.0003 +[2026-03-02 19:00:31] (step=0037282) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.294462923107024, LR: 0.0003 +[2026-03-02 19:00:39] (step=0037283) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.294658579534338, LR: 0.0003 +[2026-03-02 19:00:47] (step=0037284) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.294854235961651, LR: 0.0003 +[2026-03-02 19:00:55] (step=0037285) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 7.295049892388965, LR: 0.0003 +[2026-03-02 19:01:02] (step=0037286) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 7.295245548816278, LR: 0.0003 +[2026-03-02 19:01:10] (step=0037287) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 7.295441205243592, LR: 0.0003 +[2026-03-02 19:01:18] (step=0037288) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.295636861670906, LR: 0.0003 +[2026-03-02 19:01:26] (step=0037289) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.295832518098219, LR: 0.0003 +[2026-03-02 19:01:34] (step=0037290) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.296028174525533, LR: 0.0003 +[2026-03-02 19:01:42] (step=0037291) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.296223830952846, LR: 0.0003 +[2026-03-02 19:01:50] (step=0037292) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.29641948738016, LR: 0.0003 +[2026-03-02 19:01:57] (step=0037293) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.2966151438074744, LR: 0.0003 +[2026-03-02 19:02:05] (step=0037294) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.296810800234788, LR: 0.0003 +[2026-03-02 19:02:13] (step=0037295) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.297006456662102, LR: 0.0003 +[2026-03-02 19:02:21] (step=0037296) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.297202113089415, LR: 0.0003 +[2026-03-02 19:02:29] (step=0037297) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.297397769516729, LR: 0.0003 +[2026-03-02 19:02:37] (step=0037298) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.297593425944043, LR: 0.0003 +[2026-03-02 19:02:45] (step=0037299) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.297789082371356, LR: 0.0003 +[2026-03-02 19:02:52] (step=0037300) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 7.29798473879867, LR: 0.0003 +[2026-03-02 19:03:00] (step=0037301) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.298180395225983, LR: 0.0003 +[2026-03-02 19:03:08] (step=0037302) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 7.298376051653297, LR: 0.0003 +[2026-03-02 19:03:16] (step=0037303) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.29857170808061, LR: 0.0003 +[2026-03-02 19:03:24] (step=0037304) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.298767364507924, LR: 0.0003 +[2026-03-02 19:03:32] (step=0037305) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.298963020935238, LR: 0.0003 +[2026-03-02 19:03:39] (step=0037306) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.299158677362551, LR: 0.0003 +[2026-03-02 19:03:47] (step=0037307) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.299354333789865, LR: 0.0003 +[2026-03-02 19:03:55] (step=0037308) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.299549990217178, LR: 0.0003 +[2026-03-02 19:04:03] (step=0037309) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.299745646644492, LR: 0.0003 +[2026-03-02 19:04:11] (step=0037310) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.299941303071806, LR: 0.0003 +[2026-03-02 19:04:19] (step=0037311) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.3001369594991194, LR: 0.0003 +[2026-03-02 19:04:27] (step=0037312) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 7.3003326159264335, LR: 0.0003 +[2026-03-02 19:04:34] (step=0037313) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.300528272353747, LR: 0.0003 +[2026-03-02 19:04:42] (step=0037314) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.300723928781061, LR: 0.0003 +[2026-03-02 19:04:50] (step=0037315) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 7.300919585208374, LR: 0.0003 +[2026-03-02 19:04:58] (step=0037316) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.301115241635688, LR: 0.0003 +[2026-03-02 19:05:06] (step=0037317) Train Loss: 0.4370, Train Steps/Sec: 0.12, Epoch: 7.301310898063002, LR: 0.0003 +[2026-03-02 19:05:14] (step=0037318) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.301506554490315, LR: 0.0003 +[2026-03-02 19:05:22] (step=0037319) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.301702210917629, LR: 0.0003 +[2026-03-02 19:05:30] (step=0037320) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.301897867344942, LR: 0.0003 +[2026-03-02 19:05:37] (step=0037321) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.302093523772256, LR: 0.0003 +[2026-03-02 19:05:45] (step=0037322) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.30228918019957, LR: 0.0003 +[2026-03-02 19:05:53] (step=0037323) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.302484836626883, LR: 0.0003 +[2026-03-02 19:06:01] (step=0037324) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.302680493054197, LR: 0.0003 +[2026-03-02 19:06:09] (step=0037325) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.30287614948151, LR: 0.0003 +[2026-03-02 19:06:17] (step=0037326) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.303071805908824, LR: 0.0003 +[2026-03-02 19:06:25] (step=0037327) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 7.303267462336137, LR: 0.0003 +[2026-03-02 19:06:33] (step=0037328) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.303463118763451, LR: 0.0003 +[2026-03-02 19:06:40] (step=0037329) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 7.303658775190765, LR: 0.0003 +[2026-03-02 19:06:48] (step=0037330) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.3038544316180785, LR: 0.0003 +[2026-03-02 19:06:56] (step=0037331) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.3040500880453925, LR: 0.0003 +[2026-03-02 19:07:04] (step=0037332) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.304245744472706, LR: 0.0003 +[2026-03-02 19:07:12] (step=0037333) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.30444140090002, LR: 0.0003 +[2026-03-02 19:07:20] (step=0037334) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.304637057327334, LR: 0.0003 +[2026-03-02 19:07:28] (step=0037335) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.304832713754647, LR: 0.0003 +[2026-03-02 19:07:35] (step=0037336) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.305028370181961, LR: 0.0003 +[2026-03-02 19:07:43] (step=0037337) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.305224026609274, LR: 0.0003 +[2026-03-02 19:07:51] (step=0037338) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.305419683036588, LR: 0.0003 +[2026-03-02 19:07:59] (step=0037339) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.305615339463901, LR: 0.0003 +[2026-03-02 19:08:07] (step=0037340) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.305810995891215, LR: 0.0003 +[2026-03-02 19:08:15] (step=0037341) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.306006652318529, LR: 0.0003 +[2026-03-02 19:08:23] (step=0037342) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.306202308745842, LR: 0.0003 +[2026-03-02 19:08:30] (step=0037343) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.306397965173156, LR: 0.0003 +[2026-03-02 19:08:38] (step=0037344) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.306593621600469, LR: 0.0003 +[2026-03-02 19:08:46] (step=0037345) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.306789278027783, LR: 0.0003 +[2026-03-02 19:08:54] (step=0037346) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.306984934455097, LR: 0.0003 +[2026-03-02 19:09:02] (step=0037347) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.30718059088241, LR: 0.0003 +[2026-03-02 19:09:10] (step=0037348) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 7.307376247309724, LR: 0.0003 +[2026-03-02 19:09:18] (step=0037349) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.3075719037370375, LR: 0.0003 +[2026-03-02 19:09:25] (step=0037350) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.3077675601643515, LR: 0.0003 +[2026-03-02 19:09:33] (step=0037351) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.3079632165916655, LR: 0.0003 +[2026-03-02 19:09:41] (step=0037352) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.308158873018979, LR: 0.0003 +[2026-03-02 19:09:49] (step=0037353) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.308354529446293, LR: 0.0003 +[2026-03-02 19:09:57] (step=0037354) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.308550185873606, LR: 0.0003 +[2026-03-02 19:10:05] (step=0037355) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.30874584230092, LR: 0.0003 +[2026-03-02 19:10:13] (step=0037356) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.308941498728233, LR: 0.0003 +[2026-03-02 19:10:20] (step=0037357) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.309137155155547, LR: 0.0003 +[2026-03-02 19:10:28] (step=0037358) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 7.309332811582861, LR: 0.0003 +[2026-03-02 19:10:36] (step=0037359) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.309528468010174, LR: 0.0003 +[2026-03-02 19:10:44] (step=0037360) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 7.309724124437488, LR: 0.0003 +[2026-03-02 19:10:52] (step=0037361) Train Loss: 0.4357, Train Steps/Sec: 0.12, Epoch: 7.309919780864801, LR: 0.0003 +[2026-03-02 19:11:00] (step=0037362) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.310115437292115, LR: 0.0003 +[2026-03-02 19:11:08] (step=0037363) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.310311093719429, LR: 0.0003 +[2026-03-02 19:11:15] (step=0037364) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.310506750146742, LR: 0.0003 +[2026-03-02 19:11:23] (step=0037365) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.310702406574056, LR: 0.0003 +[2026-03-02 19:11:31] (step=0037366) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.310898063001369, LR: 0.0003 +[2026-03-02 19:11:39] (step=0037367) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.311093719428683, LR: 0.0003 +[2026-03-02 19:11:47] (step=0037368) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.3112893758559965, LR: 0.0003 +[2026-03-02 19:11:55] (step=0037369) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 7.3114850322833105, LR: 0.0003 +[2026-03-02 19:12:03] (step=0037370) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.3116806887106245, LR: 0.0003 +[2026-03-02 19:12:10] (step=0037371) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.311876345137938, LR: 0.0003 +[2026-03-02 19:12:18] (step=0037372) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.312072001565252, LR: 0.0003 +[2026-03-02 19:12:26] (step=0037373) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.312267657992565, LR: 0.0003 +[2026-03-02 19:12:34] (step=0037374) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.312463314419879, LR: 0.0003 +[2026-03-02 19:12:42] (step=0037375) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 7.312658970847193, LR: 0.0003 +[2026-03-02 19:12:50] (step=0037376) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.312854627274506, LR: 0.0003 +[2026-03-02 19:12:58] (step=0037377) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.31305028370182, LR: 0.0003 +[2026-03-02 19:13:06] (step=0037378) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.313245940129133, LR: 0.0003 +[2026-03-02 19:13:13] (step=0037379) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.313441596556447, LR: 0.0003 +[2026-03-02 19:13:21] (step=0037380) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.31363725298376, LR: 0.0003 +[2026-03-02 19:13:29] (step=0037381) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.313832909411074, LR: 0.0003 +[2026-03-02 19:13:37] (step=0037382) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.314028565838388, LR: 0.0003 +[2026-03-02 19:13:45] (step=0037383) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.314224222265701, LR: 0.0003 +[2026-03-02 19:13:53] (step=0037384) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.314419878693015, LR: 0.0003 +[2026-03-02 19:14:01] (step=0037385) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.314615535120328, LR: 0.0003 +[2026-03-02 19:14:08] (step=0037386) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.314811191547642, LR: 0.0003 +[2026-03-02 19:14:16] (step=0037387) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.315006847974956, LR: 0.0003 +[2026-03-02 19:14:24] (step=0037388) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.3152025044022695, LR: 0.0003 +[2026-03-02 19:14:32] (step=0037389) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 7.3153981608295835, LR: 0.0003 +[2026-03-02 19:14:40] (step=0037390) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.315593817256897, LR: 0.0003 +[2026-03-02 19:14:48] (step=0037391) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 7.315789473684211, LR: 0.0003 +[2026-03-02 19:14:56] (step=0037392) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.315985130111524, LR: 0.0003 +[2026-03-02 19:15:03] (step=0037393) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.316180786538838, LR: 0.0003 +[2026-03-02 19:15:11] (step=0037394) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 7.316376442966152, LR: 0.0003 +[2026-03-02 19:15:19] (step=0037395) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 7.316572099393465, LR: 0.0003 +[2026-03-02 19:15:27] (step=0037396) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.316767755820779, LR: 0.0003 +[2026-03-02 19:15:35] (step=0037397) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.316963412248092, LR: 0.0003 +[2026-03-02 19:15:43] (step=0037398) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.317159068675406, LR: 0.0003 +[2026-03-02 19:15:51] (step=0037399) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.31735472510272, LR: 0.0003 +[2026-03-02 19:15:58] (step=0037400) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.317550381530033, LR: 0.0003 +[2026-03-02 19:16:06] (step=0037401) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.317746037957347, LR: 0.0003 +[2026-03-02 19:16:14] (step=0037402) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.31794169438466, LR: 0.0003 +[2026-03-02 19:16:22] (step=0037403) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.318137350811974, LR: 0.0003 +[2026-03-02 19:16:30] (step=0037404) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.318333007239287, LR: 0.0003 +[2026-03-02 19:16:38] (step=0037405) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 7.318528663666601, LR: 0.0003 +[2026-03-02 19:16:46] (step=0037406) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.318724320093915, LR: 0.0003 +[2026-03-02 19:16:53] (step=0037407) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.3189199765212285, LR: 0.0003 +[2026-03-02 19:17:01] (step=0037408) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.3191156329485425, LR: 0.0003 +[2026-03-02 19:17:09] (step=0037409) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.319311289375856, LR: 0.0003 +[2026-03-02 19:17:17] (step=0037410) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 7.31950694580317, LR: 0.0003 +[2026-03-02 19:17:25] (step=0037411) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.319702602230484, LR: 0.0003 +[2026-03-02 19:17:33] (step=0037412) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.319898258657797, LR: 0.0003 +[2026-03-02 19:17:41] (step=0037413) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.320093915085111, LR: 0.0003 +[2026-03-02 19:17:49] (step=0037414) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.320289571512424, LR: 0.0003 +[2026-03-02 19:17:57] (step=0037415) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.320485227939738, LR: 0.0003 +[2026-03-02 19:18:04] (step=0037416) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.320680884367052, LR: 0.0003 +[2026-03-02 19:18:12] (step=0037417) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 7.320876540794365, LR: 0.0003 +[2026-03-02 19:18:20] (step=0037418) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.321072197221679, LR: 0.0003 +[2026-03-02 19:18:28] (step=0037419) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.321267853648992, LR: 0.0003 +[2026-03-02 19:18:36] (step=0037420) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.321463510076306, LR: 0.0003 +[2026-03-02 19:18:44] (step=0037421) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.321659166503619, LR: 0.0003 +[2026-03-02 19:18:52] (step=0037422) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.321854822930933, LR: 0.0003 +[2026-03-02 19:18:59] (step=0037423) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.322050479358247, LR: 0.0003 +[2026-03-02 19:19:07] (step=0037424) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.32224613578556, LR: 0.0003 +[2026-03-02 19:19:15] (step=0037425) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.322441792212874, LR: 0.0003 +[2026-03-02 19:19:23] (step=0037426) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.3226374486401875, LR: 0.0003 +[2026-03-02 19:19:31] (step=0037427) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.3228331050675015, LR: 0.0003 +[2026-03-02 19:19:39] (step=0037428) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.3230287614948155, LR: 0.0003 +[2026-03-02 19:19:46] (step=0037429) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.323224417922129, LR: 0.0003 +[2026-03-02 19:19:54] (step=0037430) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.323420074349443, LR: 0.0003 +[2026-03-02 19:20:02] (step=0037431) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.323615730776756, LR: 0.0003 +[2026-03-02 19:20:10] (step=0037432) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.32381138720407, LR: 0.0003 +[2026-03-02 19:20:18] (step=0037433) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.324007043631383, LR: 0.0003 +[2026-03-02 19:20:26] (step=0037434) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.324202700058697, LR: 0.0003 +[2026-03-02 19:20:34] (step=0037435) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.324398356486011, LR: 0.0003 +[2026-03-02 19:20:41] (step=0037436) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.324594012913324, LR: 0.0003 +[2026-03-02 19:20:49] (step=0037437) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.324789669340638, LR: 0.0003 +[2026-03-02 19:20:57] (step=0037438) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.324985325767951, LR: 0.0003 +[2026-03-02 19:21:05] (step=0037439) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.325180982195265, LR: 0.0003 +[2026-03-02 19:21:13] (step=0037440) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 7.325376638622579, LR: 0.0003 +[2026-03-02 19:21:21] (step=0037441) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.325572295049892, LR: 0.0003 +[2026-03-02 19:21:29] (step=0037442) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.325767951477206, LR: 0.0003 +[2026-03-02 19:21:36] (step=0037443) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.325963607904519, LR: 0.0003 +[2026-03-02 19:21:44] (step=0037444) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 7.326159264331833, LR: 0.0003 +[2026-03-02 19:21:52] (step=0037445) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.3263549207591465, LR: 0.0003 +[2026-03-02 19:22:00] (step=0037446) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.3265505771864605, LR: 0.0003 +[2026-03-02 19:22:08] (step=0037447) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 7.3267462336137745, LR: 0.0003 +[2026-03-02 19:22:16] (step=0037448) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.326941890041088, LR: 0.0003 +[2026-03-02 19:22:24] (step=0037449) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 7.327137546468402, LR: 0.0003 +[2026-03-02 19:22:31] (step=0037450) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.327333202895715, LR: 0.0003 +[2026-03-02 19:22:39] (step=0037451) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.327528859323029, LR: 0.0003 +[2026-03-02 19:22:47] (step=0037452) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.327724515750343, LR: 0.0003 +[2026-03-02 19:22:55] (step=0037453) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.327920172177656, LR: 0.0003 +[2026-03-02 19:23:03] (step=0037454) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.32811582860497, LR: 0.0003 +[2026-03-02 19:23:11] (step=0037455) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.328311485032283, LR: 0.0003 +[2026-03-02 19:23:18] (step=0037456) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.328507141459597, LR: 0.0003 +[2026-03-02 19:23:26] (step=0037457) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.32870279788691, LR: 0.0003 +[2026-03-02 19:23:34] (step=0037458) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.328898454314224, LR: 0.0003 +[2026-03-02 19:23:42] (step=0037459) Train Loss: 0.4376, Train Steps/Sec: 0.12, Epoch: 7.329094110741538, LR: 0.0003 +[2026-03-02 19:23:50] (step=0037460) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.329289767168851, LR: 0.0003 +[2026-03-02 19:23:58] (step=0037461) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.329485423596165, LR: 0.0003 +[2026-03-02 19:24:06] (step=0037462) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.329681080023478, LR: 0.0003 +[2026-03-02 19:24:14] (step=0037463) Train Loss: 0.4371, Train Steps/Sec: 0.12, Epoch: 7.329876736450792, LR: 0.0003 +[2026-03-02 19:24:22] (step=0037464) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.330072392878106, LR: 0.0003 +[2026-03-02 19:24:30] (step=0037465) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.3302680493054195, LR: 0.0003 +[2026-03-02 19:24:37] (step=0037466) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.3304637057327335, LR: 0.0003 +[2026-03-02 19:24:45] (step=0037467) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.330659362160047, LR: 0.0003 +[2026-03-02 19:24:53] (step=0037468) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.330855018587361, LR: 0.0003 +[2026-03-02 19:25:01] (step=0037469) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.331050675014675, LR: 0.0003 +[2026-03-02 19:25:09] (step=0037470) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.331246331441988, LR: 0.0003 +[2026-03-02 19:25:17] (step=0037471) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.331441987869302, LR: 0.0003 +[2026-03-02 19:25:24] (step=0037472) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.331637644296615, LR: 0.0003 +[2026-03-02 19:25:32] (step=0037473) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.331833300723929, LR: 0.0003 +[2026-03-02 19:25:40] (step=0037474) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.332028957151242, LR: 0.0003 +[2026-03-02 19:25:48] (step=0037475) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.332224613578556, LR: 0.0003 +[2026-03-02 19:25:56] (step=0037476) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.33242027000587, LR: 0.0003 +[2026-03-02 19:26:04] (step=0037477) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 7.332615926433183, LR: 0.0003 +[2026-03-02 19:26:12] (step=0037478) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.332811582860497, LR: 0.0003 +[2026-03-02 19:26:19] (step=0037479) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.33300723928781, LR: 0.0003 +[2026-03-02 19:26:27] (step=0037480) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.333202895715124, LR: 0.0003 +[2026-03-02 19:26:35] (step=0037481) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.333398552142438, LR: 0.0003 +[2026-03-02 19:26:43] (step=0037482) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.333594208569751, LR: 0.0003 +[2026-03-02 19:26:51] (step=0037483) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.333789864997065, LR: 0.0003 +[2026-03-02 19:26:59] (step=0037484) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 7.3339855214243785, LR: 0.0003 +[2026-03-02 19:27:07] (step=0037485) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.3341811778516925, LR: 0.0003 +[2026-03-02 19:27:14] (step=0037486) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.334376834279006, LR: 0.0003 +[2026-03-02 19:27:22] (step=0037487) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.33457249070632, LR: 0.0003 +[2026-03-02 19:27:30] (step=0037488) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.334768147133634, LR: 0.0003 +[2026-03-02 19:27:38] (step=0037489) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.334963803560947, LR: 0.0003 +[2026-03-02 19:27:46] (step=0037490) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.335159459988261, LR: 0.0003 +[2026-03-02 19:27:54] (step=0037491) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.335355116415574, LR: 0.0003 +[2026-03-02 19:28:01] (step=0037492) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.335550772842888, LR: 0.0003 +[2026-03-02 19:28:09] (step=0037493) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.335746429270202, LR: 0.0003 +[2026-03-02 19:28:17] (step=0037494) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.335942085697515, LR: 0.0003 +[2026-03-02 19:28:25] (step=0037495) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.336137742124829, LR: 0.0003 +[2026-03-02 19:28:33] (step=0037496) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.336333398552142, LR: 0.0003 +[2026-03-02 19:28:41] (step=0037497) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.336529054979456, LR: 0.0003 +[2026-03-02 19:28:49] (step=0037498) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.336724711406769, LR: 0.0003 +[2026-03-02 19:28:56] (step=0037499) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.336920367834083, LR: 0.0003 +[2026-03-02 19:29:04] (step=0037500) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.337116024261397, LR: 0.0003 +[2026-03-02 19:29:04] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0037500/ +[2026-03-02 19:29:12] (step=0037501) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.33731168068871, LR: 0.0003 +[2026-03-02 19:29:20] (step=0037502) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.337507337116024, LR: 0.0003 +[2026-03-02 19:29:28] (step=0037503) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 7.3377029935433375, LR: 0.0003 +[2026-03-02 19:29:36] (step=0037504) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.3378986499706516, LR: 0.0003 +[2026-03-02 19:29:44] (step=0037505) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.338094306397966, LR: 0.0003 +[2026-03-02 19:29:51] (step=0037506) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.338289962825279, LR: 0.0003 +[2026-03-02 19:29:59] (step=0037507) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 7.338485619252593, LR: 0.0003 +[2026-03-02 19:30:07] (step=0037508) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.338681275679906, LR: 0.0003 +[2026-03-02 19:30:15] (step=0037509) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.33887693210722, LR: 0.0003 +[2026-03-02 19:30:23] (step=0037510) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.339072588534533, LR: 0.0003 +[2026-03-02 19:30:31] (step=0037511) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.339268244961847, LR: 0.0003 +[2026-03-02 19:30:39] (step=0037512) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.339463901389161, LR: 0.0003 +[2026-03-02 19:30:47] (step=0037513) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.339659557816474, LR: 0.0003 +[2026-03-02 19:30:54] (step=0037514) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.339855214243788, LR: 0.0003 +[2026-03-02 19:31:02] (step=0037515) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.340050870671101, LR: 0.0003 +[2026-03-02 19:31:10] (step=0037516) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.340246527098415, LR: 0.0003 +[2026-03-02 19:31:18] (step=0037517) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.340442183525729, LR: 0.0003 +[2026-03-02 19:31:26] (step=0037518) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.340637839953042, LR: 0.0003 +[2026-03-02 19:31:34] (step=0037519) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 7.340833496380356, LR: 0.0003 +[2026-03-02 19:31:41] (step=0037520) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 7.341029152807669, LR: 0.0003 +[2026-03-02 19:31:49] (step=0037521) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 7.341224809234983, LR: 0.0003 +[2026-03-02 19:31:57] (step=0037522) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 7.3414204656622974, LR: 0.0003 +[2026-03-02 19:32:05] (step=0037523) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.341616122089611, LR: 0.0003 +[2026-03-02 19:32:13] (step=0037524) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 7.341811778516925, LR: 0.0003 +[2026-03-02 19:32:21] (step=0037525) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.342007434944238, LR: 0.0003 +[2026-03-02 19:32:29] (step=0037526) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.342203091371552, LR: 0.0003 +[2026-03-02 19:32:36] (step=0037527) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.342398747798865, LR: 0.0003 +[2026-03-02 19:32:44] (step=0037528) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.342594404226179, LR: 0.0003 +[2026-03-02 19:32:52] (step=0037529) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.342790060653493, LR: 0.0003 +[2026-03-02 19:33:00] (step=0037530) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.342985717080806, LR: 0.0003 +[2026-03-02 19:33:08] (step=0037531) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.34318137350812, LR: 0.0003 +[2026-03-02 19:33:16] (step=0037532) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 7.343377029935433, LR: 0.0003 +[2026-03-02 19:33:24] (step=0037533) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.343572686362747, LR: 0.0003 +[2026-03-02 19:33:31] (step=0037534) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.343768342790061, LR: 0.0003 +[2026-03-02 19:33:39] (step=0037535) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.343963999217374, LR: 0.0003 +[2026-03-02 19:33:47] (step=0037536) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.344159655644688, LR: 0.0003 +[2026-03-02 19:33:55] (step=0037537) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.344355312072001, LR: 0.0003 +[2026-03-02 19:34:03] (step=0037538) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.344550968499315, LR: 0.0003 +[2026-03-02 19:34:11] (step=0037539) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.344746624926628, LR: 0.0003 +[2026-03-02 19:34:18] (step=0037540) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.3449422813539424, LR: 0.0003 +[2026-03-02 19:34:26] (step=0037541) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.3451379377812565, LR: 0.0003 +[2026-03-02 19:34:34] (step=0037542) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.34533359420857, LR: 0.0003 +[2026-03-02 19:34:42] (step=0037543) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.345529250635884, LR: 0.0003 +[2026-03-02 19:34:50] (step=0037544) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.345724907063197, LR: 0.0003 +[2026-03-02 19:34:58] (step=0037545) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 7.345920563490511, LR: 0.0003 +[2026-03-02 19:35:06] (step=0037546) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.346116219917825, LR: 0.0003 +[2026-03-02 19:35:13] (step=0037547) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 7.346311876345138, LR: 0.0003 +[2026-03-02 19:35:21] (step=0037548) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.346507532772452, LR: 0.0003 +[2026-03-02 19:35:29] (step=0037549) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.346703189199765, LR: 0.0003 +[2026-03-02 19:35:37] (step=0037550) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.346898845627079, LR: 0.0003 +[2026-03-02 19:35:45] (step=0037551) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.347094502054392, LR: 0.0003 +[2026-03-02 19:35:53] (step=0037552) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.347290158481706, LR: 0.0003 +[2026-03-02 19:36:00] (step=0037553) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.34748581490902, LR: 0.0003 +[2026-03-02 19:36:08] (step=0037554) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.347681471336333, LR: 0.0003 +[2026-03-02 19:36:16] (step=0037555) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.347877127763647, LR: 0.0003 +[2026-03-02 19:36:24] (step=0037556) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.34807278419096, LR: 0.0003 +[2026-03-02 19:36:32] (step=0037557) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.348268440618274, LR: 0.0003 +[2026-03-02 19:36:40] (step=0037558) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.348464097045588, LR: 0.0003 +[2026-03-02 19:36:48] (step=0037559) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 7.3486597534729015, LR: 0.0003 +[2026-03-02 19:36:56] (step=0037560) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.3488554099002155, LR: 0.0003 +[2026-03-02 19:37:03] (step=0037561) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.349051066327529, LR: 0.0003 +[2026-03-02 19:37:11] (step=0037562) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.349246722754843, LR: 0.0003 +[2026-03-02 19:37:19] (step=0037563) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.349442379182156, LR: 0.0003 +[2026-03-02 19:37:27] (step=0037564) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.34963803560947, LR: 0.0003 +[2026-03-02 19:37:35] (step=0037565) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.349833692036784, LR: 0.0003 +[2026-03-02 19:37:43] (step=0037566) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.350029348464097, LR: 0.0003 +[2026-03-02 19:37:51] (step=0037567) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.350225004891411, LR: 0.0003 +[2026-03-02 19:37:58] (step=0037568) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.350420661318724, LR: 0.0003 +[2026-03-02 19:38:06] (step=0037569) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.350616317746038, LR: 0.0003 +[2026-03-02 19:38:14] (step=0037570) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.350811974173352, LR: 0.0003 +[2026-03-02 19:38:22] (step=0037571) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.351007630600665, LR: 0.0003 +[2026-03-02 19:38:30] (step=0037572) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 7.351203287027979, LR: 0.0003 +[2026-03-02 19:38:38] (step=0037573) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.351398943455292, LR: 0.0003 +[2026-03-02 19:38:45] (step=0037574) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.351594599882606, LR: 0.0003 +[2026-03-02 19:38:53] (step=0037575) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.35179025630992, LR: 0.0003 +[2026-03-02 19:39:01] (step=0037576) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.351985912737233, LR: 0.0003 +[2026-03-02 19:39:09] (step=0037577) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.352181569164547, LR: 0.0003 +[2026-03-02 19:39:17] (step=0037578) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.3523772255918605, LR: 0.0003 +[2026-03-02 19:39:25] (step=0037579) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.3525728820191745, LR: 0.0003 +[2026-03-02 19:39:32] (step=0037580) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.352768538446488, LR: 0.0003 +[2026-03-02 19:39:40] (step=0037581) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.352964194873802, LR: 0.0003 +[2026-03-02 19:39:48] (step=0037582) Train Loss: 0.4260, Train Steps/Sec: 0.13, Epoch: 7.353159851301116, LR: 0.0003 +[2026-03-02 19:39:56] (step=0037583) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.353355507728429, LR: 0.0003 +[2026-03-02 19:40:04] (step=0037584) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.353551164155743, LR: 0.0003 +[2026-03-02 19:40:12] (step=0037585) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.353746820583056, LR: 0.0003 +[2026-03-02 19:40:20] (step=0037586) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.35394247701037, LR: 0.0003 +[2026-03-02 19:40:27] (step=0037587) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.354138133437684, LR: 0.0003 +[2026-03-02 19:40:35] (step=0037588) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.354333789864997, LR: 0.0003 +[2026-03-02 19:40:43] (step=0037589) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.354529446292311, LR: 0.0003 +[2026-03-02 19:40:51] (step=0037590) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.354725102719624, LR: 0.0003 +[2026-03-02 19:40:59] (step=0037591) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.354920759146938, LR: 0.0003 +[2026-03-02 19:41:07] (step=0037592) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.355116415574251, LR: 0.0003 +[2026-03-02 19:41:14] (step=0037593) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.355312072001565, LR: 0.0003 +[2026-03-02 19:41:22] (step=0037594) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.355507728428879, LR: 0.0003 +[2026-03-02 19:41:30] (step=0037595) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.355703384856192, LR: 0.0003 +[2026-03-02 19:41:38] (step=0037596) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.355899041283506, LR: 0.0003 +[2026-03-02 19:41:46] (step=0037597) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.3560946977108195, LR: 0.0003 +[2026-03-02 19:41:54] (step=0037598) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.3562903541381335, LR: 0.0003 +[2026-03-02 19:42:02] (step=0037599) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 7.3564860105654475, LR: 0.0003 +[2026-03-02 19:42:09] (step=0037600) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.356681666992761, LR: 0.0003 +[2026-03-02 19:42:17] (step=0037601) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.356877323420075, LR: 0.0003 +[2026-03-02 19:42:25] (step=0037602) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.357072979847388, LR: 0.0003 +[2026-03-02 19:42:33] (step=0037603) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.357268636274702, LR: 0.0003 +[2026-03-02 19:42:41] (step=0037604) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.357464292702015, LR: 0.0003 +[2026-03-02 19:42:49] (step=0037605) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 7.357659949129329, LR: 0.0003 +[2026-03-02 19:42:57] (step=0037606) Train Loss: 0.4384, Train Steps/Sec: 0.12, Epoch: 7.357855605556643, LR: 0.0003 +[2026-03-02 19:43:04] (step=0037607) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.358051261983956, LR: 0.0003 +[2026-03-02 19:43:12] (step=0037608) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.35824691841127, LR: 0.0003 +[2026-03-02 19:43:20] (step=0037609) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.358442574838583, LR: 0.0003 +[2026-03-02 19:43:28] (step=0037610) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.358638231265897, LR: 0.0003 +[2026-03-02 19:43:36] (step=0037611) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.358833887693211, LR: 0.0003 +[2026-03-02 19:43:44] (step=0037612) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.359029544120524, LR: 0.0003 +[2026-03-02 19:43:52] (step=0037613) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.359225200547838, LR: 0.0003 +[2026-03-02 19:43:59] (step=0037614) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.359420856975151, LR: 0.0003 +[2026-03-02 19:44:07] (step=0037615) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.359616513402465, LR: 0.0003 +[2026-03-02 19:44:15] (step=0037616) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.3598121698297785, LR: 0.0003 +[2026-03-02 19:44:23] (step=0037617) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.3600078262570925, LR: 0.0003 +[2026-03-02 19:44:31] (step=0037618) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.3602034826844065, LR: 0.0003 +[2026-03-02 19:44:39] (step=0037619) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.36039913911172, LR: 0.0003 +[2026-03-02 19:44:47] (step=0037620) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.360594795539034, LR: 0.0003 +[2026-03-02 19:44:54] (step=0037621) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.360790451966347, LR: 0.0003 +[2026-03-02 19:45:02] (step=0037622) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.360986108393661, LR: 0.0003 +[2026-03-02 19:45:10] (step=0037623) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.361181764820975, LR: 0.0003 +[2026-03-02 19:45:18] (step=0037624) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 7.361377421248288, LR: 0.0003 +[2026-03-02 19:45:26] (step=0037625) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 7.361573077675602, LR: 0.0003 +[2026-03-02 19:45:34] (step=0037626) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 7.361768734102915, LR: 0.0003 +[2026-03-02 19:45:41] (step=0037627) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.361964390530229, LR: 0.0003 +[2026-03-02 19:45:49] (step=0037628) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.362160046957542, LR: 0.0003 +[2026-03-02 19:45:57] (step=0037629) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.362355703384856, LR: 0.0003 +[2026-03-02 19:46:05] (step=0037630) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.36255135981217, LR: 0.0003 +[2026-03-02 19:46:13] (step=0037631) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.362747016239483, LR: 0.0003 +[2026-03-02 19:46:21] (step=0037632) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.362942672666797, LR: 0.0003 +[2026-03-02 19:46:29] (step=0037633) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.36313832909411, LR: 0.0003 +[2026-03-02 19:46:36] (step=0037634) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.363333985521424, LR: 0.0003 +[2026-03-02 19:46:44] (step=0037635) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.363529641948738, LR: 0.0003 +[2026-03-02 19:46:52] (step=0037636) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.3637252983760515, LR: 0.0003 +[2026-03-02 19:47:00] (step=0037637) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.3639209548033655, LR: 0.0003 +[2026-03-02 19:47:08] (step=0037638) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.364116611230679, LR: 0.0003 +[2026-03-02 19:47:16] (step=0037639) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.364312267657993, LR: 0.0003 +[2026-03-02 19:47:23] (step=0037640) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.364507924085307, LR: 0.0003 +[2026-03-02 19:47:31] (step=0037641) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.36470358051262, LR: 0.0003 +[2026-03-02 19:47:39] (step=0037642) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.364899236939934, LR: 0.0003 +[2026-03-02 19:47:47] (step=0037643) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.365094893367247, LR: 0.0003 +[2026-03-02 19:47:55] (step=0037644) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.365290549794561, LR: 0.0003 +[2026-03-02 19:48:03] (step=0037645) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 7.365486206221874, LR: 0.0003 +[2026-03-02 19:48:11] (step=0037646) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.365681862649188, LR: 0.0003 +[2026-03-02 19:48:18] (step=0037647) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 7.365877519076502, LR: 0.0003 +[2026-03-02 19:48:26] (step=0037648) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.366073175503815, LR: 0.0003 +[2026-03-02 19:48:34] (step=0037649) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.366268831931129, LR: 0.0003 +[2026-03-02 19:48:42] (step=0037650) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.366464488358442, LR: 0.0003 +[2026-03-02 19:48:50] (step=0037651) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.366660144785756, LR: 0.0003 +[2026-03-02 19:48:58] (step=0037652) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.36685580121307, LR: 0.0003 +[2026-03-02 19:49:05] (step=0037653) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.367051457640383, LR: 0.0003 +[2026-03-02 19:49:13] (step=0037654) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.367247114067697, LR: 0.0003 +[2026-03-02 19:49:21] (step=0037655) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.3674427704950105, LR: 0.0003 +[2026-03-02 19:49:29] (step=0037656) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.3676384269223245, LR: 0.0003 +[2026-03-02 19:49:37] (step=0037657) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.367834083349638, LR: 0.0003 +[2026-03-02 19:49:45] (step=0037658) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 7.368029739776952, LR: 0.0003 +[2026-03-02 19:49:53] (step=0037659) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.368225396204266, LR: 0.0003 +[2026-03-02 19:50:01] (step=0037660) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.368421052631579, LR: 0.0003 +[2026-03-02 19:50:08] (step=0037661) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.368616709058893, LR: 0.0003 +[2026-03-02 19:50:16] (step=0037662) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 7.368812365486206, LR: 0.0003 +[2026-03-02 19:50:24] (step=0037663) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.36900802191352, LR: 0.0003 +[2026-03-02 19:50:32] (step=0037664) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.369203678340834, LR: 0.0003 +[2026-03-02 19:50:40] (step=0037665) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 7.369399334768147, LR: 0.0003 +[2026-03-02 19:50:48] (step=0037666) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.369594991195461, LR: 0.0003 +[2026-03-02 19:50:55] (step=0037667) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.369790647622774, LR: 0.0003 +[2026-03-02 19:51:03] (step=0037668) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.369986304050088, LR: 0.0003 +[2026-03-02 19:51:11] (step=0037669) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 7.370181960477401, LR: 0.0003 +[2026-03-02 19:51:19] (step=0037670) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.370377616904715, LR: 0.0003 +[2026-03-02 19:51:27] (step=0037671) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.370573273332029, LR: 0.0003 +[2026-03-02 19:51:35] (step=0037672) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.370768929759342, LR: 0.0003 +[2026-03-02 19:51:43] (step=0037673) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.370964586186656, LR: 0.0003 +[2026-03-02 19:51:50] (step=0037674) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.3711602426139695, LR: 0.0003 +[2026-03-02 19:51:58] (step=0037675) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.3713558990412835, LR: 0.0003 +[2026-03-02 19:52:06] (step=0037676) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.3715515554685975, LR: 0.0003 +[2026-03-02 19:52:14] (step=0037677) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.371747211895911, LR: 0.0003 +[2026-03-02 19:52:22] (step=0037678) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.371942868323225, LR: 0.0003 +[2026-03-02 19:52:30] (step=0037679) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 7.372138524750538, LR: 0.0003 +[2026-03-02 19:52:37] (step=0037680) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.372334181177852, LR: 0.0003 +[2026-03-02 19:52:45] (step=0037681) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.372529837605165, LR: 0.0003 +[2026-03-02 19:52:53] (step=0037682) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.372725494032479, LR: 0.0003 +[2026-03-02 19:53:01] (step=0037683) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.372921150459793, LR: 0.0003 +[2026-03-02 19:53:09] (step=0037684) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.373116806887106, LR: 0.0003 +[2026-03-02 19:53:17] (step=0037685) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.37331246331442, LR: 0.0003 +[2026-03-02 19:53:25] (step=0037686) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 7.373508119741733, LR: 0.0003 +[2026-03-02 19:53:32] (step=0037687) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 7.373703776169047, LR: 0.0003 +[2026-03-02 19:53:40] (step=0037688) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.373899432596361, LR: 0.0003 +[2026-03-02 19:53:48] (step=0037689) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.374095089023674, LR: 0.0003 +[2026-03-02 19:53:56] (step=0037690) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.374290745450988, LR: 0.0003 +[2026-03-02 19:54:04] (step=0037691) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.374486401878301, LR: 0.0003 +[2026-03-02 19:54:12] (step=0037692) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.374682058305615, LR: 0.0003 +[2026-03-02 19:54:19] (step=0037693) Train Loss: 0.4689, Train Steps/Sec: 0.13, Epoch: 7.374877714732929, LR: 0.0003 +[2026-03-02 19:54:27] (step=0037694) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.3750733711602425, LR: 0.0003 +[2026-03-02 19:54:35] (step=0037695) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.3752690275875565, LR: 0.0003 +[2026-03-02 19:54:43] (step=0037696) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 7.37546468401487, LR: 0.0003 +[2026-03-02 19:54:51] (step=0037697) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.375660340442184, LR: 0.0003 +[2026-03-02 19:54:59] (step=0037698) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.375855996869497, LR: 0.0003 +[2026-03-02 19:55:07] (step=0037699) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.376051653296811, LR: 0.0003 +[2026-03-02 19:55:14] (step=0037700) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.376247309724125, LR: 0.0003 +[2026-03-02 19:55:22] (step=0037701) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.376442966151438, LR: 0.0003 +[2026-03-02 19:55:30] (step=0037702) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 7.376638622578752, LR: 0.0003 +[2026-03-02 19:55:38] (step=0037703) Train Loss: 0.4446, Train Steps/Sec: 0.12, Epoch: 7.376834279006065, LR: 0.0003 +[2026-03-02 19:55:46] (step=0037704) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 7.377029935433379, LR: 0.0003 +[2026-03-02 19:55:54] (step=0037705) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.377225591860693, LR: 0.0003 +[2026-03-02 19:56:02] (step=0037706) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 7.377421248288006, LR: 0.0003 +[2026-03-02 19:56:09] (step=0037707) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 7.37761690471532, LR: 0.0003 +[2026-03-02 19:56:17] (step=0037708) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.377812561142633, LR: 0.0003 +[2026-03-02 19:56:25] (step=0037709) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.378008217569947, LR: 0.0003 +[2026-03-02 19:56:33] (step=0037710) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.37820387399726, LR: 0.0003 +[2026-03-02 19:56:41] (step=0037711) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.378399530424574, LR: 0.0003 +[2026-03-02 19:56:49] (step=0037712) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.378595186851888, LR: 0.0003 +[2026-03-02 19:56:57] (step=0037713) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.3787908432792015, LR: 0.0003 +[2026-03-02 19:57:04] (step=0037714) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.3789864997065155, LR: 0.0003 +[2026-03-02 19:57:12] (step=0037715) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.379182156133829, LR: 0.0003 +[2026-03-02 19:57:20] (step=0037716) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.379377812561143, LR: 0.0003 +[2026-03-02 19:57:28] (step=0037717) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 7.379573468988457, LR: 0.0003 +[2026-03-02 19:57:36] (step=0037718) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.37976912541577, LR: 0.0003 +[2026-03-02 19:57:44] (step=0037719) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.379964781843084, LR: 0.0003 +[2026-03-02 19:57:51] (step=0037720) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.380160438270397, LR: 0.0003 +[2026-03-02 19:57:59] (step=0037721) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.380356094697711, LR: 0.0003 +[2026-03-02 19:58:07] (step=0037722) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.380551751125024, LR: 0.0003 +[2026-03-02 19:58:15] (step=0037723) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.380747407552338, LR: 0.0003 +[2026-03-02 19:58:23] (step=0037724) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.380943063979652, LR: 0.0003 +[2026-03-02 19:58:31] (step=0037725) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 7.381138720406965, LR: 0.0003 +[2026-03-02 19:58:39] (step=0037726) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.381334376834279, LR: 0.0003 +[2026-03-02 19:58:46] (step=0037727) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 7.381530033261592, LR: 0.0003 +[2026-03-02 19:58:54] (step=0037728) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.381725689688906, LR: 0.0003 +[2026-03-02 19:59:02] (step=0037729) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.38192134611622, LR: 0.0003 +[2026-03-02 19:59:10] (step=0037730) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.382117002543533, LR: 0.0003 +[2026-03-02 19:59:18] (step=0037731) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.382312658970847, LR: 0.0003 +[2026-03-02 19:59:26] (step=0037732) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 7.3825083153981605, LR: 0.0003 +[2026-03-02 19:59:33] (step=0037733) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.3827039718254746, LR: 0.0003 +[2026-03-02 19:59:41] (step=0037734) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.382899628252788, LR: 0.0003 +[2026-03-02 19:59:49] (step=0037735) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 7.383095284680102, LR: 0.0003 +[2026-03-02 19:59:57] (step=0037736) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.383290941107416, LR: 0.0003 +[2026-03-02 20:00:05] (step=0037737) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.383486597534729, LR: 0.0003 +[2026-03-02 20:00:13] (step=0037738) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 7.383682253962043, LR: 0.0003 +[2026-03-02 20:00:21] (step=0037739) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.383877910389356, LR: 0.0003 +[2026-03-02 20:00:28] (step=0037740) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.38407356681667, LR: 0.0003 +[2026-03-02 20:00:36] (step=0037741) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.384269223243984, LR: 0.0003 +[2026-03-02 20:00:44] (step=0037742) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 7.384464879671297, LR: 0.0003 +[2026-03-02 20:00:52] (step=0037743) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 7.384660536098611, LR: 0.0003 +[2026-03-02 20:01:00] (step=0037744) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 7.384856192525924, LR: 0.0003 +[2026-03-02 20:01:08] (step=0037745) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.385051848953238, LR: 0.0003 +[2026-03-02 20:01:15] (step=0037746) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.385247505380552, LR: 0.0003 +[2026-03-02 20:01:23] (step=0037747) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.385443161807865, LR: 0.0003 +[2026-03-02 20:01:31] (step=0037748) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.385638818235179, LR: 0.0003 +[2026-03-02 20:01:39] (step=0037749) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.385834474662492, LR: 0.0003 +[2026-03-02 20:01:47] (step=0037750) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 7.386030131089806, LR: 0.0003 +[2026-03-02 20:01:55] (step=0037751) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.38622578751712, LR: 0.0003 +[2026-03-02 20:02:03] (step=0037752) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.386421443944434, LR: 0.0003 +[2026-03-02 20:02:11] (step=0037753) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.386617100371748, LR: 0.0003 +[2026-03-02 20:02:18] (step=0037754) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.386812756799061, LR: 0.0003 +[2026-03-02 20:02:26] (step=0037755) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.387008413226375, LR: 0.0003 +[2026-03-02 20:02:34] (step=0037756) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.387204069653688, LR: 0.0003 +[2026-03-02 20:02:42] (step=0037757) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.387399726081002, LR: 0.0003 +[2026-03-02 20:02:50] (step=0037758) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 7.387595382508316, LR: 0.0003 +[2026-03-02 20:02:58] (step=0037759) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 7.387791038935629, LR: 0.0003 +[2026-03-02 20:03:05] (step=0037760) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.387986695362943, LR: 0.0003 +[2026-03-02 20:03:13] (step=0037761) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 7.388182351790256, LR: 0.0003 +[2026-03-02 20:03:21] (step=0037762) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.38837800821757, LR: 0.0003 +[2026-03-02 20:03:29] (step=0037763) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.388573664644883, LR: 0.0003 +[2026-03-02 20:03:37] (step=0037764) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.388769321072197, LR: 0.0003 +[2026-03-02 20:03:45] (step=0037765) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.388964977499511, LR: 0.0003 +[2026-03-02 20:03:53] (step=0037766) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.389160633926824, LR: 0.0003 +[2026-03-02 20:04:00] (step=0037767) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.389356290354138, LR: 0.0003 +[2026-03-02 20:04:08] (step=0037768) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.389551946781451, LR: 0.0003 +[2026-03-02 20:04:16] (step=0037769) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.3897476032087654, LR: 0.0003 +[2026-03-02 20:04:24] (step=0037770) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.3899432596360795, LR: 0.0003 +[2026-03-02 20:04:32] (step=0037771) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.390138916063393, LR: 0.0003 +[2026-03-02 20:04:40] (step=0037772) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 7.390334572490707, LR: 0.0003 +[2026-03-02 20:04:47] (step=0037773) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.39053022891802, LR: 0.0003 +[2026-03-02 20:04:55] (step=0037774) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.390725885345334, LR: 0.0003 +[2026-03-02 20:05:03] (step=0037775) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.390921541772647, LR: 0.0003 +[2026-03-02 20:05:11] (step=0037776) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.391117198199961, LR: 0.0003 +[2026-03-02 20:05:19] (step=0037777) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.391312854627275, LR: 0.0003 +[2026-03-02 20:05:27] (step=0037778) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.391508511054588, LR: 0.0003 +[2026-03-02 20:05:35] (step=0037779) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.391704167481902, LR: 0.0003 +[2026-03-02 20:05:42] (step=0037780) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.391899823909215, LR: 0.0003 +[2026-03-02 20:05:50] (step=0037781) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.392095480336529, LR: 0.0003 +[2026-03-02 20:05:58] (step=0037782) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.392291136763843, LR: 0.0003 +[2026-03-02 20:06:06] (step=0037783) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.392486793191156, LR: 0.0003 +[2026-03-02 20:06:14] (step=0037784) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.39268244961847, LR: 0.0003 +[2026-03-02 20:06:22] (step=0037785) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 7.392878106045783, LR: 0.0003 +[2026-03-02 20:06:29] (step=0037786) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.393073762473097, LR: 0.0003 +[2026-03-02 20:06:37] (step=0037787) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.3932694189004105, LR: 0.0003 +[2026-03-02 20:06:45] (step=0037788) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.3934650753277245, LR: 0.0003 +[2026-03-02 20:06:53] (step=0037789) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 7.3936607317550385, LR: 0.0003 +[2026-03-02 20:07:01] (step=0037790) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.393856388182352, LR: 0.0003 +[2026-03-02 20:07:09] (step=0037791) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.394052044609666, LR: 0.0003 +[2026-03-02 20:07:17] (step=0037792) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.394247701036979, LR: 0.0003 +[2026-03-02 20:07:24] (step=0037793) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.394443357464293, LR: 0.0003 +[2026-03-02 20:07:32] (step=0037794) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.394639013891607, LR: 0.0003 +[2026-03-02 20:07:40] (step=0037795) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.39483467031892, LR: 0.0003 +[2026-03-02 20:07:48] (step=0037796) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 7.395030326746234, LR: 0.0003 +[2026-03-02 20:07:56] (step=0037797) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.395225983173547, LR: 0.0003 +[2026-03-02 20:08:04] (step=0037798) Train Loss: 0.4471, Train Steps/Sec: 0.12, Epoch: 7.395421639600861, LR: 0.0003 +[2026-03-02 20:08:12] (step=0037799) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.395617296028175, LR: 0.0003 +[2026-03-02 20:08:19] (step=0037800) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.395812952455488, LR: 0.0003 +[2026-03-02 20:08:27] (step=0037801) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.396008608882802, LR: 0.0003 +[2026-03-02 20:08:35] (step=0037802) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.396204265310115, LR: 0.0003 +[2026-03-02 20:08:43] (step=0037803) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 7.396399921737429, LR: 0.0003 +[2026-03-02 20:08:51] (step=0037804) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.396595578164742, LR: 0.0003 +[2026-03-02 20:08:59] (step=0037805) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.396791234592056, LR: 0.0003 +[2026-03-02 20:09:07] (step=0037806) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.39698689101937, LR: 0.0003 +[2026-03-02 20:09:14] (step=0037807) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.3971825474466835, LR: 0.0003 +[2026-03-02 20:09:22] (step=0037808) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.3973782038739975, LR: 0.0003 +[2026-03-02 20:09:30] (step=0037809) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.397573860301311, LR: 0.0003 +[2026-03-02 20:09:38] (step=0037810) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.397769516728625, LR: 0.0003 +[2026-03-02 20:09:46] (step=0037811) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 7.397965173155939, LR: 0.0003 +[2026-03-02 20:09:54] (step=0037812) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.398160829583252, LR: 0.0003 +[2026-03-02 20:10:02] (step=0037813) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.398356486010566, LR: 0.0003 +[2026-03-02 20:10:09] (step=0037814) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.398552142437879, LR: 0.0003 +[2026-03-02 20:10:17] (step=0037815) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.398747798865193, LR: 0.0003 +[2026-03-02 20:10:25] (step=0037816) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.398943455292506, LR: 0.0003 +[2026-03-02 20:10:33] (step=0037817) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.39913911171982, LR: 0.0003 +[2026-03-02 20:10:41] (step=0037818) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.399334768147134, LR: 0.0003 +[2026-03-02 20:10:49] (step=0037819) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.399530424574447, LR: 0.0003 +[2026-03-02 20:10:57] (step=0037820) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.399726081001761, LR: 0.0003 +[2026-03-02 20:11:04] (step=0037821) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.399921737429074, LR: 0.0003 +[2026-03-02 20:11:12] (step=0037822) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 7.400117393856388, LR: 0.0003 +[2026-03-02 20:11:20] (step=0037823) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.400313050283702, LR: 0.0003 +[2026-03-02 20:11:28] (step=0037824) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.400508706711015, LR: 0.0003 +[2026-03-02 20:11:36] (step=0037825) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.400704363138329, LR: 0.0003 +[2026-03-02 20:11:44] (step=0037826) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 7.4009000195656425, LR: 0.0003 +[2026-03-02 20:11:51] (step=0037827) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.4010956759929565, LR: 0.0003 +[2026-03-02 20:11:59] (step=0037828) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.40129133242027, LR: 0.0003 +[2026-03-02 20:12:07] (step=0037829) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.401486988847584, LR: 0.0003 +[2026-03-02 20:12:15] (step=0037830) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.401682645274898, LR: 0.0003 +[2026-03-02 20:12:23] (step=0037831) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.401878301702211, LR: 0.0003 +[2026-03-02 20:12:31] (step=0037832) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.402073958129525, LR: 0.0003 +[2026-03-02 20:12:38] (step=0037833) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.402269614556838, LR: 0.0003 +[2026-03-02 20:12:46] (step=0037834) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.402465270984152, LR: 0.0003 +[2026-03-02 20:12:54] (step=0037835) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.402660927411466, LR: 0.0003 +[2026-03-02 20:13:02] (step=0037836) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.402856583838779, LR: 0.0003 +[2026-03-02 20:13:10] (step=0037837) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.403052240266093, LR: 0.0003 +[2026-03-02 20:13:18] (step=0037838) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 7.403247896693406, LR: 0.0003 +[2026-03-02 20:13:26] (step=0037839) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.40344355312072, LR: 0.0003 +[2026-03-02 20:13:33] (step=0037840) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.403639209548033, LR: 0.0003 +[2026-03-02 20:13:41] (step=0037841) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.403834865975347, LR: 0.0003 +[2026-03-02 20:13:49] (step=0037842) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.404030522402661, LR: 0.0003 +[2026-03-02 20:13:57] (step=0037843) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.404226178829974, LR: 0.0003 +[2026-03-02 20:14:05] (step=0037844) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.404421835257288, LR: 0.0003 +[2026-03-02 20:14:13] (step=0037845) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.4046174916846015, LR: 0.0003 +[2026-03-02 20:14:20] (step=0037846) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.4048131481119155, LR: 0.0003 +[2026-03-02 20:14:28] (step=0037847) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.4050088045392295, LR: 0.0003 +[2026-03-02 20:14:36] (step=0037848) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 7.405204460966543, LR: 0.0003 +[2026-03-02 20:14:44] (step=0037849) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.405400117393857, LR: 0.0003 +[2026-03-02 20:14:52] (step=0037850) Train Loss: 0.4293, Train Steps/Sec: 0.12, Epoch: 7.40559577382117, LR: 0.0003 +[2026-03-02 20:15:00] (step=0037851) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.405791430248484, LR: 0.0003 +[2026-03-02 20:15:08] (step=0037852) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.405987086675797, LR: 0.0003 +[2026-03-02 20:15:15] (step=0037853) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 7.406182743103111, LR: 0.0003 +[2026-03-02 20:15:23] (step=0037854) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.406378399530425, LR: 0.0003 +[2026-03-02 20:15:31] (step=0037855) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.406574055957738, LR: 0.0003 +[2026-03-02 20:15:39] (step=0037856) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.406769712385052, LR: 0.0003 +[2026-03-02 20:15:47] (step=0037857) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.406965368812365, LR: 0.0003 +[2026-03-02 20:15:55] (step=0037858) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 7.407161025239679, LR: 0.0003 +[2026-03-02 20:16:03] (step=0037859) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 7.407356681666993, LR: 0.0003 +[2026-03-02 20:16:10] (step=0037860) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.407552338094306, LR: 0.0003 +[2026-03-02 20:16:18] (step=0037861) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.40774799452162, LR: 0.0003 +[2026-03-02 20:16:26] (step=0037862) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.407943650948933, LR: 0.0003 +[2026-03-02 20:16:34] (step=0037863) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.408139307376247, LR: 0.0003 +[2026-03-02 20:16:42] (step=0037864) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.408334963803561, LR: 0.0003 +[2026-03-02 20:16:50] (step=0037865) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.4085306202308745, LR: 0.0003 +[2026-03-02 20:16:58] (step=0037866) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.4087262766581885, LR: 0.0003 +[2026-03-02 20:17:05] (step=0037867) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.408921933085502, LR: 0.0003 +[2026-03-02 20:17:13] (step=0037868) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.409117589512816, LR: 0.0003 +[2026-03-02 20:17:21] (step=0037869) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.409313245940129, LR: 0.0003 +[2026-03-02 20:17:29] (step=0037870) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.409508902367443, LR: 0.0003 +[2026-03-02 20:17:37] (step=0037871) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.409704558794757, LR: 0.0003 +[2026-03-02 20:17:45] (step=0037872) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.40990021522207, LR: 0.0003 +[2026-03-02 20:17:53] (step=0037873) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.410095871649384, LR: 0.0003 +[2026-03-02 20:18:00] (step=0037874) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.410291528076697, LR: 0.0003 +[2026-03-02 20:18:08] (step=0037875) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.410487184504011, LR: 0.0003 +[2026-03-02 20:18:16] (step=0037876) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.410682840931325, LR: 0.0003 +[2026-03-02 20:18:24] (step=0037877) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.410878497358638, LR: 0.0003 +[2026-03-02 20:18:32] (step=0037878) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.411074153785952, LR: 0.0003 +[2026-03-02 20:18:40] (step=0037879) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.411269810213265, LR: 0.0003 +[2026-03-02 20:18:47] (step=0037880) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.411465466640579, LR: 0.0003 +[2026-03-02 20:18:55] (step=0037881) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.411661123067892, LR: 0.0003 +[2026-03-02 20:19:03] (step=0037882) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.411856779495206, LR: 0.0003 +[2026-03-02 20:19:11] (step=0037883) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.41205243592252, LR: 0.0003 +[2026-03-02 20:19:19] (step=0037884) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.4122480923498335, LR: 0.0003 +[2026-03-02 20:19:27] (step=0037885) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.4124437487771475, LR: 0.0003 +[2026-03-02 20:19:34] (step=0037886) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.412639405204461, LR: 0.0003 +[2026-03-02 20:19:42] (step=0037887) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 7.412835061631775, LR: 0.0003 +[2026-03-02 20:19:50] (step=0037888) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.413030718059089, LR: 0.0003 +[2026-03-02 20:19:58] (step=0037889) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.413226374486402, LR: 0.0003 +[2026-03-02 20:20:06] (step=0037890) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.413422030913716, LR: 0.0003 +[2026-03-02 20:20:14] (step=0037891) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.413617687341029, LR: 0.0003 +[2026-03-02 20:20:22] (step=0037892) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.413813343768343, LR: 0.0003 +[2026-03-02 20:20:29] (step=0037893) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.414009000195656, LR: 0.0003 +[2026-03-02 20:20:37] (step=0037894) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 7.41420465662297, LR: 0.0003 +[2026-03-02 20:20:45] (step=0037895) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.414400313050284, LR: 0.0003 +[2026-03-02 20:20:53] (step=0037896) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 7.414595969477597, LR: 0.0003 +[2026-03-02 20:21:01] (step=0037897) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.414791625904911, LR: 0.0003 +[2026-03-02 20:21:09] (step=0037898) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 7.414987282332224, LR: 0.0003 +[2026-03-02 20:21:16] (step=0037899) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 7.415182938759538, LR: 0.0003 +[2026-03-02 20:21:24] (step=0037900) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.415378595186852, LR: 0.0003 +[2026-03-02 20:21:32] (step=0037901) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.415574251614165, LR: 0.0003 +[2026-03-02 20:21:40] (step=0037902) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 7.415769908041479, LR: 0.0003 +[2026-03-02 20:21:48] (step=0037903) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.4159655644687925, LR: 0.0003 +[2026-03-02 20:21:56] (step=0037904) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.4161612208961065, LR: 0.0003 +[2026-03-02 20:22:04] (step=0037905) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 7.41635687732342, LR: 0.0003 +[2026-03-02 20:22:11] (step=0037906) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.416552533750734, LR: 0.0003 +[2026-03-02 20:22:19] (step=0037907) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.416748190178048, LR: 0.0003 +[2026-03-02 20:22:27] (step=0037908) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.416943846605361, LR: 0.0003 +[2026-03-02 20:22:35] (step=0037909) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 7.417139503032675, LR: 0.0003 +[2026-03-02 20:22:43] (step=0037910) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.417335159459988, LR: 0.0003 +[2026-03-02 20:22:51] (step=0037911) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.417530815887302, LR: 0.0003 +[2026-03-02 20:22:59] (step=0037912) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.417726472314616, LR: 0.0003 +[2026-03-02 20:23:06] (step=0037913) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.417922128741929, LR: 0.0003 +[2026-03-02 20:23:14] (step=0037914) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.418117785169243, LR: 0.0003 +[2026-03-02 20:23:22] (step=0037915) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.418313441596556, LR: 0.0003 +[2026-03-02 20:23:30] (step=0037916) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.41850909802387, LR: 0.0003 +[2026-03-02 20:23:38] (step=0037917) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.418704754451184, LR: 0.0003 +[2026-03-02 20:23:46] (step=0037918) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.418900410878497, LR: 0.0003 +[2026-03-02 20:23:54] (step=0037919) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.419096067305811, LR: 0.0003 +[2026-03-02 20:24:01] (step=0037920) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.419291723733124, LR: 0.0003 +[2026-03-02 20:24:09] (step=0037921) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.419487380160438, LR: 0.0003 +[2026-03-02 20:24:17] (step=0037922) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.4196830365877515, LR: 0.0003 +[2026-03-02 20:24:25] (step=0037923) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.4198786930150655, LR: 0.0003 +[2026-03-02 20:24:33] (step=0037924) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.4200743494423795, LR: 0.0003 +[2026-03-02 20:24:41] (step=0037925) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.420270005869693, LR: 0.0003 +[2026-03-02 20:24:48] (step=0037926) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.420465662297007, LR: 0.0003 +[2026-03-02 20:24:56] (step=0037927) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.42066131872432, LR: 0.0003 +[2026-03-02 20:25:04] (step=0037928) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.420856975151634, LR: 0.0003 +[2026-03-02 20:25:12] (step=0037929) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.421052631578948, LR: 0.0003 +[2026-03-02 20:25:20] (step=0037930) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.421248288006261, LR: 0.0003 +[2026-03-02 20:25:28] (step=0037931) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.421443944433575, LR: 0.0003 +[2026-03-02 20:25:35] (step=0037932) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 7.421639600860888, LR: 0.0003 +[2026-03-02 20:25:43] (step=0037933) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.421835257288202, LR: 0.0003 +[2026-03-02 20:25:51] (step=0037934) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 7.422030913715515, LR: 0.0003 +[2026-03-02 20:25:59] (step=0037935) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.422226570142829, LR: 0.0003 +[2026-03-02 20:26:07] (step=0037936) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.422422226570143, LR: 0.0003 +[2026-03-02 20:26:15] (step=0037937) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.422617882997456, LR: 0.0003 +[2026-03-02 20:26:23] (step=0037938) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.42281353942477, LR: 0.0003 +[2026-03-02 20:26:30] (step=0037939) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 7.423009195852083, LR: 0.0003 +[2026-03-02 20:26:38] (step=0037940) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 7.423204852279397, LR: 0.0003 +[2026-03-02 20:26:46] (step=0037941) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 7.423400508706711, LR: 0.0003 +[2026-03-02 20:26:54] (step=0037942) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.4235961651340245, LR: 0.0003 +[2026-03-02 20:27:02] (step=0037943) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.4237918215613385, LR: 0.0003 +[2026-03-02 20:27:10] (step=0037944) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.423987477988652, LR: 0.0003 +[2026-03-02 20:27:17] (step=0037945) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.424183134415966, LR: 0.0003 +[2026-03-02 20:27:25] (step=0037946) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.424378790843279, LR: 0.0003 +[2026-03-02 20:27:33] (step=0037947) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.424574447270593, LR: 0.0003 +[2026-03-02 20:27:41] (step=0037948) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.424770103697907, LR: 0.0003 +[2026-03-02 20:27:49] (step=0037949) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.42496576012522, LR: 0.0003 +[2026-03-02 20:27:57] (step=0037950) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.425161416552534, LR: 0.0003 +[2026-03-02 20:28:05] (step=0037951) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.425357072979847, LR: 0.0003 +[2026-03-02 20:28:12] (step=0037952) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.425552729407161, LR: 0.0003 +[2026-03-02 20:28:20] (step=0037953) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.425748385834475, LR: 0.0003 +[2026-03-02 20:28:28] (step=0037954) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.425944042261788, LR: 0.0003 +[2026-03-02 20:28:36] (step=0037955) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.426139698689102, LR: 0.0003 +[2026-03-02 20:28:44] (step=0037956) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 7.426335355116415, LR: 0.0003 +[2026-03-02 20:28:52] (step=0037957) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.426531011543729, LR: 0.0003 +[2026-03-02 20:29:00] (step=0037958) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.426726667971042, LR: 0.0003 +[2026-03-02 20:29:07] (step=0037959) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.426922324398356, LR: 0.0003 +[2026-03-02 20:29:15] (step=0037960) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.42711798082567, LR: 0.0003 +[2026-03-02 20:29:23] (step=0037961) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.4273136372529835, LR: 0.0003 +[2026-03-02 20:29:31] (step=0037962) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.4275092936802976, LR: 0.0003 +[2026-03-02 20:29:39] (step=0037963) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.427704950107611, LR: 0.0003 +[2026-03-02 20:29:47] (step=0037964) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.427900606534925, LR: 0.0003 +[2026-03-02 20:29:55] (step=0037965) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 7.428096262962239, LR: 0.0003 +[2026-03-02 20:30:03] (step=0037966) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.428291919389552, LR: 0.0003 +[2026-03-02 20:30:10] (step=0037967) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.428487575816866, LR: 0.0003 +[2026-03-02 20:30:18] (step=0037968) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.428683232244179, LR: 0.0003 +[2026-03-02 20:30:26] (step=0037969) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.428878888671493, LR: 0.0003 +[2026-03-02 20:30:34] (step=0037970) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 7.429074545098807, LR: 0.0003 +[2026-03-02 20:30:42] (step=0037971) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.42927020152612, LR: 0.0003 +[2026-03-02 20:30:50] (step=0037972) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.429465857953434, LR: 0.0003 +[2026-03-02 20:30:57] (step=0037973) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.429661514380747, LR: 0.0003 +[2026-03-02 20:31:05] (step=0037974) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.429857170808061, LR: 0.0003 +[2026-03-02 20:31:13] (step=0037975) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.430052827235374, LR: 0.0003 +[2026-03-02 20:31:21] (step=0037976) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.430248483662688, LR: 0.0003 +[2026-03-02 20:31:29] (step=0037977) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.430444140090002, LR: 0.0003 +[2026-03-02 20:31:37] (step=0037978) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.430639796517315, LR: 0.0003 +[2026-03-02 20:31:44] (step=0037979) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.430835452944629, LR: 0.0003 +[2026-03-02 20:31:52] (step=0037980) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.4310311093719426, LR: 0.0003 +[2026-03-02 20:32:00] (step=0037981) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 7.431226765799257, LR: 0.0003 +[2026-03-02 20:32:08] (step=0037982) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.431422422226571, LR: 0.0003 +[2026-03-02 20:32:16] (step=0037983) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.431618078653884, LR: 0.0003 +[2026-03-02 20:32:24] (step=0037984) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.431813735081198, LR: 0.0003 +[2026-03-02 20:32:31] (step=0037985) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.432009391508511, LR: 0.0003 +[2026-03-02 20:32:39] (step=0037986) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 7.432205047935825, LR: 0.0003 +[2026-03-02 20:32:47] (step=0037987) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.432400704363138, LR: 0.0003 +[2026-03-02 20:32:55] (step=0037988) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.432596360790452, LR: 0.0003 +[2026-03-02 20:33:03] (step=0037989) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.432792017217766, LR: 0.0003 +[2026-03-02 20:33:11] (step=0037990) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.432987673645079, LR: 0.0003 +[2026-03-02 20:33:18] (step=0037991) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.433183330072393, LR: 0.0003 +[2026-03-02 20:33:26] (step=0037992) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.433378986499706, LR: 0.0003 +[2026-03-02 20:33:34] (step=0037993) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.43357464292702, LR: 0.0003 +[2026-03-02 20:33:42] (step=0037994) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.433770299354334, LR: 0.0003 +[2026-03-02 20:33:50] (step=0037995) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.433965955781647, LR: 0.0003 +[2026-03-02 20:33:58] (step=0037996) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.434161612208961, LR: 0.0003 +[2026-03-02 20:34:06] (step=0037997) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.434357268636274, LR: 0.0003 +[2026-03-02 20:34:13] (step=0037998) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.4345529250635884, LR: 0.0003 +[2026-03-02 20:34:21] (step=0037999) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.434748581490902, LR: 0.0003 +[2026-03-02 20:34:29] (step=0038000) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.434944237918216, LR: 0.0003 +[2026-03-02 20:34:29] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0038000/ +[2026-03-02 20:34:37] (step=0038001) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.43513989434553, LR: 0.0003 +[2026-03-02 20:34:45] (step=0038002) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 7.435335550772843, LR: 0.0003 +[2026-03-02 20:34:53] (step=0038003) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.435531207200157, LR: 0.0003 +[2026-03-02 20:35:00] (step=0038004) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 7.43572686362747, LR: 0.0003 +[2026-03-02 20:35:08] (step=0038005) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.435922520054784, LR: 0.0003 +[2026-03-02 20:35:16] (step=0038006) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 7.436118176482098, LR: 0.0003 +[2026-03-02 20:35:24] (step=0038007) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.436313832909411, LR: 0.0003 +[2026-03-02 20:35:32] (step=0038008) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.436509489336725, LR: 0.0003 +[2026-03-02 20:35:40] (step=0038009) Train Loss: 0.4501, Train Steps/Sec: 0.12, Epoch: 7.436705145764038, LR: 0.0003 +[2026-03-02 20:35:48] (step=0038010) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 7.436900802191352, LR: 0.0003 +[2026-03-02 20:35:56] (step=0038011) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.437096458618665, LR: 0.0003 +[2026-03-02 20:36:03] (step=0038012) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.437292115045979, LR: 0.0003 +[2026-03-02 20:36:11] (step=0038013) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.437487771473293, LR: 0.0003 +[2026-03-02 20:36:19] (step=0038014) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.437683427900606, LR: 0.0003 +[2026-03-02 20:36:27] (step=0038015) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 7.43787908432792, LR: 0.0003 +[2026-03-02 20:36:35] (step=0038016) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.4380747407552334, LR: 0.0003 +[2026-03-02 20:36:43] (step=0038017) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.4382703971825475, LR: 0.0003 +[2026-03-02 20:36:51] (step=0038018) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 7.4384660536098615, LR: 0.0003 +[2026-03-02 20:36:58] (step=0038019) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.438661710037175, LR: 0.0003 +[2026-03-02 20:37:06] (step=0038020) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.438857366464489, LR: 0.0003 +[2026-03-02 20:37:14] (step=0038021) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.439053022891802, LR: 0.0003 +[2026-03-02 20:37:22] (step=0038022) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.439248679319116, LR: 0.0003 +[2026-03-02 20:37:30] (step=0038023) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.439444335746429, LR: 0.0003 +[2026-03-02 20:37:38] (step=0038024) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 7.439639992173743, LR: 0.0003 +[2026-03-02 20:37:46] (step=0038025) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.439835648601057, LR: 0.0003 +[2026-03-02 20:37:53] (step=0038026) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.44003130502837, LR: 0.0003 +[2026-03-02 20:38:01] (step=0038027) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.440226961455684, LR: 0.0003 +[2026-03-02 20:38:09] (step=0038028) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.440422617882997, LR: 0.0003 +[2026-03-02 20:38:17] (step=0038029) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.440618274310311, LR: 0.0003 +[2026-03-02 20:38:25] (step=0038030) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.440813930737625, LR: 0.0003 +[2026-03-02 20:38:33] (step=0038031) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.441009587164938, LR: 0.0003 +[2026-03-02 20:38:40] (step=0038032) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.441205243592252, LR: 0.0003 +[2026-03-02 20:38:48] (step=0038033) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 7.441400900019565, LR: 0.0003 +[2026-03-02 20:38:56] (step=0038034) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.441596556446879, LR: 0.0003 +[2026-03-02 20:39:04] (step=0038035) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.441792212874193, LR: 0.0003 +[2026-03-02 20:39:12] (step=0038036) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.4419878693015065, LR: 0.0003 +[2026-03-02 20:39:20] (step=0038037) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.4421835257288205, LR: 0.0003 +[2026-03-02 20:39:27] (step=0038038) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.442379182156134, LR: 0.0003 +[2026-03-02 20:39:35] (step=0038039) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.442574838583448, LR: 0.0003 +[2026-03-02 20:39:43] (step=0038040) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.442770495010761, LR: 0.0003 +[2026-03-02 20:39:51] (step=0038041) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.442966151438075, LR: 0.0003 +[2026-03-02 20:39:59] (step=0038042) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.443161807865389, LR: 0.0003 +[2026-03-02 20:40:07] (step=0038043) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 7.443357464292702, LR: 0.0003 +[2026-03-02 20:40:15] (step=0038044) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 7.443553120720016, LR: 0.0003 +[2026-03-02 20:40:22] (step=0038045) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.443748777147329, LR: 0.0003 +[2026-03-02 20:40:30] (step=0038046) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.443944433574643, LR: 0.0003 +[2026-03-02 20:40:38] (step=0038047) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 7.444140090001957, LR: 0.0003 +[2026-03-02 20:40:46] (step=0038048) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 7.44433574642927, LR: 0.0003 +[2026-03-02 20:40:54] (step=0038049) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.444531402856584, LR: 0.0003 +[2026-03-02 20:41:02] (step=0038050) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.444727059283897, LR: 0.0003 +[2026-03-02 20:41:09] (step=0038051) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.444922715711211, LR: 0.0003 +[2026-03-02 20:41:17] (step=0038052) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.445118372138524, LR: 0.0003 +[2026-03-02 20:41:25] (step=0038053) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.445314028565838, LR: 0.0003 +[2026-03-02 20:41:33] (step=0038054) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.445509684993152, LR: 0.0003 +[2026-03-02 20:41:41] (step=0038055) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.4457053414204655, LR: 0.0003 +[2026-03-02 20:41:49] (step=0038056) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.4459009978477795, LR: 0.0003 +[2026-03-02 20:41:56] (step=0038057) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 7.446096654275093, LR: 0.0003 +[2026-03-02 20:42:05] (step=0038058) Train Loss: 0.4466, Train Steps/Sec: 0.12, Epoch: 7.446292310702407, LR: 0.0003 +[2026-03-02 20:42:13] (step=0038059) Train Loss: 0.4616, Train Steps/Sec: 0.12, Epoch: 7.446487967129721, LR: 0.0003 +[2026-03-02 20:42:20] (step=0038060) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 7.446683623557034, LR: 0.0003 +[2026-03-02 20:42:28] (step=0038061) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.446879279984348, LR: 0.0003 +[2026-03-02 20:42:36] (step=0038062) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.447074936411661, LR: 0.0003 +[2026-03-02 20:42:44] (step=0038063) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.447270592838975, LR: 0.0003 +[2026-03-02 20:42:52] (step=0038064) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 7.447466249266288, LR: 0.0003 +[2026-03-02 20:43:00] (step=0038065) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.447661905693602, LR: 0.0003 +[2026-03-02 20:43:07] (step=0038066) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 7.447857562120916, LR: 0.0003 +[2026-03-02 20:43:15] (step=0038067) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.448053218548229, LR: 0.0003 +[2026-03-02 20:43:23] (step=0038068) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.448248874975543, LR: 0.0003 +[2026-03-02 20:43:31] (step=0038069) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.448444531402856, LR: 0.0003 +[2026-03-02 20:43:39] (step=0038070) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.44864018783017, LR: 0.0003 +[2026-03-02 20:43:47] (step=0038071) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.448835844257484, LR: 0.0003 +[2026-03-02 20:43:54] (step=0038072) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.449031500684797, LR: 0.0003 +[2026-03-02 20:44:02] (step=0038073) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.449227157112111, LR: 0.0003 +[2026-03-02 20:44:10] (step=0038074) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.4494228135394245, LR: 0.0003 +[2026-03-02 20:44:18] (step=0038075) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.4496184699667385, LR: 0.0003 +[2026-03-02 20:44:26] (step=0038076) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.449814126394052, LR: 0.0003 +[2026-03-02 20:44:34] (step=0038077) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.450009782821366, LR: 0.0003 +[2026-03-02 20:44:41] (step=0038078) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.45020543924868, LR: 0.0003 +[2026-03-02 20:44:49] (step=0038079) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.450401095675993, LR: 0.0003 +[2026-03-02 20:44:57] (step=0038080) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.450596752103307, LR: 0.0003 +[2026-03-02 20:45:05] (step=0038081) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.45079240853062, LR: 0.0003 +[2026-03-02 20:45:13] (step=0038082) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.450988064957934, LR: 0.0003 +[2026-03-02 20:45:21] (step=0038083) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 7.451183721385248, LR: 0.0003 +[2026-03-02 20:45:29] (step=0038084) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.451379377812561, LR: 0.0003 +[2026-03-02 20:45:36] (step=0038085) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.451575034239875, LR: 0.0003 +[2026-03-02 20:45:44] (step=0038086) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.451770690667188, LR: 0.0003 +[2026-03-02 20:45:52] (step=0038087) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.451966347094502, LR: 0.0003 +[2026-03-02 20:46:00] (step=0038088) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.452162003521816, LR: 0.0003 +[2026-03-02 20:46:08] (step=0038089) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.452357659949129, LR: 0.0003 +[2026-03-02 20:46:16] (step=0038090) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 7.452553316376443, LR: 0.0003 +[2026-03-02 20:46:23] (step=0038091) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 7.452748972803756, LR: 0.0003 +[2026-03-02 20:46:31] (step=0038092) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.45294462923107, LR: 0.0003 +[2026-03-02 20:46:39] (step=0038093) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.4531402856583835, LR: 0.0003 +[2026-03-02 20:46:47] (step=0038094) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.4533359420856975, LR: 0.0003 +[2026-03-02 20:46:55] (step=0038095) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.4535315985130115, LR: 0.0003 +[2026-03-02 20:47:03] (step=0038096) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.453727254940325, LR: 0.0003 +[2026-03-02 20:47:10] (step=0038097) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.453922911367639, LR: 0.0003 +[2026-03-02 20:47:18] (step=0038098) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.454118567794952, LR: 0.0003 +[2026-03-02 20:47:26] (step=0038099) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.454314224222266, LR: 0.0003 +[2026-03-02 20:47:34] (step=0038100) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 7.45450988064958, LR: 0.0003 +[2026-03-02 20:47:42] (step=0038101) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.454705537076893, LR: 0.0003 +[2026-03-02 20:47:50] (step=0038102) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 7.454901193504207, LR: 0.0003 +[2026-03-02 20:47:58] (step=0038103) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.45509684993152, LR: 0.0003 +[2026-03-02 20:48:05] (step=0038104) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.455292506358834, LR: 0.0003 +[2026-03-02 20:48:13] (step=0038105) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.455488162786147, LR: 0.0003 +[2026-03-02 20:48:21] (step=0038106) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.455683819213461, LR: 0.0003 +[2026-03-02 20:48:29] (step=0038107) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.455879475640775, LR: 0.0003 +[2026-03-02 20:48:37] (step=0038108) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 7.456075132068088, LR: 0.0003 +[2026-03-02 20:48:45] (step=0038109) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.456270788495402, LR: 0.0003 +[2026-03-02 20:48:52] (step=0038110) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 7.456466444922715, LR: 0.0003 +[2026-03-02 20:49:00] (step=0038111) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.456662101350029, LR: 0.0003 +[2026-03-02 20:49:08] (step=0038112) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 7.456857757777343, LR: 0.0003 +[2026-03-02 20:49:16] (step=0038113) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.4570534142046565, LR: 0.0003 +[2026-03-02 20:49:24] (step=0038114) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.4572490706319705, LR: 0.0003 +[2026-03-02 20:49:32] (step=0038115) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.457444727059284, LR: 0.0003 +[2026-03-02 20:49:40] (step=0038116) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.457640383486598, LR: 0.0003 +[2026-03-02 20:49:47] (step=0038117) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.457836039913911, LR: 0.0003 +[2026-03-02 20:49:55] (step=0038118) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.458031696341225, LR: 0.0003 +[2026-03-02 20:50:03] (step=0038119) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.458227352768539, LR: 0.0003 +[2026-03-02 20:50:11] (step=0038120) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.458423009195852, LR: 0.0003 +[2026-03-02 20:50:19] (step=0038121) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 7.458618665623166, LR: 0.0003 +[2026-03-02 20:50:27] (step=0038122) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.458814322050479, LR: 0.0003 +[2026-03-02 20:50:34] (step=0038123) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.459009978477793, LR: 0.0003 +[2026-03-02 20:50:42] (step=0038124) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.459205634905107, LR: 0.0003 +[2026-03-02 20:50:50] (step=0038125) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.45940129133242, LR: 0.0003 +[2026-03-02 20:50:58] (step=0038126) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.459596947759734, LR: 0.0003 +[2026-03-02 20:51:06] (step=0038127) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.459792604187047, LR: 0.0003 +[2026-03-02 20:51:14] (step=0038128) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 7.459988260614361, LR: 0.0003 +[2026-03-02 20:51:22] (step=0038129) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.460183917041674, LR: 0.0003 +[2026-03-02 20:51:29] (step=0038130) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 7.460379573468988, LR: 0.0003 +[2026-03-02 20:51:37] (step=0038131) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.460575229896302, LR: 0.0003 +[2026-03-02 20:51:45] (step=0038132) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.4607708863236155, LR: 0.0003 +[2026-03-02 20:51:53] (step=0038133) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 7.4609665427509295, LR: 0.0003 +[2026-03-02 20:52:01] (step=0038134) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.461162199178243, LR: 0.0003 +[2026-03-02 20:52:09] (step=0038135) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.461357855605557, LR: 0.0003 +[2026-03-02 20:52:16] (step=0038136) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.461553512032871, LR: 0.0003 +[2026-03-02 20:52:24] (step=0038137) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.461749168460184, LR: 0.0003 +[2026-03-02 20:52:32] (step=0038138) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.461944824887498, LR: 0.0003 +[2026-03-02 20:52:40] (step=0038139) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.462140481314811, LR: 0.0003 +[2026-03-02 20:52:48] (step=0038140) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 7.462336137742125, LR: 0.0003 +[2026-03-02 20:52:56] (step=0038141) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.462531794169439, LR: 0.0003 +[2026-03-02 20:53:04] (step=0038142) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 7.462727450596752, LR: 0.0003 +[2026-03-02 20:53:11] (step=0038143) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.462923107024066, LR: 0.0003 +[2026-03-02 20:53:19] (step=0038144) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.463118763451379, LR: 0.0003 +[2026-03-02 20:53:27] (step=0038145) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.463314419878693, LR: 0.0003 +[2026-03-02 20:53:35] (step=0038146) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.463510076306006, LR: 0.0003 +[2026-03-02 20:53:43] (step=0038147) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.46370573273332, LR: 0.0003 +[2026-03-02 20:53:51] (step=0038148) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.463901389160634, LR: 0.0003 +[2026-03-02 20:53:58] (step=0038149) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.464097045587947, LR: 0.0003 +[2026-03-02 20:54:06] (step=0038150) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.464292702015261, LR: 0.0003 +[2026-03-02 20:54:14] (step=0038151) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.4644883584425745, LR: 0.0003 +[2026-03-02 20:54:22] (step=0038152) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.4646840148698885, LR: 0.0003 +[2026-03-02 20:54:30] (step=0038153) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.4648796712972025, LR: 0.0003 +[2026-03-02 20:54:38] (step=0038154) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.465075327724516, LR: 0.0003 +[2026-03-02 20:54:45] (step=0038155) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.46527098415183, LR: 0.0003 +[2026-03-02 20:54:53] (step=0038156) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 7.465466640579143, LR: 0.0003 +[2026-03-02 20:55:01] (step=0038157) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.465662297006457, LR: 0.0003 +[2026-03-02 20:55:09] (step=0038158) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.46585795343377, LR: 0.0003 +[2026-03-02 20:55:17] (step=0038159) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.466053609861084, LR: 0.0003 +[2026-03-02 20:55:25] (step=0038160) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.466249266288398, LR: 0.0003 +[2026-03-02 20:55:33] (step=0038161) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.466444922715711, LR: 0.0003 +[2026-03-02 20:55:41] (step=0038162) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.466640579143025, LR: 0.0003 +[2026-03-02 20:55:48] (step=0038163) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.466836235570338, LR: 0.0003 +[2026-03-02 20:55:56] (step=0038164) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.467031891997652, LR: 0.0003 +[2026-03-02 20:56:04] (step=0038165) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.467227548424966, LR: 0.0003 +[2026-03-02 20:56:12] (step=0038166) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 7.467423204852279, LR: 0.0003 +[2026-03-02 20:56:20] (step=0038167) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.467618861279593, LR: 0.0003 +[2026-03-02 20:56:28] (step=0038168) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 7.467814517706906, LR: 0.0003 +[2026-03-02 20:56:36] (step=0038169) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.46801017413422, LR: 0.0003 +[2026-03-02 20:56:43] (step=0038170) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.4682058305615335, LR: 0.0003 +[2026-03-02 20:56:51] (step=0038171) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.4684014869888475, LR: 0.0003 +[2026-03-02 20:56:59] (step=0038172) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.4685971434161615, LR: 0.0003 +[2026-03-02 20:57:07] (step=0038173) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 7.468792799843475, LR: 0.0003 +[2026-03-02 20:57:15] (step=0038174) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.468988456270789, LR: 0.0003 +[2026-03-02 20:57:23] (step=0038175) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.469184112698102, LR: 0.0003 +[2026-03-02 20:57:30] (step=0038176) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.469379769125416, LR: 0.0003 +[2026-03-02 20:57:38] (step=0038177) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.46957542555273, LR: 0.0003 +[2026-03-02 20:57:46] (step=0038178) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.469771081980043, LR: 0.0003 +[2026-03-02 20:57:54] (step=0038179) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.469966738407357, LR: 0.0003 +[2026-03-02 20:58:02] (step=0038180) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.47016239483467, LR: 0.0003 +[2026-03-02 20:58:10] (step=0038181) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.470358051261984, LR: 0.0003 +[2026-03-02 20:58:18] (step=0038182) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.470553707689297, LR: 0.0003 +[2026-03-02 20:58:25] (step=0038183) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 7.470749364116611, LR: 0.0003 +[2026-03-02 20:58:33] (step=0038184) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.470945020543925, LR: 0.0003 +[2026-03-02 20:58:41] (step=0038185) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.471140676971238, LR: 0.0003 +[2026-03-02 20:58:49] (step=0038186) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.471336333398552, LR: 0.0003 +[2026-03-02 20:58:57] (step=0038187) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.471531989825865, LR: 0.0003 +[2026-03-02 20:59:05] (step=0038188) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.471727646253179, LR: 0.0003 +[2026-03-02 20:59:12] (step=0038189) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.471923302680493, LR: 0.0003 +[2026-03-02 20:59:20] (step=0038190) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.4721189591078065, LR: 0.0003 +[2026-03-02 20:59:28] (step=0038191) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.4723146155351206, LR: 0.0003 +[2026-03-02 20:59:36] (step=0038192) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.472510271962434, LR: 0.0003 +[2026-03-02 20:59:44] (step=0038193) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.472705928389748, LR: 0.0003 +[2026-03-02 20:59:52] (step=0038194) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.472901584817062, LR: 0.0003 +[2026-03-02 20:59:59] (step=0038195) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.473097241244375, LR: 0.0003 +[2026-03-02 21:00:07] (step=0038196) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.473292897671689, LR: 0.0003 +[2026-03-02 21:00:15] (step=0038197) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.473488554099002, LR: 0.0003 +[2026-03-02 21:00:23] (step=0038198) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.473684210526316, LR: 0.0003 +[2026-03-02 21:00:31] (step=0038199) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 7.473879866953629, LR: 0.0003 +[2026-03-02 21:00:39] (step=0038200) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.474075523380943, LR: 0.0003 +[2026-03-02 21:00:47] (step=0038201) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.474271179808257, LR: 0.0003 +[2026-03-02 21:00:54] (step=0038202) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 7.47446683623557, LR: 0.0003 +[2026-03-02 21:01:02] (step=0038203) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.474662492662884, LR: 0.0003 +[2026-03-02 21:01:10] (step=0038204) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.474858149090197, LR: 0.0003 +[2026-03-02 21:01:18] (step=0038205) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 7.475053805517511, LR: 0.0003 +[2026-03-02 21:01:26] (step=0038206) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.475249461944825, LR: 0.0003 +[2026-03-02 21:01:34] (step=0038207) Train Loss: 0.4358, Train Steps/Sec: 0.12, Epoch: 7.475445118372138, LR: 0.0003 +[2026-03-02 21:01:42] (step=0038208) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.475640774799452, LR: 0.0003 +[2026-03-02 21:01:50] (step=0038209) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.4758364312267656, LR: 0.0003 +[2026-03-02 21:01:58] (step=0038210) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.47603208765408, LR: 0.0003 +[2026-03-02 21:02:05] (step=0038211) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.476227744081393, LR: 0.0003 +[2026-03-02 21:02:13] (step=0038212) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.476423400508707, LR: 0.0003 +[2026-03-02 21:02:21] (step=0038213) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.476619056936021, LR: 0.0003 +[2026-03-02 21:02:29] (step=0038214) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.476814713363334, LR: 0.0003 +[2026-03-02 21:02:37] (step=0038215) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.477010369790648, LR: 0.0003 +[2026-03-02 21:02:45] (step=0038216) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.477206026217961, LR: 0.0003 +[2026-03-02 21:02:52] (step=0038217) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.477401682645275, LR: 0.0003 +[2026-03-02 21:03:00] (step=0038218) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.477597339072589, LR: 0.0003 +[2026-03-02 21:03:08] (step=0038219) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.477792995499902, LR: 0.0003 +[2026-03-02 21:03:16] (step=0038220) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.477988651927216, LR: 0.0003 +[2026-03-02 21:03:24] (step=0038221) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.478184308354529, LR: 0.0003 +[2026-03-02 21:03:32] (step=0038222) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.478379964781843, LR: 0.0003 +[2026-03-02 21:03:39] (step=0038223) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.478575621209156, LR: 0.0003 +[2026-03-02 21:03:47] (step=0038224) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.47877127763647, LR: 0.0003 +[2026-03-02 21:03:55] (step=0038225) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 7.478966934063784, LR: 0.0003 +[2026-03-02 21:04:03] (step=0038226) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 7.479162590491097, LR: 0.0003 +[2026-03-02 21:04:11] (step=0038227) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 7.4793582469184114, LR: 0.0003 +[2026-03-02 21:04:19] (step=0038228) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.479553903345725, LR: 0.0003 +[2026-03-02 21:04:27] (step=0038229) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.479749559773039, LR: 0.0003 +[2026-03-02 21:04:34] (step=0038230) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.479945216200353, LR: 0.0003 +[2026-03-02 21:04:42] (step=0038231) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.480140872627666, LR: 0.0003 +[2026-03-02 21:04:50] (step=0038232) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.48033652905498, LR: 0.0003 +[2026-03-02 21:04:58] (step=0038233) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.480532185482293, LR: 0.0003 +[2026-03-02 21:05:06] (step=0038234) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.480727841909607, LR: 0.0003 +[2026-03-02 21:05:14] (step=0038235) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.48092349833692, LR: 0.0003 +[2026-03-02 21:05:22] (step=0038236) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.481119154764234, LR: 0.0003 +[2026-03-02 21:05:29] (step=0038237) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.481314811191548, LR: 0.0003 +[2026-03-02 21:05:37] (step=0038238) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.481510467618861, LR: 0.0003 +[2026-03-02 21:05:45] (step=0038239) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.481706124046175, LR: 0.0003 +[2026-03-02 21:05:53] (step=0038240) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.481901780473488, LR: 0.0003 +[2026-03-02 21:06:01] (step=0038241) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.482097436900802, LR: 0.0003 +[2026-03-02 21:06:09] (step=0038242) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.482293093328116, LR: 0.0003 +[2026-03-02 21:06:16] (step=0038243) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.482488749755429, LR: 0.0003 +[2026-03-02 21:06:24] (step=0038244) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.482684406182743, LR: 0.0003 +[2026-03-02 21:06:32] (step=0038245) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.4828800626100564, LR: 0.0003 +[2026-03-02 21:06:40] (step=0038246) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.4830757190373705, LR: 0.0003 +[2026-03-02 21:06:48] (step=0038247) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.483271375464684, LR: 0.0003 +[2026-03-02 21:06:56] (step=0038248) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.483467031891998, LR: 0.0003 +[2026-03-02 21:07:03] (step=0038249) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.483662688319312, LR: 0.0003 +[2026-03-02 21:07:11] (step=0038250) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.483858344746625, LR: 0.0003 +[2026-03-02 21:07:19] (step=0038251) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.484054001173939, LR: 0.0003 +[2026-03-02 21:07:27] (step=0038252) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.484249657601252, LR: 0.0003 +[2026-03-02 21:07:35] (step=0038253) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.484445314028566, LR: 0.0003 +[2026-03-02 21:07:43] (step=0038254) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.48464097045588, LR: 0.0003 +[2026-03-02 21:07:50] (step=0038255) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.484836626883193, LR: 0.0003 +[2026-03-02 21:07:58] (step=0038256) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.485032283310507, LR: 0.0003 +[2026-03-02 21:08:06] (step=0038257) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.48522793973782, LR: 0.0003 +[2026-03-02 21:08:14] (step=0038258) Train Loss: 0.4338, Train Steps/Sec: 0.12, Epoch: 7.485423596165134, LR: 0.0003 +[2026-03-02 21:08:22] (step=0038259) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.485619252592448, LR: 0.0003 +[2026-03-02 21:08:30] (step=0038260) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.485814909019761, LR: 0.0003 +[2026-03-02 21:08:38] (step=0038261) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.486010565447075, LR: 0.0003 +[2026-03-02 21:08:46] (step=0038262) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.486206221874388, LR: 0.0003 +[2026-03-02 21:08:53] (step=0038263) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.486401878301702, LR: 0.0003 +[2026-03-02 21:09:01] (step=0038264) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.4865975347290155, LR: 0.0003 +[2026-03-02 21:09:09] (step=0038265) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.4867931911563295, LR: 0.0003 +[2026-03-02 21:09:17] (step=0038266) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.4869888475836435, LR: 0.0003 +[2026-03-02 21:09:25] (step=0038267) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.487184504010957, LR: 0.0003 +[2026-03-02 21:09:33] (step=0038268) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.487380160438271, LR: 0.0003 +[2026-03-02 21:09:41] (step=0038269) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.487575816865584, LR: 0.0003 +[2026-03-02 21:09:48] (step=0038270) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.487771473292898, LR: 0.0003 +[2026-03-02 21:09:56] (step=0038271) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.487967129720212, LR: 0.0003 +[2026-03-02 21:10:04] (step=0038272) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.488162786147525, LR: 0.0003 +[2026-03-02 21:10:12] (step=0038273) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 7.488358442574839, LR: 0.0003 +[2026-03-02 21:10:20] (step=0038274) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.488554099002152, LR: 0.0003 +[2026-03-02 21:10:28] (step=0038275) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.488749755429466, LR: 0.0003 +[2026-03-02 21:10:35] (step=0038276) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 7.488945411856779, LR: 0.0003 +[2026-03-02 21:10:43] (step=0038277) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.489141068284093, LR: 0.0003 +[2026-03-02 21:10:51] (step=0038278) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.489336724711407, LR: 0.0003 +[2026-03-02 21:10:59] (step=0038279) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.48953238113872, LR: 0.0003 +[2026-03-02 21:11:07] (step=0038280) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.489728037566034, LR: 0.0003 +[2026-03-02 21:11:15] (step=0038281) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 7.489923693993347, LR: 0.0003 +[2026-03-02 21:11:23] (step=0038282) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.490119350420661, LR: 0.0003 +[2026-03-02 21:11:30] (step=0038283) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.490315006847975, LR: 0.0003 +[2026-03-02 21:11:38] (step=0038284) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.4905106632752885, LR: 0.0003 +[2026-03-02 21:11:46] (step=0038285) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.4907063197026025, LR: 0.0003 +[2026-03-02 21:11:54] (step=0038286) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.490901976129916, LR: 0.0003 +[2026-03-02 21:12:02] (step=0038287) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 7.49109763255723, LR: 0.0003 +[2026-03-02 21:12:10] (step=0038288) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.491293288984543, LR: 0.0003 +[2026-03-02 21:12:17] (step=0038289) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.491488945411857, LR: 0.0003 +[2026-03-02 21:12:25] (step=0038290) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.491684601839171, LR: 0.0003 +[2026-03-02 21:12:33] (step=0038291) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.491880258266484, LR: 0.0003 +[2026-03-02 21:12:41] (step=0038292) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 7.492075914693798, LR: 0.0003 +[2026-03-02 21:12:49] (step=0038293) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 7.492271571121111, LR: 0.0003 +[2026-03-02 21:12:57] (step=0038294) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 7.492467227548425, LR: 0.0003 +[2026-03-02 21:13:04] (step=0038295) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.492662883975739, LR: 0.0003 +[2026-03-02 21:13:12] (step=0038296) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.492858540403052, LR: 0.0003 +[2026-03-02 21:13:20] (step=0038297) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.493054196830366, LR: 0.0003 +[2026-03-02 21:13:28] (step=0038298) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.493249853257679, LR: 0.0003 +[2026-03-02 21:13:36] (step=0038299) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.493445509684993, LR: 0.0003 +[2026-03-02 21:13:44] (step=0038300) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.493641166112306, LR: 0.0003 +[2026-03-02 21:13:52] (step=0038301) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.49383682253962, LR: 0.0003 +[2026-03-02 21:13:59] (step=0038302) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.494032478966934, LR: 0.0003 +[2026-03-02 21:14:07] (step=0038303) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.4942281353942475, LR: 0.0003 +[2026-03-02 21:14:15] (step=0038304) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.4944237918215615, LR: 0.0003 +[2026-03-02 21:14:23] (step=0038305) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.494619448248875, LR: 0.0003 +[2026-03-02 21:14:31] (step=0038306) Train Loss: 0.4406, Train Steps/Sec: 0.12, Epoch: 7.494815104676189, LR: 0.0003 +[2026-03-02 21:14:39] (step=0038307) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.495010761103503, LR: 0.0003 +[2026-03-02 21:14:47] (step=0038308) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.495206417530816, LR: 0.0003 +[2026-03-02 21:14:54] (step=0038309) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.49540207395813, LR: 0.0003 +[2026-03-02 21:15:02] (step=0038310) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 7.495597730385443, LR: 0.0003 +[2026-03-02 21:15:10] (step=0038311) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.495793386812757, LR: 0.0003 +[2026-03-02 21:15:18] (step=0038312) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.495989043240071, LR: 0.0003 +[2026-03-02 21:15:26] (step=0038313) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.496184699667384, LR: 0.0003 +[2026-03-02 21:15:34] (step=0038314) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.496380356094698, LR: 0.0003 +[2026-03-02 21:15:41] (step=0038315) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.496576012522011, LR: 0.0003 +[2026-03-02 21:15:49] (step=0038316) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.496771668949325, LR: 0.0003 +[2026-03-02 21:15:57] (step=0038317) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.496967325376638, LR: 0.0003 +[2026-03-02 21:16:05] (step=0038318) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.497162981803952, LR: 0.0003 +[2026-03-02 21:16:13] (step=0038319) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.497358638231266, LR: 0.0003 +[2026-03-02 21:16:21] (step=0038320) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.497554294658579, LR: 0.0003 +[2026-03-02 21:16:29] (step=0038321) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.497749951085893, LR: 0.0003 +[2026-03-02 21:16:37] (step=0038322) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.4979456075132065, LR: 0.0003 +[2026-03-02 21:16:44] (step=0038323) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.4981412639405205, LR: 0.0003 +[2026-03-02 21:16:52] (step=0038324) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.4983369203678345, LR: 0.0003 +[2026-03-02 21:17:00] (step=0038325) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.498532576795148, LR: 0.0003 +[2026-03-02 21:17:08] (step=0038326) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.498728233222462, LR: 0.0003 +[2026-03-02 21:17:16] (step=0038327) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.498923889649775, LR: 0.0003 +[2026-03-02 21:17:24] (step=0038328) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.499119546077089, LR: 0.0003 +[2026-03-02 21:17:31] (step=0038329) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.499315202504402, LR: 0.0003 +[2026-03-02 21:17:39] (step=0038330) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.499510858931716, LR: 0.0003 +[2026-03-02 21:17:47] (step=0038331) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.49970651535903, LR: 0.0003 +[2026-03-02 21:17:55] (step=0038332) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.499902171786343, LR: 0.0003 +[2026-03-02 21:18:03] (step=0038333) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.500097828213657, LR: 0.0003 +[2026-03-02 21:18:11] (step=0038334) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.50029348464097, LR: 0.0003 +[2026-03-02 21:18:19] (step=0038335) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 7.500489141068284, LR: 0.0003 +[2026-03-02 21:18:26] (step=0038336) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.500684797495598, LR: 0.0003 +[2026-03-02 21:18:34] (step=0038337) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.500880453922911, LR: 0.0003 +[2026-03-02 21:18:42] (step=0038338) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.501076110350225, LR: 0.0003 +[2026-03-02 21:18:50] (step=0038339) Train Loss: 0.4225, Train Steps/Sec: 0.13, Epoch: 7.501271766777538, LR: 0.0003 +[2026-03-02 21:18:58] (step=0038340) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.501467423204852, LR: 0.0003 +[2026-03-02 21:19:06] (step=0038341) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.5016630796321655, LR: 0.0003 +[2026-03-02 21:19:13] (step=0038342) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.5018587360594795, LR: 0.0003 +[2026-03-02 21:19:21] (step=0038343) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.5020543924867935, LR: 0.0003 +[2026-03-02 21:19:29] (step=0038344) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.502250048914107, LR: 0.0003 +[2026-03-02 21:19:37] (step=0038345) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.502445705341421, LR: 0.0003 +[2026-03-02 21:19:45] (step=0038346) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.502641361768734, LR: 0.0003 +[2026-03-02 21:19:53] (step=0038347) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.502837018196048, LR: 0.0003 +[2026-03-02 21:20:01] (step=0038348) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.503032674623362, LR: 0.0003 +[2026-03-02 21:20:08] (step=0038349) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 7.503228331050675, LR: 0.0003 +[2026-03-02 21:20:16] (step=0038350) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.503423987477989, LR: 0.0003 +[2026-03-02 21:20:24] (step=0038351) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 7.503619643905302, LR: 0.0003 +[2026-03-02 21:20:32] (step=0038352) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.503815300332616, LR: 0.0003 +[2026-03-02 21:20:40] (step=0038353) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.504010956759929, LR: 0.0003 +[2026-03-02 21:20:48] (step=0038354) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.504206613187243, LR: 0.0003 +[2026-03-02 21:20:56] (step=0038355) Train Loss: 0.4483, Train Steps/Sec: 0.12, Epoch: 7.504402269614557, LR: 0.0003 +[2026-03-02 21:21:04] (step=0038356) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.50459792604187, LR: 0.0003 +[2026-03-02 21:21:12] (step=0038357) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.504793582469184, LR: 0.0003 +[2026-03-02 21:21:19] (step=0038358) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.504989238896497, LR: 0.0003 +[2026-03-02 21:21:27] (step=0038359) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.505184895323811, LR: 0.0003 +[2026-03-02 21:21:35] (step=0038360) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.505380551751125, LR: 0.0003 +[2026-03-02 21:21:43] (step=0038361) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.5055762081784385, LR: 0.0003 +[2026-03-02 21:21:51] (step=0038362) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 7.5057718646057525, LR: 0.0003 +[2026-03-02 21:21:59] (step=0038363) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.505967521033066, LR: 0.0003 +[2026-03-02 21:22:07] (step=0038364) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.50616317746038, LR: 0.0003 +[2026-03-02 21:22:14] (step=0038365) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.506358833887694, LR: 0.0003 +[2026-03-02 21:22:22] (step=0038366) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.506554490315007, LR: 0.0003 +[2026-03-02 21:22:30] (step=0038367) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.506750146742321, LR: 0.0003 +[2026-03-02 21:22:38] (step=0038368) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.506945803169634, LR: 0.0003 +[2026-03-02 21:22:46] (step=0038369) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.507141459596948, LR: 0.0003 +[2026-03-02 21:22:54] (step=0038370) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 7.507337116024261, LR: 0.0003 +[2026-03-02 21:23:02] (step=0038371) Train Loss: 0.4599, Train Steps/Sec: 0.12, Epoch: 7.507532772451575, LR: 0.0003 +[2026-03-02 21:23:10] (step=0038372) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.507728428878889, LR: 0.0003 +[2026-03-02 21:23:17] (step=0038373) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 7.507924085306202, LR: 0.0003 +[2026-03-02 21:23:25] (step=0038374) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.508119741733516, LR: 0.0003 +[2026-03-02 21:23:33] (step=0038375) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.508315398160829, LR: 0.0003 +[2026-03-02 21:23:41] (step=0038376) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.508511054588143, LR: 0.0003 +[2026-03-02 21:23:49] (step=0038377) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 7.508706711015457, LR: 0.0003 +[2026-03-02 21:23:57] (step=0038378) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.50890236744277, LR: 0.0003 +[2026-03-02 21:24:05] (step=0038379) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.509098023870084, LR: 0.0003 +[2026-03-02 21:24:13] (step=0038380) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.5092936802973975, LR: 0.0003 +[2026-03-02 21:24:20] (step=0038381) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.5094893367247115, LR: 0.0003 +[2026-03-02 21:24:28] (step=0038382) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.509684993152025, LR: 0.0003 +[2026-03-02 21:24:36] (step=0038383) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.509880649579339, LR: 0.0003 +[2026-03-02 21:24:44] (step=0038384) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.510076306006653, LR: 0.0003 +[2026-03-02 21:24:52] (step=0038385) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.510271962433966, LR: 0.0003 +[2026-03-02 21:25:00] (step=0038386) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.51046761886128, LR: 0.0003 +[2026-03-02 21:25:07] (step=0038387) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.510663275288593, LR: 0.0003 +[2026-03-02 21:25:15] (step=0038388) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 7.510858931715907, LR: 0.0003 +[2026-03-02 21:25:23] (step=0038389) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 7.511054588143221, LR: 0.0003 +[2026-03-02 21:25:31] (step=0038390) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.511250244570534, LR: 0.0003 +[2026-03-02 21:25:39] (step=0038391) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.511445900997848, LR: 0.0003 +[2026-03-02 21:25:47] (step=0038392) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.511641557425161, LR: 0.0003 +[2026-03-02 21:25:55] (step=0038393) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.511837213852475, LR: 0.0003 +[2026-03-02 21:26:02] (step=0038394) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.512032870279788, LR: 0.0003 +[2026-03-02 21:26:10] (step=0038395) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 7.512228526707102, LR: 0.0003 +[2026-03-02 21:26:18] (step=0038396) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.512424183134416, LR: 0.0003 +[2026-03-02 21:26:26] (step=0038397) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.512619839561729, LR: 0.0003 +[2026-03-02 21:26:34] (step=0038398) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 7.512815495989043, LR: 0.0003 +[2026-03-02 21:26:42] (step=0038399) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.5130111524163565, LR: 0.0003 +[2026-03-02 21:26:50] (step=0038400) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.5132068088436705, LR: 0.0003 +[2026-03-02 21:26:58] (step=0038401) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.5134024652709845, LR: 0.0003 +[2026-03-02 21:27:05] (step=0038402) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.513598121698298, LR: 0.0003 +[2026-03-02 21:27:13] (step=0038403) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.513793778125612, LR: 0.0003 +[2026-03-02 21:27:21] (step=0038404) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.513989434552925, LR: 0.0003 +[2026-03-02 21:27:29] (step=0038405) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.514185090980239, LR: 0.0003 +[2026-03-02 21:27:37] (step=0038406) Train Loss: 0.4487, Train Steps/Sec: 0.12, Epoch: 7.514380747407552, LR: 0.0003 +[2026-03-02 21:27:45] (step=0038407) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.514576403834866, LR: 0.0003 +[2026-03-02 21:27:53] (step=0038408) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.51477206026218, LR: 0.0003 +[2026-03-02 21:28:00] (step=0038409) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.514967716689493, LR: 0.0003 +[2026-03-02 21:28:08] (step=0038410) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.515163373116807, LR: 0.0003 +[2026-03-02 21:28:16] (step=0038411) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.51535902954412, LR: 0.0003 +[2026-03-02 21:28:24] (step=0038412) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.515554685971434, LR: 0.0003 +[2026-03-02 21:28:32] (step=0038413) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.515750342398748, LR: 0.0003 +[2026-03-02 21:28:40] (step=0038414) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 7.515945998826061, LR: 0.0003 +[2026-03-02 21:28:48] (step=0038415) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.516141655253375, LR: 0.0003 +[2026-03-02 21:28:55] (step=0038416) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.516337311680688, LR: 0.0003 +[2026-03-02 21:29:03] (step=0038417) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.516532968108002, LR: 0.0003 +[2026-03-02 21:29:11] (step=0038418) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.516728624535316, LR: 0.0003 +[2026-03-02 21:29:19] (step=0038419) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 7.5169242809626295, LR: 0.0003 +[2026-03-02 21:29:27] (step=0038420) Train Loss: 0.4457, Train Steps/Sec: 0.12, Epoch: 7.5171199373899436, LR: 0.0003 +[2026-03-02 21:29:35] (step=0038421) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.517315593817257, LR: 0.0003 +[2026-03-02 21:29:43] (step=0038422) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.517511250244571, LR: 0.0003 +[2026-03-02 21:29:51] (step=0038423) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.517706906671884, LR: 0.0003 +[2026-03-02 21:29:59] (step=0038424) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 7.517902563099198, LR: 0.0003 +[2026-03-02 21:30:06] (step=0038425) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.518098219526512, LR: 0.0003 +[2026-03-02 21:30:14] (step=0038426) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.518293875953825, LR: 0.0003 +[2026-03-02 21:30:22] (step=0038427) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 7.518489532381139, LR: 0.0003 +[2026-03-02 21:30:30] (step=0038428) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.518685188808452, LR: 0.0003 +[2026-03-02 21:30:38] (step=0038429) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.518880845235766, LR: 0.0003 +[2026-03-02 21:30:46] (step=0038430) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.51907650166308, LR: 0.0003 +[2026-03-02 21:30:54] (step=0038431) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.519272158090393, LR: 0.0003 +[2026-03-02 21:31:01] (step=0038432) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.519467814517707, LR: 0.0003 +[2026-03-02 21:31:09] (step=0038433) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 7.51966347094502, LR: 0.0003 +[2026-03-02 21:31:17] (step=0038434) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.519859127372334, LR: 0.0003 +[2026-03-02 21:31:25] (step=0038435) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.520054783799647, LR: 0.0003 +[2026-03-02 21:31:33] (step=0038436) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.520250440226961, LR: 0.0003 +[2026-03-02 21:31:41] (step=0038437) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.520446096654275, LR: 0.0003 +[2026-03-02 21:31:49] (step=0038438) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.5206417530815886, LR: 0.0003 +[2026-03-02 21:31:56] (step=0038439) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.520837409508903, LR: 0.0003 +[2026-03-02 21:32:04] (step=0038440) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.521033065936216, LR: 0.0003 +[2026-03-02 21:32:12] (step=0038441) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.52122872236353, LR: 0.0003 +[2026-03-02 21:32:20] (step=0038442) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.521424378790844, LR: 0.0003 +[2026-03-02 21:32:28] (step=0038443) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.521620035218157, LR: 0.0003 +[2026-03-02 21:32:36] (step=0038444) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.521815691645471, LR: 0.0003 +[2026-03-02 21:32:44] (step=0038445) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.522011348072784, LR: 0.0003 +[2026-03-02 21:32:51] (step=0038446) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.522207004500098, LR: 0.0003 +[2026-03-02 21:32:59] (step=0038447) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.522402660927411, LR: 0.0003 +[2026-03-02 21:33:07] (step=0038448) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.522598317354725, LR: 0.0003 +[2026-03-02 21:33:15] (step=0038449) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 7.522793973782039, LR: 0.0003 +[2026-03-02 21:33:23] (step=0038450) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 7.522989630209352, LR: 0.0003 +[2026-03-02 21:33:31] (step=0038451) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.523185286636666, LR: 0.0003 +[2026-03-02 21:33:39] (step=0038452) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.523380943063979, LR: 0.0003 +[2026-03-02 21:33:47] (step=0038453) Train Loss: 0.4462, Train Steps/Sec: 0.12, Epoch: 7.523576599491293, LR: 0.0003 +[2026-03-02 21:33:55] (step=0038454) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.523772255918607, LR: 0.0003 +[2026-03-02 21:34:02] (step=0038455) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.52396791234592, LR: 0.0003 +[2026-03-02 21:34:10] (step=0038456) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.5241635687732344, LR: 0.0003 +[2026-03-02 21:34:18] (step=0038457) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.524359225200548, LR: 0.0003 +[2026-03-02 21:34:26] (step=0038458) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.524554881627862, LR: 0.0003 +[2026-03-02 21:34:34] (step=0038459) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.524750538055175, LR: 0.0003 +[2026-03-02 21:34:42] (step=0038460) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.524946194482489, LR: 0.0003 +[2026-03-02 21:34:50] (step=0038461) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.525141850909803, LR: 0.0003 +[2026-03-02 21:34:57] (step=0038462) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.525337507337116, LR: 0.0003 +[2026-03-02 21:35:05] (step=0038463) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.52553316376443, LR: 0.0003 +[2026-03-02 21:35:13] (step=0038464) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 7.525728820191743, LR: 0.0003 +[2026-03-02 21:35:21] (step=0038465) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.525924476619057, LR: 0.0003 +[2026-03-02 21:35:29] (step=0038466) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.526120133046371, LR: 0.0003 +[2026-03-02 21:35:37] (step=0038467) Train Loss: 0.4484, Train Steps/Sec: 0.12, Epoch: 7.526315789473684, LR: 0.0003 +[2026-03-02 21:35:45] (step=0038468) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 7.526511445900998, LR: 0.0003 +[2026-03-02 21:35:53] (step=0038469) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.526707102328311, LR: 0.0003 +[2026-03-02 21:36:00] (step=0038470) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.526902758755625, LR: 0.0003 +[2026-03-02 21:36:08] (step=0038471) Train Loss: 0.4676, Train Steps/Sec: 0.13, Epoch: 7.527098415182938, LR: 0.0003 +[2026-03-02 21:36:16] (step=0038472) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.527294071610252, LR: 0.0003 +[2026-03-02 21:36:24] (step=0038473) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.527489728037566, LR: 0.0003 +[2026-03-02 21:36:32] (step=0038474) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.5276853844648794, LR: 0.0003 +[2026-03-02 21:36:40] (step=0038475) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.5278810408921935, LR: 0.0003 +[2026-03-02 21:36:48] (step=0038476) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.528076697319507, LR: 0.0003 +[2026-03-02 21:36:56] (step=0038477) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.528272353746821, LR: 0.0003 +[2026-03-02 21:37:03] (step=0038478) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.528468010174135, LR: 0.0003 +[2026-03-02 21:37:11] (step=0038479) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.528663666601448, LR: 0.0003 +[2026-03-02 21:37:19] (step=0038480) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.528859323028762, LR: 0.0003 +[2026-03-02 21:37:27] (step=0038481) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.529054979456075, LR: 0.0003 +[2026-03-02 21:37:35] (step=0038482) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 7.529250635883389, LR: 0.0003 +[2026-03-02 21:37:43] (step=0038483) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.529446292310703, LR: 0.0003 +[2026-03-02 21:37:51] (step=0038484) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.529641948738016, LR: 0.0003 +[2026-03-02 21:37:58] (step=0038485) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.52983760516533, LR: 0.0003 +[2026-03-02 21:38:06] (step=0038486) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.530033261592643, LR: 0.0003 +[2026-03-02 21:38:14] (step=0038487) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.530228918019957, LR: 0.0003 +[2026-03-02 21:38:22] (step=0038488) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.53042457444727, LR: 0.0003 +[2026-03-02 21:38:30] (step=0038489) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.530620230874584, LR: 0.0003 +[2026-03-02 21:38:38] (step=0038490) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 7.530815887301898, LR: 0.0003 +[2026-03-02 21:38:46] (step=0038491) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.531011543729211, LR: 0.0003 +[2026-03-02 21:38:54] (step=0038492) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.531207200156525, LR: 0.0003 +[2026-03-02 21:39:01] (step=0038493) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.5314028565838385, LR: 0.0003 +[2026-03-02 21:39:09] (step=0038494) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.5315985130111525, LR: 0.0003 +[2026-03-02 21:39:17] (step=0038495) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.5317941694384665, LR: 0.0003 +[2026-03-02 21:39:25] (step=0038496) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.53198982586578, LR: 0.0003 +[2026-03-02 21:39:33] (step=0038497) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.532185482293094, LR: 0.0003 +[2026-03-02 21:39:41] (step=0038498) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.532381138720407, LR: 0.0003 +[2026-03-02 21:39:49] (step=0038499) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.532576795147721, LR: 0.0003 +[2026-03-02 21:39:56] (step=0038500) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 7.532772451575034, LR: 0.0003 +[2026-03-02 21:39:56] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0038500/ +[2026-03-02 21:40:04] (step=0038501) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.532968108002348, LR: 0.0003 +[2026-03-02 21:40:12] (step=0038502) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.533163764429662, LR: 0.0003 +[2026-03-02 21:40:20] (step=0038503) Train Loss: 0.4536, Train Steps/Sec: 0.12, Epoch: 7.533359420856975, LR: 0.0003 +[2026-03-02 21:40:28] (step=0038504) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.533555077284289, LR: 0.0003 +[2026-03-02 21:40:36] (step=0038505) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.533750733711602, LR: 0.0003 +[2026-03-02 21:40:44] (step=0038506) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 7.533946390138916, LR: 0.0003 +[2026-03-02 21:40:52] (step=0038507) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.53414204656623, LR: 0.0003 +[2026-03-02 21:41:00] (step=0038508) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.534337702993543, LR: 0.0003 +[2026-03-02 21:41:08] (step=0038509) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.534533359420857, LR: 0.0003 +[2026-03-02 21:41:16] (step=0038510) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.53472901584817, LR: 0.0003 +[2026-03-02 21:41:23] (step=0038511) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.534924672275484, LR: 0.0003 +[2026-03-02 21:41:31] (step=0038512) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.5351203287027975, LR: 0.0003 +[2026-03-02 21:41:39] (step=0038513) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.5353159851301115, LR: 0.0003 +[2026-03-02 21:41:47] (step=0038514) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 7.5355116415574255, LR: 0.0003 +[2026-03-02 21:41:55] (step=0038515) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.535707297984739, LR: 0.0003 +[2026-03-02 21:42:03] (step=0038516) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.535902954412053, LR: 0.0003 +[2026-03-02 21:42:11] (step=0038517) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.536098610839366, LR: 0.0003 +[2026-03-02 21:42:19] (step=0038518) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.53629426726668, LR: 0.0003 +[2026-03-02 21:42:27] (step=0038519) Train Loss: 0.4370, Train Steps/Sec: 0.12, Epoch: 7.536489923693994, LR: 0.0003 +[2026-03-02 21:42:35] (step=0038520) Train Loss: 0.4496, Train Steps/Sec: 0.12, Epoch: 7.536685580121307, LR: 0.0003 +[2026-03-02 21:42:43] (step=0038521) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.536881236548621, LR: 0.0003 +[2026-03-02 21:42:50] (step=0038522) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.537076892975934, LR: 0.0003 +[2026-03-02 21:42:58] (step=0038523) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.537272549403248, LR: 0.0003 +[2026-03-02 21:43:06] (step=0038524) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.537468205830561, LR: 0.0003 +[2026-03-02 21:43:14] (step=0038525) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.537663862257875, LR: 0.0003 +[2026-03-02 21:43:22] (step=0038526) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.537859518685189, LR: 0.0003 +[2026-03-02 21:43:30] (step=0038527) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.538055175112502, LR: 0.0003 +[2026-03-02 21:43:38] (step=0038528) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.538250831539816, LR: 0.0003 +[2026-03-02 21:43:46] (step=0038529) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.538446487967129, LR: 0.0003 +[2026-03-02 21:43:53] (step=0038530) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.538642144394443, LR: 0.0003 +[2026-03-02 21:44:01] (step=0038531) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.538837800821757, LR: 0.0003 +[2026-03-02 21:44:09] (step=0038532) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.5390334572490705, LR: 0.0003 +[2026-03-02 21:44:17] (step=0038533) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.5392291136763845, LR: 0.0003 +[2026-03-02 21:44:25] (step=0038534) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 7.539424770103698, LR: 0.0003 +[2026-03-02 21:44:33] (step=0038535) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.539620426531012, LR: 0.0003 +[2026-03-02 21:44:41] (step=0038536) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 7.539816082958326, LR: 0.0003 +[2026-03-02 21:44:49] (step=0038537) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.540011739385639, LR: 0.0003 +[2026-03-02 21:44:57] (step=0038538) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.540207395812953, LR: 0.0003 +[2026-03-02 21:45:04] (step=0038539) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.540403052240266, LR: 0.0003 +[2026-03-02 21:45:12] (step=0038540) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.54059870866758, LR: 0.0003 +[2026-03-02 21:45:20] (step=0038541) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.540794365094893, LR: 0.0003 +[2026-03-02 21:45:28] (step=0038542) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.540990021522207, LR: 0.0003 +[2026-03-02 21:45:36] (step=0038543) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 7.541185677949521, LR: 0.0003 +[2026-03-02 21:45:44] (step=0038544) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.541381334376834, LR: 0.0003 +[2026-03-02 21:45:52] (step=0038545) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.541576990804148, LR: 0.0003 +[2026-03-02 21:46:00] (step=0038546) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.541772647231461, LR: 0.0003 +[2026-03-02 21:46:07] (step=0038547) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.541968303658775, LR: 0.0003 +[2026-03-02 21:46:15] (step=0038548) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.542163960086089, LR: 0.0003 +[2026-03-02 21:46:23] (step=0038549) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.542359616513402, LR: 0.0003 +[2026-03-02 21:46:31] (step=0038550) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.542555272940716, LR: 0.0003 +[2026-03-02 21:46:39] (step=0038551) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.5427509293680295, LR: 0.0003 +[2026-03-02 21:46:47] (step=0038552) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 7.5429465857953435, LR: 0.0003 +[2026-03-02 21:46:55] (step=0038553) Train Loss: 0.4553, Train Steps/Sec: 0.12, Epoch: 7.543142242222657, LR: 0.0003 +[2026-03-02 21:47:03] (step=0038554) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 7.543337898649971, LR: 0.0003 +[2026-03-02 21:47:11] (step=0038555) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.543533555077285, LR: 0.0003 +[2026-03-02 21:47:18] (step=0038556) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.543729211504598, LR: 0.0003 +[2026-03-02 21:47:26] (step=0038557) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 7.543924867931912, LR: 0.0003 +[2026-03-02 21:47:34] (step=0038558) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.544120524359225, LR: 0.0003 +[2026-03-02 21:47:42] (step=0038559) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.544316180786539, LR: 0.0003 +[2026-03-02 21:47:50] (step=0038560) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.544511837213853, LR: 0.0003 +[2026-03-02 21:47:58] (step=0038561) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.544707493641166, LR: 0.0003 +[2026-03-02 21:48:06] (step=0038562) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.54490315006848, LR: 0.0003 +[2026-03-02 21:48:13] (step=0038563) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.545098806495793, LR: 0.0003 +[2026-03-02 21:48:21] (step=0038564) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.545294462923107, LR: 0.0003 +[2026-03-02 21:48:29] (step=0038565) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.54549011935042, LR: 0.0003 +[2026-03-02 21:48:37] (step=0038566) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.545685775777734, LR: 0.0003 +[2026-03-02 21:48:45] (step=0038567) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.545881432205048, LR: 0.0003 +[2026-03-02 21:48:53] (step=0038568) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.546077088632361, LR: 0.0003 +[2026-03-02 21:49:01] (step=0038569) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.546272745059675, LR: 0.0003 +[2026-03-02 21:49:09] (step=0038570) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.5464684014869885, LR: 0.0003 +[2026-03-02 21:49:16] (step=0038571) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 7.5466640579143025, LR: 0.0003 +[2026-03-02 21:49:24] (step=0038572) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.5468597143416165, LR: 0.0003 +[2026-03-02 21:49:32] (step=0038573) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.54705537076893, LR: 0.0003 +[2026-03-02 21:49:40] (step=0038574) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.547251027196244, LR: 0.0003 +[2026-03-02 21:49:48] (step=0038575) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.547446683623557, LR: 0.0003 +[2026-03-02 21:49:56] (step=0038576) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.547642340050871, LR: 0.0003 +[2026-03-02 21:50:04] (step=0038577) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.547837996478184, LR: 0.0003 +[2026-03-02 21:50:12] (step=0038578) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.548033652905498, LR: 0.0003 +[2026-03-02 21:50:19] (step=0038579) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.548229309332812, LR: 0.0003 +[2026-03-02 21:50:27] (step=0038580) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.548424965760125, LR: 0.0003 +[2026-03-02 21:50:35] (step=0038581) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.548620622187439, LR: 0.0003 +[2026-03-02 21:50:43] (step=0038582) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.548816278614752, LR: 0.0003 +[2026-03-02 21:50:51] (step=0038583) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.549011935042066, LR: 0.0003 +[2026-03-02 21:50:59] (step=0038584) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.54920759146938, LR: 0.0003 +[2026-03-02 21:51:07] (step=0038585) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 7.549403247896693, LR: 0.0003 +[2026-03-02 21:51:14] (step=0038586) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 7.549598904324007, LR: 0.0003 +[2026-03-02 21:51:22] (step=0038587) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.54979456075132, LR: 0.0003 +[2026-03-02 21:51:30] (step=0038588) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 7.549990217178634, LR: 0.0003 +[2026-03-02 21:51:38] (step=0038589) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 7.550185873605948, LR: 0.0003 +[2026-03-02 21:51:46] (step=0038590) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.5503815300332615, LR: 0.0003 +[2026-03-02 21:51:54] (step=0038591) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.5505771864605755, LR: 0.0003 +[2026-03-02 21:52:02] (step=0038592) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.550772842887889, LR: 0.0003 +[2026-03-02 21:52:09] (step=0038593) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.550968499315203, LR: 0.0003 +[2026-03-02 21:52:17] (step=0038594) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 7.551164155742516, LR: 0.0003 +[2026-03-02 21:52:25] (step=0038595) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.55135981216983, LR: 0.0003 +[2026-03-02 21:52:33] (step=0038596) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.551555468597144, LR: 0.0003 +[2026-03-02 21:52:41] (step=0038597) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.551751125024457, LR: 0.0003 +[2026-03-02 21:52:49] (step=0038598) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.551946781451771, LR: 0.0003 +[2026-03-02 21:52:57] (step=0038599) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.552142437879084, LR: 0.0003 +[2026-03-02 21:53:05] (step=0038600) Train Loss: 0.4468, Train Steps/Sec: 0.12, Epoch: 7.552338094306398, LR: 0.0003 +[2026-03-02 21:53:12] (step=0038601) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 7.552533750733712, LR: 0.0003 +[2026-03-02 21:53:20] (step=0038602) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.552729407161025, LR: 0.0003 +[2026-03-02 21:53:28] (step=0038603) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.552925063588339, LR: 0.0003 +[2026-03-02 21:53:36] (step=0038604) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.553120720015652, LR: 0.0003 +[2026-03-02 21:53:44] (step=0038605) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.553316376442966, LR: 0.0003 +[2026-03-02 21:53:52] (step=0038606) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.553512032870279, LR: 0.0003 +[2026-03-02 21:54:00] (step=0038607) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.553707689297593, LR: 0.0003 +[2026-03-02 21:54:07] (step=0038608) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 7.553903345724907, LR: 0.0003 +[2026-03-02 21:54:15] (step=0038609) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.5540990021522205, LR: 0.0003 +[2026-03-02 21:54:23] (step=0038610) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.5542946585795345, LR: 0.0003 +[2026-03-02 21:54:31] (step=0038611) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.554490315006848, LR: 0.0003 +[2026-03-02 21:54:39] (step=0038612) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.554685971434162, LR: 0.0003 +[2026-03-02 21:54:47] (step=0038613) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.554881627861476, LR: 0.0003 +[2026-03-02 21:54:55] (step=0038614) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.555077284288789, LR: 0.0003 +[2026-03-02 21:55:02] (step=0038615) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.555272940716103, LR: 0.0003 +[2026-03-02 21:55:10] (step=0038616) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.555468597143416, LR: 0.0003 +[2026-03-02 21:55:18] (step=0038617) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 7.55566425357073, LR: 0.0003 +[2026-03-02 21:55:26] (step=0038618) Train Loss: 0.4472, Train Steps/Sec: 0.12, Epoch: 7.555859909998043, LR: 0.0003 +[2026-03-02 21:55:34] (step=0038619) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.556055566425357, LR: 0.0003 +[2026-03-02 21:55:42] (step=0038620) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.556251222852671, LR: 0.0003 +[2026-03-02 21:55:50] (step=0038621) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.556446879279984, LR: 0.0003 +[2026-03-02 21:55:58] (step=0038622) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.556642535707298, LR: 0.0003 +[2026-03-02 21:56:06] (step=0038623) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.556838192134611, LR: 0.0003 +[2026-03-02 21:56:13] (step=0038624) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.557033848561925, LR: 0.0003 +[2026-03-02 21:56:21] (step=0038625) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.557229504989239, LR: 0.0003 +[2026-03-02 21:56:29] (step=0038626) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.557425161416552, LR: 0.0003 +[2026-03-02 21:56:37] (step=0038627) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.557620817843866, LR: 0.0003 +[2026-03-02 21:56:45] (step=0038628) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.5578164742711795, LR: 0.0003 +[2026-03-02 21:56:53] (step=0038629) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.5580121306984935, LR: 0.0003 +[2026-03-02 21:57:01] (step=0038630) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.558207787125807, LR: 0.0003 +[2026-03-02 21:57:08] (step=0038631) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.558403443553121, LR: 0.0003 +[2026-03-02 21:57:16] (step=0038632) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.558599099980435, LR: 0.0003 +[2026-03-02 21:57:24] (step=0038633) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.558794756407748, LR: 0.0003 +[2026-03-02 21:57:32] (step=0038634) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.558990412835062, LR: 0.0003 +[2026-03-02 21:57:40] (step=0038635) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.559186069262375, LR: 0.0003 +[2026-03-02 21:57:48] (step=0038636) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.559381725689689, LR: 0.0003 +[2026-03-02 21:57:56] (step=0038637) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.559577382117003, LR: 0.0003 +[2026-03-02 21:58:03] (step=0038638) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.559773038544316, LR: 0.0003 +[2026-03-02 21:58:11] (step=0038639) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 7.55996869497163, LR: 0.0003 +[2026-03-02 21:58:19] (step=0038640) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.560164351398943, LR: 0.0003 +[2026-03-02 21:58:27] (step=0038641) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.560360007826257, LR: 0.0003 +[2026-03-02 21:58:35] (step=0038642) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.560555664253571, LR: 0.0003 +[2026-03-02 21:58:43] (step=0038643) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.560751320680884, LR: 0.0003 +[2026-03-02 21:58:51] (step=0038644) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.560946977108198, LR: 0.0003 +[2026-03-02 21:58:58] (step=0038645) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.561142633535511, LR: 0.0003 +[2026-03-02 21:59:06] (step=0038646) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.561338289962825, LR: 0.0003 +[2026-03-02 21:59:14] (step=0038647) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.5615339463901385, LR: 0.0003 +[2026-03-02 21:59:22] (step=0038648) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.5617296028174525, LR: 0.0003 +[2026-03-02 21:59:30] (step=0038649) Train Loss: 0.4420, Train Steps/Sec: 0.12, Epoch: 7.5619252592447666, LR: 0.0003 +[2026-03-02 21:59:38] (step=0038650) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.56212091567208, LR: 0.0003 +[2026-03-02 21:59:46] (step=0038651) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 7.562316572099394, LR: 0.0003 +[2026-03-02 21:59:54] (step=0038652) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.562512228526707, LR: 0.0003 +[2026-03-02 22:00:01] (step=0038653) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.562707884954021, LR: 0.0003 +[2026-03-02 22:00:09] (step=0038654) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.562903541381335, LR: 0.0003 +[2026-03-02 22:00:17] (step=0038655) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.563099197808648, LR: 0.0003 +[2026-03-02 22:00:25] (step=0038656) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.563294854235962, LR: 0.0003 +[2026-03-02 22:00:33] (step=0038657) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 7.563490510663275, LR: 0.0003 +[2026-03-02 22:00:41] (step=0038658) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.563686167090589, LR: 0.0003 +[2026-03-02 22:00:49] (step=0038659) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 7.563881823517902, LR: 0.0003 +[2026-03-02 22:00:56] (step=0038660) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.564077479945216, LR: 0.0003 +[2026-03-02 22:01:04] (step=0038661) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.56427313637253, LR: 0.0003 +[2026-03-02 22:01:12] (step=0038662) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.564468792799843, LR: 0.0003 +[2026-03-02 22:01:20] (step=0038663) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.564664449227157, LR: 0.0003 +[2026-03-02 22:01:28] (step=0038664) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.56486010565447, LR: 0.0003 +[2026-03-02 22:01:36] (step=0038665) Train Loss: 0.4370, Train Steps/Sec: 0.12, Epoch: 7.565055762081784, LR: 0.0003 +[2026-03-02 22:01:44] (step=0038666) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.565251418509098, LR: 0.0003 +[2026-03-02 22:01:52] (step=0038667) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.5654470749364116, LR: 0.0003 +[2026-03-02 22:01:59] (step=0038668) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.565642731363726, LR: 0.0003 +[2026-03-02 22:02:07] (step=0038669) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.565838387791039, LR: 0.0003 +[2026-03-02 22:02:15] (step=0038670) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.566034044218353, LR: 0.0003 +[2026-03-02 22:02:23] (step=0038671) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.566229700645666, LR: 0.0003 +[2026-03-02 22:02:31] (step=0038672) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.56642535707298, LR: 0.0003 +[2026-03-02 22:02:39] (step=0038673) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.566621013500294, LR: 0.0003 +[2026-03-02 22:02:47] (step=0038674) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.566816669927607, LR: 0.0003 +[2026-03-02 22:02:54] (step=0038675) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.567012326354921, LR: 0.0003 +[2026-03-02 22:03:02] (step=0038676) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.567207982782234, LR: 0.0003 +[2026-03-02 22:03:10] (step=0038677) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.567403639209548, LR: 0.0003 +[2026-03-02 22:03:18] (step=0038678) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 7.567599295636862, LR: 0.0003 +[2026-03-02 22:03:26] (step=0038679) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.567794952064175, LR: 0.0003 +[2026-03-02 22:03:34] (step=0038680) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.567990608491489, LR: 0.0003 +[2026-03-02 22:03:42] (step=0038681) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.568186264918802, LR: 0.0003 +[2026-03-02 22:03:49] (step=0038682) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 7.568381921346116, LR: 0.0003 +[2026-03-02 22:03:57] (step=0038683) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.568577577773429, LR: 0.0003 +[2026-03-02 22:04:05] (step=0038684) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.568773234200743, LR: 0.0003 +[2026-03-02 22:04:13] (step=0038685) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.5689688906280574, LR: 0.0003 +[2026-03-02 22:04:21] (step=0038686) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.569164547055371, LR: 0.0003 +[2026-03-02 22:04:29] (step=0038687) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.569360203482685, LR: 0.0003 +[2026-03-02 22:04:37] (step=0038688) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.569555859909998, LR: 0.0003 +[2026-03-02 22:04:44] (step=0038689) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.569751516337312, LR: 0.0003 +[2026-03-02 22:04:52] (step=0038690) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.569947172764626, LR: 0.0003 +[2026-03-02 22:05:00] (step=0038691) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.570142829191939, LR: 0.0003 +[2026-03-02 22:05:08] (step=0038692) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.570338485619253, LR: 0.0003 +[2026-03-02 22:05:16] (step=0038693) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.570534142046566, LR: 0.0003 +[2026-03-02 22:05:24] (step=0038694) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 7.57072979847388, LR: 0.0003 +[2026-03-02 22:05:32] (step=0038695) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.570925454901193, LR: 0.0003 +[2026-03-02 22:05:39] (step=0038696) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.571121111328507, LR: 0.0003 +[2026-03-02 22:05:47] (step=0038697) Train Loss: 0.4350, Train Steps/Sec: 0.12, Epoch: 7.571316767755821, LR: 0.0003 +[2026-03-02 22:05:55] (step=0038698) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.571512424183134, LR: 0.0003 +[2026-03-02 22:06:03] (step=0038699) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.571708080610448, LR: 0.0003 +[2026-03-02 22:06:11] (step=0038700) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.571903737037761, LR: 0.0003 +[2026-03-02 22:06:19] (step=0038701) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.572099393465075, LR: 0.0003 +[2026-03-02 22:06:27] (step=0038702) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.572295049892389, LR: 0.0003 +[2026-03-02 22:06:35] (step=0038703) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.5724907063197024, LR: 0.0003 +[2026-03-02 22:06:42] (step=0038704) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.5726863627470165, LR: 0.0003 +[2026-03-02 22:06:50] (step=0038705) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.57288201917433, LR: 0.0003 +[2026-03-02 22:06:58] (step=0038706) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 7.573077675601644, LR: 0.0003 +[2026-03-02 22:07:06] (step=0038707) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.573273332028958, LR: 0.0003 +[2026-03-02 22:07:14] (step=0038708) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.573468988456271, LR: 0.0003 +[2026-03-02 22:07:22] (step=0038709) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.573664644883585, LR: 0.0003 +[2026-03-02 22:07:30] (step=0038710) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.573860301310898, LR: 0.0003 +[2026-03-02 22:07:38] (step=0038711) Train Loss: 0.4468, Train Steps/Sec: 0.12, Epoch: 7.574055957738212, LR: 0.0003 +[2026-03-02 22:07:46] (step=0038712) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.574251614165525, LR: 0.0003 +[2026-03-02 22:07:53] (step=0038713) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.574447270592839, LR: 0.0003 +[2026-03-02 22:08:01] (step=0038714) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.574642927020153, LR: 0.0003 +[2026-03-02 22:08:09] (step=0038715) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.574838583447466, LR: 0.0003 +[2026-03-02 22:08:17] (step=0038716) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.57503423987478, LR: 0.0003 +[2026-03-02 22:08:25] (step=0038717) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 7.575229896302093, LR: 0.0003 +[2026-03-02 22:08:33] (step=0038718) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.575425552729407, LR: 0.0003 +[2026-03-02 22:08:41] (step=0038719) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.575621209156721, LR: 0.0003 +[2026-03-02 22:08:48] (step=0038720) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.575816865584034, LR: 0.0003 +[2026-03-02 22:08:56] (step=0038721) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.576012522011348, LR: 0.0003 +[2026-03-02 22:09:04] (step=0038722) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.5762081784386615, LR: 0.0003 +[2026-03-02 22:09:12] (step=0038723) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.5764038348659755, LR: 0.0003 +[2026-03-02 22:09:20] (step=0038724) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.576599491293289, LR: 0.0003 +[2026-03-02 22:09:28] (step=0038725) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.576795147720603, LR: 0.0003 +[2026-03-02 22:09:35] (step=0038726) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.576990804147917, LR: 0.0003 +[2026-03-02 22:09:43] (step=0038727) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.57718646057523, LR: 0.0003 +[2026-03-02 22:09:51] (step=0038728) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.577382117002544, LR: 0.0003 +[2026-03-02 22:09:59] (step=0038729) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.577577773429857, LR: 0.0003 +[2026-03-02 22:10:07] (step=0038730) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.577773429857171, LR: 0.0003 +[2026-03-02 22:10:15] (step=0038731) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.577969086284485, LR: 0.0003 +[2026-03-02 22:10:23] (step=0038732) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.578164742711798, LR: 0.0003 +[2026-03-02 22:10:31] (step=0038733) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.578360399139112, LR: 0.0003 +[2026-03-02 22:10:38] (step=0038734) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.578556055566425, LR: 0.0003 +[2026-03-02 22:10:46] (step=0038735) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.578751711993739, LR: 0.0003 +[2026-03-02 22:10:54] (step=0038736) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.578947368421052, LR: 0.0003 +[2026-03-02 22:11:02] (step=0038737) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.579143024848366, LR: 0.0003 +[2026-03-02 22:11:10] (step=0038738) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.57933868127568, LR: 0.0003 +[2026-03-02 22:11:18] (step=0038739) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.579534337702993, LR: 0.0003 +[2026-03-02 22:11:26] (step=0038740) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.579729994130307, LR: 0.0003 +[2026-03-02 22:11:33] (step=0038741) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.5799256505576205, LR: 0.0003 +[2026-03-02 22:11:41] (step=0038742) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 7.5801213069849345, LR: 0.0003 +[2026-03-02 22:11:49] (step=0038743) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.5803169634122485, LR: 0.0003 +[2026-03-02 22:11:57] (step=0038744) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.580512619839562, LR: 0.0003 +[2026-03-02 22:12:05] (step=0038745) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.580708276266876, LR: 0.0003 +[2026-03-02 22:12:13] (step=0038746) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 7.580903932694189, LR: 0.0003 +[2026-03-02 22:12:21] (step=0038747) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.581099589121503, LR: 0.0003 +[2026-03-02 22:12:29] (step=0038748) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 7.581295245548816, LR: 0.0003 +[2026-03-02 22:12:36] (step=0038749) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.58149090197613, LR: 0.0003 +[2026-03-02 22:12:44] (step=0038750) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 7.581686558403444, LR: 0.0003 +[2026-03-02 22:12:52] (step=0038751) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.581882214830757, LR: 0.0003 +[2026-03-02 22:13:00] (step=0038752) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.582077871258071, LR: 0.0003 +[2026-03-02 22:13:08] (step=0038753) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.582273527685384, LR: 0.0003 +[2026-03-02 22:13:16] (step=0038754) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.582469184112698, LR: 0.0003 +[2026-03-02 22:13:24] (step=0038755) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.582664840540012, LR: 0.0003 +[2026-03-02 22:13:31] (step=0038756) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.582860496967325, LR: 0.0003 +[2026-03-02 22:13:39] (step=0038757) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.583056153394639, LR: 0.0003 +[2026-03-02 22:13:47] (step=0038758) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.583251809821952, LR: 0.0003 +[2026-03-02 22:13:55] (step=0038759) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.583447466249266, LR: 0.0003 +[2026-03-02 22:14:03] (step=0038760) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.58364312267658, LR: 0.0003 +[2026-03-02 22:14:11] (step=0038761) Train Loss: 0.4474, Train Steps/Sec: 0.12, Epoch: 7.5838387791038935, LR: 0.0003 +[2026-03-02 22:14:19] (step=0038762) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.5840344355312075, LR: 0.0003 +[2026-03-02 22:14:27] (step=0038763) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.584230091958521, LR: 0.0003 +[2026-03-02 22:14:34] (step=0038764) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 7.584425748385835, LR: 0.0003 +[2026-03-02 22:14:42] (step=0038765) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.584621404813148, LR: 0.0003 +[2026-03-02 22:14:50] (step=0038766) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.584817061240462, LR: 0.0003 +[2026-03-02 22:14:58] (step=0038767) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.585012717667776, LR: 0.0003 +[2026-03-02 22:15:06] (step=0038768) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.585208374095089, LR: 0.0003 +[2026-03-02 22:15:14] (step=0038769) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.585404030522403, LR: 0.0003 +[2026-03-02 22:15:22] (step=0038770) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.585599686949716, LR: 0.0003 +[2026-03-02 22:15:30] (step=0038771) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 7.58579534337703, LR: 0.0003 +[2026-03-02 22:15:37] (step=0038772) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.585990999804344, LR: 0.0003 +[2026-03-02 22:15:45] (step=0038773) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.586186656231657, LR: 0.0003 +[2026-03-02 22:15:53] (step=0038774) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.586382312658971, LR: 0.0003 +[2026-03-02 22:16:01] (step=0038775) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.586577969086284, LR: 0.0003 +[2026-03-02 22:16:09] (step=0038776) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.586773625513598, LR: 0.0003 +[2026-03-02 22:16:17] (step=0038777) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 7.586969281940911, LR: 0.0003 +[2026-03-02 22:16:24] (step=0038778) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 7.587164938368225, LR: 0.0003 +[2026-03-02 22:16:32] (step=0038779) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 7.587360594795539, LR: 0.0003 +[2026-03-02 22:16:40] (step=0038780) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.5875562512228525, LR: 0.0003 +[2026-03-02 22:16:48] (step=0038781) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.5877519076501665, LR: 0.0003 +[2026-03-02 22:16:56] (step=0038782) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.58794756407748, LR: 0.0003 +[2026-03-02 22:17:04] (step=0038783) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.588143220504794, LR: 0.0003 +[2026-03-02 22:17:12] (step=0038784) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.588338876932108, LR: 0.0003 +[2026-03-02 22:17:20] (step=0038785) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.588534533359421, LR: 0.0003 +[2026-03-02 22:17:27] (step=0038786) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.588730189786735, LR: 0.0003 +[2026-03-02 22:17:35] (step=0038787) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.588925846214048, LR: 0.0003 +[2026-03-02 22:17:43] (step=0038788) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.589121502641362, LR: 0.0003 +[2026-03-02 22:17:51] (step=0038789) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.589317159068675, LR: 0.0003 +[2026-03-02 22:17:59] (step=0038790) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.589512815495989, LR: 0.0003 +[2026-03-02 22:18:07] (step=0038791) Train Loss: 0.4489, Train Steps/Sec: 0.12, Epoch: 7.589708471923303, LR: 0.0003 +[2026-03-02 22:18:15] (step=0038792) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.589904128350616, LR: 0.0003 +[2026-03-02 22:18:23] (step=0038793) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.59009978477793, LR: 0.0003 +[2026-03-02 22:18:30] (step=0038794) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 7.590295441205243, LR: 0.0003 +[2026-03-02 22:18:38] (step=0038795) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 7.590491097632557, LR: 0.0003 +[2026-03-02 22:18:46] (step=0038796) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.590686754059871, LR: 0.0003 +[2026-03-02 22:18:54] (step=0038797) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.590882410487184, LR: 0.0003 +[2026-03-02 22:19:02] (step=0038798) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.591078066914498, LR: 0.0003 +[2026-03-02 22:19:10] (step=0038799) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.5912737233418115, LR: 0.0003 +[2026-03-02 22:19:18] (step=0038800) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.5914693797691255, LR: 0.0003 +[2026-03-02 22:19:26] (step=0038801) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.591665036196439, LR: 0.0003 +[2026-03-02 22:19:33] (step=0038802) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.591860692623753, LR: 0.0003 +[2026-03-02 22:19:41] (step=0038803) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.592056349051067, LR: 0.0003 +[2026-03-02 22:19:49] (step=0038804) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.59225200547838, LR: 0.0003 +[2026-03-02 22:19:57] (step=0038805) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.592447661905694, LR: 0.0003 +[2026-03-02 22:20:05] (step=0038806) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.592643318333007, LR: 0.0003 +[2026-03-02 22:20:13] (step=0038807) Train Loss: 0.4404, Train Steps/Sec: 0.12, Epoch: 7.592838974760321, LR: 0.0003 +[2026-03-02 22:20:21] (step=0038808) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.593034631187635, LR: 0.0003 +[2026-03-02 22:20:29] (step=0038809) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.593230287614948, LR: 0.0003 +[2026-03-02 22:20:37] (step=0038810) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.593425944042262, LR: 0.0003 +[2026-03-02 22:20:44] (step=0038811) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 7.593621600469575, LR: 0.0003 +[2026-03-02 22:20:52] (step=0038812) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 7.593817256896889, LR: 0.0003 +[2026-03-02 22:21:00] (step=0038813) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 7.594012913324203, LR: 0.0003 +[2026-03-02 22:21:08] (step=0038814) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.594208569751516, LR: 0.0003 +[2026-03-02 22:21:16] (step=0038815) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.59440422617883, LR: 0.0003 +[2026-03-02 22:21:24] (step=0038816) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.594599882606143, LR: 0.0003 +[2026-03-02 22:21:31] (step=0038817) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.594795539033457, LR: 0.0003 +[2026-03-02 22:21:39] (step=0038818) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.5949911954607705, LR: 0.0003 +[2026-03-02 22:21:47] (step=0038819) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.5951868518880845, LR: 0.0003 +[2026-03-02 22:21:55] (step=0038820) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 7.5953825083153985, LR: 0.0003 +[2026-03-02 22:22:03] (step=0038821) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.595578164742712, LR: 0.0003 +[2026-03-02 22:22:11] (step=0038822) Train Loss: 0.4250, Train Steps/Sec: 0.13, Epoch: 7.595773821170026, LR: 0.0003 +[2026-03-02 22:22:19] (step=0038823) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.595969477597339, LR: 0.0003 +[2026-03-02 22:22:27] (step=0038824) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.596165134024653, LR: 0.0003 +[2026-03-02 22:22:34] (step=0038825) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.596360790451967, LR: 0.0003 +[2026-03-02 22:22:42] (step=0038826) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.59655644687928, LR: 0.0003 +[2026-03-02 22:22:50] (step=0038827) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.596752103306594, LR: 0.0003 +[2026-03-02 22:22:58] (step=0038828) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.596947759733907, LR: 0.0003 +[2026-03-02 22:23:06] (step=0038829) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.597143416161221, LR: 0.0003 +[2026-03-02 22:23:14] (step=0038830) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 7.597339072588534, LR: 0.0003 +[2026-03-02 22:23:22] (step=0038831) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.597534729015848, LR: 0.0003 +[2026-03-02 22:23:29] (step=0038832) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.597730385443162, LR: 0.0003 +[2026-03-02 22:23:37] (step=0038833) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.597926041870475, LR: 0.0003 +[2026-03-02 22:23:45] (step=0038834) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 7.598121698297789, LR: 0.0003 +[2026-03-02 22:23:53] (step=0038835) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.598317354725102, LR: 0.0003 +[2026-03-02 22:24:01] (step=0038836) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.598513011152416, LR: 0.0003 +[2026-03-02 22:24:09] (step=0038837) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.59870866757973, LR: 0.0003 +[2026-03-02 22:24:17] (step=0038838) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.5989043240070435, LR: 0.0003 +[2026-03-02 22:24:24] (step=0038839) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.5990999804343575, LR: 0.0003 +[2026-03-02 22:24:32] (step=0038840) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.599295636861671, LR: 0.0003 +[2026-03-02 22:24:40] (step=0038841) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 7.599491293288985, LR: 0.0003 +[2026-03-02 22:24:48] (step=0038842) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.599686949716298, LR: 0.0003 +[2026-03-02 22:24:56] (step=0038843) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.599882606143612, LR: 0.0003 +[2026-03-02 22:25:04] (step=0038844) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 7.600078262570926, LR: 0.0003 +[2026-03-02 22:25:12] (step=0038845) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.600273918998239, LR: 0.0003 +[2026-03-02 22:25:20] (step=0038846) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.600469575425553, LR: 0.0003 +[2026-03-02 22:25:27] (step=0038847) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.600665231852866, LR: 0.0003 +[2026-03-02 22:25:35] (step=0038848) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.60086088828018, LR: 0.0003 +[2026-03-02 22:25:43] (step=0038849) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.601056544707494, LR: 0.0003 +[2026-03-02 22:25:51] (step=0038850) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 7.601252201134807, LR: 0.0003 +[2026-03-02 22:25:59] (step=0038851) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.601447857562121, LR: 0.0003 +[2026-03-02 22:26:07] (step=0038852) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.601643513989434, LR: 0.0003 +[2026-03-02 22:26:15] (step=0038853) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.601839170416748, LR: 0.0003 +[2026-03-02 22:26:22] (step=0038854) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.602034826844061, LR: 0.0003 +[2026-03-02 22:26:30] (step=0038855) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.602230483271375, LR: 0.0003 +[2026-03-02 22:26:38] (step=0038856) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.602426139698689, LR: 0.0003 +[2026-03-02 22:26:46] (step=0038857) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.6026217961260025, LR: 0.0003 +[2026-03-02 22:26:54] (step=0038858) Train Loss: 0.4503, Train Steps/Sec: 0.12, Epoch: 7.6028174525533165, LR: 0.0003 +[2026-03-02 22:27:02] (step=0038859) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.60301310898063, LR: 0.0003 +[2026-03-02 22:27:10] (step=0038860) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.603208765407944, LR: 0.0003 +[2026-03-02 22:27:18] (step=0038861) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 7.603404421835258, LR: 0.0003 +[2026-03-02 22:27:25] (step=0038862) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 7.603600078262571, LR: 0.0003 +[2026-03-02 22:27:33] (step=0038863) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.603795734689885, LR: 0.0003 +[2026-03-02 22:27:41] (step=0038864) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 7.603991391117198, LR: 0.0003 +[2026-03-02 22:27:49] (step=0038865) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.604187047544512, LR: 0.0003 +[2026-03-02 22:27:57] (step=0038866) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.604382703971825, LR: 0.0003 +[2026-03-02 22:28:05] (step=0038867) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 7.604578360399139, LR: 0.0003 +[2026-03-02 22:28:13] (step=0038868) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 7.604774016826453, LR: 0.0003 +[2026-03-02 22:28:20] (step=0038869) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.604969673253766, LR: 0.0003 +[2026-03-02 22:28:28] (step=0038870) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.60516532968108, LR: 0.0003 +[2026-03-02 22:28:36] (step=0038871) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.605360986108393, LR: 0.0003 +[2026-03-02 22:28:44] (step=0038872) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.605556642535707, LR: 0.0003 +[2026-03-02 22:28:52] (step=0038873) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.605752298963021, LR: 0.0003 +[2026-03-02 22:29:00] (step=0038874) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.605947955390334, LR: 0.0003 +[2026-03-02 22:29:08] (step=0038875) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.606143611817648, LR: 0.0003 +[2026-03-02 22:29:15] (step=0038876) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 7.6063392682449615, LR: 0.0003 +[2026-03-02 22:29:23] (step=0038877) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.6065349246722755, LR: 0.0003 +[2026-03-02 22:29:31] (step=0038878) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.6067305810995895, LR: 0.0003 +[2026-03-02 22:29:39] (step=0038879) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.606926237526903, LR: 0.0003 +[2026-03-02 22:29:47] (step=0038880) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.607121893954217, LR: 0.0003 +[2026-03-02 22:29:55] (step=0038881) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.60731755038153, LR: 0.0003 +[2026-03-02 22:30:03] (step=0038882) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.607513206808844, LR: 0.0003 +[2026-03-02 22:30:10] (step=0038883) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.607708863236157, LR: 0.0003 +[2026-03-02 22:30:18] (step=0038884) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.607904519663471, LR: 0.0003 +[2026-03-02 22:30:26] (step=0038885) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.608100176090785, LR: 0.0003 +[2026-03-02 22:30:34] (step=0038886) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.608295832518098, LR: 0.0003 +[2026-03-02 22:30:42] (step=0038887) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.608491488945412, LR: 0.0003 +[2026-03-02 22:30:50] (step=0038888) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.608687145372725, LR: 0.0003 +[2026-03-02 22:30:58] (step=0038889) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.608882801800039, LR: 0.0003 +[2026-03-02 22:31:06] (step=0038890) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.609078458227353, LR: 0.0003 +[2026-03-02 22:31:13] (step=0038891) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.609274114654666, LR: 0.0003 +[2026-03-02 22:31:21] (step=0038892) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.60946977108198, LR: 0.0003 +[2026-03-02 22:31:29] (step=0038893) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.609665427509293, LR: 0.0003 +[2026-03-02 22:31:37] (step=0038894) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 7.609861083936607, LR: 0.0003 +[2026-03-02 22:31:45] (step=0038895) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.6100567403639205, LR: 0.0003 +[2026-03-02 22:31:53] (step=0038896) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.6102523967912346, LR: 0.0003 +[2026-03-02 22:32:01] (step=0038897) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.610448053218549, LR: 0.0003 +[2026-03-02 22:32:08] (step=0038898) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.610643709645862, LR: 0.0003 +[2026-03-02 22:32:16] (step=0038899) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.610839366073176, LR: 0.0003 +[2026-03-02 22:32:24] (step=0038900) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.611035022500489, LR: 0.0003 +[2026-03-02 22:32:32] (step=0038901) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.611230678927803, LR: 0.0003 +[2026-03-02 22:32:40] (step=0038902) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.611426335355117, LR: 0.0003 +[2026-03-02 22:32:48] (step=0038903) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.61162199178243, LR: 0.0003 +[2026-03-02 22:32:56] (step=0038904) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.611817648209744, LR: 0.0003 +[2026-03-02 22:33:04] (step=0038905) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 7.612013304637057, LR: 0.0003 +[2026-03-02 22:33:11] (step=0038906) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.612208961064371, LR: 0.0003 +[2026-03-02 22:33:19] (step=0038907) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.612404617491684, LR: 0.0003 +[2026-03-02 22:33:27] (step=0038908) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.612600273918998, LR: 0.0003 +[2026-03-02 22:33:35] (step=0038909) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.612795930346312, LR: 0.0003 +[2026-03-02 22:33:43] (step=0038910) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.612991586773625, LR: 0.0003 +[2026-03-02 22:33:51] (step=0038911) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.613187243200939, LR: 0.0003 +[2026-03-02 22:33:59] (step=0038912) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 7.613382899628252, LR: 0.0003 +[2026-03-02 22:34:06] (step=0038913) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.613578556055566, LR: 0.0003 +[2026-03-02 22:34:14] (step=0038914) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.61377421248288, LR: 0.0003 +[2026-03-02 22:34:22] (step=0038915) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.613969868910194, LR: 0.0003 +[2026-03-02 22:34:30] (step=0038916) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.614165525337508, LR: 0.0003 +[2026-03-02 22:34:38] (step=0038917) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.614361181764821, LR: 0.0003 +[2026-03-02 22:34:46] (step=0038918) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.614556838192135, LR: 0.0003 +[2026-03-02 22:34:54] (step=0038919) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.614752494619448, LR: 0.0003 +[2026-03-02 22:35:01] (step=0038920) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 7.614948151046762, LR: 0.0003 +[2026-03-02 22:35:09] (step=0038921) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.615143807474076, LR: 0.0003 +[2026-03-02 22:35:17] (step=0038922) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.615339463901389, LR: 0.0003 +[2026-03-02 22:35:25] (step=0038923) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.615535120328703, LR: 0.0003 +[2026-03-02 22:35:33] (step=0038924) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.615730776756016, LR: 0.0003 +[2026-03-02 22:35:41] (step=0038925) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 7.61592643318333, LR: 0.0003 +[2026-03-02 22:35:49] (step=0038926) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 7.616122089610644, LR: 0.0003 +[2026-03-02 22:35:56] (step=0038927) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.616317746037957, LR: 0.0003 +[2026-03-02 22:36:04] (step=0038928) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.616513402465271, LR: 0.0003 +[2026-03-02 22:36:12] (step=0038929) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.616709058892584, LR: 0.0003 +[2026-03-02 22:36:20] (step=0038930) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 7.616904715319898, LR: 0.0003 +[2026-03-02 22:36:28] (step=0038931) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.617100371747212, LR: 0.0003 +[2026-03-02 22:36:36] (step=0038932) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.6172960281745254, LR: 0.0003 +[2026-03-02 22:36:44] (step=0038933) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.6174916846018395, LR: 0.0003 +[2026-03-02 22:36:52] (step=0038934) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.617687341029153, LR: 0.0003 +[2026-03-02 22:36:59] (step=0038935) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.617882997456467, LR: 0.0003 +[2026-03-02 22:37:07] (step=0038936) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.61807865388378, LR: 0.0003 +[2026-03-02 22:37:15] (step=0038937) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.618274310311094, LR: 0.0003 +[2026-03-02 22:37:23] (step=0038938) Train Loss: 0.4465, Train Steps/Sec: 0.12, Epoch: 7.618469966738408, LR: 0.0003 +[2026-03-02 22:37:31] (step=0038939) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.618665623165721, LR: 0.0003 +[2026-03-02 22:37:39] (step=0038940) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.618861279593035, LR: 0.0003 +[2026-03-02 22:37:47] (step=0038941) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.619056936020348, LR: 0.0003 +[2026-03-02 22:37:55] (step=0038942) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.619252592447662, LR: 0.0003 +[2026-03-02 22:38:02] (step=0038943) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.619448248874976, LR: 0.0003 +[2026-03-02 22:38:10] (step=0038944) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 7.619643905302289, LR: 0.0003 +[2026-03-02 22:38:18] (step=0038945) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.619839561729603, LR: 0.0003 +[2026-03-02 22:38:26] (step=0038946) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.620035218156916, LR: 0.0003 +[2026-03-02 22:38:34] (step=0038947) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.62023087458423, LR: 0.0003 +[2026-03-02 22:38:42] (step=0038948) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.620426531011543, LR: 0.0003 +[2026-03-02 22:38:50] (step=0038949) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.620622187438857, LR: 0.0003 +[2026-03-02 22:38:57] (step=0038950) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.620817843866171, LR: 0.0003 +[2026-03-02 22:39:05] (step=0038951) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 7.6210135002934845, LR: 0.0003 +[2026-03-02 22:39:13] (step=0038952) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.6212091567207985, LR: 0.0003 +[2026-03-02 22:39:21] (step=0038953) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.621404813148112, LR: 0.0003 +[2026-03-02 22:39:29] (step=0038954) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.621600469575426, LR: 0.0003 +[2026-03-02 22:39:37] (step=0038955) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.62179612600274, LR: 0.0003 +[2026-03-02 22:39:45] (step=0038956) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.621991782430053, LR: 0.0003 +[2026-03-02 22:39:53] (step=0038957) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.622187438857367, LR: 0.0003 +[2026-03-02 22:40:00] (step=0038958) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.62238309528468, LR: 0.0003 +[2026-03-02 22:40:08] (step=0038959) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.622578751711994, LR: 0.0003 +[2026-03-02 22:40:16] (step=0038960) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.622774408139307, LR: 0.0003 +[2026-03-02 22:40:24] (step=0038961) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.622970064566621, LR: 0.0003 +[2026-03-02 22:40:32] (step=0038962) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.623165720993935, LR: 0.0003 +[2026-03-02 22:40:40] (step=0038963) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.623361377421248, LR: 0.0003 +[2026-03-02 22:40:48] (step=0038964) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.623557033848562, LR: 0.0003 +[2026-03-02 22:40:55] (step=0038965) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 7.623752690275875, LR: 0.0003 +[2026-03-02 22:41:03] (step=0038966) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 7.623948346703189, LR: 0.0003 +[2026-03-02 22:41:11] (step=0038967) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.624144003130503, LR: 0.0003 +[2026-03-02 22:41:19] (step=0038968) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.624339659557816, LR: 0.0003 +[2026-03-02 22:41:27] (step=0038969) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.62453531598513, LR: 0.0003 +[2026-03-02 22:41:35] (step=0038970) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.6247309724124435, LR: 0.0003 +[2026-03-02 22:41:42] (step=0038971) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.6249266288397575, LR: 0.0003 +[2026-03-02 22:41:50] (step=0038972) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 7.625122285267071, LR: 0.0003 +[2026-03-02 22:41:58] (step=0038973) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.625317941694385, LR: 0.0003 +[2026-03-02 22:42:06] (step=0038974) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.625513598121699, LR: 0.0003 +[2026-03-02 22:42:14] (step=0038975) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.625709254549012, LR: 0.0003 +[2026-03-02 22:42:22] (step=0038976) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.625904910976326, LR: 0.0003 +[2026-03-02 22:42:30] (step=0038977) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.626100567403639, LR: 0.0003 +[2026-03-02 22:42:37] (step=0038978) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.626296223830953, LR: 0.0003 +[2026-03-02 22:42:45] (step=0038979) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.626491880258267, LR: 0.0003 +[2026-03-02 22:42:53] (step=0038980) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 7.62668753668558, LR: 0.0003 +[2026-03-02 22:43:01] (step=0038981) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.626883193112894, LR: 0.0003 +[2026-03-02 22:43:09] (step=0038982) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.627078849540207, LR: 0.0003 +[2026-03-02 22:43:17] (step=0038983) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.627274505967521, LR: 0.0003 +[2026-03-02 22:43:25] (step=0038984) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.627470162394835, LR: 0.0003 +[2026-03-02 22:43:32] (step=0038985) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 7.627665818822148, LR: 0.0003 +[2026-03-02 22:43:40] (step=0038986) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.627861475249462, LR: 0.0003 +[2026-03-02 22:43:48] (step=0038987) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.628057131676775, LR: 0.0003 +[2026-03-02 22:43:56] (step=0038988) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.628252788104089, LR: 0.0003 +[2026-03-02 22:44:04] (step=0038989) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.6284484445314025, LR: 0.0003 +[2026-03-02 22:44:12] (step=0038990) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.6286441009587165, LR: 0.0003 +[2026-03-02 22:44:20] (step=0038991) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.6288397573860305, LR: 0.0003 +[2026-03-02 22:44:28] (step=0038992) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.629035413813344, LR: 0.0003 +[2026-03-02 22:44:35] (step=0038993) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.629231070240658, LR: 0.0003 +[2026-03-02 22:44:43] (step=0038994) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.629426726667971, LR: 0.0003 +[2026-03-02 22:44:51] (step=0038995) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.629622383095285, LR: 0.0003 +[2026-03-02 22:44:59] (step=0038996) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.629818039522599, LR: 0.0003 +[2026-03-02 22:45:07] (step=0038997) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.630013695949912, LR: 0.0003 +[2026-03-02 22:45:15] (step=0038998) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.630209352377226, LR: 0.0003 +[2026-03-02 22:45:23] (step=0038999) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.630405008804539, LR: 0.0003 +[2026-03-02 22:45:30] (step=0039000) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 7.630600665231853, LR: 0.0003 +[2026-03-02 22:45:31] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0039000/ +[2026-03-02 22:45:39] (step=0039001) Train Loss: 0.4518, Train Steps/Sec: 0.12, Epoch: 7.630796321659166, LR: 0.0003 +[2026-03-02 22:45:46] (step=0039002) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.63099197808648, LR: 0.0003 +[2026-03-02 22:45:54] (step=0039003) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.631187634513794, LR: 0.0003 +[2026-03-02 22:46:02] (step=0039004) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.631383290941107, LR: 0.0003 +[2026-03-02 22:46:10] (step=0039005) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.631578947368421, LR: 0.0003 +[2026-03-02 22:46:18] (step=0039006) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.631774603795734, LR: 0.0003 +[2026-03-02 22:46:26] (step=0039007) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 7.631970260223048, LR: 0.0003 +[2026-03-02 22:46:33] (step=0039008) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 7.632165916650362, LR: 0.0003 +[2026-03-02 22:46:41] (step=0039009) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.6323615730776755, LR: 0.0003 +[2026-03-02 22:46:49] (step=0039010) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.6325572295049895, LR: 0.0003 +[2026-03-02 22:46:57] (step=0039011) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.632752885932303, LR: 0.0003 +[2026-03-02 22:47:05] (step=0039012) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.632948542359617, LR: 0.0003 +[2026-03-02 22:47:13] (step=0039013) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.63314419878693, LR: 0.0003 +[2026-03-02 22:47:21] (step=0039014) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.633339855214244, LR: 0.0003 +[2026-03-02 22:47:28] (step=0039015) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.633535511641558, LR: 0.0003 +[2026-03-02 22:47:36] (step=0039016) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.633731168068871, LR: 0.0003 +[2026-03-02 22:47:44] (step=0039017) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.633926824496185, LR: 0.0003 +[2026-03-02 22:47:52] (step=0039018) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 7.634122480923498, LR: 0.0003 +[2026-03-02 22:48:00] (step=0039019) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.634318137350812, LR: 0.0003 +[2026-03-02 22:48:08] (step=0039020) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.634513793778126, LR: 0.0003 +[2026-03-02 22:48:16] (step=0039021) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.634709450205439, LR: 0.0003 +[2026-03-02 22:48:23] (step=0039022) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.634905106632753, LR: 0.0003 +[2026-03-02 22:48:31] (step=0039023) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 7.635100763060066, LR: 0.0003 +[2026-03-02 22:48:39] (step=0039024) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.63529641948738, LR: 0.0003 +[2026-03-02 22:48:47] (step=0039025) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.635492075914693, LR: 0.0003 +[2026-03-02 22:48:55] (step=0039026) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.635687732342007, LR: 0.0003 +[2026-03-02 22:49:03] (step=0039027) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.635883388769321, LR: 0.0003 +[2026-03-02 22:49:11] (step=0039028) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.6360790451966345, LR: 0.0003 +[2026-03-02 22:49:18] (step=0039029) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.6362747016239485, LR: 0.0003 +[2026-03-02 22:49:26] (step=0039030) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 7.636470358051262, LR: 0.0003 +[2026-03-02 22:49:34] (step=0039031) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.636666014478576, LR: 0.0003 +[2026-03-02 22:49:42] (step=0039032) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.63686167090589, LR: 0.0003 +[2026-03-02 22:49:50] (step=0039033) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.637057327333203, LR: 0.0003 +[2026-03-02 22:49:58] (step=0039034) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.637252983760517, LR: 0.0003 +[2026-03-02 22:50:06] (step=0039035) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.63744864018783, LR: 0.0003 +[2026-03-02 22:50:13] (step=0039036) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.637644296615144, LR: 0.0003 +[2026-03-02 22:50:21] (step=0039037) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.637839953042458, LR: 0.0003 +[2026-03-02 22:50:29] (step=0039038) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.638035609469771, LR: 0.0003 +[2026-03-02 22:50:37] (step=0039039) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.638231265897085, LR: 0.0003 +[2026-03-02 22:50:45] (step=0039040) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.638426922324398, LR: 0.0003 +[2026-03-02 22:50:53] (step=0039041) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.638622578751712, LR: 0.0003 +[2026-03-02 22:51:01] (step=0039042) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.638818235179025, LR: 0.0003 +[2026-03-02 22:51:09] (step=0039043) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.639013891606339, LR: 0.0003 +[2026-03-02 22:51:16] (step=0039044) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 7.639209548033653, LR: 0.0003 +[2026-03-02 22:51:24] (step=0039045) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.639405204460966, LR: 0.0003 +[2026-03-02 22:51:32] (step=0039046) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.63960086088828, LR: 0.0003 +[2026-03-02 22:51:40] (step=0039047) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.6397965173155935, LR: 0.0003 +[2026-03-02 22:51:48] (step=0039048) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.6399921737429075, LR: 0.0003 +[2026-03-02 22:51:56] (step=0039049) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.6401878301702215, LR: 0.0003 +[2026-03-02 22:52:04] (step=0039050) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 7.640383486597535, LR: 0.0003 +[2026-03-02 22:52:11] (step=0039051) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.640579143024849, LR: 0.0003 +[2026-03-02 22:52:19] (step=0039052) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.640774799452162, LR: 0.0003 +[2026-03-02 22:52:27] (step=0039053) Train Loss: 0.4341, Train Steps/Sec: 0.12, Epoch: 7.640970455879476, LR: 0.0003 +[2026-03-02 22:52:35] (step=0039054) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.641166112306789, LR: 0.0003 +[2026-03-02 22:52:43] (step=0039055) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.641361768734103, LR: 0.0003 +[2026-03-02 22:52:51] (step=0039056) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.641557425161417, LR: 0.0003 +[2026-03-02 22:52:59] (step=0039057) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.64175308158873, LR: 0.0003 +[2026-03-02 22:53:06] (step=0039058) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.641948738016044, LR: 0.0003 +[2026-03-02 22:53:14] (step=0039059) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.642144394443357, LR: 0.0003 +[2026-03-02 22:53:22] (step=0039060) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.642340050870671, LR: 0.0003 +[2026-03-02 22:53:30] (step=0039061) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.642535707297985, LR: 0.0003 +[2026-03-02 22:53:38] (step=0039062) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.642731363725298, LR: 0.0003 +[2026-03-02 22:53:46] (step=0039063) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.642927020152612, LR: 0.0003 +[2026-03-02 22:53:54] (step=0039064) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 7.643122676579925, LR: 0.0003 +[2026-03-02 22:54:01] (step=0039065) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.643318333007239, LR: 0.0003 +[2026-03-02 22:54:09] (step=0039066) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.6435139894345525, LR: 0.0003 +[2026-03-02 22:54:17] (step=0039067) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.6437096458618665, LR: 0.0003 +[2026-03-02 22:54:25] (step=0039068) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.6439053022891805, LR: 0.0003 +[2026-03-02 22:54:33] (step=0039069) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.644100958716494, LR: 0.0003 +[2026-03-02 22:54:41] (step=0039070) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.644296615143808, LR: 0.0003 +[2026-03-02 22:54:49] (step=0039071) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.644492271571121, LR: 0.0003 +[2026-03-02 22:54:56] (step=0039072) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.644687927998435, LR: 0.0003 +[2026-03-02 22:55:04] (step=0039073) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.644883584425749, LR: 0.0003 +[2026-03-02 22:55:12] (step=0039074) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.645079240853062, LR: 0.0003 +[2026-03-02 22:55:20] (step=0039075) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 7.645274897280376, LR: 0.0003 +[2026-03-02 22:55:28] (step=0039076) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.645470553707689, LR: 0.0003 +[2026-03-02 22:55:36] (step=0039077) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.645666210135003, LR: 0.0003 +[2026-03-02 22:55:44] (step=0039078) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 7.645861866562316, LR: 0.0003 +[2026-03-02 22:55:51] (step=0039079) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 7.64605752298963, LR: 0.0003 +[2026-03-02 22:55:59] (step=0039080) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.646253179416944, LR: 0.0003 +[2026-03-02 22:56:07] (step=0039081) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.646448835844257, LR: 0.0003 +[2026-03-02 22:56:15] (step=0039082) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.646644492271571, LR: 0.0003 +[2026-03-02 22:56:23] (step=0039083) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.646840148698884, LR: 0.0003 +[2026-03-02 22:56:31] (step=0039084) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.647035805126198, LR: 0.0003 +[2026-03-02 22:56:39] (step=0039085) Train Loss: 0.4441, Train Steps/Sec: 0.12, Epoch: 7.647231461553512, LR: 0.0003 +[2026-03-02 22:56:46] (step=0039086) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 7.6474271179808255, LR: 0.0003 +[2026-03-02 22:56:54] (step=0039087) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.6476227744081395, LR: 0.0003 +[2026-03-02 22:57:02] (step=0039088) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.647818430835453, LR: 0.0003 +[2026-03-02 22:57:10] (step=0039089) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.648014087262767, LR: 0.0003 +[2026-03-02 22:57:18] (step=0039090) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.64820974369008, LR: 0.0003 +[2026-03-02 22:57:26] (step=0039091) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.648405400117394, LR: 0.0003 +[2026-03-02 22:57:34] (step=0039092) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.648601056544708, LR: 0.0003 +[2026-03-02 22:57:41] (step=0039093) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.648796712972021, LR: 0.0003 +[2026-03-02 22:57:49] (step=0039094) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.648992369399335, LR: 0.0003 +[2026-03-02 22:57:57] (step=0039095) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.649188025826648, LR: 0.0003 +[2026-03-02 22:58:05] (step=0039096) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.649383682253962, LR: 0.0003 +[2026-03-02 22:58:13] (step=0039097) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.649579338681276, LR: 0.0003 +[2026-03-02 22:58:21] (step=0039098) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.649774995108589, LR: 0.0003 +[2026-03-02 22:58:29] (step=0039099) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 7.649970651535903, LR: 0.0003 +[2026-03-02 22:58:36] (step=0039100) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.650166307963216, LR: 0.0003 +[2026-03-02 22:58:44] (step=0039101) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.65036196439053, LR: 0.0003 +[2026-03-02 22:58:52] (step=0039102) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.650557620817844, LR: 0.0003 +[2026-03-02 22:59:00] (step=0039103) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.650753277245157, LR: 0.0003 +[2026-03-02 22:59:08] (step=0039104) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.650948933672471, LR: 0.0003 +[2026-03-02 22:59:16] (step=0039105) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 7.6511445900997845, LR: 0.0003 +[2026-03-02 22:59:24] (step=0039106) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.6513402465270985, LR: 0.0003 +[2026-03-02 22:59:32] (step=0039107) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.651535902954412, LR: 0.0003 +[2026-03-02 22:59:39] (step=0039108) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 7.651731559381726, LR: 0.0003 +[2026-03-02 22:59:47] (step=0039109) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.65192721580904, LR: 0.0003 +[2026-03-02 22:59:55] (step=0039110) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.652122872236353, LR: 0.0003 +[2026-03-02 23:00:03] (step=0039111) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.652318528663667, LR: 0.0003 +[2026-03-02 23:00:11] (step=0039112) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.65251418509098, LR: 0.0003 +[2026-03-02 23:00:19] (step=0039113) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.652709841518294, LR: 0.0003 +[2026-03-02 23:00:27] (step=0039114) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.652905497945608, LR: 0.0003 +[2026-03-02 23:00:34] (step=0039115) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 7.653101154372921, LR: 0.0003 +[2026-03-02 23:00:42] (step=0039116) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.653296810800235, LR: 0.0003 +[2026-03-02 23:00:50] (step=0039117) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.653492467227548, LR: 0.0003 +[2026-03-02 23:00:58] (step=0039118) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.653688123654862, LR: 0.0003 +[2026-03-02 23:01:06] (step=0039119) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.653883780082175, LR: 0.0003 +[2026-03-02 23:01:14] (step=0039120) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.654079436509489, LR: 0.0003 +[2026-03-02 23:01:22] (step=0039121) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.654275092936803, LR: 0.0003 +[2026-03-02 23:01:29] (step=0039122) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.654470749364116, LR: 0.0003 +[2026-03-02 23:01:37] (step=0039123) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.65466640579143, LR: 0.0003 +[2026-03-02 23:01:45] (step=0039124) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.6548620622187435, LR: 0.0003 +[2026-03-02 23:01:53] (step=0039125) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.6550577186460576, LR: 0.0003 +[2026-03-02 23:02:01] (step=0039126) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.655253375073372, LR: 0.0003 +[2026-03-02 23:02:09] (step=0039127) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.655449031500685, LR: 0.0003 +[2026-03-02 23:02:16] (step=0039128) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.655644687927999, LR: 0.0003 +[2026-03-02 23:02:24] (step=0039129) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.655840344355312, LR: 0.0003 +[2026-03-02 23:02:32] (step=0039130) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 7.656036000782626, LR: 0.0003 +[2026-03-02 23:02:40] (step=0039131) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.656231657209939, LR: 0.0003 +[2026-03-02 23:02:48] (step=0039132) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.656427313637253, LR: 0.0003 +[2026-03-02 23:02:56] (step=0039133) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.656622970064567, LR: 0.0003 +[2026-03-02 23:03:04] (step=0039134) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.65681862649188, LR: 0.0003 +[2026-03-02 23:03:12] (step=0039135) Train Loss: 0.4461, Train Steps/Sec: 0.12, Epoch: 7.657014282919194, LR: 0.0003 +[2026-03-02 23:03:19] (step=0039136) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.657209939346507, LR: 0.0003 +[2026-03-02 23:03:27] (step=0039137) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.657405595773821, LR: 0.0003 +[2026-03-02 23:03:35] (step=0039138) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.657601252201135, LR: 0.0003 +[2026-03-02 23:03:43] (step=0039139) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.657796908628448, LR: 0.0003 +[2026-03-02 23:03:51] (step=0039140) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.657992565055762, LR: 0.0003 +[2026-03-02 23:03:59] (step=0039141) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.658188221483075, LR: 0.0003 +[2026-03-02 23:04:07] (step=0039142) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.658383877910389, LR: 0.0003 +[2026-03-02 23:04:14] (step=0039143) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 7.6585795343377026, LR: 0.0003 +[2026-03-02 23:04:22] (step=0039144) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.658775190765017, LR: 0.0003 +[2026-03-02 23:04:30] (step=0039145) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.658970847192331, LR: 0.0003 +[2026-03-02 23:04:38] (step=0039146) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.659166503619644, LR: 0.0003 +[2026-03-02 23:04:46] (step=0039147) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.659362160046958, LR: 0.0003 +[2026-03-02 23:04:54] (step=0039148) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.659557816474271, LR: 0.0003 +[2026-03-02 23:05:02] (step=0039149) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.659753472901585, LR: 0.0003 +[2026-03-02 23:05:09] (step=0039150) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.659949129328899, LR: 0.0003 +[2026-03-02 23:05:17] (step=0039151) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 7.660144785756212, LR: 0.0003 +[2026-03-02 23:05:25] (step=0039152) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.660340442183526, LR: 0.0003 +[2026-03-02 23:05:33] (step=0039153) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.660536098610839, LR: 0.0003 +[2026-03-02 23:05:41] (step=0039154) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.660731755038153, LR: 0.0003 +[2026-03-02 23:05:49] (step=0039155) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.660927411465467, LR: 0.0003 +[2026-03-02 23:05:57] (step=0039156) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.66112306789278, LR: 0.0003 +[2026-03-02 23:06:05] (step=0039157) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.661318724320094, LR: 0.0003 +[2026-03-02 23:06:12] (step=0039158) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.661514380747407, LR: 0.0003 +[2026-03-02 23:06:20] (step=0039159) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.661710037174721, LR: 0.0003 +[2026-03-02 23:06:28] (step=0039160) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.661905693602034, LR: 0.0003 +[2026-03-02 23:06:36] (step=0039161) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.6621013500293484, LR: 0.0003 +[2026-03-02 23:06:44] (step=0039162) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.6622970064566625, LR: 0.0003 +[2026-03-02 23:06:52] (step=0039163) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.662492662883976, LR: 0.0003 +[2026-03-02 23:06:59] (step=0039164) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.66268831931129, LR: 0.0003 +[2026-03-02 23:07:07] (step=0039165) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.662883975738603, LR: 0.0003 +[2026-03-02 23:07:15] (step=0039166) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.663079632165917, LR: 0.0003 +[2026-03-02 23:07:23] (step=0039167) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.663275288593231, LR: 0.0003 +[2026-03-02 23:07:31] (step=0039168) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.663470945020544, LR: 0.0003 +[2026-03-02 23:07:39] (step=0039169) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.663666601447858, LR: 0.0003 +[2026-03-02 23:07:47] (step=0039170) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.663862257875171, LR: 0.0003 +[2026-03-02 23:07:54] (step=0039171) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 7.664057914302485, LR: 0.0003 +[2026-03-02 23:08:02] (step=0039172) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 7.664253570729798, LR: 0.0003 +[2026-03-02 23:08:10] (step=0039173) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.664449227157112, LR: 0.0003 +[2026-03-02 23:08:18] (step=0039174) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.664644883584426, LR: 0.0003 +[2026-03-02 23:08:26] (step=0039175) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.664840540011739, LR: 0.0003 +[2026-03-02 23:08:34] (step=0039176) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.665036196439053, LR: 0.0003 +[2026-03-02 23:08:42] (step=0039177) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.665231852866366, LR: 0.0003 +[2026-03-02 23:08:49] (step=0039178) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.66542750929368, LR: 0.0003 +[2026-03-02 23:08:57] (step=0039179) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.665623165720994, LR: 0.0003 +[2026-03-02 23:09:05] (step=0039180) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.6658188221483075, LR: 0.0003 +[2026-03-02 23:09:13] (step=0039181) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.6660144785756215, LR: 0.0003 +[2026-03-02 23:09:21] (step=0039182) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.666210135002935, LR: 0.0003 +[2026-03-02 23:09:29] (step=0039183) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.666405791430249, LR: 0.0003 +[2026-03-02 23:09:37] (step=0039184) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.666601447857562, LR: 0.0003 +[2026-03-02 23:09:45] (step=0039185) Train Loss: 0.4514, Train Steps/Sec: 0.12, Epoch: 7.666797104284876, LR: 0.0003 +[2026-03-02 23:09:52] (step=0039186) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.66699276071219, LR: 0.0003 +[2026-03-02 23:10:00] (step=0039187) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.667188417139503, LR: 0.0003 +[2026-03-02 23:10:08] (step=0039188) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.667384073566817, LR: 0.0003 +[2026-03-02 23:10:16] (step=0039189) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.66757972999413, LR: 0.0003 +[2026-03-02 23:10:24] (step=0039190) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.667775386421444, LR: 0.0003 +[2026-03-02 23:10:32] (step=0039191) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.667971042848758, LR: 0.0003 +[2026-03-02 23:10:40] (step=0039192) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 7.668166699276071, LR: 0.0003 +[2026-03-02 23:10:47] (step=0039193) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 7.668362355703385, LR: 0.0003 +[2026-03-02 23:10:55] (step=0039194) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.668558012130698, LR: 0.0003 +[2026-03-02 23:11:03] (step=0039195) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.668753668558012, LR: 0.0003 +[2026-03-02 23:11:11] (step=0039196) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.668949324985325, LR: 0.0003 +[2026-03-02 23:11:19] (step=0039197) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.669144981412639, LR: 0.0003 +[2026-03-02 23:11:27] (step=0039198) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.669340637839953, LR: 0.0003 +[2026-03-02 23:11:35] (step=0039199) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.6695362942672665, LR: 0.0003 +[2026-03-02 23:11:42] (step=0039200) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.6697319506945805, LR: 0.0003 +[2026-03-02 23:11:50] (step=0039201) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.669927607121894, LR: 0.0003 +[2026-03-02 23:11:58] (step=0039202) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.670123263549208, LR: 0.0003 +[2026-03-02 23:12:06] (step=0039203) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.670318919976522, LR: 0.0003 +[2026-03-02 23:12:14] (step=0039204) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.670514576403835, LR: 0.0003 +[2026-03-02 23:12:22] (step=0039205) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.670710232831149, LR: 0.0003 +[2026-03-02 23:12:30] (step=0039206) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.670905889258462, LR: 0.0003 +[2026-03-02 23:12:38] (step=0039207) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.671101545685776, LR: 0.0003 +[2026-03-02 23:12:45] (step=0039208) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.67129720211309, LR: 0.0003 +[2026-03-02 23:12:53] (step=0039209) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.671492858540403, LR: 0.0003 +[2026-03-02 23:13:01] (step=0039210) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.671688514967717, LR: 0.0003 +[2026-03-02 23:13:09] (step=0039211) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.67188417139503, LR: 0.0003 +[2026-03-02 23:13:17] (step=0039212) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.672079827822344, LR: 0.0003 +[2026-03-02 23:13:25] (step=0039213) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.672275484249657, LR: 0.0003 +[2026-03-02 23:13:33] (step=0039214) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.672471140676971, LR: 0.0003 +[2026-03-02 23:13:40] (step=0039215) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.672666797104285, LR: 0.0003 +[2026-03-02 23:13:48] (step=0039216) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.672862453531598, LR: 0.0003 +[2026-03-02 23:13:56] (step=0039217) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.673058109958912, LR: 0.0003 +[2026-03-02 23:14:04] (step=0039218) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.6732537663862255, LR: 0.0003 +[2026-03-02 23:14:12] (step=0039219) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.6734494228135395, LR: 0.0003 +[2026-03-02 23:14:20] (step=0039220) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.6736450792408535, LR: 0.0003 +[2026-03-02 23:14:28] (step=0039221) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.673840735668167, LR: 0.0003 +[2026-03-02 23:14:35] (step=0039222) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.674036392095481, LR: 0.0003 +[2026-03-02 23:14:43] (step=0039223) Train Loss: 0.4668, Train Steps/Sec: 0.13, Epoch: 7.674232048522794, LR: 0.0003 +[2026-03-02 23:14:51] (step=0039224) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.674427704950108, LR: 0.0003 +[2026-03-02 23:14:59] (step=0039225) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.674623361377421, LR: 0.0003 +[2026-03-02 23:15:07] (step=0039226) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.674819017804735, LR: 0.0003 +[2026-03-02 23:15:15] (step=0039227) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.675014674232049, LR: 0.0003 +[2026-03-02 23:15:23] (step=0039228) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.675210330659362, LR: 0.0003 +[2026-03-02 23:15:30] (step=0039229) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 7.675405987086676, LR: 0.0003 +[2026-03-02 23:15:38] (step=0039230) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.675601643513989, LR: 0.0003 +[2026-03-02 23:15:46] (step=0039231) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.675797299941303, LR: 0.0003 +[2026-03-02 23:15:54] (step=0039232) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.675992956368617, LR: 0.0003 +[2026-03-02 23:16:02] (step=0039233) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.67618861279593, LR: 0.0003 +[2026-03-02 23:16:10] (step=0039234) Train Loss: 0.4536, Train Steps/Sec: 0.12, Epoch: 7.676384269223244, LR: 0.0003 +[2026-03-02 23:16:18] (step=0039235) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.676579925650557, LR: 0.0003 +[2026-03-02 23:16:26] (step=0039236) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.676775582077871, LR: 0.0003 +[2026-03-02 23:16:33] (step=0039237) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.6769712385051845, LR: 0.0003 +[2026-03-02 23:16:41] (step=0039238) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.6771668949324985, LR: 0.0003 +[2026-03-02 23:16:49] (step=0039239) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.6773625513598125, LR: 0.0003 +[2026-03-02 23:16:57] (step=0039240) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.677558207787126, LR: 0.0003 +[2026-03-02 23:17:05] (step=0039241) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.67775386421444, LR: 0.0003 +[2026-03-02 23:17:13] (step=0039242) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.677949520641753, LR: 0.0003 +[2026-03-02 23:17:21] (step=0039243) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 7.678145177069067, LR: 0.0003 +[2026-03-02 23:17:28] (step=0039244) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.678340833496381, LR: 0.0003 +[2026-03-02 23:17:36] (step=0039245) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.678536489923694, LR: 0.0003 +[2026-03-02 23:17:44] (step=0039246) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.678732146351008, LR: 0.0003 +[2026-03-02 23:17:52] (step=0039247) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.678927802778321, LR: 0.0003 +[2026-03-02 23:18:00] (step=0039248) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 7.679123459205635, LR: 0.0003 +[2026-03-02 23:18:08] (step=0039249) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.679319115632948, LR: 0.0003 +[2026-03-02 23:18:16] (step=0039250) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.679514772060262, LR: 0.0003 +[2026-03-02 23:18:23] (step=0039251) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 7.679710428487576, LR: 0.0003 +[2026-03-02 23:18:31] (step=0039252) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 7.679906084914889, LR: 0.0003 +[2026-03-02 23:18:39] (step=0039253) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.680101741342203, LR: 0.0003 +[2026-03-02 23:18:47] (step=0039254) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.680297397769516, LR: 0.0003 +[2026-03-02 23:18:55] (step=0039255) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.68049305419683, LR: 0.0003 +[2026-03-02 23:19:03] (step=0039256) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.680688710624144, LR: 0.0003 +[2026-03-02 23:19:11] (step=0039257) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.6808843670514575, LR: 0.0003 +[2026-03-02 23:19:18] (step=0039258) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.6810800234787715, LR: 0.0003 +[2026-03-02 23:19:26] (step=0039259) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 7.681275679906085, LR: 0.0003 +[2026-03-02 23:19:34] (step=0039260) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.681471336333399, LR: 0.0003 +[2026-03-02 23:19:42] (step=0039261) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.681666992760713, LR: 0.0003 +[2026-03-02 23:19:50] (step=0039262) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.681862649188026, LR: 0.0003 +[2026-03-02 23:19:58] (step=0039263) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.68205830561534, LR: 0.0003 +[2026-03-02 23:20:05] (step=0039264) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.682253962042653, LR: 0.0003 +[2026-03-02 23:20:13] (step=0039265) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.682449618469967, LR: 0.0003 +[2026-03-02 23:20:21] (step=0039266) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 7.68264527489728, LR: 0.0003 +[2026-03-02 23:20:29] (step=0039267) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.682840931324594, LR: 0.0003 +[2026-03-02 23:20:37] (step=0039268) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.683036587751908, LR: 0.0003 +[2026-03-02 23:20:45] (step=0039269) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.683232244179221, LR: 0.0003 +[2026-03-02 23:20:53] (step=0039270) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.683427900606535, LR: 0.0003 +[2026-03-02 23:21:00] (step=0039271) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.683623557033848, LR: 0.0003 +[2026-03-02 23:21:08] (step=0039272) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 7.683819213461162, LR: 0.0003 +[2026-03-02 23:21:16] (step=0039273) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.684014869888476, LR: 0.0003 +[2026-03-02 23:21:24] (step=0039274) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.684210526315789, LR: 0.0003 +[2026-03-02 23:21:32] (step=0039275) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.684406182743103, LR: 0.0003 +[2026-03-02 23:21:40] (step=0039276) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.6846018391704165, LR: 0.0003 +[2026-03-02 23:21:48] (step=0039277) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.6847974955977305, LR: 0.0003 +[2026-03-02 23:21:55] (step=0039278) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.684993152025044, LR: 0.0003 +[2026-03-02 23:22:03] (step=0039279) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.685188808452358, LR: 0.0003 +[2026-03-02 23:22:11] (step=0039280) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.685384464879672, LR: 0.0003 +[2026-03-02 23:22:19] (step=0039281) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.685580121306985, LR: 0.0003 +[2026-03-02 23:22:27] (step=0039282) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.685775777734299, LR: 0.0003 +[2026-03-02 23:22:35] (step=0039283) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.685971434161612, LR: 0.0003 +[2026-03-02 23:22:43] (step=0039284) Train Loss: 0.4563, Train Steps/Sec: 0.12, Epoch: 7.686167090588926, LR: 0.0003 +[2026-03-02 23:22:51] (step=0039285) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.68636274701624, LR: 0.0003 +[2026-03-02 23:22:58] (step=0039286) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.686558403443553, LR: 0.0003 +[2026-03-02 23:23:06] (step=0039287) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.686754059870867, LR: 0.0003 +[2026-03-02 23:23:14] (step=0039288) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.68694971629818, LR: 0.0003 +[2026-03-02 23:23:22] (step=0039289) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.687145372725494, LR: 0.0003 +[2026-03-02 23:23:30] (step=0039290) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.687341029152807, LR: 0.0003 +[2026-03-02 23:23:38] (step=0039291) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 7.687536685580121, LR: 0.0003 +[2026-03-02 23:23:46] (step=0039292) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.687732342007435, LR: 0.0003 +[2026-03-02 23:23:54] (step=0039293) Train Loss: 0.4489, Train Steps/Sec: 0.12, Epoch: 7.687927998434748, LR: 0.0003 +[2026-03-02 23:24:01] (step=0039294) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.688123654862062, LR: 0.0003 +[2026-03-02 23:24:09] (step=0039295) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 7.6883193112893755, LR: 0.0003 +[2026-03-02 23:24:17] (step=0039296) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.6885149677166895, LR: 0.0003 +[2026-03-02 23:24:25] (step=0039297) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.6887106241440035, LR: 0.0003 +[2026-03-02 23:24:33] (step=0039298) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.688906280571317, LR: 0.0003 +[2026-03-02 23:24:41] (step=0039299) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.689101936998631, LR: 0.0003 +[2026-03-02 23:24:49] (step=0039300) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.689297593425944, LR: 0.0003 +[2026-03-02 23:24:56] (step=0039301) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.689493249853258, LR: 0.0003 +[2026-03-02 23:25:04] (step=0039302) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.689688906280571, LR: 0.0003 +[2026-03-02 23:25:12] (step=0039303) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.689884562707885, LR: 0.0003 +[2026-03-02 23:25:20] (step=0039304) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 7.690080219135199, LR: 0.0003 +[2026-03-02 23:25:28] (step=0039305) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 7.690275875562512, LR: 0.0003 +[2026-03-02 23:25:36] (step=0039306) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.690471531989826, LR: 0.0003 +[2026-03-02 23:25:44] (step=0039307) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.690667188417139, LR: 0.0003 +[2026-03-02 23:25:51] (step=0039308) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.690862844844453, LR: 0.0003 +[2026-03-02 23:25:59] (step=0039309) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.691058501271767, LR: 0.0003 +[2026-03-02 23:26:07] (step=0039310) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.69125415769908, LR: 0.0003 +[2026-03-02 23:26:15] (step=0039311) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.691449814126394, LR: 0.0003 +[2026-03-02 23:26:23] (step=0039312) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 7.691645470553707, LR: 0.0003 +[2026-03-02 23:26:31] (step=0039313) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.691841126981021, LR: 0.0003 +[2026-03-02 23:26:39] (step=0039314) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.6920367834083345, LR: 0.0003 +[2026-03-02 23:26:46] (step=0039315) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.6922324398356485, LR: 0.0003 +[2026-03-02 23:26:54] (step=0039316) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.6924280962629625, LR: 0.0003 +[2026-03-02 23:27:02] (step=0039317) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.692623752690276, LR: 0.0003 +[2026-03-02 23:27:10] (step=0039318) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.69281940911759, LR: 0.0003 +[2026-03-02 23:27:18] (step=0039319) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 7.693015065544903, LR: 0.0003 +[2026-03-02 23:27:26] (step=0039320) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.693210721972217, LR: 0.0003 +[2026-03-02 23:27:34] (step=0039321) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.693406378399531, LR: 0.0003 +[2026-03-02 23:27:41] (step=0039322) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 7.693602034826844, LR: 0.0003 +[2026-03-02 23:27:49] (step=0039323) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.693797691254158, LR: 0.0003 +[2026-03-02 23:27:57] (step=0039324) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.693993347681471, LR: 0.0003 +[2026-03-02 23:28:05] (step=0039325) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.694189004108785, LR: 0.0003 +[2026-03-02 23:28:13] (step=0039326) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.694384660536099, LR: 0.0003 +[2026-03-02 23:28:21] (step=0039327) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.694580316963412, LR: 0.0003 +[2026-03-02 23:28:29] (step=0039328) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.694775973390726, LR: 0.0003 +[2026-03-02 23:28:36] (step=0039329) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.694971629818039, LR: 0.0003 +[2026-03-02 23:28:44] (step=0039330) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.695167286245353, LR: 0.0003 +[2026-03-02 23:28:52] (step=0039331) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 7.695362942672666, LR: 0.0003 +[2026-03-02 23:29:00] (step=0039332) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 7.69555859909998, LR: 0.0003 +[2026-03-02 23:29:08] (step=0039333) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.695754255527294, LR: 0.0003 +[2026-03-02 23:29:16] (step=0039334) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 7.6959499119546075, LR: 0.0003 +[2026-03-02 23:29:24] (step=0039335) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.6961455683819215, LR: 0.0003 +[2026-03-02 23:29:32] (step=0039336) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.696341224809235, LR: 0.0003 +[2026-03-02 23:29:39] (step=0039337) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.696536881236549, LR: 0.0003 +[2026-03-02 23:29:47] (step=0039338) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.696732537663863, LR: 0.0003 +[2026-03-02 23:29:55] (step=0039339) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.696928194091176, LR: 0.0003 +[2026-03-02 23:30:03] (step=0039340) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.69712385051849, LR: 0.0003 +[2026-03-02 23:30:11] (step=0039341) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 7.697319506945803, LR: 0.0003 +[2026-03-02 23:30:19] (step=0039342) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.697515163373117, LR: 0.0003 +[2026-03-02 23:30:27] (step=0039343) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.69771081980043, LR: 0.0003 +[2026-03-02 23:30:35] (step=0039344) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.697906476227744, LR: 0.0003 +[2026-03-02 23:30:42] (step=0039345) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.698102132655058, LR: 0.0003 +[2026-03-02 23:30:50] (step=0039346) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.698297789082371, LR: 0.0003 +[2026-03-02 23:30:58] (step=0039347) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 7.698493445509685, LR: 0.0003 +[2026-03-02 23:31:06] (step=0039348) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.698689101936998, LR: 0.0003 +[2026-03-02 23:31:14] (step=0039349) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.698884758364312, LR: 0.0003 +[2026-03-02 23:31:22] (step=0039350) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 7.699080414791626, LR: 0.0003 +[2026-03-02 23:31:30] (step=0039351) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.699276071218939, LR: 0.0003 +[2026-03-02 23:31:37] (step=0039352) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.699471727646253, LR: 0.0003 +[2026-03-02 23:31:45] (step=0039353) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.6996673840735665, LR: 0.0003 +[2026-03-02 23:31:53] (step=0039354) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.6998630405008806, LR: 0.0003 +[2026-03-02 23:32:01] (step=0039355) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.700058696928194, LR: 0.0003 +[2026-03-02 23:32:09] (step=0039356) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.700254353355508, LR: 0.0003 +[2026-03-02 23:32:17] (step=0039357) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.700450009782822, LR: 0.0003 +[2026-03-02 23:32:25] (step=0039358) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.700645666210135, LR: 0.0003 +[2026-03-02 23:32:32] (step=0039359) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.700841322637449, LR: 0.0003 +[2026-03-02 23:32:40] (step=0039360) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.701036979064762, LR: 0.0003 +[2026-03-02 23:32:48] (step=0039361) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 7.701232635492076, LR: 0.0003 +[2026-03-02 23:32:56] (step=0039362) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.70142829191939, LR: 0.0003 +[2026-03-02 23:33:04] (step=0039363) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.701623948346703, LR: 0.0003 +[2026-03-02 23:33:12] (step=0039364) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.701819604774017, LR: 0.0003 +[2026-03-02 23:33:20] (step=0039365) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.70201526120133, LR: 0.0003 +[2026-03-02 23:33:28] (step=0039366) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.702210917628644, LR: 0.0003 +[2026-03-02 23:33:35] (step=0039367) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.702406574055957, LR: 0.0003 +[2026-03-02 23:33:43] (step=0039368) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.702602230483271, LR: 0.0003 +[2026-03-02 23:33:51] (step=0039369) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.702797886910585, LR: 0.0003 +[2026-03-02 23:33:59] (step=0039370) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.702993543337898, LR: 0.0003 +[2026-03-02 23:34:07] (step=0039371) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 7.703189199765212, LR: 0.0003 +[2026-03-02 23:34:15] (step=0039372) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 7.7033848561925256, LR: 0.0003 +[2026-03-02 23:34:23] (step=0039373) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.70358051261984, LR: 0.0003 +[2026-03-02 23:34:30] (step=0039374) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.703776169047154, LR: 0.0003 +[2026-03-02 23:34:38] (step=0039375) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.703971825474467, LR: 0.0003 +[2026-03-02 23:34:46] (step=0039376) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.704167481901781, LR: 0.0003 +[2026-03-02 23:34:54] (step=0039377) Train Loss: 0.4486, Train Steps/Sec: 0.12, Epoch: 7.704363138329094, LR: 0.0003 +[2026-03-02 23:35:02] (step=0039378) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.704558794756408, LR: 0.0003 +[2026-03-02 23:35:10] (step=0039379) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.704754451183722, LR: 0.0003 +[2026-03-02 23:35:18] (step=0039380) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 7.704950107611035, LR: 0.0003 +[2026-03-02 23:35:26] (step=0039381) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.705145764038349, LR: 0.0003 +[2026-03-02 23:35:33] (step=0039382) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.705341420465662, LR: 0.0003 +[2026-03-02 23:35:41] (step=0039383) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.705537076892976, LR: 0.0003 +[2026-03-02 23:35:49] (step=0039384) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.705732733320289, LR: 0.0003 +[2026-03-02 23:35:57] (step=0039385) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.705928389747603, LR: 0.0003 +[2026-03-02 23:36:05] (step=0039386) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.706124046174917, LR: 0.0003 +[2026-03-02 23:36:13] (step=0039387) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.70631970260223, LR: 0.0003 +[2026-03-02 23:36:21] (step=0039388) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 7.706515359029544, LR: 0.0003 +[2026-03-02 23:36:28] (step=0039389) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.706711015456857, LR: 0.0003 +[2026-03-02 23:36:36] (step=0039390) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 7.7069066718841714, LR: 0.0003 +[2026-03-02 23:36:44] (step=0039391) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 7.7071023283114855, LR: 0.0003 +[2026-03-02 23:36:52] (step=0039392) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 7.707297984738799, LR: 0.0003 +[2026-03-02 23:37:00] (step=0039393) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.707493641166113, LR: 0.0003 +[2026-03-02 23:37:08] (step=0039394) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.707689297593426, LR: 0.0003 +[2026-03-02 23:37:16] (step=0039395) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.70788495402074, LR: 0.0003 +[2026-03-02 23:37:24] (step=0039396) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.708080610448053, LR: 0.0003 +[2026-03-02 23:37:32] (step=0039397) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.708276266875367, LR: 0.0003 +[2026-03-02 23:37:39] (step=0039398) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 7.708471923302681, LR: 0.0003 +[2026-03-02 23:37:47] (step=0039399) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.708667579729994, LR: 0.0003 +[2026-03-02 23:37:55] (step=0039400) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.708863236157308, LR: 0.0003 +[2026-03-02 23:38:03] (step=0039401) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.709058892584621, LR: 0.0003 +[2026-03-02 23:38:11] (step=0039402) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.709254549011935, LR: 0.0003 +[2026-03-02 23:38:19] (step=0039403) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 7.709450205439249, LR: 0.0003 +[2026-03-02 23:38:27] (step=0039404) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 7.709645861866562, LR: 0.0003 +[2026-03-02 23:38:34] (step=0039405) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.709841518293876, LR: 0.0003 +[2026-03-02 23:38:42] (step=0039406) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.710037174721189, LR: 0.0003 +[2026-03-02 23:38:50] (step=0039407) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.710232831148503, LR: 0.0003 +[2026-03-02 23:38:58] (step=0039408) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.7104284875758164, LR: 0.0003 +[2026-03-02 23:39:06] (step=0039409) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.7106241440031305, LR: 0.0003 +[2026-03-02 23:39:14] (step=0039410) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.7108198004304445, LR: 0.0003 +[2026-03-02 23:39:22] (step=0039411) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.711015456857758, LR: 0.0003 +[2026-03-02 23:39:29] (step=0039412) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.711211113285072, LR: 0.0003 +[2026-03-02 23:39:37] (step=0039413) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.711406769712385, LR: 0.0003 +[2026-03-02 23:39:45] (step=0039414) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.711602426139699, LR: 0.0003 +[2026-03-02 23:39:53] (step=0039415) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.711798082567013, LR: 0.0003 +[2026-03-02 23:40:01] (step=0039416) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.711993738994326, LR: 0.0003 +[2026-03-02 23:40:09] (step=0039417) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.71218939542164, LR: 0.0003 +[2026-03-02 23:40:17] (step=0039418) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.712385051848953, LR: 0.0003 +[2026-03-02 23:40:25] (step=0039419) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.712580708276267, LR: 0.0003 +[2026-03-02 23:40:32] (step=0039420) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.71277636470358, LR: 0.0003 +[2026-03-02 23:40:41] (step=0039421) Train Loss: 0.4462, Train Steps/Sec: 0.12, Epoch: 7.712972021130894, LR: 0.0003 +[2026-03-02 23:40:48] (step=0039422) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.713167677558208, LR: 0.0003 +[2026-03-02 23:40:56] (step=0039423) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.713363333985521, LR: 0.0003 +[2026-03-02 23:41:04] (step=0039424) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.713558990412835, LR: 0.0003 +[2026-03-02 23:41:12] (step=0039425) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.713754646840148, LR: 0.0003 +[2026-03-02 23:41:20] (step=0039426) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.713950303267462, LR: 0.0003 +[2026-03-02 23:41:28] (step=0039427) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.714145959694776, LR: 0.0003 +[2026-03-02 23:41:36] (step=0039428) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 7.7143416161220895, LR: 0.0003 +[2026-03-02 23:41:43] (step=0039429) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.7145372725494035, LR: 0.0003 +[2026-03-02 23:41:51] (step=0039430) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.714732928976717, LR: 0.0003 +[2026-03-02 23:41:59] (step=0039431) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 7.714928585404031, LR: 0.0003 +[2026-03-02 23:42:07] (step=0039432) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.715124241831345, LR: 0.0003 +[2026-03-02 23:42:15] (step=0039433) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.715319898258658, LR: 0.0003 +[2026-03-02 23:42:23] (step=0039434) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 7.715515554685972, LR: 0.0003 +[2026-03-02 23:42:31] (step=0039435) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.715711211113285, LR: 0.0003 +[2026-03-02 23:42:38] (step=0039436) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.715906867540599, LR: 0.0003 +[2026-03-02 23:42:46] (step=0039437) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.716102523967912, LR: 0.0003 +[2026-03-02 23:42:54] (step=0039438) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.716298180395226, LR: 0.0003 +[2026-03-02 23:43:02] (step=0039439) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.71649383682254, LR: 0.0003 +[2026-03-02 23:43:10] (step=0039440) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.716689493249853, LR: 0.0003 +[2026-03-02 23:43:18] (step=0039441) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.716885149677167, LR: 0.0003 +[2026-03-02 23:43:26] (step=0039442) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.71708080610448, LR: 0.0003 +[2026-03-02 23:43:34] (step=0039443) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.717276462531794, LR: 0.0003 +[2026-03-02 23:43:41] (step=0039444) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.717472118959108, LR: 0.0003 +[2026-03-02 23:43:49] (step=0039445) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.717667775386421, LR: 0.0003 +[2026-03-02 23:43:57] (step=0039446) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.717863431813735, LR: 0.0003 +[2026-03-02 23:44:05] (step=0039447) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.7180590882410485, LR: 0.0003 +[2026-03-02 23:44:13] (step=0039448) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.7182547446683625, LR: 0.0003 +[2026-03-02 23:44:21] (step=0039449) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.718450401095676, LR: 0.0003 +[2026-03-02 23:44:29] (step=0039450) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 7.71864605752299, LR: 0.0003 +[2026-03-02 23:44:37] (step=0039451) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.718841713950304, LR: 0.0003 +[2026-03-02 23:44:44] (step=0039452) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.719037370377617, LR: 0.0003 +[2026-03-02 23:44:52] (step=0039453) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.719233026804931, LR: 0.0003 +[2026-03-02 23:45:00] (step=0039454) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.719428683232244, LR: 0.0003 +[2026-03-02 23:45:08] (step=0039455) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.719624339659558, LR: 0.0003 +[2026-03-02 23:45:16] (step=0039456) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.719819996086872, LR: 0.0003 +[2026-03-02 23:45:24] (step=0039457) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.720015652514185, LR: 0.0003 +[2026-03-02 23:45:32] (step=0039458) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.720211308941499, LR: 0.0003 +[2026-03-02 23:45:39] (step=0039459) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.720406965368812, LR: 0.0003 +[2026-03-02 23:45:47] (step=0039460) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 7.720602621796126, LR: 0.0003 +[2026-03-02 23:45:55] (step=0039461) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.720798278223439, LR: 0.0003 +[2026-03-02 23:46:03] (step=0039462) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.720993934650753, LR: 0.0003 +[2026-03-02 23:46:11] (step=0039463) Train Loss: 0.4206, Train Steps/Sec: 0.13, Epoch: 7.721189591078067, LR: 0.0003 +[2026-03-02 23:46:19] (step=0039464) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.72138524750538, LR: 0.0003 +[2026-03-02 23:46:27] (step=0039465) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.721580903932694, LR: 0.0003 +[2026-03-02 23:46:34] (step=0039466) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.7217765603600075, LR: 0.0003 +[2026-03-02 23:46:42] (step=0039467) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.7219722167873215, LR: 0.0003 +[2026-03-02 23:46:50] (step=0039468) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.7221678732146355, LR: 0.0003 +[2026-03-02 23:46:58] (step=0039469) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.722363529641949, LR: 0.0003 +[2026-03-02 23:47:06] (step=0039470) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 7.722559186069263, LR: 0.0003 +[2026-03-02 23:47:14] (step=0039471) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 7.722754842496576, LR: 0.0003 +[2026-03-02 23:47:22] (step=0039472) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 7.72295049892389, LR: 0.0003 +[2026-03-02 23:47:30] (step=0039473) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.723146155351203, LR: 0.0003 +[2026-03-02 23:47:38] (step=0039474) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.723341811778517, LR: 0.0003 +[2026-03-02 23:47:45] (step=0039475) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.723537468205831, LR: 0.0003 +[2026-03-02 23:47:53] (step=0039476) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.723733124633144, LR: 0.0003 +[2026-03-02 23:48:01] (step=0039477) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.723928781060458, LR: 0.0003 +[2026-03-02 23:48:09] (step=0039478) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.724124437487771, LR: 0.0003 +[2026-03-02 23:48:17] (step=0039479) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.724320093915085, LR: 0.0003 +[2026-03-02 23:48:25] (step=0039480) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.724515750342399, LR: 0.0003 +[2026-03-02 23:48:33] (step=0039481) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.724711406769712, LR: 0.0003 +[2026-03-02 23:48:40] (step=0039482) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.724907063197026, LR: 0.0003 +[2026-03-02 23:48:48] (step=0039483) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.725102719624339, LR: 0.0003 +[2026-03-02 23:48:56] (step=0039484) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 7.725298376051653, LR: 0.0003 +[2026-03-02 23:49:04] (step=0039485) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.725494032478967, LR: 0.0003 +[2026-03-02 23:49:12] (step=0039486) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.7256896889062805, LR: 0.0003 +[2026-03-02 23:49:20] (step=0039487) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.7258853453335945, LR: 0.0003 +[2026-03-02 23:49:28] (step=0039488) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.726081001760908, LR: 0.0003 +[2026-03-02 23:49:36] (step=0039489) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.726276658188222, LR: 0.0003 +[2026-03-02 23:49:44] (step=0039490) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.726472314615535, LR: 0.0003 +[2026-03-02 23:49:51] (step=0039491) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 7.726667971042849, LR: 0.0003 +[2026-03-02 23:49:59] (step=0039492) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.726863627470163, LR: 0.0003 +[2026-03-02 23:50:07] (step=0039493) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.727059283897476, LR: 0.0003 +[2026-03-02 23:50:15] (step=0039494) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.72725494032479, LR: 0.0003 +[2026-03-02 23:50:23] (step=0039495) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 7.727450596752103, LR: 0.0003 +[2026-03-02 23:50:31] (step=0039496) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.727646253179417, LR: 0.0003 +[2026-03-02 23:50:39] (step=0039497) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.727841909606731, LR: 0.0003 +[2026-03-02 23:50:47] (step=0039498) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.728037566034044, LR: 0.0003 +[2026-03-02 23:50:54] (step=0039499) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.728233222461358, LR: 0.0003 +[2026-03-02 23:51:02] (step=0039500) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.728428878888671, LR: 0.0003 +[2026-03-02 23:51:02] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0039500/ +[2026-03-02 23:51:10] (step=0039501) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 7.728624535315985, LR: 0.0003 +[2026-03-02 23:51:18] (step=0039502) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.728820191743298, LR: 0.0003 +[2026-03-02 23:51:26] (step=0039503) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.729015848170612, LR: 0.0003 +[2026-03-02 23:51:34] (step=0039504) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.729211504597926, LR: 0.0003 +[2026-03-02 23:51:42] (step=0039505) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.7294071610252395, LR: 0.0003 +[2026-03-02 23:51:49] (step=0039506) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.7296028174525535, LR: 0.0003 +[2026-03-02 23:51:57] (step=0039507) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.729798473879867, LR: 0.0003 +[2026-03-02 23:52:05] (step=0039508) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.729994130307181, LR: 0.0003 +[2026-03-02 23:52:13] (step=0039509) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 7.730189786734495, LR: 0.0003 +[2026-03-02 23:52:21] (step=0039510) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.730385443161808, LR: 0.0003 +[2026-03-02 23:52:29] (step=0039511) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.730581099589122, LR: 0.0003 +[2026-03-02 23:52:37] (step=0039512) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.730776756016435, LR: 0.0003 +[2026-03-02 23:52:45] (step=0039513) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.730972412443749, LR: 0.0003 +[2026-03-02 23:52:52] (step=0039514) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.731168068871062, LR: 0.0003 +[2026-03-02 23:53:00] (step=0039515) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.731363725298376, LR: 0.0003 +[2026-03-02 23:53:08] (step=0039516) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.73155938172569, LR: 0.0003 +[2026-03-02 23:53:16] (step=0039517) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.731755038153003, LR: 0.0003 +[2026-03-02 23:53:24] (step=0039518) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.731950694580317, LR: 0.0003 +[2026-03-02 23:53:32] (step=0039519) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.73214635100763, LR: 0.0003 +[2026-03-02 23:53:40] (step=0039520) Train Loss: 0.4388, Train Steps/Sec: 0.12, Epoch: 7.732342007434944, LR: 0.0003 +[2026-03-02 23:53:48] (step=0039521) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.732537663862258, LR: 0.0003 +[2026-03-02 23:53:55] (step=0039522) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.732733320289571, LR: 0.0003 +[2026-03-02 23:54:03] (step=0039523) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 7.732928976716885, LR: 0.0003 +[2026-03-02 23:54:11] (step=0039524) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 7.7331246331441985, LR: 0.0003 +[2026-03-02 23:54:19] (step=0039525) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.7333202895715125, LR: 0.0003 +[2026-03-02 23:54:27] (step=0039526) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.733515945998826, LR: 0.0003 +[2026-03-02 23:54:35] (step=0039527) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.73371160242614, LR: 0.0003 +[2026-03-02 23:54:43] (step=0039528) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.733907258853454, LR: 0.0003 +[2026-03-02 23:54:50] (step=0039529) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 7.734102915280767, LR: 0.0003 +[2026-03-02 23:54:58] (step=0039530) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.734298571708081, LR: 0.0003 +[2026-03-02 23:55:06] (step=0039531) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.734494228135394, LR: 0.0003 +[2026-03-02 23:55:14] (step=0039532) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.734689884562708, LR: 0.0003 +[2026-03-02 23:55:22] (step=0039533) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 7.734885540990022, LR: 0.0003 +[2026-03-02 23:55:30] (step=0039534) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.735081197417335, LR: 0.0003 +[2026-03-02 23:55:38] (step=0039535) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.735276853844649, LR: 0.0003 +[2026-03-02 23:55:45] (step=0039536) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.735472510271962, LR: 0.0003 +[2026-03-02 23:55:53] (step=0039537) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 7.735668166699276, LR: 0.0003 +[2026-03-02 23:56:01] (step=0039538) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.735863823126589, LR: 0.0003 +[2026-03-02 23:56:09] (step=0039539) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.736059479553903, LR: 0.0003 +[2026-03-02 23:56:17] (step=0039540) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.736255135981217, LR: 0.0003 +[2026-03-02 23:56:25] (step=0039541) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.73645079240853, LR: 0.0003 +[2026-03-02 23:56:33] (step=0039542) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.736646448835844, LR: 0.0003 +[2026-03-02 23:56:41] (step=0039543) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.7368421052631575, LR: 0.0003 +[2026-03-02 23:56:48] (step=0039544) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.7370377616904715, LR: 0.0003 +[2026-03-02 23:56:56] (step=0039545) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 7.7372334181177855, LR: 0.0003 +[2026-03-02 23:57:04] (step=0039546) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.737429074545099, LR: 0.0003 +[2026-03-02 23:57:12] (step=0039547) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.737624730972413, LR: 0.0003 +[2026-03-02 23:57:20] (step=0039548) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.737820387399726, LR: 0.0003 +[2026-03-02 23:57:28] (step=0039549) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.73801604382704, LR: 0.0003 +[2026-03-02 23:57:36] (step=0039550) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.738211700254354, LR: 0.0003 +[2026-03-02 23:57:44] (step=0039551) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.738407356681667, LR: 0.0003 +[2026-03-02 23:57:51] (step=0039552) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.738603013108981, LR: 0.0003 +[2026-03-02 23:57:59] (step=0039553) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.738798669536294, LR: 0.0003 +[2026-03-02 23:58:07] (step=0039554) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.738994325963608, LR: 0.0003 +[2026-03-02 23:58:15] (step=0039555) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.739189982390921, LR: 0.0003 +[2026-03-02 23:58:23] (step=0039556) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.739385638818235, LR: 0.0003 +[2026-03-02 23:58:31] (step=0039557) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.739581295245549, LR: 0.0003 +[2026-03-02 23:58:39] (step=0039558) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.739776951672862, LR: 0.0003 +[2026-03-02 23:58:46] (step=0039559) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.739972608100176, LR: 0.0003 +[2026-03-02 23:58:54] (step=0039560) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.740168264527489, LR: 0.0003 +[2026-03-02 23:59:02] (step=0039561) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.740363920954803, LR: 0.0003 +[2026-03-02 23:59:10] (step=0039562) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.740559577382117, LR: 0.0003 +[2026-03-02 23:59:18] (step=0039563) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.7407552338094305, LR: 0.0003 +[2026-03-02 23:59:26] (step=0039564) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.7409508902367445, LR: 0.0003 +[2026-03-02 23:59:34] (step=0039565) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.741146546664058, LR: 0.0003 +[2026-03-02 23:59:42] (step=0039566) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.741342203091372, LR: 0.0003 +[2026-03-02 23:59:50] (step=0039567) Train Loss: 0.4564, Train Steps/Sec: 0.12, Epoch: 7.741537859518685, LR: 0.0003 +[2026-03-02 23:59:57] (step=0039568) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.741733515945999, LR: 0.0003 +[2026-03-03 00:00:05] (step=0039569) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 7.741929172373313, LR: 0.0003 +[2026-03-03 00:00:13] (step=0039570) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.742124828800626, LR: 0.0003 +[2026-03-03 00:00:21] (step=0039571) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.74232048522794, LR: 0.0003 +[2026-03-03 00:00:29] (step=0039572) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.742516141655253, LR: 0.0003 +[2026-03-03 00:00:37] (step=0039573) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.742711798082567, LR: 0.0003 +[2026-03-03 00:00:45] (step=0039574) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.742907454509881, LR: 0.0003 +[2026-03-03 00:00:52] (step=0039575) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.743103110937194, LR: 0.0003 +[2026-03-03 00:01:00] (step=0039576) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.743298767364508, LR: 0.0003 +[2026-03-03 00:01:08] (step=0039577) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.743494423791821, LR: 0.0003 +[2026-03-03 00:01:16] (step=0039578) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.743690080219135, LR: 0.0003 +[2026-03-03 00:01:24] (step=0039579) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.743885736646448, LR: 0.0003 +[2026-03-03 00:01:32] (step=0039580) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 7.744081393073762, LR: 0.0003 +[2026-03-03 00:01:40] (step=0039581) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 7.744277049501076, LR: 0.0003 +[2026-03-03 00:01:48] (step=0039582) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.7444727059283895, LR: 0.0003 +[2026-03-03 00:01:55] (step=0039583) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.7446683623557036, LR: 0.0003 +[2026-03-03 00:02:03] (step=0039584) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.744864018783017, LR: 0.0003 +[2026-03-03 00:02:11] (step=0039585) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.745059675210331, LR: 0.0003 +[2026-03-03 00:02:19] (step=0039586) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.745255331637645, LR: 0.0003 +[2026-03-03 00:02:27] (step=0039587) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.745450988064958, LR: 0.0003 +[2026-03-03 00:02:35] (step=0039588) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.745646644492272, LR: 0.0003 +[2026-03-03 00:02:43] (step=0039589) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.745842300919585, LR: 0.0003 +[2026-03-03 00:02:51] (step=0039590) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.746037957346899, LR: 0.0003 +[2026-03-03 00:02:58] (step=0039591) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.746233613774212, LR: 0.0003 +[2026-03-03 00:03:06] (step=0039592) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.746429270201526, LR: 0.0003 +[2026-03-03 00:03:14] (step=0039593) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.74662492662884, LR: 0.0003 +[2026-03-03 00:03:22] (step=0039594) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.746820583056153, LR: 0.0003 +[2026-03-03 00:03:30] (step=0039595) Train Loss: 0.4223, Train Steps/Sec: 0.13, Epoch: 7.747016239483467, LR: 0.0003 +[2026-03-03 00:03:38] (step=0039596) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.74721189591078, LR: 0.0003 +[2026-03-03 00:03:46] (step=0039597) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.747407552338094, LR: 0.0003 +[2026-03-03 00:03:53] (step=0039598) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.747603208765408, LR: 0.0003 +[2026-03-03 00:04:01] (step=0039599) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.747798865192721, LR: 0.0003 +[2026-03-03 00:04:09] (step=0039600) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 7.747994521620035, LR: 0.0003 +[2026-03-03 00:04:17] (step=0039601) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.7481901780473486, LR: 0.0003 +[2026-03-03 00:04:25] (step=0039602) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.748385834474663, LR: 0.0003 +[2026-03-03 00:04:33] (step=0039603) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 7.748581490901977, LR: 0.0003 +[2026-03-03 00:04:41] (step=0039604) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.74877714732929, LR: 0.0003 +[2026-03-03 00:04:49] (step=0039605) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.748972803756604, LR: 0.0003 +[2026-03-03 00:04:56] (step=0039606) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.749168460183917, LR: 0.0003 +[2026-03-03 00:05:04] (step=0039607) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.749364116611231, LR: 0.0003 +[2026-03-03 00:05:12] (step=0039608) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.749559773038544, LR: 0.0003 +[2026-03-03 00:05:20] (step=0039609) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.749755429465858, LR: 0.0003 +[2026-03-03 00:05:28] (step=0039610) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.749951085893172, LR: 0.0003 +[2026-03-03 00:05:36] (step=0039611) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 7.750146742320485, LR: 0.0003 +[2026-03-03 00:05:44] (step=0039612) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.750342398747799, LR: 0.0003 +[2026-03-03 00:05:51] (step=0039613) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 7.750538055175112, LR: 0.0003 +[2026-03-03 00:05:59] (step=0039614) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.750733711602426, LR: 0.0003 +[2026-03-03 00:06:07] (step=0039615) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 7.75092936802974, LR: 0.0003 +[2026-03-03 00:06:15] (step=0039616) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.751125024457053, LR: 0.0003 +[2026-03-03 00:06:23] (step=0039617) Train Loss: 0.4520, Train Steps/Sec: 0.12, Epoch: 7.751320680884367, LR: 0.0003 +[2026-03-03 00:06:31] (step=0039618) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 7.75151633731168, LR: 0.0003 +[2026-03-03 00:06:39] (step=0039619) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.751711993738994, LR: 0.0003 +[2026-03-03 00:06:47] (step=0039620) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.751907650166308, LR: 0.0003 +[2026-03-03 00:06:55] (step=0039621) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.752103306593622, LR: 0.0003 +[2026-03-03 00:07:02] (step=0039622) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.752298963020936, LR: 0.0003 +[2026-03-03 00:07:10] (step=0039623) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.752494619448249, LR: 0.0003 +[2026-03-03 00:07:18] (step=0039624) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.752690275875563, LR: 0.0003 +[2026-03-03 00:07:26] (step=0039625) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.752885932302876, LR: 0.0003 +[2026-03-03 00:07:34] (step=0039626) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.75308158873019, LR: 0.0003 +[2026-03-03 00:07:42] (step=0039627) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.753277245157504, LR: 0.0003 +[2026-03-03 00:07:50] (step=0039628) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.753472901584817, LR: 0.0003 +[2026-03-03 00:07:57] (step=0039629) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.753668558012131, LR: 0.0003 +[2026-03-03 00:08:05] (step=0039630) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.753864214439444, LR: 0.0003 +[2026-03-03 00:08:13] (step=0039631) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.754059870866758, LR: 0.0003 +[2026-03-03 00:08:21] (step=0039632) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.754255527294071, LR: 0.0003 +[2026-03-03 00:08:29] (step=0039633) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.754451183721385, LR: 0.0003 +[2026-03-03 00:08:37] (step=0039634) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.754646840148699, LR: 0.0003 +[2026-03-03 00:08:45] (step=0039635) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.754842496576012, LR: 0.0003 +[2026-03-03 00:08:53] (step=0039636) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.755038153003326, LR: 0.0003 +[2026-03-03 00:09:00] (step=0039637) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.7552338094306394, LR: 0.0003 +[2026-03-03 00:09:08] (step=0039638) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.7554294658579535, LR: 0.0003 +[2026-03-03 00:09:16] (step=0039639) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.7556251222852675, LR: 0.0003 +[2026-03-03 00:09:24] (step=0039640) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.755820778712581, LR: 0.0003 +[2026-03-03 00:09:32] (step=0039641) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.756016435139895, LR: 0.0003 +[2026-03-03 00:09:40] (step=0039642) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.756212091567208, LR: 0.0003 +[2026-03-03 00:09:48] (step=0039643) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 7.756407747994522, LR: 0.0003 +[2026-03-03 00:09:55] (step=0039644) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.756603404421835, LR: 0.0003 +[2026-03-03 00:10:03] (step=0039645) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.756799060849149, LR: 0.0003 +[2026-03-03 00:10:11] (step=0039646) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 7.756994717276463, LR: 0.0003 +[2026-03-03 00:10:19] (step=0039647) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.757190373703776, LR: 0.0003 +[2026-03-03 00:10:27] (step=0039648) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.75738603013109, LR: 0.0003 +[2026-03-03 00:10:35] (step=0039649) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.757581686558403, LR: 0.0003 +[2026-03-03 00:10:43] (step=0039650) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.757777342985717, LR: 0.0003 +[2026-03-03 00:10:50] (step=0039651) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.757972999413031, LR: 0.0003 +[2026-03-03 00:10:58] (step=0039652) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.758168655840344, LR: 0.0003 +[2026-03-03 00:11:06] (step=0039653) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.758364312267658, LR: 0.0003 +[2026-03-03 00:11:14] (step=0039654) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.758559968694971, LR: 0.0003 +[2026-03-03 00:11:22] (step=0039655) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.758755625122285, LR: 0.0003 +[2026-03-03 00:11:30] (step=0039656) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.758951281549599, LR: 0.0003 +[2026-03-03 00:11:37] (step=0039657) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 7.7591469379769125, LR: 0.0003 +[2026-03-03 00:11:45] (step=0039658) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 7.7593425944042265, LR: 0.0003 +[2026-03-03 00:11:53] (step=0039659) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.75953825083154, LR: 0.0003 +[2026-03-03 00:12:01] (step=0039660) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.759733907258854, LR: 0.0003 +[2026-03-03 00:12:09] (step=0039661) Train Loss: 0.4443, Train Steps/Sec: 0.12, Epoch: 7.759929563686167, LR: 0.0003 +[2026-03-03 00:12:17] (step=0039662) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.760125220113481, LR: 0.0003 +[2026-03-03 00:12:25] (step=0039663) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.760320876540795, LR: 0.0003 +[2026-03-03 00:12:33] (step=0039664) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.760516532968108, LR: 0.0003 +[2026-03-03 00:12:41] (step=0039665) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.760712189395422, LR: 0.0003 +[2026-03-03 00:12:48] (step=0039666) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.760907845822735, LR: 0.0003 +[2026-03-03 00:12:56] (step=0039667) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.761103502250049, LR: 0.0003 +[2026-03-03 00:13:04] (step=0039668) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.761299158677363, LR: 0.0003 +[2026-03-03 00:13:12] (step=0039669) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.761494815104676, LR: 0.0003 +[2026-03-03 00:13:20] (step=0039670) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 7.76169047153199, LR: 0.0003 +[2026-03-03 00:13:28] (step=0039671) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.761886127959303, LR: 0.0003 +[2026-03-03 00:13:36] (step=0039672) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.762081784386617, LR: 0.0003 +[2026-03-03 00:13:44] (step=0039673) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.76227744081393, LR: 0.0003 +[2026-03-03 00:13:51] (step=0039674) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.762473097241244, LR: 0.0003 +[2026-03-03 00:13:59] (step=0039675) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.762668753668558, LR: 0.0003 +[2026-03-03 00:14:07] (step=0039676) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.7628644100958715, LR: 0.0003 +[2026-03-03 00:14:15] (step=0039677) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.7630600665231855, LR: 0.0003 +[2026-03-03 00:14:23] (step=0039678) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.763255722950499, LR: 0.0003 +[2026-03-03 00:14:31] (step=0039679) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.763451379377813, LR: 0.0003 +[2026-03-03 00:14:39] (step=0039680) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.763647035805127, LR: 0.0003 +[2026-03-03 00:14:46] (step=0039681) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.76384269223244, LR: 0.0003 +[2026-03-03 00:14:54] (step=0039682) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.764038348659754, LR: 0.0003 +[2026-03-03 00:15:02] (step=0039683) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.764234005087067, LR: 0.0003 +[2026-03-03 00:15:10] (step=0039684) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 7.764429661514381, LR: 0.0003 +[2026-03-03 00:15:18] (step=0039685) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.764625317941694, LR: 0.0003 +[2026-03-03 00:15:26] (step=0039686) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.764820974369008, LR: 0.0003 +[2026-03-03 00:15:34] (step=0039687) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 7.765016630796322, LR: 0.0003 +[2026-03-03 00:15:41] (step=0039688) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.765212287223635, LR: 0.0003 +[2026-03-03 00:15:49] (step=0039689) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.765407943650949, LR: 0.0003 +[2026-03-03 00:15:57] (step=0039690) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.765603600078262, LR: 0.0003 +[2026-03-03 00:16:05] (step=0039691) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.765799256505576, LR: 0.0003 +[2026-03-03 00:16:13] (step=0039692) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.76599491293289, LR: 0.0003 +[2026-03-03 00:16:21] (step=0039693) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.766190569360203, LR: 0.0003 +[2026-03-03 00:16:29] (step=0039694) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.766386225787517, LR: 0.0003 +[2026-03-03 00:16:36] (step=0039695) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 7.7665818822148305, LR: 0.0003 +[2026-03-03 00:16:44] (step=0039696) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.7667775386421445, LR: 0.0003 +[2026-03-03 00:16:52] (step=0039697) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.766973195069458, LR: 0.0003 +[2026-03-03 00:17:00] (step=0039698) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.767168851496772, LR: 0.0003 +[2026-03-03 00:17:08] (step=0039699) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.767364507924086, LR: 0.0003 +[2026-03-03 00:17:16] (step=0039700) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.767560164351399, LR: 0.0003 +[2026-03-03 00:17:24] (step=0039701) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.767755820778713, LR: 0.0003 +[2026-03-03 00:17:31] (step=0039702) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.767951477206026, LR: 0.0003 +[2026-03-03 00:17:39] (step=0039703) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.76814713363334, LR: 0.0003 +[2026-03-03 00:17:47] (step=0039704) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.768342790060654, LR: 0.0003 +[2026-03-03 00:17:55] (step=0039705) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.768538446487967, LR: 0.0003 +[2026-03-03 00:18:03] (step=0039706) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.768734102915281, LR: 0.0003 +[2026-03-03 00:18:11] (step=0039707) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.768929759342594, LR: 0.0003 +[2026-03-03 00:18:19] (step=0039708) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.769125415769908, LR: 0.0003 +[2026-03-03 00:18:26] (step=0039709) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 7.769321072197222, LR: 0.0003 +[2026-03-03 00:18:34] (step=0039710) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.769516728624535, LR: 0.0003 +[2026-03-03 00:18:42] (step=0039711) Train Loss: 0.4379, Train Steps/Sec: 0.12, Epoch: 7.769712385051849, LR: 0.0003 +[2026-03-03 00:18:50] (step=0039712) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.769908041479162, LR: 0.0003 +[2026-03-03 00:18:58] (step=0039713) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.770103697906476, LR: 0.0003 +[2026-03-03 00:19:06] (step=0039714) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 7.7702993543337895, LR: 0.0003 +[2026-03-03 00:19:14] (step=0039715) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 7.7704950107611035, LR: 0.0003 +[2026-03-03 00:19:22] (step=0039716) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.7706906671884175, LR: 0.0003 +[2026-03-03 00:19:29] (step=0039717) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.770886323615731, LR: 0.0003 +[2026-03-03 00:19:37] (step=0039718) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.771081980043045, LR: 0.0003 +[2026-03-03 00:19:45] (step=0039719) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.771277636470358, LR: 0.0003 +[2026-03-03 00:19:53] (step=0039720) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.771473292897672, LR: 0.0003 +[2026-03-03 00:20:01] (step=0039721) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.771668949324986, LR: 0.0003 +[2026-03-03 00:20:09] (step=0039722) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.771864605752299, LR: 0.0003 +[2026-03-03 00:20:17] (step=0039723) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.772060262179613, LR: 0.0003 +[2026-03-03 00:20:25] (step=0039724) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.772255918606926, LR: 0.0003 +[2026-03-03 00:20:32] (step=0039725) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.77245157503424, LR: 0.0003 +[2026-03-03 00:20:40] (step=0039726) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.772647231461553, LR: 0.0003 +[2026-03-03 00:20:48] (step=0039727) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.772842887888867, LR: 0.0003 +[2026-03-03 00:20:56] (step=0039728) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.773038544316181, LR: 0.0003 +[2026-03-03 00:21:04] (step=0039729) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 7.773234200743494, LR: 0.0003 +[2026-03-03 00:21:12] (step=0039730) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.773429857170808, LR: 0.0003 +[2026-03-03 00:21:20] (step=0039731) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 7.773625513598121, LR: 0.0003 +[2026-03-03 00:21:28] (step=0039732) Train Loss: 0.4376, Train Steps/Sec: 0.12, Epoch: 7.773821170025435, LR: 0.0003 +[2026-03-03 00:21:35] (step=0039733) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.774016826452749, LR: 0.0003 +[2026-03-03 00:21:43] (step=0039734) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.7742124828800625, LR: 0.0003 +[2026-03-03 00:21:51] (step=0039735) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 7.7744081393073765, LR: 0.0003 +[2026-03-03 00:21:59] (step=0039736) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.77460379573469, LR: 0.0003 +[2026-03-03 00:22:07] (step=0039737) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.774799452162004, LR: 0.0003 +[2026-03-03 00:22:15] (step=0039738) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.774995108589317, LR: 0.0003 +[2026-03-03 00:22:23] (step=0039739) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.775190765016631, LR: 0.0003 +[2026-03-03 00:22:30] (step=0039740) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.775386421443945, LR: 0.0003 +[2026-03-03 00:22:38] (step=0039741) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.775582077871258, LR: 0.0003 +[2026-03-03 00:22:46] (step=0039742) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.775777734298572, LR: 0.0003 +[2026-03-03 00:22:54] (step=0039743) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.775973390725885, LR: 0.0003 +[2026-03-03 00:23:02] (step=0039744) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.776169047153199, LR: 0.0003 +[2026-03-03 00:23:10] (step=0039745) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.776364703580513, LR: 0.0003 +[2026-03-03 00:23:18] (step=0039746) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.776560360007826, LR: 0.0003 +[2026-03-03 00:23:25] (step=0039747) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.77675601643514, LR: 0.0003 +[2026-03-03 00:23:33] (step=0039748) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.776951672862453, LR: 0.0003 +[2026-03-03 00:23:41] (step=0039749) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.777147329289767, LR: 0.0003 +[2026-03-03 00:23:49] (step=0039750) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.77734298571708, LR: 0.0003 +[2026-03-03 00:23:57] (step=0039751) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.777538642144394, LR: 0.0003 +[2026-03-03 00:24:05] (step=0039752) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 7.777734298571708, LR: 0.0003 +[2026-03-03 00:24:13] (step=0039753) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.7779299549990215, LR: 0.0003 +[2026-03-03 00:24:21] (step=0039754) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.7781256114263355, LR: 0.0003 +[2026-03-03 00:24:28] (step=0039755) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.778321267853649, LR: 0.0003 +[2026-03-03 00:24:36] (step=0039756) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 7.778516924280963, LR: 0.0003 +[2026-03-03 00:24:44] (step=0039757) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.778712580708277, LR: 0.0003 +[2026-03-03 00:24:52] (step=0039758) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 7.77890823713559, LR: 0.0003 +[2026-03-03 00:25:00] (step=0039759) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.779103893562904, LR: 0.0003 +[2026-03-03 00:25:08] (step=0039760) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.779299549990217, LR: 0.0003 +[2026-03-03 00:25:16] (step=0039761) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 7.779495206417531, LR: 0.0003 +[2026-03-03 00:25:24] (step=0039762) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.779690862844844, LR: 0.0003 +[2026-03-03 00:25:31] (step=0039763) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.779886519272158, LR: 0.0003 +[2026-03-03 00:25:39] (step=0039764) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.780082175699472, LR: 0.0003 +[2026-03-03 00:25:47] (step=0039765) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.780277832126785, LR: 0.0003 +[2026-03-03 00:25:55] (step=0039766) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 7.780473488554099, LR: 0.0003 +[2026-03-03 00:26:03] (step=0039767) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.780669144981412, LR: 0.0003 +[2026-03-03 00:26:11] (step=0039768) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.780864801408726, LR: 0.0003 +[2026-03-03 00:26:19] (step=0039769) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.78106045783604, LR: 0.0003 +[2026-03-03 00:26:27] (step=0039770) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.781256114263353, LR: 0.0003 +[2026-03-03 00:26:34] (step=0039771) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.781451770690667, LR: 0.0003 +[2026-03-03 00:26:42] (step=0039772) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.7816474271179805, LR: 0.0003 +[2026-03-03 00:26:50] (step=0039773) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.7818430835452945, LR: 0.0003 +[2026-03-03 00:26:58] (step=0039774) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.7820387399726085, LR: 0.0003 +[2026-03-03 00:27:06] (step=0039775) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.782234396399922, LR: 0.0003 +[2026-03-03 00:27:14] (step=0039776) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.782430052827236, LR: 0.0003 +[2026-03-03 00:27:22] (step=0039777) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.782625709254549, LR: 0.0003 +[2026-03-03 00:27:29] (step=0039778) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.782821365681863, LR: 0.0003 +[2026-03-03 00:27:37] (step=0039779) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.783017022109176, LR: 0.0003 +[2026-03-03 00:27:45] (step=0039780) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.78321267853649, LR: 0.0003 +[2026-03-03 00:27:53] (step=0039781) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.783408334963804, LR: 0.0003 +[2026-03-03 00:28:01] (step=0039782) Train Loss: 0.4495, Train Steps/Sec: 0.12, Epoch: 7.783603991391117, LR: 0.0003 +[2026-03-03 00:28:09] (step=0039783) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.783799647818431, LR: 0.0003 +[2026-03-03 00:28:17] (step=0039784) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.783995304245744, LR: 0.0003 +[2026-03-03 00:28:25] (step=0039785) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.784190960673058, LR: 0.0003 +[2026-03-03 00:28:32] (step=0039786) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.784386617100372, LR: 0.0003 +[2026-03-03 00:28:40] (step=0039787) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.784582273527685, LR: 0.0003 +[2026-03-03 00:28:48] (step=0039788) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.784777929954999, LR: 0.0003 +[2026-03-03 00:28:56] (step=0039789) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.784973586382312, LR: 0.0003 +[2026-03-03 00:29:04] (step=0039790) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.785169242809626, LR: 0.0003 +[2026-03-03 00:29:12] (step=0039791) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.7853648992369395, LR: 0.0003 +[2026-03-03 00:29:20] (step=0039792) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.7855605556642535, LR: 0.0003 +[2026-03-03 00:29:28] (step=0039793) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.7857562120915675, LR: 0.0003 +[2026-03-03 00:29:35] (step=0039794) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.785951868518881, LR: 0.0003 +[2026-03-03 00:29:43] (step=0039795) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.786147524946195, LR: 0.0003 +[2026-03-03 00:29:51] (step=0039796) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.786343181373508, LR: 0.0003 +[2026-03-03 00:29:59] (step=0039797) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 7.786538837800822, LR: 0.0003 +[2026-03-03 00:30:07] (step=0039798) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.786734494228136, LR: 0.0003 +[2026-03-03 00:30:15] (step=0039799) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.786930150655449, LR: 0.0003 +[2026-03-03 00:30:23] (step=0039800) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.787125807082763, LR: 0.0003 +[2026-03-03 00:30:30] (step=0039801) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 7.787321463510076, LR: 0.0003 +[2026-03-03 00:30:38] (step=0039802) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 7.78751711993739, LR: 0.0003 +[2026-03-03 00:30:46] (step=0039803) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.787712776364703, LR: 0.0003 +[2026-03-03 00:30:54] (step=0039804) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.787908432792017, LR: 0.0003 +[2026-03-03 00:31:02] (step=0039805) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.788104089219331, LR: 0.0003 +[2026-03-03 00:31:10] (step=0039806) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 7.788299745646644, LR: 0.0003 +[2026-03-03 00:31:18] (step=0039807) Train Loss: 0.4471, Train Steps/Sec: 0.12, Epoch: 7.788495402073958, LR: 0.0003 +[2026-03-03 00:31:26] (step=0039808) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.788691058501271, LR: 0.0003 +[2026-03-03 00:31:33] (step=0039809) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.788886714928585, LR: 0.0003 +[2026-03-03 00:31:41] (step=0039810) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.789082371355899, LR: 0.0003 +[2026-03-03 00:31:49] (step=0039811) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.7892780277832125, LR: 0.0003 +[2026-03-03 00:31:57] (step=0039812) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.7894736842105265, LR: 0.0003 +[2026-03-03 00:32:05] (step=0039813) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.78966934063784, LR: 0.0003 +[2026-03-03 00:32:13] (step=0039814) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.789864997065154, LR: 0.0003 +[2026-03-03 00:32:21] (step=0039815) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.790060653492467, LR: 0.0003 +[2026-03-03 00:32:28] (step=0039816) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.790256309919781, LR: 0.0003 +[2026-03-03 00:32:36] (step=0039817) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 7.790451966347095, LR: 0.0003 +[2026-03-03 00:32:44] (step=0039818) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.790647622774408, LR: 0.0003 +[2026-03-03 00:32:52] (step=0039819) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.790843279201722, LR: 0.0003 +[2026-03-03 00:33:00] (step=0039820) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.791038935629035, LR: 0.0003 +[2026-03-03 00:33:08] (step=0039821) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 7.791234592056349, LR: 0.0003 +[2026-03-03 00:33:16] (step=0039822) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.791430248483663, LR: 0.0003 +[2026-03-03 00:33:24] (step=0039823) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.791625904910976, LR: 0.0003 +[2026-03-03 00:33:31] (step=0039824) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.79182156133829, LR: 0.0003 +[2026-03-03 00:33:39] (step=0039825) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.792017217765603, LR: 0.0003 +[2026-03-03 00:33:47] (step=0039826) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.792212874192917, LR: 0.0003 +[2026-03-03 00:33:55] (step=0039827) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.792408530620231, LR: 0.0003 +[2026-03-03 00:34:03] (step=0039828) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.792604187047544, LR: 0.0003 +[2026-03-03 00:34:11] (step=0039829) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.792799843474858, LR: 0.0003 +[2026-03-03 00:34:19] (step=0039830) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 7.7929954999021716, LR: 0.0003 +[2026-03-03 00:34:26] (step=0039831) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.793191156329486, LR: 0.0003 +[2026-03-03 00:34:34] (step=0039832) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.793386812756799, LR: 0.0003 +[2026-03-03 00:34:42] (step=0039833) Train Loss: 0.4515, Train Steps/Sec: 0.12, Epoch: 7.793582469184113, LR: 0.0003 +[2026-03-03 00:34:50] (step=0039834) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.793778125611427, LR: 0.0003 +[2026-03-03 00:34:58] (step=0039835) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.79397378203874, LR: 0.0003 +[2026-03-03 00:35:06] (step=0039836) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.794169438466054, LR: 0.0003 +[2026-03-03 00:35:14] (step=0039837) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.794365094893367, LR: 0.0003 +[2026-03-03 00:35:22] (step=0039838) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.794560751320681, LR: 0.0003 +[2026-03-03 00:35:29] (step=0039839) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.794756407747995, LR: 0.0003 +[2026-03-03 00:35:37] (step=0039840) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.794952064175308, LR: 0.0003 +[2026-03-03 00:35:45] (step=0039841) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.795147720602622, LR: 0.0003 +[2026-03-03 00:35:53] (step=0039842) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 7.795343377029935, LR: 0.0003 +[2026-03-03 00:36:01] (step=0039843) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.795539033457249, LR: 0.0003 +[2026-03-03 00:36:09] (step=0039844) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.795734689884562, LR: 0.0003 +[2026-03-03 00:36:17] (step=0039845) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.795930346311876, LR: 0.0003 +[2026-03-03 00:36:24] (step=0039846) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.79612600273919, LR: 0.0003 +[2026-03-03 00:36:32] (step=0039847) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.796321659166503, LR: 0.0003 +[2026-03-03 00:36:40] (step=0039848) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.796517315593817, LR: 0.0003 +[2026-03-03 00:36:48] (step=0039849) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.796712972021131, LR: 0.0003 +[2026-03-03 00:36:56] (step=0039850) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.796908628448445, LR: 0.0003 +[2026-03-03 00:37:04] (step=0039851) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.797104284875759, LR: 0.0003 +[2026-03-03 00:37:12] (step=0039852) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.797299941303072, LR: 0.0003 +[2026-03-03 00:37:20] (step=0039853) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.797495597730386, LR: 0.0003 +[2026-03-03 00:37:27] (step=0039854) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 7.797691254157699, LR: 0.0003 +[2026-03-03 00:37:35] (step=0039855) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.797886910585013, LR: 0.0003 +[2026-03-03 00:37:43] (step=0039856) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.798082567012326, LR: 0.0003 +[2026-03-03 00:37:51] (step=0039857) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.79827822343964, LR: 0.0003 +[2026-03-03 00:37:59] (step=0039858) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.798473879866954, LR: 0.0003 +[2026-03-03 00:38:07] (step=0039859) Train Loss: 0.4447, Train Steps/Sec: 0.12, Epoch: 7.798669536294267, LR: 0.0003 +[2026-03-03 00:38:15] (step=0039860) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 7.798865192721581, LR: 0.0003 +[2026-03-03 00:38:23] (step=0039861) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.799060849148894, LR: 0.0003 +[2026-03-03 00:38:31] (step=0039862) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.799256505576208, LR: 0.0003 +[2026-03-03 00:38:38] (step=0039863) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.799452162003522, LR: 0.0003 +[2026-03-03 00:38:46] (step=0039864) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.799647818430835, LR: 0.0003 +[2026-03-03 00:38:54] (step=0039865) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 7.799843474858149, LR: 0.0003 +[2026-03-03 00:39:02] (step=0039866) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.8000391312854624, LR: 0.0003 +[2026-03-03 00:39:10] (step=0039867) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 7.8002347877127765, LR: 0.0003 +[2026-03-03 00:39:18] (step=0039868) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.80043044414009, LR: 0.0003 +[2026-03-03 00:39:26] (step=0039869) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.800626100567404, LR: 0.0003 +[2026-03-03 00:39:33] (step=0039870) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.800821756994718, LR: 0.0003 +[2026-03-03 00:39:41] (step=0039871) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.801017413422031, LR: 0.0003 +[2026-03-03 00:39:49] (step=0039872) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.801213069849345, LR: 0.0003 +[2026-03-03 00:39:57] (step=0039873) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.801408726276658, LR: 0.0003 +[2026-03-03 00:40:05] (step=0039874) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 7.801604382703972, LR: 0.0003 +[2026-03-03 00:40:13] (step=0039875) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.801800039131286, LR: 0.0003 +[2026-03-03 00:40:21] (step=0039876) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.801995695558599, LR: 0.0003 +[2026-03-03 00:40:29] (step=0039877) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.802191351985913, LR: 0.0003 +[2026-03-03 00:40:36] (step=0039878) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.802387008413226, LR: 0.0003 +[2026-03-03 00:40:44] (step=0039879) Train Loss: 0.4596, Train Steps/Sec: 0.12, Epoch: 7.80258266484054, LR: 0.0003 +[2026-03-03 00:40:52] (step=0039880) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.802778321267854, LR: 0.0003 +[2026-03-03 00:41:00] (step=0039881) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.802973977695167, LR: 0.0003 +[2026-03-03 00:41:08] (step=0039882) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.803169634122481, LR: 0.0003 +[2026-03-03 00:41:16] (step=0039883) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 7.803365290549794, LR: 0.0003 +[2026-03-03 00:41:24] (step=0039884) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.803560946977108, LR: 0.0003 +[2026-03-03 00:41:31] (step=0039885) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 7.8037566034044215, LR: 0.0003 +[2026-03-03 00:41:39] (step=0039886) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.8039522598317355, LR: 0.0003 +[2026-03-03 00:41:47] (step=0039887) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.8041479162590495, LR: 0.0003 +[2026-03-03 00:41:55] (step=0039888) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.804343572686363, LR: 0.0003 +[2026-03-03 00:42:03] (step=0039889) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.804539229113677, LR: 0.0003 +[2026-03-03 00:42:11] (step=0039890) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.80473488554099, LR: 0.0003 +[2026-03-03 00:42:19] (step=0039891) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.804930541968304, LR: 0.0003 +[2026-03-03 00:42:27] (step=0039892) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.805126198395618, LR: 0.0003 +[2026-03-03 00:42:34] (step=0039893) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 7.805321854822931, LR: 0.0003 +[2026-03-03 00:42:42] (step=0039894) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.805517511250245, LR: 0.0003 +[2026-03-03 00:42:50] (step=0039895) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.805713167677558, LR: 0.0003 +[2026-03-03 00:42:58] (step=0039896) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 7.805908824104872, LR: 0.0003 +[2026-03-03 00:43:06] (step=0039897) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.806104480532185, LR: 0.0003 +[2026-03-03 00:43:14] (step=0039898) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.806300136959499, LR: 0.0003 +[2026-03-03 00:43:22] (step=0039899) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.806495793386813, LR: 0.0003 +[2026-03-03 00:43:29] (step=0039900) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 7.806691449814126, LR: 0.0003 +[2026-03-03 00:43:37] (step=0039901) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.80688710624144, LR: 0.0003 +[2026-03-03 00:43:45] (step=0039902) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.807082762668753, LR: 0.0003 +[2026-03-03 00:43:53] (step=0039903) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.807278419096067, LR: 0.0003 +[2026-03-03 00:44:01] (step=0039904) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.807474075523381, LR: 0.0003 +[2026-03-03 00:44:09] (step=0039905) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 7.8076697319506945, LR: 0.0003 +[2026-03-03 00:44:17] (step=0039906) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.8078653883780085, LR: 0.0003 +[2026-03-03 00:44:25] (step=0039907) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.808061044805322, LR: 0.0003 +[2026-03-03 00:44:32] (step=0039908) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.808256701232636, LR: 0.0003 +[2026-03-03 00:44:40] (step=0039909) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.808452357659949, LR: 0.0003 +[2026-03-03 00:44:48] (step=0039910) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.808648014087263, LR: 0.0003 +[2026-03-03 00:44:56] (step=0039911) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.808843670514577, LR: 0.0003 +[2026-03-03 00:45:04] (step=0039912) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.80903932694189, LR: 0.0003 +[2026-03-03 00:45:12] (step=0039913) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.809234983369204, LR: 0.0003 +[2026-03-03 00:45:20] (step=0039914) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.809430639796517, LR: 0.0003 +[2026-03-03 00:45:28] (step=0039915) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 7.809626296223831, LR: 0.0003 +[2026-03-03 00:45:35] (step=0039916) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.809821952651145, LR: 0.0003 +[2026-03-03 00:45:43] (step=0039917) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 7.810017609078458, LR: 0.0003 +[2026-03-03 00:45:51] (step=0039918) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.810213265505772, LR: 0.0003 +[2026-03-03 00:45:59] (step=0039919) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.810408921933085, LR: 0.0003 +[2026-03-03 00:46:07] (step=0039920) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 7.810604578360399, LR: 0.0003 +[2026-03-03 00:46:15] (step=0039921) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.810800234787712, LR: 0.0003 +[2026-03-03 00:46:23] (step=0039922) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 7.810995891215026, LR: 0.0003 +[2026-03-03 00:46:30] (step=0039923) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.81119154764234, LR: 0.0003 +[2026-03-03 00:46:38] (step=0039924) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.8113872040696535, LR: 0.0003 +[2026-03-03 00:46:46] (step=0039925) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.8115828604969675, LR: 0.0003 +[2026-03-03 00:46:54] (step=0039926) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.811778516924281, LR: 0.0003 +[2026-03-03 00:47:02] (step=0039927) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.811974173351595, LR: 0.0003 +[2026-03-03 00:47:10] (step=0039928) Train Loss: 0.4457, Train Steps/Sec: 0.12, Epoch: 7.812169829778909, LR: 0.0003 +[2026-03-03 00:47:18] (step=0039929) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.812365486206222, LR: 0.0003 +[2026-03-03 00:47:26] (step=0039930) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.812561142633536, LR: 0.0003 +[2026-03-03 00:47:33] (step=0039931) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 7.812756799060849, LR: 0.0003 +[2026-03-03 00:47:41] (step=0039932) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.812952455488163, LR: 0.0003 +[2026-03-03 00:47:49] (step=0039933) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.813148111915476, LR: 0.0003 +[2026-03-03 00:47:57] (step=0039934) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.81334376834279, LR: 0.0003 +[2026-03-03 00:48:05] (step=0039935) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.813539424770104, LR: 0.0003 +[2026-03-03 00:48:13] (step=0039936) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.813735081197417, LR: 0.0003 +[2026-03-03 00:48:21] (step=0039937) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.813930737624731, LR: 0.0003 +[2026-03-03 00:48:29] (step=0039938) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.814126394052044, LR: 0.0003 +[2026-03-03 00:48:36] (step=0039939) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.814322050479358, LR: 0.0003 +[2026-03-03 00:48:44] (step=0039940) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.814517706906672, LR: 0.0003 +[2026-03-03 00:48:52] (step=0039941) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 7.814713363333985, LR: 0.0003 +[2026-03-03 00:49:00] (step=0039942) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.814909019761299, LR: 0.0003 +[2026-03-03 00:49:08] (step=0039943) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.8151046761886125, LR: 0.0003 +[2026-03-03 00:49:16] (step=0039944) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.8153003326159265, LR: 0.0003 +[2026-03-03 00:49:24] (step=0039945) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.8154959890432405, LR: 0.0003 +[2026-03-03 00:49:31] (step=0039946) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.815691645470554, LR: 0.0003 +[2026-03-03 00:49:39] (step=0039947) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.815887301897868, LR: 0.0003 +[2026-03-03 00:49:47] (step=0039948) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.816082958325181, LR: 0.0003 +[2026-03-03 00:49:55] (step=0039949) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.816278614752495, LR: 0.0003 +[2026-03-03 00:50:03] (step=0039950) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.816474271179808, LR: 0.0003 +[2026-03-03 00:50:11] (step=0039951) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.816669927607122, LR: 0.0003 +[2026-03-03 00:50:19] (step=0039952) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.816865584034436, LR: 0.0003 +[2026-03-03 00:50:27] (step=0039953) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 7.817061240461749, LR: 0.0003 +[2026-03-03 00:50:34] (step=0039954) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.817256896889063, LR: 0.0003 +[2026-03-03 00:50:42] (step=0039955) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.817452553316376, LR: 0.0003 +[2026-03-03 00:50:50] (step=0039956) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.81764820974369, LR: 0.0003 +[2026-03-03 00:50:58] (step=0039957) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 7.817843866171004, LR: 0.0003 +[2026-03-03 00:51:06] (step=0039958) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.818039522598317, LR: 0.0003 +[2026-03-03 00:51:14] (step=0039959) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.818235179025631, LR: 0.0003 +[2026-03-03 00:51:22] (step=0039960) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 7.818430835452944, LR: 0.0003 +[2026-03-03 00:51:29] (step=0039961) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.818626491880258, LR: 0.0003 +[2026-03-03 00:51:37] (step=0039962) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.8188221483075715, LR: 0.0003 +[2026-03-03 00:51:45] (step=0039963) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.8190178047348855, LR: 0.0003 +[2026-03-03 00:51:53] (step=0039964) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.8192134611621995, LR: 0.0003 +[2026-03-03 00:52:01] (step=0039965) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.819409117589513, LR: 0.0003 +[2026-03-03 00:52:09] (step=0039966) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.819604774016827, LR: 0.0003 +[2026-03-03 00:52:17] (step=0039967) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.81980043044414, LR: 0.0003 +[2026-03-03 00:52:24] (step=0039968) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.819996086871454, LR: 0.0003 +[2026-03-03 00:52:32] (step=0039969) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.820191743298768, LR: 0.0003 +[2026-03-03 00:52:40] (step=0039970) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.820387399726081, LR: 0.0003 +[2026-03-03 00:52:48] (step=0039971) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 7.820583056153395, LR: 0.0003 +[2026-03-03 00:52:56] (step=0039972) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.820778712580708, LR: 0.0003 +[2026-03-03 00:53:04] (step=0039973) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 7.820974369008022, LR: 0.0003 +[2026-03-03 00:53:12] (step=0039974) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.821170025435335, LR: 0.0003 +[2026-03-03 00:53:20] (step=0039975) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.821365681862649, LR: 0.0003 +[2026-03-03 00:53:27] (step=0039976) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 7.821561338289963, LR: 0.0003 +[2026-03-03 00:53:35] (step=0039977) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.821756994717276, LR: 0.0003 +[2026-03-03 00:53:43] (step=0039978) Train Loss: 0.4385, Train Steps/Sec: 0.12, Epoch: 7.82195265114459, LR: 0.0003 +[2026-03-03 00:53:51] (step=0039979) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.822148307571903, LR: 0.0003 +[2026-03-03 00:53:59] (step=0039980) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.822343963999217, LR: 0.0003 +[2026-03-03 00:54:07] (step=0039981) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.822539620426531, LR: 0.0003 +[2026-03-03 00:54:15] (step=0039982) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.8227352768538445, LR: 0.0003 +[2026-03-03 00:54:23] (step=0039983) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.8229309332811585, LR: 0.0003 +[2026-03-03 00:54:31] (step=0039984) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.823126589708472, LR: 0.0003 +[2026-03-03 00:54:38] (step=0039985) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 7.823322246135786, LR: 0.0003 +[2026-03-03 00:54:46] (step=0039986) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.823517902563099, LR: 0.0003 +[2026-03-03 00:54:54] (step=0039987) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.823713558990413, LR: 0.0003 +[2026-03-03 00:55:02] (step=0039988) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.823909215417727, LR: 0.0003 +[2026-03-03 00:55:10] (step=0039989) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.82410487184504, LR: 0.0003 +[2026-03-03 00:55:18] (step=0039990) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.824300528272354, LR: 0.0003 +[2026-03-03 00:55:26] (step=0039991) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.824496184699667, LR: 0.0003 +[2026-03-03 00:55:33] (step=0039992) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.824691841126981, LR: 0.0003 +[2026-03-03 00:55:41] (step=0039993) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 7.824887497554295, LR: 0.0003 +[2026-03-03 00:55:49] (step=0039994) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.825083153981608, LR: 0.0003 +[2026-03-03 00:55:57] (step=0039995) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.825278810408922, LR: 0.0003 +[2026-03-03 00:56:05] (step=0039996) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.825474466836235, LR: 0.0003 +[2026-03-03 00:56:13] (step=0039997) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.825670123263549, LR: 0.0003 +[2026-03-03 00:56:21] (step=0039998) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.825865779690863, LR: 0.0003 +[2026-03-03 00:56:28] (step=0039999) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.826061436118176, LR: 0.0003 +[2026-03-03 00:56:36] (step=0040000) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.82625709254549, LR: 0.0003 +[2026-03-03 00:56:36] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0040000/ +[2026-03-03 00:56:44] (step=0040001) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.8264527489728035, LR: 0.0003 +[2026-03-03 00:56:52] (step=0040002) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.8266484054001175, LR: 0.0003 +[2026-03-03 00:57:00] (step=0040003) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.826844061827431, LR: 0.0003 +[2026-03-03 00:57:08] (step=0040004) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.827039718254745, LR: 0.0003 +[2026-03-03 00:57:16] (step=0040005) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.827235374682059, LR: 0.0003 +[2026-03-03 00:57:24] (step=0040006) Train Loss: 0.4505, Train Steps/Sec: 0.12, Epoch: 7.827431031109372, LR: 0.0003 +[2026-03-03 00:57:32] (step=0040007) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.827626687536686, LR: 0.0003 +[2026-03-03 00:57:39] (step=0040008) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.827822343963999, LR: 0.0003 +[2026-03-03 00:57:47] (step=0040009) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.828018000391313, LR: 0.0003 +[2026-03-03 00:57:55] (step=0040010) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.828213656818627, LR: 0.0003 +[2026-03-03 00:58:03] (step=0040011) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.82840931324594, LR: 0.0003 +[2026-03-03 00:58:11] (step=0040012) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.828604969673254, LR: 0.0003 +[2026-03-03 00:58:19] (step=0040013) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.828800626100567, LR: 0.0003 +[2026-03-03 00:58:27] (step=0040014) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.828996282527881, LR: 0.0003 +[2026-03-03 00:58:34] (step=0040015) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 7.829191938955194, LR: 0.0003 +[2026-03-03 00:58:42] (step=0040016) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.829387595382508, LR: 0.0003 +[2026-03-03 00:58:50] (step=0040017) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.829583251809822, LR: 0.0003 +[2026-03-03 00:58:58] (step=0040018) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.829778908237135, LR: 0.0003 +[2026-03-03 00:59:06] (step=0040019) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.829974564664449, LR: 0.0003 +[2026-03-03 00:59:14] (step=0040020) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.8301702210917625, LR: 0.0003 +[2026-03-03 00:59:22] (step=0040021) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.8303658775190765, LR: 0.0003 +[2026-03-03 00:59:29] (step=0040022) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 7.8305615339463905, LR: 0.0003 +[2026-03-03 00:59:37] (step=0040023) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.830757190373704, LR: 0.0003 +[2026-03-03 00:59:45] (step=0040024) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.830952846801018, LR: 0.0003 +[2026-03-03 00:59:53] (step=0040025) Train Loss: 0.4437, Train Steps/Sec: 0.12, Epoch: 7.831148503228331, LR: 0.0003 +[2026-03-03 01:00:01] (step=0040026) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.831344159655645, LR: 0.0003 +[2026-03-03 01:00:09] (step=0040027) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.831539816082958, LR: 0.0003 +[2026-03-03 01:00:17] (step=0040028) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.831735472510272, LR: 0.0003 +[2026-03-03 01:00:25] (step=0040029) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.831931128937586, LR: 0.0003 +[2026-03-03 01:00:33] (step=0040030) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.832126785364899, LR: 0.0003 +[2026-03-03 01:00:40] (step=0040031) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.832322441792213, LR: 0.0003 +[2026-03-03 01:00:48] (step=0040032) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.832518098219526, LR: 0.0003 +[2026-03-03 01:00:56] (step=0040033) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.83271375464684, LR: 0.0003 +[2026-03-03 01:01:04] (step=0040034) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.832909411074154, LR: 0.0003 +[2026-03-03 01:01:12] (step=0040035) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.833105067501467, LR: 0.0003 +[2026-03-03 01:01:20] (step=0040036) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.833300723928781, LR: 0.0003 +[2026-03-03 01:01:28] (step=0040037) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.833496380356094, LR: 0.0003 +[2026-03-03 01:01:35] (step=0040038) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.833692036783408, LR: 0.0003 +[2026-03-03 01:01:43] (step=0040039) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 7.8338876932107215, LR: 0.0003 +[2026-03-03 01:01:51] (step=0040040) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.8340833496380355, LR: 0.0003 +[2026-03-03 01:01:59] (step=0040041) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.8342790060653495, LR: 0.0003 +[2026-03-03 01:02:07] (step=0040042) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.834474662492663, LR: 0.0003 +[2026-03-03 01:02:15] (step=0040043) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.834670318919977, LR: 0.0003 +[2026-03-03 01:02:23] (step=0040044) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.83486597534729, LR: 0.0003 +[2026-03-03 01:02:30] (step=0040045) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.835061631774604, LR: 0.0003 +[2026-03-03 01:02:38] (step=0040046) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.835257288201918, LR: 0.0003 +[2026-03-03 01:02:46] (step=0040047) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.835452944629231, LR: 0.0003 +[2026-03-03 01:02:54] (step=0040048) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 7.835648601056545, LR: 0.0003 +[2026-03-03 01:03:02] (step=0040049) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.835844257483858, LR: 0.0003 +[2026-03-03 01:03:10] (step=0040050) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.836039913911172, LR: 0.0003 +[2026-03-03 01:03:18] (step=0040051) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.836235570338486, LR: 0.0003 +[2026-03-03 01:03:26] (step=0040052) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.836431226765799, LR: 0.0003 +[2026-03-03 01:03:33] (step=0040053) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.836626883193113, LR: 0.0003 +[2026-03-03 01:03:41] (step=0040054) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.836822539620426, LR: 0.0003 +[2026-03-03 01:03:49] (step=0040055) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 7.83701819604774, LR: 0.0003 +[2026-03-03 01:03:57] (step=0040056) Train Loss: 0.4493, Train Steps/Sec: 0.12, Epoch: 7.837213852475053, LR: 0.0003 +[2026-03-03 01:04:05] (step=0040057) Train Loss: 0.4661, Train Steps/Sec: 0.13, Epoch: 7.837409508902367, LR: 0.0003 +[2026-03-03 01:04:13] (step=0040058) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 7.837605165329681, LR: 0.0003 +[2026-03-03 01:04:21] (step=0040059) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.8378008217569946, LR: 0.0003 +[2026-03-03 01:04:29] (step=0040060) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 7.837996478184309, LR: 0.0003 +[2026-03-03 01:04:37] (step=0040061) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.838192134611622, LR: 0.0003 +[2026-03-03 01:04:44] (step=0040062) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.838387791038936, LR: 0.0003 +[2026-03-03 01:04:52] (step=0040063) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 7.83858344746625, LR: 0.0003 +[2026-03-03 01:05:00] (step=0040064) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.838779103893563, LR: 0.0003 +[2026-03-03 01:05:08] (step=0040065) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.838974760320877, LR: 0.0003 +[2026-03-03 01:05:16] (step=0040066) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.83917041674819, LR: 0.0003 +[2026-03-03 01:05:24] (step=0040067) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.839366073175504, LR: 0.0003 +[2026-03-03 01:05:32] (step=0040068) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.839561729602817, LR: 0.0003 +[2026-03-03 01:05:39] (step=0040069) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.839757386030131, LR: 0.0003 +[2026-03-03 01:05:47] (step=0040070) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.839953042457445, LR: 0.0003 +[2026-03-03 01:05:55] (step=0040071) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.840148698884758, LR: 0.0003 +[2026-03-03 01:06:03] (step=0040072) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.840344355312072, LR: 0.0003 +[2026-03-03 01:06:11] (step=0040073) Train Loss: 0.4361, Train Steps/Sec: 0.12, Epoch: 7.840540011739385, LR: 0.0003 +[2026-03-03 01:06:19] (step=0040074) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 7.840735668166699, LR: 0.0003 +[2026-03-03 01:06:27] (step=0040075) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.840931324594013, LR: 0.0003 +[2026-03-03 01:06:35] (step=0040076) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.841126981021326, LR: 0.0003 +[2026-03-03 01:06:42] (step=0040077) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.84132263744864, LR: 0.0003 +[2026-03-03 01:06:50] (step=0040078) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.841518293875954, LR: 0.0003 +[2026-03-03 01:06:58] (step=0040079) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.841713950303268, LR: 0.0003 +[2026-03-03 01:07:06] (step=0040080) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 7.841909606730581, LR: 0.0003 +[2026-03-03 01:07:14] (step=0040081) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.842105263157895, LR: 0.0003 +[2026-03-03 01:07:22] (step=0040082) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.842300919585209, LR: 0.0003 +[2026-03-03 01:07:30] (step=0040083) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.842496576012522, LR: 0.0003 +[2026-03-03 01:07:38] (step=0040084) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.842692232439836, LR: 0.0003 +[2026-03-03 01:07:45] (step=0040085) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.842887888867149, LR: 0.0003 +[2026-03-03 01:07:53] (step=0040086) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 7.843083545294463, LR: 0.0003 +[2026-03-03 01:08:01] (step=0040087) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.843279201721777, LR: 0.0003 +[2026-03-03 01:08:09] (step=0040088) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.84347485814909, LR: 0.0003 +[2026-03-03 01:08:17] (step=0040089) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.843670514576404, LR: 0.0003 +[2026-03-03 01:08:25] (step=0040090) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.843866171003717, LR: 0.0003 +[2026-03-03 01:08:33] (step=0040091) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.844061827431031, LR: 0.0003 +[2026-03-03 01:08:40] (step=0040092) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.844257483858344, LR: 0.0003 +[2026-03-03 01:08:48] (step=0040093) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.844453140285658, LR: 0.0003 +[2026-03-03 01:08:56] (step=0040094) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.844648796712972, LR: 0.0003 +[2026-03-03 01:09:04] (step=0040095) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 7.8448444531402854, LR: 0.0003 +[2026-03-03 01:09:12] (step=0040096) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 7.8450401095675995, LR: 0.0003 +[2026-03-03 01:09:20] (step=0040097) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.845235765994913, LR: 0.0003 +[2026-03-03 01:09:28] (step=0040098) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.845431422422227, LR: 0.0003 +[2026-03-03 01:09:35] (step=0040099) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 7.845627078849541, LR: 0.0003 +[2026-03-03 01:09:43] (step=0040100) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.845822735276854, LR: 0.0003 +[2026-03-03 01:09:51] (step=0040101) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.846018391704168, LR: 0.0003 +[2026-03-03 01:09:59] (step=0040102) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.846214048131481, LR: 0.0003 +[2026-03-03 01:10:07] (step=0040103) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.846409704558795, LR: 0.0003 +[2026-03-03 01:10:15] (step=0040104) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 7.846605360986109, LR: 0.0003 +[2026-03-03 01:10:23] (step=0040105) Train Loss: 0.4462, Train Steps/Sec: 0.12, Epoch: 7.846801017413422, LR: 0.0003 +[2026-03-03 01:10:31] (step=0040106) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.846996673840736, LR: 0.0003 +[2026-03-03 01:10:39] (step=0040107) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.847192330268049, LR: 0.0003 +[2026-03-03 01:10:46] (step=0040108) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.847387986695363, LR: 0.0003 +[2026-03-03 01:10:54] (step=0040109) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.847583643122676, LR: 0.0003 +[2026-03-03 01:11:02] (step=0040110) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 7.84777929954999, LR: 0.0003 +[2026-03-03 01:11:10] (step=0040111) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.847974955977304, LR: 0.0003 +[2026-03-03 01:11:18] (step=0040112) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 7.848170612404617, LR: 0.0003 +[2026-03-03 01:11:26] (step=0040113) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.848366268831931, LR: 0.0003 +[2026-03-03 01:11:34] (step=0040114) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.8485619252592445, LR: 0.0003 +[2026-03-03 01:11:41] (step=0040115) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.8487575816865585, LR: 0.0003 +[2026-03-03 01:11:49] (step=0040116) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 7.8489532381138725, LR: 0.0003 +[2026-03-03 01:11:57] (step=0040117) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.849148894541186, LR: 0.0003 +[2026-03-03 01:12:05] (step=0040118) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.8493445509685, LR: 0.0003 +[2026-03-03 01:12:13] (step=0040119) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.849540207395813, LR: 0.0003 +[2026-03-03 01:12:21] (step=0040120) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.849735863823127, LR: 0.0003 +[2026-03-03 01:12:29] (step=0040121) Train Loss: 0.4401, Train Steps/Sec: 0.12, Epoch: 7.84993152025044, LR: 0.0003 +[2026-03-03 01:12:37] (step=0040122) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.850127176677754, LR: 0.0003 +[2026-03-03 01:12:45] (step=0040123) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.850322833105068, LR: 0.0003 +[2026-03-03 01:12:52] (step=0040124) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.850518489532381, LR: 0.0003 +[2026-03-03 01:13:00] (step=0040125) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.850714145959695, LR: 0.0003 +[2026-03-03 01:13:08] (step=0040126) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.850909802387008, LR: 0.0003 +[2026-03-03 01:13:16] (step=0040127) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.851105458814322, LR: 0.0003 +[2026-03-03 01:13:24] (step=0040128) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.851301115241636, LR: 0.0003 +[2026-03-03 01:13:32] (step=0040129) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.851496771668949, LR: 0.0003 +[2026-03-03 01:13:40] (step=0040130) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 7.851692428096263, LR: 0.0003 +[2026-03-03 01:13:48] (step=0040131) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.851888084523576, LR: 0.0003 +[2026-03-03 01:13:55] (step=0040132) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 7.85208374095089, LR: 0.0003 +[2026-03-03 01:14:03] (step=0040133) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.8522793973782035, LR: 0.0003 +[2026-03-03 01:14:11] (step=0040134) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.8524750538055175, LR: 0.0003 +[2026-03-03 01:14:19] (step=0040135) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.8526707102328315, LR: 0.0003 +[2026-03-03 01:14:27] (step=0040136) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.852866366660145, LR: 0.0003 +[2026-03-03 01:14:35] (step=0040137) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.853062023087459, LR: 0.0003 +[2026-03-03 01:14:43] (step=0040138) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.853257679514772, LR: 0.0003 +[2026-03-03 01:14:51] (step=0040139) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.853453335942086, LR: 0.0003 +[2026-03-03 01:14:58] (step=0040140) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.8536489923694, LR: 0.0003 +[2026-03-03 01:15:06] (step=0040141) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.853844648796713, LR: 0.0003 +[2026-03-03 01:15:14] (step=0040142) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.854040305224027, LR: 0.0003 +[2026-03-03 01:15:22] (step=0040143) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.85423596165134, LR: 0.0003 +[2026-03-03 01:15:30] (step=0040144) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.854431618078654, LR: 0.0003 +[2026-03-03 01:15:38] (step=0040145) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.854627274505967, LR: 0.0003 +[2026-03-03 01:15:46] (step=0040146) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.854822930933281, LR: 0.0003 +[2026-03-03 01:15:53] (step=0040147) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.855018587360595, LR: 0.0003 +[2026-03-03 01:16:01] (step=0040148) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.855214243787908, LR: 0.0003 +[2026-03-03 01:16:09] (step=0040149) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.855409900215222, LR: 0.0003 +[2026-03-03 01:16:17] (step=0040150) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.855605556642535, LR: 0.0003 +[2026-03-03 01:16:25] (step=0040151) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 7.855801213069849, LR: 0.0003 +[2026-03-03 01:16:33] (step=0040152) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.855996869497163, LR: 0.0003 +[2026-03-03 01:16:41] (step=0040153) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.8561925259244765, LR: 0.0003 +[2026-03-03 01:16:49] (step=0040154) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.8563881823517905, LR: 0.0003 +[2026-03-03 01:16:56] (step=0040155) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 7.856583838779104, LR: 0.0003 +[2026-03-03 01:17:04] (step=0040156) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.856779495206418, LR: 0.0003 +[2026-03-03 01:17:12] (step=0040157) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.856975151633731, LR: 0.0003 +[2026-03-03 01:17:20] (step=0040158) Train Loss: 0.4534, Train Steps/Sec: 0.12, Epoch: 7.857170808061045, LR: 0.0003 +[2026-03-03 01:17:28] (step=0040159) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 7.857366464488359, LR: 0.0003 +[2026-03-03 01:17:36] (step=0040160) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.857562120915672, LR: 0.0003 +[2026-03-03 01:17:44] (step=0040161) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 7.857757777342986, LR: 0.0003 +[2026-03-03 01:17:52] (step=0040162) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.857953433770299, LR: 0.0003 +[2026-03-03 01:17:59] (step=0040163) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.858149090197613, LR: 0.0003 +[2026-03-03 01:18:07] (step=0040164) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 7.858344746624927, LR: 0.0003 +[2026-03-03 01:18:15] (step=0040165) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.85854040305224, LR: 0.0003 +[2026-03-03 01:18:23] (step=0040166) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.858736059479554, LR: 0.0003 +[2026-03-03 01:18:31] (step=0040167) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 7.858931715906867, LR: 0.0003 +[2026-03-03 01:18:39] (step=0040168) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.859127372334181, LR: 0.0003 +[2026-03-03 01:18:47] (step=0040169) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.859323028761495, LR: 0.0003 +[2026-03-03 01:18:55] (step=0040170) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 7.859518685188808, LR: 0.0003 +[2026-03-03 01:19:02] (step=0040171) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.859714341616122, LR: 0.0003 +[2026-03-03 01:19:10] (step=0040172) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.8599099980434355, LR: 0.0003 +[2026-03-03 01:19:18] (step=0040173) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 7.8601056544707495, LR: 0.0003 +[2026-03-03 01:19:26] (step=0040174) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.860301310898063, LR: 0.0003 +[2026-03-03 01:19:34] (step=0040175) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.860496967325377, LR: 0.0003 +[2026-03-03 01:19:42] (step=0040176) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.860692623752691, LR: 0.0003 +[2026-03-03 01:19:50] (step=0040177) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 7.860888280180004, LR: 0.0003 +[2026-03-03 01:19:58] (step=0040178) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 7.861083936607318, LR: 0.0003 +[2026-03-03 01:20:05] (step=0040179) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.861279593034631, LR: 0.0003 +[2026-03-03 01:20:13] (step=0040180) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.861475249461945, LR: 0.0003 +[2026-03-03 01:20:21] (step=0040181) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.861670905889259, LR: 0.0003 +[2026-03-03 01:20:29] (step=0040182) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 7.861866562316572, LR: 0.0003 +[2026-03-03 01:20:37] (step=0040183) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.862062218743886, LR: 0.0003 +[2026-03-03 01:20:45] (step=0040184) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.862257875171199, LR: 0.0003 +[2026-03-03 01:20:53] (step=0040185) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.862453531598513, LR: 0.0003 +[2026-03-03 01:21:00] (step=0040186) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.862649188025826, LR: 0.0003 +[2026-03-03 01:21:08] (step=0040187) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.86284484445314, LR: 0.0003 +[2026-03-03 01:21:16] (step=0040188) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.863040500880454, LR: 0.0003 +[2026-03-03 01:21:24] (step=0040189) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 7.863236157307767, LR: 0.0003 +[2026-03-03 01:21:32] (step=0040190) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.863431813735081, LR: 0.0003 +[2026-03-03 01:21:40] (step=0040191) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 7.8636274701623945, LR: 0.0003 +[2026-03-03 01:21:48] (step=0040192) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.8638231265897085, LR: 0.0003 +[2026-03-03 01:21:56] (step=0040193) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.8640187830170225, LR: 0.0003 +[2026-03-03 01:22:03] (step=0040194) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.864214439444336, LR: 0.0003 +[2026-03-03 01:22:11] (step=0040195) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.86441009587165, LR: 0.0003 +[2026-03-03 01:22:19] (step=0040196) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.864605752298963, LR: 0.0003 +[2026-03-03 01:22:27] (step=0040197) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.864801408726277, LR: 0.0003 +[2026-03-03 01:22:35] (step=0040198) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.86499706515359, LR: 0.0003 +[2026-03-03 01:22:43] (step=0040199) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.865192721580904, LR: 0.0003 +[2026-03-03 01:22:51] (step=0040200) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.865388378008218, LR: 0.0003 +[2026-03-03 01:22:58] (step=0040201) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.865584034435531, LR: 0.0003 +[2026-03-03 01:23:06] (step=0040202) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.865779690862845, LR: 0.0003 +[2026-03-03 01:23:14] (step=0040203) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.865975347290158, LR: 0.0003 +[2026-03-03 01:23:22] (step=0040204) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.866171003717472, LR: 0.0003 +[2026-03-03 01:23:30] (step=0040205) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.866366660144786, LR: 0.0003 +[2026-03-03 01:23:38] (step=0040206) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.866562316572099, LR: 0.0003 +[2026-03-03 01:23:46] (step=0040207) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.866757972999413, LR: 0.0003 +[2026-03-03 01:23:54] (step=0040208) Train Loss: 0.4388, Train Steps/Sec: 0.12, Epoch: 7.866953629426726, LR: 0.0003 +[2026-03-03 01:24:01] (step=0040209) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.86714928585404, LR: 0.0003 +[2026-03-03 01:24:09] (step=0040210) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.8673449422813535, LR: 0.0003 +[2026-03-03 01:24:17] (step=0040211) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 7.8675405987086675, LR: 0.0003 +[2026-03-03 01:24:25] (step=0040212) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.8677362551359815, LR: 0.0003 +[2026-03-03 01:24:33] (step=0040213) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.867931911563295, LR: 0.0003 +[2026-03-03 01:24:41] (step=0040214) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.868127567990609, LR: 0.0003 +[2026-03-03 01:24:49] (step=0040215) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 7.868323224417922, LR: 0.0003 +[2026-03-03 01:24:57] (step=0040216) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 7.868518880845236, LR: 0.0003 +[2026-03-03 01:25:04] (step=0040217) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.86871453727255, LR: 0.0003 +[2026-03-03 01:25:12] (step=0040218) Train Loss: 0.4335, Train Steps/Sec: 0.12, Epoch: 7.868910193699863, LR: 0.0003 +[2026-03-03 01:25:20] (step=0040219) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.869105850127177, LR: 0.0003 +[2026-03-03 01:25:28] (step=0040220) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.86930150655449, LR: 0.0003 +[2026-03-03 01:25:36] (step=0040221) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.869497162981804, LR: 0.0003 +[2026-03-03 01:25:44] (step=0040222) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.869692819409118, LR: 0.0003 +[2026-03-03 01:25:52] (step=0040223) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.869888475836431, LR: 0.0003 +[2026-03-03 01:26:00] (step=0040224) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.870084132263745, LR: 0.0003 +[2026-03-03 01:26:07] (step=0040225) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.870279788691058, LR: 0.0003 +[2026-03-03 01:26:15] (step=0040226) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.870475445118372, LR: 0.0003 +[2026-03-03 01:26:23] (step=0040227) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.870671101545685, LR: 0.0003 +[2026-03-03 01:26:31] (step=0040228) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.870866757972999, LR: 0.0003 +[2026-03-03 01:26:39] (step=0040229) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.871062414400313, LR: 0.0003 +[2026-03-03 01:26:47] (step=0040230) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.8712580708276265, LR: 0.0003 +[2026-03-03 01:26:55] (step=0040231) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.8714537272549405, LR: 0.0003 +[2026-03-03 01:27:03] (step=0040232) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 7.871649383682254, LR: 0.0003 +[2026-03-03 01:27:10] (step=0040233) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.871845040109568, LR: 0.0003 +[2026-03-03 01:27:18] (step=0040234) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.872040696536882, LR: 0.0003 +[2026-03-03 01:27:26] (step=0040235) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.872236352964195, LR: 0.0003 +[2026-03-03 01:27:34] (step=0040236) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.872432009391509, LR: 0.0003 +[2026-03-03 01:27:42] (step=0040237) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.872627665818822, LR: 0.0003 +[2026-03-03 01:27:50] (step=0040238) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.872823322246136, LR: 0.0003 +[2026-03-03 01:27:58] (step=0040239) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.873018978673449, LR: 0.0003 +[2026-03-03 01:28:06] (step=0040240) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.873214635100763, LR: 0.0003 +[2026-03-03 01:28:13] (step=0040241) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.873410291528077, LR: 0.0003 +[2026-03-03 01:28:21] (step=0040242) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.87360594795539, LR: 0.0003 +[2026-03-03 01:28:29] (step=0040243) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.873801604382704, LR: 0.0003 +[2026-03-03 01:28:37] (step=0040244) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.873997260810017, LR: 0.0003 +[2026-03-03 01:28:45] (step=0040245) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.874192917237331, LR: 0.0003 +[2026-03-03 01:28:53] (step=0040246) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.874388573664645, LR: 0.0003 +[2026-03-03 01:29:01] (step=0040247) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.874584230091958, LR: 0.0003 +[2026-03-03 01:29:08] (step=0040248) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.874779886519272, LR: 0.0003 +[2026-03-03 01:29:16] (step=0040249) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.8749755429465855, LR: 0.0003 +[2026-03-03 01:29:24] (step=0040250) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.8751711993738995, LR: 0.0003 +[2026-03-03 01:29:32] (step=0040251) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.875366855801213, LR: 0.0003 +[2026-03-03 01:29:40] (step=0040252) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.875562512228527, LR: 0.0003 +[2026-03-03 01:29:48] (step=0040253) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.875758168655841, LR: 0.0003 +[2026-03-03 01:29:56] (step=0040254) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.875953825083154, LR: 0.0003 +[2026-03-03 01:30:04] (step=0040255) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.876149481510468, LR: 0.0003 +[2026-03-03 01:30:11] (step=0040256) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.876345137937781, LR: 0.0003 +[2026-03-03 01:30:19] (step=0040257) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.876540794365095, LR: 0.0003 +[2026-03-03 01:30:27] (step=0040258) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 7.876736450792409, LR: 0.0003 +[2026-03-03 01:30:35] (step=0040259) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.876932107219722, LR: 0.0003 +[2026-03-03 01:30:43] (step=0040260) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 7.877127763647036, LR: 0.0003 +[2026-03-03 01:30:51] (step=0040261) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.877323420074349, LR: 0.0003 +[2026-03-03 01:30:58] (step=0040262) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.877519076501663, LR: 0.0003 +[2026-03-03 01:31:06] (step=0040263) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 7.877714732928976, LR: 0.0003 +[2026-03-03 01:31:14] (step=0040264) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 7.87791038935629, LR: 0.0003 +[2026-03-03 01:31:22] (step=0040265) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.878106045783604, LR: 0.0003 +[2026-03-03 01:31:30] (step=0040266) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.878301702210917, LR: 0.0003 +[2026-03-03 01:31:38] (step=0040267) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.878497358638231, LR: 0.0003 +[2026-03-03 01:31:46] (step=0040268) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.8786930150655445, LR: 0.0003 +[2026-03-03 01:31:54] (step=0040269) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.8788886714928585, LR: 0.0003 +[2026-03-03 01:32:01] (step=0040270) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.8790843279201725, LR: 0.0003 +[2026-03-03 01:32:09] (step=0040271) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.879279984347486, LR: 0.0003 +[2026-03-03 01:32:17] (step=0040272) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.8794756407748, LR: 0.0003 +[2026-03-03 01:32:25] (step=0040273) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 7.879671297202113, LR: 0.0003 +[2026-03-03 01:32:33] (step=0040274) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.879866953629427, LR: 0.0003 +[2026-03-03 01:32:41] (step=0040275) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.880062610056741, LR: 0.0003 +[2026-03-03 01:32:49] (step=0040276) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.880258266484054, LR: 0.0003 +[2026-03-03 01:32:57] (step=0040277) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.880453922911368, LR: 0.0003 +[2026-03-03 01:33:04] (step=0040278) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 7.880649579338681, LR: 0.0003 +[2026-03-03 01:33:12] (step=0040279) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 7.880845235765995, LR: 0.0003 +[2026-03-03 01:33:20] (step=0040280) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.881040892193308, LR: 0.0003 +[2026-03-03 01:33:28] (step=0040281) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.881236548620622, LR: 0.0003 +[2026-03-03 01:33:36] (step=0040282) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.881432205047936, LR: 0.0003 +[2026-03-03 01:33:44] (step=0040283) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.881627861475249, LR: 0.0003 +[2026-03-03 01:33:52] (step=0040284) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.881823517902563, LR: 0.0003 +[2026-03-03 01:33:59] (step=0040285) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.882019174329876, LR: 0.0003 +[2026-03-03 01:34:07] (step=0040286) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.88221483075719, LR: 0.0003 +[2026-03-03 01:34:15] (step=0040287) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.882410487184504, LR: 0.0003 +[2026-03-03 01:34:23] (step=0040288) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.8826061436118176, LR: 0.0003 +[2026-03-03 01:34:31] (step=0040289) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.882801800039132, LR: 0.0003 +[2026-03-03 01:34:39] (step=0040290) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.882997456466445, LR: 0.0003 +[2026-03-03 01:34:47] (step=0040291) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.883193112893759, LR: 0.0003 +[2026-03-03 01:34:54] (step=0040292) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.883388769321072, LR: 0.0003 +[2026-03-03 01:35:02] (step=0040293) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.883584425748386, LR: 0.0003 +[2026-03-03 01:35:10] (step=0040294) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.8837800821757, LR: 0.0003 +[2026-03-03 01:35:18] (step=0040295) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.883975738603013, LR: 0.0003 +[2026-03-03 01:35:26] (step=0040296) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 7.884171395030327, LR: 0.0003 +[2026-03-03 01:35:34] (step=0040297) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.88436705145764, LR: 0.0003 +[2026-03-03 01:35:42] (step=0040298) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.884562707884954, LR: 0.0003 +[2026-03-03 01:35:49] (step=0040299) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.884758364312268, LR: 0.0003 +[2026-03-03 01:35:57] (step=0040300) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.884954020739581, LR: 0.0003 +[2026-03-03 01:36:05] (step=0040301) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.885149677166895, LR: 0.0003 +[2026-03-03 01:36:13] (step=0040302) Train Loss: 0.4507, Train Steps/Sec: 0.12, Epoch: 7.885345333594208, LR: 0.0003 +[2026-03-03 01:36:21] (step=0040303) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.885540990021522, LR: 0.0003 +[2026-03-03 01:36:29] (step=0040304) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 7.885736646448835, LR: 0.0003 +[2026-03-03 01:36:37] (step=0040305) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.885932302876149, LR: 0.0003 +[2026-03-03 01:36:45] (step=0040306) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.886127959303463, LR: 0.0003 +[2026-03-03 01:36:53] (step=0040307) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.886323615730777, LR: 0.0003 +[2026-03-03 01:37:00] (step=0040308) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.886519272158091, LR: 0.0003 +[2026-03-03 01:37:08] (step=0040309) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.886714928585404, LR: 0.0003 +[2026-03-03 01:37:16] (step=0040310) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.886910585012718, LR: 0.0003 +[2026-03-03 01:37:24] (step=0040311) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.887106241440032, LR: 0.0003 +[2026-03-03 01:37:32] (step=0040312) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.887301897867345, LR: 0.0003 +[2026-03-03 01:37:40] (step=0040313) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.887497554294659, LR: 0.0003 +[2026-03-03 01:37:48] (step=0040314) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 7.887693210721972, LR: 0.0003 +[2026-03-03 01:37:56] (step=0040315) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 7.887888867149286, LR: 0.0003 +[2026-03-03 01:38:04] (step=0040316) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.888084523576599, LR: 0.0003 +[2026-03-03 01:38:11] (step=0040317) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.888280180003913, LR: 0.0003 +[2026-03-03 01:38:19] (step=0040318) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 7.888475836431227, LR: 0.0003 +[2026-03-03 01:38:27] (step=0040319) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 7.88867149285854, LR: 0.0003 +[2026-03-03 01:38:35] (step=0040320) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 7.888867149285854, LR: 0.0003 +[2026-03-03 01:38:43] (step=0040321) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.889062805713167, LR: 0.0003 +[2026-03-03 01:38:51] (step=0040322) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 7.889258462140481, LR: 0.0003 +[2026-03-03 01:38:59] (step=0040323) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.889454118567795, LR: 0.0003 +[2026-03-03 01:39:06] (step=0040324) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 7.8896497749951084, LR: 0.0003 +[2026-03-03 01:39:14] (step=0040325) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.8898454314224225, LR: 0.0003 +[2026-03-03 01:39:22] (step=0040326) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.890041087849736, LR: 0.0003 +[2026-03-03 01:39:30] (step=0040327) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 7.89023674427705, LR: 0.0003 +[2026-03-03 01:39:38] (step=0040328) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 7.890432400704364, LR: 0.0003 +[2026-03-03 01:39:46] (step=0040329) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 7.890628057131677, LR: 0.0003 +[2026-03-03 01:39:54] (step=0040330) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.890823713558991, LR: 0.0003 +[2026-03-03 01:40:01] (step=0040331) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.891019369986304, LR: 0.0003 +[2026-03-03 01:40:09] (step=0040332) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.891215026413618, LR: 0.0003 +[2026-03-03 01:40:17] (step=0040333) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 7.891410682840931, LR: 0.0003 +[2026-03-03 01:40:25] (step=0040334) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.891606339268245, LR: 0.0003 +[2026-03-03 01:40:33] (step=0040335) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.891801995695559, LR: 0.0003 +[2026-03-03 01:40:41] (step=0040336) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.891997652122872, LR: 0.0003 +[2026-03-03 01:40:49] (step=0040337) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 7.892193308550186, LR: 0.0003 +[2026-03-03 01:40:56] (step=0040338) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 7.892388964977499, LR: 0.0003 +[2026-03-03 01:41:04] (step=0040339) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.892584621404813, LR: 0.0003 +[2026-03-03 01:41:12] (step=0040340) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.892780277832127, LR: 0.0003 +[2026-03-03 01:41:20] (step=0040341) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.89297593425944, LR: 0.0003 +[2026-03-03 01:41:28] (step=0040342) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.893171590686754, LR: 0.0003 +[2026-03-03 01:41:36] (step=0040343) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.8933672471140675, LR: 0.0003 +[2026-03-03 01:41:44] (step=0040344) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 7.8935629035413815, LR: 0.0003 +[2026-03-03 01:41:52] (step=0040345) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.893758559968695, LR: 0.0003 +[2026-03-03 01:41:59] (step=0040346) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.893954216396009, LR: 0.0003 +[2026-03-03 01:42:07] (step=0040347) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.894149872823323, LR: 0.0003 +[2026-03-03 01:42:15] (step=0040348) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.894345529250636, LR: 0.0003 +[2026-03-03 01:42:23] (step=0040349) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 7.89454118567795, LR: 0.0003 +[2026-03-03 01:42:31] (step=0040350) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.894736842105263, LR: 0.0003 +[2026-03-03 01:42:39] (step=0040351) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 7.894932498532577, LR: 0.0003 +[2026-03-03 01:42:47] (step=0040352) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.895128154959891, LR: 0.0003 +[2026-03-03 01:42:54] (step=0040353) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.895323811387204, LR: 0.0003 +[2026-03-03 01:43:03] (step=0040354) Train Loss: 0.4422, Train Steps/Sec: 0.12, Epoch: 7.895519467814518, LR: 0.0003 +[2026-03-03 01:43:10] (step=0040355) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.895715124241831, LR: 0.0003 +[2026-03-03 01:43:18] (step=0040356) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.895910780669145, LR: 0.0003 +[2026-03-03 01:43:26] (step=0040357) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 7.896106437096458, LR: 0.0003 +[2026-03-03 01:43:34] (step=0040358) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 7.896302093523772, LR: 0.0003 +[2026-03-03 01:43:42] (step=0040359) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 7.896497749951086, LR: 0.0003 +[2026-03-03 01:43:50] (step=0040360) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.896693406378399, LR: 0.0003 +[2026-03-03 01:43:58] (step=0040361) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.896889062805713, LR: 0.0003 +[2026-03-03 01:44:06] (step=0040362) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.8970847192330265, LR: 0.0003 +[2026-03-03 01:44:13] (step=0040363) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 7.8972803756603405, LR: 0.0003 +[2026-03-03 01:44:21] (step=0040364) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.8974760320876545, LR: 0.0003 +[2026-03-03 01:44:29] (step=0040365) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.897671688514968, LR: 0.0003 +[2026-03-03 01:44:37] (step=0040366) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.897867344942282, LR: 0.0003 +[2026-03-03 01:44:45] (step=0040367) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.898063001369595, LR: 0.0003 +[2026-03-03 01:44:53] (step=0040368) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 7.898258657796909, LR: 0.0003 +[2026-03-03 01:45:01] (step=0040369) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 7.898454314224222, LR: 0.0003 +[2026-03-03 01:45:08] (step=0040370) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 7.898649970651536, LR: 0.0003 +[2026-03-03 01:45:16] (step=0040371) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.89884562707885, LR: 0.0003 +[2026-03-03 01:45:24] (step=0040372) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 7.899041283506163, LR: 0.0003 +[2026-03-03 01:45:32] (step=0040373) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.899236939933477, LR: 0.0003 +[2026-03-03 01:45:40] (step=0040374) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 7.89943259636079, LR: 0.0003 +[2026-03-03 01:45:48] (step=0040375) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.899628252788104, LR: 0.0003 +[2026-03-03 01:45:56] (step=0040376) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.899823909215418, LR: 0.0003 +[2026-03-03 01:46:04] (step=0040377) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 7.900019565642731, LR: 0.0003 +[2026-03-03 01:46:11] (step=0040378) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.900215222070045, LR: 0.0003 +[2026-03-03 01:46:19] (step=0040379) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.900410878497358, LR: 0.0003 +[2026-03-03 01:46:27] (step=0040380) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.900606534924672, LR: 0.0003 +[2026-03-03 01:46:35] (step=0040381) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.9008021913519855, LR: 0.0003 +[2026-03-03 01:46:43] (step=0040382) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 7.9009978477792995, LR: 0.0003 +[2026-03-03 01:46:51] (step=0040383) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 7.9011935042066135, LR: 0.0003 +[2026-03-03 01:46:59] (step=0040384) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.901389160633927, LR: 0.0003 +[2026-03-03 01:47:07] (step=0040385) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.901584817061241, LR: 0.0003 +[2026-03-03 01:47:14] (step=0040386) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.901780473488554, LR: 0.0003 +[2026-03-03 01:47:22] (step=0040387) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.901976129915868, LR: 0.0003 +[2026-03-03 01:47:30] (step=0040388) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.902171786343182, LR: 0.0003 +[2026-03-03 01:47:38] (step=0040389) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.902367442770495, LR: 0.0003 +[2026-03-03 01:47:46] (step=0040390) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.902563099197809, LR: 0.0003 +[2026-03-03 01:47:54] (step=0040391) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 7.902758755625122, LR: 0.0003 +[2026-03-03 01:48:02] (step=0040392) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.902954412052436, LR: 0.0003 +[2026-03-03 01:48:09] (step=0040393) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.90315006847975, LR: 0.0003 +[2026-03-03 01:48:17] (step=0040394) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.903345724907063, LR: 0.0003 +[2026-03-03 01:48:25] (step=0040395) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 7.903541381334377, LR: 0.0003 +[2026-03-03 01:48:33] (step=0040396) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.90373703776169, LR: 0.0003 +[2026-03-03 01:48:41] (step=0040397) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 7.903932694189004, LR: 0.0003 +[2026-03-03 01:48:49] (step=0040398) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.904128350616317, LR: 0.0003 +[2026-03-03 01:48:57] (step=0040399) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.904324007043631, LR: 0.0003 +[2026-03-03 01:49:05] (step=0040400) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.904519663470945, LR: 0.0003 +[2026-03-03 01:49:12] (step=0040401) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 7.9047153198982585, LR: 0.0003 +[2026-03-03 01:49:20] (step=0040402) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 7.9049109763255725, LR: 0.0003 +[2026-03-03 01:49:28] (step=0040403) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 7.905106632752886, LR: 0.0003 +[2026-03-03 01:49:36] (step=0040404) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.9053022891802, LR: 0.0003 +[2026-03-03 01:49:44] (step=0040405) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 7.905497945607514, LR: 0.0003 +[2026-03-03 01:49:52] (step=0040406) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.905693602034827, LR: 0.0003 +[2026-03-03 01:50:00] (step=0040407) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.905889258462141, LR: 0.0003 +[2026-03-03 01:50:08] (step=0040408) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.906084914889454, LR: 0.0003 +[2026-03-03 01:50:16] (step=0040409) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.906280571316768, LR: 0.0003 +[2026-03-03 01:50:23] (step=0040410) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.906476227744081, LR: 0.0003 +[2026-03-03 01:50:31] (step=0040411) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.906671884171395, LR: 0.0003 +[2026-03-03 01:50:39] (step=0040412) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.906867540598709, LR: 0.0003 +[2026-03-03 01:50:47] (step=0040413) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.907063197026022, LR: 0.0003 +[2026-03-03 01:50:55] (step=0040414) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.907258853453336, LR: 0.0003 +[2026-03-03 01:51:03] (step=0040415) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.907454509880649, LR: 0.0003 +[2026-03-03 01:51:11] (step=0040416) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 7.907650166307963, LR: 0.0003 +[2026-03-03 01:51:18] (step=0040417) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.907845822735277, LR: 0.0003 +[2026-03-03 01:51:26] (step=0040418) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.90804147916259, LR: 0.0003 +[2026-03-03 01:51:34] (step=0040419) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 7.908237135589904, LR: 0.0003 +[2026-03-03 01:51:42] (step=0040420) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.9084327920172175, LR: 0.0003 +[2026-03-03 01:51:50] (step=0040421) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.9086284484445315, LR: 0.0003 +[2026-03-03 01:51:58] (step=0040422) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.908824104871845, LR: 0.0003 +[2026-03-03 01:52:06] (step=0040423) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.909019761299159, LR: 0.0003 +[2026-03-03 01:52:13] (step=0040424) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.909215417726473, LR: 0.0003 +[2026-03-03 01:52:21] (step=0040425) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.909411074153786, LR: 0.0003 +[2026-03-03 01:52:29] (step=0040426) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 7.9096067305811, LR: 0.0003 +[2026-03-03 01:52:37] (step=0040427) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.909802387008413, LR: 0.0003 +[2026-03-03 01:52:45] (step=0040428) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.909998043435727, LR: 0.0003 +[2026-03-03 01:52:53] (step=0040429) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.910193699863041, LR: 0.0003 +[2026-03-03 01:53:01] (step=0040430) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 7.910389356290354, LR: 0.0003 +[2026-03-03 01:53:08] (step=0040431) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.910585012717668, LR: 0.0003 +[2026-03-03 01:53:16] (step=0040432) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.910780669144981, LR: 0.0003 +[2026-03-03 01:53:24] (step=0040433) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.910976325572295, LR: 0.0003 +[2026-03-03 01:53:32] (step=0040434) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.911171981999608, LR: 0.0003 +[2026-03-03 01:53:40] (step=0040435) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.911367638426922, LR: 0.0003 +[2026-03-03 01:53:48] (step=0040436) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.911563294854236, LR: 0.0003 +[2026-03-03 01:53:56] (step=0040437) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 7.911758951281549, LR: 0.0003 +[2026-03-03 01:54:04] (step=0040438) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 7.911954607708863, LR: 0.0003 +[2026-03-03 01:54:11] (step=0040439) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.9121502641361765, LR: 0.0003 +[2026-03-03 01:54:19] (step=0040440) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 7.9123459205634905, LR: 0.0003 +[2026-03-03 01:54:27] (step=0040441) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.9125415769908045, LR: 0.0003 +[2026-03-03 01:54:35] (step=0040442) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 7.912737233418118, LR: 0.0003 +[2026-03-03 01:54:43] (step=0040443) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.912932889845432, LR: 0.0003 +[2026-03-03 01:54:51] (step=0040444) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.913128546272745, LR: 0.0003 +[2026-03-03 01:54:59] (step=0040445) Train Loss: 0.4455, Train Steps/Sec: 0.12, Epoch: 7.913324202700059, LR: 0.0003 +[2026-03-03 01:55:07] (step=0040446) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.913519859127373, LR: 0.0003 +[2026-03-03 01:55:15] (step=0040447) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.913715515554686, LR: 0.0003 +[2026-03-03 01:55:23] (step=0040448) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 7.913911171982, LR: 0.0003 +[2026-03-03 01:55:31] (step=0040449) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.914106828409313, LR: 0.0003 +[2026-03-03 01:55:38] (step=0040450) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.914302484836627, LR: 0.0003 +[2026-03-03 01:55:46] (step=0040451) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.91449814126394, LR: 0.0003 +[2026-03-03 01:55:54] (step=0040452) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.914693797691254, LR: 0.0003 +[2026-03-03 01:56:02] (step=0040453) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.914889454118568, LR: 0.0003 +[2026-03-03 01:56:10] (step=0040454) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.915085110545881, LR: 0.0003 +[2026-03-03 01:56:18] (step=0040455) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.915280766973195, LR: 0.0003 +[2026-03-03 01:56:26] (step=0040456) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.915476423400508, LR: 0.0003 +[2026-03-03 01:56:33] (step=0040457) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.915672079827822, LR: 0.0003 +[2026-03-03 01:56:41] (step=0040458) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.915867736255136, LR: 0.0003 +[2026-03-03 01:56:49] (step=0040459) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 7.9160633926824495, LR: 0.0003 +[2026-03-03 01:56:57] (step=0040460) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 7.9162590491097635, LR: 0.0003 +[2026-03-03 01:57:05] (step=0040461) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.916454705537077, LR: 0.0003 +[2026-03-03 01:57:13] (step=0040462) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.916650361964391, LR: 0.0003 +[2026-03-03 01:57:21] (step=0040463) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.916846018391704, LR: 0.0003 +[2026-03-03 01:57:29] (step=0040464) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.917041674819018, LR: 0.0003 +[2026-03-03 01:57:36] (step=0040465) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 7.917237331246332, LR: 0.0003 +[2026-03-03 01:57:44] (step=0040466) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.917432987673645, LR: 0.0003 +[2026-03-03 01:57:52] (step=0040467) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.917628644100959, LR: 0.0003 +[2026-03-03 01:58:00] (step=0040468) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.917824300528272, LR: 0.0003 +[2026-03-03 01:58:08] (step=0040469) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.918019956955586, LR: 0.0003 +[2026-03-03 01:58:16] (step=0040470) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.9182156133829, LR: 0.0003 +[2026-03-03 01:58:24] (step=0040471) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.918411269810213, LR: 0.0003 +[2026-03-03 01:58:31] (step=0040472) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.918606926237527, LR: 0.0003 +[2026-03-03 01:58:39] (step=0040473) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.91880258266484, LR: 0.0003 +[2026-03-03 01:58:47] (step=0040474) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.918998239092154, LR: 0.0003 +[2026-03-03 01:58:55] (step=0040475) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.919193895519467, LR: 0.0003 +[2026-03-03 01:59:03] (step=0040476) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 7.919389551946781, LR: 0.0003 +[2026-03-03 01:59:11] (step=0040477) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.919585208374095, LR: 0.0003 +[2026-03-03 01:59:19] (step=0040478) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 7.9197808648014085, LR: 0.0003 +[2026-03-03 01:59:27] (step=0040479) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.9199765212287225, LR: 0.0003 +[2026-03-03 01:59:34] (step=0040480) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.920172177656036, LR: 0.0003 +[2026-03-03 01:59:42] (step=0040481) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.92036783408335, LR: 0.0003 +[2026-03-03 01:59:50] (step=0040482) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.920563490510664, LR: 0.0003 +[2026-03-03 01:59:58] (step=0040483) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.920759146937977, LR: 0.0003 +[2026-03-03 02:00:06] (step=0040484) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.920954803365291, LR: 0.0003 +[2026-03-03 02:00:14] (step=0040485) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.921150459792604, LR: 0.0003 +[2026-03-03 02:00:22] (step=0040486) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.921346116219918, LR: 0.0003 +[2026-03-03 02:00:29] (step=0040487) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 7.921541772647231, LR: 0.0003 +[2026-03-03 02:00:37] (step=0040488) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.921737429074545, LR: 0.0003 +[2026-03-03 02:00:45] (step=0040489) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.921933085501859, LR: 0.0003 +[2026-03-03 02:00:53] (step=0040490) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 7.922128741929172, LR: 0.0003 +[2026-03-03 02:01:01] (step=0040491) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 7.922324398356486, LR: 0.0003 +[2026-03-03 02:01:09] (step=0040492) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.922520054783799, LR: 0.0003 +[2026-03-03 02:01:17] (step=0040493) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.922715711211113, LR: 0.0003 +[2026-03-03 02:01:24] (step=0040494) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 7.922911367638427, LR: 0.0003 +[2026-03-03 02:01:33] (step=0040495) Train Loss: 0.4490, Train Steps/Sec: 0.12, Epoch: 7.92310702406574, LR: 0.0003 +[2026-03-03 02:01:40] (step=0040496) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 7.923302680493054, LR: 0.0003 +[2026-03-03 02:01:48] (step=0040497) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.9234983369203675, LR: 0.0003 +[2026-03-03 02:01:56] (step=0040498) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.9236939933476815, LR: 0.0003 +[2026-03-03 02:02:04] (step=0040499) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 7.9238896497749955, LR: 0.0003 +[2026-03-03 02:02:12] (step=0040500) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.924085306202309, LR: 0.0003 +[2026-03-03 02:02:12] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0040500/ +[2026-03-03 02:02:20] (step=0040501) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 7.924280962629623, LR: 0.0003 +[2026-03-03 02:02:28] (step=0040502) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 7.924476619056936, LR: 0.0003 +[2026-03-03 02:02:35] (step=0040503) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.92467227548425, LR: 0.0003 +[2026-03-03 02:02:44] (step=0040504) Train Loss: 0.4471, Train Steps/Sec: 0.12, Epoch: 7.924867931911563, LR: 0.0003 +[2026-03-03 02:02:51] (step=0040505) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.925063588338877, LR: 0.0003 +[2026-03-03 02:02:59] (step=0040506) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.925259244766191, LR: 0.0003 +[2026-03-03 02:03:07] (step=0040507) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.925454901193504, LR: 0.0003 +[2026-03-03 02:03:15] (step=0040508) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.925650557620818, LR: 0.0003 +[2026-03-03 02:03:23] (step=0040509) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.925846214048131, LR: 0.0003 +[2026-03-03 02:03:31] (step=0040510) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.926041870475445, LR: 0.0003 +[2026-03-03 02:03:39] (step=0040511) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.926237526902759, LR: 0.0003 +[2026-03-03 02:03:46] (step=0040512) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.926433183330072, LR: 0.0003 +[2026-03-03 02:03:54] (step=0040513) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 7.926628839757386, LR: 0.0003 +[2026-03-03 02:04:02] (step=0040514) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 7.926824496184699, LR: 0.0003 +[2026-03-03 02:04:10] (step=0040515) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.927020152612013, LR: 0.0003 +[2026-03-03 02:04:18] (step=0040516) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.9272158090393265, LR: 0.0003 +[2026-03-03 02:04:26] (step=0040517) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.9274114654666405, LR: 0.0003 +[2026-03-03 02:04:34] (step=0040518) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 7.927607121893955, LR: 0.0003 +[2026-03-03 02:04:41] (step=0040519) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 7.927802778321268, LR: 0.0003 +[2026-03-03 02:04:49] (step=0040520) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 7.927998434748582, LR: 0.0003 +[2026-03-03 02:04:57] (step=0040521) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.928194091175895, LR: 0.0003 +[2026-03-03 02:05:05] (step=0040522) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.928389747603209, LR: 0.0003 +[2026-03-03 02:05:13] (step=0040523) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.928585404030523, LR: 0.0003 +[2026-03-03 02:05:21] (step=0040524) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 7.928781060457836, LR: 0.0003 +[2026-03-03 02:05:29] (step=0040525) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.92897671688515, LR: 0.0003 +[2026-03-03 02:05:36] (step=0040526) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.929172373312463, LR: 0.0003 +[2026-03-03 02:05:44] (step=0040527) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 7.929368029739777, LR: 0.0003 +[2026-03-03 02:05:52] (step=0040528) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 7.92956368616709, LR: 0.0003 +[2026-03-03 02:06:00] (step=0040529) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.929759342594404, LR: 0.0003 +[2026-03-03 02:06:08] (step=0040530) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 7.929954999021718, LR: 0.0003 +[2026-03-03 02:06:16] (step=0040531) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 7.930150655449031, LR: 0.0003 +[2026-03-03 02:06:24] (step=0040532) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.930346311876345, LR: 0.0003 +[2026-03-03 02:06:31] (step=0040533) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.930541968303658, LR: 0.0003 +[2026-03-03 02:06:39] (step=0040534) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 7.930737624730972, LR: 0.0003 +[2026-03-03 02:06:47] (step=0040535) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.930933281158286, LR: 0.0003 +[2026-03-03 02:06:55] (step=0040536) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 7.9311289375856, LR: 0.0003 +[2026-03-03 02:07:03] (step=0040537) Train Loss: 0.4374, Train Steps/Sec: 0.12, Epoch: 7.931324594012914, LR: 0.0003 +[2026-03-03 02:07:11] (step=0040538) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.931520250440227, LR: 0.0003 +[2026-03-03 02:07:19] (step=0040539) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 7.931715906867541, LR: 0.0003 +[2026-03-03 02:07:27] (step=0040540) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 7.931911563294854, LR: 0.0003 +[2026-03-03 02:07:35] (step=0040541) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.932107219722168, LR: 0.0003 +[2026-03-03 02:07:42] (step=0040542) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.932302876149482, LR: 0.0003 +[2026-03-03 02:07:50] (step=0040543) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 7.932498532576795, LR: 0.0003 +[2026-03-03 02:07:58] (step=0040544) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.932694189004109, LR: 0.0003 +[2026-03-03 02:08:06] (step=0040545) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 7.932889845431422, LR: 0.0003 +[2026-03-03 02:08:14] (step=0040546) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 7.933085501858736, LR: 0.0003 +[2026-03-03 02:08:22] (step=0040547) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 7.93328115828605, LR: 0.0003 +[2026-03-03 02:08:30] (step=0040548) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.933476814713363, LR: 0.0003 +[2026-03-03 02:08:37] (step=0040549) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 7.933672471140677, LR: 0.0003 +[2026-03-03 02:08:45] (step=0040550) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 7.93386812756799, LR: 0.0003 +[2026-03-03 02:08:53] (step=0040551) Train Loss: 0.4541, Train Steps/Sec: 0.12, Epoch: 7.934063783995304, LR: 0.0003 +[2026-03-03 02:09:01] (step=0040552) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.934259440422618, LR: 0.0003 +[2026-03-03 02:09:09] (step=0040553) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 7.934455096849931, LR: 0.0003 +[2026-03-03 02:09:17] (step=0040554) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.9346507532772454, LR: 0.0003 +[2026-03-03 02:09:25] (step=0040555) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.934846409704559, LR: 0.0003 +[2026-03-03 02:09:33] (step=0040556) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.935042066131873, LR: 0.0003 +[2026-03-03 02:09:41] (step=0040557) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 7.935237722559186, LR: 0.0003 +[2026-03-03 02:09:48] (step=0040558) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 7.9354333789865, LR: 0.0003 +[2026-03-03 02:09:56] (step=0040559) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 7.935629035413814, LR: 0.0003 +[2026-03-03 02:10:04] (step=0040560) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 7.935824691841127, LR: 0.0003 +[2026-03-03 02:10:12] (step=0040561) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 7.936020348268441, LR: 0.0003 +[2026-03-03 02:10:20] (step=0040562) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 7.936216004695754, LR: 0.0003 +[2026-03-03 02:10:28] (step=0040563) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.936411661123068, LR: 0.0003 +[2026-03-03 02:10:36] (step=0040564) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.936607317550382, LR: 0.0003 +[2026-03-03 02:10:43] (step=0040565) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.936802973977695, LR: 0.0003 +[2026-03-03 02:10:51] (step=0040566) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.936998630405009, LR: 0.0003 +[2026-03-03 02:10:59] (step=0040567) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 7.937194286832322, LR: 0.0003 +[2026-03-03 02:11:07] (step=0040568) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 7.937389943259636, LR: 0.0003 +[2026-03-03 02:11:15] (step=0040569) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 7.937585599686949, LR: 0.0003 +[2026-03-03 02:11:23] (step=0040570) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 7.937781256114263, LR: 0.0003 +[2026-03-03 02:11:31] (step=0040571) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.937976912541577, LR: 0.0003 +[2026-03-03 02:11:38] (step=0040572) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 7.9381725689688905, LR: 0.0003 +[2026-03-03 02:11:46] (step=0040573) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 7.9383682253962045, LR: 0.0003 +[2026-03-03 02:11:54] (step=0040574) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.938563881823518, LR: 0.0003 +[2026-03-03 02:12:02] (step=0040575) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.938759538250832, LR: 0.0003 +[2026-03-03 02:12:10] (step=0040576) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 7.938955194678146, LR: 0.0003 +[2026-03-03 02:12:18] (step=0040577) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.939150851105459, LR: 0.0003 +[2026-03-03 02:12:26] (step=0040578) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 7.939346507532773, LR: 0.0003 +[2026-03-03 02:12:33] (step=0040579) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.939542163960086, LR: 0.0003 +[2026-03-03 02:12:41] (step=0040580) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 7.9397378203874, LR: 0.0003 +[2026-03-03 02:12:49] (step=0040581) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.939933476814713, LR: 0.0003 +[2026-03-03 02:12:57] (step=0040582) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.940129133242027, LR: 0.0003 +[2026-03-03 02:13:05] (step=0040583) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.940324789669341, LR: 0.0003 +[2026-03-03 02:13:13] (step=0040584) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.940520446096654, LR: 0.0003 +[2026-03-03 02:13:21] (step=0040585) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 7.940716102523968, LR: 0.0003 +[2026-03-03 02:13:29] (step=0040586) Train Loss: 0.4459, Train Steps/Sec: 0.12, Epoch: 7.940911758951281, LR: 0.0003 +[2026-03-03 02:13:37] (step=0040587) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.941107415378595, LR: 0.0003 +[2026-03-03 02:13:44] (step=0040588) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.941303071805909, LR: 0.0003 +[2026-03-03 02:13:52] (step=0040589) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.941498728233222, LR: 0.0003 +[2026-03-03 02:14:00] (step=0040590) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.941694384660536, LR: 0.0003 +[2026-03-03 02:14:08] (step=0040591) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.9418900410878495, LR: 0.0003 +[2026-03-03 02:14:16] (step=0040592) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.9420856975151635, LR: 0.0003 +[2026-03-03 02:14:24] (step=0040593) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 7.942281353942477, LR: 0.0003 +[2026-03-03 02:14:32] (step=0040594) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.942477010369791, LR: 0.0003 +[2026-03-03 02:14:39] (step=0040595) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.942672666797105, LR: 0.0003 +[2026-03-03 02:14:47] (step=0040596) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.942868323224418, LR: 0.0003 +[2026-03-03 02:14:55] (step=0040597) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.943063979651732, LR: 0.0003 +[2026-03-03 02:15:03] (step=0040598) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 7.943259636079045, LR: 0.0003 +[2026-03-03 02:15:11] (step=0040599) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 7.943455292506359, LR: 0.0003 +[2026-03-03 02:15:19] (step=0040600) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 7.943650948933673, LR: 0.0003 +[2026-03-03 02:15:27] (step=0040601) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 7.943846605360986, LR: 0.0003 +[2026-03-03 02:15:35] (step=0040602) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.9440422617883, LR: 0.0003 +[2026-03-03 02:15:42] (step=0040603) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 7.944237918215613, LR: 0.0003 +[2026-03-03 02:15:50] (step=0040604) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.944433574642927, LR: 0.0003 +[2026-03-03 02:15:58] (step=0040605) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 7.94462923107024, LR: 0.0003 +[2026-03-03 02:16:06] (step=0040606) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 7.944824887497554, LR: 0.0003 +[2026-03-03 02:16:14] (step=0040607) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.945020543924868, LR: 0.0003 +[2026-03-03 02:16:22] (step=0040608) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 7.945216200352181, LR: 0.0003 +[2026-03-03 02:16:30] (step=0040609) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.945411856779495, LR: 0.0003 +[2026-03-03 02:16:37] (step=0040610) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 7.9456075132068085, LR: 0.0003 +[2026-03-03 02:16:45] (step=0040611) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 7.9458031696341225, LR: 0.0003 +[2026-03-03 02:16:53] (step=0040612) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 7.9459988260614365, LR: 0.0003 +[2026-03-03 02:17:01] (step=0040613) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.94619448248875, LR: 0.0003 +[2026-03-03 02:17:09] (step=0040614) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.946390138916064, LR: 0.0003 +[2026-03-03 02:17:17] (step=0040615) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.946585795343377, LR: 0.0003 +[2026-03-03 02:17:25] (step=0040616) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 7.946781451770691, LR: 0.0003 +[2026-03-03 02:17:32] (step=0040617) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.946977108198005, LR: 0.0003 +[2026-03-03 02:17:40] (step=0040618) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 7.947172764625318, LR: 0.0003 +[2026-03-03 02:17:48] (step=0040619) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 7.947368421052632, LR: 0.0003 +[2026-03-03 02:17:56] (step=0040620) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 7.947564077479945, LR: 0.0003 +[2026-03-03 02:18:04] (step=0040621) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 7.947759733907259, LR: 0.0003 +[2026-03-03 02:18:12] (step=0040622) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.947955390334572, LR: 0.0003 +[2026-03-03 02:18:20] (step=0040623) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.948151046761886, LR: 0.0003 +[2026-03-03 02:18:27] (step=0040624) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.9483467031892, LR: 0.0003 +[2026-03-03 02:18:35] (step=0040625) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 7.948542359616513, LR: 0.0003 +[2026-03-03 02:18:43] (step=0040626) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 7.948738016043827, LR: 0.0003 +[2026-03-03 02:18:51] (step=0040627) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.94893367247114, LR: 0.0003 +[2026-03-03 02:18:59] (step=0040628) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.949129328898454, LR: 0.0003 +[2026-03-03 02:19:07] (step=0040629) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.949324985325768, LR: 0.0003 +[2026-03-03 02:19:15] (step=0040630) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.9495206417530815, LR: 0.0003 +[2026-03-03 02:19:22] (step=0040631) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.9497162981803955, LR: 0.0003 +[2026-03-03 02:19:30] (step=0040632) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 7.949911954607709, LR: 0.0003 +[2026-03-03 02:19:38] (step=0040633) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.950107611035023, LR: 0.0003 +[2026-03-03 02:19:46] (step=0040634) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 7.950303267462336, LR: 0.0003 +[2026-03-03 02:19:54] (step=0040635) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 7.95049892388965, LR: 0.0003 +[2026-03-03 02:20:02] (step=0040636) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 7.950694580316964, LR: 0.0003 +[2026-03-03 02:20:10] (step=0040637) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.950890236744277, LR: 0.0003 +[2026-03-03 02:20:18] (step=0040638) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.951085893171591, LR: 0.0003 +[2026-03-03 02:20:26] (step=0040639) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 7.951281549598904, LR: 0.0003 +[2026-03-03 02:20:34] (step=0040640) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 7.951477206026218, LR: 0.0003 +[2026-03-03 02:20:41] (step=0040641) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 7.951672862453532, LR: 0.0003 +[2026-03-03 02:20:49] (step=0040642) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 7.951868518880845, LR: 0.0003 +[2026-03-03 02:20:57] (step=0040643) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.952064175308159, LR: 0.0003 +[2026-03-03 02:21:05] (step=0040644) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.952259831735472, LR: 0.0003 +[2026-03-03 02:21:13] (step=0040645) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.952455488162786, LR: 0.0003 +[2026-03-03 02:21:21] (step=0040646) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.952651144590099, LR: 0.0003 +[2026-03-03 02:21:29] (step=0040647) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.952846801017413, LR: 0.0003 +[2026-03-03 02:21:36] (step=0040648) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.953042457444727, LR: 0.0003 +[2026-03-03 02:21:45] (step=0040649) Train Loss: 0.4381, Train Steps/Sec: 0.12, Epoch: 7.9532381138720405, LR: 0.0003 +[2026-03-03 02:21:52] (step=0040650) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.9534337702993545, LR: 0.0003 +[2026-03-03 02:22:00] (step=0040651) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 7.953629426726668, LR: 0.0003 +[2026-03-03 02:22:08] (step=0040652) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.953825083153982, LR: 0.0003 +[2026-03-03 02:22:16] (step=0040653) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 7.954020739581296, LR: 0.0003 +[2026-03-03 02:22:24] (step=0040654) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.954216396008609, LR: 0.0003 +[2026-03-03 02:22:32] (step=0040655) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 7.954412052435923, LR: 0.0003 +[2026-03-03 02:22:40] (step=0040656) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.954607708863236, LR: 0.0003 +[2026-03-03 02:22:48] (step=0040657) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.95480336529055, LR: 0.0003 +[2026-03-03 02:22:55] (step=0040658) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 7.954999021717863, LR: 0.0003 +[2026-03-03 02:23:03] (step=0040659) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.955194678145177, LR: 0.0003 +[2026-03-03 02:23:11] (step=0040660) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.955390334572491, LR: 0.0003 +[2026-03-03 02:23:19] (step=0040661) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 7.955585990999804, LR: 0.0003 +[2026-03-03 02:23:27] (step=0040662) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 7.955781647427118, LR: 0.0003 +[2026-03-03 02:23:35] (step=0040663) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 7.955977303854431, LR: 0.0003 +[2026-03-03 02:23:43] (step=0040664) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 7.956172960281745, LR: 0.0003 +[2026-03-03 02:23:50] (step=0040665) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 7.956368616709059, LR: 0.0003 +[2026-03-03 02:23:58] (step=0040666) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.956564273136372, LR: 0.0003 +[2026-03-03 02:24:06] (step=0040667) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.956759929563686, LR: 0.0003 +[2026-03-03 02:24:14] (step=0040668) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.9569555859909995, LR: 0.0003 +[2026-03-03 02:24:22] (step=0040669) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.9571512424183135, LR: 0.0003 +[2026-03-03 02:24:30] (step=0040670) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 7.9573468988456275, LR: 0.0003 +[2026-03-03 02:24:38] (step=0040671) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 7.957542555272941, LR: 0.0003 +[2026-03-03 02:24:45] (step=0040672) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 7.957738211700255, LR: 0.0003 +[2026-03-03 02:24:53] (step=0040673) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.957933868127568, LR: 0.0003 +[2026-03-03 02:25:01] (step=0040674) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 7.958129524554882, LR: 0.0003 +[2026-03-03 02:25:09] (step=0040675) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.958325180982195, LR: 0.0003 +[2026-03-03 02:25:17] (step=0040676) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.958520837409509, LR: 0.0003 +[2026-03-03 02:25:25] (step=0040677) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.958716493836823, LR: 0.0003 +[2026-03-03 02:25:33] (step=0040678) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 7.958912150264136, LR: 0.0003 +[2026-03-03 02:25:41] (step=0040679) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 7.95910780669145, LR: 0.0003 +[2026-03-03 02:25:48] (step=0040680) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 7.959303463118763, LR: 0.0003 +[2026-03-03 02:25:56] (step=0040681) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 7.959499119546077, LR: 0.0003 +[2026-03-03 02:26:04] (step=0040682) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 7.959694775973391, LR: 0.0003 +[2026-03-03 02:26:12] (step=0040683) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 7.959890432400704, LR: 0.0003 +[2026-03-03 02:26:20] (step=0040684) Train Loss: 0.4541, Train Steps/Sec: 0.12, Epoch: 7.960086088828018, LR: 0.0003 +[2026-03-03 02:26:28] (step=0040685) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 7.960281745255331, LR: 0.0003 +[2026-03-03 02:26:36] (step=0040686) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.960477401682645, LR: 0.0003 +[2026-03-03 02:26:44] (step=0040687) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.9606730581099585, LR: 0.0003 +[2026-03-03 02:26:51] (step=0040688) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.9608687145372725, LR: 0.0003 +[2026-03-03 02:26:59] (step=0040689) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 7.9610643709645865, LR: 0.0003 +[2026-03-03 02:27:07] (step=0040690) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.9612600273919, LR: 0.0003 +[2026-03-03 02:27:15] (step=0040691) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.961455683819214, LR: 0.0003 +[2026-03-03 02:27:23] (step=0040692) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.961651340246527, LR: 0.0003 +[2026-03-03 02:27:31] (step=0040693) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 7.961846996673841, LR: 0.0003 +[2026-03-03 02:27:39] (step=0040694) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 7.962042653101155, LR: 0.0003 +[2026-03-03 02:27:47] (step=0040695) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.962238309528468, LR: 0.0003 +[2026-03-03 02:27:54] (step=0040696) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.962433965955782, LR: 0.0003 +[2026-03-03 02:28:02] (step=0040697) Train Loss: 0.4374, Train Steps/Sec: 0.12, Epoch: 7.962629622383095, LR: 0.0003 +[2026-03-03 02:28:10] (step=0040698) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 7.962825278810409, LR: 0.0003 +[2026-03-03 02:28:18] (step=0040699) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.963020935237722, LR: 0.0003 +[2026-03-03 02:28:26] (step=0040700) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.963216591665036, LR: 0.0003 +[2026-03-03 02:28:34] (step=0040701) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.96341224809235, LR: 0.0003 +[2026-03-03 02:28:42] (step=0040702) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 7.963607904519663, LR: 0.0003 +[2026-03-03 02:28:50] (step=0040703) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 7.963803560946977, LR: 0.0003 +[2026-03-03 02:28:57] (step=0040704) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.96399921737429, LR: 0.0003 +[2026-03-03 02:29:05] (step=0040705) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 7.964194873801604, LR: 0.0003 +[2026-03-03 02:29:13] (step=0040706) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.964390530228918, LR: 0.0003 +[2026-03-03 02:29:21] (step=0040707) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 7.9645861866562315, LR: 0.0003 +[2026-03-03 02:29:29] (step=0040708) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.9647818430835455, LR: 0.0003 +[2026-03-03 02:29:37] (step=0040709) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 7.964977499510859, LR: 0.0003 +[2026-03-03 02:29:45] (step=0040710) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.965173155938173, LR: 0.0003 +[2026-03-03 02:29:52] (step=0040711) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 7.965368812365486, LR: 0.0003 +[2026-03-03 02:30:00] (step=0040712) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 7.9655644687928, LR: 0.0003 +[2026-03-03 02:30:08] (step=0040713) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 7.965760125220114, LR: 0.0003 +[2026-03-03 02:30:16] (step=0040714) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.965955781647427, LR: 0.0003 +[2026-03-03 02:30:24] (step=0040715) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.966151438074741, LR: 0.0003 +[2026-03-03 02:30:32] (step=0040716) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.966347094502054, LR: 0.0003 +[2026-03-03 02:30:40] (step=0040717) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 7.966542750929368, LR: 0.0003 +[2026-03-03 02:30:47] (step=0040718) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.966738407356682, LR: 0.0003 +[2026-03-03 02:30:55] (step=0040719) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.966934063783995, LR: 0.0003 +[2026-03-03 02:31:03] (step=0040720) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 7.967129720211309, LR: 0.0003 +[2026-03-03 02:31:11] (step=0040721) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 7.967325376638622, LR: 0.0003 +[2026-03-03 02:31:19] (step=0040722) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.967521033065936, LR: 0.0003 +[2026-03-03 02:31:27] (step=0040723) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.96771668949325, LR: 0.0003 +[2026-03-03 02:31:35] (step=0040724) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 7.967912345920563, LR: 0.0003 +[2026-03-03 02:31:43] (step=0040725) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 7.968108002347877, LR: 0.0003 +[2026-03-03 02:31:51] (step=0040726) Train Loss: 0.4484, Train Steps/Sec: 0.12, Epoch: 7.9683036587751905, LR: 0.0003 +[2026-03-03 02:31:58] (step=0040727) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.9684993152025045, LR: 0.0003 +[2026-03-03 02:32:06] (step=0040728) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.968694971629818, LR: 0.0003 +[2026-03-03 02:32:14] (step=0040729) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.968890628057132, LR: 0.0003 +[2026-03-03 02:32:22] (step=0040730) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.969086284484446, LR: 0.0003 +[2026-03-03 02:32:30] (step=0040731) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 7.969281940911759, LR: 0.0003 +[2026-03-03 02:32:38] (step=0040732) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 7.969477597339073, LR: 0.0003 +[2026-03-03 02:32:46] (step=0040733) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.969673253766386, LR: 0.0003 +[2026-03-03 02:32:53] (step=0040734) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.9698689101937, LR: 0.0003 +[2026-03-03 02:33:01] (step=0040735) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.970064566621014, LR: 0.0003 +[2026-03-03 02:33:09] (step=0040736) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 7.970260223048327, LR: 0.0003 +[2026-03-03 02:33:17] (step=0040737) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 7.970455879475641, LR: 0.0003 +[2026-03-03 02:33:25] (step=0040738) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.970651535902954, LR: 0.0003 +[2026-03-03 02:33:33] (step=0040739) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.970847192330268, LR: 0.0003 +[2026-03-03 02:33:41] (step=0040740) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 7.971042848757581, LR: 0.0003 +[2026-03-03 02:33:49] (step=0040741) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.971238505184895, LR: 0.0003 +[2026-03-03 02:33:56] (step=0040742) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.971434161612209, LR: 0.0003 +[2026-03-03 02:34:04] (step=0040743) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 7.971629818039522, LR: 0.0003 +[2026-03-03 02:34:12] (step=0040744) Train Loss: 0.4369, Train Steps/Sec: 0.12, Epoch: 7.971825474466836, LR: 0.0003 +[2026-03-03 02:34:20] (step=0040745) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.9720211308941495, LR: 0.0003 +[2026-03-03 02:34:28] (step=0040746) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.9722167873214635, LR: 0.0003 +[2026-03-03 02:34:36] (step=0040747) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 7.9724124437487776, LR: 0.0003 +[2026-03-03 02:34:44] (step=0040748) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.972608100176091, LR: 0.0003 +[2026-03-03 02:34:52] (step=0040749) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 7.972803756603405, LR: 0.0003 +[2026-03-03 02:35:00] (step=0040750) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 7.972999413030718, LR: 0.0003 +[2026-03-03 02:35:07] (step=0040751) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 7.973195069458032, LR: 0.0003 +[2026-03-03 02:35:15] (step=0040752) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.973390725885345, LR: 0.0003 +[2026-03-03 02:35:23] (step=0040753) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 7.973586382312659, LR: 0.0003 +[2026-03-03 02:35:31] (step=0040754) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 7.973782038739973, LR: 0.0003 +[2026-03-03 02:35:39] (step=0040755) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 7.973977695167286, LR: 0.0003 +[2026-03-03 02:35:47] (step=0040756) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 7.9741733515946, LR: 0.0003 +[2026-03-03 02:35:55] (step=0040757) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 7.974369008021913, LR: 0.0003 +[2026-03-03 02:36:02] (step=0040758) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.974564664449227, LR: 0.0003 +[2026-03-03 02:36:10] (step=0040759) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.974760320876541, LR: 0.0003 +[2026-03-03 02:36:18] (step=0040760) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.974955977303854, LR: 0.0003 +[2026-03-03 02:36:26] (step=0040761) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 7.975151633731168, LR: 0.0003 +[2026-03-03 02:36:34] (step=0040762) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 7.975347290158481, LR: 0.0003 +[2026-03-03 02:36:42] (step=0040763) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 7.975542946585795, LR: 0.0003 +[2026-03-03 02:36:50] (step=0040764) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 7.9757386030131086, LR: 0.0003 +[2026-03-03 02:36:58] (step=0040765) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 7.975934259440423, LR: 0.0003 +[2026-03-03 02:37:05] (step=0040766) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 7.976129915867737, LR: 0.0003 +[2026-03-03 02:37:13] (step=0040767) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.97632557229505, LR: 0.0003 +[2026-03-03 02:37:21] (step=0040768) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 7.976521228722364, LR: 0.0003 +[2026-03-03 02:37:29] (step=0040769) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.976716885149677, LR: 0.0003 +[2026-03-03 02:37:37] (step=0040770) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.976912541576991, LR: 0.0003 +[2026-03-03 02:37:45] (step=0040771) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 7.977108198004305, LR: 0.0003 +[2026-03-03 02:37:53] (step=0040772) Train Loss: 0.4467, Train Steps/Sec: 0.12, Epoch: 7.977303854431618, LR: 0.0003 +[2026-03-03 02:38:01] (step=0040773) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.977499510858932, LR: 0.0003 +[2026-03-03 02:38:09] (step=0040774) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 7.977695167286245, LR: 0.0003 +[2026-03-03 02:38:16] (step=0040775) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 7.977890823713559, LR: 0.0003 +[2026-03-03 02:38:24] (step=0040776) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 7.978086480140873, LR: 0.0003 +[2026-03-03 02:38:32] (step=0040777) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.978282136568186, LR: 0.0003 +[2026-03-03 02:38:40] (step=0040778) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 7.9784777929955, LR: 0.0003 +[2026-03-03 02:38:48] (step=0040779) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 7.978673449422813, LR: 0.0003 +[2026-03-03 02:38:56] (step=0040780) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.978869105850127, LR: 0.0003 +[2026-03-03 02:39:04] (step=0040781) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 7.97906476227744, LR: 0.0003 +[2026-03-03 02:39:11] (step=0040782) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.979260418704754, LR: 0.0003 +[2026-03-03 02:39:19] (step=0040783) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.9794560751320684, LR: 0.0003 +[2026-03-03 02:39:27] (step=0040784) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 7.979651731559382, LR: 0.0003 +[2026-03-03 02:39:35] (step=0040785) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 7.979847387986696, LR: 0.0003 +[2026-03-03 02:39:43] (step=0040786) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.980043044414009, LR: 0.0003 +[2026-03-03 02:39:51] (step=0040787) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 7.980238700841323, LR: 0.0003 +[2026-03-03 02:39:59] (step=0040788) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 7.980434357268637, LR: 0.0003 +[2026-03-03 02:40:06] (step=0040789) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 7.98063001369595, LR: 0.0003 +[2026-03-03 02:40:14] (step=0040790) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 7.980825670123264, LR: 0.0003 +[2026-03-03 02:40:22] (step=0040791) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 7.981021326550577, LR: 0.0003 +[2026-03-03 02:40:30] (step=0040792) Train Loss: 0.4433, Train Steps/Sec: 0.12, Epoch: 7.981216982977891, LR: 0.0003 +[2026-03-03 02:40:38] (step=0040793) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 7.981412639405204, LR: 0.0003 +[2026-03-03 02:40:46] (step=0040794) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 7.981608295832518, LR: 0.0003 +[2026-03-03 02:40:54] (step=0040795) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.981803952259832, LR: 0.0003 +[2026-03-03 02:41:02] (step=0040796) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 7.981999608687145, LR: 0.0003 +[2026-03-03 02:41:10] (step=0040797) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 7.982195265114459, LR: 0.0003 +[2026-03-03 02:41:17] (step=0040798) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.982390921541772, LR: 0.0003 +[2026-03-03 02:41:25] (step=0040799) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 7.982586577969086, LR: 0.0003 +[2026-03-03 02:41:33] (step=0040800) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.9827822343964, LR: 0.0003 +[2026-03-03 02:41:41] (step=0040801) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 7.9829778908237135, LR: 0.0003 +[2026-03-03 02:41:49] (step=0040802) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.9831735472510275, LR: 0.0003 +[2026-03-03 02:41:57] (step=0040803) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 7.983369203678341, LR: 0.0003 +[2026-03-03 02:42:05] (step=0040804) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 7.983564860105655, LR: 0.0003 +[2026-03-03 02:42:13] (step=0040805) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 7.983760516532968, LR: 0.0003 +[2026-03-03 02:42:21] (step=0040806) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 7.983956172960282, LR: 0.0003 +[2026-03-03 02:42:28] (step=0040807) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 7.984151829387596, LR: 0.0003 +[2026-03-03 02:42:36] (step=0040808) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 7.984347485814909, LR: 0.0003 +[2026-03-03 02:42:44] (step=0040809) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 7.984543142242223, LR: 0.0003 +[2026-03-03 02:42:52] (step=0040810) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 7.984738798669536, LR: 0.0003 +[2026-03-03 02:43:00] (step=0040811) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 7.98493445509685, LR: 0.0003 +[2026-03-03 02:43:08] (step=0040812) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 7.985130111524164, LR: 0.0003 +[2026-03-03 02:43:16] (step=0040813) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.985325767951477, LR: 0.0003 +[2026-03-03 02:43:24] (step=0040814) Train Loss: 0.4358, Train Steps/Sec: 0.12, Epoch: 7.985521424378791, LR: 0.0003 +[2026-03-03 02:43:32] (step=0040815) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 7.985717080806104, LR: 0.0003 +[2026-03-03 02:43:39] (step=0040816) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 7.985912737233418, LR: 0.0003 +[2026-03-03 02:43:47] (step=0040817) Train Loss: 0.4312, Train Steps/Sec: 0.12, Epoch: 7.986108393660731, LR: 0.0003 +[2026-03-03 02:43:55] (step=0040818) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 7.986304050088045, LR: 0.0003 +[2026-03-03 02:44:03] (step=0040819) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 7.986499706515359, LR: 0.0003 +[2026-03-03 02:44:11] (step=0040820) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 7.9866953629426725, LR: 0.0003 +[2026-03-03 02:44:19] (step=0040821) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 7.9868910193699865, LR: 0.0003 +[2026-03-03 02:44:27] (step=0040822) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 7.9870866757973, LR: 0.0003 +[2026-03-03 02:44:35] (step=0040823) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.987282332224614, LR: 0.0003 +[2026-03-03 02:44:43] (step=0040824) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 7.987477988651928, LR: 0.0003 +[2026-03-03 02:44:50] (step=0040825) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.987673645079241, LR: 0.0003 +[2026-03-03 02:44:58] (step=0040826) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 7.987869301506555, LR: 0.0003 +[2026-03-03 02:45:06] (step=0040827) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.988064957933868, LR: 0.0003 +[2026-03-03 02:45:14] (step=0040828) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 7.988260614361182, LR: 0.0003 +[2026-03-03 02:45:22] (step=0040829) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 7.988456270788495, LR: 0.0003 +[2026-03-03 02:45:30] (step=0040830) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.988651927215809, LR: 0.0003 +[2026-03-03 02:45:38] (step=0040831) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 7.988847583643123, LR: 0.0003 +[2026-03-03 02:45:45] (step=0040832) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 7.989043240070436, LR: 0.0003 +[2026-03-03 02:45:53] (step=0040833) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 7.98923889649775, LR: 0.0003 +[2026-03-03 02:46:01] (step=0040834) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 7.989434552925063, LR: 0.0003 +[2026-03-03 02:46:09] (step=0040835) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 7.989630209352377, LR: 0.0003 +[2026-03-03 02:46:17] (step=0040836) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 7.989825865779691, LR: 0.0003 +[2026-03-03 02:46:25] (step=0040837) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 7.990021522207004, LR: 0.0003 +[2026-03-03 02:46:33] (step=0040838) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 7.990217178634318, LR: 0.0003 +[2026-03-03 02:46:40] (step=0040839) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.9904128350616315, LR: 0.0003 +[2026-03-03 02:46:48] (step=0040840) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 7.9906084914889455, LR: 0.0003 +[2026-03-03 02:46:56] (step=0040841) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.9908041479162595, LR: 0.0003 +[2026-03-03 02:47:04] (step=0040842) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 7.990999804343573, LR: 0.0003 +[2026-03-03 02:47:12] (step=0040843) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 7.991195460770887, LR: 0.0003 +[2026-03-03 02:47:20] (step=0040844) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 7.9913911171982, LR: 0.0003 +[2026-03-03 02:47:28] (step=0040845) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 7.991586773625514, LR: 0.0003 +[2026-03-03 02:47:36] (step=0040846) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.991782430052827, LR: 0.0003 +[2026-03-03 02:47:43] (step=0040847) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 7.991978086480141, LR: 0.0003 +[2026-03-03 02:47:51] (step=0040848) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 7.992173742907455, LR: 0.0003 +[2026-03-03 02:47:59] (step=0040849) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 7.992369399334768, LR: 0.0003 +[2026-03-03 02:48:07] (step=0040850) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 7.992565055762082, LR: 0.0003 +[2026-03-03 02:48:15] (step=0040851) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 7.992760712189395, LR: 0.0003 +[2026-03-03 02:48:23] (step=0040852) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.992956368616709, LR: 0.0003 +[2026-03-03 02:48:31] (step=0040853) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.993152025044023, LR: 0.0003 +[2026-03-03 02:48:39] (step=0040854) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 7.993347681471336, LR: 0.0003 +[2026-03-03 02:48:46] (step=0040855) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.99354333789865, LR: 0.0003 +[2026-03-03 02:48:54] (step=0040856) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 7.993738994325963, LR: 0.0003 +[2026-03-03 02:49:02] (step=0040857) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 7.993934650753277, LR: 0.0003 +[2026-03-03 02:49:10] (step=0040858) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.9941303071805905, LR: 0.0003 +[2026-03-03 02:49:18] (step=0040859) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 7.9943259636079045, LR: 0.0003 +[2026-03-03 02:49:26] (step=0040860) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 7.9945216200352185, LR: 0.0003 +[2026-03-03 02:49:34] (step=0040861) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 7.994717276462532, LR: 0.0003 +[2026-03-03 02:49:42] (step=0040862) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 7.994912932889846, LR: 0.0003 +[2026-03-03 02:49:49] (step=0040863) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 7.995108589317159, LR: 0.0003 +[2026-03-03 02:49:57] (step=0040864) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 7.995304245744473, LR: 0.0003 +[2026-03-03 02:50:05] (step=0040865) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 7.995499902171787, LR: 0.0003 +[2026-03-03 02:50:13] (step=0040866) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 7.9956955585991, LR: 0.0003 +[2026-03-03 02:50:21] (step=0040867) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 7.995891215026414, LR: 0.0003 +[2026-03-03 02:50:29] (step=0040868) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 7.996086871453727, LR: 0.0003 +[2026-03-03 02:50:37] (step=0040869) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 7.996282527881041, LR: 0.0003 +[2026-03-03 02:50:45] (step=0040870) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 7.996478184308354, LR: 0.0003 +[2026-03-03 02:50:53] (step=0040871) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 7.996673840735668, LR: 0.0003 +[2026-03-03 02:51:00] (step=0040872) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 7.996869497162982, LR: 0.0003 +[2026-03-03 02:51:08] (step=0040873) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 7.997065153590295, LR: 0.0003 +[2026-03-03 02:51:16] (step=0040874) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 7.997260810017609, LR: 0.0003 +[2026-03-03 02:51:24] (step=0040875) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 7.997456466444922, LR: 0.0003 +[2026-03-03 02:51:32] (step=0040876) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 7.997652122872236, LR: 0.0003 +[2026-03-03 02:51:40] (step=0040877) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 7.99784777929955, LR: 0.0003 +[2026-03-03 02:51:48] (step=0040878) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 7.9980434357268635, LR: 0.0003 +[2026-03-03 02:51:55] (step=0040879) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 7.9982390921541775, LR: 0.0003 +[2026-03-03 02:52:03] (step=0040880) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 7.998434748581491, LR: 0.0003 +[2026-03-03 02:52:11] (step=0040881) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 7.998630405008805, LR: 0.0003 +[2026-03-03 02:52:19] (step=0040882) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 7.998826061436118, LR: 0.0003 +[2026-03-03 02:52:27] (step=0040883) Train Loss: 0.4579, Train Steps/Sec: 0.12, Epoch: 7.999021717863432, LR: 0.0003 +[2026-03-03 02:52:35] (step=0040884) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 7.999217374290746, LR: 0.0003 +[2026-03-03 02:52:43] (step=0040885) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 7.999413030718059, LR: 0.0003 +[2026-03-03 02:52:51] (step=0040886) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 7.999608687145373, LR: 0.0003 +[2026-03-03 02:52:59] (step=0040887) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 7.999804343572686, LR: 0.0003 +[2026-03-03 02:53:07] (step=0040888) Train Loss: 0.4428, Train Steps/Sec: 0.12, Epoch: 8.0, LR: 0.0003 +[2026-03-03 02:53:07] Beginning epoch 8... +[2026-03-03 02:53:17] (step=0040889) Train Loss: 0.4430, Train Steps/Sec: 0.10, Epoch: 8.000195656427314, LR: 0.0003 +[2026-03-03 02:53:25] (step=0040890) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.000391312854628, LR: 0.0003 +[2026-03-03 02:53:33] (step=0040891) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 8.00058696928194, LR: 0.0003 +[2026-03-03 02:53:40] (step=0040892) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.000782625709254, LR: 0.0003 +[2026-03-03 02:53:48] (step=0040893) Train Loss: 0.4472, Train Steps/Sec: 0.12, Epoch: 8.000978282136568, LR: 0.0003 +[2026-03-03 02:53:56] (step=0040894) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.001173938563882, LR: 0.0003 +[2026-03-03 02:54:04] (step=0040895) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.001369594991196, LR: 0.0003 +[2026-03-03 02:54:12] (step=0040896) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.001565251418508, LR: 0.0003 +[2026-03-03 02:54:20] (step=0040897) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.001760907845823, LR: 0.0003 +[2026-03-03 02:54:28] (step=0040898) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.001956564273137, LR: 0.0003 +[2026-03-03 02:54:36] (step=0040899) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.00215222070045, LR: 0.0003 +[2026-03-03 02:54:44] (step=0040900) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.002347877127765, LR: 0.0003 +[2026-03-03 02:54:52] (step=0040901) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.002543533555077, LR: 0.0003 +[2026-03-03 02:54:59] (step=0040902) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.00273918998239, LR: 0.0003 +[2026-03-03 02:55:07] (step=0040903) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.002934846409705, LR: 0.0003 +[2026-03-03 02:55:15] (step=0040904) Train Loss: 0.4483, Train Steps/Sec: 0.12, Epoch: 8.003130502837019, LR: 0.0003 +[2026-03-03 02:55:23] (step=0040905) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.003326159264331, LR: 0.0003 +[2026-03-03 02:55:31] (step=0040906) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.003521815691645, LR: 0.0003 +[2026-03-03 02:55:39] (step=0040907) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.003717472118959, LR: 0.0003 +[2026-03-03 02:55:47] (step=0040908) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 8.003913128546273, LR: 0.0003 +[2026-03-03 02:55:55] (step=0040909) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 8.004108784973587, LR: 0.0003 +[2026-03-03 02:56:03] (step=0040910) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.0043044414009, LR: 0.0003 +[2026-03-03 02:56:11] (step=0040911) Train Loss: 0.4394, Train Steps/Sec: 0.12, Epoch: 8.004500097828213, LR: 0.0003 +[2026-03-03 02:56:19] (step=0040912) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.004695754255527, LR: 0.0003 +[2026-03-03 02:56:27] (step=0040913) Train Loss: 0.4475, Train Steps/Sec: 0.12, Epoch: 8.004891410682841, LR: 0.0003 +[2026-03-03 02:56:35] (step=0040914) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.005087067110155, LR: 0.0003 +[2026-03-03 02:56:43] (step=0040915) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.005282723537468, LR: 0.0003 +[2026-03-03 02:56:51] (step=0040916) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.005478379964782, LR: 0.0003 +[2026-03-03 02:56:59] (step=0040917) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.005674036392096, LR: 0.0003 +[2026-03-03 02:57:06] (step=0040918) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.00586969281941, LR: 0.0003 +[2026-03-03 02:57:14] (step=0040919) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.006065349246724, LR: 0.0003 +[2026-03-03 02:57:22] (step=0040920) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.006261005674036, LR: 0.0003 +[2026-03-03 02:57:30] (step=0040921) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.00645666210135, LR: 0.0003 +[2026-03-03 02:57:38] (step=0040922) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.006652318528664, LR: 0.0003 +[2026-03-03 02:57:46] (step=0040923) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.006847974955978, LR: 0.0003 +[2026-03-03 02:57:54] (step=0040924) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.007043631383292, LR: 0.0003 +[2026-03-03 02:58:02] (step=0040925) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 8.007239287810604, LR: 0.0003 +[2026-03-03 02:58:09] (step=0040926) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.007434944237918, LR: 0.0003 +[2026-03-03 02:58:17] (step=0040927) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.007630600665232, LR: 0.0003 +[2026-03-03 02:58:25] (step=0040928) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.007826257092546, LR: 0.0003 +[2026-03-03 02:58:33] (step=0040929) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.008021913519858, LR: 0.0003 +[2026-03-03 02:58:41] (step=0040930) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.008217569947172, LR: 0.0003 +[2026-03-03 02:58:49] (step=0040931) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.008413226374486, LR: 0.0003 +[2026-03-03 02:58:57] (step=0040932) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 8.0086088828018, LR: 0.0003 +[2026-03-03 02:59:04] (step=0040933) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.008804539229114, LR: 0.0003 +[2026-03-03 02:59:12] (step=0040934) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.009000195656427, LR: 0.0003 +[2026-03-03 02:59:20] (step=0040935) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.00919585208374, LR: 0.0003 +[2026-03-03 02:59:28] (step=0040936) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.009391508511055, LR: 0.0003 +[2026-03-03 02:59:36] (step=0040937) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.009587164938369, LR: 0.0003 +[2026-03-03 02:59:44] (step=0040938) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.009782821365683, LR: 0.0003 +[2026-03-03 02:59:52] (step=0040939) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.009978477792995, LR: 0.0003 +[2026-03-03 02:59:59] (step=0040940) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.010174134220309, LR: 0.0003 +[2026-03-03 03:00:07] (step=0040941) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.010369790647623, LR: 0.0003 +[2026-03-03 03:00:15] (step=0040942) Train Loss: 0.4494, Train Steps/Sec: 0.12, Epoch: 8.010565447074937, LR: 0.0003 +[2026-03-03 03:00:23] (step=0040943) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.01076110350225, LR: 0.0003 +[2026-03-03 03:00:31] (step=0040944) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.010956759929563, LR: 0.0003 +[2026-03-03 03:00:39] (step=0040945) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 8.011152416356877, LR: 0.0003 +[2026-03-03 03:00:47] (step=0040946) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.011348072784191, LR: 0.0003 +[2026-03-03 03:00:55] (step=0040947) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.011543729211505, LR: 0.0003 +[2026-03-03 03:01:03] (step=0040948) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.011739385638819, LR: 0.0003 +[2026-03-03 03:01:10] (step=0040949) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.011935042066131, LR: 0.0003 +[2026-03-03 03:01:18] (step=0040950) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.012130698493445, LR: 0.0003 +[2026-03-03 03:01:26] (step=0040951) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.01232635492076, LR: 0.0003 +[2026-03-03 03:01:34] (step=0040952) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.012522011348073, LR: 0.0003 +[2026-03-03 03:01:42] (step=0040953) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.012717667775387, LR: 0.0003 +[2026-03-03 03:01:50] (step=0040954) Train Loss: 0.4483, Train Steps/Sec: 0.12, Epoch: 8.0129133242027, LR: 0.0003 +[2026-03-03 03:01:58] (step=0040955) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.013108980630014, LR: 0.0003 +[2026-03-03 03:02:06] (step=0040956) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 8.013304637057328, LR: 0.0003 +[2026-03-03 03:02:14] (step=0040957) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.013500293484642, LR: 0.0003 +[2026-03-03 03:02:21] (step=0040958) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 8.013695949911954, LR: 0.0003 +[2026-03-03 03:02:29] (step=0040959) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.013891606339268, LR: 0.0003 +[2026-03-03 03:02:37] (step=0040960) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.014087262766582, LR: 0.0003 +[2026-03-03 03:02:45] (step=0040961) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.014282919193896, LR: 0.0003 +[2026-03-03 03:02:53] (step=0040962) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.01447857562121, LR: 0.0003 +[2026-03-03 03:03:01] (step=0040963) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.014674232048522, LR: 0.0003 +[2026-03-03 03:03:09] (step=0040964) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.014869888475836, LR: 0.0003 +[2026-03-03 03:03:16] (step=0040965) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.01506554490315, LR: 0.0003 +[2026-03-03 03:03:24] (step=0040966) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.015261201330464, LR: 0.0003 +[2026-03-03 03:03:32] (step=0040967) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.015456857757778, LR: 0.0003 +[2026-03-03 03:03:40] (step=0040968) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.01565251418509, LR: 0.0003 +[2026-03-03 03:03:48] (step=0040969) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 8.015848170612404, LR: 0.0003 +[2026-03-03 03:03:56] (step=0040970) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.016043827039718, LR: 0.0003 +[2026-03-03 03:04:04] (step=0040971) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.016239483467032, LR: 0.0003 +[2026-03-03 03:04:11] (step=0040972) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.016435139894346, LR: 0.0003 +[2026-03-03 03:04:19] (step=0040973) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.016630796321659, LR: 0.0003 +[2026-03-03 03:04:27] (step=0040974) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.016826452748973, LR: 0.0003 +[2026-03-03 03:04:35] (step=0040975) Train Loss: 0.4451, Train Steps/Sec: 0.12, Epoch: 8.017022109176287, LR: 0.0003 +[2026-03-03 03:04:43] (step=0040976) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.0172177656036, LR: 0.0003 +[2026-03-03 03:04:51] (step=0040977) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.017413422030915, LR: 0.0003 +[2026-03-03 03:04:59] (step=0040978) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.017609078458227, LR: 0.0003 +[2026-03-03 03:05:07] (step=0040979) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.01780473488554, LR: 0.0003 +[2026-03-03 03:05:15] (step=0040980) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.018000391312855, LR: 0.0003 +[2026-03-03 03:05:23] (step=0040981) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.018196047740169, LR: 0.0003 +[2026-03-03 03:05:30] (step=0040982) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.018391704167481, LR: 0.0003 +[2026-03-03 03:05:38] (step=0040983) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.018587360594795, LR: 0.0003 +[2026-03-03 03:05:46] (step=0040984) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.018783017022109, LR: 0.0003 +[2026-03-03 03:05:54] (step=0040985) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.018978673449423, LR: 0.0003 +[2026-03-03 03:06:02] (step=0040986) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.019174329876737, LR: 0.0003 +[2026-03-03 03:06:10] (step=0040987) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.01936998630405, LR: 0.0003 +[2026-03-03 03:06:18] (step=0040988) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.019565642731363, LR: 0.0003 +[2026-03-03 03:06:25] (step=0040989) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.019761299158677, LR: 0.0003 +[2026-03-03 03:06:34] (step=0040990) Train Loss: 0.4449, Train Steps/Sec: 0.12, Epoch: 8.019956955585991, LR: 0.0003 +[2026-03-03 03:06:42] (step=0040991) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.020152612013305, LR: 0.0003 +[2026-03-03 03:06:50] (step=0040992) Train Loss: 0.4440, Train Steps/Sec: 0.12, Epoch: 8.020348268440618, LR: 0.0003 +[2026-03-03 03:06:57] (step=0040993) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.020543924867932, LR: 0.0003 +[2026-03-03 03:07:05] (step=0040994) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.020739581295246, LR: 0.0003 +[2026-03-03 03:07:13] (step=0040995) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 8.02093523772256, LR: 0.0003 +[2026-03-03 03:07:21] (step=0040996) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.021130894149874, LR: 0.0003 +[2026-03-03 03:07:29] (step=0040997) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.021326550577186, LR: 0.0003 +[2026-03-03 03:07:37] (step=0040998) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.0215222070045, LR: 0.0003 +[2026-03-03 03:07:45] (step=0040999) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 8.021717863431814, LR: 0.0003 +[2026-03-03 03:07:52] (step=0041000) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 8.021913519859128, LR: 0.0003 +[2026-03-03 03:07:52] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0041000/ +[2026-03-03 03:08:00] (step=0041001) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.022109176286442, LR: 0.0003 +[2026-03-03 03:08:08] (step=0041002) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.022304832713754, LR: 0.0003 +[2026-03-03 03:08:16] (step=0041003) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.022500489141068, LR: 0.0003 +[2026-03-03 03:08:24] (step=0041004) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.022696145568382, LR: 0.0003 +[2026-03-03 03:08:32] (step=0041005) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.022891801995696, LR: 0.0003 +[2026-03-03 03:08:40] (step=0041006) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.02308745842301, LR: 0.0003 +[2026-03-03 03:08:48] (step=0041007) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.023283114850322, LR: 0.0003 +[2026-03-03 03:08:56] (step=0041008) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 8.023478771277636, LR: 0.0003 +[2026-03-03 03:09:03] (step=0041009) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.02367442770495, LR: 0.0003 +[2026-03-03 03:09:11] (step=0041010) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.023870084132264, LR: 0.0003 +[2026-03-03 03:09:19] (step=0041011) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.024065740559577, LR: 0.0003 +[2026-03-03 03:09:27] (step=0041012) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 8.02426139698689, LR: 0.0003 +[2026-03-03 03:09:35] (step=0041013) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.024457053414205, LR: 0.0003 +[2026-03-03 03:09:43] (step=0041014) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.024652709841519, LR: 0.0003 +[2026-03-03 03:09:51] (step=0041015) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.024848366268833, LR: 0.0003 +[2026-03-03 03:09:58] (step=0041016) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.025044022696145, LR: 0.0003 +[2026-03-03 03:10:06] (step=0041017) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 8.025239679123459, LR: 0.0003 +[2026-03-03 03:10:14] (step=0041018) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 8.025435335550773, LR: 0.0003 +[2026-03-03 03:10:22] (step=0041019) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.025630991978087, LR: 0.0003 +[2026-03-03 03:10:30] (step=0041020) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.0258266484054, LR: 0.0003 +[2026-03-03 03:10:38] (step=0041021) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.026022304832713, LR: 0.0003 +[2026-03-03 03:10:46] (step=0041022) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.026217961260027, LR: 0.0003 +[2026-03-03 03:10:54] (step=0041023) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.026413617687341, LR: 0.0003 +[2026-03-03 03:11:01] (step=0041024) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.026609274114655, LR: 0.0003 +[2026-03-03 03:11:09] (step=0041025) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.026804930541969, LR: 0.0003 +[2026-03-03 03:11:17] (step=0041026) Train Loss: 0.4335, Train Steps/Sec: 0.12, Epoch: 8.027000586969281, LR: 0.0003 +[2026-03-03 03:11:25] (step=0041027) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.027196243396595, LR: 0.0003 +[2026-03-03 03:11:33] (step=0041028) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.02739189982391, LR: 0.0003 +[2026-03-03 03:11:41] (step=0041029) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.027587556251223, LR: 0.0003 +[2026-03-03 03:11:49] (step=0041030) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.027783212678537, LR: 0.0003 +[2026-03-03 03:11:57] (step=0041031) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.02797886910585, LR: 0.0003 +[2026-03-03 03:12:05] (step=0041032) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.028174525533164, LR: 0.0003 +[2026-03-03 03:12:12] (step=0041033) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.028370181960478, LR: 0.0003 +[2026-03-03 03:12:20] (step=0041034) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.028565838387792, LR: 0.0003 +[2026-03-03 03:12:28] (step=0041035) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.028761494815104, LR: 0.0003 +[2026-03-03 03:12:36] (step=0041036) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.028957151242418, LR: 0.0003 +[2026-03-03 03:12:44] (step=0041037) Train Loss: 0.4517, Train Steps/Sec: 0.12, Epoch: 8.029152807669732, LR: 0.0003 +[2026-03-03 03:12:52] (step=0041038) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.029348464097046, LR: 0.0003 +[2026-03-03 03:13:00] (step=0041039) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 8.02954412052436, LR: 0.0003 +[2026-03-03 03:13:08] (step=0041040) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.029739776951672, LR: 0.0003 +[2026-03-03 03:13:16] (step=0041041) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 8.029935433378986, LR: 0.0003 +[2026-03-03 03:13:24] (step=0041042) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 8.0301310898063, LR: 0.0003 +[2026-03-03 03:13:31] (step=0041043) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.030326746233614, LR: 0.0003 +[2026-03-03 03:13:39] (step=0041044) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.030522402660928, LR: 0.0003 +[2026-03-03 03:13:47] (step=0041045) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.03071805908824, LR: 0.0003 +[2026-03-03 03:13:55] (step=0041046) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 8.030913715515554, LR: 0.0003 +[2026-03-03 03:14:03] (step=0041047) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 8.031109371942868, LR: 0.0003 +[2026-03-03 03:14:11] (step=0041048) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.031305028370182, LR: 0.0003 +[2026-03-03 03:14:19] (step=0041049) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.031500684797496, LR: 0.0003 +[2026-03-03 03:14:27] (step=0041050) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.031696341224809, LR: 0.0003 +[2026-03-03 03:14:34] (step=0041051) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.031891997652123, LR: 0.0003 +[2026-03-03 03:14:42] (step=0041052) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.032087654079437, LR: 0.0003 +[2026-03-03 03:14:50] (step=0041053) Train Loss: 0.4416, Train Steps/Sec: 0.12, Epoch: 8.03228331050675, LR: 0.0003 +[2026-03-03 03:14:58] (step=0041054) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.032478966934065, LR: 0.0003 +[2026-03-03 03:15:06] (step=0041055) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.032674623361377, LR: 0.0003 +[2026-03-03 03:15:14] (step=0041056) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.03287027978869, LR: 0.0003 +[2026-03-03 03:15:22] (step=0041057) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.033065936216005, LR: 0.0003 +[2026-03-03 03:15:30] (step=0041058) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.033261592643319, LR: 0.0003 +[2026-03-03 03:15:37] (step=0041059) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.033457249070633, LR: 0.0003 +[2026-03-03 03:15:45] (step=0041060) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.033652905497945, LR: 0.0003 +[2026-03-03 03:15:53] (step=0041061) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.033848561925259, LR: 0.0003 +[2026-03-03 03:16:01] (step=0041062) Train Loss: 0.4261, Train Steps/Sec: 0.13, Epoch: 8.034044218352573, LR: 0.0003 +[2026-03-03 03:16:09] (step=0041063) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.034239874779887, LR: 0.0003 +[2026-03-03 03:16:17] (step=0041064) Train Loss: 0.4207, Train Steps/Sec: 0.13, Epoch: 8.0344355312072, LR: 0.0003 +[2026-03-03 03:16:25] (step=0041065) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.034631187634513, LR: 0.0003 +[2026-03-03 03:16:33] (step=0041066) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.034826844061827, LR: 0.0003 +[2026-03-03 03:16:40] (step=0041067) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.035022500489141, LR: 0.0003 +[2026-03-03 03:16:48] (step=0041068) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.035218156916455, LR: 0.0003 +[2026-03-03 03:16:56] (step=0041069) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 8.035413813343768, LR: 0.0003 +[2026-03-03 03:17:04] (step=0041070) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.035609469771082, LR: 0.0003 +[2026-03-03 03:17:12] (step=0041071) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.035805126198396, LR: 0.0003 +[2026-03-03 03:17:20] (step=0041072) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.03600078262571, LR: 0.0003 +[2026-03-03 03:17:28] (step=0041073) Train Loss: 0.4505, Train Steps/Sec: 0.12, Epoch: 8.036196439053024, LR: 0.0003 +[2026-03-03 03:17:36] (step=0041074) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.036392095480336, LR: 0.0003 +[2026-03-03 03:17:43] (step=0041075) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.03658775190765, LR: 0.0003 +[2026-03-03 03:17:51] (step=0041076) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 8.036783408334964, LR: 0.0003 +[2026-03-03 03:17:59] (step=0041077) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.036979064762278, LR: 0.0003 +[2026-03-03 03:18:07] (step=0041078) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 8.037174721189592, LR: 0.0003 +[2026-03-03 03:18:15] (step=0041079) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.037370377616904, LR: 0.0003 +[2026-03-03 03:18:23] (step=0041080) Train Loss: 0.4407, Train Steps/Sec: 0.12, Epoch: 8.037566034044218, LR: 0.0003 +[2026-03-03 03:18:31] (step=0041081) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.037761690471532, LR: 0.0003 +[2026-03-03 03:18:39] (step=0041082) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.037957346898846, LR: 0.0003 +[2026-03-03 03:18:47] (step=0041083) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.03815300332616, LR: 0.0003 +[2026-03-03 03:18:54] (step=0041084) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.038348659753472, LR: 0.0003 +[2026-03-03 03:19:02] (step=0041085) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.038544316180786, LR: 0.0003 +[2026-03-03 03:19:10] (step=0041086) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.0387399726081, LR: 0.0003 +[2026-03-03 03:19:18] (step=0041087) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.038935629035414, LR: 0.0003 +[2026-03-03 03:19:26] (step=0041088) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.039131285462727, LR: 0.0003 +[2026-03-03 03:19:34] (step=0041089) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.03932694189004, LR: 0.0003 +[2026-03-03 03:19:42] (step=0041090) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.039522598317355, LR: 0.0003 +[2026-03-03 03:19:50] (step=0041091) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.039718254744669, LR: 0.0003 +[2026-03-03 03:19:57] (step=0041092) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.039913911171983, LR: 0.0003 +[2026-03-03 03:20:05] (step=0041093) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.040109567599295, LR: 0.0003 +[2026-03-03 03:20:13] (step=0041094) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.040305224026609, LR: 0.0003 +[2026-03-03 03:20:21] (step=0041095) Train Loss: 0.4596, Train Steps/Sec: 0.12, Epoch: 8.040500880453923, LR: 0.0003 +[2026-03-03 03:20:29] (step=0041096) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.040696536881237, LR: 0.0003 +[2026-03-03 03:20:37] (step=0041097) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.04089219330855, LR: 0.0003 +[2026-03-03 03:20:45] (step=0041098) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.041087849735863, LR: 0.0003 +[2026-03-03 03:20:53] (step=0041099) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.041283506163177, LR: 0.0003 +[2026-03-03 03:21:01] (step=0041100) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.041479162590491, LR: 0.0003 +[2026-03-03 03:21:08] (step=0041101) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.041674819017805, LR: 0.0003 +[2026-03-03 03:21:16] (step=0041102) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 8.04187047544512, LR: 0.0003 +[2026-03-03 03:21:24] (step=0041103) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.042066131872431, LR: 0.0003 +[2026-03-03 03:21:32] (step=0041104) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.042261788299745, LR: 0.0003 +[2026-03-03 03:21:40] (step=0041105) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.04245744472706, LR: 0.0003 +[2026-03-03 03:21:48] (step=0041106) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.042653101154373, LR: 0.0003 +[2026-03-03 03:21:56] (step=0041107) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.042848757581687, LR: 0.0003 +[2026-03-03 03:22:04] (step=0041108) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.043044414009, LR: 0.0003 +[2026-03-03 03:22:11] (step=0041109) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.043240070436314, LR: 0.0003 +[2026-03-03 03:22:19] (step=0041110) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.043435726863628, LR: 0.0003 +[2026-03-03 03:22:27] (step=0041111) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.043631383290942, LR: 0.0003 +[2026-03-03 03:22:35] (step=0041112) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.043827039718254, LR: 0.0003 +[2026-03-03 03:22:43] (step=0041113) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.044022696145568, LR: 0.0003 +[2026-03-03 03:22:51] (step=0041114) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.044218352572882, LR: 0.0003 +[2026-03-03 03:22:58] (step=0041115) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.044414009000196, LR: 0.0003 +[2026-03-03 03:23:06] (step=0041116) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.04460966542751, LR: 0.0003 +[2026-03-03 03:23:14] (step=0041117) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.044805321854822, LR: 0.0003 +[2026-03-03 03:23:22] (step=0041118) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.045000978282136, LR: 0.0003 +[2026-03-03 03:23:30] (step=0041119) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.04519663470945, LR: 0.0003 +[2026-03-03 03:23:38] (step=0041120) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.045392291136764, LR: 0.0003 +[2026-03-03 03:23:46] (step=0041121) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.045587947564078, LR: 0.0003 +[2026-03-03 03:23:54] (step=0041122) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.04578360399139, LR: 0.0003 +[2026-03-03 03:24:01] (step=0041123) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.045979260418704, LR: 0.0003 +[2026-03-03 03:24:09] (step=0041124) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.046174916846018, LR: 0.0003 +[2026-03-03 03:24:17] (step=0041125) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.046370573273332, LR: 0.0003 +[2026-03-03 03:24:25] (step=0041126) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.046566229700646, LR: 0.0003 +[2026-03-03 03:24:33] (step=0041127) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 8.046761886127959, LR: 0.0003 +[2026-03-03 03:24:41] (step=0041128) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.046957542555273, LR: 0.0003 +[2026-03-03 03:24:49] (step=0041129) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.047153198982587, LR: 0.0003 +[2026-03-03 03:24:56] (step=0041130) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.0473488554099, LR: 0.0003 +[2026-03-03 03:25:04] (step=0041131) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.047544511837215, LR: 0.0003 +[2026-03-03 03:25:12] (step=0041132) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.047740168264527, LR: 0.0003 +[2026-03-03 03:25:20] (step=0041133) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 8.04793582469184, LR: 0.0003 +[2026-03-03 03:25:28] (step=0041134) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.048131481119155, LR: 0.0003 +[2026-03-03 03:25:36] (step=0041135) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.048327137546469, LR: 0.0003 +[2026-03-03 03:25:44] (step=0041136) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.048522793973783, LR: 0.0003 +[2026-03-03 03:25:51] (step=0041137) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.048718450401095, LR: 0.0003 +[2026-03-03 03:25:59] (step=0041138) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.04891410682841, LR: 0.0003 +[2026-03-03 03:26:07] (step=0041139) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.049109763255723, LR: 0.0003 +[2026-03-03 03:26:15] (step=0041140) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.049305419683037, LR: 0.0003 +[2026-03-03 03:26:23] (step=0041141) Train Loss: 0.4333, Train Steps/Sec: 0.12, Epoch: 8.04950107611035, LR: 0.0003 +[2026-03-03 03:26:31] (step=0041142) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.049696732537663, LR: 0.0003 +[2026-03-03 03:26:39] (step=0041143) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.049892388964977, LR: 0.0003 +[2026-03-03 03:26:47] (step=0041144) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.050088045392291, LR: 0.0003 +[2026-03-03 03:26:55] (step=0041145) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.050283701819605, LR: 0.0003 +[2026-03-03 03:27:02] (step=0041146) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.050479358246918, LR: 0.0003 +[2026-03-03 03:27:10] (step=0041147) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.050675014674232, LR: 0.0003 +[2026-03-03 03:27:18] (step=0041148) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.050870671101546, LR: 0.0003 +[2026-03-03 03:27:26] (step=0041149) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.05106632752886, LR: 0.0003 +[2026-03-03 03:27:34] (step=0041150) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.051261983956174, LR: 0.0003 +[2026-03-03 03:27:42] (step=0041151) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.051457640383486, LR: 0.0003 +[2026-03-03 03:27:50] (step=0041152) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 8.0516532968108, LR: 0.0003 +[2026-03-03 03:27:57] (step=0041153) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.051848953238114, LR: 0.0003 +[2026-03-03 03:28:05] (step=0041154) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.052044609665428, LR: 0.0003 +[2026-03-03 03:28:13] (step=0041155) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.052240266092742, LR: 0.0003 +[2026-03-03 03:28:21] (step=0041156) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 8.052435922520054, LR: 0.0003 +[2026-03-03 03:28:29] (step=0041157) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.052631578947368, LR: 0.0003 +[2026-03-03 03:28:37] (step=0041158) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.052827235374682, LR: 0.0003 +[2026-03-03 03:28:45] (step=0041159) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.053022891801996, LR: 0.0003 +[2026-03-03 03:28:53] (step=0041160) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.05321854822931, LR: 0.0003 +[2026-03-03 03:29:00] (step=0041161) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.053414204656622, LR: 0.0003 +[2026-03-03 03:29:08] (step=0041162) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.053609861083936, LR: 0.0003 +[2026-03-03 03:29:16] (step=0041163) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.05380551751125, LR: 0.0003 +[2026-03-03 03:29:24] (step=0041164) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.054001173938564, LR: 0.0003 +[2026-03-03 03:29:32] (step=0041165) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.054196830365877, LR: 0.0003 +[2026-03-03 03:29:40] (step=0041166) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.05439248679319, LR: 0.0003 +[2026-03-03 03:29:48] (step=0041167) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 8.054588143220505, LR: 0.0003 +[2026-03-03 03:29:55] (step=0041168) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.054783799647819, LR: 0.0003 +[2026-03-03 03:30:03] (step=0041169) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.054979456075133, LR: 0.0003 +[2026-03-03 03:30:11] (step=0041170) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.055175112502445, LR: 0.0003 +[2026-03-03 03:30:19] (step=0041171) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.055370768929759, LR: 0.0003 +[2026-03-03 03:30:27] (step=0041172) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.055566425357073, LR: 0.0003 +[2026-03-03 03:30:35] (step=0041173) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.055762081784387, LR: 0.0003 +[2026-03-03 03:30:43] (step=0041174) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.055957738211701, LR: 0.0003 +[2026-03-03 03:30:50] (step=0041175) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.056153394639013, LR: 0.0003 +[2026-03-03 03:30:58] (step=0041176) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.056349051066327, LR: 0.0003 +[2026-03-03 03:31:06] (step=0041177) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.056544707493641, LR: 0.0003 +[2026-03-03 03:31:14] (step=0041178) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.056740363920955, LR: 0.0003 +[2026-03-03 03:31:22] (step=0041179) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.05693602034827, LR: 0.0003 +[2026-03-03 03:31:30] (step=0041180) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.057131676775581, LR: 0.0003 +[2026-03-03 03:31:38] (step=0041181) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.057327333202895, LR: 0.0003 +[2026-03-03 03:31:45] (step=0041182) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.05752298963021, LR: 0.0003 +[2026-03-03 03:31:53] (step=0041183) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.057718646057523, LR: 0.0003 +[2026-03-03 03:32:01] (step=0041184) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.057914302484837, LR: 0.0003 +[2026-03-03 03:32:09] (step=0041185) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.05810995891215, LR: 0.0003 +[2026-03-03 03:32:17] (step=0041186) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.058305615339464, LR: 0.0003 +[2026-03-03 03:32:25] (step=0041187) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.058501271766778, LR: 0.0003 +[2026-03-03 03:32:33] (step=0041188) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 8.058696928194092, LR: 0.0003 +[2026-03-03 03:32:41] (step=0041189) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.058892584621406, LR: 0.0003 +[2026-03-03 03:32:49] (step=0041190) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.059088241048718, LR: 0.0003 +[2026-03-03 03:32:56] (step=0041191) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.059283897476032, LR: 0.0003 +[2026-03-03 03:33:04] (step=0041192) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.059479553903346, LR: 0.0003 +[2026-03-03 03:33:12] (step=0041193) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.05967521033066, LR: 0.0003 +[2026-03-03 03:33:20] (step=0041194) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.059870866757972, LR: 0.0003 +[2026-03-03 03:33:28] (step=0041195) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.060066523185286, LR: 0.0003 +[2026-03-03 03:33:36] (step=0041196) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.0602621796126, LR: 0.0003 +[2026-03-03 03:33:44] (step=0041197) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.060457836039914, LR: 0.0003 +[2026-03-03 03:33:51] (step=0041198) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 8.060653492467228, LR: 0.0003 +[2026-03-03 03:33:59] (step=0041199) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.06084914889454, LR: 0.0003 +[2026-03-03 03:34:07] (step=0041200) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.061044805321854, LR: 0.0003 +[2026-03-03 03:34:15] (step=0041201) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.061240461749168, LR: 0.0003 +[2026-03-03 03:34:23] (step=0041202) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 8.061436118176482, LR: 0.0003 +[2026-03-03 03:34:31] (step=0041203) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.061631774603796, LR: 0.0003 +[2026-03-03 03:34:39] (step=0041204) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.061827431031109, LR: 0.0003 +[2026-03-03 03:34:46] (step=0041205) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.062023087458423, LR: 0.0003 +[2026-03-03 03:34:54] (step=0041206) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.062218743885737, LR: 0.0003 +[2026-03-03 03:35:02] (step=0041207) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.06241440031305, LR: 0.0003 +[2026-03-03 03:35:10] (step=0041208) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.062610056740365, LR: 0.0003 +[2026-03-03 03:35:18] (step=0041209) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.062805713167677, LR: 0.0003 +[2026-03-03 03:35:26] (step=0041210) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 8.063001369594991, LR: 0.0003 +[2026-03-03 03:35:34] (step=0041211) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.063197026022305, LR: 0.0003 +[2026-03-03 03:35:41] (step=0041212) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.063392682449619, LR: 0.0003 +[2026-03-03 03:35:49] (step=0041213) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.063588338876933, LR: 0.0003 +[2026-03-03 03:35:57] (step=0041214) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.063783995304245, LR: 0.0003 +[2026-03-03 03:36:05] (step=0041215) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 8.06397965173156, LR: 0.0003 +[2026-03-03 03:36:13] (step=0041216) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.064175308158873, LR: 0.0003 +[2026-03-03 03:36:21] (step=0041217) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.064370964586187, LR: 0.0003 +[2026-03-03 03:36:29] (step=0041218) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.0645666210135, LR: 0.0003 +[2026-03-03 03:36:37] (step=0041219) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.064762277440813, LR: 0.0003 +[2026-03-03 03:36:44] (step=0041220) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.064957933868127, LR: 0.0003 +[2026-03-03 03:36:52] (step=0041221) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.065153590295441, LR: 0.0003 +[2026-03-03 03:37:00] (step=0041222) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.065349246722755, LR: 0.0003 +[2026-03-03 03:37:08] (step=0041223) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.065544903150068, LR: 0.0003 +[2026-03-03 03:37:16] (step=0041224) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.065740559577382, LR: 0.0003 +[2026-03-03 03:37:24] (step=0041225) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.065936216004696, LR: 0.0003 +[2026-03-03 03:37:32] (step=0041226) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.06613187243201, LR: 0.0003 +[2026-03-03 03:37:40] (step=0041227) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.066327528859324, LR: 0.0003 +[2026-03-03 03:37:47] (step=0041228) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.066523185286636, LR: 0.0003 +[2026-03-03 03:37:55] (step=0041229) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.06671884171395, LR: 0.0003 +[2026-03-03 03:38:03] (step=0041230) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.066914498141264, LR: 0.0003 +[2026-03-03 03:38:11] (step=0041231) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.067110154568578, LR: 0.0003 +[2026-03-03 03:38:19] (step=0041232) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.067305810995892, LR: 0.0003 +[2026-03-03 03:38:27] (step=0041233) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.067501467423204, LR: 0.0003 +[2026-03-03 03:38:35] (step=0041234) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 8.067697123850518, LR: 0.0003 +[2026-03-03 03:38:43] (step=0041235) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.067892780277832, LR: 0.0003 +[2026-03-03 03:38:50] (step=0041236) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.068088436705146, LR: 0.0003 +[2026-03-03 03:38:58] (step=0041237) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 8.06828409313246, LR: 0.0003 +[2026-03-03 03:39:06] (step=0041238) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.068479749559772, LR: 0.0003 +[2026-03-03 03:39:14] (step=0041239) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.068675405987086, LR: 0.0003 +[2026-03-03 03:39:22] (step=0041240) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.0688710624144, LR: 0.0003 +[2026-03-03 03:39:30] (step=0041241) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.069066718841714, LR: 0.0003 +[2026-03-03 03:39:38] (step=0041242) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.069262375269028, LR: 0.0003 +[2026-03-03 03:39:46] (step=0041243) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.06945803169634, LR: 0.0003 +[2026-03-03 03:39:53] (step=0041244) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.069653688123655, LR: 0.0003 +[2026-03-03 03:40:01] (step=0041245) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 8.069849344550969, LR: 0.0003 +[2026-03-03 03:40:09] (step=0041246) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.070045000978283, LR: 0.0003 +[2026-03-03 03:40:17] (step=0041247) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.070240657405595, LR: 0.0003 +[2026-03-03 03:40:25] (step=0041248) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.070436313832909, LR: 0.0003 +[2026-03-03 03:40:33] (step=0041249) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.070631970260223, LR: 0.0003 +[2026-03-03 03:40:41] (step=0041250) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.070827626687537, LR: 0.0003 +[2026-03-03 03:40:48] (step=0041251) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.071023283114851, LR: 0.0003 +[2026-03-03 03:40:56] (step=0041252) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.071218939542163, LR: 0.0003 +[2026-03-03 03:41:04] (step=0041253) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.071414595969477, LR: 0.0003 +[2026-03-03 03:41:12] (step=0041254) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.071610252396791, LR: 0.0003 +[2026-03-03 03:41:20] (step=0041255) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.071805908824105, LR: 0.0003 +[2026-03-03 03:41:28] (step=0041256) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.07200156525142, LR: 0.0003 +[2026-03-03 03:41:36] (step=0041257) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.072197221678731, LR: 0.0003 +[2026-03-03 03:41:44] (step=0041258) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.072392878106045, LR: 0.0003 +[2026-03-03 03:41:51] (step=0041259) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.07258853453336, LR: 0.0003 +[2026-03-03 03:41:59] (step=0041260) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.072784190960673, LR: 0.0003 +[2026-03-03 03:42:07] (step=0041261) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.072979847387987, LR: 0.0003 +[2026-03-03 03:42:15] (step=0041262) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.0731755038153, LR: 0.0003 +[2026-03-03 03:42:23] (step=0041263) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.073371160242614, LR: 0.0003 +[2026-03-03 03:42:31] (step=0041264) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.073566816669928, LR: 0.0003 +[2026-03-03 03:42:39] (step=0041265) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.073762473097242, LR: 0.0003 +[2026-03-03 03:42:46] (step=0041266) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.073958129524556, LR: 0.0003 +[2026-03-03 03:42:54] (step=0041267) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.074153785951868, LR: 0.0003 +[2026-03-03 03:43:02] (step=0041268) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 8.074349442379182, LR: 0.0003 +[2026-03-03 03:43:10] (step=0041269) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.074545098806496, LR: 0.0003 +[2026-03-03 03:43:18] (step=0041270) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.07474075523381, LR: 0.0003 +[2026-03-03 03:43:26] (step=0041271) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.074936411661122, LR: 0.0003 +[2026-03-03 03:43:34] (step=0041272) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.075132068088436, LR: 0.0003 +[2026-03-03 03:43:41] (step=0041273) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.07532772451575, LR: 0.0003 +[2026-03-03 03:43:49] (step=0041274) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.075523380943064, LR: 0.0003 +[2026-03-03 03:43:57] (step=0041275) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.075719037370378, LR: 0.0003 +[2026-03-03 03:44:05] (step=0041276) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.07591469379769, LR: 0.0003 +[2026-03-03 03:44:13] (step=0041277) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.076110350225004, LR: 0.0003 +[2026-03-03 03:44:21] (step=0041278) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.076306006652318, LR: 0.0003 +[2026-03-03 03:44:29] (step=0041279) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.076501663079632, LR: 0.0003 +[2026-03-03 03:44:37] (step=0041280) Train Loss: 0.4349, Train Steps/Sec: 0.12, Epoch: 8.076697319506946, LR: 0.0003 +[2026-03-03 03:44:45] (step=0041281) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.076892975934259, LR: 0.0003 +[2026-03-03 03:44:52] (step=0041282) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 8.077088632361573, LR: 0.0003 +[2026-03-03 03:45:00] (step=0041283) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.077284288788887, LR: 0.0003 +[2026-03-03 03:45:08] (step=0041284) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.0774799452162, LR: 0.0003 +[2026-03-03 03:45:16] (step=0041285) Train Loss: 0.4495, Train Steps/Sec: 0.12, Epoch: 8.077675601643515, LR: 0.0003 +[2026-03-03 03:45:24] (step=0041286) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 8.077871258070827, LR: 0.0003 +[2026-03-03 03:45:32] (step=0041287) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.078066914498141, LR: 0.0003 +[2026-03-03 03:45:40] (step=0041288) Train Loss: 0.4548, Train Steps/Sec: 0.12, Epoch: 8.078262570925455, LR: 0.0003 +[2026-03-03 03:45:48] (step=0041289) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.078458227352769, LR: 0.0003 +[2026-03-03 03:45:56] (step=0041290) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.078653883780083, LR: 0.0003 +[2026-03-03 03:46:04] (step=0041291) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.078849540207395, LR: 0.0003 +[2026-03-03 03:46:12] (step=0041292) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.07904519663471, LR: 0.0003 +[2026-03-03 03:46:19] (step=0041293) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 8.079240853062023, LR: 0.0003 +[2026-03-03 03:46:27] (step=0041294) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.079436509489337, LR: 0.0003 +[2026-03-03 03:46:35] (step=0041295) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.079632165916651, LR: 0.0003 +[2026-03-03 03:46:43] (step=0041296) Train Loss: 0.4508, Train Steps/Sec: 0.12, Epoch: 8.079827822343963, LR: 0.0003 +[2026-03-03 03:46:51] (step=0041297) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 8.080023478771277, LR: 0.0003 +[2026-03-03 03:46:59] (step=0041298) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.080219135198591, LR: 0.0003 +[2026-03-03 03:47:07] (step=0041299) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.080414791625905, LR: 0.0003 +[2026-03-03 03:47:15] (step=0041300) Train Loss: 0.4544, Train Steps/Sec: 0.12, Epoch: 8.080610448053218, LR: 0.0003 +[2026-03-03 03:47:23] (step=0041301) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.080806104480532, LR: 0.0003 +[2026-03-03 03:47:31] (step=0041302) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.081001760907846, LR: 0.0003 +[2026-03-03 03:47:39] (step=0041303) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.08119741733516, LR: 0.0003 +[2026-03-03 03:47:47] (step=0041304) Train Loss: 0.4443, Train Steps/Sec: 0.12, Epoch: 8.081393073762474, LR: 0.0003 +[2026-03-03 03:47:55] (step=0041305) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.081588730189786, LR: 0.0003 +[2026-03-03 03:48:03] (step=0041306) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.0817843866171, LR: 0.0003 +[2026-03-03 03:48:10] (step=0041307) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.081980043044414, LR: 0.0003 +[2026-03-03 03:48:18] (step=0041308) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.082175699471728, LR: 0.0003 +[2026-03-03 03:48:26] (step=0041309) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.082371355899042, LR: 0.0003 +[2026-03-03 03:48:34] (step=0041310) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.082567012326354, LR: 0.0003 +[2026-03-03 03:48:42] (step=0041311) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.082762668753668, LR: 0.0003 +[2026-03-03 03:48:50] (step=0041312) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.082958325180982, LR: 0.0003 +[2026-03-03 03:48:58] (step=0041313) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.083153981608296, LR: 0.0003 +[2026-03-03 03:49:06] (step=0041314) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.08334963803561, LR: 0.0003 +[2026-03-03 03:49:13] (step=0041315) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.083545294462922, LR: 0.0003 +[2026-03-03 03:49:21] (step=0041316) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.083740950890236, LR: 0.0003 +[2026-03-03 03:49:29] (step=0041317) Train Loss: 0.4478, Train Steps/Sec: 0.12, Epoch: 8.08393660731755, LR: 0.0003 +[2026-03-03 03:49:37] (step=0041318) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.084132263744864, LR: 0.0003 +[2026-03-03 03:49:45] (step=0041319) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.084327920172178, LR: 0.0003 +[2026-03-03 03:49:53] (step=0041320) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.08452357659949, LR: 0.0003 +[2026-03-03 03:50:01] (step=0041321) Train Loss: 0.4400, Train Steps/Sec: 0.12, Epoch: 8.084719233026805, LR: 0.0003 +[2026-03-03 03:50:09] (step=0041322) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.084914889454119, LR: 0.0003 +[2026-03-03 03:50:17] (step=0041323) Train Loss: 0.4411, Train Steps/Sec: 0.12, Epoch: 8.085110545881433, LR: 0.0003 +[2026-03-03 03:50:25] (step=0041324) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 8.085306202308745, LR: 0.0003 +[2026-03-03 03:50:33] (step=0041325) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 8.085501858736059, LR: 0.0003 +[2026-03-03 03:50:41] (step=0041326) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.085697515163373, LR: 0.0003 +[2026-03-03 03:50:49] (step=0041327) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.085893171590687, LR: 0.0003 +[2026-03-03 03:50:57] (step=0041328) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 8.086088828018001, LR: 0.0003 +[2026-03-03 03:51:04] (step=0041329) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.086284484445313, LR: 0.0003 +[2026-03-03 03:51:12] (step=0041330) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.086480140872627, LR: 0.0003 +[2026-03-03 03:51:20] (step=0041331) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.086675797299941, LR: 0.0003 +[2026-03-03 03:51:28] (step=0041332) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.086871453727255, LR: 0.0003 +[2026-03-03 03:51:36] (step=0041333) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.08706711015457, LR: 0.0003 +[2026-03-03 03:51:44] (step=0041334) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.087262766581881, LR: 0.0003 +[2026-03-03 03:51:52] (step=0041335) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.087458423009195, LR: 0.0003 +[2026-03-03 03:52:00] (step=0041336) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.08765407943651, LR: 0.0003 +[2026-03-03 03:52:07] (step=0041337) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.087849735863823, LR: 0.0003 +[2026-03-03 03:52:15] (step=0041338) Train Loss: 0.4537, Train Steps/Sec: 0.12, Epoch: 8.088045392291138, LR: 0.0003 +[2026-03-03 03:52:23] (step=0041339) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.08824104871845, LR: 0.0003 +[2026-03-03 03:52:31] (step=0041340) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 8.088436705145764, LR: 0.0003 +[2026-03-03 03:52:39] (step=0041341) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.088632361573078, LR: 0.0003 +[2026-03-03 03:52:47] (step=0041342) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.088828018000392, LR: 0.0003 +[2026-03-03 03:52:55] (step=0041343) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 8.089023674427706, LR: 0.0003 +[2026-03-03 03:53:03] (step=0041344) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.089219330855018, LR: 0.0003 +[2026-03-03 03:53:11] (step=0041345) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.089414987282332, LR: 0.0003 +[2026-03-03 03:53:19] (step=0041346) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 8.089610643709646, LR: 0.0003 +[2026-03-03 03:53:27] (step=0041347) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.08980630013696, LR: 0.0003 +[2026-03-03 03:53:34] (step=0041348) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.090001956564274, LR: 0.0003 +[2026-03-03 03:53:42] (step=0041349) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.090197612991586, LR: 0.0003 +[2026-03-03 03:53:50] (step=0041350) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.0903932694189, LR: 0.0003 +[2026-03-03 03:53:58] (step=0041351) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.090588925846214, LR: 0.0003 +[2026-03-03 03:54:06] (step=0041352) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.090784582273528, LR: 0.0003 +[2026-03-03 03:54:14] (step=0041353) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.09098023870084, LR: 0.0003 +[2026-03-03 03:54:22] (step=0041354) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.091175895128154, LR: 0.0003 +[2026-03-03 03:54:29] (step=0041355) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.091371551555468, LR: 0.0003 +[2026-03-03 03:54:37] (step=0041356) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.091567207982783, LR: 0.0003 +[2026-03-03 03:54:45] (step=0041357) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.091762864410097, LR: 0.0003 +[2026-03-03 03:54:53] (step=0041358) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 8.091958520837409, LR: 0.0003 +[2026-03-03 03:55:01] (step=0041359) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.092154177264723, LR: 0.0003 +[2026-03-03 03:55:09] (step=0041360) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.092349833692037, LR: 0.0003 +[2026-03-03 03:55:17] (step=0041361) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.09254549011935, LR: 0.0003 +[2026-03-03 03:55:24] (step=0041362) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.092741146546665, LR: 0.0003 +[2026-03-03 03:55:32] (step=0041363) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.092936802973977, LR: 0.0003 +[2026-03-03 03:55:40] (step=0041364) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.093132459401291, LR: 0.0003 +[2026-03-03 03:55:48] (step=0041365) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 8.093328115828605, LR: 0.0003 +[2026-03-03 03:55:56] (step=0041366) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.093523772255919, LR: 0.0003 +[2026-03-03 03:56:04] (step=0041367) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.093719428683233, LR: 0.0003 +[2026-03-03 03:56:12] (step=0041368) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.093915085110545, LR: 0.0003 +[2026-03-03 03:56:19] (step=0041369) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.09411074153786, LR: 0.0003 +[2026-03-03 03:56:27] (step=0041370) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.094306397965173, LR: 0.0003 +[2026-03-03 03:56:35] (step=0041371) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.094502054392487, LR: 0.0003 +[2026-03-03 03:56:43] (step=0041372) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.094697710819801, LR: 0.0003 +[2026-03-03 03:56:51] (step=0041373) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.094893367247114, LR: 0.0003 +[2026-03-03 03:56:59] (step=0041374) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.095089023674428, LR: 0.0003 +[2026-03-03 03:57:07] (step=0041375) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.095284680101742, LR: 0.0003 +[2026-03-03 03:57:14] (step=0041376) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.095480336529056, LR: 0.0003 +[2026-03-03 03:57:22] (step=0041377) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 8.095675992956368, LR: 0.0003 +[2026-03-03 03:57:30] (step=0041378) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.095871649383682, LR: 0.0003 +[2026-03-03 03:57:38] (step=0041379) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.096067305810996, LR: 0.0003 +[2026-03-03 03:57:46] (step=0041380) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.09626296223831, LR: 0.0003 +[2026-03-03 03:57:54] (step=0041381) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.096458618665624, LR: 0.0003 +[2026-03-03 03:58:01] (step=0041382) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.096654275092936, LR: 0.0003 +[2026-03-03 03:58:09] (step=0041383) Train Loss: 0.4593, Train Steps/Sec: 0.12, Epoch: 8.09684993152025, LR: 0.0003 +[2026-03-03 03:58:17] (step=0041384) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.097045587947564, LR: 0.0003 +[2026-03-03 03:58:25] (step=0041385) Train Loss: 0.4357, Train Steps/Sec: 0.12, Epoch: 8.097241244374878, LR: 0.0003 +[2026-03-03 03:58:33] (step=0041386) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.097436900802192, LR: 0.0003 +[2026-03-03 03:58:41] (step=0041387) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 8.097632557229504, LR: 0.0003 +[2026-03-03 03:58:49] (step=0041388) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.097828213656818, LR: 0.0003 +[2026-03-03 03:58:57] (step=0041389) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.098023870084132, LR: 0.0003 +[2026-03-03 03:59:05] (step=0041390) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.098219526511446, LR: 0.0003 +[2026-03-03 03:59:12] (step=0041391) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.09841518293876, LR: 0.0003 +[2026-03-03 03:59:20] (step=0041392) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.098610839366073, LR: 0.0003 +[2026-03-03 03:59:28] (step=0041393) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.098806495793387, LR: 0.0003 +[2026-03-03 03:59:36] (step=0041394) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.0990021522207, LR: 0.0003 +[2026-03-03 03:59:44] (step=0041395) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.099197808648015, LR: 0.0003 +[2026-03-03 03:59:52] (step=0041396) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.099393465075329, LR: 0.0003 +[2026-03-03 04:00:00] (step=0041397) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.09958912150264, LR: 0.0003 +[2026-03-03 04:00:07] (step=0041398) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.099784777929955, LR: 0.0003 +[2026-03-03 04:00:15] (step=0041399) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 8.099980434357269, LR: 0.0003 +[2026-03-03 04:00:23] (step=0041400) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 8.100176090784583, LR: 0.0003 +[2026-03-03 04:00:31] (step=0041401) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.100371747211897, LR: 0.0003 +[2026-03-03 04:00:39] (step=0041402) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.100567403639209, LR: 0.0003 +[2026-03-03 04:00:47] (step=0041403) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.100763060066523, LR: 0.0003 +[2026-03-03 04:00:55] (step=0041404) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.100958716493837, LR: 0.0003 +[2026-03-03 04:01:02] (step=0041405) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.101154372921151, LR: 0.0003 +[2026-03-03 04:01:10] (step=0041406) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.101350029348463, LR: 0.0003 +[2026-03-03 04:01:18] (step=0041407) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.101545685775777, LR: 0.0003 +[2026-03-03 04:01:26] (step=0041408) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.101741342203091, LR: 0.0003 +[2026-03-03 04:01:34] (step=0041409) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.101936998630405, LR: 0.0003 +[2026-03-03 04:01:42] (step=0041410) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.10213265505772, LR: 0.0003 +[2026-03-03 04:01:50] (step=0041411) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.102328311485032, LR: 0.0003 +[2026-03-03 04:01:57] (step=0041412) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.102523967912346, LR: 0.0003 +[2026-03-03 04:02:05] (step=0041413) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.10271962433966, LR: 0.0003 +[2026-03-03 04:02:13] (step=0041414) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.102915280766974, LR: 0.0003 +[2026-03-03 04:02:21] (step=0041415) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.103110937194288, LR: 0.0003 +[2026-03-03 04:02:29] (step=0041416) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.1033065936216, LR: 0.0003 +[2026-03-03 04:02:37] (step=0041417) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.103502250048914, LR: 0.0003 +[2026-03-03 04:02:44] (step=0041418) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 8.103697906476228, LR: 0.0003 +[2026-03-03 04:02:52] (step=0041419) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.103893562903542, LR: 0.0003 +[2026-03-03 04:03:00] (step=0041420) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.104089219330856, LR: 0.0003 +[2026-03-03 04:03:08] (step=0041421) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.104284875758168, LR: 0.0003 +[2026-03-03 04:03:16] (step=0041422) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.104480532185482, LR: 0.0003 +[2026-03-03 04:03:24] (step=0041423) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.104676188612796, LR: 0.0003 +[2026-03-03 04:03:32] (step=0041424) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.10487184504011, LR: 0.0003 +[2026-03-03 04:03:39] (step=0041425) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.105067501467424, LR: 0.0003 +[2026-03-03 04:03:47] (step=0041426) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.105263157894736, LR: 0.0003 +[2026-03-03 04:03:55] (step=0041427) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.10545881432205, LR: 0.0003 +[2026-03-03 04:04:03] (step=0041428) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.105654470749364, LR: 0.0003 +[2026-03-03 04:04:11] (step=0041429) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.105850127176678, LR: 0.0003 +[2026-03-03 04:04:19] (step=0041430) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.10604578360399, LR: 0.0003 +[2026-03-03 04:04:27] (step=0041431) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.106241440031305, LR: 0.0003 +[2026-03-03 04:04:35] (step=0041432) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 8.106437096458619, LR: 0.0003 +[2026-03-03 04:04:42] (step=0041433) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.106632752885933, LR: 0.0003 +[2026-03-03 04:04:50] (step=0041434) Train Loss: 0.4502, Train Steps/Sec: 0.12, Epoch: 8.106828409313247, LR: 0.0003 +[2026-03-03 04:04:58] (step=0041435) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.107024065740559, LR: 0.0003 +[2026-03-03 04:05:06] (step=0041436) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.107219722167873, LR: 0.0003 +[2026-03-03 04:05:14] (step=0041437) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.107415378595187, LR: 0.0003 +[2026-03-03 04:05:22] (step=0041438) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 8.1076110350225, LR: 0.0003 +[2026-03-03 04:05:30] (step=0041439) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.107806691449815, LR: 0.0003 +[2026-03-03 04:05:38] (step=0041440) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 8.108002347877127, LR: 0.0003 +[2026-03-03 04:05:45] (step=0041441) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.108198004304441, LR: 0.0003 +[2026-03-03 04:05:53] (step=0041442) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.108393660731755, LR: 0.0003 +[2026-03-03 04:06:01] (step=0041443) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 8.108589317159069, LR: 0.0003 +[2026-03-03 04:06:09] (step=0041444) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.108784973586383, LR: 0.0003 +[2026-03-03 04:06:17] (step=0041445) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.108980630013695, LR: 0.0003 +[2026-03-03 04:06:25] (step=0041446) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.10917628644101, LR: 0.0003 +[2026-03-03 04:06:32] (step=0041447) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.109371942868323, LR: 0.0003 +[2026-03-03 04:06:40] (step=0041448) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.109567599295637, LR: 0.0003 +[2026-03-03 04:06:48] (step=0041449) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 8.109763255722951, LR: 0.0003 +[2026-03-03 04:06:56] (step=0041450) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.109958912150264, LR: 0.0003 +[2026-03-03 04:07:04] (step=0041451) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.110154568577578, LR: 0.0003 +[2026-03-03 04:07:12] (step=0041452) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 8.110350225004892, LR: 0.0003 +[2026-03-03 04:07:20] (step=0041453) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.110545881432206, LR: 0.0003 +[2026-03-03 04:07:27] (step=0041454) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.11074153785952, LR: 0.0003 +[2026-03-03 04:07:35] (step=0041455) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.110937194286832, LR: 0.0003 +[2026-03-03 04:07:43] (step=0041456) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.111132850714146, LR: 0.0003 +[2026-03-03 04:07:51] (step=0041457) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 8.11132850714146, LR: 0.0003 +[2026-03-03 04:07:59] (step=0041458) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.111524163568774, LR: 0.0003 +[2026-03-03 04:08:07] (step=0041459) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.111719819996086, LR: 0.0003 +[2026-03-03 04:08:15] (step=0041460) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.1119154764234, LR: 0.0003 +[2026-03-03 04:08:22] (step=0041461) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.112111132850714, LR: 0.0003 +[2026-03-03 04:08:30] (step=0041462) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.112306789278028, LR: 0.0003 +[2026-03-03 04:08:38] (step=0041463) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.112502445705342, LR: 0.0003 +[2026-03-03 04:08:46] (step=0041464) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.112698102132654, LR: 0.0003 +[2026-03-03 04:08:54] (step=0041465) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.112893758559968, LR: 0.0003 +[2026-03-03 04:09:02] (step=0041466) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.113089414987282, LR: 0.0003 +[2026-03-03 04:09:09] (step=0041467) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.113285071414596, LR: 0.0003 +[2026-03-03 04:09:17] (step=0041468) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.11348072784191, LR: 0.0003 +[2026-03-03 04:09:25] (step=0041469) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.113676384269223, LR: 0.0003 +[2026-03-03 04:09:33] (step=0041470) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.113872040696537, LR: 0.0003 +[2026-03-03 04:09:41] (step=0041471) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 8.11406769712385, LR: 0.0003 +[2026-03-03 04:09:49] (step=0041472) Train Loss: 0.4257, Train Steps/Sec: 0.13, Epoch: 8.114263353551165, LR: 0.0003 +[2026-03-03 04:09:57] (step=0041473) Train Loss: 0.4261, Train Steps/Sec: 0.13, Epoch: 8.114459009978479, LR: 0.0003 +[2026-03-03 04:10:04] (step=0041474) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.11465466640579, LR: 0.0003 +[2026-03-03 04:10:12] (step=0041475) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.114850322833105, LR: 0.0003 +[2026-03-03 04:10:20] (step=0041476) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.115045979260419, LR: 0.0003 +[2026-03-03 04:10:28] (step=0041477) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.115241635687733, LR: 0.0003 +[2026-03-03 04:10:36] (step=0041478) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.115437292115047, LR: 0.0003 +[2026-03-03 04:10:44] (step=0041479) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.115632948542359, LR: 0.0003 +[2026-03-03 04:10:52] (step=0041480) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.115828604969673, LR: 0.0003 +[2026-03-03 04:10:59] (step=0041481) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.116024261396987, LR: 0.0003 +[2026-03-03 04:11:07] (step=0041482) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.116219917824301, LR: 0.0003 +[2026-03-03 04:11:15] (step=0041483) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.116415574251613, LR: 0.0003 +[2026-03-03 04:11:23] (step=0041484) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.116611230678927, LR: 0.0003 +[2026-03-03 04:11:31] (step=0041485) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.116806887106241, LR: 0.0003 +[2026-03-03 04:11:39] (step=0041486) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.117002543533555, LR: 0.0003 +[2026-03-03 04:11:47] (step=0041487) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.11719819996087, LR: 0.0003 +[2026-03-03 04:11:55] (step=0041488) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.117393856388182, LR: 0.0003 +[2026-03-03 04:12:02] (step=0041489) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.117589512815496, LR: 0.0003 +[2026-03-03 04:12:10] (step=0041490) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.11778516924281, LR: 0.0003 +[2026-03-03 04:12:18] (step=0041491) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.117980825670124, LR: 0.0003 +[2026-03-03 04:12:26] (step=0041492) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.118176482097438, LR: 0.0003 +[2026-03-03 04:12:34] (step=0041493) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.11837213852475, LR: 0.0003 +[2026-03-03 04:12:42] (step=0041494) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.118567794952064, LR: 0.0003 +[2026-03-03 04:12:50] (step=0041495) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 8.118763451379378, LR: 0.0003 +[2026-03-03 04:12:57] (step=0041496) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.118959107806692, LR: 0.0003 +[2026-03-03 04:13:05] (step=0041497) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.119154764234006, LR: 0.0003 +[2026-03-03 04:13:13] (step=0041498) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.119350420661318, LR: 0.0003 +[2026-03-03 04:13:21] (step=0041499) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.119546077088632, LR: 0.0003 +[2026-03-03 04:13:29] (step=0041500) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.119741733515946, LR: 0.0003 +[2026-03-03 04:13:29] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0041500/ +[2026-03-03 04:13:37] (step=0041501) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.11993738994326, LR: 0.0003 +[2026-03-03 04:13:45] (step=0041502) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.120133046370574, LR: 0.0003 +[2026-03-03 04:13:52] (step=0041503) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.120328702797886, LR: 0.0003 +[2026-03-03 04:14:00] (step=0041504) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.1205243592252, LR: 0.0003 +[2026-03-03 04:14:08] (step=0041505) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.120720015652514, LR: 0.0003 +[2026-03-03 04:14:16] (step=0041506) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.120915672079828, LR: 0.0003 +[2026-03-03 04:14:24] (step=0041507) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.121111328507142, LR: 0.0003 +[2026-03-03 04:14:32] (step=0041508) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.121306984934455, LR: 0.0003 +[2026-03-03 04:14:39] (step=0041509) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.121502641361769, LR: 0.0003 +[2026-03-03 04:14:47] (step=0041510) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.121698297789083, LR: 0.0003 +[2026-03-03 04:14:55] (step=0041511) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.121893954216397, LR: 0.0003 +[2026-03-03 04:15:03] (step=0041512) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.122089610643709, LR: 0.0003 +[2026-03-03 04:15:11] (step=0041513) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.122285267071023, LR: 0.0003 +[2026-03-03 04:15:19] (step=0041514) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.122480923498337, LR: 0.0003 +[2026-03-03 04:15:27] (step=0041515) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.12267657992565, LR: 0.0003 +[2026-03-03 04:15:35] (step=0041516) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.122872236352965, LR: 0.0003 +[2026-03-03 04:15:42] (step=0041517) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.123067892780277, LR: 0.0003 +[2026-03-03 04:15:50] (step=0041518) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.123263549207591, LR: 0.0003 +[2026-03-03 04:15:58] (step=0041519) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.123459205634905, LR: 0.0003 +[2026-03-03 04:16:06] (step=0041520) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.123654862062219, LR: 0.0003 +[2026-03-03 04:16:14] (step=0041521) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.123850518489533, LR: 0.0003 +[2026-03-03 04:16:22] (step=0041522) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.124046174916845, LR: 0.0003 +[2026-03-03 04:16:29] (step=0041523) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.12424183134416, LR: 0.0003 +[2026-03-03 04:16:37] (step=0041524) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.124437487771473, LR: 0.0003 +[2026-03-03 04:16:45] (step=0041525) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.124633144198787, LR: 0.0003 +[2026-03-03 04:16:53] (step=0041526) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.124828800626101, LR: 0.0003 +[2026-03-03 04:17:01] (step=0041527) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.125024457053414, LR: 0.0003 +[2026-03-03 04:17:09] (step=0041528) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.125220113480728, LR: 0.0003 +[2026-03-03 04:17:17] (step=0041529) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.125415769908042, LR: 0.0003 +[2026-03-03 04:17:24] (step=0041530) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 8.125611426335356, LR: 0.0003 +[2026-03-03 04:17:32] (step=0041531) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.12580708276267, LR: 0.0003 +[2026-03-03 04:17:40] (step=0041532) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.126002739189982, LR: 0.0003 +[2026-03-03 04:17:48] (step=0041533) Train Loss: 0.4468, Train Steps/Sec: 0.12, Epoch: 8.126198395617296, LR: 0.0003 +[2026-03-03 04:17:56] (step=0041534) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.12639405204461, LR: 0.0003 +[2026-03-03 04:18:04] (step=0041535) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.126589708471924, LR: 0.0003 +[2026-03-03 04:18:12] (step=0041536) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.126785364899236, LR: 0.0003 +[2026-03-03 04:18:20] (step=0041537) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 8.12698102132655, LR: 0.0003 +[2026-03-03 04:18:28] (step=0041538) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.127176677753864, LR: 0.0003 +[2026-03-03 04:18:35] (step=0041539) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.127372334181178, LR: 0.0003 +[2026-03-03 04:18:43] (step=0041540) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.127567990608492, LR: 0.0003 +[2026-03-03 04:18:51] (step=0041541) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.127763647035804, LR: 0.0003 +[2026-03-03 04:18:59] (step=0041542) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.127959303463118, LR: 0.0003 +[2026-03-03 04:19:07] (step=0041543) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.128154959890432, LR: 0.0003 +[2026-03-03 04:19:15] (step=0041544) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.128350616317746, LR: 0.0003 +[2026-03-03 04:19:22] (step=0041545) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 8.12854627274506, LR: 0.0003 +[2026-03-03 04:19:30] (step=0041546) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.128741929172373, LR: 0.0003 +[2026-03-03 04:19:38] (step=0041547) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.128937585599687, LR: 0.0003 +[2026-03-03 04:19:46] (step=0041548) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.129133242027, LR: 0.0003 +[2026-03-03 04:19:54] (step=0041549) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 8.129328898454315, LR: 0.0003 +[2026-03-03 04:20:02] (step=0041550) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.129524554881629, LR: 0.0003 +[2026-03-03 04:20:09] (step=0041551) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.12972021130894, LR: 0.0003 +[2026-03-03 04:20:17] (step=0041552) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.129915867736255, LR: 0.0003 +[2026-03-03 04:20:25] (step=0041553) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 8.130111524163569, LR: 0.0003 +[2026-03-03 04:20:33] (step=0041554) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.130307180590883, LR: 0.0003 +[2026-03-03 04:20:41] (step=0041555) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.130502837018197, LR: 0.0003 +[2026-03-03 04:20:49] (step=0041556) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.130698493445509, LR: 0.0003 +[2026-03-03 04:20:57] (step=0041557) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.130894149872823, LR: 0.0003 +[2026-03-03 04:21:04] (step=0041558) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.131089806300137, LR: 0.0003 +[2026-03-03 04:21:12] (step=0041559) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.131285462727451, LR: 0.0003 +[2026-03-03 04:21:20] (step=0041560) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.131481119154763, LR: 0.0003 +[2026-03-03 04:21:28] (step=0041561) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.131676775582077, LR: 0.0003 +[2026-03-03 04:21:36] (step=0041562) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.131872432009391, LR: 0.0003 +[2026-03-03 04:21:44] (step=0041563) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.132068088436705, LR: 0.0003 +[2026-03-03 04:21:52] (step=0041564) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 8.13226374486402, LR: 0.0003 +[2026-03-03 04:21:59] (step=0041565) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.132459401291332, LR: 0.0003 +[2026-03-03 04:22:07] (step=0041566) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.132655057718646, LR: 0.0003 +[2026-03-03 04:22:15] (step=0041567) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.13285071414596, LR: 0.0003 +[2026-03-03 04:22:23] (step=0041568) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.133046370573274, LR: 0.0003 +[2026-03-03 04:22:31] (step=0041569) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.133242027000588, LR: 0.0003 +[2026-03-03 04:22:39] (step=0041570) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.1334376834279, LR: 0.0003 +[2026-03-03 04:22:46] (step=0041571) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 8.133633339855214, LR: 0.0003 +[2026-03-03 04:22:54] (step=0041572) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.133828996282528, LR: 0.0003 +[2026-03-03 04:23:02] (step=0041573) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.134024652709842, LR: 0.0003 +[2026-03-03 04:23:10] (step=0041574) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.134220309137156, LR: 0.0003 +[2026-03-03 04:23:18] (step=0041575) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.134415965564468, LR: 0.0003 +[2026-03-03 04:23:26] (step=0041576) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.134611621991782, LR: 0.0003 +[2026-03-03 04:23:34] (step=0041577) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.134807278419096, LR: 0.0003 +[2026-03-03 04:23:41] (step=0041578) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.13500293484641, LR: 0.0003 +[2026-03-03 04:23:49] (step=0041579) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.135198591273724, LR: 0.0003 +[2026-03-03 04:23:57] (step=0041580) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 8.135394247701036, LR: 0.0003 +[2026-03-03 04:24:05] (step=0041581) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.13558990412835, LR: 0.0003 +[2026-03-03 04:24:13] (step=0041582) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.135785560555664, LR: 0.0003 +[2026-03-03 04:24:21] (step=0041583) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.135981216982978, LR: 0.0003 +[2026-03-03 04:24:29] (step=0041584) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.136176873410292, LR: 0.0003 +[2026-03-03 04:24:37] (step=0041585) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.136372529837605, LR: 0.0003 +[2026-03-03 04:24:44] (step=0041586) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.136568186264919, LR: 0.0003 +[2026-03-03 04:24:52] (step=0041587) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.136763842692233, LR: 0.0003 +[2026-03-03 04:25:00] (step=0041588) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.136959499119547, LR: 0.0003 +[2026-03-03 04:25:08] (step=0041589) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 8.137155155546859, LR: 0.0003 +[2026-03-03 04:25:16] (step=0041590) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.137350811974173, LR: 0.0003 +[2026-03-03 04:25:24] (step=0041591) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.137546468401487, LR: 0.0003 +[2026-03-03 04:25:32] (step=0041592) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.1377421248288, LR: 0.0003 +[2026-03-03 04:25:39] (step=0041593) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.137937781256115, LR: 0.0003 +[2026-03-03 04:25:47] (step=0041594) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.138133437683427, LR: 0.0003 +[2026-03-03 04:25:55] (step=0041595) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.138329094110741, LR: 0.0003 +[2026-03-03 04:26:03] (step=0041596) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.138524750538055, LR: 0.0003 +[2026-03-03 04:26:11] (step=0041597) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 8.13872040696537, LR: 0.0003 +[2026-03-03 04:26:19] (step=0041598) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.138916063392683, LR: 0.0003 +[2026-03-03 04:26:27] (step=0041599) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.139111719819995, LR: 0.0003 +[2026-03-03 04:26:34] (step=0041600) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 8.13930737624731, LR: 0.0003 +[2026-03-03 04:26:42] (step=0041601) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.139503032674623, LR: 0.0003 +[2026-03-03 04:26:50] (step=0041602) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.139698689101937, LR: 0.0003 +[2026-03-03 04:26:58] (step=0041603) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.139894345529251, LR: 0.0003 +[2026-03-03 04:27:06] (step=0041604) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.140090001956564, LR: 0.0003 +[2026-03-03 04:27:14] (step=0041605) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.140285658383878, LR: 0.0003 +[2026-03-03 04:27:22] (step=0041606) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.140481314811192, LR: 0.0003 +[2026-03-03 04:27:29] (step=0041607) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.140676971238506, LR: 0.0003 +[2026-03-03 04:27:37] (step=0041608) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 8.14087262766582, LR: 0.0003 +[2026-03-03 04:27:45] (step=0041609) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 8.141068284093132, LR: 0.0003 +[2026-03-03 04:27:53] (step=0041610) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.141263940520446, LR: 0.0003 +[2026-03-03 04:28:01] (step=0041611) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.14145959694776, LR: 0.0003 +[2026-03-03 04:28:09] (step=0041612) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.141655253375074, LR: 0.0003 +[2026-03-03 04:28:16] (step=0041613) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.141850909802386, LR: 0.0003 +[2026-03-03 04:28:24] (step=0041614) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 8.1420465662297, LR: 0.0003 +[2026-03-03 04:28:32] (step=0041615) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.142242222657014, LR: 0.0003 +[2026-03-03 04:28:40] (step=0041616) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.142437879084328, LR: 0.0003 +[2026-03-03 04:28:48] (step=0041617) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.142633535511642, LR: 0.0003 +[2026-03-03 04:28:56] (step=0041618) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.142829191938954, LR: 0.0003 +[2026-03-03 04:29:04] (step=0041619) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.143024848366268, LR: 0.0003 +[2026-03-03 04:29:11] (step=0041620) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.143220504793582, LR: 0.0003 +[2026-03-03 04:29:19] (step=0041621) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.143416161220896, LR: 0.0003 +[2026-03-03 04:29:27] (step=0041622) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.14361181764821, LR: 0.0003 +[2026-03-03 04:29:35] (step=0041623) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.143807474075523, LR: 0.0003 +[2026-03-03 04:29:43] (step=0041624) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.144003130502837, LR: 0.0003 +[2026-03-03 04:29:51] (step=0041625) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.14419878693015, LR: 0.0003 +[2026-03-03 04:29:58] (step=0041626) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.144394443357465, LR: 0.0003 +[2026-03-03 04:30:06] (step=0041627) Train Loss: 0.4423, Train Steps/Sec: 0.12, Epoch: 8.144590099784779, LR: 0.0003 +[2026-03-03 04:30:14] (step=0041628) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.14478575621209, LR: 0.0003 +[2026-03-03 04:30:22] (step=0041629) Train Loss: 0.4521, Train Steps/Sec: 0.12, Epoch: 8.144981412639405, LR: 0.0003 +[2026-03-03 04:30:30] (step=0041630) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.145177069066719, LR: 0.0003 +[2026-03-03 04:30:38] (step=0041631) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.145372725494033, LR: 0.0003 +[2026-03-03 04:30:46] (step=0041632) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.145568381921347, LR: 0.0003 +[2026-03-03 04:30:54] (step=0041633) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 8.14576403834866, LR: 0.0003 +[2026-03-03 04:31:02] (step=0041634) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.145959694775973, LR: 0.0003 +[2026-03-03 04:31:09] (step=0041635) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.146155351203287, LR: 0.0003 +[2026-03-03 04:31:17] (step=0041636) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.146351007630601, LR: 0.0003 +[2026-03-03 04:31:25] (step=0041637) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.146546664057915, LR: 0.0003 +[2026-03-03 04:31:33] (step=0041638) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.146742320485227, LR: 0.0003 +[2026-03-03 04:31:41] (step=0041639) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.146937976912541, LR: 0.0003 +[2026-03-03 04:31:49] (step=0041640) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.147133633339855, LR: 0.0003 +[2026-03-03 04:31:56] (step=0041641) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.14732928976717, LR: 0.0003 +[2026-03-03 04:32:04] (step=0041642) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.147524946194482, LR: 0.0003 +[2026-03-03 04:32:12] (step=0041643) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.147720602621796, LR: 0.0003 +[2026-03-03 04:32:20] (step=0041644) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 8.14791625904911, LR: 0.0003 +[2026-03-03 04:32:28] (step=0041645) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.148111915476424, LR: 0.0003 +[2026-03-03 04:32:36] (step=0041646) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.148307571903738, LR: 0.0003 +[2026-03-03 04:32:44] (step=0041647) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 8.14850322833105, LR: 0.0003 +[2026-03-03 04:32:51] (step=0041648) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.148698884758364, LR: 0.0003 +[2026-03-03 04:32:59] (step=0041649) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.148894541185678, LR: 0.0003 +[2026-03-03 04:33:07] (step=0041650) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.149090197612992, LR: 0.0003 +[2026-03-03 04:33:15] (step=0041651) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 8.149285854040306, LR: 0.0003 +[2026-03-03 04:33:23] (step=0041652) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.149481510467618, LR: 0.0003 +[2026-03-03 04:33:31] (step=0041653) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.149677166894932, LR: 0.0003 +[2026-03-03 04:33:38] (step=0041654) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.149872823322246, LR: 0.0003 +[2026-03-03 04:33:46] (step=0041655) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.15006847974956, LR: 0.0003 +[2026-03-03 04:33:54] (step=0041656) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.150264136176874, LR: 0.0003 +[2026-03-03 04:34:02] (step=0041657) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.150459792604186, LR: 0.0003 +[2026-03-03 04:34:10] (step=0041658) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.1506554490315, LR: 0.0003 +[2026-03-03 04:34:18] (step=0041659) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.150851105458814, LR: 0.0003 +[2026-03-03 04:34:26] (step=0041660) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.151046761886128, LR: 0.0003 +[2026-03-03 04:34:33] (step=0041661) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.151242418313442, LR: 0.0003 +[2026-03-03 04:34:41] (step=0041662) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 8.151438074740755, LR: 0.0003 +[2026-03-03 04:34:49] (step=0041663) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.151633731168069, LR: 0.0003 +[2026-03-03 04:34:57] (step=0041664) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.151829387595383, LR: 0.0003 +[2026-03-03 04:35:05] (step=0041665) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.152025044022697, LR: 0.0003 +[2026-03-03 04:35:13] (step=0041666) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 8.152220700450009, LR: 0.0003 +[2026-03-03 04:35:21] (step=0041667) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.152416356877323, LR: 0.0003 +[2026-03-03 04:35:28] (step=0041668) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.152612013304637, LR: 0.0003 +[2026-03-03 04:35:36] (step=0041669) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.152807669731951, LR: 0.0003 +[2026-03-03 04:35:44] (step=0041670) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.153003326159265, LR: 0.0003 +[2026-03-03 04:35:52] (step=0041671) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 8.153198982586577, LR: 0.0003 +[2026-03-03 04:36:00] (step=0041672) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.153394639013891, LR: 0.0003 +[2026-03-03 04:36:08] (step=0041673) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.153590295441205, LR: 0.0003 +[2026-03-03 04:36:15] (step=0041674) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.15378595186852, LR: 0.0003 +[2026-03-03 04:36:23] (step=0041675) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.153981608295833, LR: 0.0003 +[2026-03-03 04:36:31] (step=0041676) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.154177264723145, LR: 0.0003 +[2026-03-03 04:36:39] (step=0041677) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.15437292115046, LR: 0.0003 +[2026-03-03 04:36:47] (step=0041678) Train Loss: 0.4529, Train Steps/Sec: 0.12, Epoch: 8.154568577577773, LR: 0.0003 +[2026-03-03 04:36:55] (step=0041679) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.154764234005087, LR: 0.0003 +[2026-03-03 04:37:03] (step=0041680) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.154959890432401, LR: 0.0003 +[2026-03-03 04:37:11] (step=0041681) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 8.155155546859714, LR: 0.0003 +[2026-03-03 04:37:18] (step=0041682) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.155351203287028, LR: 0.0003 +[2026-03-03 04:37:26] (step=0041683) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.155546859714342, LR: 0.0003 +[2026-03-03 04:37:34] (step=0041684) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.155742516141656, LR: 0.0003 +[2026-03-03 04:37:42] (step=0041685) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.15593817256897, LR: 0.0003 +[2026-03-03 04:37:50] (step=0041686) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.156133828996282, LR: 0.0003 +[2026-03-03 04:37:58] (step=0041687) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.156329485423596, LR: 0.0003 +[2026-03-03 04:38:05] (step=0041688) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.15652514185091, LR: 0.0003 +[2026-03-03 04:38:13] (step=0041689) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.156720798278224, LR: 0.0003 +[2026-03-03 04:38:21] (step=0041690) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.156916454705538, LR: 0.0003 +[2026-03-03 04:38:29] (step=0041691) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.15711211113285, LR: 0.0003 +[2026-03-03 04:38:37] (step=0041692) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.157307767560164, LR: 0.0003 +[2026-03-03 04:38:45] (step=0041693) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.157503423987478, LR: 0.0003 +[2026-03-03 04:38:53] (step=0041694) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 8.157699080414792, LR: 0.0003 +[2026-03-03 04:39:01] (step=0041695) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.157894736842104, LR: 0.0003 +[2026-03-03 04:39:08] (step=0041696) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.158090393269418, LR: 0.0003 +[2026-03-03 04:39:16] (step=0041697) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.158286049696732, LR: 0.0003 +[2026-03-03 04:39:24] (step=0041698) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.158481706124046, LR: 0.0003 +[2026-03-03 04:39:32] (step=0041699) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 8.15867736255136, LR: 0.0003 +[2026-03-03 04:39:40] (step=0041700) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.158873018978673, LR: 0.0003 +[2026-03-03 04:39:48] (step=0041701) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.159068675405987, LR: 0.0003 +[2026-03-03 04:39:55] (step=0041702) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.1592643318333, LR: 0.0003 +[2026-03-03 04:40:03] (step=0041703) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.159459988260615, LR: 0.0003 +[2026-03-03 04:40:11] (step=0041704) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.159655644687929, LR: 0.0003 +[2026-03-03 04:40:19] (step=0041705) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.159851301115241, LR: 0.0003 +[2026-03-03 04:40:27] (step=0041706) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.160046957542555, LR: 0.0003 +[2026-03-03 04:40:35] (step=0041707) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.160242613969869, LR: 0.0003 +[2026-03-03 04:40:43] (step=0041708) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.160438270397183, LR: 0.0003 +[2026-03-03 04:40:50] (step=0041709) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.160633926824497, LR: 0.0003 +[2026-03-03 04:40:58] (step=0041710) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.16082958325181, LR: 0.0003 +[2026-03-03 04:41:06] (step=0041711) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 8.161025239679123, LR: 0.0003 +[2026-03-03 04:41:14] (step=0041712) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.161220896106437, LR: 0.0003 +[2026-03-03 04:41:22] (step=0041713) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.161416552533751, LR: 0.0003 +[2026-03-03 04:41:30] (step=0041714) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.161612208961065, LR: 0.0003 +[2026-03-03 04:41:37] (step=0041715) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.161807865388377, LR: 0.0003 +[2026-03-03 04:41:45] (step=0041716) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.162003521815691, LR: 0.0003 +[2026-03-03 04:41:53] (step=0041717) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.162199178243005, LR: 0.0003 +[2026-03-03 04:42:01] (step=0041718) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.16239483467032, LR: 0.0003 +[2026-03-03 04:42:09] (step=0041719) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.162590491097632, LR: 0.0003 +[2026-03-03 04:42:17] (step=0041720) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 8.162786147524946, LR: 0.0003 +[2026-03-03 04:42:25] (step=0041721) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 8.16298180395226, LR: 0.0003 +[2026-03-03 04:42:32] (step=0041722) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.163177460379574, LR: 0.0003 +[2026-03-03 04:42:40] (step=0041723) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.163373116806888, LR: 0.0003 +[2026-03-03 04:42:48] (step=0041724) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.1635687732342, LR: 0.0003 +[2026-03-03 04:42:56] (step=0041725) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 8.163764429661514, LR: 0.0003 +[2026-03-03 04:43:04] (step=0041726) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.163960086088828, LR: 0.0003 +[2026-03-03 04:43:12] (step=0041727) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.164155742516142, LR: 0.0003 +[2026-03-03 04:43:20] (step=0041728) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.164351398943456, LR: 0.0003 +[2026-03-03 04:43:28] (step=0041729) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.164547055370768, LR: 0.0003 +[2026-03-03 04:43:35] (step=0041730) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.164742711798082, LR: 0.0003 +[2026-03-03 04:43:43] (step=0041731) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.164938368225396, LR: 0.0003 +[2026-03-03 04:43:51] (step=0041732) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.16513402465271, LR: 0.0003 +[2026-03-03 04:43:59] (step=0041733) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.165329681080024, LR: 0.0003 +[2026-03-03 04:44:07] (step=0041734) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.165525337507336, LR: 0.0003 +[2026-03-03 04:44:15] (step=0041735) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.16572099393465, LR: 0.0003 +[2026-03-03 04:44:23] (step=0041736) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.165916650361964, LR: 0.0003 +[2026-03-03 04:44:31] (step=0041737) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.166112306789278, LR: 0.0003 +[2026-03-03 04:44:38] (step=0041738) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.166307963216592, LR: 0.0003 +[2026-03-03 04:44:46] (step=0041739) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.166503619643905, LR: 0.0003 +[2026-03-03 04:44:54] (step=0041740) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.166699276071219, LR: 0.0003 +[2026-03-03 04:45:02] (step=0041741) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.166894932498533, LR: 0.0003 +[2026-03-03 04:45:10] (step=0041742) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 8.167090588925847, LR: 0.0003 +[2026-03-03 04:45:18] (step=0041743) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.16728624535316, LR: 0.0003 +[2026-03-03 04:45:26] (step=0041744) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.167481901780473, LR: 0.0003 +[2026-03-03 04:45:33] (step=0041745) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.167677558207787, LR: 0.0003 +[2026-03-03 04:45:41] (step=0041746) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.167873214635101, LR: 0.0003 +[2026-03-03 04:45:49] (step=0041747) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.168068871062415, LR: 0.0003 +[2026-03-03 04:45:57] (step=0041748) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.168264527489727, LR: 0.0003 +[2026-03-03 04:46:05] (step=0041749) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.168460183917041, LR: 0.0003 +[2026-03-03 04:46:13] (step=0041750) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.168655840344355, LR: 0.0003 +[2026-03-03 04:46:20] (step=0041751) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.16885149677167, LR: 0.0003 +[2026-03-03 04:46:28] (step=0041752) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 8.169047153198983, LR: 0.0003 +[2026-03-03 04:46:36] (step=0041753) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.169242809626295, LR: 0.0003 +[2026-03-03 04:46:44] (step=0041754) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.16943846605361, LR: 0.0003 +[2026-03-03 04:46:52] (step=0041755) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.169634122480923, LR: 0.0003 +[2026-03-03 04:47:00] (step=0041756) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.169829778908237, LR: 0.0003 +[2026-03-03 04:47:08] (step=0041757) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.170025435335551, LR: 0.0003 +[2026-03-03 04:47:15] (step=0041758) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.170221091762864, LR: 0.0003 +[2026-03-03 04:47:23] (step=0041759) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.170416748190178, LR: 0.0003 +[2026-03-03 04:47:31] (step=0041760) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.170612404617492, LR: 0.0003 +[2026-03-03 04:47:39] (step=0041761) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.170808061044806, LR: 0.0003 +[2026-03-03 04:47:47] (step=0041762) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.17100371747212, LR: 0.0003 +[2026-03-03 04:47:55] (step=0041763) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.171199373899432, LR: 0.0003 +[2026-03-03 04:48:03] (step=0041764) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.171395030326746, LR: 0.0003 +[2026-03-03 04:48:10] (step=0041765) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 8.17159068675406, LR: 0.0003 +[2026-03-03 04:48:18] (step=0041766) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.171786343181374, LR: 0.0003 +[2026-03-03 04:48:26] (step=0041767) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 8.171981999608688, LR: 0.0003 +[2026-03-03 04:48:34] (step=0041768) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.172177656036, LR: 0.0003 +[2026-03-03 04:48:42] (step=0041769) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.172373312463314, LR: 0.0003 +[2026-03-03 04:48:50] (step=0041770) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.172568968890628, LR: 0.0003 +[2026-03-03 04:48:58] (step=0041771) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.172764625317942, LR: 0.0003 +[2026-03-03 04:49:05] (step=0041772) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.172960281745254, LR: 0.0003 +[2026-03-03 04:49:13] (step=0041773) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.173155938172568, LR: 0.0003 +[2026-03-03 04:49:21] (step=0041774) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.173351594599882, LR: 0.0003 +[2026-03-03 04:49:29] (step=0041775) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.173547251027196, LR: 0.0003 +[2026-03-03 04:49:37] (step=0041776) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.17374290745451, LR: 0.0003 +[2026-03-03 04:49:45] (step=0041777) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.173938563881823, LR: 0.0003 +[2026-03-03 04:49:53] (step=0041778) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 8.174134220309137, LR: 0.0003 +[2026-03-03 04:50:01] (step=0041779) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 8.17432987673645, LR: 0.0003 +[2026-03-03 04:50:09] (step=0041780) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.174525533163765, LR: 0.0003 +[2026-03-03 04:50:16] (step=0041781) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.174721189591079, LR: 0.0003 +[2026-03-03 04:50:24] (step=0041782) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.174916846018391, LR: 0.0003 +[2026-03-03 04:50:32] (step=0041783) Train Loss: 0.4381, Train Steps/Sec: 0.12, Epoch: 8.175112502445705, LR: 0.0003 +[2026-03-03 04:50:40] (step=0041784) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.175308158873019, LR: 0.0003 +[2026-03-03 04:50:48] (step=0041785) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.175503815300333, LR: 0.0003 +[2026-03-03 04:50:56] (step=0041786) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.175699471727647, LR: 0.0003 +[2026-03-03 04:51:04] (step=0041787) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.17589512815496, LR: 0.0003 +[2026-03-03 04:51:12] (step=0041788) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.176090784582273, LR: 0.0003 +[2026-03-03 04:51:20] (step=0041789) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.176286441009587, LR: 0.0003 +[2026-03-03 04:51:28] (step=0041790) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.176482097436901, LR: 0.0003 +[2026-03-03 04:51:35] (step=0041791) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.176677753864215, LR: 0.0003 +[2026-03-03 04:51:43] (step=0041792) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.176873410291527, LR: 0.0003 +[2026-03-03 04:51:51] (step=0041793) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.177069066718841, LR: 0.0003 +[2026-03-03 04:51:59] (step=0041794) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.177264723146155, LR: 0.0003 +[2026-03-03 04:52:07] (step=0041795) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.17746037957347, LR: 0.0003 +[2026-03-03 04:52:15] (step=0041796) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.177656036000784, LR: 0.0003 +[2026-03-03 04:52:23] (step=0041797) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.177851692428096, LR: 0.0003 +[2026-03-03 04:52:30] (step=0041798) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.17804734885541, LR: 0.0003 +[2026-03-03 04:52:38] (step=0041799) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 8.178243005282724, LR: 0.0003 +[2026-03-03 04:52:46] (step=0041800) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.178438661710038, LR: 0.0003 +[2026-03-03 04:52:54] (step=0041801) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.17863431813735, LR: 0.0003 +[2026-03-03 04:53:02] (step=0041802) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.178829974564664, LR: 0.0003 +[2026-03-03 04:53:10] (step=0041803) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.179025630991978, LR: 0.0003 +[2026-03-03 04:53:18] (step=0041804) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.179221287419292, LR: 0.0003 +[2026-03-03 04:53:26] (step=0041805) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.179416943846606, LR: 0.0003 +[2026-03-03 04:53:33] (step=0041806) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.179612600273918, LR: 0.0003 +[2026-03-03 04:53:41] (step=0041807) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.179808256701232, LR: 0.0003 +[2026-03-03 04:53:49] (step=0041808) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.180003913128546, LR: 0.0003 +[2026-03-03 04:53:57] (step=0041809) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.18019956955586, LR: 0.0003 +[2026-03-03 04:54:05] (step=0041810) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 8.180395225983174, LR: 0.0003 +[2026-03-03 04:54:13] (step=0041811) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.180590882410486, LR: 0.0003 +[2026-03-03 04:54:21] (step=0041812) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.1807865388378, LR: 0.0003 +[2026-03-03 04:54:28] (step=0041813) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.180982195265114, LR: 0.0003 +[2026-03-03 04:54:36] (step=0041814) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.181177851692429, LR: 0.0003 +[2026-03-03 04:54:44] (step=0041815) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.181373508119743, LR: 0.0003 +[2026-03-03 04:54:52] (step=0041816) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.181569164547055, LR: 0.0003 +[2026-03-03 04:55:00] (step=0041817) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.181764820974369, LR: 0.0003 +[2026-03-03 04:55:08] (step=0041818) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.181960477401683, LR: 0.0003 +[2026-03-03 04:55:15] (step=0041819) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.182156133828997, LR: 0.0003 +[2026-03-03 04:55:23] (step=0041820) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.18235179025631, LR: 0.0003 +[2026-03-03 04:55:31] (step=0041821) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.182547446683623, LR: 0.0003 +[2026-03-03 04:55:39] (step=0041822) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.182743103110937, LR: 0.0003 +[2026-03-03 04:55:47] (step=0041823) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.182938759538251, LR: 0.0003 +[2026-03-03 04:55:55] (step=0041824) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.183134415965565, LR: 0.0003 +[2026-03-03 04:56:03] (step=0041825) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.183330072392877, LR: 0.0003 +[2026-03-03 04:56:11] (step=0041826) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.183525728820191, LR: 0.0003 +[2026-03-03 04:56:18] (step=0041827) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.183721385247505, LR: 0.0003 +[2026-03-03 04:56:26] (step=0041828) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.18391704167482, LR: 0.0003 +[2026-03-03 04:56:34] (step=0041829) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.184112698102133, LR: 0.0003 +[2026-03-03 04:56:42] (step=0041830) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.184308354529445, LR: 0.0003 +[2026-03-03 04:56:50] (step=0041831) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.18450401095676, LR: 0.0003 +[2026-03-03 04:56:58] (step=0041832) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.184699667384074, LR: 0.0003 +[2026-03-03 04:57:06] (step=0041833) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.184895323811388, LR: 0.0003 +[2026-03-03 04:57:14] (step=0041834) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 8.185090980238702, LR: 0.0003 +[2026-03-03 04:57:21] (step=0041835) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.185286636666014, LR: 0.0003 +[2026-03-03 04:57:29] (step=0041836) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.185482293093328, LR: 0.0003 +[2026-03-03 04:57:37] (step=0041837) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 8.185677949520642, LR: 0.0003 +[2026-03-03 04:57:45] (step=0041838) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.185873605947956, LR: 0.0003 +[2026-03-03 04:57:53] (step=0041839) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.18606926237527, LR: 0.0003 +[2026-03-03 04:58:01] (step=0041840) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 8.186264918802582, LR: 0.0003 +[2026-03-03 04:58:09] (step=0041841) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.186460575229896, LR: 0.0003 +[2026-03-03 04:58:16] (step=0041842) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.18665623165721, LR: 0.0003 +[2026-03-03 04:58:24] (step=0041843) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 8.186851888084524, LR: 0.0003 +[2026-03-03 04:58:32] (step=0041844) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 8.187047544511838, LR: 0.0003 +[2026-03-03 04:58:40] (step=0041845) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.18724320093915, LR: 0.0003 +[2026-03-03 04:58:48] (step=0041846) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 8.187438857366464, LR: 0.0003 +[2026-03-03 04:58:56] (step=0041847) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.187634513793778, LR: 0.0003 +[2026-03-03 04:59:04] (step=0041848) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.187830170221092, LR: 0.0003 +[2026-03-03 04:59:11] (step=0041849) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.188025826648406, LR: 0.0003 +[2026-03-03 04:59:19] (step=0041850) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.188221483075719, LR: 0.0003 +[2026-03-03 04:59:27] (step=0041851) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.188417139503033, LR: 0.0003 +[2026-03-03 04:59:35] (step=0041852) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.188612795930347, LR: 0.0003 +[2026-03-03 04:59:43] (step=0041853) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.18880845235766, LR: 0.0003 +[2026-03-03 04:59:51] (step=0041854) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.189004108784973, LR: 0.0003 +[2026-03-03 04:59:59] (step=0041855) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.189199765212287, LR: 0.0003 +[2026-03-03 05:00:06] (step=0041856) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.1893954216396, LR: 0.0003 +[2026-03-03 05:00:14] (step=0041857) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.189591078066915, LR: 0.0003 +[2026-03-03 05:00:22] (step=0041858) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.189786734494229, LR: 0.0003 +[2026-03-03 05:00:30] (step=0041859) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.189982390921541, LR: 0.0003 +[2026-03-03 05:00:38] (step=0041860) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.190178047348855, LR: 0.0003 +[2026-03-03 05:00:46] (step=0041861) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.190373703776169, LR: 0.0003 +[2026-03-03 05:00:54] (step=0041862) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 8.190569360203483, LR: 0.0003 +[2026-03-03 05:01:01] (step=0041863) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.190765016630797, LR: 0.0003 +[2026-03-03 05:01:09] (step=0041864) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.19096067305811, LR: 0.0003 +[2026-03-03 05:01:17] (step=0041865) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.191156329485423, LR: 0.0003 +[2026-03-03 05:01:25] (step=0041866) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.191351985912737, LR: 0.0003 +[2026-03-03 05:01:33] (step=0041867) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.191547642340051, LR: 0.0003 +[2026-03-03 05:01:41] (step=0041868) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 8.191743298767365, LR: 0.0003 +[2026-03-03 05:01:49] (step=0041869) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.191938955194678, LR: 0.0003 +[2026-03-03 05:01:57] (step=0041870) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.192134611621992, LR: 0.0003 +[2026-03-03 05:02:04] (step=0041871) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.192330268049306, LR: 0.0003 +[2026-03-03 05:02:12] (step=0041872) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.19252592447662, LR: 0.0003 +[2026-03-03 05:02:20] (step=0041873) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 8.192721580903934, LR: 0.0003 +[2026-03-03 05:02:28] (step=0041874) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.192917237331246, LR: 0.0003 +[2026-03-03 05:02:36] (step=0041875) Train Loss: 0.4397, Train Steps/Sec: 0.12, Epoch: 8.19311289375856, LR: 0.0003 +[2026-03-03 05:02:44] (step=0041876) Train Loss: 0.4766, Train Steps/Sec: 0.13, Epoch: 8.193308550185874, LR: 0.0003 +[2026-03-03 05:02:52] (step=0041877) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.193504206613188, LR: 0.0003 +[2026-03-03 05:03:00] (step=0041878) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.1936998630405, LR: 0.0003 +[2026-03-03 05:03:07] (step=0041879) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.193895519467814, LR: 0.0003 +[2026-03-03 05:03:15] (step=0041880) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.194091175895128, LR: 0.0003 +[2026-03-03 05:03:23] (step=0041881) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.194286832322442, LR: 0.0003 +[2026-03-03 05:03:31] (step=0041882) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.194482488749756, LR: 0.0003 +[2026-03-03 05:03:39] (step=0041883) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 8.194678145177068, LR: 0.0003 +[2026-03-03 05:03:47] (step=0041884) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.194873801604382, LR: 0.0003 +[2026-03-03 05:03:55] (step=0041885) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.195069458031696, LR: 0.0003 +[2026-03-03 05:04:03] (step=0041886) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.19526511445901, LR: 0.0003 +[2026-03-03 05:04:10] (step=0041887) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.195460770886324, LR: 0.0003 +[2026-03-03 05:04:18] (step=0041888) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.195656427313637, LR: 0.0003 +[2026-03-03 05:04:26] (step=0041889) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.19585208374095, LR: 0.0003 +[2026-03-03 05:04:34] (step=0041890) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.196047740168265, LR: 0.0003 +[2026-03-03 05:04:42] (step=0041891) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.196243396595579, LR: 0.0003 +[2026-03-03 05:04:50] (step=0041892) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 8.196439053022893, LR: 0.0003 +[2026-03-03 05:04:58] (step=0041893) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 8.196634709450205, LR: 0.0003 +[2026-03-03 05:05:05] (step=0041894) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.196830365877519, LR: 0.0003 +[2026-03-03 05:05:13] (step=0041895) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.197026022304833, LR: 0.0003 +[2026-03-03 05:05:21] (step=0041896) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.197221678732147, LR: 0.0003 +[2026-03-03 05:05:29] (step=0041897) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.19741733515946, LR: 0.0003 +[2026-03-03 05:05:37] (step=0041898) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.197612991586773, LR: 0.0003 +[2026-03-03 05:05:45] (step=0041899) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 8.197808648014087, LR: 0.0003 +[2026-03-03 05:05:53] (step=0041900) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.198004304441401, LR: 0.0003 +[2026-03-03 05:06:00] (step=0041901) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.198199960868715, LR: 0.0003 +[2026-03-03 05:06:08] (step=0041902) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.198395617296029, LR: 0.0003 +[2026-03-03 05:06:16] (step=0041903) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.198591273723341, LR: 0.0003 +[2026-03-03 05:06:24] (step=0041904) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.198786930150655, LR: 0.0003 +[2026-03-03 05:06:32] (step=0041905) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.19898258657797, LR: 0.0003 +[2026-03-03 05:06:40] (step=0041906) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.199178243005283, LR: 0.0003 +[2026-03-03 05:06:48] (step=0041907) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.199373899432596, LR: 0.0003 +[2026-03-03 05:06:55] (step=0041908) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.19956955585991, LR: 0.0003 +[2026-03-03 05:07:03] (step=0041909) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.199765212287224, LR: 0.0003 +[2026-03-03 05:07:11] (step=0041910) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.199960868714538, LR: 0.0003 +[2026-03-03 05:07:19] (step=0041911) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.200156525141852, LR: 0.0003 +[2026-03-03 05:07:27] (step=0041912) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.200352181569164, LR: 0.0003 +[2026-03-03 05:07:35] (step=0041913) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.200547837996478, LR: 0.0003 +[2026-03-03 05:07:43] (step=0041914) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.200743494423792, LR: 0.0003 +[2026-03-03 05:07:50] (step=0041915) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.200939150851106, LR: 0.0003 +[2026-03-03 05:07:58] (step=0041916) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.20113480727842, LR: 0.0003 +[2026-03-03 05:08:06] (step=0041917) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.201330463705732, LR: 0.0003 +[2026-03-03 05:08:14] (step=0041918) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.201526120133046, LR: 0.0003 +[2026-03-03 05:08:22] (step=0041919) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.20172177656036, LR: 0.0003 +[2026-03-03 05:08:30] (step=0041920) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.201917432987674, LR: 0.0003 +[2026-03-03 05:08:38] (step=0041921) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 8.202113089414988, LR: 0.0003 +[2026-03-03 05:08:45] (step=0041922) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.2023087458423, LR: 0.0003 +[2026-03-03 05:08:53] (step=0041923) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.202504402269614, LR: 0.0003 +[2026-03-03 05:09:01] (step=0041924) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.202700058696928, LR: 0.0003 +[2026-03-03 05:09:09] (step=0041925) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.202895715124242, LR: 0.0003 +[2026-03-03 05:09:17] (step=0041926) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.203091371551556, LR: 0.0003 +[2026-03-03 05:09:25] (step=0041927) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.203287027978869, LR: 0.0003 +[2026-03-03 05:09:33] (step=0041928) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.203482684406183, LR: 0.0003 +[2026-03-03 05:09:41] (step=0041929) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.203678340833497, LR: 0.0003 +[2026-03-03 05:09:49] (step=0041930) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.20387399726081, LR: 0.0003 +[2026-03-03 05:09:56] (step=0041931) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.204069653688123, LR: 0.0003 +[2026-03-03 05:10:04] (step=0041932) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.204265310115437, LR: 0.0003 +[2026-03-03 05:10:12] (step=0041933) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 8.20446096654275, LR: 0.0003 +[2026-03-03 05:10:20] (step=0041934) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 8.204656622970065, LR: 0.0003 +[2026-03-03 05:10:28] (step=0041935) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.204852279397379, LR: 0.0003 +[2026-03-03 05:10:36] (step=0041936) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.205047935824691, LR: 0.0003 +[2026-03-03 05:10:44] (step=0041937) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.205243592252005, LR: 0.0003 +[2026-03-03 05:10:51] (step=0041938) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.205439248679319, LR: 0.0003 +[2026-03-03 05:10:59] (step=0041939) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.205634905106633, LR: 0.0003 +[2026-03-03 05:11:07] (step=0041940) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.205830561533947, LR: 0.0003 +[2026-03-03 05:11:15] (step=0041941) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.20602621796126, LR: 0.0003 +[2026-03-03 05:11:23] (step=0041942) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.206221874388573, LR: 0.0003 +[2026-03-03 05:11:31] (step=0041943) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.206417530815887, LR: 0.0003 +[2026-03-03 05:11:39] (step=0041944) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.206613187243201, LR: 0.0003 +[2026-03-03 05:11:46] (step=0041945) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.206808843670515, LR: 0.0003 +[2026-03-03 05:11:54] (step=0041946) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.207004500097828, LR: 0.0003 +[2026-03-03 05:12:02] (step=0041947) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.207200156525142, LR: 0.0003 +[2026-03-03 05:12:10] (step=0041948) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.207395812952456, LR: 0.0003 +[2026-03-03 05:12:18] (step=0041949) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.20759146937977, LR: 0.0003 +[2026-03-03 05:12:26] (step=0041950) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.207787125807084, LR: 0.0003 +[2026-03-03 05:12:34] (step=0041951) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.207982782234396, LR: 0.0003 +[2026-03-03 05:12:41] (step=0041952) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.20817843866171, LR: 0.0003 +[2026-03-03 05:12:49] (step=0041953) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 8.208374095089024, LR: 0.0003 +[2026-03-03 05:12:57] (step=0041954) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.208569751516338, LR: 0.0003 +[2026-03-03 05:13:05] (step=0041955) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.20876540794365, LR: 0.0003 +[2026-03-03 05:13:13] (step=0041956) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.208961064370964, LR: 0.0003 +[2026-03-03 05:13:21] (step=0041957) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 8.209156720798278, LR: 0.0003 +[2026-03-03 05:13:29] (step=0041958) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.209352377225592, LR: 0.0003 +[2026-03-03 05:13:36] (step=0041959) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.209548033652906, LR: 0.0003 +[2026-03-03 05:13:44] (step=0041960) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.209743690080218, LR: 0.0003 +[2026-03-03 05:13:52] (step=0041961) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 8.209939346507532, LR: 0.0003 +[2026-03-03 05:14:00] (step=0041962) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.210135002934846, LR: 0.0003 +[2026-03-03 05:14:08] (step=0041963) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.21033065936216, LR: 0.0003 +[2026-03-03 05:14:16] (step=0041964) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.210526315789474, LR: 0.0003 +[2026-03-03 05:14:24] (step=0041965) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.210721972216787, LR: 0.0003 +[2026-03-03 05:14:31] (step=0041966) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 8.2109176286441, LR: 0.0003 +[2026-03-03 05:14:39] (step=0041967) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 8.211113285071415, LR: 0.0003 +[2026-03-03 05:14:47] (step=0041968) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.211308941498729, LR: 0.0003 +[2026-03-03 05:14:55] (step=0041969) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.211504597926043, LR: 0.0003 +[2026-03-03 05:15:03] (step=0041970) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.211700254353355, LR: 0.0003 +[2026-03-03 05:15:11] (step=0041971) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.211895910780669, LR: 0.0003 +[2026-03-03 05:15:19] (step=0041972) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.212091567207983, LR: 0.0003 +[2026-03-03 05:15:26] (step=0041973) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.212287223635297, LR: 0.0003 +[2026-03-03 05:15:34] (step=0041974) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.21248288006261, LR: 0.0003 +[2026-03-03 05:15:42] (step=0041975) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.212678536489923, LR: 0.0003 +[2026-03-03 05:15:50] (step=0041976) Train Loss: 0.4555, Train Steps/Sec: 0.12, Epoch: 8.212874192917237, LR: 0.0003 +[2026-03-03 05:15:58] (step=0041977) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.213069849344551, LR: 0.0003 +[2026-03-03 05:16:06] (step=0041978) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.213265505771865, LR: 0.0003 +[2026-03-03 05:16:14] (step=0041979) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 8.213461162199179, LR: 0.0003 +[2026-03-03 05:16:22] (step=0041980) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.213656818626491, LR: 0.0003 +[2026-03-03 05:16:30] (step=0041981) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.213852475053805, LR: 0.0003 +[2026-03-03 05:16:37] (step=0041982) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 8.21404813148112, LR: 0.0003 +[2026-03-03 05:16:45] (step=0041983) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.214243787908433, LR: 0.0003 +[2026-03-03 05:16:53] (step=0041984) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 8.214439444335746, LR: 0.0003 +[2026-03-03 05:17:01] (step=0041985) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.21463510076306, LR: 0.0003 +[2026-03-03 05:17:09] (step=0041986) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.214830757190374, LR: 0.0003 +[2026-03-03 05:17:17] (step=0041987) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.215026413617688, LR: 0.0003 +[2026-03-03 05:17:25] (step=0041988) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.215222070045002, LR: 0.0003 +[2026-03-03 05:17:32] (step=0041989) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.215417726472314, LR: 0.0003 +[2026-03-03 05:17:40] (step=0041990) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.215613382899628, LR: 0.0003 +[2026-03-03 05:17:48] (step=0041991) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.215809039326942, LR: 0.0003 +[2026-03-03 05:17:56] (step=0041992) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 8.216004695754256, LR: 0.0003 +[2026-03-03 05:18:04] (step=0041993) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.21620035218157, LR: 0.0003 +[2026-03-03 05:18:12] (step=0041994) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.216396008608882, LR: 0.0003 +[2026-03-03 05:18:20] (step=0041995) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.216591665036196, LR: 0.0003 +[2026-03-03 05:18:28] (step=0041996) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.21678732146351, LR: 0.0003 +[2026-03-03 05:18:35] (step=0041997) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.216982977890824, LR: 0.0003 +[2026-03-03 05:18:43] (step=0041998) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.217178634318138, LR: 0.0003 +[2026-03-03 05:18:51] (step=0041999) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.21737429074545, LR: 0.0003 +[2026-03-03 05:18:59] (step=0042000) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.217569947172764, LR: 0.0003 +[2026-03-03 05:18:59] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0042000/ +[2026-03-03 05:19:07] (step=0042001) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.217765603600078, LR: 0.0003 +[2026-03-03 05:19:15] (step=0042002) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.217961260027392, LR: 0.0003 +[2026-03-03 05:19:23] (step=0042003) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 8.218156916454706, LR: 0.0003 +[2026-03-03 05:19:30] (step=0042004) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.218352572882019, LR: 0.0003 +[2026-03-03 05:19:38] (step=0042005) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.218548229309333, LR: 0.0003 +[2026-03-03 05:19:46] (step=0042006) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.218743885736647, LR: 0.0003 +[2026-03-03 05:19:54] (step=0042007) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.21893954216396, LR: 0.0003 +[2026-03-03 05:20:02] (step=0042008) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.219135198591273, LR: 0.0003 +[2026-03-03 05:20:10] (step=0042009) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.219330855018587, LR: 0.0003 +[2026-03-03 05:20:17] (step=0042010) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.2195265114459, LR: 0.0003 +[2026-03-03 05:20:25] (step=0042011) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.219722167873215, LR: 0.0003 +[2026-03-03 05:20:33] (step=0042012) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 8.219917824300529, LR: 0.0003 +[2026-03-03 05:20:41] (step=0042013) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.220113480727841, LR: 0.0003 +[2026-03-03 05:20:49] (step=0042014) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.220309137155155, LR: 0.0003 +[2026-03-03 05:20:57] (step=0042015) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.220504793582469, LR: 0.0003 +[2026-03-03 05:21:05] (step=0042016) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.220700450009783, LR: 0.0003 +[2026-03-03 05:21:12] (step=0042017) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.220896106437097, LR: 0.0003 +[2026-03-03 05:21:20] (step=0042018) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.22109176286441, LR: 0.0003 +[2026-03-03 05:21:28] (step=0042019) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.221287419291723, LR: 0.0003 +[2026-03-03 05:21:36] (step=0042020) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.221483075719037, LR: 0.0003 +[2026-03-03 05:21:44] (step=0042021) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.221678732146351, LR: 0.0003 +[2026-03-03 05:21:52] (step=0042022) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.221874388573665, LR: 0.0003 +[2026-03-03 05:22:00] (step=0042023) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.222070045000978, LR: 0.0003 +[2026-03-03 05:22:08] (step=0042024) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.222265701428292, LR: 0.0003 +[2026-03-03 05:22:16] (step=0042025) Train Loss: 0.4429, Train Steps/Sec: 0.12, Epoch: 8.222461357855606, LR: 0.0003 +[2026-03-03 05:22:23] (step=0042026) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.22265701428292, LR: 0.0003 +[2026-03-03 05:22:31] (step=0042027) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.222852670710234, LR: 0.0003 +[2026-03-03 05:22:39] (step=0042028) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.223048327137546, LR: 0.0003 +[2026-03-03 05:22:47] (step=0042029) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.22324398356486, LR: 0.0003 +[2026-03-03 05:22:55] (step=0042030) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.223439639992174, LR: 0.0003 +[2026-03-03 05:23:03] (step=0042031) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.223635296419488, LR: 0.0003 +[2026-03-03 05:23:11] (step=0042032) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.223830952846802, LR: 0.0003 +[2026-03-03 05:23:19] (step=0042033) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.224026609274114, LR: 0.0003 +[2026-03-03 05:23:26] (step=0042034) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.224222265701428, LR: 0.0003 +[2026-03-03 05:23:34] (step=0042035) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.224417922128742, LR: 0.0003 +[2026-03-03 05:23:42] (step=0042036) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.224613578556056, LR: 0.0003 +[2026-03-03 05:23:50] (step=0042037) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.224809234983368, LR: 0.0003 +[2026-03-03 05:23:58] (step=0042038) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.225004891410682, LR: 0.0003 +[2026-03-03 05:24:06] (step=0042039) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.225200547837996, LR: 0.0003 +[2026-03-03 05:24:13] (step=0042040) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.22539620426531, LR: 0.0003 +[2026-03-03 05:24:21] (step=0042041) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.225591860692624, LR: 0.0003 +[2026-03-03 05:24:29] (step=0042042) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.225787517119937, LR: 0.0003 +[2026-03-03 05:24:37] (step=0042043) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.22598317354725, LR: 0.0003 +[2026-03-03 05:24:45] (step=0042044) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.226178829974565, LR: 0.0003 +[2026-03-03 05:24:53] (step=0042045) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.226374486401879, LR: 0.0003 +[2026-03-03 05:25:01] (step=0042046) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.226570142829193, LR: 0.0003 +[2026-03-03 05:25:09] (step=0042047) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.226765799256505, LR: 0.0003 +[2026-03-03 05:25:16] (step=0042048) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.226961455683819, LR: 0.0003 +[2026-03-03 05:25:24] (step=0042049) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.227157112111133, LR: 0.0003 +[2026-03-03 05:25:32] (step=0042050) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.227352768538447, LR: 0.0003 +[2026-03-03 05:25:40] (step=0042051) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.22754842496576, LR: 0.0003 +[2026-03-03 05:25:48] (step=0042052) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.227744081393073, LR: 0.0003 +[2026-03-03 05:25:56] (step=0042053) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 8.227939737820387, LR: 0.0003 +[2026-03-03 05:26:04] (step=0042054) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.228135394247701, LR: 0.0003 +[2026-03-03 05:26:11] (step=0042055) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.228331050675015, LR: 0.0003 +[2026-03-03 05:26:19] (step=0042056) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.22852670710233, LR: 0.0003 +[2026-03-03 05:26:27] (step=0042057) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.228722363529641, LR: 0.0003 +[2026-03-03 05:26:35] (step=0042058) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.228918019956955, LR: 0.0003 +[2026-03-03 05:26:43] (step=0042059) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.22911367638427, LR: 0.0003 +[2026-03-03 05:26:51] (step=0042060) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.229309332811583, LR: 0.0003 +[2026-03-03 05:26:59] (step=0042061) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.229504989238896, LR: 0.0003 +[2026-03-03 05:27:06] (step=0042062) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.22970064566621, LR: 0.0003 +[2026-03-03 05:27:14] (step=0042063) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 8.229896302093524, LR: 0.0003 +[2026-03-03 05:27:22] (step=0042064) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.230091958520838, LR: 0.0003 +[2026-03-03 05:27:30] (step=0042065) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.230287614948152, LR: 0.0003 +[2026-03-03 05:27:38] (step=0042066) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.230483271375464, LR: 0.0003 +[2026-03-03 05:27:46] (step=0042067) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.230678927802778, LR: 0.0003 +[2026-03-03 05:27:54] (step=0042068) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.230874584230092, LR: 0.0003 +[2026-03-03 05:28:01] (step=0042069) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.231070240657406, LR: 0.0003 +[2026-03-03 05:28:09] (step=0042070) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.23126589708472, LR: 0.0003 +[2026-03-03 05:28:17] (step=0042071) Train Loss: 0.4510, Train Steps/Sec: 0.12, Epoch: 8.231461553512032, LR: 0.0003 +[2026-03-03 05:28:25] (step=0042072) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.231657209939346, LR: 0.0003 +[2026-03-03 05:28:33] (step=0042073) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.23185286636666, LR: 0.0003 +[2026-03-03 05:28:41] (step=0042074) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.232048522793974, LR: 0.0003 +[2026-03-03 05:28:49] (step=0042075) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.232244179221288, LR: 0.0003 +[2026-03-03 05:28:57] (step=0042076) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 8.2324398356486, LR: 0.0003 +[2026-03-03 05:29:05] (step=0042077) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.232635492075914, LR: 0.0003 +[2026-03-03 05:29:13] (step=0042078) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.232831148503228, LR: 0.0003 +[2026-03-03 05:29:20] (step=0042079) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.233026804930542, LR: 0.0003 +[2026-03-03 05:29:28] (step=0042080) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.233222461357856, LR: 0.0003 +[2026-03-03 05:29:36] (step=0042081) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.233418117785169, LR: 0.0003 +[2026-03-03 05:29:44] (step=0042082) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.233613774212483, LR: 0.0003 +[2026-03-03 05:29:52] (step=0042083) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.233809430639797, LR: 0.0003 +[2026-03-03 05:30:00] (step=0042084) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.23400508706711, LR: 0.0003 +[2026-03-03 05:30:08] (step=0042085) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.234200743494425, LR: 0.0003 +[2026-03-03 05:30:16] (step=0042086) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.234396399921737, LR: 0.0003 +[2026-03-03 05:30:23] (step=0042087) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.234592056349051, LR: 0.0003 +[2026-03-03 05:30:31] (step=0042088) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 8.234787712776365, LR: 0.0003 +[2026-03-03 05:30:39] (step=0042089) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.234983369203679, LR: 0.0003 +[2026-03-03 05:30:47] (step=0042090) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.235179025630991, LR: 0.0003 +[2026-03-03 05:30:55] (step=0042091) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.235374682058305, LR: 0.0003 +[2026-03-03 05:31:03] (step=0042092) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.23557033848562, LR: 0.0003 +[2026-03-03 05:31:11] (step=0042093) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.235765994912933, LR: 0.0003 +[2026-03-03 05:31:18] (step=0042094) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.235961651340247, LR: 0.0003 +[2026-03-03 05:31:26] (step=0042095) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.23615730776756, LR: 0.0003 +[2026-03-03 05:31:34] (step=0042096) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.236352964194873, LR: 0.0003 +[2026-03-03 05:31:42] (step=0042097) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 8.236548620622187, LR: 0.0003 +[2026-03-03 05:31:50] (step=0042098) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.236744277049501, LR: 0.0003 +[2026-03-03 05:31:58] (step=0042099) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.236939933476815, LR: 0.0003 +[2026-03-03 05:32:06] (step=0042100) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.237135589904128, LR: 0.0003 +[2026-03-03 05:32:13] (step=0042101) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.237331246331442, LR: 0.0003 +[2026-03-03 05:32:21] (step=0042102) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.237526902758756, LR: 0.0003 +[2026-03-03 05:32:29] (step=0042103) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.23772255918607, LR: 0.0003 +[2026-03-03 05:32:37] (step=0042104) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.237918215613384, LR: 0.0003 +[2026-03-03 05:32:45] (step=0042105) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.238113872040696, LR: 0.0003 +[2026-03-03 05:32:53] (step=0042106) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.23830952846801, LR: 0.0003 +[2026-03-03 05:33:01] (step=0042107) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.238505184895324, LR: 0.0003 +[2026-03-03 05:33:08] (step=0042108) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.238700841322638, LR: 0.0003 +[2026-03-03 05:33:16] (step=0042109) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.238896497749952, LR: 0.0003 +[2026-03-03 05:33:24] (step=0042110) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.239092154177264, LR: 0.0003 +[2026-03-03 05:33:32] (step=0042111) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.239287810604578, LR: 0.0003 +[2026-03-03 05:33:40] (step=0042112) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.239483467031892, LR: 0.0003 +[2026-03-03 05:33:48] (step=0042113) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.239679123459206, LR: 0.0003 +[2026-03-03 05:33:56] (step=0042114) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.239874779886518, LR: 0.0003 +[2026-03-03 05:34:04] (step=0042115) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.240070436313832, LR: 0.0003 +[2026-03-03 05:34:11] (step=0042116) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.240266092741146, LR: 0.0003 +[2026-03-03 05:34:19] (step=0042117) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.24046174916846, LR: 0.0003 +[2026-03-03 05:34:27] (step=0042118) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.240657405595774, LR: 0.0003 +[2026-03-03 05:34:35] (step=0042119) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.240853062023087, LR: 0.0003 +[2026-03-03 05:34:43] (step=0042120) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.2410487184504, LR: 0.0003 +[2026-03-03 05:34:51] (step=0042121) Train Loss: 0.4534, Train Steps/Sec: 0.12, Epoch: 8.241244374877715, LR: 0.0003 +[2026-03-03 05:34:59] (step=0042122) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 8.241440031305029, LR: 0.0003 +[2026-03-03 05:35:07] (step=0042123) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.241635687732343, LR: 0.0003 +[2026-03-03 05:35:15] (step=0042124) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.241831344159655, LR: 0.0003 +[2026-03-03 05:35:23] (step=0042125) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.242027000586969, LR: 0.0003 +[2026-03-03 05:35:30] (step=0042126) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.242222657014283, LR: 0.0003 +[2026-03-03 05:35:38] (step=0042127) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.242418313441597, LR: 0.0003 +[2026-03-03 05:35:46] (step=0042128) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.242613969868911, LR: 0.0003 +[2026-03-03 05:35:54] (step=0042129) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.242809626296223, LR: 0.0003 +[2026-03-03 05:36:02] (step=0042130) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.243005282723537, LR: 0.0003 +[2026-03-03 05:36:10] (step=0042131) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.243200939150851, LR: 0.0003 +[2026-03-03 05:36:18] (step=0042132) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.243396595578165, LR: 0.0003 +[2026-03-03 05:36:25] (step=0042133) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.24359225200548, LR: 0.0003 +[2026-03-03 05:36:33] (step=0042134) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.243787908432791, LR: 0.0003 +[2026-03-03 05:36:41] (step=0042135) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.243983564860105, LR: 0.0003 +[2026-03-03 05:36:49] (step=0042136) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.24417922128742, LR: 0.0003 +[2026-03-03 05:36:57] (step=0042137) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.244374877714733, LR: 0.0003 +[2026-03-03 05:37:05] (step=0042138) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.244570534142047, LR: 0.0003 +[2026-03-03 05:37:12] (step=0042139) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.24476619056936, LR: 0.0003 +[2026-03-03 05:37:20] (step=0042140) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.244961846996674, LR: 0.0003 +[2026-03-03 05:37:28] (step=0042141) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 8.245157503423988, LR: 0.0003 +[2026-03-03 05:37:36] (step=0042142) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 8.245353159851302, LR: 0.0003 +[2026-03-03 05:37:44] (step=0042143) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.245548816278614, LR: 0.0003 +[2026-03-03 05:37:52] (step=0042144) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.245744472705928, LR: 0.0003 +[2026-03-03 05:38:00] (step=0042145) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.245940129133242, LR: 0.0003 +[2026-03-03 05:38:08] (step=0042146) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.246135785560556, LR: 0.0003 +[2026-03-03 05:38:16] (step=0042147) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 8.24633144198787, LR: 0.0003 +[2026-03-03 05:38:23] (step=0042148) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.246527098415182, LR: 0.0003 +[2026-03-03 05:38:31] (step=0042149) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.246722754842496, LR: 0.0003 +[2026-03-03 05:38:39] (step=0042150) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.24691841126981, LR: 0.0003 +[2026-03-03 05:38:47] (step=0042151) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.247114067697124, LR: 0.0003 +[2026-03-03 05:38:55] (step=0042152) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.247309724124438, LR: 0.0003 +[2026-03-03 05:39:03] (step=0042153) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 8.24750538055175, LR: 0.0003 +[2026-03-03 05:39:11] (step=0042154) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.247701036979064, LR: 0.0003 +[2026-03-03 05:39:18] (step=0042155) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.247896693406378, LR: 0.0003 +[2026-03-03 05:39:26] (step=0042156) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.248092349833692, LR: 0.0003 +[2026-03-03 05:39:34] (step=0042157) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.248288006261006, LR: 0.0003 +[2026-03-03 05:39:42] (step=0042158) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.248483662688319, LR: 0.0003 +[2026-03-03 05:39:50] (step=0042159) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.248679319115633, LR: 0.0003 +[2026-03-03 05:39:58] (step=0042160) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.248874975542947, LR: 0.0003 +[2026-03-03 05:40:06] (step=0042161) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.24907063197026, LR: 0.0003 +[2026-03-03 05:40:14] (step=0042162) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.249266288397575, LR: 0.0003 +[2026-03-03 05:40:21] (step=0042163) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.249461944824887, LR: 0.0003 +[2026-03-03 05:40:29] (step=0042164) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.249657601252201, LR: 0.0003 +[2026-03-03 05:40:37] (step=0042165) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 8.249853257679515, LR: 0.0003 +[2026-03-03 05:40:45] (step=0042166) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.250048914106829, LR: 0.0003 +[2026-03-03 05:40:53] (step=0042167) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.250244570534141, LR: 0.0003 +[2026-03-03 05:41:01] (step=0042168) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 8.250440226961455, LR: 0.0003 +[2026-03-03 05:41:09] (step=0042169) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.25063588338877, LR: 0.0003 +[2026-03-03 05:41:17] (step=0042170) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.250831539816083, LR: 0.0003 +[2026-03-03 05:41:25] (step=0042171) Train Loss: 0.4464, Train Steps/Sec: 0.12, Epoch: 8.251027196243397, LR: 0.0003 +[2026-03-03 05:41:32] (step=0042172) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.25122285267071, LR: 0.0003 +[2026-03-03 05:41:40] (step=0042173) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.251418509098023, LR: 0.0003 +[2026-03-03 05:41:48] (step=0042174) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.251614165525337, LR: 0.0003 +[2026-03-03 05:41:56] (step=0042175) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 8.251809821952651, LR: 0.0003 +[2026-03-03 05:42:04] (step=0042176) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 8.252005478379965, LR: 0.0003 +[2026-03-03 05:42:12] (step=0042177) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.252201134807278, LR: 0.0003 +[2026-03-03 05:42:20] (step=0042178) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.252396791234592, LR: 0.0003 +[2026-03-03 05:42:27] (step=0042179) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 8.252592447661906, LR: 0.0003 +[2026-03-03 05:42:35] (step=0042180) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.25278810408922, LR: 0.0003 +[2026-03-03 05:42:43] (step=0042181) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.252983760516534, LR: 0.0003 +[2026-03-03 05:42:51] (step=0042182) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.253179416943846, LR: 0.0003 +[2026-03-03 05:42:59] (step=0042183) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.25337507337116, LR: 0.0003 +[2026-03-03 05:43:07] (step=0042184) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.253570729798474, LR: 0.0003 +[2026-03-03 05:43:15] (step=0042185) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.253766386225788, LR: 0.0003 +[2026-03-03 05:43:23] (step=0042186) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.253962042653102, LR: 0.0003 +[2026-03-03 05:43:30] (step=0042187) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.254157699080414, LR: 0.0003 +[2026-03-03 05:43:38] (step=0042188) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.254353355507728, LR: 0.0003 +[2026-03-03 05:43:46] (step=0042189) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.254549011935042, LR: 0.0003 +[2026-03-03 05:43:54] (step=0042190) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.254744668362356, LR: 0.0003 +[2026-03-03 05:44:02] (step=0042191) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.25494032478967, LR: 0.0003 +[2026-03-03 05:44:10] (step=0042192) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.255135981216982, LR: 0.0003 +[2026-03-03 05:44:18] (step=0042193) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.255331637644296, LR: 0.0003 +[2026-03-03 05:44:25] (step=0042194) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.25552729407161, LR: 0.0003 +[2026-03-03 05:44:33] (step=0042195) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 8.255722950498924, LR: 0.0003 +[2026-03-03 05:44:41] (step=0042196) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.255918606926237, LR: 0.0003 +[2026-03-03 05:44:49] (step=0042197) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.25611426335355, LR: 0.0003 +[2026-03-03 05:44:57] (step=0042198) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.256309919780865, LR: 0.0003 +[2026-03-03 05:45:05] (step=0042199) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.256505576208179, LR: 0.0003 +[2026-03-03 05:45:13] (step=0042200) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.256701232635493, LR: 0.0003 +[2026-03-03 05:45:21] (step=0042201) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.256896889062805, LR: 0.0003 +[2026-03-03 05:45:28] (step=0042202) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 8.257092545490119, LR: 0.0003 +[2026-03-03 05:45:36] (step=0042203) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.257288201917433, LR: 0.0003 +[2026-03-03 05:45:44] (step=0042204) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.257483858344747, LR: 0.0003 +[2026-03-03 05:45:52] (step=0042205) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.257679514772061, LR: 0.0003 +[2026-03-03 05:46:00] (step=0042206) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.257875171199373, LR: 0.0003 +[2026-03-03 05:46:08] (step=0042207) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.258070827626687, LR: 0.0003 +[2026-03-03 05:46:16] (step=0042208) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.258266484054001, LR: 0.0003 +[2026-03-03 05:46:23] (step=0042209) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.258462140481315, LR: 0.0003 +[2026-03-03 05:46:31] (step=0042210) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.25865779690863, LR: 0.0003 +[2026-03-03 05:46:39] (step=0042211) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.258853453335941, LR: 0.0003 +[2026-03-03 05:46:47] (step=0042212) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.259049109763255, LR: 0.0003 +[2026-03-03 05:46:55] (step=0042213) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.25924476619057, LR: 0.0003 +[2026-03-03 05:47:03] (step=0042214) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.259440422617883, LR: 0.0003 +[2026-03-03 05:47:11] (step=0042215) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.259636079045197, LR: 0.0003 +[2026-03-03 05:47:18] (step=0042216) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 8.25983173547251, LR: 0.0003 +[2026-03-03 05:47:26] (step=0042217) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.260027391899824, LR: 0.0003 +[2026-03-03 05:47:34] (step=0042218) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.260223048327138, LR: 0.0003 +[2026-03-03 05:47:42] (step=0042219) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.260418704754452, LR: 0.0003 +[2026-03-03 05:47:50] (step=0042220) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.260614361181764, LR: 0.0003 +[2026-03-03 05:47:58] (step=0042221) Train Loss: 0.4382, Train Steps/Sec: 0.12, Epoch: 8.260810017609078, LR: 0.0003 +[2026-03-03 05:48:06] (step=0042222) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 8.261005674036392, LR: 0.0003 +[2026-03-03 05:48:14] (step=0042223) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.261201330463706, LR: 0.0003 +[2026-03-03 05:48:22] (step=0042224) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.26139698689102, LR: 0.0003 +[2026-03-03 05:48:30] (step=0042225) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.261592643318332, LR: 0.0003 +[2026-03-03 05:48:37] (step=0042226) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.261788299745646, LR: 0.0003 +[2026-03-03 05:48:45] (step=0042227) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.26198395617296, LR: 0.0003 +[2026-03-03 05:48:53] (step=0042228) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 8.262179612600274, LR: 0.0003 +[2026-03-03 05:49:01] (step=0042229) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.262375269027588, LR: 0.0003 +[2026-03-03 05:49:09] (step=0042230) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.2625709254549, LR: 0.0003 +[2026-03-03 05:49:17] (step=0042231) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.262766581882214, LR: 0.0003 +[2026-03-03 05:49:25] (step=0042232) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.262962238309528, LR: 0.0003 +[2026-03-03 05:49:32] (step=0042233) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 8.263157894736842, LR: 0.0003 +[2026-03-03 05:49:40] (step=0042234) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.263353551164156, LR: 0.0003 +[2026-03-03 05:49:48] (step=0042235) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.263549207591469, LR: 0.0003 +[2026-03-03 05:49:56] (step=0042236) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.263744864018783, LR: 0.0003 +[2026-03-03 05:50:04] (step=0042237) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.263940520446097, LR: 0.0003 +[2026-03-03 05:50:12] (step=0042238) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.26413617687341, LR: 0.0003 +[2026-03-03 05:50:20] (step=0042239) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.264331833300725, LR: 0.0003 +[2026-03-03 05:50:27] (step=0042240) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 8.264527489728037, LR: 0.0003 +[2026-03-03 05:50:35] (step=0042241) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.264723146155351, LR: 0.0003 +[2026-03-03 05:50:43] (step=0042242) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.264918802582665, LR: 0.0003 +[2026-03-03 05:50:51] (step=0042243) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.265114459009979, LR: 0.0003 +[2026-03-03 05:50:59] (step=0042244) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.265310115437293, LR: 0.0003 +[2026-03-03 05:51:07] (step=0042245) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.265505771864605, LR: 0.0003 +[2026-03-03 05:51:15] (step=0042246) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 8.26570142829192, LR: 0.0003 +[2026-03-03 05:51:23] (step=0042247) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.265897084719233, LR: 0.0003 +[2026-03-03 05:51:30] (step=0042248) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.266092741146547, LR: 0.0003 +[2026-03-03 05:51:38] (step=0042249) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.26628839757386, LR: 0.0003 +[2026-03-03 05:51:46] (step=0042250) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.266484054001173, LR: 0.0003 +[2026-03-03 05:51:54] (step=0042251) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.266679710428487, LR: 0.0003 +[2026-03-03 05:52:02] (step=0042252) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.266875366855801, LR: 0.0003 +[2026-03-03 05:52:10] (step=0042253) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.267071023283115, LR: 0.0003 +[2026-03-03 05:52:18] (step=0042254) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.267266679710428, LR: 0.0003 +[2026-03-03 05:52:25] (step=0042255) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.267462336137742, LR: 0.0003 +[2026-03-03 05:52:33] (step=0042256) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.267657992565056, LR: 0.0003 +[2026-03-03 05:52:41] (step=0042257) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.26785364899237, LR: 0.0003 +[2026-03-03 05:52:49] (step=0042258) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.268049305419684, LR: 0.0003 +[2026-03-03 05:52:57] (step=0042259) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.268244961846996, LR: 0.0003 +[2026-03-03 05:53:05] (step=0042260) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.26844061827431, LR: 0.0003 +[2026-03-03 05:53:13] (step=0042261) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.268636274701624, LR: 0.0003 +[2026-03-03 05:53:21] (step=0042262) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.268831931128938, LR: 0.0003 +[2026-03-03 05:53:28] (step=0042263) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.269027587556252, LR: 0.0003 +[2026-03-03 05:53:36] (step=0042264) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.269223243983564, LR: 0.0003 +[2026-03-03 05:53:44] (step=0042265) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 8.269418900410878, LR: 0.0003 +[2026-03-03 05:53:52] (step=0042266) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.269614556838192, LR: 0.0003 +[2026-03-03 05:54:00] (step=0042267) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.269810213265506, LR: 0.0003 +[2026-03-03 05:54:08] (step=0042268) Train Loss: 0.4570, Train Steps/Sec: 0.12, Epoch: 8.27000586969282, LR: 0.0003 +[2026-03-03 05:54:16] (step=0042269) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.270201526120132, LR: 0.0003 +[2026-03-03 05:54:24] (step=0042270) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.270397182547446, LR: 0.0003 +[2026-03-03 05:54:32] (step=0042271) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.27059283897476, LR: 0.0003 +[2026-03-03 05:54:39] (step=0042272) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.270788495402075, LR: 0.0003 +[2026-03-03 05:54:47] (step=0042273) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.270984151829387, LR: 0.0003 +[2026-03-03 05:54:55] (step=0042274) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.2711798082567, LR: 0.0003 +[2026-03-03 05:55:03] (step=0042275) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.271375464684015, LR: 0.0003 +[2026-03-03 05:55:11] (step=0042276) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.271571121111329, LR: 0.0003 +[2026-03-03 05:55:19] (step=0042277) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.271766777538643, LR: 0.0003 +[2026-03-03 05:55:27] (step=0042278) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.271962433965955, LR: 0.0003 +[2026-03-03 05:55:34] (step=0042279) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.272158090393269, LR: 0.0003 +[2026-03-03 05:55:42] (step=0042280) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.272353746820583, LR: 0.0003 +[2026-03-03 05:55:50] (step=0042281) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.272549403247897, LR: 0.0003 +[2026-03-03 05:55:58] (step=0042282) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.272745059675211, LR: 0.0003 +[2026-03-03 05:56:06] (step=0042283) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.272940716102523, LR: 0.0003 +[2026-03-03 05:56:14] (step=0042284) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.273136372529837, LR: 0.0003 +[2026-03-03 05:56:22] (step=0042285) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.273332028957151, LR: 0.0003 +[2026-03-03 05:56:30] (step=0042286) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.273527685384465, LR: 0.0003 +[2026-03-03 05:56:37] (step=0042287) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.27372334181178, LR: 0.0003 +[2026-03-03 05:56:45] (step=0042288) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.273918998239091, LR: 0.0003 +[2026-03-03 05:56:53] (step=0042289) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.274114654666405, LR: 0.0003 +[2026-03-03 05:57:01] (step=0042290) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.27431031109372, LR: 0.0003 +[2026-03-03 05:57:09] (step=0042291) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.274505967521034, LR: 0.0003 +[2026-03-03 05:57:17] (step=0042292) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 8.274701623948348, LR: 0.0003 +[2026-03-03 05:57:25] (step=0042293) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.27489728037566, LR: 0.0003 +[2026-03-03 05:57:32] (step=0042294) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.275092936802974, LR: 0.0003 +[2026-03-03 05:57:40] (step=0042295) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.275288593230288, LR: 0.0003 +[2026-03-03 05:57:48] (step=0042296) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.275484249657602, LR: 0.0003 +[2026-03-03 05:57:56] (step=0042297) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.275679906084916, LR: 0.0003 +[2026-03-03 05:58:04] (step=0042298) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.275875562512228, LR: 0.0003 +[2026-03-03 05:58:12] (step=0042299) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.276071218939542, LR: 0.0003 +[2026-03-03 05:58:20] (step=0042300) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 8.276266875366856, LR: 0.0003 +[2026-03-03 05:58:27] (step=0042301) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.27646253179417, LR: 0.0003 +[2026-03-03 05:58:35] (step=0042302) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.276658188221482, LR: 0.0003 +[2026-03-03 05:58:43] (step=0042303) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.276853844648796, LR: 0.0003 +[2026-03-03 05:58:51] (step=0042304) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.27704950107611, LR: 0.0003 +[2026-03-03 05:58:59] (step=0042305) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.277245157503424, LR: 0.0003 +[2026-03-03 05:59:07] (step=0042306) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.277440813930738, LR: 0.0003 +[2026-03-03 05:59:15] (step=0042307) Train Loss: 0.4237, Train Steps/Sec: 0.13, Epoch: 8.27763647035805, LR: 0.0003 +[2026-03-03 05:59:23] (step=0042308) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.277832126785365, LR: 0.0003 +[2026-03-03 05:59:30] (step=0042309) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.278027783212679, LR: 0.0003 +[2026-03-03 05:59:38] (step=0042310) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.278223439639993, LR: 0.0003 +[2026-03-03 05:59:46] (step=0042311) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.278419096067307, LR: 0.0003 +[2026-03-03 05:59:54] (step=0042312) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.278614752494619, LR: 0.0003 +[2026-03-03 06:00:02] (step=0042313) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.278810408921933, LR: 0.0003 +[2026-03-03 06:00:10] (step=0042314) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.279006065349247, LR: 0.0003 +[2026-03-03 06:00:18] (step=0042315) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.27920172177656, LR: 0.0003 +[2026-03-03 06:00:25] (step=0042316) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.279397378203875, LR: 0.0003 +[2026-03-03 06:00:33] (step=0042317) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.279593034631187, LR: 0.0003 +[2026-03-03 06:00:41] (step=0042318) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 8.279788691058501, LR: 0.0003 +[2026-03-03 06:00:49] (step=0042319) Train Loss: 0.4398, Train Steps/Sec: 0.12, Epoch: 8.279984347485815, LR: 0.0003 +[2026-03-03 06:00:57] (step=0042320) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.280180003913129, LR: 0.0003 +[2026-03-03 06:01:05] (step=0042321) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.280375660340443, LR: 0.0003 +[2026-03-03 06:01:13] (step=0042322) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 8.280571316767755, LR: 0.0003 +[2026-03-03 06:01:21] (step=0042323) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.28076697319507, LR: 0.0003 +[2026-03-03 06:01:29] (step=0042324) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.280962629622383, LR: 0.0003 +[2026-03-03 06:01:36] (step=0042325) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.281158286049697, LR: 0.0003 +[2026-03-03 06:01:44] (step=0042326) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.28135394247701, LR: 0.0003 +[2026-03-03 06:01:52] (step=0042327) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.281549598904324, LR: 0.0003 +[2026-03-03 06:02:00] (step=0042328) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.281745255331638, LR: 0.0003 +[2026-03-03 06:02:08] (step=0042329) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.281940911758952, LR: 0.0003 +[2026-03-03 06:02:16] (step=0042330) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 8.282136568186266, LR: 0.0003 +[2026-03-03 06:02:24] (step=0042331) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.282332224613578, LR: 0.0003 +[2026-03-03 06:02:31] (step=0042332) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.282527881040892, LR: 0.0003 +[2026-03-03 06:02:39] (step=0042333) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 8.282723537468206, LR: 0.0003 +[2026-03-03 06:02:47] (step=0042334) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.28291919389552, LR: 0.0003 +[2026-03-03 06:02:55] (step=0042335) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.283114850322834, LR: 0.0003 +[2026-03-03 06:03:03] (step=0042336) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.283310506750146, LR: 0.0003 +[2026-03-03 06:03:11] (step=0042337) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.28350616317746, LR: 0.0003 +[2026-03-03 06:03:19] (step=0042338) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.283701819604774, LR: 0.0003 +[2026-03-03 06:03:26] (step=0042339) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.283897476032088, LR: 0.0003 +[2026-03-03 06:03:34] (step=0042340) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.284093132459402, LR: 0.0003 +[2026-03-03 06:03:42] (step=0042341) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.284288788886714, LR: 0.0003 +[2026-03-03 06:03:50] (step=0042342) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.284484445314028, LR: 0.0003 +[2026-03-03 06:03:58] (step=0042343) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.284680101741342, LR: 0.0003 +[2026-03-03 06:04:06] (step=0042344) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.284875758168656, LR: 0.0003 +[2026-03-03 06:04:14] (step=0042345) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.28507141459597, LR: 0.0003 +[2026-03-03 06:04:21] (step=0042346) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 8.285267071023283, LR: 0.0003 +[2026-03-03 06:04:29] (step=0042347) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.285462727450597, LR: 0.0003 +[2026-03-03 06:04:37] (step=0042348) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.28565838387791, LR: 0.0003 +[2026-03-03 06:04:45] (step=0042349) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.285854040305225, LR: 0.0003 +[2026-03-03 06:04:53] (step=0042350) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.286049696732539, LR: 0.0003 +[2026-03-03 06:05:01] (step=0042351) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.28624535315985, LR: 0.0003 +[2026-03-03 06:05:09] (step=0042352) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.286441009587165, LR: 0.0003 +[2026-03-03 06:05:16] (step=0042353) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 8.286636666014479, LR: 0.0003 +[2026-03-03 06:05:24] (step=0042354) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.286832322441793, LR: 0.0003 +[2026-03-03 06:05:32] (step=0042355) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.287027978869105, LR: 0.0003 +[2026-03-03 06:05:40] (step=0042356) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.287223635296419, LR: 0.0003 +[2026-03-03 06:05:48] (step=0042357) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.287419291723733, LR: 0.0003 +[2026-03-03 06:05:56] (step=0042358) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.287614948151047, LR: 0.0003 +[2026-03-03 06:06:04] (step=0042359) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.287810604578361, LR: 0.0003 +[2026-03-03 06:06:12] (step=0042360) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.288006261005673, LR: 0.0003 +[2026-03-03 06:06:19] (step=0042361) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.288201917432987, LR: 0.0003 +[2026-03-03 06:06:27] (step=0042362) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.288397573860301, LR: 0.0003 +[2026-03-03 06:06:35] (step=0042363) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.288593230287615, LR: 0.0003 +[2026-03-03 06:06:43] (step=0042364) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.28878888671493, LR: 0.0003 +[2026-03-03 06:06:51] (step=0042365) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.288984543142242, LR: 0.0003 +[2026-03-03 06:06:59] (step=0042366) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.289180199569556, LR: 0.0003 +[2026-03-03 06:07:07] (step=0042367) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 8.28937585599687, LR: 0.0003 +[2026-03-03 06:07:15] (step=0042368) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.289571512424184, LR: 0.0003 +[2026-03-03 06:07:23] (step=0042369) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.289767168851498, LR: 0.0003 +[2026-03-03 06:07:31] (step=0042370) Train Loss: 0.4293, Train Steps/Sec: 0.12, Epoch: 8.28996282527881, LR: 0.0003 +[2026-03-03 06:07:38] (step=0042371) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 8.290158481706124, LR: 0.0003 +[2026-03-03 06:07:46] (step=0042372) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.290354138133438, LR: 0.0003 +[2026-03-03 06:07:54] (step=0042373) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 8.290549794560752, LR: 0.0003 +[2026-03-03 06:08:02] (step=0042374) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.290745450988066, LR: 0.0003 +[2026-03-03 06:08:10] (step=0042375) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.290941107415378, LR: 0.0003 +[2026-03-03 06:08:18] (step=0042376) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.291136763842692, LR: 0.0003 +[2026-03-03 06:08:26] (step=0042377) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 8.291332420270006, LR: 0.0003 +[2026-03-03 06:08:33] (step=0042378) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.29152807669732, LR: 0.0003 +[2026-03-03 06:08:41] (step=0042379) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 8.291723733124632, LR: 0.0003 +[2026-03-03 06:08:49] (step=0042380) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.291919389551946, LR: 0.0003 +[2026-03-03 06:08:57] (step=0042381) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.29211504597926, LR: 0.0003 +[2026-03-03 06:09:05] (step=0042382) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.292310702406574, LR: 0.0003 +[2026-03-03 06:09:13] (step=0042383) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.292506358833888, LR: 0.0003 +[2026-03-03 06:09:21] (step=0042384) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 8.2927020152612, LR: 0.0003 +[2026-03-03 06:09:28] (step=0042385) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.292897671688515, LR: 0.0003 +[2026-03-03 06:09:36] (step=0042386) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.293093328115829, LR: 0.0003 +[2026-03-03 06:09:44] (step=0042387) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.293288984543143, LR: 0.0003 +[2026-03-03 06:09:52] (step=0042388) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.293484640970457, LR: 0.0003 +[2026-03-03 06:10:00] (step=0042389) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 8.293680297397769, LR: 0.0003 +[2026-03-03 06:10:08] (step=0042390) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 8.293875953825083, LR: 0.0003 +[2026-03-03 06:10:16] (step=0042391) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.294071610252397, LR: 0.0003 +[2026-03-03 06:10:24] (step=0042392) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 8.29426726667971, LR: 0.0003 +[2026-03-03 06:10:31] (step=0042393) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.294462923107025, LR: 0.0003 +[2026-03-03 06:10:39] (step=0042394) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.294658579534337, LR: 0.0003 +[2026-03-03 06:10:47] (step=0042395) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 8.294854235961651, LR: 0.0003 +[2026-03-03 06:10:55] (step=0042396) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.295049892388965, LR: 0.0003 +[2026-03-03 06:11:03] (step=0042397) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.295245548816279, LR: 0.0003 +[2026-03-03 06:11:11] (step=0042398) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 8.295441205243593, LR: 0.0003 +[2026-03-03 06:11:19] (step=0042399) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.295636861670905, LR: 0.0003 +[2026-03-03 06:11:27] (step=0042400) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.29583251809822, LR: 0.0003 +[2026-03-03 06:11:34] (step=0042401) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.296028174525533, LR: 0.0003 +[2026-03-03 06:11:42] (step=0042402) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.296223830952847, LR: 0.0003 +[2026-03-03 06:11:50] (step=0042403) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.29641948738016, LR: 0.0003 +[2026-03-03 06:11:58] (step=0042404) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.296615143807474, LR: 0.0003 +[2026-03-03 06:12:06] (step=0042405) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.296810800234788, LR: 0.0003 +[2026-03-03 06:12:14] (step=0042406) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.297006456662102, LR: 0.0003 +[2026-03-03 06:12:22] (step=0042407) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.297202113089416, LR: 0.0003 +[2026-03-03 06:12:29] (step=0042408) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 8.297397769516728, LR: 0.0003 +[2026-03-03 06:12:37] (step=0042409) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.297593425944042, LR: 0.0003 +[2026-03-03 06:12:45] (step=0042410) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.297789082371356, LR: 0.0003 +[2026-03-03 06:12:53] (step=0042411) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.29798473879867, LR: 0.0003 +[2026-03-03 06:13:01] (step=0042412) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.298180395225984, LR: 0.0003 +[2026-03-03 06:13:09] (step=0042413) Train Loss: 0.4513, Train Steps/Sec: 0.12, Epoch: 8.298376051653296, LR: 0.0003 +[2026-03-03 06:13:17] (step=0042414) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 8.29857170808061, LR: 0.0003 +[2026-03-03 06:13:25] (step=0042415) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.298767364507924, LR: 0.0003 +[2026-03-03 06:13:32] (step=0042416) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.298963020935238, LR: 0.0003 +[2026-03-03 06:13:40] (step=0042417) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.299158677362552, LR: 0.0003 +[2026-03-03 06:13:48] (step=0042418) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.299354333789864, LR: 0.0003 +[2026-03-03 06:13:56] (step=0042419) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.299549990217178, LR: 0.0003 +[2026-03-03 06:14:04] (step=0042420) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 8.299745646644492, LR: 0.0003 +[2026-03-03 06:14:12] (step=0042421) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.299941303071806, LR: 0.0003 +[2026-03-03 06:14:20] (step=0042422) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.30013695949912, LR: 0.0003 +[2026-03-03 06:14:28] (step=0042423) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 8.300332615926433, LR: 0.0003 +[2026-03-03 06:14:35] (step=0042424) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 8.300528272353747, LR: 0.0003 +[2026-03-03 06:14:43] (step=0042425) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.30072392878106, LR: 0.0003 +[2026-03-03 06:14:51] (step=0042426) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.300919585208375, LR: 0.0003 +[2026-03-03 06:14:59] (step=0042427) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.301115241635689, LR: 0.0003 +[2026-03-03 06:15:07] (step=0042428) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 8.301310898063, LR: 0.0003 +[2026-03-03 06:15:15] (step=0042429) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.301506554490315, LR: 0.0003 +[2026-03-03 06:15:23] (step=0042430) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.301702210917629, LR: 0.0003 +[2026-03-03 06:15:31] (step=0042431) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.301897867344943, LR: 0.0003 +[2026-03-03 06:15:38] (step=0042432) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.302093523772255, LR: 0.0003 +[2026-03-03 06:15:46] (step=0042433) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.302289180199569, LR: 0.0003 +[2026-03-03 06:15:54] (step=0042434) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 8.302484836626883, LR: 0.0003 +[2026-03-03 06:16:02] (step=0042435) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 8.302680493054197, LR: 0.0003 +[2026-03-03 06:16:10] (step=0042436) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.302876149481511, LR: 0.0003 +[2026-03-03 06:16:18] (step=0042437) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.303071805908823, LR: 0.0003 +[2026-03-03 06:16:26] (step=0042438) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.303267462336137, LR: 0.0003 +[2026-03-03 06:16:33] (step=0042439) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.303463118763451, LR: 0.0003 +[2026-03-03 06:16:41] (step=0042440) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.303658775190765, LR: 0.0003 +[2026-03-03 06:16:49] (step=0042441) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.30385443161808, LR: 0.0003 +[2026-03-03 06:16:57] (step=0042442) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.304050088045392, LR: 0.0003 +[2026-03-03 06:17:05] (step=0042443) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.304245744472706, LR: 0.0003 +[2026-03-03 06:17:13] (step=0042444) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.30444140090002, LR: 0.0003 +[2026-03-03 06:17:21] (step=0042445) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.304637057327334, LR: 0.0003 +[2026-03-03 06:17:28] (step=0042446) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.304832713754648, LR: 0.0003 +[2026-03-03 06:17:36] (step=0042447) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.30502837018196, LR: 0.0003 +[2026-03-03 06:17:44] (step=0042448) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.305224026609274, LR: 0.0003 +[2026-03-03 06:17:52] (step=0042449) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.305419683036588, LR: 0.0003 +[2026-03-03 06:18:00] (step=0042450) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.305615339463902, LR: 0.0003 +[2026-03-03 06:18:08] (step=0042451) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.305810995891216, LR: 0.0003 +[2026-03-03 06:18:16] (step=0042452) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.306006652318528, LR: 0.0003 +[2026-03-03 06:18:23] (step=0042453) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.306202308745842, LR: 0.0003 +[2026-03-03 06:18:31] (step=0042454) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.306397965173156, LR: 0.0003 +[2026-03-03 06:18:39] (step=0042455) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.30659362160047, LR: 0.0003 +[2026-03-03 06:18:47] (step=0042456) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.306789278027782, LR: 0.0003 +[2026-03-03 06:18:55] (step=0042457) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 8.306984934455096, LR: 0.0003 +[2026-03-03 06:19:03] (step=0042458) Train Loss: 0.4217, Train Steps/Sec: 0.13, Epoch: 8.30718059088241, LR: 0.0003 +[2026-03-03 06:19:11] (step=0042459) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.307376247309724, LR: 0.0003 +[2026-03-03 06:19:19] (step=0042460) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.307571903737038, LR: 0.0003 +[2026-03-03 06:19:26] (step=0042461) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.30776756016435, LR: 0.0003 +[2026-03-03 06:19:34] (step=0042462) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 8.307963216591665, LR: 0.0003 +[2026-03-03 06:19:42] (step=0042463) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.308158873018979, LR: 0.0003 +[2026-03-03 06:19:50] (step=0042464) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.308354529446293, LR: 0.0003 +[2026-03-03 06:19:58] (step=0042465) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.308550185873607, LR: 0.0003 +[2026-03-03 06:20:06] (step=0042466) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.308745842300919, LR: 0.0003 +[2026-03-03 06:20:14] (step=0042467) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.308941498728233, LR: 0.0003 +[2026-03-03 06:20:22] (step=0042468) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.309137155155547, LR: 0.0003 +[2026-03-03 06:20:30] (step=0042469) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.30933281158286, LR: 0.0003 +[2026-03-03 06:20:38] (step=0042470) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.309528468010175, LR: 0.0003 +[2026-03-03 06:20:45] (step=0042471) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 8.309724124437487, LR: 0.0003 +[2026-03-03 06:20:53] (step=0042472) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.309919780864801, LR: 0.0003 +[2026-03-03 06:21:01] (step=0042473) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.310115437292115, LR: 0.0003 +[2026-03-03 06:21:09] (step=0042474) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.310311093719429, LR: 0.0003 +[2026-03-03 06:21:17] (step=0042475) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.310506750146743, LR: 0.0003 +[2026-03-03 06:21:25] (step=0042476) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.310702406574055, LR: 0.0003 +[2026-03-03 06:21:33] (step=0042477) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.31089806300137, LR: 0.0003 +[2026-03-03 06:21:40] (step=0042478) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.311093719428683, LR: 0.0003 +[2026-03-03 06:21:48] (step=0042479) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.311289375855997, LR: 0.0003 +[2026-03-03 06:21:56] (step=0042480) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.311485032283311, LR: 0.0003 +[2026-03-03 06:22:04] (step=0042481) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.311680688710624, LR: 0.0003 +[2026-03-03 06:22:12] (step=0042482) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.311876345137938, LR: 0.0003 +[2026-03-03 06:22:20] (step=0042483) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 8.312072001565252, LR: 0.0003 +[2026-03-03 06:22:28] (step=0042484) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.312267657992566, LR: 0.0003 +[2026-03-03 06:22:36] (step=0042485) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.312463314419878, LR: 0.0003 +[2026-03-03 06:22:43] (step=0042486) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 8.312658970847192, LR: 0.0003 +[2026-03-03 06:22:51] (step=0042487) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.312854627274506, LR: 0.0003 +[2026-03-03 06:22:59] (step=0042488) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.31305028370182, LR: 0.0003 +[2026-03-03 06:23:07] (step=0042489) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.313245940129134, LR: 0.0003 +[2026-03-03 06:23:15] (step=0042490) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.313441596556446, LR: 0.0003 +[2026-03-03 06:23:23] (step=0042491) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.31363725298376, LR: 0.0003 +[2026-03-03 06:23:31] (step=0042492) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.313832909411074, LR: 0.0003 +[2026-03-03 06:23:38] (step=0042493) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.314028565838388, LR: 0.0003 +[2026-03-03 06:23:46] (step=0042494) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.314224222265702, LR: 0.0003 +[2026-03-03 06:23:54] (step=0042495) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.314419878693014, LR: 0.0003 +[2026-03-03 06:24:02] (step=0042496) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.314615535120328, LR: 0.0003 +[2026-03-03 06:24:10] (step=0042497) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 8.314811191547642, LR: 0.0003 +[2026-03-03 06:24:18] (step=0042498) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.315006847974956, LR: 0.0003 +[2026-03-03 06:24:26] (step=0042499) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.31520250440227, LR: 0.0003 +[2026-03-03 06:24:34] (step=0042500) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.315398160829583, LR: 0.0003 +[2026-03-03 06:24:34] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0042500/ +[2026-03-03 06:24:41] (step=0042501) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 8.315593817256897, LR: 0.0003 +[2026-03-03 06:24:49] (step=0042502) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.31578947368421, LR: 0.0003 +[2026-03-03 06:24:57] (step=0042503) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.315985130111525, LR: 0.0003 +[2026-03-03 06:25:05] (step=0042504) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 8.316180786538839, LR: 0.0003 +[2026-03-03 06:25:13] (step=0042505) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.31637644296615, LR: 0.0003 +[2026-03-03 06:25:21] (step=0042506) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.316572099393465, LR: 0.0003 +[2026-03-03 06:25:29] (step=0042507) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.316767755820779, LR: 0.0003 +[2026-03-03 06:25:37] (step=0042508) Train Loss: 0.4563, Train Steps/Sec: 0.12, Epoch: 8.316963412248093, LR: 0.0003 +[2026-03-03 06:25:45] (step=0042509) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.317159068675405, LR: 0.0003 +[2026-03-03 06:25:52] (step=0042510) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.31735472510272, LR: 0.0003 +[2026-03-03 06:26:00] (step=0042511) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.317550381530033, LR: 0.0003 +[2026-03-03 06:26:08] (step=0042512) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.317746037957347, LR: 0.0003 +[2026-03-03 06:26:16] (step=0042513) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.317941694384661, LR: 0.0003 +[2026-03-03 06:26:24] (step=0042514) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.318137350811973, LR: 0.0003 +[2026-03-03 06:26:32] (step=0042515) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.318333007239287, LR: 0.0003 +[2026-03-03 06:26:40] (step=0042516) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.318528663666601, LR: 0.0003 +[2026-03-03 06:26:47] (step=0042517) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.318724320093915, LR: 0.0003 +[2026-03-03 06:26:55] (step=0042518) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.31891997652123, LR: 0.0003 +[2026-03-03 06:27:03] (step=0042519) Train Loss: 0.4443, Train Steps/Sec: 0.12, Epoch: 8.319115632948542, LR: 0.0003 +[2026-03-03 06:27:11] (step=0042520) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.319311289375856, LR: 0.0003 +[2026-03-03 06:27:19] (step=0042521) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.31950694580317, LR: 0.0003 +[2026-03-03 06:27:27] (step=0042522) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 8.319702602230484, LR: 0.0003 +[2026-03-03 06:27:35] (step=0042523) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 8.319898258657798, LR: 0.0003 +[2026-03-03 06:27:43] (step=0042524) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.32009391508511, LR: 0.0003 +[2026-03-03 06:27:50] (step=0042525) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.320289571512424, LR: 0.0003 +[2026-03-03 06:27:58] (step=0042526) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.320485227939738, LR: 0.0003 +[2026-03-03 06:28:06] (step=0042527) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.320680884367052, LR: 0.0003 +[2026-03-03 06:28:14] (step=0042528) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 8.320876540794366, LR: 0.0003 +[2026-03-03 06:28:22] (step=0042529) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.321072197221678, LR: 0.0003 +[2026-03-03 06:28:30] (step=0042530) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.321267853648992, LR: 0.0003 +[2026-03-03 06:28:38] (step=0042531) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.321463510076306, LR: 0.0003 +[2026-03-03 06:28:46] (step=0042532) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.32165916650362, LR: 0.0003 +[2026-03-03 06:28:53] (step=0042533) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.321854822930934, LR: 0.0003 +[2026-03-03 06:29:01] (step=0042534) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.322050479358246, LR: 0.0003 +[2026-03-03 06:29:09] (step=0042535) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.32224613578556, LR: 0.0003 +[2026-03-03 06:29:17] (step=0042536) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.322441792212874, LR: 0.0003 +[2026-03-03 06:29:25] (step=0042537) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.322637448640188, LR: 0.0003 +[2026-03-03 06:29:33] (step=0042538) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.3228331050675, LR: 0.0003 +[2026-03-03 06:29:41] (step=0042539) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.323028761494815, LR: 0.0003 +[2026-03-03 06:29:48] (step=0042540) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 8.323224417922129, LR: 0.0003 +[2026-03-03 06:29:56] (step=0042541) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.323420074349443, LR: 0.0003 +[2026-03-03 06:30:04] (step=0042542) Train Loss: 0.4230, Train Steps/Sec: 0.13, Epoch: 8.323615730776757, LR: 0.0003 +[2026-03-03 06:30:12] (step=0042543) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.323811387204069, LR: 0.0003 +[2026-03-03 06:30:20] (step=0042544) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.324007043631383, LR: 0.0003 +[2026-03-03 06:30:28] (step=0042545) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.324202700058697, LR: 0.0003 +[2026-03-03 06:30:36] (step=0042546) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.324398356486011, LR: 0.0003 +[2026-03-03 06:30:43] (step=0042547) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.324594012913325, LR: 0.0003 +[2026-03-03 06:30:51] (step=0042548) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 8.324789669340637, LR: 0.0003 +[2026-03-03 06:30:59] (step=0042549) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.324985325767951, LR: 0.0003 +[2026-03-03 06:31:07] (step=0042550) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.325180982195265, LR: 0.0003 +[2026-03-03 06:31:15] (step=0042551) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.32537663862258, LR: 0.0003 +[2026-03-03 06:31:23] (step=0042552) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.325572295049893, LR: 0.0003 +[2026-03-03 06:31:31] (step=0042553) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.325767951477205, LR: 0.0003 +[2026-03-03 06:31:39] (step=0042554) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.32596360790452, LR: 0.0003 +[2026-03-03 06:31:46] (step=0042555) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.326159264331833, LR: 0.0003 +[2026-03-03 06:31:54] (step=0042556) Train Loss: 0.4567, Train Steps/Sec: 0.12, Epoch: 8.326354920759147, LR: 0.0003 +[2026-03-03 06:32:02] (step=0042557) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.326550577186461, LR: 0.0003 +[2026-03-03 06:32:10] (step=0042558) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.326746233613774, LR: 0.0003 +[2026-03-03 06:32:18] (step=0042559) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.326941890041088, LR: 0.0003 +[2026-03-03 06:32:26] (step=0042560) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.327137546468402, LR: 0.0003 +[2026-03-03 06:32:34] (step=0042561) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.327333202895716, LR: 0.0003 +[2026-03-03 06:32:42] (step=0042562) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.327528859323028, LR: 0.0003 +[2026-03-03 06:32:50] (step=0042563) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.327724515750342, LR: 0.0003 +[2026-03-03 06:32:57] (step=0042564) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.327920172177656, LR: 0.0003 +[2026-03-03 06:33:05] (step=0042565) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.32811582860497, LR: 0.0003 +[2026-03-03 06:33:13] (step=0042566) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.328311485032284, LR: 0.0003 +[2026-03-03 06:33:21] (step=0042567) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 8.328507141459596, LR: 0.0003 +[2026-03-03 06:33:29] (step=0042568) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 8.32870279788691, LR: 0.0003 +[2026-03-03 06:33:37] (step=0042569) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.328898454314224, LR: 0.0003 +[2026-03-03 06:33:45] (step=0042570) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.329094110741538, LR: 0.0003 +[2026-03-03 06:33:53] (step=0042571) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.329289767168852, LR: 0.0003 +[2026-03-03 06:34:00] (step=0042572) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.329485423596164, LR: 0.0003 +[2026-03-03 06:34:08] (step=0042573) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.329681080023478, LR: 0.0003 +[2026-03-03 06:34:16] (step=0042574) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.329876736450792, LR: 0.0003 +[2026-03-03 06:34:24] (step=0042575) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.330072392878106, LR: 0.0003 +[2026-03-03 06:34:32] (step=0042576) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.33026804930542, LR: 0.0003 +[2026-03-03 06:34:40] (step=0042577) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.330463705732733, LR: 0.0003 +[2026-03-03 06:34:48] (step=0042578) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.330659362160047, LR: 0.0003 +[2026-03-03 06:34:56] (step=0042579) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.33085501858736, LR: 0.0003 +[2026-03-03 06:35:03] (step=0042580) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.331050675014675, LR: 0.0003 +[2026-03-03 06:35:11] (step=0042581) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 8.331246331441989, LR: 0.0003 +[2026-03-03 06:35:19] (step=0042582) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.331441987869301, LR: 0.0003 +[2026-03-03 06:35:27] (step=0042583) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.331637644296615, LR: 0.0003 +[2026-03-03 06:35:35] (step=0042584) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 8.331833300723929, LR: 0.0003 +[2026-03-03 06:35:43] (step=0042585) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.332028957151243, LR: 0.0003 +[2026-03-03 06:35:51] (step=0042586) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.332224613578557, LR: 0.0003 +[2026-03-03 06:35:59] (step=0042587) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.33242027000587, LR: 0.0003 +[2026-03-03 06:36:06] (step=0042588) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.332615926433183, LR: 0.0003 +[2026-03-03 06:36:14] (step=0042589) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.332811582860497, LR: 0.0003 +[2026-03-03 06:36:22] (step=0042590) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.333007239287811, LR: 0.0003 +[2026-03-03 06:36:30] (step=0042591) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.333202895715123, LR: 0.0003 +[2026-03-03 06:36:38] (step=0042592) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.333398552142437, LR: 0.0003 +[2026-03-03 06:36:46] (step=0042593) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.333594208569751, LR: 0.0003 +[2026-03-03 06:36:54] (step=0042594) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.333789864997065, LR: 0.0003 +[2026-03-03 06:37:01] (step=0042595) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.33398552142438, LR: 0.0003 +[2026-03-03 06:37:09] (step=0042596) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.334181177851692, LR: 0.0003 +[2026-03-03 06:37:17] (step=0042597) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.334376834279006, LR: 0.0003 +[2026-03-03 06:37:25] (step=0042598) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.33457249070632, LR: 0.0003 +[2026-03-03 06:37:33] (step=0042599) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.334768147133634, LR: 0.0003 +[2026-03-03 06:37:41] (step=0042600) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.334963803560948, LR: 0.0003 +[2026-03-03 06:37:49] (step=0042601) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.33515945998826, LR: 0.0003 +[2026-03-03 06:37:57] (step=0042602) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.335355116415574, LR: 0.0003 +[2026-03-03 06:38:04] (step=0042603) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.335550772842888, LR: 0.0003 +[2026-03-03 06:38:12] (step=0042604) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 8.335746429270202, LR: 0.0003 +[2026-03-03 06:38:20] (step=0042605) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.335942085697516, LR: 0.0003 +[2026-03-03 06:38:28] (step=0042606) Train Loss: 0.4579, Train Steps/Sec: 0.12, Epoch: 8.336137742124828, LR: 0.0003 +[2026-03-03 06:38:36] (step=0042607) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.336333398552142, LR: 0.0003 +[2026-03-03 06:38:44] (step=0042608) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.336529054979456, LR: 0.0003 +[2026-03-03 06:38:52] (step=0042609) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.33672471140677, LR: 0.0003 +[2026-03-03 06:39:00] (step=0042610) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.336920367834084, LR: 0.0003 +[2026-03-03 06:39:08] (step=0042611) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.337116024261396, LR: 0.0003 +[2026-03-03 06:39:16] (step=0042612) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.33731168068871, LR: 0.0003 +[2026-03-03 06:39:23] (step=0042613) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.337507337116024, LR: 0.0003 +[2026-03-03 06:39:31] (step=0042614) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.337702993543338, LR: 0.0003 +[2026-03-03 06:39:39] (step=0042615) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.33789864997065, LR: 0.0003 +[2026-03-03 06:39:47] (step=0042616) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.338094306397965, LR: 0.0003 +[2026-03-03 06:39:55] (step=0042617) Train Loss: 0.4512, Train Steps/Sec: 0.12, Epoch: 8.338289962825279, LR: 0.0003 +[2026-03-03 06:40:03] (step=0042618) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.338485619252593, LR: 0.0003 +[2026-03-03 06:40:11] (step=0042619) Train Loss: 0.4192, Train Steps/Sec: 0.13, Epoch: 8.338681275679907, LR: 0.0003 +[2026-03-03 06:40:19] (step=0042620) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.338876932107219, LR: 0.0003 +[2026-03-03 06:40:27] (step=0042621) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 8.339072588534533, LR: 0.0003 +[2026-03-03 06:40:34] (step=0042622) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.339268244961847, LR: 0.0003 +[2026-03-03 06:40:42] (step=0042623) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.339463901389161, LR: 0.0003 +[2026-03-03 06:40:50] (step=0042624) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 8.339659557816475, LR: 0.0003 +[2026-03-03 06:40:58] (step=0042625) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.339855214243787, LR: 0.0003 +[2026-03-03 06:41:06] (step=0042626) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.340050870671101, LR: 0.0003 +[2026-03-03 06:41:14] (step=0042627) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.340246527098415, LR: 0.0003 +[2026-03-03 06:41:22] (step=0042628) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.34044218352573, LR: 0.0003 +[2026-03-03 06:41:30] (step=0042629) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.340637839953043, LR: 0.0003 +[2026-03-03 06:41:37] (step=0042630) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.340833496380355, LR: 0.0003 +[2026-03-03 06:41:45] (step=0042631) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 8.34102915280767, LR: 0.0003 +[2026-03-03 06:41:53] (step=0042632) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.341224809234983, LR: 0.0003 +[2026-03-03 06:42:01] (step=0042633) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.341420465662297, LR: 0.0003 +[2026-03-03 06:42:09] (step=0042634) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.341616122089611, LR: 0.0003 +[2026-03-03 06:42:17] (step=0042635) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.341811778516924, LR: 0.0003 +[2026-03-03 06:42:25] (step=0042636) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.342007434944238, LR: 0.0003 +[2026-03-03 06:42:32] (step=0042637) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.342203091371552, LR: 0.0003 +[2026-03-03 06:42:40] (step=0042638) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 8.342398747798866, LR: 0.0003 +[2026-03-03 06:42:48] (step=0042639) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 8.34259440422618, LR: 0.0003 +[2026-03-03 06:42:56] (step=0042640) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.342790060653492, LR: 0.0003 +[2026-03-03 06:43:04] (step=0042641) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.342985717080806, LR: 0.0003 +[2026-03-03 06:43:12] (step=0042642) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.34318137350812, LR: 0.0003 +[2026-03-03 06:43:20] (step=0042643) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.343377029935434, LR: 0.0003 +[2026-03-03 06:43:28] (step=0042644) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.343572686362746, LR: 0.0003 +[2026-03-03 06:43:36] (step=0042645) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.34376834279006, LR: 0.0003 +[2026-03-03 06:43:43] (step=0042646) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.343963999217374, LR: 0.0003 +[2026-03-03 06:43:51] (step=0042647) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.344159655644688, LR: 0.0003 +[2026-03-03 06:43:59] (step=0042648) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.344355312072002, LR: 0.0003 +[2026-03-03 06:44:07] (step=0042649) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.344550968499314, LR: 0.0003 +[2026-03-03 06:44:15] (step=0042650) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.344746624926628, LR: 0.0003 +[2026-03-03 06:44:23] (step=0042651) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 8.344942281353942, LR: 0.0003 +[2026-03-03 06:44:31] (step=0042652) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.345137937781256, LR: 0.0003 +[2026-03-03 06:44:39] (step=0042653) Train Loss: 0.4538, Train Steps/Sec: 0.12, Epoch: 8.34533359420857, LR: 0.0003 +[2026-03-03 06:44:47] (step=0042654) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.345529250635883, LR: 0.0003 +[2026-03-03 06:44:55] (step=0042655) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.345724907063197, LR: 0.0003 +[2026-03-03 06:45:02] (step=0042656) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.34592056349051, LR: 0.0003 +[2026-03-03 06:45:10] (step=0042657) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.346116219917825, LR: 0.0003 +[2026-03-03 06:45:18] (step=0042658) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.346311876345139, LR: 0.0003 +[2026-03-03 06:45:26] (step=0042659) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.346507532772451, LR: 0.0003 +[2026-03-03 06:45:34] (step=0042660) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 8.346703189199765, LR: 0.0003 +[2026-03-03 06:45:42] (step=0042661) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.346898845627079, LR: 0.0003 +[2026-03-03 06:45:50] (step=0042662) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.347094502054393, LR: 0.0003 +[2026-03-03 06:45:57] (step=0042663) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.347290158481707, LR: 0.0003 +[2026-03-03 06:46:05] (step=0042664) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.34748581490902, LR: 0.0003 +[2026-03-03 06:46:13] (step=0042665) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.347681471336333, LR: 0.0003 +[2026-03-03 06:46:21] (step=0042666) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.347877127763647, LR: 0.0003 +[2026-03-03 06:46:29] (step=0042667) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.348072784190961, LR: 0.0003 +[2026-03-03 06:46:37] (step=0042668) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.348268440618273, LR: 0.0003 +[2026-03-03 06:46:45] (step=0042669) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.348464097045587, LR: 0.0003 +[2026-03-03 06:46:53] (step=0042670) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 8.348659753472901, LR: 0.0003 +[2026-03-03 06:47:01] (step=0042671) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.348855409900215, LR: 0.0003 +[2026-03-03 06:47:08] (step=0042672) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 8.34905106632753, LR: 0.0003 +[2026-03-03 06:47:16] (step=0042673) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.349246722754842, LR: 0.0003 +[2026-03-03 06:47:24] (step=0042674) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.349442379182156, LR: 0.0003 +[2026-03-03 06:47:32] (step=0042675) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.34963803560947, LR: 0.0003 +[2026-03-03 06:47:40] (step=0042676) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.349833692036784, LR: 0.0003 +[2026-03-03 06:47:48] (step=0042677) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.350029348464098, LR: 0.0003 +[2026-03-03 06:47:56] (step=0042678) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.35022500489141, LR: 0.0003 +[2026-03-03 06:48:04] (step=0042679) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.350420661318724, LR: 0.0003 +[2026-03-03 06:48:12] (step=0042680) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.350616317746038, LR: 0.0003 +[2026-03-03 06:48:19] (step=0042681) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.350811974173352, LR: 0.0003 +[2026-03-03 06:48:27] (step=0042682) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.351007630600666, LR: 0.0003 +[2026-03-03 06:48:35] (step=0042683) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.351203287027978, LR: 0.0003 +[2026-03-03 06:48:43] (step=0042684) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.351398943455292, LR: 0.0003 +[2026-03-03 06:48:51] (step=0042685) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.351594599882606, LR: 0.0003 +[2026-03-03 06:48:59] (step=0042686) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.35179025630992, LR: 0.0003 +[2026-03-03 06:49:07] (step=0042687) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.351985912737234, LR: 0.0003 +[2026-03-03 06:49:15] (step=0042688) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.352181569164546, LR: 0.0003 +[2026-03-03 06:49:23] (step=0042689) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.35237722559186, LR: 0.0003 +[2026-03-03 06:49:30] (step=0042690) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.352572882019174, LR: 0.0003 +[2026-03-03 06:49:38] (step=0042691) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.352768538446488, LR: 0.0003 +[2026-03-03 06:49:46] (step=0042692) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.352964194873802, LR: 0.0003 +[2026-03-03 06:49:54] (step=0042693) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.353159851301115, LR: 0.0003 +[2026-03-03 06:50:02] (step=0042694) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.353355507728429, LR: 0.0003 +[2026-03-03 06:50:10] (step=0042695) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 8.353551164155743, LR: 0.0003 +[2026-03-03 06:50:18] (step=0042696) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.353746820583057, LR: 0.0003 +[2026-03-03 06:50:25] (step=0042697) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.353942477010369, LR: 0.0003 +[2026-03-03 06:50:33] (step=0042698) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.354138133437683, LR: 0.0003 +[2026-03-03 06:50:41] (step=0042699) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.354333789864997, LR: 0.0003 +[2026-03-03 06:50:49] (step=0042700) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.354529446292311, LR: 0.0003 +[2026-03-03 06:50:57] (step=0042701) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.354725102719625, LR: 0.0003 +[2026-03-03 06:51:05] (step=0042702) Train Loss: 0.4483, Train Steps/Sec: 0.12, Epoch: 8.354920759146937, LR: 0.0003 +[2026-03-03 06:51:13] (step=0042703) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.355116415574251, LR: 0.0003 +[2026-03-03 06:51:21] (step=0042704) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 8.355312072001565, LR: 0.0003 +[2026-03-03 06:51:29] (step=0042705) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.35550772842888, LR: 0.0003 +[2026-03-03 06:51:37] (step=0042706) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.355703384856193, LR: 0.0003 +[2026-03-03 06:51:44] (step=0042707) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.355899041283505, LR: 0.0003 +[2026-03-03 06:51:52] (step=0042708) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.35609469771082, LR: 0.0003 +[2026-03-03 06:52:00] (step=0042709) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.356290354138133, LR: 0.0003 +[2026-03-03 06:52:08] (step=0042710) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 8.356486010565447, LR: 0.0003 +[2026-03-03 06:52:16] (step=0042711) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.356681666992761, LR: 0.0003 +[2026-03-03 06:52:24] (step=0042712) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.356877323420074, LR: 0.0003 +[2026-03-03 06:52:32] (step=0042713) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.357072979847388, LR: 0.0003 +[2026-03-03 06:52:40] (step=0042714) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.357268636274702, LR: 0.0003 +[2026-03-03 06:52:48] (step=0042715) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.357464292702016, LR: 0.0003 +[2026-03-03 06:52:55] (step=0042716) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 8.35765994912933, LR: 0.0003 +[2026-03-03 06:53:03] (step=0042717) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.357855605556642, LR: 0.0003 +[2026-03-03 06:53:11] (step=0042718) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.358051261983956, LR: 0.0003 +[2026-03-03 06:53:19] (step=0042719) Train Loss: 0.4330, Train Steps/Sec: 0.12, Epoch: 8.35824691841127, LR: 0.0003 +[2026-03-03 06:53:27] (step=0042720) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.358442574838584, LR: 0.0003 +[2026-03-03 06:53:35] (step=0042721) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.358638231265896, LR: 0.0003 +[2026-03-03 06:53:43] (step=0042722) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.35883388769321, LR: 0.0003 +[2026-03-03 06:53:51] (step=0042723) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.359029544120524, LR: 0.0003 +[2026-03-03 06:53:59] (step=0042724) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.359225200547838, LR: 0.0003 +[2026-03-03 06:54:06] (step=0042725) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.359420856975152, LR: 0.0003 +[2026-03-03 06:54:14] (step=0042726) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.359616513402464, LR: 0.0003 +[2026-03-03 06:54:22] (step=0042727) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.359812169829778, LR: 0.0003 +[2026-03-03 06:54:30] (step=0042728) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.360007826257092, LR: 0.0003 +[2026-03-03 06:54:38] (step=0042729) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.360203482684406, LR: 0.0003 +[2026-03-03 06:54:46] (step=0042730) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.36039913911172, LR: 0.0003 +[2026-03-03 06:54:54] (step=0042731) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.360594795539033, LR: 0.0003 +[2026-03-03 06:55:01] (step=0042732) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.360790451966347, LR: 0.0003 +[2026-03-03 06:55:09] (step=0042733) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.36098610839366, LR: 0.0003 +[2026-03-03 06:55:17] (step=0042734) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.361181764820975, LR: 0.0003 +[2026-03-03 06:55:25] (step=0042735) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.361377421248289, LR: 0.0003 +[2026-03-03 06:55:33] (step=0042736) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.361573077675601, LR: 0.0003 +[2026-03-03 06:55:41] (step=0042737) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.361768734102915, LR: 0.0003 +[2026-03-03 06:55:49] (step=0042738) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.361964390530229, LR: 0.0003 +[2026-03-03 06:55:57] (step=0042739) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 8.362160046957543, LR: 0.0003 +[2026-03-03 06:56:04] (step=0042740) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.362355703384857, LR: 0.0003 +[2026-03-03 06:56:12] (step=0042741) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.36255135981217, LR: 0.0003 +[2026-03-03 06:56:20] (step=0042742) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.362747016239483, LR: 0.0003 +[2026-03-03 06:56:28] (step=0042743) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.362942672666797, LR: 0.0003 +[2026-03-03 06:56:36] (step=0042744) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.363138329094111, LR: 0.0003 +[2026-03-03 06:56:44] (step=0042745) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.363333985521425, LR: 0.0003 +[2026-03-03 06:56:52] (step=0042746) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.363529641948737, LR: 0.0003 +[2026-03-03 06:56:59] (step=0042747) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.363725298376051, LR: 0.0003 +[2026-03-03 06:57:07] (step=0042748) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.363920954803366, LR: 0.0003 +[2026-03-03 06:57:15] (step=0042749) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.36411661123068, LR: 0.0003 +[2026-03-03 06:57:23] (step=0042750) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 8.364312267657992, LR: 0.0003 +[2026-03-03 06:57:31] (step=0042751) Train Loss: 0.4443, Train Steps/Sec: 0.12, Epoch: 8.364507924085306, LR: 0.0003 +[2026-03-03 06:57:39] (step=0042752) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 8.36470358051262, LR: 0.0003 +[2026-03-03 06:57:47] (step=0042753) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.364899236939934, LR: 0.0003 +[2026-03-03 06:57:55] (step=0042754) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.365094893367248, LR: 0.0003 +[2026-03-03 06:58:03] (step=0042755) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.36529054979456, LR: 0.0003 +[2026-03-03 06:58:10] (step=0042756) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.365486206221874, LR: 0.0003 +[2026-03-03 06:58:18] (step=0042757) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.365681862649188, LR: 0.0003 +[2026-03-03 06:58:26] (step=0042758) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 8.365877519076502, LR: 0.0003 +[2026-03-03 06:58:34] (step=0042759) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.366073175503816, LR: 0.0003 +[2026-03-03 06:58:42] (step=0042760) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 8.366268831931128, LR: 0.0003 +[2026-03-03 06:58:50] (step=0042761) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.366464488358442, LR: 0.0003 +[2026-03-03 06:58:58] (step=0042762) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 8.366660144785756, LR: 0.0003 +[2026-03-03 06:59:06] (step=0042763) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.36685580121307, LR: 0.0003 +[2026-03-03 06:59:13] (step=0042764) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.367051457640384, LR: 0.0003 +[2026-03-03 06:59:21] (step=0042765) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.367247114067697, LR: 0.0003 +[2026-03-03 06:59:29] (step=0042766) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.36744277049501, LR: 0.0003 +[2026-03-03 06:59:37] (step=0042767) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.367638426922325, LR: 0.0003 +[2026-03-03 06:59:45] (step=0042768) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.367834083349639, LR: 0.0003 +[2026-03-03 06:59:53] (step=0042769) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.368029739776953, LR: 0.0003 +[2026-03-03 07:00:01] (step=0042770) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 8.368225396204265, LR: 0.0003 +[2026-03-03 07:00:09] (step=0042771) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.368421052631579, LR: 0.0003 +[2026-03-03 07:00:17] (step=0042772) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 8.368616709058893, LR: 0.0003 +[2026-03-03 07:00:24] (step=0042773) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.368812365486207, LR: 0.0003 +[2026-03-03 07:00:32] (step=0042774) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.369008021913519, LR: 0.0003 +[2026-03-03 07:00:40] (step=0042775) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.369203678340833, LR: 0.0003 +[2026-03-03 07:00:48] (step=0042776) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.369399334768147, LR: 0.0003 +[2026-03-03 07:00:56] (step=0042777) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.369594991195461, LR: 0.0003 +[2026-03-03 07:01:04] (step=0042778) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.369790647622775, LR: 0.0003 +[2026-03-03 07:01:12] (step=0042779) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.369986304050087, LR: 0.0003 +[2026-03-03 07:01:20] (step=0042780) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.370181960477401, LR: 0.0003 +[2026-03-03 07:01:27] (step=0042781) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.370377616904715, LR: 0.0003 +[2026-03-03 07:01:35] (step=0042782) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 8.37057327333203, LR: 0.0003 +[2026-03-03 07:01:43] (step=0042783) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.370768929759343, LR: 0.0003 +[2026-03-03 07:01:51] (step=0042784) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.370964586186656, LR: 0.0003 +[2026-03-03 07:01:59] (step=0042785) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.37116024261397, LR: 0.0003 +[2026-03-03 07:02:07] (step=0042786) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.371355899041284, LR: 0.0003 +[2026-03-03 07:02:15] (step=0042787) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 8.371551555468598, LR: 0.0003 +[2026-03-03 07:02:22] (step=0042788) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.371747211895912, LR: 0.0003 +[2026-03-03 07:02:30] (step=0042789) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.371942868323224, LR: 0.0003 +[2026-03-03 07:02:38] (step=0042790) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.372138524750538, LR: 0.0003 +[2026-03-03 07:02:46] (step=0042791) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 8.372334181177852, LR: 0.0003 +[2026-03-03 07:02:54] (step=0042792) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 8.372529837605166, LR: 0.0003 +[2026-03-03 07:03:02] (step=0042793) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.37272549403248, LR: 0.0003 +[2026-03-03 07:03:10] (step=0042794) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 8.372921150459792, LR: 0.0003 +[2026-03-03 07:03:18] (step=0042795) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.373116806887106, LR: 0.0003 +[2026-03-03 07:03:25] (step=0042796) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.37331246331442, LR: 0.0003 +[2026-03-03 07:03:33] (step=0042797) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 8.373508119741734, LR: 0.0003 +[2026-03-03 07:03:41] (step=0042798) Train Loss: 0.4366, Train Steps/Sec: 0.12, Epoch: 8.373703776169048, LR: 0.0003 +[2026-03-03 07:03:49] (step=0042799) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.37389943259636, LR: 0.0003 +[2026-03-03 07:03:57] (step=0042800) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.374095089023674, LR: 0.0003 +[2026-03-03 07:04:05] (step=0042801) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.374290745450988, LR: 0.0003 +[2026-03-03 07:04:13] (step=0042802) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.374486401878302, LR: 0.0003 +[2026-03-03 07:04:21] (step=0042803) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.374682058305615, LR: 0.0003 +[2026-03-03 07:04:29] (step=0042804) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.374877714732929, LR: 0.0003 +[2026-03-03 07:04:37] (step=0042805) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.375073371160243, LR: 0.0003 +[2026-03-03 07:04:44] (step=0042806) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.375269027587557, LR: 0.0003 +[2026-03-03 07:04:52] (step=0042807) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 8.37546468401487, LR: 0.0003 +[2026-03-03 07:05:00] (step=0042808) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.375660340442183, LR: 0.0003 +[2026-03-03 07:05:08] (step=0042809) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.375855996869497, LR: 0.0003 +[2026-03-03 07:05:16] (step=0042810) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 8.37605165329681, LR: 0.0003 +[2026-03-03 07:05:24] (step=0042811) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 8.376247309724125, LR: 0.0003 +[2026-03-03 07:05:32] (step=0042812) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.376442966151439, LR: 0.0003 +[2026-03-03 07:05:40] (step=0042813) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.376638622578751, LR: 0.0003 +[2026-03-03 07:05:47] (step=0042814) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.376834279006065, LR: 0.0003 +[2026-03-03 07:05:55] (step=0042815) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.377029935433379, LR: 0.0003 +[2026-03-03 07:06:03] (step=0042816) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.377225591860693, LR: 0.0003 +[2026-03-03 07:06:11] (step=0042817) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.377421248288007, LR: 0.0003 +[2026-03-03 07:06:19] (step=0042818) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.37761690471532, LR: 0.0003 +[2026-03-03 07:06:27] (step=0042819) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.377812561142633, LR: 0.0003 +[2026-03-03 07:06:35] (step=0042820) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.378008217569947, LR: 0.0003 +[2026-03-03 07:06:43] (step=0042821) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.378203873997261, LR: 0.0003 +[2026-03-03 07:06:51] (step=0042822) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.378399530424575, LR: 0.0003 +[2026-03-03 07:06:58] (step=0042823) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.378595186851888, LR: 0.0003 +[2026-03-03 07:07:06] (step=0042824) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.378790843279202, LR: 0.0003 +[2026-03-03 07:07:14] (step=0042825) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.378986499706516, LR: 0.0003 +[2026-03-03 07:07:22] (step=0042826) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.37918215613383, LR: 0.0003 +[2026-03-03 07:07:30] (step=0042827) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.379377812561142, LR: 0.0003 +[2026-03-03 07:07:38] (step=0042828) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.379573468988456, LR: 0.0003 +[2026-03-03 07:07:46] (step=0042829) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.37976912541577, LR: 0.0003 +[2026-03-03 07:07:54] (step=0042830) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.379964781843084, LR: 0.0003 +[2026-03-03 07:08:01] (step=0042831) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.380160438270398, LR: 0.0003 +[2026-03-03 07:08:09] (step=0042832) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.38035609469771, LR: 0.0003 +[2026-03-03 07:08:17] (step=0042833) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 8.380551751125024, LR: 0.0003 +[2026-03-03 07:08:25] (step=0042834) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.380747407552338, LR: 0.0003 +[2026-03-03 07:08:33] (step=0042835) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.380943063979652, LR: 0.0003 +[2026-03-03 07:08:41] (step=0042836) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.381138720406966, LR: 0.0003 +[2026-03-03 07:08:49] (step=0042837) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.381334376834278, LR: 0.0003 +[2026-03-03 07:08:57] (step=0042838) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.381530033261592, LR: 0.0003 +[2026-03-03 07:09:05] (step=0042839) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.381725689688906, LR: 0.0003 +[2026-03-03 07:09:12] (step=0042840) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 8.38192134611622, LR: 0.0003 +[2026-03-03 07:09:20] (step=0042841) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.382117002543534, LR: 0.0003 +[2026-03-03 07:09:28] (step=0042842) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.382312658970847, LR: 0.0003 +[2026-03-03 07:09:36] (step=0042843) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.38250831539816, LR: 0.0003 +[2026-03-03 07:09:44] (step=0042844) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.382703971825475, LR: 0.0003 +[2026-03-03 07:09:52] (step=0042845) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.382899628252789, LR: 0.0003 +[2026-03-03 07:10:00] (step=0042846) Train Loss: 0.4296, Train Steps/Sec: 0.12, Epoch: 8.383095284680103, LR: 0.0003 +[2026-03-03 07:10:08] (step=0042847) Train Loss: 0.4211, Train Steps/Sec: 0.13, Epoch: 8.383290941107415, LR: 0.0003 +[2026-03-03 07:10:16] (step=0042848) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 8.383486597534729, LR: 0.0003 +[2026-03-03 07:10:23] (step=0042849) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.383682253962043, LR: 0.0003 +[2026-03-03 07:10:31] (step=0042850) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.383877910389357, LR: 0.0003 +[2026-03-03 07:10:39] (step=0042851) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.384073566816669, LR: 0.0003 +[2026-03-03 07:10:47] (step=0042852) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.384269223243983, LR: 0.0003 +[2026-03-03 07:10:55] (step=0042853) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.384464879671297, LR: 0.0003 +[2026-03-03 07:11:03] (step=0042854) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.384660536098611, LR: 0.0003 +[2026-03-03 07:11:11] (step=0042855) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.384856192525925, LR: 0.0003 +[2026-03-03 07:11:19] (step=0042856) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.385051848953237, LR: 0.0003 +[2026-03-03 07:11:26] (step=0042857) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.385247505380551, LR: 0.0003 +[2026-03-03 07:11:34] (step=0042858) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.385443161807865, LR: 0.0003 +[2026-03-03 07:11:42] (step=0042859) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.38563881823518, LR: 0.0003 +[2026-03-03 07:11:50] (step=0042860) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.385834474662493, LR: 0.0003 +[2026-03-03 07:11:58] (step=0042861) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.386030131089806, LR: 0.0003 +[2026-03-03 07:12:06] (step=0042862) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.38622578751712, LR: 0.0003 +[2026-03-03 07:12:14] (step=0042863) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.386421443944434, LR: 0.0003 +[2026-03-03 07:12:22] (step=0042864) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.386617100371748, LR: 0.0003 +[2026-03-03 07:12:29] (step=0042865) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.386812756799062, LR: 0.0003 +[2026-03-03 07:12:37] (step=0042866) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.387008413226374, LR: 0.0003 +[2026-03-03 07:12:45] (step=0042867) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.387204069653688, LR: 0.0003 +[2026-03-03 07:12:53] (step=0042868) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.387399726081002, LR: 0.0003 +[2026-03-03 07:13:01] (step=0042869) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.387595382508316, LR: 0.0003 +[2026-03-03 07:13:09] (step=0042870) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.38779103893563, LR: 0.0003 +[2026-03-03 07:13:17] (step=0042871) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 8.387986695362942, LR: 0.0003 +[2026-03-03 07:13:25] (step=0042872) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.388182351790256, LR: 0.0003 +[2026-03-03 07:13:33] (step=0042873) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.38837800821757, LR: 0.0003 +[2026-03-03 07:13:40] (step=0042874) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.388573664644884, LR: 0.0003 +[2026-03-03 07:13:48] (step=0042875) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 8.388769321072198, LR: 0.0003 +[2026-03-03 07:13:56] (step=0042876) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.38896497749951, LR: 0.0003 +[2026-03-03 07:14:04] (step=0042877) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 8.389160633926824, LR: 0.0003 +[2026-03-03 07:14:12] (step=0042878) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.389356290354138, LR: 0.0003 +[2026-03-03 07:14:20] (step=0042879) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.389551946781452, LR: 0.0003 +[2026-03-03 07:14:28] (step=0042880) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.389747603208765, LR: 0.0003 +[2026-03-03 07:14:35] (step=0042881) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.389943259636079, LR: 0.0003 +[2026-03-03 07:14:43] (step=0042882) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.390138916063393, LR: 0.0003 +[2026-03-03 07:14:51] (step=0042883) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 8.390334572490707, LR: 0.0003 +[2026-03-03 07:14:59] (step=0042884) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.39053022891802, LR: 0.0003 +[2026-03-03 07:15:07] (step=0042885) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.390725885345333, LR: 0.0003 +[2026-03-03 07:15:15] (step=0042886) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.390921541772647, LR: 0.0003 +[2026-03-03 07:15:23] (step=0042887) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.39111719819996, LR: 0.0003 +[2026-03-03 07:15:30] (step=0042888) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.391312854627275, LR: 0.0003 +[2026-03-03 07:15:38] (step=0042889) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.391508511054589, LR: 0.0003 +[2026-03-03 07:15:46] (step=0042890) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.391704167481901, LR: 0.0003 +[2026-03-03 07:15:54] (step=0042891) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.391899823909215, LR: 0.0003 +[2026-03-03 07:16:02] (step=0042892) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.392095480336529, LR: 0.0003 +[2026-03-03 07:16:10] (step=0042893) Train Loss: 0.4401, Train Steps/Sec: 0.12, Epoch: 8.392291136763843, LR: 0.0003 +[2026-03-03 07:16:18] (step=0042894) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.392486793191157, LR: 0.0003 +[2026-03-03 07:16:26] (step=0042895) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.39268244961847, LR: 0.0003 +[2026-03-03 07:16:34] (step=0042896) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.392878106045783, LR: 0.0003 +[2026-03-03 07:16:41] (step=0042897) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.393073762473097, LR: 0.0003 +[2026-03-03 07:16:49] (step=0042898) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.393269418900411, LR: 0.0003 +[2026-03-03 07:16:57] (step=0042899) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.393465075327725, LR: 0.0003 +[2026-03-03 07:17:05] (step=0042900) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.393660731755038, LR: 0.0003 +[2026-03-03 07:17:13] (step=0042901) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.393856388182352, LR: 0.0003 +[2026-03-03 07:17:21] (step=0042902) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.394052044609666, LR: 0.0003 +[2026-03-03 07:17:29] (step=0042903) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.39424770103698, LR: 0.0003 +[2026-03-03 07:17:37] (step=0042904) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.394443357464292, LR: 0.0003 +[2026-03-03 07:17:44] (step=0042905) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.394639013891606, LR: 0.0003 +[2026-03-03 07:17:52] (step=0042906) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.39483467031892, LR: 0.0003 +[2026-03-03 07:18:00] (step=0042907) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.395030326746234, LR: 0.0003 +[2026-03-03 07:18:08] (step=0042908) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.395225983173548, LR: 0.0003 +[2026-03-03 07:18:16] (step=0042909) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.39542163960086, LR: 0.0003 +[2026-03-03 07:18:24] (step=0042910) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 8.395617296028174, LR: 0.0003 +[2026-03-03 07:18:32] (step=0042911) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.395812952455488, LR: 0.0003 +[2026-03-03 07:18:40] (step=0042912) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.396008608882802, LR: 0.0003 +[2026-03-03 07:18:47] (step=0042913) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.396204265310116, LR: 0.0003 +[2026-03-03 07:18:55] (step=0042914) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.396399921737428, LR: 0.0003 +[2026-03-03 07:19:03] (step=0042915) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.396595578164742, LR: 0.0003 +[2026-03-03 07:19:11] (step=0042916) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.396791234592056, LR: 0.0003 +[2026-03-03 07:19:19] (step=0042917) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.39698689101937, LR: 0.0003 +[2026-03-03 07:19:27] (step=0042918) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.397182547446684, LR: 0.0003 +[2026-03-03 07:19:35] (step=0042919) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.397378203873997, LR: 0.0003 +[2026-03-03 07:19:42] (step=0042920) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.39757386030131, LR: 0.0003 +[2026-03-03 07:19:50] (step=0042921) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.397769516728625, LR: 0.0003 +[2026-03-03 07:19:58] (step=0042922) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.397965173155939, LR: 0.0003 +[2026-03-03 07:20:06] (step=0042923) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.398160829583253, LR: 0.0003 +[2026-03-03 07:20:14] (step=0042924) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.398356486010565, LR: 0.0003 +[2026-03-03 07:20:22] (step=0042925) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 8.398552142437879, LR: 0.0003 +[2026-03-03 07:20:30] (step=0042926) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.398747798865193, LR: 0.0003 +[2026-03-03 07:20:38] (step=0042927) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.398943455292507, LR: 0.0003 +[2026-03-03 07:20:46] (step=0042928) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.39913911171982, LR: 0.0003 +[2026-03-03 07:20:53] (step=0042929) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.399334768147133, LR: 0.0003 +[2026-03-03 07:21:01] (step=0042930) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.399530424574447, LR: 0.0003 +[2026-03-03 07:21:09] (step=0042931) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.399726081001761, LR: 0.0003 +[2026-03-03 07:21:17] (step=0042932) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.399921737429075, LR: 0.0003 +[2026-03-03 07:21:25] (step=0042933) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.400117393856387, LR: 0.0003 +[2026-03-03 07:21:33] (step=0042934) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.400313050283701, LR: 0.0003 +[2026-03-03 07:21:41] (step=0042935) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.400508706711015, LR: 0.0003 +[2026-03-03 07:21:49] (step=0042936) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.40070436313833, LR: 0.0003 +[2026-03-03 07:21:56] (step=0042937) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.400900019565643, LR: 0.0003 +[2026-03-03 07:22:04] (step=0042938) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.401095675992956, LR: 0.0003 +[2026-03-03 07:22:12] (step=0042939) Train Loss: 0.4383, Train Steps/Sec: 0.12, Epoch: 8.40129133242027, LR: 0.0003 +[2026-03-03 07:22:20] (step=0042940) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 8.401486988847584, LR: 0.0003 +[2026-03-03 07:22:28] (step=0042941) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.401682645274898, LR: 0.0003 +[2026-03-03 07:22:36] (step=0042942) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.401878301702212, LR: 0.0003 +[2026-03-03 07:22:44] (step=0042943) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.402073958129524, LR: 0.0003 +[2026-03-03 07:22:52] (step=0042944) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.402269614556838, LR: 0.0003 +[2026-03-03 07:23:00] (step=0042945) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.402465270984152, LR: 0.0003 +[2026-03-03 07:23:07] (step=0042946) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 8.402660927411466, LR: 0.0003 +[2026-03-03 07:23:15] (step=0042947) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.40285658383878, LR: 0.0003 +[2026-03-03 07:23:23] (step=0042948) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.403052240266092, LR: 0.0003 +[2026-03-03 07:23:31] (step=0042949) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.403247896693406, LR: 0.0003 +[2026-03-03 07:23:39] (step=0042950) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.40344355312072, LR: 0.0003 +[2026-03-03 07:23:47] (step=0042951) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.403639209548034, LR: 0.0003 +[2026-03-03 07:23:55] (step=0042952) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 8.403834865975348, LR: 0.0003 +[2026-03-03 07:24:03] (step=0042953) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.40403052240266, LR: 0.0003 +[2026-03-03 07:24:10] (step=0042954) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.404226178829974, LR: 0.0003 +[2026-03-03 07:24:18] (step=0042955) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.404421835257288, LR: 0.0003 +[2026-03-03 07:24:26] (step=0042956) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.404617491684602, LR: 0.0003 +[2026-03-03 07:24:34] (step=0042957) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.404813148111915, LR: 0.0003 +[2026-03-03 07:24:42] (step=0042958) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.405008804539229, LR: 0.0003 +[2026-03-03 07:24:50] (step=0042959) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.405204460966543, LR: 0.0003 +[2026-03-03 07:24:58] (step=0042960) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.405400117393857, LR: 0.0003 +[2026-03-03 07:25:05] (step=0042961) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 8.40559577382117, LR: 0.0003 +[2026-03-03 07:25:13] (step=0042962) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.405791430248483, LR: 0.0003 +[2026-03-03 07:25:21] (step=0042963) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.405987086675797, LR: 0.0003 +[2026-03-03 07:25:29] (step=0042964) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 8.40618274310311, LR: 0.0003 +[2026-03-03 07:25:37] (step=0042965) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.406378399530425, LR: 0.0003 +[2026-03-03 07:25:45] (step=0042966) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.406574055957739, LR: 0.0003 +[2026-03-03 07:25:53] (step=0042967) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.406769712385051, LR: 0.0003 +[2026-03-03 07:26:01] (step=0042968) Train Loss: 0.4435, Train Steps/Sec: 0.12, Epoch: 8.406965368812365, LR: 0.0003 +[2026-03-03 07:26:09] (step=0042969) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.40716102523968, LR: 0.0003 +[2026-03-03 07:26:16] (step=0042970) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.407356681666993, LR: 0.0003 +[2026-03-03 07:26:24] (step=0042971) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.407552338094307, LR: 0.0003 +[2026-03-03 07:26:32] (step=0042972) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.40774799452162, LR: 0.0003 +[2026-03-03 07:26:40] (step=0042973) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.407943650948933, LR: 0.0003 +[2026-03-03 07:26:48] (step=0042974) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.408139307376247, LR: 0.0003 +[2026-03-03 07:26:56] (step=0042975) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.408334963803561, LR: 0.0003 +[2026-03-03 07:27:04] (step=0042976) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.408530620230875, LR: 0.0003 +[2026-03-03 07:27:11] (step=0042977) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.408726276658188, LR: 0.0003 +[2026-03-03 07:27:19] (step=0042978) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.408921933085502, LR: 0.0003 +[2026-03-03 07:27:27] (step=0042979) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.409117589512816, LR: 0.0003 +[2026-03-03 07:27:35] (step=0042980) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.40931324594013, LR: 0.0003 +[2026-03-03 07:27:43] (step=0042981) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.409508902367444, LR: 0.0003 +[2026-03-03 07:27:51] (step=0042982) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.409704558794756, LR: 0.0003 +[2026-03-03 07:27:59] (step=0042983) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 8.40990021522207, LR: 0.0003 +[2026-03-03 07:28:07] (step=0042984) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.410095871649384, LR: 0.0003 +[2026-03-03 07:28:14] (step=0042985) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.410291528076698, LR: 0.0003 +[2026-03-03 07:28:23] (step=0042986) Train Loss: 0.4382, Train Steps/Sec: 0.12, Epoch: 8.41048718450401, LR: 0.0003 +[2026-03-03 07:28:30] (step=0042987) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.410682840931324, LR: 0.0003 +[2026-03-03 07:28:38] (step=0042988) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.410878497358638, LR: 0.0003 +[2026-03-03 07:28:46] (step=0042989) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.411074153785952, LR: 0.0003 +[2026-03-03 07:28:54] (step=0042990) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 8.411269810213266, LR: 0.0003 +[2026-03-03 07:29:02] (step=0042991) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 8.411465466640578, LR: 0.0003 +[2026-03-03 07:29:10] (step=0042992) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.411661123067892, LR: 0.0003 +[2026-03-03 07:29:18] (step=0042993) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.411856779495206, LR: 0.0003 +[2026-03-03 07:29:25] (step=0042994) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.41205243592252, LR: 0.0003 +[2026-03-03 07:29:33] (step=0042995) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.412248092349834, LR: 0.0003 +[2026-03-03 07:29:41] (step=0042996) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 8.412443748777147, LR: 0.0003 +[2026-03-03 07:29:49] (step=0042997) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.41263940520446, LR: 0.0003 +[2026-03-03 07:29:57] (step=0042998) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.412835061631775, LR: 0.0003 +[2026-03-03 07:30:05] (step=0042999) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.413030718059089, LR: 0.0003 +[2026-03-03 07:30:13] (step=0043000) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 8.413226374486403, LR: 0.0003 +[2026-03-03 07:30:13] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0043000/ +[2026-03-03 07:30:21] (step=0043001) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 8.413422030913715, LR: 0.0003 +[2026-03-03 07:30:28] (step=0043002) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.413617687341029, LR: 0.0003 +[2026-03-03 07:30:36] (step=0043003) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 8.413813343768343, LR: 0.0003 +[2026-03-03 07:30:44] (step=0043004) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.414009000195657, LR: 0.0003 +[2026-03-03 07:30:52] (step=0043005) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.414204656622971, LR: 0.0003 +[2026-03-03 07:31:00] (step=0043006) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.414400313050283, LR: 0.0003 +[2026-03-03 07:31:08] (step=0043007) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.414595969477597, LR: 0.0003 +[2026-03-03 07:31:16] (step=0043008) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.414791625904911, LR: 0.0003 +[2026-03-03 07:31:23] (step=0043009) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.414987282332225, LR: 0.0003 +[2026-03-03 07:31:31] (step=0043010) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.415182938759537, LR: 0.0003 +[2026-03-03 07:31:39] (step=0043011) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.415378595186851, LR: 0.0003 +[2026-03-03 07:31:47] (step=0043012) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.415574251614165, LR: 0.0003 +[2026-03-03 07:31:55] (step=0043013) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.41576990804148, LR: 0.0003 +[2026-03-03 07:32:03] (step=0043014) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.415965564468793, LR: 0.0003 +[2026-03-03 07:32:11] (step=0043015) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.416161220896106, LR: 0.0003 +[2026-03-03 07:32:19] (step=0043016) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 8.41635687732342, LR: 0.0003 +[2026-03-03 07:32:26] (step=0043017) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 8.416552533750734, LR: 0.0003 +[2026-03-03 07:32:34] (step=0043018) Train Loss: 0.4475, Train Steps/Sec: 0.12, Epoch: 8.416748190178048, LR: 0.0003 +[2026-03-03 07:32:42] (step=0043019) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.416943846605362, LR: 0.0003 +[2026-03-03 07:32:50] (step=0043020) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.417139503032674, LR: 0.0003 +[2026-03-03 07:32:58] (step=0043021) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 8.417335159459988, LR: 0.0003 +[2026-03-03 07:33:06] (step=0043022) Train Loss: 0.4233, Train Steps/Sec: 0.13, Epoch: 8.417530815887302, LR: 0.0003 +[2026-03-03 07:33:14] (step=0043023) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.417726472314616, LR: 0.0003 +[2026-03-03 07:33:22] (step=0043024) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 8.41792212874193, LR: 0.0003 +[2026-03-03 07:33:30] (step=0043025) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.418117785169242, LR: 0.0003 +[2026-03-03 07:33:37] (step=0043026) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.418313441596556, LR: 0.0003 +[2026-03-03 07:33:45] (step=0043027) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.41850909802387, LR: 0.0003 +[2026-03-03 07:33:53] (step=0043028) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.418704754451184, LR: 0.0003 +[2026-03-03 07:34:01] (step=0043029) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 8.418900410878498, LR: 0.0003 +[2026-03-03 07:34:09] (step=0043030) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.41909606730581, LR: 0.0003 +[2026-03-03 07:34:17] (step=0043031) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.419291723733124, LR: 0.0003 +[2026-03-03 07:34:25] (step=0043032) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.419487380160438, LR: 0.0003 +[2026-03-03 07:34:32] (step=0043033) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.419683036587752, LR: 0.0003 +[2026-03-03 07:34:40] (step=0043034) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.419878693015066, LR: 0.0003 +[2026-03-03 07:34:48] (step=0043035) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.420074349442379, LR: 0.0003 +[2026-03-03 07:34:56] (step=0043036) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.420270005869693, LR: 0.0003 +[2026-03-03 07:35:04] (step=0043037) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.420465662297007, LR: 0.0003 +[2026-03-03 07:35:12] (step=0043038) Train Loss: 0.4538, Train Steps/Sec: 0.12, Epoch: 8.42066131872432, LR: 0.0003 +[2026-03-03 07:35:20] (step=0043039) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.420856975151633, LR: 0.0003 +[2026-03-03 07:35:28] (step=0043040) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.421052631578947, LR: 0.0003 +[2026-03-03 07:35:36] (step=0043041) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.421248288006261, LR: 0.0003 +[2026-03-03 07:35:44] (step=0043042) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.421443944433575, LR: 0.0003 +[2026-03-03 07:35:52] (step=0043043) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 8.421639600860889, LR: 0.0003 +[2026-03-03 07:35:59] (step=0043044) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.421835257288201, LR: 0.0003 +[2026-03-03 07:36:07] (step=0043045) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 8.422030913715515, LR: 0.0003 +[2026-03-03 07:36:15] (step=0043046) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.42222657014283, LR: 0.0003 +[2026-03-03 07:36:23] (step=0043047) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.422422226570143, LR: 0.0003 +[2026-03-03 07:36:31] (step=0043048) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.422617882997457, LR: 0.0003 +[2026-03-03 07:36:39] (step=0043049) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.42281353942477, LR: 0.0003 +[2026-03-03 07:36:47] (step=0043050) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.423009195852083, LR: 0.0003 +[2026-03-03 07:36:55] (step=0043051) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.423204852279397, LR: 0.0003 +[2026-03-03 07:37:02] (step=0043052) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.423400508706711, LR: 0.0003 +[2026-03-03 07:37:10] (step=0043053) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.423596165134025, LR: 0.0003 +[2026-03-03 07:37:18] (step=0043054) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.423791821561338, LR: 0.0003 +[2026-03-03 07:37:26] (step=0043055) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.423987477988652, LR: 0.0003 +[2026-03-03 07:37:34] (step=0043056) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 8.424183134415966, LR: 0.0003 +[2026-03-03 07:37:42] (step=0043057) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.42437879084328, LR: 0.0003 +[2026-03-03 07:37:50] (step=0043058) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 8.424574447270594, LR: 0.0003 +[2026-03-03 07:37:57] (step=0043059) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.424770103697906, LR: 0.0003 +[2026-03-03 07:38:05] (step=0043060) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.42496576012522, LR: 0.0003 +[2026-03-03 07:38:13] (step=0043061) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.425161416552534, LR: 0.0003 +[2026-03-03 07:38:21] (step=0043062) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.425357072979848, LR: 0.0003 +[2026-03-03 07:38:29] (step=0043063) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 8.42555272940716, LR: 0.0003 +[2026-03-03 07:38:37] (step=0043064) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.425748385834474, LR: 0.0003 +[2026-03-03 07:38:45] (step=0043065) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.425944042261788, LR: 0.0003 +[2026-03-03 07:38:53] (step=0043066) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.426139698689102, LR: 0.0003 +[2026-03-03 07:39:00] (step=0043067) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.426335355116416, LR: 0.0003 +[2026-03-03 07:39:08] (step=0043068) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.426531011543728, LR: 0.0003 +[2026-03-03 07:39:16] (step=0043069) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.426726667971042, LR: 0.0003 +[2026-03-03 07:39:24] (step=0043070) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.426922324398356, LR: 0.0003 +[2026-03-03 07:39:32] (step=0043071) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.42711798082567, LR: 0.0003 +[2026-03-03 07:39:40] (step=0043072) Train Loss: 0.4524, Train Steps/Sec: 0.12, Epoch: 8.427313637252984, LR: 0.0003 +[2026-03-03 07:39:48] (step=0043073) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.427509293680297, LR: 0.0003 +[2026-03-03 07:39:56] (step=0043074) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.42770495010761, LR: 0.0003 +[2026-03-03 07:40:03] (step=0043075) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.427900606534925, LR: 0.0003 +[2026-03-03 07:40:11] (step=0043076) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.428096262962239, LR: 0.0003 +[2026-03-03 07:40:19] (step=0043077) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.428291919389553, LR: 0.0003 +[2026-03-03 07:40:27] (step=0043078) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.428487575816865, LR: 0.0003 +[2026-03-03 07:40:35] (step=0043079) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.428683232244179, LR: 0.0003 +[2026-03-03 07:40:43] (step=0043080) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.428878888671493, LR: 0.0003 +[2026-03-03 07:40:51] (step=0043081) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.429074545098807, LR: 0.0003 +[2026-03-03 07:40:59] (step=0043082) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.429270201526121, LR: 0.0003 +[2026-03-03 07:41:06] (step=0043083) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.429465857953433, LR: 0.0003 +[2026-03-03 07:41:14] (step=0043084) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.429661514380747, LR: 0.0003 +[2026-03-03 07:41:22] (step=0043085) Train Loss: 0.4393, Train Steps/Sec: 0.12, Epoch: 8.429857170808061, LR: 0.0003 +[2026-03-03 07:41:30] (step=0043086) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.430052827235375, LR: 0.0003 +[2026-03-03 07:41:38] (step=0043087) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.43024848366269, LR: 0.0003 +[2026-03-03 07:41:46] (step=0043088) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.430444140090001, LR: 0.0003 +[2026-03-03 07:41:54] (step=0043089) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.430639796517315, LR: 0.0003 +[2026-03-03 07:42:02] (step=0043090) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 8.43083545294463, LR: 0.0003 +[2026-03-03 07:42:10] (step=0043091) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.431031109371943, LR: 0.0003 +[2026-03-03 07:42:18] (step=0043092) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.431226765799256, LR: 0.0003 +[2026-03-03 07:42:25] (step=0043093) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.43142242222657, LR: 0.0003 +[2026-03-03 07:42:33] (step=0043094) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.431618078653884, LR: 0.0003 +[2026-03-03 07:42:41] (step=0043095) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 8.431813735081198, LR: 0.0003 +[2026-03-03 07:42:49] (step=0043096) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.432009391508512, LR: 0.0003 +[2026-03-03 07:42:57] (step=0043097) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.432205047935824, LR: 0.0003 +[2026-03-03 07:43:05] (step=0043098) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.432400704363138, LR: 0.0003 +[2026-03-03 07:43:13] (step=0043099) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.432596360790452, LR: 0.0003 +[2026-03-03 07:43:20] (step=0043100) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.432792017217766, LR: 0.0003 +[2026-03-03 07:43:28] (step=0043101) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.43298767364508, LR: 0.0003 +[2026-03-03 07:43:36] (step=0043102) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.433183330072392, LR: 0.0003 +[2026-03-03 07:43:44] (step=0043103) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.433378986499706, LR: 0.0003 +[2026-03-03 07:43:52] (step=0043104) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.43357464292702, LR: 0.0003 +[2026-03-03 07:44:00] (step=0043105) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.433770299354334, LR: 0.0003 +[2026-03-03 07:44:08] (step=0043106) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.433965955781648, LR: 0.0003 +[2026-03-03 07:44:16] (step=0043107) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.43416161220896, LR: 0.0003 +[2026-03-03 07:44:23] (step=0043108) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.434357268636274, LR: 0.0003 +[2026-03-03 07:44:31] (step=0043109) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.434552925063588, LR: 0.0003 +[2026-03-03 07:44:39] (step=0043110) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.434748581490902, LR: 0.0003 +[2026-03-03 07:44:47] (step=0043111) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.434944237918216, LR: 0.0003 +[2026-03-03 07:44:55] (step=0043112) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.435139894345529, LR: 0.0003 +[2026-03-03 07:45:03] (step=0043113) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.435335550772843, LR: 0.0003 +[2026-03-03 07:45:11] (step=0043114) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.435531207200157, LR: 0.0003 +[2026-03-03 07:45:19] (step=0043115) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.43572686362747, LR: 0.0003 +[2026-03-03 07:45:27] (step=0043116) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.435922520054783, LR: 0.0003 +[2026-03-03 07:45:34] (step=0043117) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.436118176482097, LR: 0.0003 +[2026-03-03 07:45:42] (step=0043118) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.436313832909411, LR: 0.0003 +[2026-03-03 07:45:50] (step=0043119) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.436509489336725, LR: 0.0003 +[2026-03-03 07:45:58] (step=0043120) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.436705145764039, LR: 0.0003 +[2026-03-03 07:46:06] (step=0043121) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.436900802191351, LR: 0.0003 +[2026-03-03 07:46:14] (step=0043122) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.437096458618665, LR: 0.0003 +[2026-03-03 07:46:22] (step=0043123) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.43729211504598, LR: 0.0003 +[2026-03-03 07:46:29] (step=0043124) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.437487771473293, LR: 0.0003 +[2026-03-03 07:46:37] (step=0043125) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 8.437683427900607, LR: 0.0003 +[2026-03-03 07:46:45] (step=0043126) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.43787908432792, LR: 0.0003 +[2026-03-03 07:46:53] (step=0043127) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 8.438074740755233, LR: 0.0003 +[2026-03-03 07:47:01] (step=0043128) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.438270397182547, LR: 0.0003 +[2026-03-03 07:47:09] (step=0043129) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.438466053609861, LR: 0.0003 +[2026-03-03 07:47:17] (step=0043130) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.438661710037175, LR: 0.0003 +[2026-03-03 07:47:25] (step=0043131) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.438857366464488, LR: 0.0003 +[2026-03-03 07:47:33] (step=0043132) Train Loss: 0.4474, Train Steps/Sec: 0.12, Epoch: 8.439053022891802, LR: 0.0003 +[2026-03-03 07:47:40] (step=0043133) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.439248679319116, LR: 0.0003 +[2026-03-03 07:47:48] (step=0043134) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.43944433574643, LR: 0.0003 +[2026-03-03 07:47:56] (step=0043135) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.439639992173744, LR: 0.0003 +[2026-03-03 07:48:04] (step=0043136) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.439835648601056, LR: 0.0003 +[2026-03-03 07:48:12] (step=0043137) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.44003130502837, LR: 0.0003 +[2026-03-03 07:48:20] (step=0043138) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.440226961455684, LR: 0.0003 +[2026-03-03 07:48:28] (step=0043139) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.440422617882998, LR: 0.0003 +[2026-03-03 07:48:36] (step=0043140) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.440618274310312, LR: 0.0003 +[2026-03-03 07:48:43] (step=0043141) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.440813930737624, LR: 0.0003 +[2026-03-03 07:48:51] (step=0043142) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 8.441009587164938, LR: 0.0003 +[2026-03-03 07:48:59] (step=0043143) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.441205243592252, LR: 0.0003 +[2026-03-03 07:49:07] (step=0043144) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.441400900019566, LR: 0.0003 +[2026-03-03 07:49:15] (step=0043145) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.441596556446878, LR: 0.0003 +[2026-03-03 07:49:23] (step=0043146) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.441792212874192, LR: 0.0003 +[2026-03-03 07:49:31] (step=0043147) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.441987869301506, LR: 0.0003 +[2026-03-03 07:49:38] (step=0043148) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.44218352572882, LR: 0.0003 +[2026-03-03 07:49:46] (step=0043149) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.442379182156134, LR: 0.0003 +[2026-03-03 07:49:54] (step=0043150) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.442574838583447, LR: 0.0003 +[2026-03-03 07:50:02] (step=0043151) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.44277049501076, LR: 0.0003 +[2026-03-03 07:50:10] (step=0043152) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.442966151438075, LR: 0.0003 +[2026-03-03 07:50:18] (step=0043153) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.443161807865389, LR: 0.0003 +[2026-03-03 07:50:26] (step=0043154) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.443357464292703, LR: 0.0003 +[2026-03-03 07:50:34] (step=0043155) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.443553120720015, LR: 0.0003 +[2026-03-03 07:50:41] (step=0043156) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.443748777147329, LR: 0.0003 +[2026-03-03 07:50:49] (step=0043157) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.443944433574643, LR: 0.0003 +[2026-03-03 07:50:57] (step=0043158) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.444140090001957, LR: 0.0003 +[2026-03-03 07:51:05] (step=0043159) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.444335746429271, LR: 0.0003 +[2026-03-03 07:51:13] (step=0043160) Train Loss: 0.4381, Train Steps/Sec: 0.12, Epoch: 8.444531402856583, LR: 0.0003 +[2026-03-03 07:51:21] (step=0043161) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.444727059283897, LR: 0.0003 +[2026-03-03 07:51:29] (step=0043162) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.444922715711211, LR: 0.0003 +[2026-03-03 07:51:37] (step=0043163) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.445118372138525, LR: 0.0003 +[2026-03-03 07:51:45] (step=0043164) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.44531402856584, LR: 0.0003 +[2026-03-03 07:51:52] (step=0043165) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.445509684993151, LR: 0.0003 +[2026-03-03 07:52:00] (step=0043166) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.445705341420465, LR: 0.0003 +[2026-03-03 07:52:08] (step=0043167) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.44590099784778, LR: 0.0003 +[2026-03-03 07:52:16] (step=0043168) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.446096654275093, LR: 0.0003 +[2026-03-03 07:52:24] (step=0043169) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.446292310702406, LR: 0.0003 +[2026-03-03 07:52:32] (step=0043170) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.44648796712972, LR: 0.0003 +[2026-03-03 07:52:40] (step=0043171) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.446683623557034, LR: 0.0003 +[2026-03-03 07:52:47] (step=0043172) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.446879279984348, LR: 0.0003 +[2026-03-03 07:52:55] (step=0043173) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.447074936411662, LR: 0.0003 +[2026-03-03 07:53:03] (step=0043174) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.447270592838974, LR: 0.0003 +[2026-03-03 07:53:11] (step=0043175) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.447466249266288, LR: 0.0003 +[2026-03-03 07:53:19] (step=0043176) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 8.447661905693602, LR: 0.0003 +[2026-03-03 07:53:27] (step=0043177) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.447857562120916, LR: 0.0003 +[2026-03-03 07:53:35] (step=0043178) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.44805321854823, LR: 0.0003 +[2026-03-03 07:53:43] (step=0043179) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.448248874975542, LR: 0.0003 +[2026-03-03 07:53:51] (step=0043180) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.448444531402856, LR: 0.0003 +[2026-03-03 07:53:59] (step=0043181) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.44864018783017, LR: 0.0003 +[2026-03-03 07:54:06] (step=0043182) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.448835844257484, LR: 0.0003 +[2026-03-03 07:54:14] (step=0043183) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.449031500684798, LR: 0.0003 +[2026-03-03 07:54:22] (step=0043184) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 8.44922715711211, LR: 0.0003 +[2026-03-03 07:54:30] (step=0043185) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.449422813539424, LR: 0.0003 +[2026-03-03 07:54:38] (step=0043186) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.449618469966738, LR: 0.0003 +[2026-03-03 07:54:46] (step=0043187) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.449814126394052, LR: 0.0003 +[2026-03-03 07:54:54] (step=0043188) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.450009782821366, LR: 0.0003 +[2026-03-03 07:55:01] (step=0043189) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.450205439248679, LR: 0.0003 +[2026-03-03 07:55:09] (step=0043190) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.450401095675993, LR: 0.0003 +[2026-03-03 07:55:17] (step=0043191) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.450596752103307, LR: 0.0003 +[2026-03-03 07:55:25] (step=0043192) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.45079240853062, LR: 0.0003 +[2026-03-03 07:55:33] (step=0043193) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.450988064957935, LR: 0.0003 +[2026-03-03 07:55:41] (step=0043194) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 8.451183721385247, LR: 0.0003 +[2026-03-03 07:55:49] (step=0043195) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.451379377812561, LR: 0.0003 +[2026-03-03 07:55:57] (step=0043196) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.451575034239875, LR: 0.0003 +[2026-03-03 07:56:04] (step=0043197) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.451770690667189, LR: 0.0003 +[2026-03-03 07:56:12] (step=0043198) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.451966347094501, LR: 0.0003 +[2026-03-03 07:56:20] (step=0043199) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.452162003521815, LR: 0.0003 +[2026-03-03 07:56:28] (step=0043200) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.45235765994913, LR: 0.0003 +[2026-03-03 07:56:36] (step=0043201) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 8.452553316376443, LR: 0.0003 +[2026-03-03 07:56:44] (step=0043202) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.452748972803757, LR: 0.0003 +[2026-03-03 07:56:52] (step=0043203) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.45294462923107, LR: 0.0003 +[2026-03-03 07:56:59] (step=0043204) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.453140285658383, LR: 0.0003 +[2026-03-03 07:57:07] (step=0043205) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 8.453335942085697, LR: 0.0003 +[2026-03-03 07:57:15] (step=0043206) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.453531598513012, LR: 0.0003 +[2026-03-03 07:57:23] (step=0043207) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.453727254940326, LR: 0.0003 +[2026-03-03 07:57:31] (step=0043208) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.453922911367638, LR: 0.0003 +[2026-03-03 07:57:39] (step=0043209) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.454118567794952, LR: 0.0003 +[2026-03-03 07:57:47] (step=0043210) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.454314224222266, LR: 0.0003 +[2026-03-03 07:57:55] (step=0043211) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.45450988064958, LR: 0.0003 +[2026-03-03 07:58:02] (step=0043212) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.454705537076894, LR: 0.0003 +[2026-03-03 07:58:10] (step=0043213) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.454901193504206, LR: 0.0003 +[2026-03-03 07:58:18] (step=0043214) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.45509684993152, LR: 0.0003 +[2026-03-03 07:58:26] (step=0043215) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.455292506358834, LR: 0.0003 +[2026-03-03 07:58:34] (step=0043216) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.455488162786148, LR: 0.0003 +[2026-03-03 07:58:42] (step=0043217) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.455683819213462, LR: 0.0003 +[2026-03-03 07:58:50] (step=0043218) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.455879475640774, LR: 0.0003 +[2026-03-03 07:58:58] (step=0043219) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.456075132068088, LR: 0.0003 +[2026-03-03 07:59:05] (step=0043220) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.456270788495402, LR: 0.0003 +[2026-03-03 07:59:13] (step=0043221) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.456466444922716, LR: 0.0003 +[2026-03-03 07:59:21] (step=0043222) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.456662101350028, LR: 0.0003 +[2026-03-03 07:59:29] (step=0043223) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.456857757777342, LR: 0.0003 +[2026-03-03 07:59:37] (step=0043224) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.457053414204657, LR: 0.0003 +[2026-03-03 07:59:45] (step=0043225) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.45724907063197, LR: 0.0003 +[2026-03-03 07:59:53] (step=0043226) Train Loss: 0.4435, Train Steps/Sec: 0.12, Epoch: 8.457444727059285, LR: 0.0003 +[2026-03-03 08:00:01] (step=0043227) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.457640383486597, LR: 0.0003 +[2026-03-03 08:00:09] (step=0043228) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.45783603991391, LR: 0.0003 +[2026-03-03 08:00:16] (step=0043229) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.458031696341225, LR: 0.0003 +[2026-03-03 08:00:24] (step=0043230) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.458227352768539, LR: 0.0003 +[2026-03-03 08:00:32] (step=0043231) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.458423009195853, LR: 0.0003 +[2026-03-03 08:00:40] (step=0043232) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.458618665623165, LR: 0.0003 +[2026-03-03 08:00:48] (step=0043233) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.458814322050479, LR: 0.0003 +[2026-03-03 08:00:56] (step=0043234) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.459009978477793, LR: 0.0003 +[2026-03-03 08:01:04] (step=0043235) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.459205634905107, LR: 0.0003 +[2026-03-03 08:01:11] (step=0043236) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.459401291332421, LR: 0.0003 +[2026-03-03 08:01:19] (step=0043237) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 8.459596947759733, LR: 0.0003 +[2026-03-03 08:01:27] (step=0043238) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.459792604187047, LR: 0.0003 +[2026-03-03 08:01:35] (step=0043239) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.459988260614361, LR: 0.0003 +[2026-03-03 08:01:43] (step=0043240) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 8.460183917041675, LR: 0.0003 +[2026-03-03 08:01:51] (step=0043241) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.46037957346899, LR: 0.0003 +[2026-03-03 08:01:59] (step=0043242) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.460575229896302, LR: 0.0003 +[2026-03-03 08:02:07] (step=0043243) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.460770886323616, LR: 0.0003 +[2026-03-03 08:02:14] (step=0043244) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.46096654275093, LR: 0.0003 +[2026-03-03 08:02:22] (step=0043245) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.461162199178244, LR: 0.0003 +[2026-03-03 08:02:30] (step=0043246) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 8.461357855605556, LR: 0.0003 +[2026-03-03 08:02:38] (step=0043247) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.46155351203287, LR: 0.0003 +[2026-03-03 08:02:46] (step=0043248) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.461749168460184, LR: 0.0003 +[2026-03-03 08:02:54] (step=0043249) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.461944824887498, LR: 0.0003 +[2026-03-03 08:03:02] (step=0043250) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.462140481314812, LR: 0.0003 +[2026-03-03 08:03:09] (step=0043251) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.462336137742124, LR: 0.0003 +[2026-03-03 08:03:17] (step=0043252) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.462531794169438, LR: 0.0003 +[2026-03-03 08:03:25] (step=0043253) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.462727450596752, LR: 0.0003 +[2026-03-03 08:03:33] (step=0043254) Train Loss: 0.4390, Train Steps/Sec: 0.12, Epoch: 8.462923107024066, LR: 0.0003 +[2026-03-03 08:03:41] (step=0043255) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 8.46311876345138, LR: 0.0003 +[2026-03-03 08:03:49] (step=0043256) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.463314419878692, LR: 0.0003 +[2026-03-03 08:03:57] (step=0043257) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.463510076306006, LR: 0.0003 +[2026-03-03 08:04:05] (step=0043258) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.46370573273332, LR: 0.0003 +[2026-03-03 08:04:13] (step=0043259) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.463901389160634, LR: 0.0003 +[2026-03-03 08:04:20] (step=0043260) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.464097045587948, LR: 0.0003 +[2026-03-03 08:04:28] (step=0043261) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.46429270201526, LR: 0.0003 +[2026-03-03 08:04:36] (step=0043262) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.464488358442575, LR: 0.0003 +[2026-03-03 08:04:44] (step=0043263) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.464684014869889, LR: 0.0003 +[2026-03-03 08:04:52] (step=0043264) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.464879671297203, LR: 0.0003 +[2026-03-03 08:05:00] (step=0043265) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.465075327724517, LR: 0.0003 +[2026-03-03 08:05:08] (step=0043266) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 8.465270984151829, LR: 0.0003 +[2026-03-03 08:05:16] (step=0043267) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.465466640579143, LR: 0.0003 +[2026-03-03 08:05:23] (step=0043268) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.465662297006457, LR: 0.0003 +[2026-03-03 08:05:31] (step=0043269) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.46585795343377, LR: 0.0003 +[2026-03-03 08:05:39] (step=0043270) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.466053609861085, LR: 0.0003 +[2026-03-03 08:05:47] (step=0043271) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.466249266288397, LR: 0.0003 +[2026-03-03 08:05:55] (step=0043272) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.466444922715711, LR: 0.0003 +[2026-03-03 08:06:03] (step=0043273) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.466640579143025, LR: 0.0003 +[2026-03-03 08:06:11] (step=0043274) Train Loss: 0.4361, Train Steps/Sec: 0.12, Epoch: 8.466836235570339, LR: 0.0003 +[2026-03-03 08:06:19] (step=0043275) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.467031891997651, LR: 0.0003 +[2026-03-03 08:06:27] (step=0043276) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.467227548424965, LR: 0.0003 +[2026-03-03 08:06:34] (step=0043277) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.46742320485228, LR: 0.0003 +[2026-03-03 08:06:42] (step=0043278) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.467618861279593, LR: 0.0003 +[2026-03-03 08:06:50] (step=0043279) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.467814517706907, LR: 0.0003 +[2026-03-03 08:06:58] (step=0043280) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.46801017413422, LR: 0.0003 +[2026-03-03 08:07:06] (step=0043281) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.468205830561534, LR: 0.0003 +[2026-03-03 08:07:14] (step=0043282) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.468401486988848, LR: 0.0003 +[2026-03-03 08:07:22] (step=0043283) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.468597143416162, LR: 0.0003 +[2026-03-03 08:07:29] (step=0043284) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.468792799843476, LR: 0.0003 +[2026-03-03 08:07:37] (step=0043285) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.468988456270788, LR: 0.0003 +[2026-03-03 08:07:45] (step=0043286) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.469184112698102, LR: 0.0003 +[2026-03-03 08:07:53] (step=0043287) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.469379769125416, LR: 0.0003 +[2026-03-03 08:08:01] (step=0043288) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.46957542555273, LR: 0.0003 +[2026-03-03 08:08:09] (step=0043289) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 8.469771081980044, LR: 0.0003 +[2026-03-03 08:08:17] (step=0043290) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 8.469966738407356, LR: 0.0003 +[2026-03-03 08:08:24] (step=0043291) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.47016239483467, LR: 0.0003 +[2026-03-03 08:08:32] (step=0043292) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.470358051261984, LR: 0.0003 +[2026-03-03 08:08:40] (step=0043293) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.470553707689298, LR: 0.0003 +[2026-03-03 08:08:48] (step=0043294) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.470749364116612, LR: 0.0003 +[2026-03-03 08:08:56] (step=0043295) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.470945020543924, LR: 0.0003 +[2026-03-03 08:09:04] (step=0043296) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.471140676971238, LR: 0.0003 +[2026-03-03 08:09:12] (step=0043297) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.471336333398552, LR: 0.0003 +[2026-03-03 08:09:20] (step=0043298) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.471531989825866, LR: 0.0003 +[2026-03-03 08:09:27] (step=0043299) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.471727646253179, LR: 0.0003 +[2026-03-03 08:09:35] (step=0043300) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.471923302680493, LR: 0.0003 +[2026-03-03 08:09:43] (step=0043301) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.472118959107807, LR: 0.0003 +[2026-03-03 08:09:51] (step=0043302) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.47231461553512, LR: 0.0003 +[2026-03-03 08:09:59] (step=0043303) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.472510271962435, LR: 0.0003 +[2026-03-03 08:10:07] (step=0043304) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.472705928389747, LR: 0.0003 +[2026-03-03 08:10:15] (step=0043305) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.47290158481706, LR: 0.0003 +[2026-03-03 08:10:23] (step=0043306) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.473097241244375, LR: 0.0003 +[2026-03-03 08:10:30] (step=0043307) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.473292897671689, LR: 0.0003 +[2026-03-03 08:10:38] (step=0043308) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.473488554099003, LR: 0.0003 +[2026-03-03 08:10:46] (step=0043309) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.473684210526315, LR: 0.0003 +[2026-03-03 08:10:54] (step=0043310) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.473879866953629, LR: 0.0003 +[2026-03-03 08:11:02] (step=0043311) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.474075523380943, LR: 0.0003 +[2026-03-03 08:11:10] (step=0043312) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.474271179808257, LR: 0.0003 +[2026-03-03 08:11:18] (step=0043313) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.474466836235571, LR: 0.0003 +[2026-03-03 08:11:25] (step=0043314) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.474662492662883, LR: 0.0003 +[2026-03-03 08:11:33] (step=0043315) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.474858149090197, LR: 0.0003 +[2026-03-03 08:11:41] (step=0043316) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.475053805517511, LR: 0.0003 +[2026-03-03 08:11:49] (step=0043317) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.475249461944825, LR: 0.0003 +[2026-03-03 08:11:57] (step=0043318) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.47544511837214, LR: 0.0003 +[2026-03-03 08:12:05] (step=0043319) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.475640774799452, LR: 0.0003 +[2026-03-03 08:12:13] (step=0043320) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.475836431226766, LR: 0.0003 +[2026-03-03 08:12:21] (step=0043321) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 8.47603208765408, LR: 0.0003 +[2026-03-03 08:12:29] (step=0043322) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.476227744081394, LR: 0.0003 +[2026-03-03 08:12:36] (step=0043323) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.476423400508708, LR: 0.0003 +[2026-03-03 08:12:44] (step=0043324) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.47661905693602, LR: 0.0003 +[2026-03-03 08:12:52] (step=0043325) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.476814713363334, LR: 0.0003 +[2026-03-03 08:13:00] (step=0043326) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.477010369790648, LR: 0.0003 +[2026-03-03 08:13:08] (step=0043327) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.477206026217962, LR: 0.0003 +[2026-03-03 08:13:16] (step=0043328) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.477401682645274, LR: 0.0003 +[2026-03-03 08:13:24] (step=0043329) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.477597339072588, LR: 0.0003 +[2026-03-03 08:13:31] (step=0043330) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.477792995499902, LR: 0.0003 +[2026-03-03 08:13:39] (step=0043331) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 8.477988651927216, LR: 0.0003 +[2026-03-03 08:13:47] (step=0043332) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.47818430835453, LR: 0.0003 +[2026-03-03 08:13:55] (step=0043333) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.478379964781842, LR: 0.0003 +[2026-03-03 08:14:03] (step=0043334) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 8.478575621209156, LR: 0.0003 +[2026-03-03 08:14:11] (step=0043335) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.47877127763647, LR: 0.0003 +[2026-03-03 08:14:19] (step=0043336) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.478966934063784, LR: 0.0003 +[2026-03-03 08:14:27] (step=0043337) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.479162590491098, LR: 0.0003 +[2026-03-03 08:14:34] (step=0043338) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.47935824691841, LR: 0.0003 +[2026-03-03 08:14:42] (step=0043339) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.479553903345725, LR: 0.0003 +[2026-03-03 08:14:50] (step=0043340) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.479749559773039, LR: 0.0003 +[2026-03-03 08:14:58] (step=0043341) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.479945216200353, LR: 0.0003 +[2026-03-03 08:15:06] (step=0043342) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.480140872627667, LR: 0.0003 +[2026-03-03 08:15:14] (step=0043343) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.480336529054979, LR: 0.0003 +[2026-03-03 08:15:22] (step=0043344) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.480532185482293, LR: 0.0003 +[2026-03-03 08:15:29] (step=0043345) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.480727841909607, LR: 0.0003 +[2026-03-03 08:15:37] (step=0043346) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.48092349833692, LR: 0.0003 +[2026-03-03 08:15:45] (step=0043347) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.481119154764235, LR: 0.0003 +[2026-03-03 08:15:53] (step=0043348) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.481314811191547, LR: 0.0003 +[2026-03-03 08:16:01] (step=0043349) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.481510467618861, LR: 0.0003 +[2026-03-03 08:16:09] (step=0043350) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.481706124046175, LR: 0.0003 +[2026-03-03 08:16:17] (step=0043351) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 8.481901780473489, LR: 0.0003 +[2026-03-03 08:16:25] (step=0043352) Train Loss: 0.4524, Train Steps/Sec: 0.12, Epoch: 8.482097436900801, LR: 0.0003 +[2026-03-03 08:16:33] (step=0043353) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.482293093328115, LR: 0.0003 +[2026-03-03 08:16:40] (step=0043354) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.48248874975543, LR: 0.0003 +[2026-03-03 08:16:48] (step=0043355) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.482684406182743, LR: 0.0003 +[2026-03-03 08:16:56] (step=0043356) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.482880062610057, LR: 0.0003 +[2026-03-03 08:17:04] (step=0043357) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.48307571903737, LR: 0.0003 +[2026-03-03 08:17:12] (step=0043358) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.483271375464684, LR: 0.0003 +[2026-03-03 08:17:20] (step=0043359) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.483467031891998, LR: 0.0003 +[2026-03-03 08:17:28] (step=0043360) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 8.483662688319312, LR: 0.0003 +[2026-03-03 08:17:36] (step=0043361) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 8.483858344746626, LR: 0.0003 +[2026-03-03 08:17:43] (step=0043362) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 8.484054001173938, LR: 0.0003 +[2026-03-03 08:17:51] (step=0043363) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.484249657601252, LR: 0.0003 +[2026-03-03 08:17:59] (step=0043364) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.484445314028566, LR: 0.0003 +[2026-03-03 08:18:07] (step=0043365) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.48464097045588, LR: 0.0003 +[2026-03-03 08:18:15] (step=0043366) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.484836626883194, LR: 0.0003 +[2026-03-03 08:18:23] (step=0043367) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.485032283310506, LR: 0.0003 +[2026-03-03 08:18:31] (step=0043368) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.48522793973782, LR: 0.0003 +[2026-03-03 08:18:38] (step=0043369) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.485423596165134, LR: 0.0003 +[2026-03-03 08:18:46] (step=0043370) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.485619252592448, LR: 0.0003 +[2026-03-03 08:18:54] (step=0043371) Train Loss: 0.4520, Train Steps/Sec: 0.12, Epoch: 8.485814909019762, LR: 0.0003 +[2026-03-03 08:19:02] (step=0043372) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.486010565447074, LR: 0.0003 +[2026-03-03 08:19:10] (step=0043373) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.486206221874388, LR: 0.0003 +[2026-03-03 08:19:18] (step=0043374) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.486401878301702, LR: 0.0003 +[2026-03-03 08:19:26] (step=0043375) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.486597534729016, LR: 0.0003 +[2026-03-03 08:19:34] (step=0043376) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.48679319115633, LR: 0.0003 +[2026-03-03 08:19:42] (step=0043377) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.486988847583643, LR: 0.0003 +[2026-03-03 08:19:49] (step=0043378) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.487184504010957, LR: 0.0003 +[2026-03-03 08:19:57] (step=0043379) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.48738016043827, LR: 0.0003 +[2026-03-03 08:20:05] (step=0043380) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 8.487575816865585, LR: 0.0003 +[2026-03-03 08:20:13] (step=0043381) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.487771473292897, LR: 0.0003 +[2026-03-03 08:20:21] (step=0043382) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.48796712972021, LR: 0.0003 +[2026-03-03 08:20:29] (step=0043383) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.488162786147525, LR: 0.0003 +[2026-03-03 08:20:37] (step=0043384) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 8.488358442574839, LR: 0.0003 +[2026-03-03 08:20:45] (step=0043385) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.488554099002153, LR: 0.0003 +[2026-03-03 08:20:52] (step=0043386) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 8.488749755429465, LR: 0.0003 +[2026-03-03 08:21:00] (step=0043387) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 8.488945411856779, LR: 0.0003 +[2026-03-03 08:21:08] (step=0043388) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.489141068284093, LR: 0.0003 +[2026-03-03 08:21:16] (step=0043389) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.489336724711407, LR: 0.0003 +[2026-03-03 08:21:24] (step=0043390) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.489532381138721, LR: 0.0003 +[2026-03-03 08:21:32] (step=0043391) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.489728037566033, LR: 0.0003 +[2026-03-03 08:21:40] (step=0043392) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.489923693993347, LR: 0.0003 +[2026-03-03 08:21:48] (step=0043393) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.490119350420661, LR: 0.0003 +[2026-03-03 08:21:55] (step=0043394) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.490315006847975, LR: 0.0003 +[2026-03-03 08:22:03] (step=0043395) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.49051066327529, LR: 0.0003 +[2026-03-03 08:22:11] (step=0043396) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.490706319702602, LR: 0.0003 +[2026-03-03 08:22:19] (step=0043397) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.490901976129916, LR: 0.0003 +[2026-03-03 08:22:27] (step=0043398) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.49109763255723, LR: 0.0003 +[2026-03-03 08:22:35] (step=0043399) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.491293288984544, LR: 0.0003 +[2026-03-03 08:22:43] (step=0043400) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 8.491488945411858, LR: 0.0003 +[2026-03-03 08:22:50] (step=0043401) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.49168460183917, LR: 0.0003 +[2026-03-03 08:22:58] (step=0043402) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.491880258266484, LR: 0.0003 +[2026-03-03 08:23:06] (step=0043403) Train Loss: 0.4540, Train Steps/Sec: 0.12, Epoch: 8.492075914693798, LR: 0.0003 +[2026-03-03 08:23:14] (step=0043404) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.492271571121112, LR: 0.0003 +[2026-03-03 08:23:22] (step=0043405) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.492467227548424, LR: 0.0003 +[2026-03-03 08:23:30] (step=0043406) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.492662883975738, LR: 0.0003 +[2026-03-03 08:23:38] (step=0043407) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.492858540403052, LR: 0.0003 +[2026-03-03 08:23:46] (step=0043408) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.493054196830366, LR: 0.0003 +[2026-03-03 08:23:54] (step=0043409) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.49324985325768, LR: 0.0003 +[2026-03-03 08:24:01] (step=0043410) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.493445509684992, LR: 0.0003 +[2026-03-03 08:24:09] (step=0043411) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.493641166112306, LR: 0.0003 +[2026-03-03 08:24:17] (step=0043412) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.49383682253962, LR: 0.0003 +[2026-03-03 08:24:25] (step=0043413) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.494032478966934, LR: 0.0003 +[2026-03-03 08:24:33] (step=0043414) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.494228135394248, LR: 0.0003 +[2026-03-03 08:24:41] (step=0043415) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.49442379182156, LR: 0.0003 +[2026-03-03 08:24:49] (step=0043416) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.494619448248875, LR: 0.0003 +[2026-03-03 08:24:57] (step=0043417) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.494815104676189, LR: 0.0003 +[2026-03-03 08:25:05] (step=0043418) Train Loss: 0.4499, Train Steps/Sec: 0.12, Epoch: 8.495010761103503, LR: 0.0003 +[2026-03-03 08:25:12] (step=0043419) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.495206417530817, LR: 0.0003 +[2026-03-03 08:25:20] (step=0043420) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.495402073958129, LR: 0.0003 +[2026-03-03 08:25:28] (step=0043421) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.495597730385443, LR: 0.0003 +[2026-03-03 08:25:36] (step=0043422) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.495793386812757, LR: 0.0003 +[2026-03-03 08:25:44] (step=0043423) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 8.49598904324007, LR: 0.0003 +[2026-03-03 08:25:52] (step=0043424) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.496184699667385, LR: 0.0003 +[2026-03-03 08:26:00] (step=0043425) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.496380356094697, LR: 0.0003 +[2026-03-03 08:26:08] (step=0043426) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.496576012522011, LR: 0.0003 +[2026-03-03 08:26:15] (step=0043427) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.496771668949325, LR: 0.0003 +[2026-03-03 08:26:23] (step=0043428) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.49696732537664, LR: 0.0003 +[2026-03-03 08:26:31] (step=0043429) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.497162981803953, LR: 0.0003 +[2026-03-03 08:26:39] (step=0043430) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.497358638231265, LR: 0.0003 +[2026-03-03 08:26:47] (step=0043431) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.49755429465858, LR: 0.0003 +[2026-03-03 08:26:55] (step=0043432) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 8.497749951085893, LR: 0.0003 +[2026-03-03 08:27:03] (step=0043433) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.497945607513207, LR: 0.0003 +[2026-03-03 08:27:10] (step=0043434) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.49814126394052, LR: 0.0003 +[2026-03-03 08:27:18] (step=0043435) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.498336920367834, LR: 0.0003 +[2026-03-03 08:27:26] (step=0043436) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.498532576795148, LR: 0.0003 +[2026-03-03 08:27:34] (step=0043437) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.498728233222462, LR: 0.0003 +[2026-03-03 08:27:42] (step=0043438) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.498923889649776, LR: 0.0003 +[2026-03-03 08:27:50] (step=0043439) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 8.499119546077088, LR: 0.0003 +[2026-03-03 08:27:58] (step=0043440) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.499315202504402, LR: 0.0003 +[2026-03-03 08:28:06] (step=0043441) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.499510858931716, LR: 0.0003 +[2026-03-03 08:28:13] (step=0043442) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 8.49970651535903, LR: 0.0003 +[2026-03-03 08:28:21] (step=0043443) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.499902171786344, LR: 0.0003 +[2026-03-03 08:28:29] (step=0043444) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 8.500097828213656, LR: 0.0003 +[2026-03-03 08:28:37] (step=0043445) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 8.50029348464097, LR: 0.0003 +[2026-03-03 08:28:45] (step=0043446) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.500489141068284, LR: 0.0003 +[2026-03-03 08:28:53] (step=0043447) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.500684797495598, LR: 0.0003 +[2026-03-03 08:29:01] (step=0043448) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.500880453922912, LR: 0.0003 +[2026-03-03 08:29:09] (step=0043449) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.501076110350224, LR: 0.0003 +[2026-03-03 08:29:16] (step=0043450) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.501271766777538, LR: 0.0003 +[2026-03-03 08:29:24] (step=0043451) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.501467423204852, LR: 0.0003 +[2026-03-03 08:29:32] (step=0043452) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.501663079632166, LR: 0.0003 +[2026-03-03 08:29:40] (step=0043453) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.50185873605948, LR: 0.0003 +[2026-03-03 08:29:48] (step=0043454) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.502054392486793, LR: 0.0003 +[2026-03-03 08:29:56] (step=0043455) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.502250048914107, LR: 0.0003 +[2026-03-03 08:30:04] (step=0043456) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.50244570534142, LR: 0.0003 +[2026-03-03 08:30:12] (step=0043457) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.502641361768735, LR: 0.0003 +[2026-03-03 08:30:19] (step=0043458) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 8.502837018196047, LR: 0.0003 +[2026-03-03 08:30:27] (step=0043459) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.50303267462336, LR: 0.0003 +[2026-03-03 08:30:35] (step=0043460) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.503228331050675, LR: 0.0003 +[2026-03-03 08:30:43] (step=0043461) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 8.503423987477989, LR: 0.0003 +[2026-03-03 08:30:51] (step=0043462) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.503619643905303, LR: 0.0003 +[2026-03-03 08:30:59] (step=0043463) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.503815300332615, LR: 0.0003 +[2026-03-03 08:31:07] (step=0043464) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.50401095675993, LR: 0.0003 +[2026-03-03 08:31:14] (step=0043465) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.504206613187243, LR: 0.0003 +[2026-03-03 08:31:22] (step=0043466) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.504402269614557, LR: 0.0003 +[2026-03-03 08:31:30] (step=0043467) Train Loss: 0.4561, Train Steps/Sec: 0.12, Epoch: 8.504597926041871, LR: 0.0003 +[2026-03-03 08:31:38] (step=0043468) Train Loss: 0.4680, Train Steps/Sec: 0.13, Epoch: 8.504793582469183, LR: 0.0003 +[2026-03-03 08:31:46] (step=0043469) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.504989238896497, LR: 0.0003 +[2026-03-03 08:31:54] (step=0043470) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.505184895323811, LR: 0.0003 +[2026-03-03 08:32:02] (step=0043471) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.505380551751125, LR: 0.0003 +[2026-03-03 08:32:10] (step=0043472) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.50557620817844, LR: 0.0003 +[2026-03-03 08:32:18] (step=0043473) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.505771864605752, LR: 0.0003 +[2026-03-03 08:32:25] (step=0043474) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.505967521033066, LR: 0.0003 +[2026-03-03 08:32:33] (step=0043475) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.50616317746038, LR: 0.0003 +[2026-03-03 08:32:41] (step=0043476) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.506358833887694, LR: 0.0003 +[2026-03-03 08:32:49] (step=0043477) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.506554490315008, LR: 0.0003 +[2026-03-03 08:32:57] (step=0043478) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.50675014674232, LR: 0.0003 +[2026-03-03 08:33:05] (step=0043479) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.506945803169634, LR: 0.0003 +[2026-03-03 08:33:13] (step=0043480) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.507141459596948, LR: 0.0003 +[2026-03-03 08:33:20] (step=0043481) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.507337116024262, LR: 0.0003 +[2026-03-03 08:33:28] (step=0043482) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.507532772451576, LR: 0.0003 +[2026-03-03 08:33:36] (step=0043483) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.507728428878888, LR: 0.0003 +[2026-03-03 08:33:44] (step=0043484) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.507924085306202, LR: 0.0003 +[2026-03-03 08:33:52] (step=0043485) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.508119741733516, LR: 0.0003 +[2026-03-03 08:34:00] (step=0043486) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.50831539816083, LR: 0.0003 +[2026-03-03 08:34:08] (step=0043487) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.508511054588142, LR: 0.0003 +[2026-03-03 08:34:16] (step=0043488) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.508706711015456, LR: 0.0003 +[2026-03-03 08:34:23] (step=0043489) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.50890236744277, LR: 0.0003 +[2026-03-03 08:34:31] (step=0043490) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 8.509098023870084, LR: 0.0003 +[2026-03-03 08:34:39] (step=0043491) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.509293680297398, LR: 0.0003 +[2026-03-03 08:34:47] (step=0043492) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.50948933672471, LR: 0.0003 +[2026-03-03 08:34:55] (step=0043493) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.509684993152025, LR: 0.0003 +[2026-03-03 08:35:03] (step=0043494) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.509880649579339, LR: 0.0003 +[2026-03-03 08:35:11] (step=0043495) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 8.510076306006653, LR: 0.0003 +[2026-03-03 08:35:18] (step=0043496) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.510271962433967, LR: 0.0003 +[2026-03-03 08:35:26] (step=0043497) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 8.510467618861279, LR: 0.0003 +[2026-03-03 08:35:34] (step=0043498) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.510663275288593, LR: 0.0003 +[2026-03-03 08:35:42] (step=0043499) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.510858931715907, LR: 0.0003 +[2026-03-03 08:35:50] (step=0043500) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.511054588143221, LR: 0.0003 +[2026-03-03 08:35:50] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0043500/ +[2026-03-03 08:35:58] (step=0043501) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 8.511250244570535, LR: 0.0003 +[2026-03-03 08:36:06] (step=0043502) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.511445900997847, LR: 0.0003 +[2026-03-03 08:36:14] (step=0043503) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.511641557425161, LR: 0.0003 +[2026-03-03 08:36:22] (step=0043504) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.511837213852475, LR: 0.0003 +[2026-03-03 08:36:30] (step=0043505) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.51203287027979, LR: 0.0003 +[2026-03-03 08:36:37] (step=0043506) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.512228526707103, LR: 0.0003 +[2026-03-03 08:36:45] (step=0043507) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.512424183134415, LR: 0.0003 +[2026-03-03 08:36:53] (step=0043508) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.51261983956173, LR: 0.0003 +[2026-03-03 08:37:01] (step=0043509) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.512815495989043, LR: 0.0003 +[2026-03-03 08:37:09] (step=0043510) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.513011152416357, LR: 0.0003 +[2026-03-03 08:37:17] (step=0043511) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.51320680884367, LR: 0.0003 +[2026-03-03 08:37:25] (step=0043512) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.513402465270984, LR: 0.0003 +[2026-03-03 08:37:32] (step=0043513) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 8.513598121698298, LR: 0.0003 +[2026-03-03 08:37:40] (step=0043514) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.513793778125612, LR: 0.0003 +[2026-03-03 08:37:48] (step=0043515) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.513989434552926, LR: 0.0003 +[2026-03-03 08:37:56] (step=0043516) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.514185090980238, LR: 0.0003 +[2026-03-03 08:38:04] (step=0043517) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.514380747407552, LR: 0.0003 +[2026-03-03 08:38:12] (step=0043518) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 8.514576403834866, LR: 0.0003 +[2026-03-03 08:38:20] (step=0043519) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.51477206026218, LR: 0.0003 +[2026-03-03 08:38:28] (step=0043520) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.514967716689494, LR: 0.0003 +[2026-03-03 08:38:36] (step=0043521) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.515163373116806, LR: 0.0003 +[2026-03-03 08:38:44] (step=0043522) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.51535902954412, LR: 0.0003 +[2026-03-03 08:38:51] (step=0043523) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.515554685971434, LR: 0.0003 +[2026-03-03 08:38:59] (step=0043524) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.515750342398748, LR: 0.0003 +[2026-03-03 08:39:07] (step=0043525) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.515945998826062, LR: 0.0003 +[2026-03-03 08:39:15] (step=0043526) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.516141655253374, LR: 0.0003 +[2026-03-03 08:39:23] (step=0043527) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.516337311680688, LR: 0.0003 +[2026-03-03 08:39:31] (step=0043528) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.516532968108002, LR: 0.0003 +[2026-03-03 08:39:39] (step=0043529) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.516728624535316, LR: 0.0003 +[2026-03-03 08:39:46] (step=0043530) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.51692428096263, LR: 0.0003 +[2026-03-03 08:39:54] (step=0043531) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.517119937389943, LR: 0.0003 +[2026-03-03 08:40:02] (step=0043532) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 8.517315593817257, LR: 0.0003 +[2026-03-03 08:40:10] (step=0043533) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.51751125024457, LR: 0.0003 +[2026-03-03 08:40:18] (step=0043534) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.517706906671885, LR: 0.0003 +[2026-03-03 08:40:26] (step=0043535) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.517902563099199, LR: 0.0003 +[2026-03-03 08:40:34] (step=0043536) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.518098219526511, LR: 0.0003 +[2026-03-03 08:40:41] (step=0043537) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.518293875953825, LR: 0.0003 +[2026-03-03 08:40:49] (step=0043538) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.518489532381139, LR: 0.0003 +[2026-03-03 08:40:57] (step=0043539) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.518685188808453, LR: 0.0003 +[2026-03-03 08:41:05] (step=0043540) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.518880845235765, LR: 0.0003 +[2026-03-03 08:41:13] (step=0043541) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.51907650166308, LR: 0.0003 +[2026-03-03 08:41:21] (step=0043542) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.519272158090393, LR: 0.0003 +[2026-03-03 08:41:29] (step=0043543) Train Loss: 0.4353, Train Steps/Sec: 0.12, Epoch: 8.519467814517707, LR: 0.0003 +[2026-03-03 08:41:37] (step=0043544) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.519663470945021, LR: 0.0003 +[2026-03-03 08:41:45] (step=0043545) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.519859127372333, LR: 0.0003 +[2026-03-03 08:41:52] (step=0043546) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.520054783799647, LR: 0.0003 +[2026-03-03 08:42:00] (step=0043547) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 8.520250440226961, LR: 0.0003 +[2026-03-03 08:42:08] (step=0043548) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 8.520446096654275, LR: 0.0003 +[2026-03-03 08:42:16] (step=0043549) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.52064175308159, LR: 0.0003 +[2026-03-03 08:42:24] (step=0043550) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.520837409508902, LR: 0.0003 +[2026-03-03 08:42:32] (step=0043551) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 8.521033065936216, LR: 0.0003 +[2026-03-03 08:42:40] (step=0043552) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.52122872236353, LR: 0.0003 +[2026-03-03 08:42:48] (step=0043553) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.521424378790844, LR: 0.0003 +[2026-03-03 08:42:55] (step=0043554) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.521620035218158, LR: 0.0003 +[2026-03-03 08:43:03] (step=0043555) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.52181569164547, LR: 0.0003 +[2026-03-03 08:43:11] (step=0043556) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.522011348072784, LR: 0.0003 +[2026-03-03 08:43:19] (step=0043557) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.522207004500098, LR: 0.0003 +[2026-03-03 08:43:27] (step=0043558) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.522402660927412, LR: 0.0003 +[2026-03-03 08:43:35] (step=0043559) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.522598317354726, LR: 0.0003 +[2026-03-03 08:43:43] (step=0043560) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.522793973782038, LR: 0.0003 +[2026-03-03 08:43:51] (step=0043561) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.522989630209352, LR: 0.0003 +[2026-03-03 08:43:58] (step=0043562) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.523185286636666, LR: 0.0003 +[2026-03-03 08:44:06] (step=0043563) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.52338094306398, LR: 0.0003 +[2026-03-03 08:44:14] (step=0043564) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.523576599491292, LR: 0.0003 +[2026-03-03 08:44:22] (step=0043565) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.523772255918606, LR: 0.0003 +[2026-03-03 08:44:30] (step=0043566) Train Loss: 0.4440, Train Steps/Sec: 0.12, Epoch: 8.52396791234592, LR: 0.0003 +[2026-03-03 08:44:38] (step=0043567) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.524163568773234, LR: 0.0003 +[2026-03-03 08:44:46] (step=0043568) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.524359225200548, LR: 0.0003 +[2026-03-03 08:44:54] (step=0043569) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.52455488162786, LR: 0.0003 +[2026-03-03 08:45:02] (step=0043570) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.524750538055175, LR: 0.0003 +[2026-03-03 08:45:09] (step=0043571) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.524946194482489, LR: 0.0003 +[2026-03-03 08:45:17] (step=0043572) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.525141850909803, LR: 0.0003 +[2026-03-03 08:45:25] (step=0043573) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.525337507337117, LR: 0.0003 +[2026-03-03 08:45:33] (step=0043574) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.525533163764429, LR: 0.0003 +[2026-03-03 08:45:41] (step=0043575) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.525728820191743, LR: 0.0003 +[2026-03-03 08:45:49] (step=0043576) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.525924476619057, LR: 0.0003 +[2026-03-03 08:45:57] (step=0043577) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.526120133046371, LR: 0.0003 +[2026-03-03 08:46:04] (step=0043578) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.526315789473685, LR: 0.0003 +[2026-03-03 08:46:12] (step=0043579) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.526511445900997, LR: 0.0003 +[2026-03-03 08:46:20] (step=0043580) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.526707102328311, LR: 0.0003 +[2026-03-03 08:46:28] (step=0043581) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.526902758755625, LR: 0.0003 +[2026-03-03 08:46:36] (step=0043582) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.52709841518294, LR: 0.0003 +[2026-03-03 08:46:44] (step=0043583) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.527294071610253, LR: 0.0003 +[2026-03-03 08:46:52] (step=0043584) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.527489728037565, LR: 0.0003 +[2026-03-03 08:46:59] (step=0043585) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.52768538446488, LR: 0.0003 +[2026-03-03 08:47:07] (step=0043586) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.527881040892193, LR: 0.0003 +[2026-03-03 08:47:15] (step=0043587) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.528076697319507, LR: 0.0003 +[2026-03-03 08:47:23] (step=0043588) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.528272353746821, LR: 0.0003 +[2026-03-03 08:47:31] (step=0043589) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.528468010174134, LR: 0.0003 +[2026-03-03 08:47:39] (step=0043590) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.528663666601448, LR: 0.0003 +[2026-03-03 08:47:47] (step=0043591) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.528859323028762, LR: 0.0003 +[2026-03-03 08:47:55] (step=0043592) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.529054979456076, LR: 0.0003 +[2026-03-03 08:48:02] (step=0043593) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 8.529250635883388, LR: 0.0003 +[2026-03-03 08:48:10] (step=0043594) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.529446292310702, LR: 0.0003 +[2026-03-03 08:48:18] (step=0043595) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.529641948738016, LR: 0.0003 +[2026-03-03 08:48:26] (step=0043596) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 8.52983760516533, LR: 0.0003 +[2026-03-03 08:48:34] (step=0043597) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.530033261592644, LR: 0.0003 +[2026-03-03 08:48:42] (step=0043598) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.530228918019956, LR: 0.0003 +[2026-03-03 08:48:50] (step=0043599) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.53042457444727, LR: 0.0003 +[2026-03-03 08:48:58] (step=0043600) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.530620230874584, LR: 0.0003 +[2026-03-03 08:49:05] (step=0043601) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.530815887301898, LR: 0.0003 +[2026-03-03 08:49:13] (step=0043602) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.531011543729212, LR: 0.0003 +[2026-03-03 08:49:21] (step=0043603) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 8.531207200156524, LR: 0.0003 +[2026-03-03 08:49:29] (step=0043604) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.531402856583838, LR: 0.0003 +[2026-03-03 08:49:37] (step=0043605) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.531598513011152, LR: 0.0003 +[2026-03-03 08:49:45] (step=0043606) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.531794169438466, LR: 0.0003 +[2026-03-03 08:49:53] (step=0043607) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.53198982586578, LR: 0.0003 +[2026-03-03 08:50:01] (step=0043608) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.532185482293093, LR: 0.0003 +[2026-03-03 08:50:08] (step=0043609) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.532381138720407, LR: 0.0003 +[2026-03-03 08:50:16] (step=0043610) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.53257679514772, LR: 0.0003 +[2026-03-03 08:50:24] (step=0043611) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.532772451575035, LR: 0.0003 +[2026-03-03 08:50:32] (step=0043612) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.532968108002349, LR: 0.0003 +[2026-03-03 08:50:40] (step=0043613) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.533163764429661, LR: 0.0003 +[2026-03-03 08:50:48] (step=0043614) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 8.533359420856975, LR: 0.0003 +[2026-03-03 08:50:56] (step=0043615) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.533555077284289, LR: 0.0003 +[2026-03-03 08:51:04] (step=0043616) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.533750733711603, LR: 0.0003 +[2026-03-03 08:51:12] (step=0043617) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.533946390138915, LR: 0.0003 +[2026-03-03 08:51:19] (step=0043618) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.53414204656623, LR: 0.0003 +[2026-03-03 08:51:27] (step=0043619) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.534337702993543, LR: 0.0003 +[2026-03-03 08:51:35] (step=0043620) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.534533359420857, LR: 0.0003 +[2026-03-03 08:51:43] (step=0043621) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.534729015848171, LR: 0.0003 +[2026-03-03 08:51:51] (step=0043622) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.534924672275483, LR: 0.0003 +[2026-03-03 08:51:59] (step=0043623) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.535120328702797, LR: 0.0003 +[2026-03-03 08:52:07] (step=0043624) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.535315985130111, LR: 0.0003 +[2026-03-03 08:52:14] (step=0043625) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.535511641557425, LR: 0.0003 +[2026-03-03 08:52:22] (step=0043626) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.53570729798474, LR: 0.0003 +[2026-03-03 08:52:30] (step=0043627) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.535902954412052, LR: 0.0003 +[2026-03-03 08:52:38] (step=0043628) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.536098610839366, LR: 0.0003 +[2026-03-03 08:52:46] (step=0043629) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.53629426726668, LR: 0.0003 +[2026-03-03 08:52:54] (step=0043630) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.536489923693994, LR: 0.0003 +[2026-03-03 08:53:02] (step=0043631) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.536685580121308, LR: 0.0003 +[2026-03-03 08:53:10] (step=0043632) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.53688123654862, LR: 0.0003 +[2026-03-03 08:53:17] (step=0043633) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.537076892975934, LR: 0.0003 +[2026-03-03 08:53:25] (step=0043634) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.537272549403248, LR: 0.0003 +[2026-03-03 08:53:33] (step=0043635) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.537468205830562, LR: 0.0003 +[2026-03-03 08:53:41] (step=0043636) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.537663862257876, LR: 0.0003 +[2026-03-03 08:53:49] (step=0043637) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.537859518685188, LR: 0.0003 +[2026-03-03 08:53:57] (step=0043638) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.538055175112502, LR: 0.0003 +[2026-03-03 08:54:05] (step=0043639) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.538250831539816, LR: 0.0003 +[2026-03-03 08:54:13] (step=0043640) Train Loss: 0.4495, Train Steps/Sec: 0.12, Epoch: 8.53844648796713, LR: 0.0003 +[2026-03-03 08:54:21] (step=0043641) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.538642144394444, LR: 0.0003 +[2026-03-03 08:54:28] (step=0043642) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.538837800821756, LR: 0.0003 +[2026-03-03 08:54:36] (step=0043643) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.53903345724907, LR: 0.0003 +[2026-03-03 08:54:44] (step=0043644) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.539229113676384, LR: 0.0003 +[2026-03-03 08:54:52] (step=0043645) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.539424770103698, LR: 0.0003 +[2026-03-03 08:55:00] (step=0043646) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.53962042653101, LR: 0.0003 +[2026-03-03 08:55:08] (step=0043647) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.539816082958325, LR: 0.0003 +[2026-03-03 08:55:16] (step=0043648) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.540011739385639, LR: 0.0003 +[2026-03-03 08:55:23] (step=0043649) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.540207395812953, LR: 0.0003 +[2026-03-03 08:55:31] (step=0043650) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.540403052240267, LR: 0.0003 +[2026-03-03 08:55:39] (step=0043651) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.540598708667579, LR: 0.0003 +[2026-03-03 08:55:47] (step=0043652) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.540794365094893, LR: 0.0003 +[2026-03-03 08:55:55] (step=0043653) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 8.540990021522207, LR: 0.0003 +[2026-03-03 08:56:03] (step=0043654) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.541185677949521, LR: 0.0003 +[2026-03-03 08:56:11] (step=0043655) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.541381334376835, LR: 0.0003 +[2026-03-03 08:56:19] (step=0043656) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.541576990804147, LR: 0.0003 +[2026-03-03 08:56:26] (step=0043657) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.541772647231461, LR: 0.0003 +[2026-03-03 08:56:34] (step=0043658) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.541968303658775, LR: 0.0003 +[2026-03-03 08:56:42] (step=0043659) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.54216396008609, LR: 0.0003 +[2026-03-03 08:56:50] (step=0043660) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.542359616513403, LR: 0.0003 +[2026-03-03 08:56:58] (step=0043661) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.542555272940715, LR: 0.0003 +[2026-03-03 08:57:06] (step=0043662) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.54275092936803, LR: 0.0003 +[2026-03-03 08:57:14] (step=0043663) Train Loss: 0.4290, Train Steps/Sec: 0.12, Epoch: 8.542946585795343, LR: 0.0003 +[2026-03-03 08:57:22] (step=0043664) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.543142242222658, LR: 0.0003 +[2026-03-03 08:57:30] (step=0043665) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.543337898649972, LR: 0.0003 +[2026-03-03 08:57:37] (step=0043666) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.543533555077284, LR: 0.0003 +[2026-03-03 08:57:45] (step=0043667) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.543729211504598, LR: 0.0003 +[2026-03-03 08:57:53] (step=0043668) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.543924867931912, LR: 0.0003 +[2026-03-03 08:58:01] (step=0043669) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 8.544120524359226, LR: 0.0003 +[2026-03-03 08:58:09] (step=0043670) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 8.544316180786538, LR: 0.0003 +[2026-03-03 08:58:17] (step=0043671) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.544511837213852, LR: 0.0003 +[2026-03-03 08:58:25] (step=0043672) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.544707493641166, LR: 0.0003 +[2026-03-03 08:58:33] (step=0043673) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.54490315006848, LR: 0.0003 +[2026-03-03 08:58:40] (step=0043674) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 8.545098806495794, LR: 0.0003 +[2026-03-03 08:58:48] (step=0043675) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.545294462923106, LR: 0.0003 +[2026-03-03 08:58:56] (step=0043676) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.54549011935042, LR: 0.0003 +[2026-03-03 08:59:04] (step=0043677) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.545685775777734, LR: 0.0003 +[2026-03-03 08:59:12] (step=0043678) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.545881432205048, LR: 0.0003 +[2026-03-03 08:59:20] (step=0043679) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.546077088632362, LR: 0.0003 +[2026-03-03 08:59:28] (step=0043680) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.546272745059674, LR: 0.0003 +[2026-03-03 08:59:36] (step=0043681) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.546468401486988, LR: 0.0003 +[2026-03-03 08:59:43] (step=0043682) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.546664057914303, LR: 0.0003 +[2026-03-03 08:59:51] (step=0043683) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.546859714341617, LR: 0.0003 +[2026-03-03 08:59:59] (step=0043684) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.54705537076893, LR: 0.0003 +[2026-03-03 09:00:07] (step=0043685) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.547251027196243, LR: 0.0003 +[2026-03-03 09:00:15] (step=0043686) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.547446683623557, LR: 0.0003 +[2026-03-03 09:00:23] (step=0043687) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.54764234005087, LR: 0.0003 +[2026-03-03 09:00:31] (step=0043688) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.547837996478185, LR: 0.0003 +[2026-03-03 09:00:39] (step=0043689) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.548033652905499, LR: 0.0003 +[2026-03-03 09:00:46] (step=0043690) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.548229309332811, LR: 0.0003 +[2026-03-03 09:00:54] (step=0043691) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.548424965760125, LR: 0.0003 +[2026-03-03 09:01:02] (step=0043692) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.548620622187439, LR: 0.0003 +[2026-03-03 09:01:10] (step=0043693) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.548816278614753, LR: 0.0003 +[2026-03-03 09:01:18] (step=0043694) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.549011935042065, LR: 0.0003 +[2026-03-03 09:01:26] (step=0043695) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.54920759146938, LR: 0.0003 +[2026-03-03 09:01:34] (step=0043696) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.549403247896693, LR: 0.0003 +[2026-03-03 09:01:41] (step=0043697) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.549598904324007, LR: 0.0003 +[2026-03-03 09:01:49] (step=0043698) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.549794560751321, LR: 0.0003 +[2026-03-03 09:01:57] (step=0043699) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.549990217178634, LR: 0.0003 +[2026-03-03 09:02:05] (step=0043700) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 8.550185873605948, LR: 0.0003 +[2026-03-03 09:02:13] (step=0043701) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 8.550381530033262, LR: 0.0003 +[2026-03-03 09:02:21] (step=0043702) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.550577186460576, LR: 0.0003 +[2026-03-03 09:02:29] (step=0043703) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.55077284288789, LR: 0.0003 +[2026-03-03 09:02:36] (step=0043704) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.550968499315202, LR: 0.0003 +[2026-03-03 09:02:44] (step=0043705) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.551164155742516, LR: 0.0003 +[2026-03-03 09:02:52] (step=0043706) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.55135981216983, LR: 0.0003 +[2026-03-03 09:03:00] (step=0043707) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.551555468597144, LR: 0.0003 +[2026-03-03 09:03:08] (step=0043708) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.551751125024458, LR: 0.0003 +[2026-03-03 09:03:16] (step=0043709) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.55194678145177, LR: 0.0003 +[2026-03-03 09:03:24] (step=0043710) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.552142437879084, LR: 0.0003 +[2026-03-03 09:03:31] (step=0043711) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.552338094306398, LR: 0.0003 +[2026-03-03 09:03:39] (step=0043712) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.552533750733712, LR: 0.0003 +[2026-03-03 09:03:47] (step=0043713) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.552729407161026, LR: 0.0003 +[2026-03-03 09:03:55] (step=0043714) Train Loss: 0.4467, Train Steps/Sec: 0.12, Epoch: 8.552925063588338, LR: 0.0003 +[2026-03-03 09:04:03] (step=0043715) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.553120720015652, LR: 0.0003 +[2026-03-03 09:04:11] (step=0043716) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.553316376442966, LR: 0.0003 +[2026-03-03 09:04:19] (step=0043717) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.55351203287028, LR: 0.0003 +[2026-03-03 09:04:27] (step=0043718) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.553707689297594, LR: 0.0003 +[2026-03-03 09:04:35] (step=0043719) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.553903345724907, LR: 0.0003 +[2026-03-03 09:04:42] (step=0043720) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.55409900215222, LR: 0.0003 +[2026-03-03 09:04:50] (step=0043721) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.554294658579535, LR: 0.0003 +[2026-03-03 09:04:58] (step=0043722) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.554490315006849, LR: 0.0003 +[2026-03-03 09:05:06] (step=0043723) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.55468597143416, LR: 0.0003 +[2026-03-03 09:05:14] (step=0043724) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.554881627861475, LR: 0.0003 +[2026-03-03 09:05:22] (step=0043725) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.555077284288789, LR: 0.0003 +[2026-03-03 09:05:30] (step=0043726) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.555272940716103, LR: 0.0003 +[2026-03-03 09:05:38] (step=0043727) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 8.555468597143417, LR: 0.0003 +[2026-03-03 09:05:45] (step=0043728) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.555664253570729, LR: 0.0003 +[2026-03-03 09:05:53] (step=0043729) Train Loss: 0.4429, Train Steps/Sec: 0.12, Epoch: 8.555859909998043, LR: 0.0003 +[2026-03-03 09:06:01] (step=0043730) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.556055566425357, LR: 0.0003 +[2026-03-03 09:06:09] (step=0043731) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.556251222852671, LR: 0.0003 +[2026-03-03 09:06:17] (step=0043732) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.556446879279985, LR: 0.0003 +[2026-03-03 09:06:25] (step=0043733) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.556642535707297, LR: 0.0003 +[2026-03-03 09:06:33] (step=0043734) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.556838192134611, LR: 0.0003 +[2026-03-03 09:06:41] (step=0043735) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.557033848561925, LR: 0.0003 +[2026-03-03 09:06:48] (step=0043736) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.55722950498924, LR: 0.0003 +[2026-03-03 09:06:56] (step=0043737) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.557425161416553, LR: 0.0003 +[2026-03-03 09:07:04] (step=0043738) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.557620817843866, LR: 0.0003 +[2026-03-03 09:07:12] (step=0043739) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.55781647427118, LR: 0.0003 +[2026-03-03 09:07:20] (step=0043740) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.558012130698494, LR: 0.0003 +[2026-03-03 09:07:28] (step=0043741) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.558207787125808, LR: 0.0003 +[2026-03-03 09:07:36] (step=0043742) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.558403443553122, LR: 0.0003 +[2026-03-03 09:07:44] (step=0043743) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.558599099980434, LR: 0.0003 +[2026-03-03 09:07:51] (step=0043744) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.558794756407748, LR: 0.0003 +[2026-03-03 09:07:59] (step=0043745) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.558990412835062, LR: 0.0003 +[2026-03-03 09:08:07] (step=0043746) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 8.559186069262376, LR: 0.0003 +[2026-03-03 09:08:15] (step=0043747) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.559381725689688, LR: 0.0003 +[2026-03-03 09:08:23] (step=0043748) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.559577382117002, LR: 0.0003 +[2026-03-03 09:08:31] (step=0043749) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.559773038544316, LR: 0.0003 +[2026-03-03 09:08:39] (step=0043750) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.55996869497163, LR: 0.0003 +[2026-03-03 09:08:47] (step=0043751) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.560164351398944, LR: 0.0003 +[2026-03-03 09:08:54] (step=0043752) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 8.560360007826256, LR: 0.0003 +[2026-03-03 09:09:02] (step=0043753) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 8.56055566425357, LR: 0.0003 +[2026-03-03 09:09:10] (step=0043754) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.560751320680884, LR: 0.0003 +[2026-03-03 09:09:18] (step=0043755) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.560946977108198, LR: 0.0003 +[2026-03-03 09:09:26] (step=0043756) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.561142633535512, LR: 0.0003 +[2026-03-03 09:09:34] (step=0043757) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.561338289962825, LR: 0.0003 +[2026-03-03 09:09:42] (step=0043758) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.561533946390139, LR: 0.0003 +[2026-03-03 09:09:49] (step=0043759) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.561729602817453, LR: 0.0003 +[2026-03-03 09:09:57] (step=0043760) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.561925259244767, LR: 0.0003 +[2026-03-03 09:10:05] (step=0043761) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.56212091567208, LR: 0.0003 +[2026-03-03 09:10:13] (step=0043762) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.562316572099393, LR: 0.0003 +[2026-03-03 09:10:21] (step=0043763) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.562512228526707, LR: 0.0003 +[2026-03-03 09:10:29] (step=0043764) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.56270788495402, LR: 0.0003 +[2026-03-03 09:10:37] (step=0043765) Train Loss: 0.4438, Train Steps/Sec: 0.12, Epoch: 8.562903541381335, LR: 0.0003 +[2026-03-03 09:10:45] (step=0043766) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.563099197808649, LR: 0.0003 +[2026-03-03 09:10:53] (step=0043767) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.563294854235961, LR: 0.0003 +[2026-03-03 09:11:00] (step=0043768) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.563490510663275, LR: 0.0003 +[2026-03-03 09:11:08] (step=0043769) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.563686167090589, LR: 0.0003 +[2026-03-03 09:11:16] (step=0043770) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.563881823517903, LR: 0.0003 +[2026-03-03 09:11:24] (step=0043771) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.564077479945217, LR: 0.0003 +[2026-03-03 09:11:32] (step=0043772) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.56427313637253, LR: 0.0003 +[2026-03-03 09:11:40] (step=0043773) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 8.564468792799843, LR: 0.0003 +[2026-03-03 09:11:48] (step=0043774) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.564664449227157, LR: 0.0003 +[2026-03-03 09:11:56] (step=0043775) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.564860105654471, LR: 0.0003 +[2026-03-03 09:12:03] (step=0043776) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.565055762081784, LR: 0.0003 +[2026-03-03 09:12:11] (step=0043777) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.565251418509098, LR: 0.0003 +[2026-03-03 09:12:19] (step=0043778) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.565447074936412, LR: 0.0003 +[2026-03-03 09:12:27] (step=0043779) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.565642731363726, LR: 0.0003 +[2026-03-03 09:12:35] (step=0043780) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.56583838779104, LR: 0.0003 +[2026-03-03 09:12:43] (step=0043781) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.566034044218352, LR: 0.0003 +[2026-03-03 09:12:51] (step=0043782) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 8.566229700645666, LR: 0.0003 +[2026-03-03 09:12:59] (step=0043783) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.56642535707298, LR: 0.0003 +[2026-03-03 09:13:06] (step=0043784) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.566621013500294, LR: 0.0003 +[2026-03-03 09:13:14] (step=0043785) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.566816669927608, LR: 0.0003 +[2026-03-03 09:13:22] (step=0043786) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.56701232635492, LR: 0.0003 +[2026-03-03 09:13:30] (step=0043787) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.567207982782234, LR: 0.0003 +[2026-03-03 09:13:38] (step=0043788) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.567403639209548, LR: 0.0003 +[2026-03-03 09:13:46] (step=0043789) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.567599295636862, LR: 0.0003 +[2026-03-03 09:13:54] (step=0043790) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.567794952064176, LR: 0.0003 +[2026-03-03 09:14:01] (step=0043791) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 8.567990608491488, LR: 0.0003 +[2026-03-03 09:14:09] (step=0043792) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.568186264918802, LR: 0.0003 +[2026-03-03 09:14:17] (step=0043793) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.568381921346116, LR: 0.0003 +[2026-03-03 09:14:25] (step=0043794) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.56857757777343, LR: 0.0003 +[2026-03-03 09:14:33] (step=0043795) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.568773234200744, LR: 0.0003 +[2026-03-03 09:14:41] (step=0043796) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.568968890628057, LR: 0.0003 +[2026-03-03 09:14:49] (step=0043797) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.56916454705537, LR: 0.0003 +[2026-03-03 09:14:57] (step=0043798) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.569360203482685, LR: 0.0003 +[2026-03-03 09:15:04] (step=0043799) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.569555859909999, LR: 0.0003 +[2026-03-03 09:15:12] (step=0043800) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.56975151633731, LR: 0.0003 +[2026-03-03 09:15:20] (step=0043801) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 8.569947172764625, LR: 0.0003 +[2026-03-03 09:15:28] (step=0043802) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 8.570142829191939, LR: 0.0003 +[2026-03-03 09:15:36] (step=0043803) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.570338485619253, LR: 0.0003 +[2026-03-03 09:15:44] (step=0043804) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.570534142046567, LR: 0.0003 +[2026-03-03 09:15:52] (step=0043805) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 8.570729798473879, LR: 0.0003 +[2026-03-03 09:15:59] (step=0043806) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.570925454901193, LR: 0.0003 +[2026-03-03 09:16:07] (step=0043807) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.571121111328507, LR: 0.0003 +[2026-03-03 09:16:15] (step=0043808) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 8.571316767755821, LR: 0.0003 +[2026-03-03 09:16:23] (step=0043809) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.571512424183135, LR: 0.0003 +[2026-03-03 09:16:31] (step=0043810) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.571708080610447, LR: 0.0003 +[2026-03-03 09:16:39] (step=0043811) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.571903737037761, LR: 0.0003 +[2026-03-03 09:16:47] (step=0043812) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 8.572099393465075, LR: 0.0003 +[2026-03-03 09:16:54] (step=0043813) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.57229504989239, LR: 0.0003 +[2026-03-03 09:17:02] (step=0043814) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 8.572490706319703, LR: 0.0003 +[2026-03-03 09:17:10] (step=0043815) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.572686362747016, LR: 0.0003 +[2026-03-03 09:17:18] (step=0043816) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.57288201917433, LR: 0.0003 +[2026-03-03 09:17:26] (step=0043817) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.573077675601644, LR: 0.0003 +[2026-03-03 09:17:34] (step=0043818) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.573273332028958, LR: 0.0003 +[2026-03-03 09:17:42] (step=0043819) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.573468988456272, LR: 0.0003 +[2026-03-03 09:17:50] (step=0043820) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 8.573664644883584, LR: 0.0003 +[2026-03-03 09:17:58] (step=0043821) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 8.573860301310898, LR: 0.0003 +[2026-03-03 09:18:05] (step=0043822) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.574055957738212, LR: 0.0003 +[2026-03-03 09:18:13] (step=0043823) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.574251614165526, LR: 0.0003 +[2026-03-03 09:18:21] (step=0043824) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.57444727059284, LR: 0.0003 +[2026-03-03 09:18:29] (step=0043825) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.574642927020152, LR: 0.0003 +[2026-03-03 09:18:37] (step=0043826) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.574838583447466, LR: 0.0003 +[2026-03-03 09:18:45] (step=0043827) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.57503423987478, LR: 0.0003 +[2026-03-03 09:18:53] (step=0043828) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.575229896302094, LR: 0.0003 +[2026-03-03 09:19:01] (step=0043829) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.575425552729406, LR: 0.0003 +[2026-03-03 09:19:08] (step=0043830) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.57562120915672, LR: 0.0003 +[2026-03-03 09:19:16] (step=0043831) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 8.575816865584034, LR: 0.0003 +[2026-03-03 09:19:24] (step=0043832) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.576012522011348, LR: 0.0003 +[2026-03-03 09:19:32] (step=0043833) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.576208178438662, LR: 0.0003 +[2026-03-03 09:19:40] (step=0043834) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.576403834865975, LR: 0.0003 +[2026-03-03 09:19:48] (step=0043835) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.576599491293289, LR: 0.0003 +[2026-03-03 09:19:56] (step=0043836) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.576795147720603, LR: 0.0003 +[2026-03-03 09:20:04] (step=0043837) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.576990804147917, LR: 0.0003 +[2026-03-03 09:20:11] (step=0043838) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.57718646057523, LR: 0.0003 +[2026-03-03 09:20:19] (step=0043839) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 8.577382117002543, LR: 0.0003 +[2026-03-03 09:20:27] (step=0043840) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 8.577577773429857, LR: 0.0003 +[2026-03-03 09:20:35] (step=0043841) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 8.57777342985717, LR: 0.0003 +[2026-03-03 09:20:43] (step=0043842) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.577969086284485, LR: 0.0003 +[2026-03-03 09:20:51] (step=0043843) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.578164742711799, LR: 0.0003 +[2026-03-03 09:20:59] (step=0043844) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 8.578360399139111, LR: 0.0003 +[2026-03-03 09:21:06] (step=0043845) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.578556055566425, LR: 0.0003 +[2026-03-03 09:21:14] (step=0043846) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.578751711993739, LR: 0.0003 +[2026-03-03 09:21:22] (step=0043847) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.578947368421053, LR: 0.0003 +[2026-03-03 09:21:30] (step=0043848) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.579143024848367, LR: 0.0003 +[2026-03-03 09:21:38] (step=0043849) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.57933868127568, LR: 0.0003 +[2026-03-03 09:21:46] (step=0043850) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.579534337702993, LR: 0.0003 +[2026-03-03 09:21:54] (step=0043851) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.579729994130307, LR: 0.0003 +[2026-03-03 09:22:01] (step=0043852) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.579925650557621, LR: 0.0003 +[2026-03-03 09:22:09] (step=0043853) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.580121306984934, LR: 0.0003 +[2026-03-03 09:22:17] (step=0043854) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 8.580316963412248, LR: 0.0003 +[2026-03-03 09:22:25] (step=0043855) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.580512619839562, LR: 0.0003 +[2026-03-03 09:22:33] (step=0043856) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.580708276266876, LR: 0.0003 +[2026-03-03 09:22:41] (step=0043857) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.58090393269419, LR: 0.0003 +[2026-03-03 09:22:49] (step=0043858) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.581099589121502, LR: 0.0003 +[2026-03-03 09:22:56] (step=0043859) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 8.581295245548816, LR: 0.0003 +[2026-03-03 09:23:04] (step=0043860) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.58149090197613, LR: 0.0003 +[2026-03-03 09:23:12] (step=0043861) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.581686558403444, LR: 0.0003 +[2026-03-03 09:23:20] (step=0043862) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.581882214830758, LR: 0.0003 +[2026-03-03 09:23:28] (step=0043863) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.58207787125807, LR: 0.0003 +[2026-03-03 09:23:36] (step=0043864) Train Loss: 0.4428, Train Steps/Sec: 0.12, Epoch: 8.582273527685384, LR: 0.0003 +[2026-03-03 09:23:44] (step=0043865) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.582469184112698, LR: 0.0003 +[2026-03-03 09:23:52] (step=0043866) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.582664840540012, LR: 0.0003 +[2026-03-03 09:24:00] (step=0043867) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.582860496967326, LR: 0.0003 +[2026-03-03 09:24:07] (step=0043868) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 8.583056153394638, LR: 0.0003 +[2026-03-03 09:24:15] (step=0043869) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.583251809821952, LR: 0.0003 +[2026-03-03 09:24:23] (step=0043870) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.583447466249266, LR: 0.0003 +[2026-03-03 09:24:31] (step=0043871) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.58364312267658, LR: 0.0003 +[2026-03-03 09:24:39] (step=0043872) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.583838779103894, LR: 0.0003 +[2026-03-03 09:24:47] (step=0043873) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.584034435531207, LR: 0.0003 +[2026-03-03 09:24:55] (step=0043874) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.58423009195852, LR: 0.0003 +[2026-03-03 09:25:02] (step=0043875) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.584425748385835, LR: 0.0003 +[2026-03-03 09:25:10] (step=0043876) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.584621404813149, LR: 0.0003 +[2026-03-03 09:25:18] (step=0043877) Train Loss: 0.4395, Train Steps/Sec: 0.12, Epoch: 8.584817061240463, LR: 0.0003 +[2026-03-03 09:25:26] (step=0043878) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.585012717667775, LR: 0.0003 +[2026-03-03 09:25:34] (step=0043879) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.585208374095089, LR: 0.0003 +[2026-03-03 09:25:42] (step=0043880) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.585404030522403, LR: 0.0003 +[2026-03-03 09:25:50] (step=0043881) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.585599686949717, LR: 0.0003 +[2026-03-03 09:25:58] (step=0043882) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.585795343377029, LR: 0.0003 +[2026-03-03 09:26:05] (step=0043883) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.585990999804343, LR: 0.0003 +[2026-03-03 09:26:13] (step=0043884) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 8.586186656231657, LR: 0.0003 +[2026-03-03 09:26:21] (step=0043885) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.586382312658971, LR: 0.0003 +[2026-03-03 09:26:29] (step=0043886) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 8.586577969086285, LR: 0.0003 +[2026-03-03 09:26:37] (step=0043887) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.586773625513597, LR: 0.0003 +[2026-03-03 09:26:45] (step=0043888) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.586969281940911, LR: 0.0003 +[2026-03-03 09:26:53] (step=0043889) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 8.587164938368225, LR: 0.0003 +[2026-03-03 09:27:01] (step=0043890) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.58736059479554, LR: 0.0003 +[2026-03-03 09:27:08] (step=0043891) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.587556251222853, LR: 0.0003 +[2026-03-03 09:27:16] (step=0043892) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.587751907650166, LR: 0.0003 +[2026-03-03 09:27:24] (step=0043893) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.58794756407748, LR: 0.0003 +[2026-03-03 09:27:32] (step=0043894) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.588143220504794, LR: 0.0003 +[2026-03-03 09:27:40] (step=0043895) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.588338876932108, LR: 0.0003 +[2026-03-03 09:27:48] (step=0043896) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.588534533359422, LR: 0.0003 +[2026-03-03 09:27:56] (step=0043897) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.588730189786734, LR: 0.0003 +[2026-03-03 09:28:04] (step=0043898) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.588925846214048, LR: 0.0003 +[2026-03-03 09:28:11] (step=0043899) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.589121502641362, LR: 0.0003 +[2026-03-03 09:28:19] (step=0043900) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.589317159068676, LR: 0.0003 +[2026-03-03 09:28:27] (step=0043901) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.58951281549599, LR: 0.0003 +[2026-03-03 09:28:35] (step=0043902) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.589708471923302, LR: 0.0003 +[2026-03-03 09:28:43] (step=0043903) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.589904128350616, LR: 0.0003 +[2026-03-03 09:28:51] (step=0043904) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.59009978477793, LR: 0.0003 +[2026-03-03 09:28:59] (step=0043905) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.590295441205244, LR: 0.0003 +[2026-03-03 09:29:07] (step=0043906) Train Loss: 0.4454, Train Steps/Sec: 0.12, Epoch: 8.590491097632556, LR: 0.0003 +[2026-03-03 09:29:14] (step=0043907) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 8.59068675405987, LR: 0.0003 +[2026-03-03 09:29:22] (step=0043908) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.590882410487184, LR: 0.0003 +[2026-03-03 09:29:30] (step=0043909) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 8.591078066914498, LR: 0.0003 +[2026-03-03 09:29:38] (step=0043910) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.591273723341812, LR: 0.0003 +[2026-03-03 09:29:46] (step=0043911) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.591469379769125, LR: 0.0003 +[2026-03-03 09:29:54] (step=0043912) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.591665036196439, LR: 0.0003 +[2026-03-03 09:30:02] (step=0043913) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 8.591860692623753, LR: 0.0003 +[2026-03-03 09:30:10] (step=0043914) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.592056349051067, LR: 0.0003 +[2026-03-03 09:30:17] (step=0043915) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.59225200547838, LR: 0.0003 +[2026-03-03 09:30:25] (step=0043916) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.592447661905693, LR: 0.0003 +[2026-03-03 09:30:33] (step=0043917) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.592643318333007, LR: 0.0003 +[2026-03-03 09:30:41] (step=0043918) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.59283897476032, LR: 0.0003 +[2026-03-03 09:30:49] (step=0043919) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.593034631187635, LR: 0.0003 +[2026-03-03 09:30:57] (step=0043920) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.593230287614949, LR: 0.0003 +[2026-03-03 09:31:05] (step=0043921) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.593425944042261, LR: 0.0003 +[2026-03-03 09:31:13] (step=0043922) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 8.593621600469575, LR: 0.0003 +[2026-03-03 09:31:20] (step=0043923) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.59381725689689, LR: 0.0003 +[2026-03-03 09:31:28] (step=0043924) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.594012913324203, LR: 0.0003 +[2026-03-03 09:31:36] (step=0043925) Train Loss: 0.4384, Train Steps/Sec: 0.12, Epoch: 8.594208569751517, LR: 0.0003 +[2026-03-03 09:31:44] (step=0043926) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.59440422617883, LR: 0.0003 +[2026-03-03 09:31:52] (step=0043927) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.594599882606143, LR: 0.0003 +[2026-03-03 09:32:00] (step=0043928) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.594795539033457, LR: 0.0003 +[2026-03-03 09:32:08] (step=0043929) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.594991195460771, LR: 0.0003 +[2026-03-03 09:32:16] (step=0043930) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.595186851888085, LR: 0.0003 +[2026-03-03 09:32:24] (step=0043931) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.595382508315398, LR: 0.0003 +[2026-03-03 09:32:31] (step=0043932) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.595578164742712, LR: 0.0003 +[2026-03-03 09:32:39] (step=0043933) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.595773821170026, LR: 0.0003 +[2026-03-03 09:32:47] (step=0043934) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.59596947759734, LR: 0.0003 +[2026-03-03 09:32:55] (step=0043935) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.596165134024652, LR: 0.0003 +[2026-03-03 09:33:03] (step=0043936) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.596360790451966, LR: 0.0003 +[2026-03-03 09:33:11] (step=0043937) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 8.59655644687928, LR: 0.0003 +[2026-03-03 09:33:19] (step=0043938) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.596752103306594, LR: 0.0003 +[2026-03-03 09:33:27] (step=0043939) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.596947759733908, LR: 0.0003 +[2026-03-03 09:33:34] (step=0043940) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.59714341616122, LR: 0.0003 +[2026-03-03 09:33:42] (step=0043941) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 8.597339072588534, LR: 0.0003 +[2026-03-03 09:33:50] (step=0043942) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.597534729015848, LR: 0.0003 +[2026-03-03 09:33:58] (step=0043943) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.597730385443162, LR: 0.0003 +[2026-03-03 09:34:06] (step=0043944) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.597926041870476, LR: 0.0003 +[2026-03-03 09:34:14] (step=0043945) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.598121698297788, LR: 0.0003 +[2026-03-03 09:34:22] (step=0043946) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.598317354725102, LR: 0.0003 +[2026-03-03 09:34:29] (step=0043947) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.598513011152416, LR: 0.0003 +[2026-03-03 09:34:37] (step=0043948) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.59870866757973, LR: 0.0003 +[2026-03-03 09:34:45] (step=0043949) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.598904324007044, LR: 0.0003 +[2026-03-03 09:34:53] (step=0043950) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.599099980434357, LR: 0.0003 +[2026-03-03 09:35:01] (step=0043951) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.59929563686167, LR: 0.0003 +[2026-03-03 09:35:09] (step=0043952) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.599491293288985, LR: 0.0003 +[2026-03-03 09:35:17] (step=0043953) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.599686949716299, LR: 0.0003 +[2026-03-03 09:35:24] (step=0043954) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.599882606143613, LR: 0.0003 +[2026-03-03 09:35:33] (step=0043955) Train Loss: 0.4502, Train Steps/Sec: 0.12, Epoch: 8.600078262570925, LR: 0.0003 +[2026-03-03 09:35:40] (step=0043956) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.600273918998239, LR: 0.0003 +[2026-03-03 09:35:48] (step=0043957) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.600469575425553, LR: 0.0003 +[2026-03-03 09:35:56] (step=0043958) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 8.600665231852867, LR: 0.0003 +[2026-03-03 09:36:04] (step=0043959) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.60086088828018, LR: 0.0003 +[2026-03-03 09:36:12] (step=0043960) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.601056544707493, LR: 0.0003 +[2026-03-03 09:36:20] (step=0043961) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.601252201134807, LR: 0.0003 +[2026-03-03 09:36:28] (step=0043962) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.601447857562121, LR: 0.0003 +[2026-03-03 09:36:35] (step=0043963) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.601643513989435, LR: 0.0003 +[2026-03-03 09:36:43] (step=0043964) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.601839170416747, LR: 0.0003 +[2026-03-03 09:36:51] (step=0043965) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.602034826844061, LR: 0.0003 +[2026-03-03 09:36:59] (step=0043966) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.602230483271375, LR: 0.0003 +[2026-03-03 09:37:07] (step=0043967) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.60242613969869, LR: 0.0003 +[2026-03-03 09:37:15] (step=0043968) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.602621796126003, LR: 0.0003 +[2026-03-03 09:37:23] (step=0043969) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.602817452553316, LR: 0.0003 +[2026-03-03 09:37:30] (step=0043970) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.60301310898063, LR: 0.0003 +[2026-03-03 09:37:38] (step=0043971) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.603208765407944, LR: 0.0003 +[2026-03-03 09:37:46] (step=0043972) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.603404421835258, LR: 0.0003 +[2026-03-03 09:37:54] (step=0043973) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.603600078262572, LR: 0.0003 +[2026-03-03 09:38:02] (step=0043974) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.603795734689884, LR: 0.0003 +[2026-03-03 09:38:10] (step=0043975) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.603991391117198, LR: 0.0003 +[2026-03-03 09:38:18] (step=0043976) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.604187047544512, LR: 0.0003 +[2026-03-03 09:38:26] (step=0043977) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 8.604382703971826, LR: 0.0003 +[2026-03-03 09:38:34] (step=0043978) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.60457836039914, LR: 0.0003 +[2026-03-03 09:38:41] (step=0043979) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 8.604774016826452, LR: 0.0003 +[2026-03-03 09:38:49] (step=0043980) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.604969673253766, LR: 0.0003 +[2026-03-03 09:38:57] (step=0043981) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.60516532968108, LR: 0.0003 +[2026-03-03 09:39:05] (step=0043982) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.605360986108394, LR: 0.0003 +[2026-03-03 09:39:13] (step=0043983) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.605556642535708, LR: 0.0003 +[2026-03-03 09:39:21] (step=0043984) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.60575229896302, LR: 0.0003 +[2026-03-03 09:39:29] (step=0043985) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.605947955390334, LR: 0.0003 +[2026-03-03 09:39:36] (step=0043986) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.606143611817648, LR: 0.0003 +[2026-03-03 09:39:44] (step=0043987) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.606339268244962, LR: 0.0003 +[2026-03-03 09:39:52] (step=0043988) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.606534924672275, LR: 0.0003 +[2026-03-03 09:40:00] (step=0043989) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.606730581099589, LR: 0.0003 +[2026-03-03 09:40:08] (step=0043990) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 8.606926237526903, LR: 0.0003 +[2026-03-03 09:40:16] (step=0043991) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.607121893954217, LR: 0.0003 +[2026-03-03 09:40:24] (step=0043992) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.60731755038153, LR: 0.0003 +[2026-03-03 09:40:32] (step=0043993) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.607513206808843, LR: 0.0003 +[2026-03-03 09:40:39] (step=0043994) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.607708863236157, LR: 0.0003 +[2026-03-03 09:40:47] (step=0043995) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.607904519663471, LR: 0.0003 +[2026-03-03 09:40:55] (step=0043996) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.608100176090785, LR: 0.0003 +[2026-03-03 09:41:03] (step=0043997) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.608295832518099, LR: 0.0003 +[2026-03-03 09:41:11] (step=0043998) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.608491488945411, LR: 0.0003 +[2026-03-03 09:41:19] (step=0043999) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.608687145372725, LR: 0.0003 +[2026-03-03 09:41:27] (step=0044000) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.60888280180004, LR: 0.0003 +[2026-03-03 09:41:27] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0044000/ +[2026-03-03 09:41:34] (step=0044001) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.609078458227353, LR: 0.0003 +[2026-03-03 09:41:42] (step=0044002) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.609274114654667, LR: 0.0003 +[2026-03-03 09:41:50] (step=0044003) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.60946977108198, LR: 0.0003 +[2026-03-03 09:41:58] (step=0044004) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 8.609665427509293, LR: 0.0003 +[2026-03-03 09:42:06] (step=0044005) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.609861083936607, LR: 0.0003 +[2026-03-03 09:42:14] (step=0044006) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 8.610056740363921, LR: 0.0003 +[2026-03-03 09:42:22] (step=0044007) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.610252396791235, LR: 0.0003 +[2026-03-03 09:42:30] (step=0044008) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.610448053218548, LR: 0.0003 +[2026-03-03 09:42:38] (step=0044009) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.610643709645862, LR: 0.0003 +[2026-03-03 09:42:45] (step=0044010) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.610839366073176, LR: 0.0003 +[2026-03-03 09:42:53] (step=0044011) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.61103502250049, LR: 0.0003 +[2026-03-03 09:43:01] (step=0044012) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 8.611230678927802, LR: 0.0003 +[2026-03-03 09:43:09] (step=0044013) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.611426335355116, LR: 0.0003 +[2026-03-03 09:43:17] (step=0044014) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.61162199178243, LR: 0.0003 +[2026-03-03 09:43:25] (step=0044015) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.611817648209744, LR: 0.0003 +[2026-03-03 09:43:33] (step=0044016) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.612013304637058, LR: 0.0003 +[2026-03-03 09:43:41] (step=0044017) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.61220896106437, LR: 0.0003 +[2026-03-03 09:43:48] (step=0044018) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.612404617491684, LR: 0.0003 +[2026-03-03 09:43:56] (step=0044019) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 8.612600273918998, LR: 0.0003 +[2026-03-03 09:44:04] (step=0044020) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 8.612795930346312, LR: 0.0003 +[2026-03-03 09:44:12] (step=0044021) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.612991586773626, LR: 0.0003 +[2026-03-03 09:44:20] (step=0044022) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.613187243200938, LR: 0.0003 +[2026-03-03 09:44:28] (step=0044023) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.613382899628252, LR: 0.0003 +[2026-03-03 09:44:36] (step=0044024) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.613578556055566, LR: 0.0003 +[2026-03-03 09:44:43] (step=0044025) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.61377421248288, LR: 0.0003 +[2026-03-03 09:44:51] (step=0044026) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.613969868910194, LR: 0.0003 +[2026-03-03 09:44:59] (step=0044027) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.614165525337507, LR: 0.0003 +[2026-03-03 09:45:07] (step=0044028) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 8.61436118176482, LR: 0.0003 +[2026-03-03 09:45:15] (step=0044029) Train Loss: 0.4516, Train Steps/Sec: 0.12, Epoch: 8.614556838192135, LR: 0.0003 +[2026-03-03 09:45:23] (step=0044030) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.614752494619449, LR: 0.0003 +[2026-03-03 09:45:31] (step=0044031) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 8.614948151046763, LR: 0.0003 +[2026-03-03 09:45:39] (step=0044032) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.615143807474075, LR: 0.0003 +[2026-03-03 09:45:46] (step=0044033) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.615339463901389, LR: 0.0003 +[2026-03-03 09:45:54] (step=0044034) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.615535120328703, LR: 0.0003 +[2026-03-03 09:46:02] (step=0044035) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.615730776756017, LR: 0.0003 +[2026-03-03 09:46:10] (step=0044036) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.615926433183331, LR: 0.0003 +[2026-03-03 09:46:18] (step=0044037) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.616122089610643, LR: 0.0003 +[2026-03-03 09:46:26] (step=0044038) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.616317746037957, LR: 0.0003 +[2026-03-03 09:46:34] (step=0044039) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 8.616513402465271, LR: 0.0003 +[2026-03-03 09:46:41] (step=0044040) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.616709058892585, LR: 0.0003 +[2026-03-03 09:46:49] (step=0044041) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.616904715319897, LR: 0.0003 +[2026-03-03 09:46:57] (step=0044042) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.617100371747211, LR: 0.0003 +[2026-03-03 09:47:05] (step=0044043) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 8.617296028174525, LR: 0.0003 +[2026-03-03 09:47:13] (step=0044044) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.61749168460184, LR: 0.0003 +[2026-03-03 09:47:21] (step=0044045) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.617687341029153, LR: 0.0003 +[2026-03-03 09:47:29] (step=0044046) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.617882997456466, LR: 0.0003 +[2026-03-03 09:47:37] (step=0044047) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.61807865388378, LR: 0.0003 +[2026-03-03 09:47:45] (step=0044048) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.618274310311094, LR: 0.0003 +[2026-03-03 09:47:52] (step=0044049) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.618469966738408, LR: 0.0003 +[2026-03-03 09:48:00] (step=0044050) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.618665623165722, LR: 0.0003 +[2026-03-03 09:48:08] (step=0044051) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.618861279593034, LR: 0.0003 +[2026-03-03 09:48:16] (step=0044052) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.619056936020348, LR: 0.0003 +[2026-03-03 09:48:24] (step=0044053) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.619252592447662, LR: 0.0003 +[2026-03-03 09:48:32] (step=0044054) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 8.619448248874976, LR: 0.0003 +[2026-03-03 09:48:40] (step=0044055) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.61964390530229, LR: 0.0003 +[2026-03-03 09:48:48] (step=0044056) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.619839561729602, LR: 0.0003 +[2026-03-03 09:48:56] (step=0044057) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.620035218156916, LR: 0.0003 +[2026-03-03 09:49:03] (step=0044058) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 8.62023087458423, LR: 0.0003 +[2026-03-03 09:49:11] (step=0044059) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.620426531011544, LR: 0.0003 +[2026-03-03 09:49:19] (step=0044060) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.620622187438858, LR: 0.0003 +[2026-03-03 09:49:27] (step=0044061) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.62081784386617, LR: 0.0003 +[2026-03-03 09:49:35] (step=0044062) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.621013500293484, LR: 0.0003 +[2026-03-03 09:49:43] (step=0044063) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.621209156720798, LR: 0.0003 +[2026-03-03 09:49:51] (step=0044064) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 8.621404813148112, LR: 0.0003 +[2026-03-03 09:49:59] (step=0044065) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.621600469575425, LR: 0.0003 +[2026-03-03 09:50:06] (step=0044066) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.621796126002739, LR: 0.0003 +[2026-03-03 09:50:14] (step=0044067) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.621991782430053, LR: 0.0003 +[2026-03-03 09:50:22] (step=0044068) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.622187438857367, LR: 0.0003 +[2026-03-03 09:50:30] (step=0044069) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.62238309528468, LR: 0.0003 +[2026-03-03 09:50:38] (step=0044070) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.622578751711993, LR: 0.0003 +[2026-03-03 09:50:46] (step=0044071) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.622774408139307, LR: 0.0003 +[2026-03-03 09:50:54] (step=0044072) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.622970064566621, LR: 0.0003 +[2026-03-03 09:51:02] (step=0044073) Train Loss: 0.4510, Train Steps/Sec: 0.12, Epoch: 8.623165720993935, LR: 0.0003 +[2026-03-03 09:51:10] (step=0044074) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.623361377421249, LR: 0.0003 +[2026-03-03 09:51:17] (step=0044075) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.623557033848561, LR: 0.0003 +[2026-03-03 09:51:25] (step=0044076) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.623752690275875, LR: 0.0003 +[2026-03-03 09:51:33] (step=0044077) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 8.62394834670319, LR: 0.0003 +[2026-03-03 09:51:41] (step=0044078) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.624144003130503, LR: 0.0003 +[2026-03-03 09:51:49] (step=0044079) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.624339659557817, LR: 0.0003 +[2026-03-03 09:51:57] (step=0044080) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.62453531598513, LR: 0.0003 +[2026-03-03 09:52:05] (step=0044081) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 8.624730972412443, LR: 0.0003 +[2026-03-03 09:52:13] (step=0044082) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.624926628839757, LR: 0.0003 +[2026-03-03 09:52:20] (step=0044083) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.625122285267071, LR: 0.0003 +[2026-03-03 09:52:28] (step=0044084) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.625317941694385, LR: 0.0003 +[2026-03-03 09:52:36] (step=0044085) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.625513598121698, LR: 0.0003 +[2026-03-03 09:52:44] (step=0044086) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.625709254549012, LR: 0.0003 +[2026-03-03 09:52:52] (step=0044087) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.625904910976326, LR: 0.0003 +[2026-03-03 09:53:00] (step=0044088) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.62610056740364, LR: 0.0003 +[2026-03-03 09:53:08] (step=0044089) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.626296223830952, LR: 0.0003 +[2026-03-03 09:53:15] (step=0044090) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.626491880258266, LR: 0.0003 +[2026-03-03 09:53:23] (step=0044091) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.62668753668558, LR: 0.0003 +[2026-03-03 09:53:31] (step=0044092) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.626883193112894, LR: 0.0003 +[2026-03-03 09:53:39] (step=0044093) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.627078849540208, LR: 0.0003 +[2026-03-03 09:53:47] (step=0044094) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.62727450596752, LR: 0.0003 +[2026-03-03 09:53:55] (step=0044095) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.627470162394834, LR: 0.0003 +[2026-03-03 09:54:03] (step=0044096) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.627665818822148, LR: 0.0003 +[2026-03-03 09:54:11] (step=0044097) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 8.627861475249462, LR: 0.0003 +[2026-03-03 09:54:18] (step=0044098) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.628057131676776, LR: 0.0003 +[2026-03-03 09:54:26] (step=0044099) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.628252788104088, LR: 0.0003 +[2026-03-03 09:54:34] (step=0044100) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.628448444531402, LR: 0.0003 +[2026-03-03 09:54:42] (step=0044101) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.628644100958716, LR: 0.0003 +[2026-03-03 09:54:50] (step=0044102) Train Loss: 0.4342, Train Steps/Sec: 0.12, Epoch: 8.62883975738603, LR: 0.0003 +[2026-03-03 09:54:58] (step=0044103) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.629035413813344, LR: 0.0003 +[2026-03-03 09:55:06] (step=0044104) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.629231070240657, LR: 0.0003 +[2026-03-03 09:55:14] (step=0044105) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.62942672666797, LR: 0.0003 +[2026-03-03 09:55:22] (step=0044106) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.629622383095285, LR: 0.0003 +[2026-03-03 09:55:29] (step=0044107) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.629818039522599, LR: 0.0003 +[2026-03-03 09:55:37] (step=0044108) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.630013695949913, LR: 0.0003 +[2026-03-03 09:55:45] (step=0044109) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 8.630209352377225, LR: 0.0003 +[2026-03-03 09:55:53] (step=0044110) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 8.630405008804539, LR: 0.0003 +[2026-03-03 09:56:01] (step=0044111) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.630600665231853, LR: 0.0003 +[2026-03-03 09:56:09] (step=0044112) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.630796321659167, LR: 0.0003 +[2026-03-03 09:56:17] (step=0044113) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.630991978086481, LR: 0.0003 +[2026-03-03 09:56:25] (step=0044114) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.631187634513793, LR: 0.0003 +[2026-03-03 09:56:32] (step=0044115) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 8.631383290941107, LR: 0.0003 +[2026-03-03 09:56:40] (step=0044116) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.631578947368421, LR: 0.0003 +[2026-03-03 09:56:48] (step=0044117) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.631774603795735, LR: 0.0003 +[2026-03-03 09:56:56] (step=0044118) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.631970260223047, LR: 0.0003 +[2026-03-03 09:57:04] (step=0044119) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.632165916650361, LR: 0.0003 +[2026-03-03 09:57:12] (step=0044120) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.632361573077675, LR: 0.0003 +[2026-03-03 09:57:20] (step=0044121) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.63255722950499, LR: 0.0003 +[2026-03-03 09:57:28] (step=0044122) Train Loss: 0.4311, Train Steps/Sec: 0.12, Epoch: 8.632752885932303, LR: 0.0003 +[2026-03-03 09:57:36] (step=0044123) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.632948542359616, LR: 0.0003 +[2026-03-03 09:57:43] (step=0044124) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.63314419878693, LR: 0.0003 +[2026-03-03 09:57:51] (step=0044125) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.633339855214244, LR: 0.0003 +[2026-03-03 09:57:59] (step=0044126) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.633535511641558, LR: 0.0003 +[2026-03-03 09:58:07] (step=0044127) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 8.633731168068872, LR: 0.0003 +[2026-03-03 09:58:15] (step=0044128) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.633926824496184, LR: 0.0003 +[2026-03-03 09:58:23] (step=0044129) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.634122480923498, LR: 0.0003 +[2026-03-03 09:58:31] (step=0044130) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.634318137350812, LR: 0.0003 +[2026-03-03 09:58:38] (step=0044131) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 8.634513793778126, LR: 0.0003 +[2026-03-03 09:58:46] (step=0044132) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.63470945020544, LR: 0.0003 +[2026-03-03 09:58:54] (step=0044133) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.634905106632752, LR: 0.0003 +[2026-03-03 09:59:02] (step=0044134) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.635100763060066, LR: 0.0003 +[2026-03-03 09:59:10] (step=0044135) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.63529641948738, LR: 0.0003 +[2026-03-03 09:59:18] (step=0044136) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.635492075914694, LR: 0.0003 +[2026-03-03 09:59:26] (step=0044137) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.635687732342008, LR: 0.0003 +[2026-03-03 09:59:34] (step=0044138) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.63588338876932, LR: 0.0003 +[2026-03-03 09:59:41] (step=0044139) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.636079045196634, LR: 0.0003 +[2026-03-03 09:59:49] (step=0044140) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.636274701623949, LR: 0.0003 +[2026-03-03 09:59:57] (step=0044141) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.636470358051263, LR: 0.0003 +[2026-03-03 10:00:05] (step=0044142) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.636666014478575, LR: 0.0003 +[2026-03-03 10:00:13] (step=0044143) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.636861670905889, LR: 0.0003 +[2026-03-03 10:00:21] (step=0044144) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.637057327333203, LR: 0.0003 +[2026-03-03 10:00:29] (step=0044145) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.637252983760517, LR: 0.0003 +[2026-03-03 10:00:36] (step=0044146) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 8.63744864018783, LR: 0.0003 +[2026-03-03 10:00:44] (step=0044147) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.637644296615143, LR: 0.0003 +[2026-03-03 10:00:52] (step=0044148) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.637839953042457, LR: 0.0003 +[2026-03-03 10:01:00] (step=0044149) Train Loss: 0.4505, Train Steps/Sec: 0.12, Epoch: 8.638035609469771, LR: 0.0003 +[2026-03-03 10:01:08] (step=0044150) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.638231265897085, LR: 0.0003 +[2026-03-03 10:01:16] (step=0044151) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.638426922324399, LR: 0.0003 +[2026-03-03 10:01:24] (step=0044152) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.638622578751711, LR: 0.0003 +[2026-03-03 10:01:32] (step=0044153) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.638818235179025, LR: 0.0003 +[2026-03-03 10:01:40] (step=0044154) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.63901389160634, LR: 0.0003 +[2026-03-03 10:01:47] (step=0044155) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 8.639209548033653, LR: 0.0003 +[2026-03-03 10:01:55] (step=0044156) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.639405204460967, LR: 0.0003 +[2026-03-03 10:02:03] (step=0044157) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.63960086088828, LR: 0.0003 +[2026-03-03 10:02:11] (step=0044158) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.639796517315594, LR: 0.0003 +[2026-03-03 10:02:19] (step=0044159) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.639992173742908, LR: 0.0003 +[2026-03-03 10:02:27] (step=0044160) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 8.640187830170222, LR: 0.0003 +[2026-03-03 10:02:35] (step=0044161) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.640383486597536, LR: 0.0003 +[2026-03-03 10:02:43] (step=0044162) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.640579143024848, LR: 0.0003 +[2026-03-03 10:02:50] (step=0044163) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.640774799452162, LR: 0.0003 +[2026-03-03 10:02:58] (step=0044164) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.640970455879476, LR: 0.0003 +[2026-03-03 10:03:06] (step=0044165) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.64116611230679, LR: 0.0003 +[2026-03-03 10:03:14] (step=0044166) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.641361768734104, LR: 0.0003 +[2026-03-03 10:03:22] (step=0044167) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 8.641557425161416, LR: 0.0003 +[2026-03-03 10:03:30] (step=0044168) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 8.64175308158873, LR: 0.0003 +[2026-03-03 10:03:38] (step=0044169) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.641948738016044, LR: 0.0003 +[2026-03-03 10:03:46] (step=0044170) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.642144394443358, LR: 0.0003 +[2026-03-03 10:03:53] (step=0044171) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 8.64234005087067, LR: 0.0003 +[2026-03-03 10:04:01] (step=0044172) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 8.642535707297984, LR: 0.0003 +[2026-03-03 10:04:09] (step=0044173) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.642731363725298, LR: 0.0003 +[2026-03-03 10:04:17] (step=0044174) Train Loss: 0.4475, Train Steps/Sec: 0.12, Epoch: 8.642927020152612, LR: 0.0003 +[2026-03-03 10:04:25] (step=0044175) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.643122676579926, LR: 0.0003 +[2026-03-03 10:04:33] (step=0044176) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.643318333007239, LR: 0.0003 +[2026-03-03 10:04:41] (step=0044177) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.643513989434553, LR: 0.0003 +[2026-03-03 10:04:49] (step=0044178) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.643709645861867, LR: 0.0003 +[2026-03-03 10:04:57] (step=0044179) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.64390530228918, LR: 0.0003 +[2026-03-03 10:05:04] (step=0044180) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.644100958716495, LR: 0.0003 +[2026-03-03 10:05:12] (step=0044181) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.644296615143807, LR: 0.0003 +[2026-03-03 10:05:20] (step=0044182) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 8.64449227157112, LR: 0.0003 +[2026-03-03 10:05:28] (step=0044183) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.644687927998435, LR: 0.0003 +[2026-03-03 10:05:36] (step=0044184) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.644883584425749, LR: 0.0003 +[2026-03-03 10:05:44] (step=0044185) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.645079240853063, LR: 0.0003 +[2026-03-03 10:05:52] (step=0044186) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.645274897280375, LR: 0.0003 +[2026-03-03 10:05:59] (step=0044187) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.645470553707689, LR: 0.0003 +[2026-03-03 10:06:07] (step=0044188) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.645666210135003, LR: 0.0003 +[2026-03-03 10:06:15] (step=0044189) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.645861866562317, LR: 0.0003 +[2026-03-03 10:06:23] (step=0044190) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.646057522989631, LR: 0.0003 +[2026-03-03 10:06:31] (step=0044191) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 8.646253179416943, LR: 0.0003 +[2026-03-03 10:06:39] (step=0044192) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.646448835844257, LR: 0.0003 +[2026-03-03 10:06:47] (step=0044193) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.646644492271571, LR: 0.0003 +[2026-03-03 10:06:55] (step=0044194) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.646840148698885, LR: 0.0003 +[2026-03-03 10:07:02] (step=0044195) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.647035805126198, LR: 0.0003 +[2026-03-03 10:07:10] (step=0044196) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.647231461553512, LR: 0.0003 +[2026-03-03 10:07:18] (step=0044197) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.647427117980826, LR: 0.0003 +[2026-03-03 10:07:26] (step=0044198) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 8.64762277440814, LR: 0.0003 +[2026-03-03 10:07:34] (step=0044199) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.647818430835454, LR: 0.0003 +[2026-03-03 10:07:42] (step=0044200) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 8.648014087262766, LR: 0.0003 +[2026-03-03 10:07:50] (step=0044201) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.64820974369008, LR: 0.0003 +[2026-03-03 10:07:58] (step=0044202) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.648405400117394, LR: 0.0003 +[2026-03-03 10:08:06] (step=0044203) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.648601056544708, LR: 0.0003 +[2026-03-03 10:08:14] (step=0044204) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.648796712972022, LR: 0.0003 +[2026-03-03 10:08:21] (step=0044205) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.648992369399334, LR: 0.0003 +[2026-03-03 10:08:29] (step=0044206) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.649188025826648, LR: 0.0003 +[2026-03-03 10:08:37] (step=0044207) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.649383682253962, LR: 0.0003 +[2026-03-03 10:08:45] (step=0044208) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.649579338681276, LR: 0.0003 +[2026-03-03 10:08:53] (step=0044209) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.64977499510859, LR: 0.0003 +[2026-03-03 10:09:01] (step=0044210) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 8.649970651535902, LR: 0.0003 +[2026-03-03 10:09:09] (step=0044211) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 8.650166307963216, LR: 0.0003 +[2026-03-03 10:09:16] (step=0044212) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 8.65036196439053, LR: 0.0003 +[2026-03-03 10:09:24] (step=0044213) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.650557620817844, LR: 0.0003 +[2026-03-03 10:09:32] (step=0044214) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.650753277245158, LR: 0.0003 +[2026-03-03 10:09:40] (step=0044215) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.65094893367247, LR: 0.0003 +[2026-03-03 10:09:48] (step=0044216) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.651144590099785, LR: 0.0003 +[2026-03-03 10:09:56] (step=0044217) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.651340246527099, LR: 0.0003 +[2026-03-03 10:10:04] (step=0044218) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.651535902954413, LR: 0.0003 +[2026-03-03 10:10:12] (step=0044219) Train Loss: 0.4461, Train Steps/Sec: 0.12, Epoch: 8.651731559381727, LR: 0.0003 +[2026-03-03 10:10:20] (step=0044220) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.651927215809039, LR: 0.0003 +[2026-03-03 10:10:27] (step=0044221) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.652122872236353, LR: 0.0003 +[2026-03-03 10:10:35] (step=0044222) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.652318528663667, LR: 0.0003 +[2026-03-03 10:10:43] (step=0044223) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 8.65251418509098, LR: 0.0003 +[2026-03-03 10:10:51] (step=0044224) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.652709841518293, LR: 0.0003 +[2026-03-03 10:10:59] (step=0044225) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.652905497945607, LR: 0.0003 +[2026-03-03 10:11:07] (step=0044226) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 8.653101154372921, LR: 0.0003 +[2026-03-03 10:11:15] (step=0044227) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.653296810800235, LR: 0.0003 +[2026-03-03 10:11:22] (step=0044228) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.653492467227549, LR: 0.0003 +[2026-03-03 10:11:30] (step=0044229) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.653688123654861, LR: 0.0003 +[2026-03-03 10:11:38] (step=0044230) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.653883780082175, LR: 0.0003 +[2026-03-03 10:11:46] (step=0044231) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.65407943650949, LR: 0.0003 +[2026-03-03 10:11:54] (step=0044232) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.654275092936803, LR: 0.0003 +[2026-03-03 10:12:02] (step=0044233) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.654470749364117, LR: 0.0003 +[2026-03-03 10:12:10] (step=0044234) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 8.65466640579143, LR: 0.0003 +[2026-03-03 10:12:18] (step=0044235) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.654862062218744, LR: 0.0003 +[2026-03-03 10:12:25] (step=0044236) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 8.655057718646058, LR: 0.0003 +[2026-03-03 10:12:33] (step=0044237) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 8.655253375073372, LR: 0.0003 +[2026-03-03 10:12:41] (step=0044238) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.655449031500686, LR: 0.0003 +[2026-03-03 10:12:49] (step=0044239) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 8.655644687927998, LR: 0.0003 +[2026-03-03 10:12:57] (step=0044240) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.655840344355312, LR: 0.0003 +[2026-03-03 10:13:05] (step=0044241) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.656036000782626, LR: 0.0003 +[2026-03-03 10:13:13] (step=0044242) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.65623165720994, LR: 0.0003 +[2026-03-03 10:13:20] (step=0044243) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.656427313637254, LR: 0.0003 +[2026-03-03 10:13:28] (step=0044244) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.656622970064566, LR: 0.0003 +[2026-03-03 10:13:36] (step=0044245) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.65681862649188, LR: 0.0003 +[2026-03-03 10:13:44] (step=0044246) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.657014282919194, LR: 0.0003 +[2026-03-03 10:13:52] (step=0044247) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.657209939346508, LR: 0.0003 +[2026-03-03 10:14:00] (step=0044248) Train Loss: 0.4501, Train Steps/Sec: 0.12, Epoch: 8.65740559577382, LR: 0.0003 +[2026-03-03 10:14:08] (step=0044249) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.657601252201134, LR: 0.0003 +[2026-03-03 10:14:16] (step=0044250) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.657796908628448, LR: 0.0003 +[2026-03-03 10:14:24] (step=0044251) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.657992565055762, LR: 0.0003 +[2026-03-03 10:14:31] (step=0044252) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.658188221483076, LR: 0.0003 +[2026-03-03 10:14:39] (step=0044253) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 8.658383877910389, LR: 0.0003 +[2026-03-03 10:14:47] (step=0044254) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 8.658579534337703, LR: 0.0003 +[2026-03-03 10:14:55] (step=0044255) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.658775190765017, LR: 0.0003 +[2026-03-03 10:15:03] (step=0044256) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.65897084719233, LR: 0.0003 +[2026-03-03 10:15:11] (step=0044257) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.659166503619645, LR: 0.0003 +[2026-03-03 10:15:19] (step=0044258) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.659362160046957, LR: 0.0003 +[2026-03-03 10:15:26] (step=0044259) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.65955781647427, LR: 0.0003 +[2026-03-03 10:15:34] (step=0044260) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.659753472901585, LR: 0.0003 +[2026-03-03 10:15:42] (step=0044261) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.659949129328899, LR: 0.0003 +[2026-03-03 10:15:50] (step=0044262) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.660144785756213, LR: 0.0003 +[2026-03-03 10:15:58] (step=0044263) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.660340442183525, LR: 0.0003 +[2026-03-03 10:16:06] (step=0044264) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.660536098610839, LR: 0.0003 +[2026-03-03 10:16:14] (step=0044265) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.660731755038153, LR: 0.0003 +[2026-03-03 10:16:22] (step=0044266) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 8.660927411465467, LR: 0.0003 +[2026-03-03 10:16:29] (step=0044267) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.661123067892781, LR: 0.0003 +[2026-03-03 10:16:37] (step=0044268) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 8.661318724320093, LR: 0.0003 +[2026-03-03 10:16:45] (step=0044269) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.661514380747407, LR: 0.0003 +[2026-03-03 10:16:53] (step=0044270) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.661710037174721, LR: 0.0003 +[2026-03-03 10:17:01] (step=0044271) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.661905693602035, LR: 0.0003 +[2026-03-03 10:17:09] (step=0044272) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.66210135002935, LR: 0.0003 +[2026-03-03 10:17:17] (step=0044273) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.662297006456662, LR: 0.0003 +[2026-03-03 10:17:25] (step=0044274) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.662492662883976, LR: 0.0003 +[2026-03-03 10:17:33] (step=0044275) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.66268831931129, LR: 0.0003 +[2026-03-03 10:17:40] (step=0044276) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.662883975738604, LR: 0.0003 +[2026-03-03 10:17:48] (step=0044277) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.663079632165916, LR: 0.0003 +[2026-03-03 10:17:56] (step=0044278) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 8.66327528859323, LR: 0.0003 +[2026-03-03 10:18:04] (step=0044279) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.663470945020544, LR: 0.0003 +[2026-03-03 10:18:12] (step=0044280) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.663666601447858, LR: 0.0003 +[2026-03-03 10:18:20] (step=0044281) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 8.663862257875172, LR: 0.0003 +[2026-03-03 10:18:28] (step=0044282) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.664057914302484, LR: 0.0003 +[2026-03-03 10:18:35] (step=0044283) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 8.664253570729798, LR: 0.0003 +[2026-03-03 10:18:43] (step=0044284) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 8.664449227157112, LR: 0.0003 +[2026-03-03 10:18:51] (step=0044285) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.664644883584426, LR: 0.0003 +[2026-03-03 10:18:59] (step=0044286) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.66484054001174, LR: 0.0003 +[2026-03-03 10:19:07] (step=0044287) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.665036196439052, LR: 0.0003 +[2026-03-03 10:19:15] (step=0044288) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.665231852866366, LR: 0.0003 +[2026-03-03 10:19:23] (step=0044289) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 8.66542750929368, LR: 0.0003 +[2026-03-03 10:19:30] (step=0044290) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.665623165720994, LR: 0.0003 +[2026-03-03 10:19:38] (step=0044291) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.665818822148308, LR: 0.0003 +[2026-03-03 10:19:46] (step=0044292) Train Loss: 0.4430, Train Steps/Sec: 0.12, Epoch: 8.66601447857562, LR: 0.0003 +[2026-03-03 10:19:54] (step=0044293) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.666210135002935, LR: 0.0003 +[2026-03-03 10:20:02] (step=0044294) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.666405791430249, LR: 0.0003 +[2026-03-03 10:20:10] (step=0044295) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.666601447857563, LR: 0.0003 +[2026-03-03 10:20:18] (step=0044296) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.666797104284877, LR: 0.0003 +[2026-03-03 10:20:26] (step=0044297) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 8.666992760712189, LR: 0.0003 +[2026-03-03 10:20:34] (step=0044298) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.667188417139503, LR: 0.0003 +[2026-03-03 10:20:42] (step=0044299) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.667384073566817, LR: 0.0003 +[2026-03-03 10:20:49] (step=0044300) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.66757972999413, LR: 0.0003 +[2026-03-03 10:20:57] (step=0044301) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.667775386421443, LR: 0.0003 +[2026-03-03 10:21:05] (step=0044302) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.667971042848757, LR: 0.0003 +[2026-03-03 10:21:13] (step=0044303) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.668166699276071, LR: 0.0003 +[2026-03-03 10:21:21] (step=0044304) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.668362355703385, LR: 0.0003 +[2026-03-03 10:21:29] (step=0044305) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.668558012130699, LR: 0.0003 +[2026-03-03 10:21:37] (step=0044306) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.668753668558011, LR: 0.0003 +[2026-03-03 10:21:44] (step=0044307) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 8.668949324985325, LR: 0.0003 +[2026-03-03 10:21:52] (step=0044308) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.66914498141264, LR: 0.0003 +[2026-03-03 10:22:00] (step=0044309) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.669340637839953, LR: 0.0003 +[2026-03-03 10:22:08] (step=0044310) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.669536294267267, LR: 0.0003 +[2026-03-03 10:22:16] (step=0044311) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.66973195069458, LR: 0.0003 +[2026-03-03 10:22:24] (step=0044312) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.669927607121894, LR: 0.0003 +[2026-03-03 10:22:32] (step=0044313) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.670123263549208, LR: 0.0003 +[2026-03-03 10:22:39] (step=0044314) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.670318919976522, LR: 0.0003 +[2026-03-03 10:22:47] (step=0044315) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.670514576403836, LR: 0.0003 +[2026-03-03 10:22:55] (step=0044316) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.670710232831148, LR: 0.0003 +[2026-03-03 10:23:03] (step=0044317) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.670905889258462, LR: 0.0003 +[2026-03-03 10:23:11] (step=0044318) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.671101545685776, LR: 0.0003 +[2026-03-03 10:23:19] (step=0044319) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.67129720211309, LR: 0.0003 +[2026-03-03 10:23:27] (step=0044320) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 8.671492858540404, LR: 0.0003 +[2026-03-03 10:23:35] (step=0044321) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 8.671688514967716, LR: 0.0003 +[2026-03-03 10:23:43] (step=0044322) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.67188417139503, LR: 0.0003 +[2026-03-03 10:23:50] (step=0044323) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 8.672079827822344, LR: 0.0003 +[2026-03-03 10:23:58] (step=0044324) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 8.672275484249658, LR: 0.0003 +[2026-03-03 10:24:06] (step=0044325) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.672471140676972, LR: 0.0003 +[2026-03-03 10:24:14] (step=0044326) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.672666797104284, LR: 0.0003 +[2026-03-03 10:24:22] (step=0044327) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.672862453531598, LR: 0.0003 +[2026-03-03 10:24:30] (step=0044328) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.673058109958912, LR: 0.0003 +[2026-03-03 10:24:38] (step=0044329) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.673253766386226, LR: 0.0003 +[2026-03-03 10:24:46] (step=0044330) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.673449422813539, LR: 0.0003 +[2026-03-03 10:24:53] (step=0044331) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.673645079240853, LR: 0.0003 +[2026-03-03 10:25:01] (step=0044332) Train Loss: 0.4229, Train Steps/Sec: 0.13, Epoch: 8.673840735668167, LR: 0.0003 +[2026-03-03 10:25:09] (step=0044333) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.67403639209548, LR: 0.0003 +[2026-03-03 10:25:17] (step=0044334) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.674232048522795, LR: 0.0003 +[2026-03-03 10:25:25] (step=0044335) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.674427704950107, LR: 0.0003 +[2026-03-03 10:25:33] (step=0044336) Train Loss: 0.4360, Train Steps/Sec: 0.12, Epoch: 8.67462336137742, LR: 0.0003 +[2026-03-03 10:25:41] (step=0044337) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.674819017804735, LR: 0.0003 +[2026-03-03 10:25:49] (step=0044338) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.675014674232049, LR: 0.0003 +[2026-03-03 10:25:57] (step=0044339) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.675210330659363, LR: 0.0003 +[2026-03-03 10:26:04] (step=0044340) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.675405987086675, LR: 0.0003 +[2026-03-03 10:26:12] (step=0044341) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.675601643513989, LR: 0.0003 +[2026-03-03 10:26:20] (step=0044342) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.675797299941303, LR: 0.0003 +[2026-03-03 10:26:28] (step=0044343) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.675992956368617, LR: 0.0003 +[2026-03-03 10:26:36] (step=0044344) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.676188612795931, LR: 0.0003 +[2026-03-03 10:26:44] (step=0044345) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.676384269223243, LR: 0.0003 +[2026-03-03 10:26:52] (step=0044346) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.676579925650557, LR: 0.0003 +[2026-03-03 10:26:59] (step=0044347) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.676775582077871, LR: 0.0003 +[2026-03-03 10:27:07] (step=0044348) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.676971238505185, LR: 0.0003 +[2026-03-03 10:27:15] (step=0044349) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.6771668949325, LR: 0.0003 +[2026-03-03 10:27:23] (step=0044350) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.677362551359812, LR: 0.0003 +[2026-03-03 10:27:31] (step=0044351) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.677558207787126, LR: 0.0003 +[2026-03-03 10:27:39] (step=0044352) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.67775386421444, LR: 0.0003 +[2026-03-03 10:27:47] (step=0044353) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.677949520641754, LR: 0.0003 +[2026-03-03 10:27:55] (step=0044354) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.678145177069066, LR: 0.0003 +[2026-03-03 10:28:02] (step=0044355) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.67834083349638, LR: 0.0003 +[2026-03-03 10:28:10] (step=0044356) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 8.678536489923694, LR: 0.0003 +[2026-03-03 10:28:18] (step=0044357) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.678732146351008, LR: 0.0003 +[2026-03-03 10:28:26] (step=0044358) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.678927802778322, LR: 0.0003 +[2026-03-03 10:28:34] (step=0044359) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.679123459205634, LR: 0.0003 +[2026-03-03 10:28:42] (step=0044360) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.679319115632948, LR: 0.0003 +[2026-03-03 10:28:50] (step=0044361) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.679514772060262, LR: 0.0003 +[2026-03-03 10:28:57] (step=0044362) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.679710428487576, LR: 0.0003 +[2026-03-03 10:29:05] (step=0044363) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.67990608491489, LR: 0.0003 +[2026-03-03 10:29:13] (step=0044364) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.680101741342202, LR: 0.0003 +[2026-03-03 10:29:21] (step=0044365) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.680297397769516, LR: 0.0003 +[2026-03-03 10:29:29] (step=0044366) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.68049305419683, LR: 0.0003 +[2026-03-03 10:29:37] (step=0044367) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.680688710624144, LR: 0.0003 +[2026-03-03 10:29:45] (step=0044368) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.680884367051458, LR: 0.0003 +[2026-03-03 10:29:53] (step=0044369) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.68108002347877, LR: 0.0003 +[2026-03-03 10:30:01] (step=0044370) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.681275679906085, LR: 0.0003 +[2026-03-03 10:30:08] (step=0044371) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.681471336333399, LR: 0.0003 +[2026-03-03 10:30:16] (step=0044372) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.681666992760713, LR: 0.0003 +[2026-03-03 10:30:24] (step=0044373) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.681862649188027, LR: 0.0003 +[2026-03-03 10:30:32] (step=0044374) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 8.682058305615339, LR: 0.0003 +[2026-03-03 10:30:40] (step=0044375) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.682253962042653, LR: 0.0003 +[2026-03-03 10:30:48] (step=0044376) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.682449618469967, LR: 0.0003 +[2026-03-03 10:30:56] (step=0044377) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.68264527489728, LR: 0.0003 +[2026-03-03 10:31:03] (step=0044378) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.682840931324595, LR: 0.0003 +[2026-03-03 10:31:11] (step=0044379) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.683036587751907, LR: 0.0003 +[2026-03-03 10:31:19] (step=0044380) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.683232244179221, LR: 0.0003 +[2026-03-03 10:31:27] (step=0044381) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.683427900606535, LR: 0.0003 +[2026-03-03 10:31:35] (step=0044382) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.68362355703385, LR: 0.0003 +[2026-03-03 10:31:43] (step=0044383) Train Loss: 0.4449, Train Steps/Sec: 0.12, Epoch: 8.683819213461161, LR: 0.0003 +[2026-03-03 10:31:51] (step=0044384) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.684014869888475, LR: 0.0003 +[2026-03-03 10:31:59] (step=0044385) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.68421052631579, LR: 0.0003 +[2026-03-03 10:32:07] (step=0044386) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.684406182743103, LR: 0.0003 +[2026-03-03 10:32:14] (step=0044387) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.684601839170417, LR: 0.0003 +[2026-03-03 10:32:22] (step=0044388) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.68479749559773, LR: 0.0003 +[2026-03-03 10:32:30] (step=0044389) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.684993152025044, LR: 0.0003 +[2026-03-03 10:32:38] (step=0044390) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.685188808452358, LR: 0.0003 +[2026-03-03 10:32:46] (step=0044391) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.685384464879672, LR: 0.0003 +[2026-03-03 10:32:54] (step=0044392) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.685580121306986, LR: 0.0003 +[2026-03-03 10:33:02] (step=0044393) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.685775777734298, LR: 0.0003 +[2026-03-03 10:33:09] (step=0044394) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.685971434161612, LR: 0.0003 +[2026-03-03 10:33:17] (step=0044395) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.686167090588926, LR: 0.0003 +[2026-03-03 10:33:25] (step=0044396) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.68636274701624, LR: 0.0003 +[2026-03-03 10:33:33] (step=0044397) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.686558403443554, LR: 0.0003 +[2026-03-03 10:33:41] (step=0044398) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.686754059870866, LR: 0.0003 +[2026-03-03 10:33:49] (step=0044399) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.68694971629818, LR: 0.0003 +[2026-03-03 10:33:57] (step=0044400) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.687145372725494, LR: 0.0003 +[2026-03-03 10:34:05] (step=0044401) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.687341029152808, LR: 0.0003 +[2026-03-03 10:34:12] (step=0044402) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.687536685580122, LR: 0.0003 +[2026-03-03 10:34:20] (step=0044403) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.687732342007434, LR: 0.0003 +[2026-03-03 10:34:28] (step=0044404) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.687927998434748, LR: 0.0003 +[2026-03-03 10:34:36] (step=0044405) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.688123654862062, LR: 0.0003 +[2026-03-03 10:34:44] (step=0044406) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.688319311289376, LR: 0.0003 +[2026-03-03 10:34:52] (step=0044407) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.688514967716689, LR: 0.0003 +[2026-03-03 10:35:00] (step=0044408) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.688710624144003, LR: 0.0003 +[2026-03-03 10:35:08] (step=0044409) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.688906280571317, LR: 0.0003 +[2026-03-03 10:35:15] (step=0044410) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.68910193699863, LR: 0.0003 +[2026-03-03 10:35:23] (step=0044411) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.689297593425945, LR: 0.0003 +[2026-03-03 10:35:31] (step=0044412) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.689493249853257, LR: 0.0003 +[2026-03-03 10:35:39] (step=0044413) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.689688906280571, LR: 0.0003 +[2026-03-03 10:35:47] (step=0044414) Train Loss: 0.4385, Train Steps/Sec: 0.12, Epoch: 8.689884562707885, LR: 0.0003 +[2026-03-03 10:35:55] (step=0044415) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 8.690080219135199, LR: 0.0003 +[2026-03-03 10:36:03] (step=0044416) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.690275875562513, LR: 0.0003 +[2026-03-03 10:36:11] (step=0044417) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.690471531989825, LR: 0.0003 +[2026-03-03 10:36:18] (step=0044418) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.69066718841714, LR: 0.0003 +[2026-03-03 10:36:26] (step=0044419) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.690862844844453, LR: 0.0003 +[2026-03-03 10:36:34] (step=0044420) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 8.691058501271767, LR: 0.0003 +[2026-03-03 10:36:42] (step=0044421) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.691254157699081, LR: 0.0003 +[2026-03-03 10:36:50] (step=0044422) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 8.691449814126393, LR: 0.0003 +[2026-03-03 10:36:58] (step=0044423) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 8.691645470553707, LR: 0.0003 +[2026-03-03 10:37:06] (step=0044424) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.691841126981021, LR: 0.0003 +[2026-03-03 10:37:13] (step=0044425) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.692036783408335, LR: 0.0003 +[2026-03-03 10:37:21] (step=0044426) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 8.69223243983565, LR: 0.0003 +[2026-03-03 10:37:29] (step=0044427) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.692428096262962, LR: 0.0003 +[2026-03-03 10:37:37] (step=0044428) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 8.692623752690276, LR: 0.0003 +[2026-03-03 10:37:45] (step=0044429) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.69281940911759, LR: 0.0003 +[2026-03-03 10:37:53] (step=0044430) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.693015065544904, LR: 0.0003 +[2026-03-03 10:38:01] (step=0044431) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.693210721972218, LR: 0.0003 +[2026-03-03 10:38:09] (step=0044432) Train Loss: 0.4545, Train Steps/Sec: 0.12, Epoch: 8.69340637839953, LR: 0.0003 +[2026-03-03 10:38:17] (step=0044433) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.693602034826844, LR: 0.0003 +[2026-03-03 10:38:24] (step=0044434) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.693797691254158, LR: 0.0003 +[2026-03-03 10:38:32] (step=0044435) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.693993347681472, LR: 0.0003 +[2026-03-03 10:38:40] (step=0044436) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 8.694189004108784, LR: 0.0003 +[2026-03-03 10:38:48] (step=0044437) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.694384660536098, LR: 0.0003 +[2026-03-03 10:38:56] (step=0044438) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.694580316963412, LR: 0.0003 +[2026-03-03 10:39:04] (step=0044439) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.694775973390726, LR: 0.0003 +[2026-03-03 10:39:12] (step=0044440) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.69497162981804, LR: 0.0003 +[2026-03-03 10:39:20] (step=0044441) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.695167286245352, LR: 0.0003 +[2026-03-03 10:39:27] (step=0044442) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.695362942672666, LR: 0.0003 +[2026-03-03 10:39:35] (step=0044443) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.69555859909998, LR: 0.0003 +[2026-03-03 10:39:43] (step=0044444) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.695754255527294, LR: 0.0003 +[2026-03-03 10:39:51] (step=0044445) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.695949911954608, LR: 0.0003 +[2026-03-03 10:39:59] (step=0044446) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 8.69614556838192, LR: 0.0003 +[2026-03-03 10:40:07] (step=0044447) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.696341224809235, LR: 0.0003 +[2026-03-03 10:40:15] (step=0044448) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.696536881236549, LR: 0.0003 +[2026-03-03 10:40:23] (step=0044449) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 8.696732537663863, LR: 0.0003 +[2026-03-03 10:40:30] (step=0044450) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.696928194091177, LR: 0.0003 +[2026-03-03 10:40:38] (step=0044451) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.697123850518489, LR: 0.0003 +[2026-03-03 10:40:46] (step=0044452) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.697319506945803, LR: 0.0003 +[2026-03-03 10:40:54] (step=0044453) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.697515163373117, LR: 0.0003 +[2026-03-03 10:41:02] (step=0044454) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.697710819800431, LR: 0.0003 +[2026-03-03 10:41:10] (step=0044455) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.697906476227745, LR: 0.0003 +[2026-03-03 10:41:18] (step=0044456) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.698102132655057, LR: 0.0003 +[2026-03-03 10:41:25] (step=0044457) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.698297789082371, LR: 0.0003 +[2026-03-03 10:41:33] (step=0044458) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.698493445509685, LR: 0.0003 +[2026-03-03 10:41:41] (step=0044459) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.698689101937, LR: 0.0003 +[2026-03-03 10:41:49] (step=0044460) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.698884758364311, LR: 0.0003 +[2026-03-03 10:41:57] (step=0044461) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 8.699080414791625, LR: 0.0003 +[2026-03-03 10:42:05] (step=0044462) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.69927607121894, LR: 0.0003 +[2026-03-03 10:42:13] (step=0044463) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.699471727646253, LR: 0.0003 +[2026-03-03 10:42:20] (step=0044464) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.699667384073567, LR: 0.0003 +[2026-03-03 10:42:28] (step=0044465) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.69986304050088, LR: 0.0003 +[2026-03-03 10:42:36] (step=0044466) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.700058696928194, LR: 0.0003 +[2026-03-03 10:42:44] (step=0044467) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.700254353355508, LR: 0.0003 +[2026-03-03 10:42:52] (step=0044468) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.700450009782822, LR: 0.0003 +[2026-03-03 10:43:00] (step=0044469) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 8.700645666210136, LR: 0.0003 +[2026-03-03 10:43:08] (step=0044470) Train Loss: 0.4397, Train Steps/Sec: 0.12, Epoch: 8.700841322637448, LR: 0.0003 +[2026-03-03 10:43:16] (step=0044471) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.701036979064762, LR: 0.0003 +[2026-03-03 10:43:24] (step=0044472) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.701232635492076, LR: 0.0003 +[2026-03-03 10:43:31] (step=0044473) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.70142829191939, LR: 0.0003 +[2026-03-03 10:43:39] (step=0044474) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.701623948346704, LR: 0.0003 +[2026-03-03 10:43:47] (step=0044475) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.701819604774016, LR: 0.0003 +[2026-03-03 10:43:55] (step=0044476) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.70201526120133, LR: 0.0003 +[2026-03-03 10:44:03] (step=0044477) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.702210917628644, LR: 0.0003 +[2026-03-03 10:44:11] (step=0044478) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 8.702406574055958, LR: 0.0003 +[2026-03-03 10:44:19] (step=0044479) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.702602230483272, LR: 0.0003 +[2026-03-03 10:44:27] (step=0044480) Train Loss: 0.4488, Train Steps/Sec: 0.12, Epoch: 8.702797886910584, LR: 0.0003 +[2026-03-03 10:44:34] (step=0044481) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 8.702993543337898, LR: 0.0003 +[2026-03-03 10:44:42] (step=0044482) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.703189199765212, LR: 0.0003 +[2026-03-03 10:44:50] (step=0044483) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.703384856192526, LR: 0.0003 +[2026-03-03 10:44:58] (step=0044484) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.70358051261984, LR: 0.0003 +[2026-03-03 10:45:06] (step=0044485) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.703776169047153, LR: 0.0003 +[2026-03-03 10:45:14] (step=0044486) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.703971825474467, LR: 0.0003 +[2026-03-03 10:45:22] (step=0044487) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.70416748190178, LR: 0.0003 +[2026-03-03 10:45:30] (step=0044488) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.704363138329095, LR: 0.0003 +[2026-03-03 10:45:37] (step=0044489) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.704558794756407, LR: 0.0003 +[2026-03-03 10:45:45] (step=0044490) Train Loss: 0.4203, Train Steps/Sec: 0.13, Epoch: 8.704754451183721, LR: 0.0003 +[2026-03-03 10:45:53] (step=0044491) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.704950107611035, LR: 0.0003 +[2026-03-03 10:46:01] (step=0044492) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 8.705145764038349, LR: 0.0003 +[2026-03-03 10:46:09] (step=0044493) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.705341420465663, LR: 0.0003 +[2026-03-03 10:46:17] (step=0044494) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.705537076892975, LR: 0.0003 +[2026-03-03 10:46:25] (step=0044495) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.70573273332029, LR: 0.0003 +[2026-03-03 10:46:33] (step=0044496) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.705928389747603, LR: 0.0003 +[2026-03-03 10:46:40] (step=0044497) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 8.706124046174917, LR: 0.0003 +[2026-03-03 10:46:48] (step=0044498) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.706319702602231, LR: 0.0003 +[2026-03-03 10:46:56] (step=0044499) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.706515359029543, LR: 0.0003 +[2026-03-03 10:47:04] (step=0044500) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.706711015456857, LR: 0.0003 +[2026-03-03 10:47:04] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0044500/ +[2026-03-03 10:47:12] (step=0044501) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.706906671884171, LR: 0.0003 +[2026-03-03 10:47:20] (step=0044502) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.707102328311485, LR: 0.0003 +[2026-03-03 10:47:28] (step=0044503) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.7072979847388, LR: 0.0003 +[2026-03-03 10:47:35] (step=0044504) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.707493641166112, LR: 0.0003 +[2026-03-03 10:47:43] (step=0044505) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.707689297593426, LR: 0.0003 +[2026-03-03 10:47:51] (step=0044506) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.70788495402074, LR: 0.0003 +[2026-03-03 10:47:59] (step=0044507) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.708080610448054, LR: 0.0003 +[2026-03-03 10:48:07] (step=0044508) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.708276266875368, LR: 0.0003 +[2026-03-03 10:48:15] (step=0044509) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.70847192330268, LR: 0.0003 +[2026-03-03 10:48:23] (step=0044510) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.708667579729994, LR: 0.0003 +[2026-03-03 10:48:31] (step=0044511) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.708863236157308, LR: 0.0003 +[2026-03-03 10:48:38] (step=0044512) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.709058892584622, LR: 0.0003 +[2026-03-03 10:48:46] (step=0044513) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 8.709254549011934, LR: 0.0003 +[2026-03-03 10:48:54] (step=0044514) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.709450205439248, LR: 0.0003 +[2026-03-03 10:49:02] (step=0044515) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.709645861866562, LR: 0.0003 +[2026-03-03 10:49:10] (step=0044516) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.709841518293876, LR: 0.0003 +[2026-03-03 10:49:18] (step=0044517) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 8.71003717472119, LR: 0.0003 +[2026-03-03 10:49:26] (step=0044518) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.710232831148502, LR: 0.0003 +[2026-03-03 10:49:34] (step=0044519) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.710428487575816, LR: 0.0003 +[2026-03-03 10:49:41] (step=0044520) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.71062414400313, LR: 0.0003 +[2026-03-03 10:49:49] (step=0044521) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.710819800430444, LR: 0.0003 +[2026-03-03 10:49:57] (step=0044522) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.711015456857758, LR: 0.0003 +[2026-03-03 10:50:05] (step=0044523) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 8.71121111328507, LR: 0.0003 +[2026-03-03 10:50:13] (step=0044524) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.711406769712385, LR: 0.0003 +[2026-03-03 10:50:21] (step=0044525) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.711602426139699, LR: 0.0003 +[2026-03-03 10:50:29] (step=0044526) Train Loss: 0.4351, Train Steps/Sec: 0.12, Epoch: 8.711798082567013, LR: 0.0003 +[2026-03-03 10:50:37] (step=0044527) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.711993738994327, LR: 0.0003 +[2026-03-03 10:50:45] (step=0044528) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.712189395421639, LR: 0.0003 +[2026-03-03 10:50:53] (step=0044529) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.712385051848953, LR: 0.0003 +[2026-03-03 10:51:00] (step=0044530) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.712580708276267, LR: 0.0003 +[2026-03-03 10:51:08] (step=0044531) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.712776364703581, LR: 0.0003 +[2026-03-03 10:51:16] (step=0044532) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.712972021130895, LR: 0.0003 +[2026-03-03 10:51:24] (step=0044533) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.713167677558207, LR: 0.0003 +[2026-03-03 10:51:32] (step=0044534) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.713363333985521, LR: 0.0003 +[2026-03-03 10:51:40] (step=0044535) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.713558990412835, LR: 0.0003 +[2026-03-03 10:51:48] (step=0044536) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.71375464684015, LR: 0.0003 +[2026-03-03 10:51:55] (step=0044537) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.713950303267461, LR: 0.0003 +[2026-03-03 10:52:03] (step=0044538) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.714145959694775, LR: 0.0003 +[2026-03-03 10:52:11] (step=0044539) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.71434161612209, LR: 0.0003 +[2026-03-03 10:52:19] (step=0044540) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 8.714537272549403, LR: 0.0003 +[2026-03-03 10:52:27] (step=0044541) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.714732928976717, LR: 0.0003 +[2026-03-03 10:52:35] (step=0044542) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.71492858540403, LR: 0.0003 +[2026-03-03 10:52:43] (step=0044543) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 8.715124241831344, LR: 0.0003 +[2026-03-03 10:52:50] (step=0044544) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.715319898258658, LR: 0.0003 +[2026-03-03 10:52:58] (step=0044545) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.715515554685972, LR: 0.0003 +[2026-03-03 10:53:06] (step=0044546) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.715711211113286, LR: 0.0003 +[2026-03-03 10:53:14] (step=0044547) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.715906867540598, LR: 0.0003 +[2026-03-03 10:53:22] (step=0044548) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.716102523967912, LR: 0.0003 +[2026-03-03 10:53:30] (step=0044549) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.716298180395226, LR: 0.0003 +[2026-03-03 10:53:38] (step=0044550) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.71649383682254, LR: 0.0003 +[2026-03-03 10:53:46] (step=0044551) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.716689493249854, LR: 0.0003 +[2026-03-03 10:53:53] (step=0044552) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.716885149677166, LR: 0.0003 +[2026-03-03 10:54:01] (step=0044553) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.71708080610448, LR: 0.0003 +[2026-03-03 10:54:09] (step=0044554) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.717276462531794, LR: 0.0003 +[2026-03-03 10:54:17] (step=0044555) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 8.717472118959108, LR: 0.0003 +[2026-03-03 10:54:25] (step=0044556) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.717667775386422, LR: 0.0003 +[2026-03-03 10:54:33] (step=0044557) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.717863431813734, LR: 0.0003 +[2026-03-03 10:54:41] (step=0044558) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.718059088241048, LR: 0.0003 +[2026-03-03 10:54:48] (step=0044559) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.718254744668362, LR: 0.0003 +[2026-03-03 10:54:56] (step=0044560) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 8.718450401095676, LR: 0.0003 +[2026-03-03 10:55:04] (step=0044561) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 8.71864605752299, LR: 0.0003 +[2026-03-03 10:55:12] (step=0044562) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.718841713950303, LR: 0.0003 +[2026-03-03 10:55:20] (step=0044563) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.719037370377617, LR: 0.0003 +[2026-03-03 10:55:28] (step=0044564) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.71923302680493, LR: 0.0003 +[2026-03-03 10:55:36] (step=0044565) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.719428683232245, LR: 0.0003 +[2026-03-03 10:55:44] (step=0044566) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.719624339659557, LR: 0.0003 +[2026-03-03 10:55:51] (step=0044567) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.719819996086871, LR: 0.0003 +[2026-03-03 10:55:59] (step=0044568) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.720015652514185, LR: 0.0003 +[2026-03-03 10:56:07] (step=0044569) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.720211308941499, LR: 0.0003 +[2026-03-03 10:56:15] (step=0044570) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.720406965368813, LR: 0.0003 +[2026-03-03 10:56:23] (step=0044571) Train Loss: 0.4482, Train Steps/Sec: 0.12, Epoch: 8.720602621796125, LR: 0.0003 +[2026-03-03 10:56:31] (step=0044572) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.72079827822344, LR: 0.0003 +[2026-03-03 10:56:39] (step=0044573) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.720993934650753, LR: 0.0003 +[2026-03-03 10:56:47] (step=0044574) Train Loss: 0.4504, Train Steps/Sec: 0.12, Epoch: 8.721189591078067, LR: 0.0003 +[2026-03-03 10:56:55] (step=0044575) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.721385247505381, LR: 0.0003 +[2026-03-03 10:57:03] (step=0044576) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.721580903932693, LR: 0.0003 +[2026-03-03 10:57:10] (step=0044577) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.721776560360007, LR: 0.0003 +[2026-03-03 10:57:18] (step=0044578) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.721972216787321, LR: 0.0003 +[2026-03-03 10:57:26] (step=0044579) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 8.722167873214635, LR: 0.0003 +[2026-03-03 10:57:34] (step=0044580) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.72236352964195, LR: 0.0003 +[2026-03-03 10:57:42] (step=0044581) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.722559186069262, LR: 0.0003 +[2026-03-03 10:57:50] (step=0044582) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.722754842496576, LR: 0.0003 +[2026-03-03 10:57:58] (step=0044583) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.72295049892389, LR: 0.0003 +[2026-03-03 10:58:05] (step=0044584) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.723146155351204, LR: 0.0003 +[2026-03-03 10:58:13] (step=0044585) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.723341811778518, LR: 0.0003 +[2026-03-03 10:58:21] (step=0044586) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.72353746820583, LR: 0.0003 +[2026-03-03 10:58:29] (step=0044587) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.723733124633144, LR: 0.0003 +[2026-03-03 10:58:37] (step=0044588) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.723928781060458, LR: 0.0003 +[2026-03-03 10:58:45] (step=0044589) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.724124437487772, LR: 0.0003 +[2026-03-03 10:58:53] (step=0044590) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.724320093915084, LR: 0.0003 +[2026-03-03 10:59:01] (step=0044591) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.724515750342398, LR: 0.0003 +[2026-03-03 10:59:08] (step=0044592) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.724711406769712, LR: 0.0003 +[2026-03-03 10:59:16] (step=0044593) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.724907063197026, LR: 0.0003 +[2026-03-03 10:59:24] (step=0044594) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.72510271962434, LR: 0.0003 +[2026-03-03 10:59:32] (step=0044595) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.725298376051652, LR: 0.0003 +[2026-03-03 10:59:40] (step=0044596) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.725494032478966, LR: 0.0003 +[2026-03-03 10:59:48] (step=0044597) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.72568968890628, LR: 0.0003 +[2026-03-03 10:59:56] (step=0044598) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.725885345333595, LR: 0.0003 +[2026-03-03 11:00:03] (step=0044599) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.726081001760909, LR: 0.0003 +[2026-03-03 11:00:11] (step=0044600) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.72627665818822, LR: 0.0003 +[2026-03-03 11:00:19] (step=0044601) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.726472314615535, LR: 0.0003 +[2026-03-03 11:00:27] (step=0044602) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.726667971042849, LR: 0.0003 +[2026-03-03 11:00:35] (step=0044603) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.726863627470163, LR: 0.0003 +[2026-03-03 11:00:43] (step=0044604) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.727059283897477, LR: 0.0003 +[2026-03-03 11:00:51] (step=0044605) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.727254940324789, LR: 0.0003 +[2026-03-03 11:00:58] (step=0044606) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.727450596752103, LR: 0.0003 +[2026-03-03 11:01:06] (step=0044607) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.727646253179417, LR: 0.0003 +[2026-03-03 11:01:14] (step=0044608) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.727841909606731, LR: 0.0003 +[2026-03-03 11:01:22] (step=0044609) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.728037566034045, LR: 0.0003 +[2026-03-03 11:01:30] (step=0044610) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 8.728233222461357, LR: 0.0003 +[2026-03-03 11:01:38] (step=0044611) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.728428878888671, LR: 0.0003 +[2026-03-03 11:01:46] (step=0044612) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 8.728624535315985, LR: 0.0003 +[2026-03-03 11:01:53] (step=0044613) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.7288201917433, LR: 0.0003 +[2026-03-03 11:02:01] (step=0044614) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.729015848170613, LR: 0.0003 +[2026-03-03 11:02:09] (step=0044615) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.729211504597925, LR: 0.0003 +[2026-03-03 11:02:17] (step=0044616) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.72940716102524, LR: 0.0003 +[2026-03-03 11:02:25] (step=0044617) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.729602817452554, LR: 0.0003 +[2026-03-03 11:02:33] (step=0044618) Train Loss: 0.4481, Train Steps/Sec: 0.12, Epoch: 8.729798473879868, LR: 0.0003 +[2026-03-03 11:02:41] (step=0044619) Train Loss: 0.4395, Train Steps/Sec: 0.12, Epoch: 8.72999413030718, LR: 0.0003 +[2026-03-03 11:02:49] (step=0044620) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.730189786734494, LR: 0.0003 +[2026-03-03 11:02:57] (step=0044621) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 8.730385443161808, LR: 0.0003 +[2026-03-03 11:03:05] (step=0044622) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.730581099589122, LR: 0.0003 +[2026-03-03 11:03:12] (step=0044623) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.730776756016436, LR: 0.0003 +[2026-03-03 11:03:20] (step=0044624) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.730972412443748, LR: 0.0003 +[2026-03-03 11:03:28] (step=0044625) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.731168068871062, LR: 0.0003 +[2026-03-03 11:03:36] (step=0044626) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.731363725298376, LR: 0.0003 +[2026-03-03 11:03:44] (step=0044627) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.73155938172569, LR: 0.0003 +[2026-03-03 11:03:52] (step=0044628) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.731755038153004, LR: 0.0003 +[2026-03-03 11:04:00] (step=0044629) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.731950694580316, LR: 0.0003 +[2026-03-03 11:04:08] (step=0044630) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.73214635100763, LR: 0.0003 +[2026-03-03 11:04:15] (step=0044631) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.732342007434944, LR: 0.0003 +[2026-03-03 11:04:23] (step=0044632) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.732537663862258, LR: 0.0003 +[2026-03-03 11:04:31] (step=0044633) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.732733320289572, LR: 0.0003 +[2026-03-03 11:04:39] (step=0044634) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.732928976716885, LR: 0.0003 +[2026-03-03 11:04:47] (step=0044635) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.733124633144199, LR: 0.0003 +[2026-03-03 11:04:55] (step=0044636) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.733320289571513, LR: 0.0003 +[2026-03-03 11:05:03] (step=0044637) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.733515945998827, LR: 0.0003 +[2026-03-03 11:05:11] (step=0044638) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 8.73371160242614, LR: 0.0003 +[2026-03-03 11:05:18] (step=0044639) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 8.733907258853453, LR: 0.0003 +[2026-03-03 11:05:26] (step=0044640) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 8.734102915280767, LR: 0.0003 +[2026-03-03 11:05:34] (step=0044641) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.73429857170808, LR: 0.0003 +[2026-03-03 11:05:42] (step=0044642) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.734494228135395, LR: 0.0003 +[2026-03-03 11:05:50] (step=0044643) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.734689884562707, LR: 0.0003 +[2026-03-03 11:05:58] (step=0044644) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.734885540990021, LR: 0.0003 +[2026-03-03 11:06:06] (step=0044645) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.735081197417335, LR: 0.0003 +[2026-03-03 11:06:13] (step=0044646) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.735276853844649, LR: 0.0003 +[2026-03-03 11:06:21] (step=0044647) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.735472510271963, LR: 0.0003 +[2026-03-03 11:06:29] (step=0044648) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 8.735668166699275, LR: 0.0003 +[2026-03-03 11:06:37] (step=0044649) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.73586382312659, LR: 0.0003 +[2026-03-03 11:06:45] (step=0044650) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.736059479553903, LR: 0.0003 +[2026-03-03 11:06:53] (step=0044651) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.736255135981217, LR: 0.0003 +[2026-03-03 11:07:01] (step=0044652) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.736450792408531, LR: 0.0003 +[2026-03-03 11:07:09] (step=0044653) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.736646448835844, LR: 0.0003 +[2026-03-03 11:07:16] (step=0044654) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.736842105263158, LR: 0.0003 +[2026-03-03 11:07:24] (step=0044655) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.737037761690472, LR: 0.0003 +[2026-03-03 11:07:32] (step=0044656) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.737233418117786, LR: 0.0003 +[2026-03-03 11:07:40] (step=0044657) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.7374290745451, LR: 0.0003 +[2026-03-03 11:07:48] (step=0044658) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.737624730972412, LR: 0.0003 +[2026-03-03 11:07:56] (step=0044659) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.737820387399726, LR: 0.0003 +[2026-03-03 11:08:04] (step=0044660) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.73801604382704, LR: 0.0003 +[2026-03-03 11:08:11] (step=0044661) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.738211700254354, LR: 0.0003 +[2026-03-03 11:08:19] (step=0044662) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.738407356681668, LR: 0.0003 +[2026-03-03 11:08:27] (step=0044663) Train Loss: 0.4529, Train Steps/Sec: 0.12, Epoch: 8.73860301310898, LR: 0.0003 +[2026-03-03 11:08:35] (step=0044664) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.738798669536294, LR: 0.0003 +[2026-03-03 11:08:43] (step=0044665) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 8.738994325963608, LR: 0.0003 +[2026-03-03 11:08:51] (step=0044666) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.739189982390922, LR: 0.0003 +[2026-03-03 11:08:59] (step=0044667) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.739385638818236, LR: 0.0003 +[2026-03-03 11:09:07] (step=0044668) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.739581295245548, LR: 0.0003 +[2026-03-03 11:09:15] (step=0044669) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 8.739776951672862, LR: 0.0003 +[2026-03-03 11:09:23] (step=0044670) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.739972608100176, LR: 0.0003 +[2026-03-03 11:09:30] (step=0044671) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.74016826452749, LR: 0.0003 +[2026-03-03 11:09:38] (step=0044672) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.740363920954803, LR: 0.0003 +[2026-03-03 11:09:46] (step=0044673) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.740559577382117, LR: 0.0003 +[2026-03-03 11:09:54] (step=0044674) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.74075523380943, LR: 0.0003 +[2026-03-03 11:10:02] (step=0044675) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.740950890236745, LR: 0.0003 +[2026-03-03 11:10:10] (step=0044676) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.741146546664059, LR: 0.0003 +[2026-03-03 11:10:18] (step=0044677) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 8.74134220309137, LR: 0.0003 +[2026-03-03 11:10:26] (step=0044678) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.741537859518685, LR: 0.0003 +[2026-03-03 11:10:33] (step=0044679) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.741733515945999, LR: 0.0003 +[2026-03-03 11:10:41] (step=0044680) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.741929172373313, LR: 0.0003 +[2026-03-03 11:10:49] (step=0044681) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.742124828800627, LR: 0.0003 +[2026-03-03 11:10:57] (step=0044682) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.742320485227939, LR: 0.0003 +[2026-03-03 11:11:05] (step=0044683) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.742516141655253, LR: 0.0003 +[2026-03-03 11:11:13] (step=0044684) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.742711798082567, LR: 0.0003 +[2026-03-03 11:11:21] (step=0044685) Train Loss: 0.4194, Train Steps/Sec: 0.13, Epoch: 8.742907454509881, LR: 0.0003 +[2026-03-03 11:11:28] (step=0044686) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.743103110937195, LR: 0.0003 +[2026-03-03 11:11:36] (step=0044687) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.743298767364507, LR: 0.0003 +[2026-03-03 11:11:44] (step=0044688) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.743494423791821, LR: 0.0003 +[2026-03-03 11:11:52] (step=0044689) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.743690080219135, LR: 0.0003 +[2026-03-03 11:12:00] (step=0044690) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.74388573664645, LR: 0.0003 +[2026-03-03 11:12:08] (step=0044691) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.744081393073763, LR: 0.0003 +[2026-03-03 11:12:16] (step=0044692) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.744277049501076, LR: 0.0003 +[2026-03-03 11:12:24] (step=0044693) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 8.74447270592839, LR: 0.0003 +[2026-03-03 11:12:31] (step=0044694) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.744668362355704, LR: 0.0003 +[2026-03-03 11:12:39] (step=0044695) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.744864018783018, LR: 0.0003 +[2026-03-03 11:12:47] (step=0044696) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.74505967521033, LR: 0.0003 +[2026-03-03 11:12:55] (step=0044697) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.745255331637644, LR: 0.0003 +[2026-03-03 11:13:03] (step=0044698) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.745450988064958, LR: 0.0003 +[2026-03-03 11:13:11] (step=0044699) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.745646644492272, LR: 0.0003 +[2026-03-03 11:13:19] (step=0044700) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.745842300919586, LR: 0.0003 +[2026-03-03 11:13:26] (step=0044701) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.746037957346898, LR: 0.0003 +[2026-03-03 11:13:34] (step=0044702) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.746233613774212, LR: 0.0003 +[2026-03-03 11:13:42] (step=0044703) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.746429270201526, LR: 0.0003 +[2026-03-03 11:13:50] (step=0044704) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.74662492662884, LR: 0.0003 +[2026-03-03 11:13:58] (step=0044705) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.746820583056154, LR: 0.0003 +[2026-03-03 11:14:06] (step=0044706) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.747016239483466, LR: 0.0003 +[2026-03-03 11:14:14] (step=0044707) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.74721189591078, LR: 0.0003 +[2026-03-03 11:14:22] (step=0044708) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.747407552338094, LR: 0.0003 +[2026-03-03 11:14:29] (step=0044709) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.747603208765408, LR: 0.0003 +[2026-03-03 11:14:37] (step=0044710) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.747798865192722, LR: 0.0003 +[2026-03-03 11:14:45] (step=0044711) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 8.747994521620035, LR: 0.0003 +[2026-03-03 11:14:53] (step=0044712) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.748190178047349, LR: 0.0003 +[2026-03-03 11:15:01] (step=0044713) Train Loss: 0.4402, Train Steps/Sec: 0.12, Epoch: 8.748385834474663, LR: 0.0003 +[2026-03-03 11:15:09] (step=0044714) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 8.748581490901977, LR: 0.0003 +[2026-03-03 11:15:17] (step=0044715) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 8.74877714732929, LR: 0.0003 +[2026-03-03 11:15:25] (step=0044716) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.748972803756603, LR: 0.0003 +[2026-03-03 11:15:33] (step=0044717) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.749168460183917, LR: 0.0003 +[2026-03-03 11:15:40] (step=0044718) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.74936411661123, LR: 0.0003 +[2026-03-03 11:15:48] (step=0044719) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.749559773038545, LR: 0.0003 +[2026-03-03 11:15:56] (step=0044720) Train Loss: 0.4456, Train Steps/Sec: 0.12, Epoch: 8.749755429465859, LR: 0.0003 +[2026-03-03 11:16:04] (step=0044721) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.749951085893171, LR: 0.0003 +[2026-03-03 11:16:12] (step=0044722) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 8.750146742320485, LR: 0.0003 +[2026-03-03 11:16:20] (step=0044723) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.750342398747799, LR: 0.0003 +[2026-03-03 11:16:28] (step=0044724) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.750538055175113, LR: 0.0003 +[2026-03-03 11:16:36] (step=0044725) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.750733711602425, LR: 0.0003 +[2026-03-03 11:16:44] (step=0044726) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.75092936802974, LR: 0.0003 +[2026-03-03 11:16:51] (step=0044727) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.751125024457053, LR: 0.0003 +[2026-03-03 11:16:59] (step=0044728) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.751320680884367, LR: 0.0003 +[2026-03-03 11:17:07] (step=0044729) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.751516337311681, LR: 0.0003 +[2026-03-03 11:17:15] (step=0044730) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.751711993738994, LR: 0.0003 +[2026-03-03 11:17:23] (step=0044731) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.751907650166308, LR: 0.0003 +[2026-03-03 11:17:31] (step=0044732) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.752103306593622, LR: 0.0003 +[2026-03-03 11:17:39] (step=0044733) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 8.752298963020936, LR: 0.0003 +[2026-03-03 11:17:46] (step=0044734) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.75249461944825, LR: 0.0003 +[2026-03-03 11:17:54] (step=0044735) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 8.752690275875562, LR: 0.0003 +[2026-03-03 11:18:02] (step=0044736) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.752885932302876, LR: 0.0003 +[2026-03-03 11:18:10] (step=0044737) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.75308158873019, LR: 0.0003 +[2026-03-03 11:18:18] (step=0044738) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.753277245157504, LR: 0.0003 +[2026-03-03 11:18:26] (step=0044739) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.753472901584818, LR: 0.0003 +[2026-03-03 11:18:34] (step=0044740) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 8.75366855801213, LR: 0.0003 +[2026-03-03 11:18:41] (step=0044741) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.753864214439444, LR: 0.0003 +[2026-03-03 11:18:49] (step=0044742) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 8.754059870866758, LR: 0.0003 +[2026-03-03 11:18:57] (step=0044743) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.754255527294072, LR: 0.0003 +[2026-03-03 11:19:05] (step=0044744) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.754451183721386, LR: 0.0003 +[2026-03-03 11:19:13] (step=0044745) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.754646840148698, LR: 0.0003 +[2026-03-03 11:19:21] (step=0044746) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 8.754842496576012, LR: 0.0003 +[2026-03-03 11:19:29] (step=0044747) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.755038153003326, LR: 0.0003 +[2026-03-03 11:19:37] (step=0044748) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.75523380943064, LR: 0.0003 +[2026-03-03 11:19:44] (step=0044749) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.755429465857953, LR: 0.0003 +[2026-03-03 11:19:52] (step=0044750) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.755625122285267, LR: 0.0003 +[2026-03-03 11:20:00] (step=0044751) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.75582077871258, LR: 0.0003 +[2026-03-03 11:20:08] (step=0044752) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.756016435139895, LR: 0.0003 +[2026-03-03 11:20:16] (step=0044753) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 8.756212091567209, LR: 0.0003 +[2026-03-03 11:20:24] (step=0044754) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.75640774799452, LR: 0.0003 +[2026-03-03 11:20:32] (step=0044755) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.756603404421835, LR: 0.0003 +[2026-03-03 11:20:40] (step=0044756) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.756799060849149, LR: 0.0003 +[2026-03-03 11:20:47] (step=0044757) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.756994717276463, LR: 0.0003 +[2026-03-03 11:20:55] (step=0044758) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.757190373703777, LR: 0.0003 +[2026-03-03 11:21:03] (step=0044759) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.757386030131089, LR: 0.0003 +[2026-03-03 11:21:11] (step=0044760) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.757581686558403, LR: 0.0003 +[2026-03-03 11:21:19] (step=0044761) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.757777342985717, LR: 0.0003 +[2026-03-03 11:21:27] (step=0044762) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.757972999413031, LR: 0.0003 +[2026-03-03 11:21:35] (step=0044763) Train Loss: 0.4624, Train Steps/Sec: 0.12, Epoch: 8.758168655840345, LR: 0.0003 +[2026-03-03 11:21:43] (step=0044764) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.758364312267657, LR: 0.0003 +[2026-03-03 11:21:50] (step=0044765) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.758559968694971, LR: 0.0003 +[2026-03-03 11:21:58] (step=0044766) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.758755625122285, LR: 0.0003 +[2026-03-03 11:22:06] (step=0044767) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 8.7589512815496, LR: 0.0003 +[2026-03-03 11:22:14] (step=0044768) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.759146937976913, LR: 0.0003 +[2026-03-03 11:22:22] (step=0044769) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.759342594404226, LR: 0.0003 +[2026-03-03 11:22:30] (step=0044770) Train Loss: 0.4488, Train Steps/Sec: 0.12, Epoch: 8.75953825083154, LR: 0.0003 +[2026-03-03 11:22:38] (step=0044771) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.759733907258854, LR: 0.0003 +[2026-03-03 11:22:46] (step=0044772) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.759929563686168, LR: 0.0003 +[2026-03-03 11:22:54] (step=0044773) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.760125220113482, LR: 0.0003 +[2026-03-03 11:23:01] (step=0044774) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.760320876540794, LR: 0.0003 +[2026-03-03 11:23:09] (step=0044775) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 8.760516532968108, LR: 0.0003 +[2026-03-03 11:23:17] (step=0044776) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.760712189395422, LR: 0.0003 +[2026-03-03 11:23:25] (step=0044777) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 8.760907845822736, LR: 0.0003 +[2026-03-03 11:23:33] (step=0044778) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.761103502250048, LR: 0.0003 +[2026-03-03 11:23:41] (step=0044779) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 8.761299158677362, LR: 0.0003 +[2026-03-03 11:23:49] (step=0044780) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.761494815104676, LR: 0.0003 +[2026-03-03 11:23:56] (step=0044781) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 8.76169047153199, LR: 0.0003 +[2026-03-03 11:24:04] (step=0044782) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.761886127959304, LR: 0.0003 +[2026-03-03 11:24:12] (step=0044783) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 8.762081784386616, LR: 0.0003 +[2026-03-03 11:24:20] (step=0044784) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.76227744081393, LR: 0.0003 +[2026-03-03 11:24:28] (step=0044785) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.762473097241244, LR: 0.0003 +[2026-03-03 11:24:36] (step=0044786) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.762668753668558, LR: 0.0003 +[2026-03-03 11:24:44] (step=0044787) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.762864410095872, LR: 0.0003 +[2026-03-03 11:24:52] (step=0044788) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.763060066523185, LR: 0.0003 +[2026-03-03 11:24:59] (step=0044789) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.763255722950499, LR: 0.0003 +[2026-03-03 11:25:07] (step=0044790) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.763451379377813, LR: 0.0003 +[2026-03-03 11:25:15] (step=0044791) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.763647035805127, LR: 0.0003 +[2026-03-03 11:25:23] (step=0044792) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.76384269223244, LR: 0.0003 +[2026-03-03 11:25:31] (step=0044793) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.764038348659753, LR: 0.0003 +[2026-03-03 11:25:39] (step=0044794) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.764234005087067, LR: 0.0003 +[2026-03-03 11:25:47] (step=0044795) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.76442966151438, LR: 0.0003 +[2026-03-03 11:25:55] (step=0044796) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.764625317941695, LR: 0.0003 +[2026-03-03 11:26:02] (step=0044797) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.764820974369009, LR: 0.0003 +[2026-03-03 11:26:10] (step=0044798) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.765016630796321, LR: 0.0003 +[2026-03-03 11:26:18] (step=0044799) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.765212287223635, LR: 0.0003 +[2026-03-03 11:26:26] (step=0044800) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.765407943650949, LR: 0.0003 +[2026-03-03 11:26:34] (step=0044801) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.765603600078263, LR: 0.0003 +[2026-03-03 11:26:42] (step=0044802) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.765799256505575, LR: 0.0003 +[2026-03-03 11:26:50] (step=0044803) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.76599491293289, LR: 0.0003 +[2026-03-03 11:26:57] (step=0044804) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.766190569360203, LR: 0.0003 +[2026-03-03 11:27:05] (step=0044805) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.766386225787517, LR: 0.0003 +[2026-03-03 11:27:13] (step=0044806) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.766581882214831, LR: 0.0003 +[2026-03-03 11:27:21] (step=0044807) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.766777538642144, LR: 0.0003 +[2026-03-03 11:27:29] (step=0044808) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.766973195069458, LR: 0.0003 +[2026-03-03 11:27:37] (step=0044809) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 8.767168851496772, LR: 0.0003 +[2026-03-03 11:27:45] (step=0044810) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.767364507924086, LR: 0.0003 +[2026-03-03 11:27:53] (step=0044811) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.7675601643514, LR: 0.0003 +[2026-03-03 11:28:01] (step=0044812) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.767755820778712, LR: 0.0003 +[2026-03-03 11:28:08] (step=0044813) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.767951477206026, LR: 0.0003 +[2026-03-03 11:28:16] (step=0044814) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.76814713363334, LR: 0.0003 +[2026-03-03 11:28:24] (step=0044815) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.768342790060654, LR: 0.0003 +[2026-03-03 11:28:32] (step=0044816) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.768538446487968, LR: 0.0003 +[2026-03-03 11:28:40] (step=0044817) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.76873410291528, LR: 0.0003 +[2026-03-03 11:28:48] (step=0044818) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.768929759342594, LR: 0.0003 +[2026-03-03 11:28:56] (step=0044819) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.769125415769908, LR: 0.0003 +[2026-03-03 11:29:04] (step=0044820) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.769321072197222, LR: 0.0003 +[2026-03-03 11:29:12] (step=0044821) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 8.769516728624536, LR: 0.0003 +[2026-03-03 11:29:19] (step=0044822) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.769712385051848, LR: 0.0003 +[2026-03-03 11:29:27] (step=0044823) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.769908041479162, LR: 0.0003 +[2026-03-03 11:29:35] (step=0044824) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.770103697906476, LR: 0.0003 +[2026-03-03 11:29:43] (step=0044825) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.77029935433379, LR: 0.0003 +[2026-03-03 11:29:51] (step=0044826) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.770495010761104, LR: 0.0003 +[2026-03-03 11:29:59] (step=0044827) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.770690667188417, LR: 0.0003 +[2026-03-03 11:30:07] (step=0044828) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.77088632361573, LR: 0.0003 +[2026-03-03 11:30:14] (step=0044829) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.771081980043045, LR: 0.0003 +[2026-03-03 11:30:22] (step=0044830) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 8.771277636470359, LR: 0.0003 +[2026-03-03 11:30:30] (step=0044831) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.77147329289767, LR: 0.0003 +[2026-03-03 11:30:38] (step=0044832) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.771668949324985, LR: 0.0003 +[2026-03-03 11:30:46] (step=0044833) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.771864605752299, LR: 0.0003 +[2026-03-03 11:30:54] (step=0044834) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.772060262179613, LR: 0.0003 +[2026-03-03 11:31:02] (step=0044835) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.772255918606927, LR: 0.0003 +[2026-03-03 11:31:10] (step=0044836) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.77245157503424, LR: 0.0003 +[2026-03-03 11:31:17] (step=0044837) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.772647231461553, LR: 0.0003 +[2026-03-03 11:31:25] (step=0044838) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.772842887888867, LR: 0.0003 +[2026-03-03 11:31:33] (step=0044839) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.773038544316181, LR: 0.0003 +[2026-03-03 11:31:41] (step=0044840) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.773234200743495, LR: 0.0003 +[2026-03-03 11:31:49] (step=0044841) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.773429857170807, LR: 0.0003 +[2026-03-03 11:31:57] (step=0044842) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.773625513598121, LR: 0.0003 +[2026-03-03 11:32:05] (step=0044843) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.773821170025435, LR: 0.0003 +[2026-03-03 11:32:12] (step=0044844) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.77401682645275, LR: 0.0003 +[2026-03-03 11:32:20] (step=0044845) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.774212482880063, LR: 0.0003 +[2026-03-03 11:32:28] (step=0044846) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.774408139307376, LR: 0.0003 +[2026-03-03 11:32:36] (step=0044847) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.77460379573469, LR: 0.0003 +[2026-03-03 11:32:44] (step=0044848) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.774799452162004, LR: 0.0003 +[2026-03-03 11:32:52] (step=0044849) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.774995108589318, LR: 0.0003 +[2026-03-03 11:33:00] (step=0044850) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.775190765016632, LR: 0.0003 +[2026-03-03 11:33:08] (step=0044851) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.775386421443944, LR: 0.0003 +[2026-03-03 11:33:15] (step=0044852) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.775582077871258, LR: 0.0003 +[2026-03-03 11:33:23] (step=0044853) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.775777734298572, LR: 0.0003 +[2026-03-03 11:33:31] (step=0044854) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.775973390725886, LR: 0.0003 +[2026-03-03 11:33:39] (step=0044855) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.776169047153198, LR: 0.0003 +[2026-03-03 11:33:47] (step=0044856) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 8.776364703580512, LR: 0.0003 +[2026-03-03 11:33:55] (step=0044857) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.776560360007826, LR: 0.0003 +[2026-03-03 11:34:03] (step=0044858) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.77675601643514, LR: 0.0003 +[2026-03-03 11:34:11] (step=0044859) Train Loss: 0.4554, Train Steps/Sec: 0.12, Epoch: 8.776951672862454, LR: 0.0003 +[2026-03-03 11:34:18] (step=0044860) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.777147329289766, LR: 0.0003 +[2026-03-03 11:34:26] (step=0044861) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.77734298571708, LR: 0.0003 +[2026-03-03 11:34:34] (step=0044862) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 8.777538642144394, LR: 0.0003 +[2026-03-03 11:34:42] (step=0044863) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.777734298571708, LR: 0.0003 +[2026-03-03 11:34:50] (step=0044864) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.777929954999022, LR: 0.0003 +[2026-03-03 11:34:58] (step=0044865) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.778125611426335, LR: 0.0003 +[2026-03-03 11:35:06] (step=0044866) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.778321267853649, LR: 0.0003 +[2026-03-03 11:35:14] (step=0044867) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 8.778516924280963, LR: 0.0003 +[2026-03-03 11:35:21] (step=0044868) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.778712580708277, LR: 0.0003 +[2026-03-03 11:35:29] (step=0044869) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.77890823713559, LR: 0.0003 +[2026-03-03 11:35:37] (step=0044870) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.779103893562903, LR: 0.0003 +[2026-03-03 11:35:45] (step=0044871) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.779299549990217, LR: 0.0003 +[2026-03-03 11:35:53] (step=0044872) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.779495206417531, LR: 0.0003 +[2026-03-03 11:36:01] (step=0044873) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.779690862844845, LR: 0.0003 +[2026-03-03 11:36:09] (step=0044874) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.779886519272159, LR: 0.0003 +[2026-03-03 11:36:17] (step=0044875) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.780082175699471, LR: 0.0003 +[2026-03-03 11:36:24] (step=0044876) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.780277832126785, LR: 0.0003 +[2026-03-03 11:36:32] (step=0044877) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.7804734885541, LR: 0.0003 +[2026-03-03 11:36:40] (step=0044878) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.780669144981413, LR: 0.0003 +[2026-03-03 11:36:48] (step=0044879) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.780864801408727, LR: 0.0003 +[2026-03-03 11:36:56] (step=0044880) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.78106045783604, LR: 0.0003 +[2026-03-03 11:37:04] (step=0044881) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 8.781256114263353, LR: 0.0003 +[2026-03-03 11:37:12] (step=0044882) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.781451770690667, LR: 0.0003 +[2026-03-03 11:37:20] (step=0044883) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.781647427117981, LR: 0.0003 +[2026-03-03 11:37:27] (step=0044884) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.781843083545294, LR: 0.0003 +[2026-03-03 11:37:35] (step=0044885) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.782038739972608, LR: 0.0003 +[2026-03-03 11:37:43] (step=0044886) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.782234396399922, LR: 0.0003 +[2026-03-03 11:37:51] (step=0044887) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.782430052827236, LR: 0.0003 +[2026-03-03 11:37:59] (step=0044888) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.78262570925455, LR: 0.0003 +[2026-03-03 11:38:07] (step=0044889) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.782821365681862, LR: 0.0003 +[2026-03-03 11:38:15] (step=0044890) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.783017022109176, LR: 0.0003 +[2026-03-03 11:38:22] (step=0044891) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.78321267853649, LR: 0.0003 +[2026-03-03 11:38:30] (step=0044892) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.783408334963804, LR: 0.0003 +[2026-03-03 11:38:38] (step=0044893) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.783603991391118, LR: 0.0003 +[2026-03-03 11:38:46] (step=0044894) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.78379964781843, LR: 0.0003 +[2026-03-03 11:38:54] (step=0044895) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.783995304245744, LR: 0.0003 +[2026-03-03 11:39:02] (step=0044896) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.784190960673058, LR: 0.0003 +[2026-03-03 11:39:10] (step=0044897) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.784386617100372, LR: 0.0003 +[2026-03-03 11:39:17] (step=0044898) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.784582273527686, LR: 0.0003 +[2026-03-03 11:39:25] (step=0044899) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.784777929954998, LR: 0.0003 +[2026-03-03 11:39:33] (step=0044900) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.784973586382312, LR: 0.0003 +[2026-03-03 11:39:41] (step=0044901) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.785169242809626, LR: 0.0003 +[2026-03-03 11:39:49] (step=0044902) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.78536489923694, LR: 0.0003 +[2026-03-03 11:39:57] (step=0044903) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.785560555664254, LR: 0.0003 +[2026-03-03 11:40:05] (step=0044904) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.785756212091567, LR: 0.0003 +[2026-03-03 11:40:13] (step=0044905) Train Loss: 0.4291, Train Steps/Sec: 0.12, Epoch: 8.78595186851888, LR: 0.0003 +[2026-03-03 11:40:21] (step=0044906) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.786147524946195, LR: 0.0003 +[2026-03-03 11:40:28] (step=0044907) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.786343181373509, LR: 0.0003 +[2026-03-03 11:40:36] (step=0044908) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.786538837800821, LR: 0.0003 +[2026-03-03 11:40:44] (step=0044909) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 8.786734494228135, LR: 0.0003 +[2026-03-03 11:40:52] (step=0044910) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.786930150655449, LR: 0.0003 +[2026-03-03 11:41:00] (step=0044911) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.787125807082763, LR: 0.0003 +[2026-03-03 11:41:08] (step=0044912) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 8.787321463510077, LR: 0.0003 +[2026-03-03 11:41:16] (step=0044913) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.78751711993739, LR: 0.0003 +[2026-03-03 11:41:24] (step=0044914) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.787712776364703, LR: 0.0003 +[2026-03-03 11:41:32] (step=0044915) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.787908432792017, LR: 0.0003 +[2026-03-03 11:41:39] (step=0044916) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.788104089219331, LR: 0.0003 +[2026-03-03 11:41:47] (step=0044917) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.788299745646645, LR: 0.0003 +[2026-03-03 11:41:55] (step=0044918) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.788495402073957, LR: 0.0003 +[2026-03-03 11:42:03] (step=0044919) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.788691058501271, LR: 0.0003 +[2026-03-03 11:42:11] (step=0044920) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.788886714928585, LR: 0.0003 +[2026-03-03 11:42:19] (step=0044921) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.7890823713559, LR: 0.0003 +[2026-03-03 11:42:27] (step=0044922) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.789278027783213, LR: 0.0003 +[2026-03-03 11:42:34] (step=0044923) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.789473684210526, LR: 0.0003 +[2026-03-03 11:42:42] (step=0044924) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.78966934063784, LR: 0.0003 +[2026-03-03 11:42:50] (step=0044925) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.789864997065154, LR: 0.0003 +[2026-03-03 11:42:58] (step=0044926) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.790060653492468, LR: 0.0003 +[2026-03-03 11:43:06] (step=0044927) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.790256309919782, LR: 0.0003 +[2026-03-03 11:43:14] (step=0044928) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 8.790451966347094, LR: 0.0003 +[2026-03-03 11:43:22] (step=0044929) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.790647622774408, LR: 0.0003 +[2026-03-03 11:43:30] (step=0044930) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.790843279201722, LR: 0.0003 +[2026-03-03 11:43:37] (step=0044931) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.791038935629036, LR: 0.0003 +[2026-03-03 11:43:45] (step=0044932) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.79123459205635, LR: 0.0003 +[2026-03-03 11:43:53] (step=0044933) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.791430248483662, LR: 0.0003 +[2026-03-03 11:44:01] (step=0044934) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.791625904910976, LR: 0.0003 +[2026-03-03 11:44:09] (step=0044935) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.79182156133829, LR: 0.0003 +[2026-03-03 11:44:17] (step=0044936) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.792017217765604, LR: 0.0003 +[2026-03-03 11:44:25] (step=0044937) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.792212874192916, LR: 0.0003 +[2026-03-03 11:44:32] (step=0044938) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.79240853062023, LR: 0.0003 +[2026-03-03 11:44:40] (step=0044939) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.792604187047544, LR: 0.0003 +[2026-03-03 11:44:48] (step=0044940) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.792799843474858, LR: 0.0003 +[2026-03-03 11:44:56] (step=0044941) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.792995499902172, LR: 0.0003 +[2026-03-03 11:45:04] (step=0044942) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.793191156329485, LR: 0.0003 +[2026-03-03 11:45:12] (step=0044943) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.793386812756799, LR: 0.0003 +[2026-03-03 11:45:20] (step=0044944) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.793582469184113, LR: 0.0003 +[2026-03-03 11:45:28] (step=0044945) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 8.793778125611427, LR: 0.0003 +[2026-03-03 11:45:35] (step=0044946) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.79397378203874, LR: 0.0003 +[2026-03-03 11:45:43] (step=0044947) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.794169438466053, LR: 0.0003 +[2026-03-03 11:45:51] (step=0044948) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 8.794365094893367, LR: 0.0003 +[2026-03-03 11:45:59] (step=0044949) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.794560751320681, LR: 0.0003 +[2026-03-03 11:46:07] (step=0044950) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.794756407747995, LR: 0.0003 +[2026-03-03 11:46:15] (step=0044951) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 8.794952064175309, LR: 0.0003 +[2026-03-03 11:46:23] (step=0044952) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.795147720602621, LR: 0.0003 +[2026-03-03 11:46:31] (step=0044953) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 8.795343377029935, LR: 0.0003 +[2026-03-03 11:46:38] (step=0044954) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 8.79553903345725, LR: 0.0003 +[2026-03-03 11:46:46] (step=0044955) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 8.795734689884563, LR: 0.0003 +[2026-03-03 11:46:54] (step=0044956) Train Loss: 0.4385, Train Steps/Sec: 0.12, Epoch: 8.795930346311877, LR: 0.0003 +[2026-03-03 11:47:02] (step=0044957) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 8.79612600273919, LR: 0.0003 +[2026-03-03 11:47:10] (step=0044958) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.796321659166503, LR: 0.0003 +[2026-03-03 11:47:18] (step=0044959) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 8.796517315593817, LR: 0.0003 +[2026-03-03 11:47:26] (step=0044960) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.796712972021131, LR: 0.0003 +[2026-03-03 11:47:34] (step=0044961) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.796908628448444, LR: 0.0003 +[2026-03-03 11:47:41] (step=0044962) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.797104284875758, LR: 0.0003 +[2026-03-03 11:47:49] (step=0044963) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.797299941303072, LR: 0.0003 +[2026-03-03 11:47:57] (step=0044964) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 8.797495597730386, LR: 0.0003 +[2026-03-03 11:48:05] (step=0044965) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.7976912541577, LR: 0.0003 +[2026-03-03 11:48:13] (step=0044966) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.797886910585012, LR: 0.0003 +[2026-03-03 11:48:21] (step=0044967) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.798082567012326, LR: 0.0003 +[2026-03-03 11:48:29] (step=0044968) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.79827822343964, LR: 0.0003 +[2026-03-03 11:48:37] (step=0044969) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.798473879866954, LR: 0.0003 +[2026-03-03 11:48:45] (step=0044970) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.798669536294268, LR: 0.0003 +[2026-03-03 11:48:52] (step=0044971) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 8.79886519272158, LR: 0.0003 +[2026-03-03 11:49:00] (step=0044972) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 8.799060849148894, LR: 0.0003 +[2026-03-03 11:49:08] (step=0044973) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.799256505576208, LR: 0.0003 +[2026-03-03 11:49:16] (step=0044974) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.799452162003522, LR: 0.0003 +[2026-03-03 11:49:24] (step=0044975) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 8.799647818430836, LR: 0.0003 +[2026-03-03 11:49:32] (step=0044976) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.799843474858148, LR: 0.0003 +[2026-03-03 11:49:40] (step=0044977) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 8.800039131285462, LR: 0.0003 +[2026-03-03 11:49:48] (step=0044978) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.800234787712776, LR: 0.0003 +[2026-03-03 11:49:55] (step=0044979) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.80043044414009, LR: 0.0003 +[2026-03-03 11:50:03] (step=0044980) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.800626100567404, LR: 0.0003 +[2026-03-03 11:50:11] (step=0044981) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.800821756994717, LR: 0.0003 +[2026-03-03 11:50:19] (step=0044982) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.80101741342203, LR: 0.0003 +[2026-03-03 11:50:27] (step=0044983) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 8.801213069849345, LR: 0.0003 +[2026-03-03 11:50:35] (step=0044984) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.801408726276659, LR: 0.0003 +[2026-03-03 11:50:43] (step=0044985) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.801604382703971, LR: 0.0003 +[2026-03-03 11:50:50] (step=0044986) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.801800039131285, LR: 0.0003 +[2026-03-03 11:50:58] (step=0044987) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.801995695558599, LR: 0.0003 +[2026-03-03 11:51:06] (step=0044988) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.802191351985913, LR: 0.0003 +[2026-03-03 11:51:14] (step=0044989) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.802387008413227, LR: 0.0003 +[2026-03-03 11:51:22] (step=0044990) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.80258266484054, LR: 0.0003 +[2026-03-03 11:51:30] (step=0044991) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 8.802778321267853, LR: 0.0003 +[2026-03-03 11:51:38] (step=0044992) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.802973977695167, LR: 0.0003 +[2026-03-03 11:51:46] (step=0044993) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 8.803169634122481, LR: 0.0003 +[2026-03-03 11:51:53] (step=0044994) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.803365290549795, LR: 0.0003 +[2026-03-03 11:52:01] (step=0044995) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.803560946977107, LR: 0.0003 +[2026-03-03 11:52:09] (step=0044996) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.803756603404421, LR: 0.0003 +[2026-03-03 11:52:17] (step=0044997) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.803952259831735, LR: 0.0003 +[2026-03-03 11:52:25] (step=0044998) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.80414791625905, LR: 0.0003 +[2026-03-03 11:52:33] (step=0044999) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.804343572686363, LR: 0.0003 +[2026-03-03 11:52:41] (step=0045000) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 8.804539229113676, LR: 0.0003 +[2026-03-03 11:52:41] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0045000/ +[2026-03-03 11:52:48] (step=0045001) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.80473488554099, LR: 0.0003 +[2026-03-03 11:52:57] (step=0045002) Train Loss: 0.4547, Train Steps/Sec: 0.12, Epoch: 8.804930541968304, LR: 0.0003 +[2026-03-03 11:53:04] (step=0045003) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.805126198395618, LR: 0.0003 +[2026-03-03 11:53:12] (step=0045004) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.805321854822932, LR: 0.0003 +[2026-03-03 11:53:20] (step=0045005) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 8.805517511250244, LR: 0.0003 +[2026-03-03 11:53:28] (step=0045006) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.805713167677558, LR: 0.0003 +[2026-03-03 11:53:36] (step=0045007) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.805908824104872, LR: 0.0003 +[2026-03-03 11:53:44] (step=0045008) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.806104480532186, LR: 0.0003 +[2026-03-03 11:53:52] (step=0045009) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.8063001369595, LR: 0.0003 +[2026-03-03 11:53:59] (step=0045010) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.806495793386812, LR: 0.0003 +[2026-03-03 11:54:07] (step=0045011) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.806691449814126, LR: 0.0003 +[2026-03-03 11:54:15] (step=0045012) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.80688710624144, LR: 0.0003 +[2026-03-03 11:54:23] (step=0045013) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.807082762668754, LR: 0.0003 +[2026-03-03 11:54:31] (step=0045014) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.807278419096066, LR: 0.0003 +[2026-03-03 11:54:39] (step=0045015) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.80747407552338, LR: 0.0003 +[2026-03-03 11:54:47] (step=0045016) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 8.807669731950694, LR: 0.0003 +[2026-03-03 11:54:55] (step=0045017) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.807865388378008, LR: 0.0003 +[2026-03-03 11:55:03] (step=0045018) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.808061044805322, LR: 0.0003 +[2026-03-03 11:55:10] (step=0045019) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 8.808256701232635, LR: 0.0003 +[2026-03-03 11:55:18] (step=0045020) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.808452357659949, LR: 0.0003 +[2026-03-03 11:55:26] (step=0045021) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 8.808648014087263, LR: 0.0003 +[2026-03-03 11:55:34] (step=0045022) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.808843670514577, LR: 0.0003 +[2026-03-03 11:55:42] (step=0045023) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.80903932694189, LR: 0.0003 +[2026-03-03 11:55:50] (step=0045024) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.809234983369203, LR: 0.0003 +[2026-03-03 11:55:58] (step=0045025) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.809430639796517, LR: 0.0003 +[2026-03-03 11:56:06] (step=0045026) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 8.809626296223831, LR: 0.0003 +[2026-03-03 11:56:13] (step=0045027) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.809821952651145, LR: 0.0003 +[2026-03-03 11:56:21] (step=0045028) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 8.810017609078459, LR: 0.0003 +[2026-03-03 11:56:29] (step=0045029) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.810213265505771, LR: 0.0003 +[2026-03-03 11:56:37] (step=0045030) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.810408921933085, LR: 0.0003 +[2026-03-03 11:56:45] (step=0045031) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.8106045783604, LR: 0.0003 +[2026-03-03 11:56:53] (step=0045032) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.810800234787713, LR: 0.0003 +[2026-03-03 11:57:01] (step=0045033) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.810995891215027, LR: 0.0003 +[2026-03-03 11:57:08] (step=0045034) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.81119154764234, LR: 0.0003 +[2026-03-03 11:57:16] (step=0045035) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.811387204069653, LR: 0.0003 +[2026-03-03 11:57:24] (step=0045036) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.811582860496967, LR: 0.0003 +[2026-03-03 11:57:32] (step=0045037) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.811778516924281, LR: 0.0003 +[2026-03-03 11:57:40] (step=0045038) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.811974173351594, LR: 0.0003 +[2026-03-03 11:57:48] (step=0045039) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.812169829778908, LR: 0.0003 +[2026-03-03 11:57:56] (step=0045040) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.812365486206222, LR: 0.0003 +[2026-03-03 11:58:04] (step=0045041) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.812561142633536, LR: 0.0003 +[2026-03-03 11:58:11] (step=0045042) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.81275679906085, LR: 0.0003 +[2026-03-03 11:58:19] (step=0045043) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.812952455488162, LR: 0.0003 +[2026-03-03 11:58:27] (step=0045044) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.813148111915476, LR: 0.0003 +[2026-03-03 11:58:35] (step=0045045) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.81334376834279, LR: 0.0003 +[2026-03-03 11:58:43] (step=0045046) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 8.813539424770104, LR: 0.0003 +[2026-03-03 11:58:51] (step=0045047) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 8.813735081197418, LR: 0.0003 +[2026-03-03 11:58:59] (step=0045048) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.81393073762473, LR: 0.0003 +[2026-03-03 11:59:06] (step=0045049) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.814126394052044, LR: 0.0003 +[2026-03-03 11:59:14] (step=0045050) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.814322050479358, LR: 0.0003 +[2026-03-03 11:59:22] (step=0045051) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.814517706906672, LR: 0.0003 +[2026-03-03 11:59:30] (step=0045052) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 8.814713363333986, LR: 0.0003 +[2026-03-03 11:59:38] (step=0045053) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.814909019761298, LR: 0.0003 +[2026-03-03 11:59:46] (step=0045054) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 8.815104676188612, LR: 0.0003 +[2026-03-03 11:59:54] (step=0045055) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.815300332615926, LR: 0.0003 +[2026-03-03 12:00:02] (step=0045056) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.81549598904324, LR: 0.0003 +[2026-03-03 12:00:10] (step=0045057) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.815691645470555, LR: 0.0003 +[2026-03-03 12:00:17] (step=0045058) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.815887301897867, LR: 0.0003 +[2026-03-03 12:00:25] (step=0045059) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.81608295832518, LR: 0.0003 +[2026-03-03 12:00:33] (step=0045060) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.816278614752495, LR: 0.0003 +[2026-03-03 12:00:41] (step=0045061) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.816474271179809, LR: 0.0003 +[2026-03-03 12:00:49] (step=0045062) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 8.816669927607123, LR: 0.0003 +[2026-03-03 12:00:57] (step=0045063) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.816865584034435, LR: 0.0003 +[2026-03-03 12:01:05] (step=0045064) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.817061240461749, LR: 0.0003 +[2026-03-03 12:01:13] (step=0045065) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.817256896889063, LR: 0.0003 +[2026-03-03 12:01:20] (step=0045066) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 8.817452553316377, LR: 0.0003 +[2026-03-03 12:01:28] (step=0045067) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.81764820974369, LR: 0.0003 +[2026-03-03 12:01:36] (step=0045068) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.817843866171003, LR: 0.0003 +[2026-03-03 12:01:44] (step=0045069) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.818039522598317, LR: 0.0003 +[2026-03-03 12:01:52] (step=0045070) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 8.818235179025631, LR: 0.0003 +[2026-03-03 12:02:00] (step=0045071) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.818430835452945, LR: 0.0003 +[2026-03-03 12:02:08] (step=0045072) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.818626491880257, LR: 0.0003 +[2026-03-03 12:02:15] (step=0045073) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.818822148307571, LR: 0.0003 +[2026-03-03 12:02:23] (step=0045074) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.819017804734886, LR: 0.0003 +[2026-03-03 12:02:31] (step=0045075) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.8192134611622, LR: 0.0003 +[2026-03-03 12:02:39] (step=0045076) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.819409117589514, LR: 0.0003 +[2026-03-03 12:02:47] (step=0045077) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.819604774016826, LR: 0.0003 +[2026-03-03 12:02:55] (step=0045078) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.81980043044414, LR: 0.0003 +[2026-03-03 12:03:03] (step=0045079) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.819996086871454, LR: 0.0003 +[2026-03-03 12:03:11] (step=0045080) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 8.820191743298768, LR: 0.0003 +[2026-03-03 12:03:18] (step=0045081) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.820387399726082, LR: 0.0003 +[2026-03-03 12:03:26] (step=0045082) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 8.820583056153394, LR: 0.0003 +[2026-03-03 12:03:34] (step=0045083) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.820778712580708, LR: 0.0003 +[2026-03-03 12:03:42] (step=0045084) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.820974369008022, LR: 0.0003 +[2026-03-03 12:03:50] (step=0045085) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.821170025435336, LR: 0.0003 +[2026-03-03 12:03:58] (step=0045086) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.82136568186265, LR: 0.0003 +[2026-03-03 12:04:06] (step=0045087) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.821561338289962, LR: 0.0003 +[2026-03-03 12:04:13] (step=0045088) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.821756994717276, LR: 0.0003 +[2026-03-03 12:04:21] (step=0045089) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.82195265114459, LR: 0.0003 +[2026-03-03 12:04:29] (step=0045090) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.822148307571904, LR: 0.0003 +[2026-03-03 12:04:37] (step=0045091) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.822343963999216, LR: 0.0003 +[2026-03-03 12:04:45] (step=0045092) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.82253962042653, LR: 0.0003 +[2026-03-03 12:04:53] (step=0045093) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.822735276853845, LR: 0.0003 +[2026-03-03 12:05:01] (step=0045094) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.822930933281159, LR: 0.0003 +[2026-03-03 12:05:09] (step=0045095) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.823126589708473, LR: 0.0003 +[2026-03-03 12:05:16] (step=0045096) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.823322246135785, LR: 0.0003 +[2026-03-03 12:05:24] (step=0045097) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.823517902563099, LR: 0.0003 +[2026-03-03 12:05:32] (step=0045098) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.823713558990413, LR: 0.0003 +[2026-03-03 12:05:40] (step=0045099) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.823909215417727, LR: 0.0003 +[2026-03-03 12:05:48] (step=0045100) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.82410487184504, LR: 0.0003 +[2026-03-03 12:05:56] (step=0045101) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.824300528272353, LR: 0.0003 +[2026-03-03 12:06:04] (step=0045102) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.824496184699667, LR: 0.0003 +[2026-03-03 12:06:12] (step=0045103) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.824691841126981, LR: 0.0003 +[2026-03-03 12:06:20] (step=0045104) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.824887497554295, LR: 0.0003 +[2026-03-03 12:06:27] (step=0045105) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.825083153981609, LR: 0.0003 +[2026-03-03 12:06:35] (step=0045106) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.825278810408921, LR: 0.0003 +[2026-03-03 12:06:43] (step=0045107) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.825474466836235, LR: 0.0003 +[2026-03-03 12:06:51] (step=0045108) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.82567012326355, LR: 0.0003 +[2026-03-03 12:06:59] (step=0045109) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 8.825865779690863, LR: 0.0003 +[2026-03-03 12:07:07] (step=0045110) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.826061436118177, LR: 0.0003 +[2026-03-03 12:07:15] (step=0045111) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.82625709254549, LR: 0.0003 +[2026-03-03 12:07:23] (step=0045112) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.826452748972804, LR: 0.0003 +[2026-03-03 12:07:30] (step=0045113) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.826648405400118, LR: 0.0003 +[2026-03-03 12:07:38] (step=0045114) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.826844061827432, LR: 0.0003 +[2026-03-03 12:07:46] (step=0045115) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.827039718254746, LR: 0.0003 +[2026-03-03 12:07:54] (step=0045116) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.827235374682058, LR: 0.0003 +[2026-03-03 12:08:02] (step=0045117) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.827431031109372, LR: 0.0003 +[2026-03-03 12:08:10] (step=0045118) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.827626687536686, LR: 0.0003 +[2026-03-03 12:08:18] (step=0045119) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.827822343964, LR: 0.0003 +[2026-03-03 12:08:26] (step=0045120) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.828018000391312, LR: 0.0003 +[2026-03-03 12:08:33] (step=0045121) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 8.828213656818626, LR: 0.0003 +[2026-03-03 12:08:41] (step=0045122) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.82840931324594, LR: 0.0003 +[2026-03-03 12:08:49] (step=0045123) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.828604969673254, LR: 0.0003 +[2026-03-03 12:08:57] (step=0045124) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.828800626100568, LR: 0.0003 +[2026-03-03 12:09:05] (step=0045125) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.82899628252788, LR: 0.0003 +[2026-03-03 12:09:13] (step=0045126) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 8.829191938955194, LR: 0.0003 +[2026-03-03 12:09:21] (step=0045127) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 8.829387595382508, LR: 0.0003 +[2026-03-03 12:09:29] (step=0045128) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.829583251809822, LR: 0.0003 +[2026-03-03 12:09:36] (step=0045129) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.829778908237136, LR: 0.0003 +[2026-03-03 12:09:44] (step=0045130) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.829974564664449, LR: 0.0003 +[2026-03-03 12:09:52] (step=0045131) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.830170221091763, LR: 0.0003 +[2026-03-03 12:10:00] (step=0045132) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.830365877519077, LR: 0.0003 +[2026-03-03 12:10:08] (step=0045133) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.83056153394639, LR: 0.0003 +[2026-03-03 12:10:16] (step=0045134) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.830757190373705, LR: 0.0003 +[2026-03-03 12:10:24] (step=0045135) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.830952846801017, LR: 0.0003 +[2026-03-03 12:10:32] (step=0045136) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.83114850322833, LR: 0.0003 +[2026-03-03 12:10:39] (step=0045137) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.831344159655645, LR: 0.0003 +[2026-03-03 12:10:47] (step=0045138) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 8.831539816082959, LR: 0.0003 +[2026-03-03 12:10:55] (step=0045139) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 8.831735472510273, LR: 0.0003 +[2026-03-03 12:11:03] (step=0045140) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.831931128937585, LR: 0.0003 +[2026-03-03 12:11:11] (step=0045141) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.832126785364899, LR: 0.0003 +[2026-03-03 12:11:19] (step=0045142) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.832322441792213, LR: 0.0003 +[2026-03-03 12:11:27] (step=0045143) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.832518098219527, LR: 0.0003 +[2026-03-03 12:11:34] (step=0045144) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.83271375464684, LR: 0.0003 +[2026-03-03 12:11:42] (step=0045145) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.832909411074153, LR: 0.0003 +[2026-03-03 12:11:50] (step=0045146) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.833105067501467, LR: 0.0003 +[2026-03-03 12:11:58] (step=0045147) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.833300723928781, LR: 0.0003 +[2026-03-03 12:12:06] (step=0045148) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 8.833496380356095, LR: 0.0003 +[2026-03-03 12:12:14] (step=0045149) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.833692036783408, LR: 0.0003 +[2026-03-03 12:12:22] (step=0045150) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.833887693210722, LR: 0.0003 +[2026-03-03 12:12:30] (step=0045151) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.834083349638036, LR: 0.0003 +[2026-03-03 12:12:37] (step=0045152) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.83427900606535, LR: 0.0003 +[2026-03-03 12:12:45] (step=0045153) Train Loss: 0.4507, Train Steps/Sec: 0.12, Epoch: 8.834474662492664, LR: 0.0003 +[2026-03-03 12:12:53] (step=0045154) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.834670318919976, LR: 0.0003 +[2026-03-03 12:13:01] (step=0045155) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.83486597534729, LR: 0.0003 +[2026-03-03 12:13:09] (step=0045156) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.835061631774604, LR: 0.0003 +[2026-03-03 12:13:17] (step=0045157) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 8.835257288201918, LR: 0.0003 +[2026-03-03 12:13:25] (step=0045158) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.835452944629232, LR: 0.0003 +[2026-03-03 12:13:33] (step=0045159) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.835648601056544, LR: 0.0003 +[2026-03-03 12:13:41] (step=0045160) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 8.835844257483858, LR: 0.0003 +[2026-03-03 12:13:49] (step=0045161) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 8.836039913911172, LR: 0.0003 +[2026-03-03 12:13:56] (step=0045162) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.836235570338486, LR: 0.0003 +[2026-03-03 12:14:04] (step=0045163) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.8364312267658, LR: 0.0003 +[2026-03-03 12:14:12] (step=0045164) Train Loss: 0.4395, Train Steps/Sec: 0.12, Epoch: 8.836626883193112, LR: 0.0003 +[2026-03-03 12:14:20] (step=0045165) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.836822539620426, LR: 0.0003 +[2026-03-03 12:14:28] (step=0045166) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.83701819604774, LR: 0.0003 +[2026-03-03 12:14:36] (step=0045167) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 8.837213852475054, LR: 0.0003 +[2026-03-03 12:14:44] (step=0045168) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.837409508902368, LR: 0.0003 +[2026-03-03 12:14:52] (step=0045169) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.83760516532968, LR: 0.0003 +[2026-03-03 12:15:00] (step=0045170) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.837800821756995, LR: 0.0003 +[2026-03-03 12:15:07] (step=0045171) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 8.837996478184309, LR: 0.0003 +[2026-03-03 12:15:15] (step=0045172) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.838192134611623, LR: 0.0003 +[2026-03-03 12:15:23] (step=0045173) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.838387791038935, LR: 0.0003 +[2026-03-03 12:15:31] (step=0045174) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.838583447466249, LR: 0.0003 +[2026-03-03 12:15:39] (step=0045175) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.838779103893563, LR: 0.0003 +[2026-03-03 12:15:47] (step=0045176) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.838974760320877, LR: 0.0003 +[2026-03-03 12:15:55] (step=0045177) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.83917041674819, LR: 0.0003 +[2026-03-03 12:16:03] (step=0045178) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.839366073175503, LR: 0.0003 +[2026-03-03 12:16:11] (step=0045179) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.839561729602817, LR: 0.0003 +[2026-03-03 12:16:18] (step=0045180) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 8.839757386030131, LR: 0.0003 +[2026-03-03 12:16:26] (step=0045181) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.839953042457445, LR: 0.0003 +[2026-03-03 12:16:34] (step=0045182) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.840148698884759, LR: 0.0003 +[2026-03-03 12:16:42] (step=0045183) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.840344355312071, LR: 0.0003 +[2026-03-03 12:16:50] (step=0045184) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.840540011739385, LR: 0.0003 +[2026-03-03 12:16:58] (step=0045185) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.8407356681667, LR: 0.0003 +[2026-03-03 12:17:06] (step=0045186) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.840931324594013, LR: 0.0003 +[2026-03-03 12:17:13] (step=0045187) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.841126981021327, LR: 0.0003 +[2026-03-03 12:17:21] (step=0045188) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.84132263744864, LR: 0.0003 +[2026-03-03 12:17:29] (step=0045189) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.841518293875954, LR: 0.0003 +[2026-03-03 12:17:37] (step=0045190) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.841713950303268, LR: 0.0003 +[2026-03-03 12:17:45] (step=0045191) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.841909606730582, LR: 0.0003 +[2026-03-03 12:17:53] (step=0045192) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.842105263157896, LR: 0.0003 +[2026-03-03 12:18:01] (step=0045193) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.842300919585208, LR: 0.0003 +[2026-03-03 12:18:09] (step=0045194) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.842496576012522, LR: 0.0003 +[2026-03-03 12:18:16] (step=0045195) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 8.842692232439836, LR: 0.0003 +[2026-03-03 12:18:24] (step=0045196) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 8.84288788886715, LR: 0.0003 +[2026-03-03 12:18:32] (step=0045197) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.843083545294462, LR: 0.0003 +[2026-03-03 12:18:40] (step=0045198) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.843279201721776, LR: 0.0003 +[2026-03-03 12:18:48] (step=0045199) Train Loss: 0.4493, Train Steps/Sec: 0.12, Epoch: 8.84347485814909, LR: 0.0003 +[2026-03-03 12:18:56] (step=0045200) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.843670514576404, LR: 0.0003 +[2026-03-03 12:19:04] (step=0045201) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.843866171003718, LR: 0.0003 +[2026-03-03 12:19:12] (step=0045202) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.84406182743103, LR: 0.0003 +[2026-03-03 12:19:20] (step=0045203) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.844257483858344, LR: 0.0003 +[2026-03-03 12:19:27] (step=0045204) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.844453140285658, LR: 0.0003 +[2026-03-03 12:19:35] (step=0045205) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.844648796712972, LR: 0.0003 +[2026-03-03 12:19:43] (step=0045206) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.844844453140286, LR: 0.0003 +[2026-03-03 12:19:51] (step=0045207) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 8.845040109567599, LR: 0.0003 +[2026-03-03 12:19:59] (step=0045208) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.845235765994913, LR: 0.0003 +[2026-03-03 12:20:07] (step=0045209) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.845431422422227, LR: 0.0003 +[2026-03-03 12:20:15] (step=0045210) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.84562707884954, LR: 0.0003 +[2026-03-03 12:20:22] (step=0045211) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.845822735276855, LR: 0.0003 +[2026-03-03 12:20:30] (step=0045212) Train Loss: 0.4242, Train Steps/Sec: 0.13, Epoch: 8.846018391704167, LR: 0.0003 +[2026-03-03 12:20:38] (step=0045213) Train Loss: 0.4338, Train Steps/Sec: 0.12, Epoch: 8.84621404813148, LR: 0.0003 +[2026-03-03 12:20:46] (step=0045214) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.846409704558795, LR: 0.0003 +[2026-03-03 12:20:54] (step=0045215) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.846605360986109, LR: 0.0003 +[2026-03-03 12:21:02] (step=0045216) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.846801017413423, LR: 0.0003 +[2026-03-03 12:21:10] (step=0045217) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.846996673840735, LR: 0.0003 +[2026-03-03 12:21:18] (step=0045218) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.847192330268049, LR: 0.0003 +[2026-03-03 12:21:26] (step=0045219) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 8.847387986695363, LR: 0.0003 +[2026-03-03 12:21:33] (step=0045220) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.847583643122677, LR: 0.0003 +[2026-03-03 12:21:41] (step=0045221) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.847779299549991, LR: 0.0003 +[2026-03-03 12:21:49] (step=0045222) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.847974955977303, LR: 0.0003 +[2026-03-03 12:21:57] (step=0045223) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.848170612404617, LR: 0.0003 +[2026-03-03 12:22:05] (step=0045224) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 8.848366268831931, LR: 0.0003 +[2026-03-03 12:22:13] (step=0045225) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.848561925259245, LR: 0.0003 +[2026-03-03 12:22:21] (step=0045226) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.848757581686558, LR: 0.0003 +[2026-03-03 12:22:28] (step=0045227) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.848953238113872, LR: 0.0003 +[2026-03-03 12:22:36] (step=0045228) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.849148894541186, LR: 0.0003 +[2026-03-03 12:22:44] (step=0045229) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.8493445509685, LR: 0.0003 +[2026-03-03 12:22:52] (step=0045230) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.849540207395814, LR: 0.0003 +[2026-03-03 12:23:00] (step=0045231) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.849735863823126, LR: 0.0003 +[2026-03-03 12:23:08] (step=0045232) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.84993152025044, LR: 0.0003 +[2026-03-03 12:23:16] (step=0045233) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.850127176677754, LR: 0.0003 +[2026-03-03 12:23:24] (step=0045234) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.850322833105068, LR: 0.0003 +[2026-03-03 12:23:31] (step=0045235) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 8.850518489532382, LR: 0.0003 +[2026-03-03 12:23:39] (step=0045236) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 8.850714145959694, LR: 0.0003 +[2026-03-03 12:23:47] (step=0045237) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.850909802387008, LR: 0.0003 +[2026-03-03 12:23:55] (step=0045238) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.851105458814322, LR: 0.0003 +[2026-03-03 12:24:03] (step=0045239) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.851301115241636, LR: 0.0003 +[2026-03-03 12:24:11] (step=0045240) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.85149677166895, LR: 0.0003 +[2026-03-03 12:24:19] (step=0045241) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.851692428096262, LR: 0.0003 +[2026-03-03 12:24:26] (step=0045242) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.851888084523576, LR: 0.0003 +[2026-03-03 12:24:34] (step=0045243) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.85208374095089, LR: 0.0003 +[2026-03-03 12:24:42] (step=0045244) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.852279397378204, LR: 0.0003 +[2026-03-03 12:24:50] (step=0045245) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 8.852475053805518, LR: 0.0003 +[2026-03-03 12:24:58] (step=0045246) Train Loss: 0.4432, Train Steps/Sec: 0.12, Epoch: 8.85267071023283, LR: 0.0003 +[2026-03-03 12:25:06] (step=0045247) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.852866366660145, LR: 0.0003 +[2026-03-03 12:25:14] (step=0045248) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.853062023087459, LR: 0.0003 +[2026-03-03 12:25:22] (step=0045249) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.853257679514773, LR: 0.0003 +[2026-03-03 12:25:30] (step=0045250) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.853453335942085, LR: 0.0003 +[2026-03-03 12:25:38] (step=0045251) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.853648992369399, LR: 0.0003 +[2026-03-03 12:25:45] (step=0045252) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 8.853844648796713, LR: 0.0003 +[2026-03-03 12:25:53] (step=0045253) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.854040305224027, LR: 0.0003 +[2026-03-03 12:26:01] (step=0045254) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.85423596165134, LR: 0.0003 +[2026-03-03 12:26:09] (step=0045255) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.854431618078653, LR: 0.0003 +[2026-03-03 12:26:17] (step=0045256) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.854627274505967, LR: 0.0003 +[2026-03-03 12:26:25] (step=0045257) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.854822930933281, LR: 0.0003 +[2026-03-03 12:26:33] (step=0045258) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.855018587360595, LR: 0.0003 +[2026-03-03 12:26:40] (step=0045259) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.85521424378791, LR: 0.0003 +[2026-03-03 12:26:48] (step=0045260) Train Loss: 0.4673, Train Steps/Sec: 0.13, Epoch: 8.855409900215221, LR: 0.0003 +[2026-03-03 12:26:56] (step=0045261) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.855605556642535, LR: 0.0003 +[2026-03-03 12:27:04] (step=0045262) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.85580121306985, LR: 0.0003 +[2026-03-03 12:27:12] (step=0045263) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.855996869497163, LR: 0.0003 +[2026-03-03 12:27:20] (step=0045264) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.856192525924477, LR: 0.0003 +[2026-03-03 12:27:28] (step=0045265) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 8.85638818235179, LR: 0.0003 +[2026-03-03 12:27:36] (step=0045266) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.856583838779104, LR: 0.0003 +[2026-03-03 12:27:44] (step=0045267) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 8.856779495206418, LR: 0.0003 +[2026-03-03 12:27:51] (step=0045268) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.856975151633732, LR: 0.0003 +[2026-03-03 12:27:59] (step=0045269) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.857170808061046, LR: 0.0003 +[2026-03-03 12:28:07] (step=0045270) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.857366464488358, LR: 0.0003 +[2026-03-03 12:28:15] (step=0045271) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 8.857562120915672, LR: 0.0003 +[2026-03-03 12:28:23] (step=0045272) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.857757777342986, LR: 0.0003 +[2026-03-03 12:28:31] (step=0045273) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.8579534337703, LR: 0.0003 +[2026-03-03 12:28:39] (step=0045274) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.858149090197614, LR: 0.0003 +[2026-03-03 12:28:47] (step=0045275) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.858344746624926, LR: 0.0003 +[2026-03-03 12:28:54] (step=0045276) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.85854040305224, LR: 0.0003 +[2026-03-03 12:29:02] (step=0045277) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 8.858736059479554, LR: 0.0003 +[2026-03-03 12:29:10] (step=0045278) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.858931715906868, LR: 0.0003 +[2026-03-03 12:29:18] (step=0045279) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 8.85912737233418, LR: 0.0003 +[2026-03-03 12:29:26] (step=0045280) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.859323028761494, LR: 0.0003 +[2026-03-03 12:29:34] (step=0045281) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.859518685188808, LR: 0.0003 +[2026-03-03 12:29:42] (step=0045282) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.859714341616122, LR: 0.0003 +[2026-03-03 12:29:50] (step=0045283) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.859909998043436, LR: 0.0003 +[2026-03-03 12:29:57] (step=0045284) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 8.860105654470749, LR: 0.0003 +[2026-03-03 12:30:05] (step=0045285) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.860301310898063, LR: 0.0003 +[2026-03-03 12:30:13] (step=0045286) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.860496967325377, LR: 0.0003 +[2026-03-03 12:30:21] (step=0045287) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.86069262375269, LR: 0.0003 +[2026-03-03 12:30:29] (step=0045288) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.860888280180005, LR: 0.0003 +[2026-03-03 12:30:37] (step=0045289) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.861083936607317, LR: 0.0003 +[2026-03-03 12:30:45] (step=0045290) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.86127959303463, LR: 0.0003 +[2026-03-03 12:30:52] (step=0045291) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.861475249461945, LR: 0.0003 +[2026-03-03 12:31:00] (step=0045292) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.861670905889259, LR: 0.0003 +[2026-03-03 12:31:08] (step=0045293) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.861866562316573, LR: 0.0003 +[2026-03-03 12:31:16] (step=0045294) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.862062218743885, LR: 0.0003 +[2026-03-03 12:31:24] (step=0045295) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.8622578751712, LR: 0.0003 +[2026-03-03 12:31:32] (step=0045296) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 8.862453531598513, LR: 0.0003 +[2026-03-03 12:31:40] (step=0045297) Train Loss: 0.4498, Train Steps/Sec: 0.12, Epoch: 8.862649188025827, LR: 0.0003 +[2026-03-03 12:31:48] (step=0045298) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.862844844453141, LR: 0.0003 +[2026-03-03 12:31:56] (step=0045299) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 8.863040500880453, LR: 0.0003 +[2026-03-03 12:32:03] (step=0045300) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.863236157307767, LR: 0.0003 +[2026-03-03 12:32:11] (step=0045301) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 8.863431813735081, LR: 0.0003 +[2026-03-03 12:32:19] (step=0045302) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.863627470162395, LR: 0.0003 +[2026-03-03 12:32:27] (step=0045303) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.863823126589708, LR: 0.0003 +[2026-03-03 12:32:35] (step=0045304) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 8.864018783017022, LR: 0.0003 +[2026-03-03 12:32:43] (step=0045305) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.864214439444336, LR: 0.0003 +[2026-03-03 12:32:51] (step=0045306) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.86441009587165, LR: 0.0003 +[2026-03-03 12:32:58] (step=0045307) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.864605752298964, LR: 0.0003 +[2026-03-03 12:33:06] (step=0045308) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.864801408726276, LR: 0.0003 +[2026-03-03 12:33:14] (step=0045309) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.86499706515359, LR: 0.0003 +[2026-03-03 12:33:22] (step=0045310) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.865192721580904, LR: 0.0003 +[2026-03-03 12:33:30] (step=0045311) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.865388378008218, LR: 0.0003 +[2026-03-03 12:33:38] (step=0045312) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.865584034435532, LR: 0.0003 +[2026-03-03 12:33:46] (step=0045313) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.865779690862844, LR: 0.0003 +[2026-03-03 12:33:54] (step=0045314) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.865975347290158, LR: 0.0003 +[2026-03-03 12:34:02] (step=0045315) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 8.866171003717472, LR: 0.0003 +[2026-03-03 12:34:09] (step=0045316) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 8.866366660144786, LR: 0.0003 +[2026-03-03 12:34:17] (step=0045317) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.8665623165721, LR: 0.0003 +[2026-03-03 12:34:25] (step=0045318) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.866757972999412, LR: 0.0003 +[2026-03-03 12:34:33] (step=0045319) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.866953629426726, LR: 0.0003 +[2026-03-03 12:34:41] (step=0045320) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.86714928585404, LR: 0.0003 +[2026-03-03 12:34:49] (step=0045321) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.867344942281354, LR: 0.0003 +[2026-03-03 12:34:57] (step=0045322) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.867540598708668, LR: 0.0003 +[2026-03-03 12:35:05] (step=0045323) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.86773625513598, LR: 0.0003 +[2026-03-03 12:35:12] (step=0045324) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.867931911563295, LR: 0.0003 +[2026-03-03 12:35:20] (step=0045325) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.868127567990609, LR: 0.0003 +[2026-03-03 12:35:28] (step=0045326) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.868323224417923, LR: 0.0003 +[2026-03-03 12:35:36] (step=0045327) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.868518880845237, LR: 0.0003 +[2026-03-03 12:35:44] (step=0045328) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.868714537272549, LR: 0.0003 +[2026-03-03 12:35:52] (step=0045329) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.868910193699863, LR: 0.0003 +[2026-03-03 12:36:00] (step=0045330) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.869105850127177, LR: 0.0003 +[2026-03-03 12:36:08] (step=0045331) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.869301506554491, LR: 0.0003 +[2026-03-03 12:36:15] (step=0045332) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.869497162981803, LR: 0.0003 +[2026-03-03 12:36:23] (step=0045333) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.869692819409117, LR: 0.0003 +[2026-03-03 12:36:31] (step=0045334) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 8.869888475836431, LR: 0.0003 +[2026-03-03 12:36:39] (step=0045335) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.870084132263745, LR: 0.0003 +[2026-03-03 12:36:47] (step=0045336) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.87027978869106, LR: 0.0003 +[2026-03-03 12:36:55] (step=0045337) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.870475445118371, LR: 0.0003 +[2026-03-03 12:37:03] (step=0045338) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.870671101545685, LR: 0.0003 +[2026-03-03 12:37:10] (step=0045339) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.870866757973, LR: 0.0003 +[2026-03-03 12:37:18] (step=0045340) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.871062414400313, LR: 0.0003 +[2026-03-03 12:37:26] (step=0045341) Train Loss: 0.4566, Train Steps/Sec: 0.12, Epoch: 8.871258070827627, LR: 0.0003 +[2026-03-03 12:37:34] (step=0045342) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.87145372725494, LR: 0.0003 +[2026-03-03 12:37:42] (step=0045343) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.871649383682254, LR: 0.0003 +[2026-03-03 12:37:50] (step=0045344) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.871845040109568, LR: 0.0003 +[2026-03-03 12:37:58] (step=0045345) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 8.872040696536882, LR: 0.0003 +[2026-03-03 12:38:06] (step=0045346) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.872236352964196, LR: 0.0003 +[2026-03-03 12:38:14] (step=0045347) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.872432009391508, LR: 0.0003 +[2026-03-03 12:38:21] (step=0045348) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.872627665818822, LR: 0.0003 +[2026-03-03 12:38:29] (step=0045349) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.872823322246136, LR: 0.0003 +[2026-03-03 12:38:37] (step=0045350) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.87301897867345, LR: 0.0003 +[2026-03-03 12:38:45] (step=0045351) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.873214635100764, LR: 0.0003 +[2026-03-03 12:38:53] (step=0045352) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.873410291528076, LR: 0.0003 +[2026-03-03 12:39:01] (step=0045353) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 8.87360594795539, LR: 0.0003 +[2026-03-03 12:39:09] (step=0045354) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.873801604382704, LR: 0.0003 +[2026-03-03 12:39:17] (step=0045355) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.873997260810018, LR: 0.0003 +[2026-03-03 12:39:25] (step=0045356) Train Loss: 0.4507, Train Steps/Sec: 0.12, Epoch: 8.87419291723733, LR: 0.0003 +[2026-03-03 12:39:32] (step=0045357) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.874388573664644, LR: 0.0003 +[2026-03-03 12:39:40] (step=0045358) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.874584230091958, LR: 0.0003 +[2026-03-03 12:39:48] (step=0045359) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.874779886519272, LR: 0.0003 +[2026-03-03 12:39:56] (step=0045360) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.874975542946586, LR: 0.0003 +[2026-03-03 12:40:04] (step=0045361) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.875171199373899, LR: 0.0003 +[2026-03-03 12:40:12] (step=0045362) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.875366855801213, LR: 0.0003 +[2026-03-03 12:40:20] (step=0045363) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 8.875562512228527, LR: 0.0003 +[2026-03-03 12:40:28] (step=0045364) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.87575816865584, LR: 0.0003 +[2026-03-03 12:40:35] (step=0045365) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.875953825083155, LR: 0.0003 +[2026-03-03 12:40:43] (step=0045366) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.876149481510467, LR: 0.0003 +[2026-03-03 12:40:51] (step=0045367) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 8.876345137937781, LR: 0.0003 +[2026-03-03 12:40:59] (step=0045368) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 8.876540794365095, LR: 0.0003 +[2026-03-03 12:41:07] (step=0045369) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.876736450792409, LR: 0.0003 +[2026-03-03 12:41:15] (step=0045370) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 8.876932107219723, LR: 0.0003 +[2026-03-03 12:41:23] (step=0045371) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.877127763647035, LR: 0.0003 +[2026-03-03 12:41:31] (step=0045372) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 8.87732342007435, LR: 0.0003 +[2026-03-03 12:41:38] (step=0045373) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.877519076501663, LR: 0.0003 +[2026-03-03 12:41:46] (step=0045374) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.877714732928977, LR: 0.0003 +[2026-03-03 12:41:54] (step=0045375) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.877910389356291, LR: 0.0003 +[2026-03-03 12:42:02] (step=0045376) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.878106045783603, LR: 0.0003 +[2026-03-03 12:42:10] (step=0045377) Train Loss: 0.4670, Train Steps/Sec: 0.13, Epoch: 8.878301702210917, LR: 0.0003 +[2026-03-03 12:42:18] (step=0045378) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.878497358638231, LR: 0.0003 +[2026-03-03 12:42:26] (step=0045379) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 8.878693015065545, LR: 0.0003 +[2026-03-03 12:42:33] (step=0045380) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.878888671492858, LR: 0.0003 +[2026-03-03 12:42:41] (step=0045381) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.879084327920172, LR: 0.0003 +[2026-03-03 12:42:49] (step=0045382) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 8.879279984347486, LR: 0.0003 +[2026-03-03 12:42:57] (step=0045383) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.8794756407748, LR: 0.0003 +[2026-03-03 12:43:05] (step=0045384) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.879671297202114, LR: 0.0003 +[2026-03-03 12:43:13] (step=0045385) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.879866953629426, LR: 0.0003 +[2026-03-03 12:43:21] (step=0045386) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.88006261005674, LR: 0.0003 +[2026-03-03 12:43:29] (step=0045387) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 8.880258266484054, LR: 0.0003 +[2026-03-03 12:43:36] (step=0045388) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.880453922911368, LR: 0.0003 +[2026-03-03 12:43:44] (step=0045389) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.880649579338682, LR: 0.0003 +[2026-03-03 12:43:52] (step=0045390) Train Loss: 0.4504, Train Steps/Sec: 0.12, Epoch: 8.880845235765994, LR: 0.0003 +[2026-03-03 12:44:00] (step=0045391) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.881040892193308, LR: 0.0003 +[2026-03-03 12:44:08] (step=0045392) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.881236548620622, LR: 0.0003 +[2026-03-03 12:44:16] (step=0045393) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.881432205047936, LR: 0.0003 +[2026-03-03 12:44:24] (step=0045394) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 8.88162786147525, LR: 0.0003 +[2026-03-03 12:44:32] (step=0045395) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.881823517902562, LR: 0.0003 +[2026-03-03 12:44:40] (step=0045396) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 8.882019174329876, LR: 0.0003 +[2026-03-03 12:44:48] (step=0045397) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 8.88221483075719, LR: 0.0003 +[2026-03-03 12:44:55] (step=0045398) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 8.882410487184504, LR: 0.0003 +[2026-03-03 12:45:03] (step=0045399) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.882606143611818, LR: 0.0003 +[2026-03-03 12:45:11] (step=0045400) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.88280180003913, LR: 0.0003 +[2026-03-03 12:45:19] (step=0045401) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.882997456466445, LR: 0.0003 +[2026-03-03 12:45:27] (step=0045402) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.883193112893759, LR: 0.0003 +[2026-03-03 12:45:35] (step=0045403) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 8.883388769321073, LR: 0.0003 +[2026-03-03 12:45:43] (step=0045404) Train Loss: 0.4234, Train Steps/Sec: 0.13, Epoch: 8.883584425748387, LR: 0.0003 +[2026-03-03 12:45:51] (step=0045405) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.883780082175699, LR: 0.0003 +[2026-03-03 12:45:58] (step=0045406) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.883975738603013, LR: 0.0003 +[2026-03-03 12:46:06] (step=0045407) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 8.884171395030327, LR: 0.0003 +[2026-03-03 12:46:14] (step=0045408) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.884367051457641, LR: 0.0003 +[2026-03-03 12:46:22] (step=0045409) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.884562707884953, LR: 0.0003 +[2026-03-03 12:46:30] (step=0045410) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.884758364312267, LR: 0.0003 +[2026-03-03 12:46:38] (step=0045411) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.884954020739581, LR: 0.0003 +[2026-03-03 12:46:46] (step=0045412) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.885149677166895, LR: 0.0003 +[2026-03-03 12:46:54] (step=0045413) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.88534533359421, LR: 0.0003 +[2026-03-03 12:47:01] (step=0045414) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.885540990021521, LR: 0.0003 +[2026-03-03 12:47:09] (step=0045415) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.885736646448835, LR: 0.0003 +[2026-03-03 12:47:17] (step=0045416) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.88593230287615, LR: 0.0003 +[2026-03-03 12:47:25] (step=0045417) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.886127959303463, LR: 0.0003 +[2026-03-03 12:47:33] (step=0045418) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 8.886323615730777, LR: 0.0003 +[2026-03-03 12:47:41] (step=0045419) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.88651927215809, LR: 0.0003 +[2026-03-03 12:47:49] (step=0045420) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.886714928585404, LR: 0.0003 +[2026-03-03 12:47:56] (step=0045421) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.886910585012718, LR: 0.0003 +[2026-03-03 12:48:04] (step=0045422) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 8.887106241440032, LR: 0.0003 +[2026-03-03 12:48:12] (step=0045423) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.887301897867346, LR: 0.0003 +[2026-03-03 12:48:20] (step=0045424) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.887497554294658, LR: 0.0003 +[2026-03-03 12:48:28] (step=0045425) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.887693210721972, LR: 0.0003 +[2026-03-03 12:48:36] (step=0045426) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.887888867149286, LR: 0.0003 +[2026-03-03 12:48:44] (step=0045427) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 8.8880845235766, LR: 0.0003 +[2026-03-03 12:48:52] (step=0045428) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.888280180003914, LR: 0.0003 +[2026-03-03 12:48:59] (step=0045429) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.888475836431226, LR: 0.0003 +[2026-03-03 12:49:07] (step=0045430) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.88867149285854, LR: 0.0003 +[2026-03-03 12:49:15] (step=0045431) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 8.888867149285854, LR: 0.0003 +[2026-03-03 12:49:23] (step=0045432) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.889062805713168, LR: 0.0003 +[2026-03-03 12:49:31] (step=0045433) Train Loss: 0.4244, Train Steps/Sec: 0.13, Epoch: 8.88925846214048, LR: 0.0003 +[2026-03-03 12:49:39] (step=0045434) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 8.889454118567794, LR: 0.0003 +[2026-03-03 12:49:47] (step=0045435) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.889649774995108, LR: 0.0003 +[2026-03-03 12:49:55] (step=0045436) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 8.889845431422422, LR: 0.0003 +[2026-03-03 12:50:03] (step=0045437) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.890041087849736, LR: 0.0003 +[2026-03-03 12:50:10] (step=0045438) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 8.890236744277049, LR: 0.0003 +[2026-03-03 12:50:18] (step=0045439) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.890432400704363, LR: 0.0003 +[2026-03-03 12:50:26] (step=0045440) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.890628057131677, LR: 0.0003 +[2026-03-03 12:50:34] (step=0045441) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.89082371355899, LR: 0.0003 +[2026-03-03 12:50:42] (step=0045442) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 8.891019369986305, LR: 0.0003 +[2026-03-03 12:50:50] (step=0045443) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 8.891215026413617, LR: 0.0003 +[2026-03-03 12:50:58] (step=0045444) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.891410682840931, LR: 0.0003 +[2026-03-03 12:51:05] (step=0045445) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 8.891606339268245, LR: 0.0003 +[2026-03-03 12:51:13] (step=0045446) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 8.891801995695559, LR: 0.0003 +[2026-03-03 12:51:21] (step=0045447) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.891997652122873, LR: 0.0003 +[2026-03-03 12:51:29] (step=0045448) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 8.892193308550185, LR: 0.0003 +[2026-03-03 12:51:37] (step=0045449) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.8923889649775, LR: 0.0003 +[2026-03-03 12:51:45] (step=0045450) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.892584621404813, LR: 0.0003 +[2026-03-03 12:51:53] (step=0045451) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.892780277832127, LR: 0.0003 +[2026-03-03 12:52:01] (step=0045452) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.892975934259441, LR: 0.0003 +[2026-03-03 12:52:09] (step=0045453) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.893171590686753, LR: 0.0003 +[2026-03-03 12:52:16] (step=0045454) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.893367247114067, LR: 0.0003 +[2026-03-03 12:52:24] (step=0045455) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.893562903541381, LR: 0.0003 +[2026-03-03 12:52:32] (step=0045456) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.893758559968695, LR: 0.0003 +[2026-03-03 12:52:40] (step=0045457) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.89395421639601, LR: 0.0003 +[2026-03-03 12:52:48] (step=0045458) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.894149872823322, LR: 0.0003 +[2026-03-03 12:52:56] (step=0045459) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 8.894345529250636, LR: 0.0003 +[2026-03-03 12:53:04] (step=0045460) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.89454118567795, LR: 0.0003 +[2026-03-03 12:53:12] (step=0045461) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.894736842105264, LR: 0.0003 +[2026-03-03 12:53:19] (step=0045462) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 8.894932498532576, LR: 0.0003 +[2026-03-03 12:53:27] (step=0045463) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 8.89512815495989, LR: 0.0003 +[2026-03-03 12:53:35] (step=0045464) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.895323811387204, LR: 0.0003 +[2026-03-03 12:53:43] (step=0045465) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.895519467814518, LR: 0.0003 +[2026-03-03 12:53:51] (step=0045466) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 8.895715124241832, LR: 0.0003 +[2026-03-03 12:53:59] (step=0045467) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.895910780669144, LR: 0.0003 +[2026-03-03 12:54:07] (step=0045468) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.896106437096458, LR: 0.0003 +[2026-03-03 12:54:14] (step=0045469) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.896302093523772, LR: 0.0003 +[2026-03-03 12:54:22] (step=0045470) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 8.896497749951086, LR: 0.0003 +[2026-03-03 12:54:30] (step=0045471) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.8966934063784, LR: 0.0003 +[2026-03-03 12:54:38] (step=0045472) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.896889062805712, LR: 0.0003 +[2026-03-03 12:54:46] (step=0045473) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.897084719233026, LR: 0.0003 +[2026-03-03 12:54:54] (step=0045474) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 8.89728037566034, LR: 0.0003 +[2026-03-03 12:55:02] (step=0045475) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.897476032087654, LR: 0.0003 +[2026-03-03 12:55:10] (step=0045476) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.897671688514968, LR: 0.0003 +[2026-03-03 12:55:17] (step=0045477) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.89786734494228, LR: 0.0003 +[2026-03-03 12:55:25] (step=0045478) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.898063001369595, LR: 0.0003 +[2026-03-03 12:55:33] (step=0045479) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.898258657796909, LR: 0.0003 +[2026-03-03 12:55:41] (step=0045480) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.898454314224223, LR: 0.0003 +[2026-03-03 12:55:49] (step=0045481) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.898649970651537, LR: 0.0003 +[2026-03-03 12:55:57] (step=0045482) Train Loss: 0.4421, Train Steps/Sec: 0.12, Epoch: 8.898845627078849, LR: 0.0003 +[2026-03-03 12:56:05] (step=0045483) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 8.899041283506163, LR: 0.0003 +[2026-03-03 12:56:13] (step=0045484) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 8.899236939933477, LR: 0.0003 +[2026-03-03 12:56:21] (step=0045485) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.899432596360791, LR: 0.0003 +[2026-03-03 12:56:28] (step=0045486) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.899628252788103, LR: 0.0003 +[2026-03-03 12:56:36] (step=0045487) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.899823909215417, LR: 0.0003 +[2026-03-03 12:56:44] (step=0045488) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.900019565642731, LR: 0.0003 +[2026-03-03 12:56:52] (step=0045489) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.900215222070045, LR: 0.0003 +[2026-03-03 12:57:00] (step=0045490) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.90041087849736, LR: 0.0003 +[2026-03-03 12:57:08] (step=0045491) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.900606534924671, LR: 0.0003 +[2026-03-03 12:57:16] (step=0045492) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.900802191351985, LR: 0.0003 +[2026-03-03 12:57:24] (step=0045493) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.9009978477793, LR: 0.0003 +[2026-03-03 12:57:31] (step=0045494) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.901193504206613, LR: 0.0003 +[2026-03-03 12:57:39] (step=0045495) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.901389160633927, LR: 0.0003 +[2026-03-03 12:57:47] (step=0045496) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.90158481706124, LR: 0.0003 +[2026-03-03 12:57:55] (step=0045497) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.901780473488554, LR: 0.0003 +[2026-03-03 12:58:03] (step=0045498) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 8.901976129915868, LR: 0.0003 +[2026-03-03 12:58:11] (step=0045499) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.902171786343182, LR: 0.0003 +[2026-03-03 12:58:19] (step=0045500) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.902367442770496, LR: 0.0003 +[2026-03-03 12:58:19] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0045500/ +[2026-03-03 12:58:26] (step=0045501) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.902563099197808, LR: 0.0003 +[2026-03-03 12:58:35] (step=0045502) Train Loss: 0.4345, Train Steps/Sec: 0.12, Epoch: 8.902758755625122, LR: 0.0003 +[2026-03-03 12:58:42] (step=0045503) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 8.902954412052436, LR: 0.0003 +[2026-03-03 12:58:50] (step=0045504) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.90315006847975, LR: 0.0003 +[2026-03-03 12:58:58] (step=0045505) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.903345724907064, LR: 0.0003 +[2026-03-03 12:59:06] (step=0045506) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 8.903541381334376, LR: 0.0003 +[2026-03-03 12:59:14] (step=0045507) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.90373703776169, LR: 0.0003 +[2026-03-03 12:59:22] (step=0045508) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.903932694189004, LR: 0.0003 +[2026-03-03 12:59:30] (step=0045509) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.904128350616318, LR: 0.0003 +[2026-03-03 12:59:38] (step=0045510) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.904324007043632, LR: 0.0003 +[2026-03-03 12:59:45] (step=0045511) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.904519663470944, LR: 0.0003 +[2026-03-03 12:59:53] (step=0045512) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.904715319898258, LR: 0.0003 +[2026-03-03 13:00:01] (step=0045513) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.904910976325572, LR: 0.0003 +[2026-03-03 13:00:09] (step=0045514) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 8.905106632752886, LR: 0.0003 +[2026-03-03 13:00:17] (step=0045515) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 8.905302289180199, LR: 0.0003 +[2026-03-03 13:00:25] (step=0045516) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.905497945607513, LR: 0.0003 +[2026-03-03 13:00:33] (step=0045517) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.905693602034827, LR: 0.0003 +[2026-03-03 13:00:40] (step=0045518) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 8.90588925846214, LR: 0.0003 +[2026-03-03 13:00:48] (step=0045519) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.906084914889455, LR: 0.0003 +[2026-03-03 13:00:56] (step=0045520) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 8.906280571316767, LR: 0.0003 +[2026-03-03 13:01:04] (step=0045521) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.906476227744081, LR: 0.0003 +[2026-03-03 13:01:12] (step=0045522) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 8.906671884171395, LR: 0.0003 +[2026-03-03 13:01:20] (step=0045523) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.906867540598709, LR: 0.0003 +[2026-03-03 13:01:28] (step=0045524) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 8.907063197026023, LR: 0.0003 +[2026-03-03 13:01:36] (step=0045525) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 8.907258853453335, LR: 0.0003 +[2026-03-03 13:01:43] (step=0045526) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 8.90745450988065, LR: 0.0003 +[2026-03-03 13:01:51] (step=0045527) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.907650166307963, LR: 0.0003 +[2026-03-03 13:01:59] (step=0045528) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 8.907845822735277, LR: 0.0003 +[2026-03-03 13:02:07] (step=0045529) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 8.908041479162591, LR: 0.0003 +[2026-03-03 13:02:15] (step=0045530) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.908237135589903, LR: 0.0003 +[2026-03-03 13:02:23] (step=0045531) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 8.908432792017217, LR: 0.0003 +[2026-03-03 13:02:31] (step=0045532) Train Loss: 0.4451, Train Steps/Sec: 0.12, Epoch: 8.908628448444532, LR: 0.0003 +[2026-03-03 13:02:39] (step=0045533) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.908824104871846, LR: 0.0003 +[2026-03-03 13:02:46] (step=0045534) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.90901976129916, LR: 0.0003 +[2026-03-03 13:02:54] (step=0045535) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.909215417726472, LR: 0.0003 +[2026-03-03 13:03:02] (step=0045536) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.909411074153786, LR: 0.0003 +[2026-03-03 13:03:10] (step=0045537) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.9096067305811, LR: 0.0003 +[2026-03-03 13:03:18] (step=0045538) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 8.909802387008414, LR: 0.0003 +[2026-03-03 13:03:26] (step=0045539) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.909998043435726, LR: 0.0003 +[2026-03-03 13:03:34] (step=0045540) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 8.91019369986304, LR: 0.0003 +[2026-03-03 13:03:42] (step=0045541) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.910389356290354, LR: 0.0003 +[2026-03-03 13:03:49] (step=0045542) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 8.910585012717668, LR: 0.0003 +[2026-03-03 13:03:57] (step=0045543) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.910780669144982, LR: 0.0003 +[2026-03-03 13:04:05] (step=0045544) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.910976325572294, LR: 0.0003 +[2026-03-03 13:04:13] (step=0045545) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.911171981999608, LR: 0.0003 +[2026-03-03 13:04:21] (step=0045546) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.911367638426922, LR: 0.0003 +[2026-03-03 13:04:29] (step=0045547) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.911563294854236, LR: 0.0003 +[2026-03-03 13:04:37] (step=0045548) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.91175895128155, LR: 0.0003 +[2026-03-03 13:04:44] (step=0045549) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 8.911954607708862, LR: 0.0003 +[2026-03-03 13:04:52] (step=0045550) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.912150264136177, LR: 0.0003 +[2026-03-03 13:05:00] (step=0045551) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.91234592056349, LR: 0.0003 +[2026-03-03 13:05:08] (step=0045552) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.912541576990805, LR: 0.0003 +[2026-03-03 13:05:16] (step=0045553) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.912737233418119, LR: 0.0003 +[2026-03-03 13:05:24] (step=0045554) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.91293288984543, LR: 0.0003 +[2026-03-03 13:05:32] (step=0045555) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.913128546272745, LR: 0.0003 +[2026-03-03 13:05:40] (step=0045556) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.913324202700059, LR: 0.0003 +[2026-03-03 13:05:47] (step=0045557) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 8.913519859127373, LR: 0.0003 +[2026-03-03 13:05:55] (step=0045558) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 8.913715515554687, LR: 0.0003 +[2026-03-03 13:06:03] (step=0045559) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.913911171981999, LR: 0.0003 +[2026-03-03 13:06:11] (step=0045560) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.914106828409313, LR: 0.0003 +[2026-03-03 13:06:19] (step=0045561) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.914302484836627, LR: 0.0003 +[2026-03-03 13:06:27] (step=0045562) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 8.914498141263941, LR: 0.0003 +[2026-03-03 13:06:35] (step=0045563) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 8.914693797691255, LR: 0.0003 +[2026-03-03 13:06:43] (step=0045564) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.914889454118567, LR: 0.0003 +[2026-03-03 13:06:50] (step=0045565) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 8.915085110545881, LR: 0.0003 +[2026-03-03 13:06:58] (step=0045566) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 8.915280766973195, LR: 0.0003 +[2026-03-03 13:07:06] (step=0045567) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.91547642340051, LR: 0.0003 +[2026-03-03 13:07:14] (step=0045568) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 8.915672079827822, LR: 0.0003 +[2026-03-03 13:07:22] (step=0045569) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.915867736255136, LR: 0.0003 +[2026-03-03 13:07:30] (step=0045570) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.91606339268245, LR: 0.0003 +[2026-03-03 13:07:38] (step=0045571) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 8.916259049109764, LR: 0.0003 +[2026-03-03 13:07:46] (step=0045572) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.916454705537078, LR: 0.0003 +[2026-03-03 13:07:53] (step=0045573) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.91665036196439, LR: 0.0003 +[2026-03-03 13:08:01] (step=0045574) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.916846018391704, LR: 0.0003 +[2026-03-03 13:08:09] (step=0045575) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.917041674819018, LR: 0.0003 +[2026-03-03 13:08:17] (step=0045576) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.917237331246332, LR: 0.0003 +[2026-03-03 13:08:25] (step=0045577) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 8.917432987673646, LR: 0.0003 +[2026-03-03 13:08:33] (step=0045578) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.917628644100958, LR: 0.0003 +[2026-03-03 13:08:41] (step=0045579) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 8.917824300528272, LR: 0.0003 +[2026-03-03 13:08:48] (step=0045580) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.918019956955586, LR: 0.0003 +[2026-03-03 13:08:56] (step=0045581) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.9182156133829, LR: 0.0003 +[2026-03-03 13:09:04] (step=0045582) Train Loss: 0.4527, Train Steps/Sec: 0.12, Epoch: 8.918411269810214, LR: 0.0003 +[2026-03-03 13:09:12] (step=0045583) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 8.918606926237526, LR: 0.0003 +[2026-03-03 13:09:20] (step=0045584) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 8.91880258266484, LR: 0.0003 +[2026-03-03 13:09:28] (step=0045585) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.918998239092154, LR: 0.0003 +[2026-03-03 13:09:36] (step=0045586) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 8.919193895519468, LR: 0.0003 +[2026-03-03 13:09:44] (step=0045587) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.919389551946782, LR: 0.0003 +[2026-03-03 13:09:52] (step=0045588) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.919585208374095, LR: 0.0003 +[2026-03-03 13:09:59] (step=0045589) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 8.919780864801409, LR: 0.0003 +[2026-03-03 13:10:07] (step=0045590) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.919976521228723, LR: 0.0003 +[2026-03-03 13:10:15] (step=0045591) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.920172177656037, LR: 0.0003 +[2026-03-03 13:10:23] (step=0045592) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.920367834083349, LR: 0.0003 +[2026-03-03 13:10:31] (step=0045593) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.920563490510663, LR: 0.0003 +[2026-03-03 13:10:39] (step=0045594) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.920759146937977, LR: 0.0003 +[2026-03-03 13:10:47] (step=0045595) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.92095480336529, LR: 0.0003 +[2026-03-03 13:10:55] (step=0045596) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.921150459792605, LR: 0.0003 +[2026-03-03 13:11:02] (step=0045597) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.921346116219917, LR: 0.0003 +[2026-03-03 13:11:10] (step=0045598) Train Loss: 0.4377, Train Steps/Sec: 0.12, Epoch: 8.921541772647231, LR: 0.0003 +[2026-03-03 13:11:18] (step=0045599) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.921737429074545, LR: 0.0003 +[2026-03-03 13:11:26] (step=0045600) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.921933085501859, LR: 0.0003 +[2026-03-03 13:11:34] (step=0045601) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.922128741929173, LR: 0.0003 +[2026-03-03 13:11:42] (step=0045602) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.922324398356485, LR: 0.0003 +[2026-03-03 13:11:50] (step=0045603) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 8.9225200547838, LR: 0.0003 +[2026-03-03 13:11:58] (step=0045604) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.922715711211113, LR: 0.0003 +[2026-03-03 13:12:05] (step=0045605) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 8.922911367638427, LR: 0.0003 +[2026-03-03 13:12:13] (step=0045606) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.923107024065741, LR: 0.0003 +[2026-03-03 13:12:21] (step=0045607) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.923302680493054, LR: 0.0003 +[2026-03-03 13:12:29] (step=0045608) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 8.923498336920368, LR: 0.0003 +[2026-03-03 13:12:37] (step=0045609) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.923693993347682, LR: 0.0003 +[2026-03-03 13:12:45] (step=0045610) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 8.923889649774996, LR: 0.0003 +[2026-03-03 13:12:53] (step=0045611) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.92408530620231, LR: 0.0003 +[2026-03-03 13:13:01] (step=0045612) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.924280962629622, LR: 0.0003 +[2026-03-03 13:13:08] (step=0045613) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.924476619056936, LR: 0.0003 +[2026-03-03 13:13:16] (step=0045614) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.92467227548425, LR: 0.0003 +[2026-03-03 13:13:24] (step=0045615) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.924867931911564, LR: 0.0003 +[2026-03-03 13:13:32] (step=0045616) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.925063588338878, LR: 0.0003 +[2026-03-03 13:13:40] (step=0045617) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.92525924476619, LR: 0.0003 +[2026-03-03 13:13:48] (step=0045618) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.925454901193504, LR: 0.0003 +[2026-03-03 13:13:56] (step=0045619) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 8.925650557620818, LR: 0.0003 +[2026-03-03 13:14:04] (step=0045620) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.925846214048132, LR: 0.0003 +[2026-03-03 13:14:11] (step=0045621) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.926041870475444, LR: 0.0003 +[2026-03-03 13:14:19] (step=0045622) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 8.926237526902758, LR: 0.0003 +[2026-03-03 13:14:27] (step=0045623) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.926433183330072, LR: 0.0003 +[2026-03-03 13:14:35] (step=0045624) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.926628839757386, LR: 0.0003 +[2026-03-03 13:14:43] (step=0045625) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.9268244961847, LR: 0.0003 +[2026-03-03 13:14:51] (step=0045626) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 8.927020152612013, LR: 0.0003 +[2026-03-03 13:14:59] (step=0045627) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 8.927215809039327, LR: 0.0003 +[2026-03-03 13:15:06] (step=0045628) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 8.92741146546664, LR: 0.0003 +[2026-03-03 13:15:14] (step=0045629) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.927607121893955, LR: 0.0003 +[2026-03-03 13:15:22] (step=0045630) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.927802778321269, LR: 0.0003 +[2026-03-03 13:15:30] (step=0045631) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 8.92799843474858, LR: 0.0003 +[2026-03-03 13:15:38] (step=0045632) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.928194091175895, LR: 0.0003 +[2026-03-03 13:15:46] (step=0045633) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.928389747603209, LR: 0.0003 +[2026-03-03 13:15:54] (step=0045634) Train Loss: 0.4509, Train Steps/Sec: 0.12, Epoch: 8.928585404030523, LR: 0.0003 +[2026-03-03 13:16:02] (step=0045635) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.928781060457837, LR: 0.0003 +[2026-03-03 13:16:10] (step=0045636) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 8.928976716885149, LR: 0.0003 +[2026-03-03 13:16:17] (step=0045637) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 8.929172373312463, LR: 0.0003 +[2026-03-03 13:16:25] (step=0045638) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.929368029739777, LR: 0.0003 +[2026-03-03 13:16:33] (step=0045639) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.929563686167091, LR: 0.0003 +[2026-03-03 13:16:41] (step=0045640) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.929759342594405, LR: 0.0003 +[2026-03-03 13:16:49] (step=0045641) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.929954999021717, LR: 0.0003 +[2026-03-03 13:16:57] (step=0045642) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.930150655449031, LR: 0.0003 +[2026-03-03 13:17:05] (step=0045643) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.930346311876345, LR: 0.0003 +[2026-03-03 13:17:13] (step=0045644) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.93054196830366, LR: 0.0003 +[2026-03-03 13:17:20] (step=0045645) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 8.930737624730972, LR: 0.0003 +[2026-03-03 13:17:28] (step=0045646) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 8.930933281158286, LR: 0.0003 +[2026-03-03 13:17:36] (step=0045647) Train Loss: 0.4500, Train Steps/Sec: 0.12, Epoch: 8.9311289375856, LR: 0.0003 +[2026-03-03 13:17:44] (step=0045648) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.931324594012914, LR: 0.0003 +[2026-03-03 13:17:52] (step=0045649) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.931520250440228, LR: 0.0003 +[2026-03-03 13:18:00] (step=0045650) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.93171590686754, LR: 0.0003 +[2026-03-03 13:18:08] (step=0045651) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.931911563294854, LR: 0.0003 +[2026-03-03 13:18:16] (step=0045652) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.932107219722168, LR: 0.0003 +[2026-03-03 13:18:24] (step=0045653) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.932302876149482, LR: 0.0003 +[2026-03-03 13:18:31] (step=0045654) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.932498532576796, LR: 0.0003 +[2026-03-03 13:18:39] (step=0045655) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 8.932694189004108, LR: 0.0003 +[2026-03-03 13:18:47] (step=0045656) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 8.932889845431422, LR: 0.0003 +[2026-03-03 13:18:55] (step=0045657) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.933085501858736, LR: 0.0003 +[2026-03-03 13:19:03] (step=0045658) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.93328115828605, LR: 0.0003 +[2026-03-03 13:19:11] (step=0045659) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.933476814713364, LR: 0.0003 +[2026-03-03 13:19:19] (step=0045660) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 8.933672471140676, LR: 0.0003 +[2026-03-03 13:19:27] (step=0045661) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.93386812756799, LR: 0.0003 +[2026-03-03 13:19:34] (step=0045662) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.934063783995304, LR: 0.0003 +[2026-03-03 13:19:42] (step=0045663) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.934259440422618, LR: 0.0003 +[2026-03-03 13:19:50] (step=0045664) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.934455096849932, LR: 0.0003 +[2026-03-03 13:19:58] (step=0045665) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.934650753277245, LR: 0.0003 +[2026-03-03 13:20:06] (step=0045666) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 8.934846409704559, LR: 0.0003 +[2026-03-03 13:20:14] (step=0045667) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.935042066131873, LR: 0.0003 +[2026-03-03 13:20:22] (step=0045668) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 8.935237722559187, LR: 0.0003 +[2026-03-03 13:20:29] (step=0045669) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.9354333789865, LR: 0.0003 +[2026-03-03 13:20:37] (step=0045670) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 8.935629035413813, LR: 0.0003 +[2026-03-03 13:20:45] (step=0045671) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.935824691841127, LR: 0.0003 +[2026-03-03 13:20:53] (step=0045672) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.93602034826844, LR: 0.0003 +[2026-03-03 13:21:01] (step=0045673) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.936216004695755, LR: 0.0003 +[2026-03-03 13:21:09] (step=0045674) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.936411661123067, LR: 0.0003 +[2026-03-03 13:21:17] (step=0045675) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.936607317550381, LR: 0.0003 +[2026-03-03 13:21:25] (step=0045676) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 8.936802973977695, LR: 0.0003 +[2026-03-03 13:21:32] (step=0045677) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.936998630405009, LR: 0.0003 +[2026-03-03 13:21:40] (step=0045678) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.937194286832323, LR: 0.0003 +[2026-03-03 13:21:48] (step=0045679) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.937389943259635, LR: 0.0003 +[2026-03-03 13:21:56] (step=0045680) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.93758559968695, LR: 0.0003 +[2026-03-03 13:22:04] (step=0045681) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 8.937781256114263, LR: 0.0003 +[2026-03-03 13:22:12] (step=0045682) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.937976912541577, LR: 0.0003 +[2026-03-03 13:22:20] (step=0045683) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.938172568968891, LR: 0.0003 +[2026-03-03 13:22:27] (step=0045684) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 8.938368225396204, LR: 0.0003 +[2026-03-03 13:22:36] (step=0045685) Train Loss: 0.4418, Train Steps/Sec: 0.12, Epoch: 8.938563881823518, LR: 0.0003 +[2026-03-03 13:22:43] (step=0045686) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.938759538250832, LR: 0.0003 +[2026-03-03 13:22:51] (step=0045687) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.938955194678146, LR: 0.0003 +[2026-03-03 13:22:59] (step=0045688) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.93915085110546, LR: 0.0003 +[2026-03-03 13:23:07] (step=0045689) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.939346507532772, LR: 0.0003 +[2026-03-03 13:23:15] (step=0045690) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 8.939542163960086, LR: 0.0003 +[2026-03-03 13:23:23] (step=0045691) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.9397378203874, LR: 0.0003 +[2026-03-03 13:23:31] (step=0045692) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 8.939933476814714, LR: 0.0003 +[2026-03-03 13:23:39] (step=0045693) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.940129133242028, LR: 0.0003 +[2026-03-03 13:23:46] (step=0045694) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.94032478966934, LR: 0.0003 +[2026-03-03 13:23:54] (step=0045695) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 8.940520446096654, LR: 0.0003 +[2026-03-03 13:24:02] (step=0045696) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 8.940716102523968, LR: 0.0003 +[2026-03-03 13:24:10] (step=0045697) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.940911758951282, LR: 0.0003 +[2026-03-03 13:24:18] (step=0045698) Train Loss: 0.4331, Train Steps/Sec: 0.12, Epoch: 8.941107415378594, LR: 0.0003 +[2026-03-03 13:24:26] (step=0045699) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 8.941303071805908, LR: 0.0003 +[2026-03-03 13:24:34] (step=0045700) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.941498728233222, LR: 0.0003 +[2026-03-03 13:24:42] (step=0045701) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 8.941694384660536, LR: 0.0003 +[2026-03-03 13:24:50] (step=0045702) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 8.94189004108785, LR: 0.0003 +[2026-03-03 13:24:57] (step=0045703) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 8.942085697515163, LR: 0.0003 +[2026-03-03 13:25:05] (step=0045704) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 8.942281353942477, LR: 0.0003 +[2026-03-03 13:25:13] (step=0045705) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.94247701036979, LR: 0.0003 +[2026-03-03 13:25:21] (step=0045706) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 8.942672666797105, LR: 0.0003 +[2026-03-03 13:25:29] (step=0045707) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.942868323224419, LR: 0.0003 +[2026-03-03 13:25:37] (step=0045708) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.94306397965173, LR: 0.0003 +[2026-03-03 13:25:45] (step=0045709) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.943259636079045, LR: 0.0003 +[2026-03-03 13:25:53] (step=0045710) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.943455292506359, LR: 0.0003 +[2026-03-03 13:26:00] (step=0045711) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 8.943650948933673, LR: 0.0003 +[2026-03-03 13:26:08] (step=0045712) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.943846605360987, LR: 0.0003 +[2026-03-03 13:26:16] (step=0045713) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.944042261788299, LR: 0.0003 +[2026-03-03 13:26:24] (step=0045714) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 8.944237918215613, LR: 0.0003 +[2026-03-03 13:26:32] (step=0045715) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 8.944433574642927, LR: 0.0003 +[2026-03-03 13:26:40] (step=0045716) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 8.944629231070241, LR: 0.0003 +[2026-03-03 13:26:48] (step=0045717) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 8.944824887497555, LR: 0.0003 +[2026-03-03 13:26:56] (step=0045718) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.945020543924867, LR: 0.0003 +[2026-03-03 13:27:03] (step=0045719) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.945216200352181, LR: 0.0003 +[2026-03-03 13:27:11] (step=0045720) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.945411856779495, LR: 0.0003 +[2026-03-03 13:27:19] (step=0045721) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.94560751320681, LR: 0.0003 +[2026-03-03 13:27:27] (step=0045722) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 8.945803169634123, LR: 0.0003 +[2026-03-03 13:27:35] (step=0045723) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 8.945998826061436, LR: 0.0003 +[2026-03-03 13:27:43] (step=0045724) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.94619448248875, LR: 0.0003 +[2026-03-03 13:27:51] (step=0045725) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 8.946390138916064, LR: 0.0003 +[2026-03-03 13:27:58] (step=0045726) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 8.946585795343378, LR: 0.0003 +[2026-03-03 13:28:06] (step=0045727) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.94678145177069, LR: 0.0003 +[2026-03-03 13:28:14] (step=0045728) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.946977108198004, LR: 0.0003 +[2026-03-03 13:28:22] (step=0045729) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 8.947172764625318, LR: 0.0003 +[2026-03-03 13:28:30] (step=0045730) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.947368421052632, LR: 0.0003 +[2026-03-03 13:28:38] (step=0045731) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.947564077479946, LR: 0.0003 +[2026-03-03 13:28:46] (step=0045732) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 8.947759733907258, LR: 0.0003 +[2026-03-03 13:28:54] (step=0045733) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.947955390334572, LR: 0.0003 +[2026-03-03 13:29:01] (step=0045734) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.948151046761886, LR: 0.0003 +[2026-03-03 13:29:09] (step=0045735) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.9483467031892, LR: 0.0003 +[2026-03-03 13:29:17] (step=0045736) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.948542359616514, LR: 0.0003 +[2026-03-03 13:29:25] (step=0045737) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 8.948738016043826, LR: 0.0003 +[2026-03-03 13:29:33] (step=0045738) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 8.94893367247114, LR: 0.0003 +[2026-03-03 13:29:41] (step=0045739) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 8.949129328898454, LR: 0.0003 +[2026-03-03 13:29:49] (step=0045740) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.949324985325768, LR: 0.0003 +[2026-03-03 13:29:57] (step=0045741) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.949520641753082, LR: 0.0003 +[2026-03-03 13:30:05] (step=0045742) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.949716298180395, LR: 0.0003 +[2026-03-03 13:30:12] (step=0045743) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.949911954607709, LR: 0.0003 +[2026-03-03 13:30:20] (step=0045744) Train Loss: 0.4565, Train Steps/Sec: 0.12, Epoch: 8.950107611035023, LR: 0.0003 +[2026-03-03 13:30:28] (step=0045745) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 8.950303267462337, LR: 0.0003 +[2026-03-03 13:30:36] (step=0045746) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.95049892388965, LR: 0.0003 +[2026-03-03 13:30:44] (step=0045747) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 8.950694580316963, LR: 0.0003 +[2026-03-03 13:30:52] (step=0045748) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.950890236744277, LR: 0.0003 +[2026-03-03 13:31:00] (step=0045749) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 8.95108589317159, LR: 0.0003 +[2026-03-03 13:31:08] (step=0045750) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 8.951281549598905, LR: 0.0003 +[2026-03-03 13:31:16] (step=0045751) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.951477206026217, LR: 0.0003 +[2026-03-03 13:31:23] (step=0045752) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.951672862453531, LR: 0.0003 +[2026-03-03 13:31:31] (step=0045753) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.951868518880845, LR: 0.0003 +[2026-03-03 13:31:39] (step=0045754) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 8.95206417530816, LR: 0.0003 +[2026-03-03 13:31:47] (step=0045755) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 8.952259831735473, LR: 0.0003 +[2026-03-03 13:31:55] (step=0045756) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 8.952455488162785, LR: 0.0003 +[2026-03-03 13:32:03] (step=0045757) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 8.9526511445901, LR: 0.0003 +[2026-03-03 13:32:11] (step=0045758) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.952846801017413, LR: 0.0003 +[2026-03-03 13:32:19] (step=0045759) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.953042457444727, LR: 0.0003 +[2026-03-03 13:32:26] (step=0045760) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 8.953238113872041, LR: 0.0003 +[2026-03-03 13:32:34] (step=0045761) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.953433770299354, LR: 0.0003 +[2026-03-03 13:32:42] (step=0045762) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 8.953629426726668, LR: 0.0003 +[2026-03-03 13:32:50] (step=0045763) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.953825083153982, LR: 0.0003 +[2026-03-03 13:32:58] (step=0045764) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.954020739581296, LR: 0.0003 +[2026-03-03 13:33:06] (step=0045765) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 8.95421639600861, LR: 0.0003 +[2026-03-03 13:33:14] (step=0045766) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 8.954412052435922, LR: 0.0003 +[2026-03-03 13:33:22] (step=0045767) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.954607708863236, LR: 0.0003 +[2026-03-03 13:33:29] (step=0045768) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.95480336529055, LR: 0.0003 +[2026-03-03 13:33:37] (step=0045769) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 8.954999021717864, LR: 0.0003 +[2026-03-03 13:33:45] (step=0045770) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 8.955194678145178, LR: 0.0003 +[2026-03-03 13:33:53] (step=0045771) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.95539033457249, LR: 0.0003 +[2026-03-03 13:34:01] (step=0045772) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.955585990999804, LR: 0.0003 +[2026-03-03 13:34:09] (step=0045773) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.955781647427118, LR: 0.0003 +[2026-03-03 13:34:17] (step=0045774) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.955977303854432, LR: 0.0003 +[2026-03-03 13:34:25] (step=0045775) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 8.956172960281746, LR: 0.0003 +[2026-03-03 13:34:32] (step=0045776) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 8.956368616709058, LR: 0.0003 +[2026-03-03 13:34:40] (step=0045777) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 8.956564273136372, LR: 0.0003 +[2026-03-03 13:34:48] (step=0045778) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.956759929563686, LR: 0.0003 +[2026-03-03 13:34:56] (step=0045779) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 8.956955585991, LR: 0.0003 +[2026-03-03 13:35:04] (step=0045780) Train Loss: 0.4541, Train Steps/Sec: 0.12, Epoch: 8.957151242418313, LR: 0.0003 +[2026-03-03 13:35:12] (step=0045781) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 8.957346898845627, LR: 0.0003 +[2026-03-03 13:35:20] (step=0045782) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 8.95754255527294, LR: 0.0003 +[2026-03-03 13:35:28] (step=0045783) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.957738211700255, LR: 0.0003 +[2026-03-03 13:35:35] (step=0045784) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.957933868127569, LR: 0.0003 +[2026-03-03 13:35:43] (step=0045785) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.95812952455488, LR: 0.0003 +[2026-03-03 13:35:51] (step=0045786) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.958325180982195, LR: 0.0003 +[2026-03-03 13:35:59] (step=0045787) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 8.958520837409509, LR: 0.0003 +[2026-03-03 13:36:07] (step=0045788) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 8.958716493836823, LR: 0.0003 +[2026-03-03 13:36:15] (step=0045789) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.958912150264137, LR: 0.0003 +[2026-03-03 13:36:23] (step=0045790) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 8.95910780669145, LR: 0.0003 +[2026-03-03 13:36:31] (step=0045791) Train Loss: 0.4393, Train Steps/Sec: 0.12, Epoch: 8.959303463118763, LR: 0.0003 +[2026-03-03 13:36:39] (step=0045792) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 8.959499119546077, LR: 0.0003 +[2026-03-03 13:36:46] (step=0045793) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.959694775973391, LR: 0.0003 +[2026-03-03 13:36:54] (step=0045794) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 8.959890432400705, LR: 0.0003 +[2026-03-03 13:37:02] (step=0045795) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 8.960086088828017, LR: 0.0003 +[2026-03-03 13:37:10] (step=0045796) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 8.960281745255331, LR: 0.0003 +[2026-03-03 13:37:18] (step=0045797) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.960477401682645, LR: 0.0003 +[2026-03-03 13:37:26] (step=0045798) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 8.96067305810996, LR: 0.0003 +[2026-03-03 13:37:34] (step=0045799) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 8.960868714537273, LR: 0.0003 +[2026-03-03 13:37:42] (step=0045800) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.961064370964586, LR: 0.0003 +[2026-03-03 13:37:49] (step=0045801) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.9612600273919, LR: 0.0003 +[2026-03-03 13:37:57] (step=0045802) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.961455683819214, LR: 0.0003 +[2026-03-03 13:38:05] (step=0045803) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 8.961651340246528, LR: 0.0003 +[2026-03-03 13:38:13] (step=0045804) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.96184699667384, LR: 0.0003 +[2026-03-03 13:38:21] (step=0045805) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 8.962042653101154, LR: 0.0003 +[2026-03-03 13:38:29] (step=0045806) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 8.962238309528468, LR: 0.0003 +[2026-03-03 13:38:37] (step=0045807) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 8.962433965955782, LR: 0.0003 +[2026-03-03 13:38:44] (step=0045808) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.962629622383096, LR: 0.0003 +[2026-03-03 13:38:52] (step=0045809) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.962825278810408, LR: 0.0003 +[2026-03-03 13:39:00] (step=0045810) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 8.963020935237722, LR: 0.0003 +[2026-03-03 13:39:08] (step=0045811) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 8.963216591665036, LR: 0.0003 +[2026-03-03 13:39:16] (step=0045812) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 8.96341224809235, LR: 0.0003 +[2026-03-03 13:39:24] (step=0045813) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.963607904519664, LR: 0.0003 +[2026-03-03 13:39:32] (step=0045814) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 8.963803560946976, LR: 0.0003 +[2026-03-03 13:39:40] (step=0045815) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.96399921737429, LR: 0.0003 +[2026-03-03 13:39:47] (step=0045816) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 8.964194873801604, LR: 0.0003 +[2026-03-03 13:39:55] (step=0045817) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 8.964390530228918, LR: 0.0003 +[2026-03-03 13:40:03] (step=0045818) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.964586186656232, LR: 0.0003 +[2026-03-03 13:40:11] (step=0045819) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 8.964781843083545, LR: 0.0003 +[2026-03-03 13:40:19] (step=0045820) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 8.964977499510859, LR: 0.0003 +[2026-03-03 13:40:27] (step=0045821) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 8.965173155938173, LR: 0.0003 +[2026-03-03 13:40:35] (step=0045822) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.965368812365487, LR: 0.0003 +[2026-03-03 13:40:43] (step=0045823) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 8.9655644687928, LR: 0.0003 +[2026-03-03 13:40:50] (step=0045824) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.965760125220113, LR: 0.0003 +[2026-03-03 13:40:58] (step=0045825) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 8.965955781647427, LR: 0.0003 +[2026-03-03 13:41:06] (step=0045826) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.966151438074741, LR: 0.0003 +[2026-03-03 13:41:14] (step=0045827) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 8.966347094502055, LR: 0.0003 +[2026-03-03 13:41:22] (step=0045828) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.966542750929367, LR: 0.0003 +[2026-03-03 13:41:30] (step=0045829) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.966738407356681, LR: 0.0003 +[2026-03-03 13:41:38] (step=0045830) Train Loss: 0.4425, Train Steps/Sec: 0.12, Epoch: 8.966934063783995, LR: 0.0003 +[2026-03-03 13:41:46] (step=0045831) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.96712972021131, LR: 0.0003 +[2026-03-03 13:41:54] (step=0045832) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 8.967325376638623, LR: 0.0003 +[2026-03-03 13:42:01] (step=0045833) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 8.967521033065935, LR: 0.0003 +[2026-03-03 13:42:09] (step=0045834) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.96771668949325, LR: 0.0003 +[2026-03-03 13:42:17] (step=0045835) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.967912345920563, LR: 0.0003 +[2026-03-03 13:42:25] (step=0045836) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 8.968108002347877, LR: 0.0003 +[2026-03-03 13:42:33] (step=0045837) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 8.968303658775191, LR: 0.0003 +[2026-03-03 13:42:41] (step=0045838) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 8.968499315202504, LR: 0.0003 +[2026-03-03 13:42:49] (step=0045839) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.968694971629818, LR: 0.0003 +[2026-03-03 13:42:57] (step=0045840) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 8.968890628057132, LR: 0.0003 +[2026-03-03 13:43:04] (step=0045841) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 8.969086284484446, LR: 0.0003 +[2026-03-03 13:43:12] (step=0045842) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.96928194091176, LR: 0.0003 +[2026-03-03 13:43:20] (step=0045843) Train Loss: 0.4432, Train Steps/Sec: 0.12, Epoch: 8.969477597339072, LR: 0.0003 +[2026-03-03 13:43:28] (step=0045844) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.969673253766386, LR: 0.0003 +[2026-03-03 13:43:36] (step=0045845) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 8.9698689101937, LR: 0.0003 +[2026-03-03 13:43:44] (step=0045846) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 8.970064566621014, LR: 0.0003 +[2026-03-03 13:43:52] (step=0045847) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 8.970260223048328, LR: 0.0003 +[2026-03-03 13:44:00] (step=0045848) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.97045587947564, LR: 0.0003 +[2026-03-03 13:44:08] (step=0045849) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 8.970651535902954, LR: 0.0003 +[2026-03-03 13:44:15] (step=0045850) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.970847192330268, LR: 0.0003 +[2026-03-03 13:44:23] (step=0045851) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.971042848757582, LR: 0.0003 +[2026-03-03 13:44:31] (step=0045852) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 8.971238505184896, LR: 0.0003 +[2026-03-03 13:44:39] (step=0045853) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 8.971434161612208, LR: 0.0003 +[2026-03-03 13:44:47] (step=0045854) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.971629818039522, LR: 0.0003 +[2026-03-03 13:44:55] (step=0045855) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.971825474466836, LR: 0.0003 +[2026-03-03 13:45:03] (step=0045856) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 8.97202113089415, LR: 0.0003 +[2026-03-03 13:45:10] (step=0045857) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 8.972216787321463, LR: 0.0003 +[2026-03-03 13:45:18] (step=0045858) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 8.972412443748777, LR: 0.0003 +[2026-03-03 13:45:26] (step=0045859) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 8.97260810017609, LR: 0.0003 +[2026-03-03 13:45:34] (step=0045860) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.972803756603405, LR: 0.0003 +[2026-03-03 13:45:42] (step=0045861) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.972999413030719, LR: 0.0003 +[2026-03-03 13:45:50] (step=0045862) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 8.973195069458031, LR: 0.0003 +[2026-03-03 13:45:58] (step=0045863) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 8.973390725885345, LR: 0.0003 +[2026-03-03 13:46:06] (step=0045864) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.973586382312659, LR: 0.0003 +[2026-03-03 13:46:13] (step=0045865) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.973782038739973, LR: 0.0003 +[2026-03-03 13:46:21] (step=0045866) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 8.973977695167287, LR: 0.0003 +[2026-03-03 13:46:29] (step=0045867) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 8.9741733515946, LR: 0.0003 +[2026-03-03 13:46:37] (step=0045868) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 8.974369008021913, LR: 0.0003 +[2026-03-03 13:46:45] (step=0045869) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 8.974564664449227, LR: 0.0003 +[2026-03-03 13:46:53] (step=0045870) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 8.974760320876541, LR: 0.0003 +[2026-03-03 13:47:01] (step=0045871) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 8.974955977303855, LR: 0.0003 +[2026-03-03 13:47:09] (step=0045872) Train Loss: 0.4394, Train Steps/Sec: 0.12, Epoch: 8.975151633731167, LR: 0.0003 +[2026-03-03 13:47:17] (step=0045873) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 8.975347290158481, LR: 0.0003 +[2026-03-03 13:47:24] (step=0045874) Train Loss: 0.4242, Train Steps/Sec: 0.13, Epoch: 8.975542946585795, LR: 0.0003 +[2026-03-03 13:47:32] (step=0045875) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.97573860301311, LR: 0.0003 +[2026-03-03 13:47:40] (step=0045876) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 8.975934259440423, LR: 0.0003 +[2026-03-03 13:47:48] (step=0045877) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.976129915867736, LR: 0.0003 +[2026-03-03 13:47:56] (step=0045878) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 8.97632557229505, LR: 0.0003 +[2026-03-03 13:48:04] (step=0045879) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 8.976521228722364, LR: 0.0003 +[2026-03-03 13:48:12] (step=0045880) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 8.976716885149678, LR: 0.0003 +[2026-03-03 13:48:19] (step=0045881) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.97691254157699, LR: 0.0003 +[2026-03-03 13:48:27] (step=0045882) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.977108198004304, LR: 0.0003 +[2026-03-03 13:48:35] (step=0045883) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 8.977303854431618, LR: 0.0003 +[2026-03-03 13:48:43] (step=0045884) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 8.977499510858932, LR: 0.0003 +[2026-03-03 13:48:51] (step=0045885) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 8.977695167286246, LR: 0.0003 +[2026-03-03 13:48:59] (step=0045886) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.977890823713558, LR: 0.0003 +[2026-03-03 13:49:07] (step=0045887) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.978086480140872, LR: 0.0003 +[2026-03-03 13:49:15] (step=0045888) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.978282136568186, LR: 0.0003 +[2026-03-03 13:49:22] (step=0045889) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 8.9784777929955, LR: 0.0003 +[2026-03-03 13:49:30] (step=0045890) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 8.978673449422814, LR: 0.0003 +[2026-03-03 13:49:38] (step=0045891) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.978869105850126, LR: 0.0003 +[2026-03-03 13:49:46] (step=0045892) Train Loss: 0.4539, Train Steps/Sec: 0.12, Epoch: 8.97906476227744, LR: 0.0003 +[2026-03-03 13:49:54] (step=0045893) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 8.979260418704754, LR: 0.0003 +[2026-03-03 13:50:02] (step=0045894) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 8.979456075132068, LR: 0.0003 +[2026-03-03 13:50:10] (step=0045895) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 8.979651731559382, LR: 0.0003 +[2026-03-03 13:50:18] (step=0045896) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.979847387986695, LR: 0.0003 +[2026-03-03 13:50:26] (step=0045897) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.980043044414009, LR: 0.0003 +[2026-03-03 13:50:33] (step=0045898) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 8.980238700841323, LR: 0.0003 +[2026-03-03 13:50:41] (step=0045899) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 8.980434357268637, LR: 0.0003 +[2026-03-03 13:50:49] (step=0045900) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.98063001369595, LR: 0.0003 +[2026-03-03 13:50:57] (step=0045901) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.980825670123263, LR: 0.0003 +[2026-03-03 13:51:05] (step=0045902) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 8.981021326550577, LR: 0.0003 +[2026-03-03 13:51:13] (step=0045903) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.981216982977891, LR: 0.0003 +[2026-03-03 13:51:21] (step=0045904) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 8.981412639405205, LR: 0.0003 +[2026-03-03 13:51:28] (step=0045905) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 8.981608295832519, LR: 0.0003 +[2026-03-03 13:51:36] (step=0045906) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 8.981803952259831, LR: 0.0003 +[2026-03-03 13:51:44] (step=0045907) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.981999608687145, LR: 0.0003 +[2026-03-03 13:51:52] (step=0045908) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 8.98219526511446, LR: 0.0003 +[2026-03-03 13:52:00] (step=0045909) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.982390921541773, LR: 0.0003 +[2026-03-03 13:52:08] (step=0045910) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 8.982586577969085, LR: 0.0003 +[2026-03-03 13:52:16] (step=0045911) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 8.9827822343964, LR: 0.0003 +[2026-03-03 13:52:24] (step=0045912) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 8.982977890823713, LR: 0.0003 +[2026-03-03 13:52:31] (step=0045913) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 8.983173547251027, LR: 0.0003 +[2026-03-03 13:52:39] (step=0045914) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 8.983369203678341, LR: 0.0003 +[2026-03-03 13:52:47] (step=0045915) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.983564860105654, LR: 0.0003 +[2026-03-03 13:52:55] (step=0045916) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 8.983760516532968, LR: 0.0003 +[2026-03-03 13:53:03] (step=0045917) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 8.983956172960282, LR: 0.0003 +[2026-03-03 13:53:11] (step=0045918) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 8.984151829387596, LR: 0.0003 +[2026-03-03 13:53:19] (step=0045919) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 8.98434748581491, LR: 0.0003 +[2026-03-03 13:53:27] (step=0045920) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 8.984543142242222, LR: 0.0003 +[2026-03-03 13:53:34] (step=0045921) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 8.984738798669536, LR: 0.0003 +[2026-03-03 13:53:42] (step=0045922) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 8.98493445509685, LR: 0.0003 +[2026-03-03 13:53:50] (step=0045923) Train Loss: 0.4426, Train Steps/Sec: 0.12, Epoch: 8.985130111524164, LR: 0.0003 +[2026-03-03 13:53:58] (step=0045924) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 8.985325767951478, LR: 0.0003 +[2026-03-03 13:54:06] (step=0045925) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.98552142437879, LR: 0.0003 +[2026-03-03 13:54:14] (step=0045926) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 8.985717080806104, LR: 0.0003 +[2026-03-03 13:54:22] (step=0045927) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 8.985912737233418, LR: 0.0003 +[2026-03-03 13:54:30] (step=0045928) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 8.986108393660732, LR: 0.0003 +[2026-03-03 13:54:38] (step=0045929) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 8.986304050088046, LR: 0.0003 +[2026-03-03 13:54:45] (step=0045930) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 8.986499706515358, LR: 0.0003 +[2026-03-03 13:54:53] (step=0045931) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.986695362942672, LR: 0.0003 +[2026-03-03 13:55:01] (step=0045932) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 8.986891019369986, LR: 0.0003 +[2026-03-03 13:55:09] (step=0045933) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 8.9870866757973, LR: 0.0003 +[2026-03-03 13:55:17] (step=0045934) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 8.987282332224613, LR: 0.0003 +[2026-03-03 13:55:25] (step=0045935) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 8.987477988651927, LR: 0.0003 +[2026-03-03 13:55:33] (step=0045936) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 8.98767364507924, LR: 0.0003 +[2026-03-03 13:55:40] (step=0045937) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 8.987869301506555, LR: 0.0003 +[2026-03-03 13:55:48] (step=0045938) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 8.988064957933869, LR: 0.0003 +[2026-03-03 13:55:56] (step=0045939) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 8.988260614361181, LR: 0.0003 +[2026-03-03 13:56:04] (step=0045940) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 8.988456270788495, LR: 0.0003 +[2026-03-03 13:56:12] (step=0045941) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 8.988651927215809, LR: 0.0003 +[2026-03-03 13:56:20] (step=0045942) Train Loss: 0.4478, Train Steps/Sec: 0.12, Epoch: 8.988847583643123, LR: 0.0003 +[2026-03-03 13:56:28] (step=0045943) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 8.989043240070437, LR: 0.0003 +[2026-03-03 13:56:36] (step=0045944) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 8.98923889649775, LR: 0.0003 +[2026-03-03 13:56:44] (step=0045945) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 8.989434552925063, LR: 0.0003 +[2026-03-03 13:56:51] (step=0045946) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 8.989630209352377, LR: 0.0003 +[2026-03-03 13:56:59] (step=0045947) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 8.989825865779691, LR: 0.0003 +[2026-03-03 13:57:07] (step=0045948) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 8.990021522207005, LR: 0.0003 +[2026-03-03 13:57:15] (step=0045949) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 8.990217178634317, LR: 0.0003 +[2026-03-03 13:57:23] (step=0045950) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 8.990412835061631, LR: 0.0003 +[2026-03-03 13:57:31] (step=0045951) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 8.990608491488945, LR: 0.0003 +[2026-03-03 13:57:39] (step=0045952) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 8.99080414791626, LR: 0.0003 +[2026-03-03 13:57:47] (step=0045953) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 8.990999804343573, LR: 0.0003 +[2026-03-03 13:57:55] (step=0045954) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 8.991195460770886, LR: 0.0003 +[2026-03-03 13:58:02] (step=0045955) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 8.9913911171982, LR: 0.0003 +[2026-03-03 13:58:10] (step=0045956) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.991586773625514, LR: 0.0003 +[2026-03-03 13:58:18] (step=0045957) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.991782430052828, LR: 0.0003 +[2026-03-03 13:58:26] (step=0045958) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 8.991978086480142, LR: 0.0003 +[2026-03-03 13:58:34] (step=0045959) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 8.992173742907454, LR: 0.0003 +[2026-03-03 13:58:42] (step=0045960) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 8.992369399334768, LR: 0.0003 +[2026-03-03 13:58:50] (step=0045961) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 8.992565055762082, LR: 0.0003 +[2026-03-03 13:58:58] (step=0045962) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 8.992760712189396, LR: 0.0003 +[2026-03-03 13:59:05] (step=0045963) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.992956368616708, LR: 0.0003 +[2026-03-03 13:59:13] (step=0045964) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.993152025044022, LR: 0.0003 +[2026-03-03 13:59:21] (step=0045965) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 8.993347681471336, LR: 0.0003 +[2026-03-03 13:59:29] (step=0045966) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 8.99354333789865, LR: 0.0003 +[2026-03-03 13:59:37] (step=0045967) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 8.993738994325964, LR: 0.0003 +[2026-03-03 13:59:45] (step=0045968) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 8.993934650753276, LR: 0.0003 +[2026-03-03 13:59:53] (step=0045969) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 8.99413030718059, LR: 0.0003 +[2026-03-03 14:00:00] (step=0045970) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 8.994325963607904, LR: 0.0003 +[2026-03-03 14:00:08] (step=0045971) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 8.994521620035218, LR: 0.0003 +[2026-03-03 14:00:16] (step=0045972) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 8.994717276462532, LR: 0.0003 +[2026-03-03 14:00:24] (step=0045973) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 8.994912932889845, LR: 0.0003 +[2026-03-03 14:00:32] (step=0045974) Train Loss: 0.4395, Train Steps/Sec: 0.12, Epoch: 8.995108589317159, LR: 0.0003 +[2026-03-03 14:00:40] (step=0045975) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 8.995304245744473, LR: 0.0003 +[2026-03-03 14:00:48] (step=0045976) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 8.995499902171787, LR: 0.0003 +[2026-03-03 14:00:56] (step=0045977) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 8.9956955585991, LR: 0.0003 +[2026-03-03 14:01:04] (step=0045978) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 8.995891215026413, LR: 0.0003 +[2026-03-03 14:01:11] (step=0045979) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 8.996086871453727, LR: 0.0003 +[2026-03-03 14:01:19] (step=0045980) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 8.996282527881041, LR: 0.0003 +[2026-03-03 14:01:27] (step=0045981) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 8.996478184308355, LR: 0.0003 +[2026-03-03 14:01:35] (step=0045982) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 8.996673840735669, LR: 0.0003 +[2026-03-03 14:01:43] (step=0045983) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 8.996869497162981, LR: 0.0003 +[2026-03-03 14:01:51] (step=0045984) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.997065153590295, LR: 0.0003 +[2026-03-03 14:01:59] (step=0045985) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 8.99726081001761, LR: 0.0003 +[2026-03-03 14:02:06] (step=0045986) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 8.997456466444923, LR: 0.0003 +[2026-03-03 14:02:14] (step=0045987) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 8.997652122872235, LR: 0.0003 +[2026-03-03 14:02:22] (step=0045988) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 8.99784777929955, LR: 0.0003 +[2026-03-03 14:02:30] (step=0045989) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 8.998043435726863, LR: 0.0003 +[2026-03-03 14:02:38] (step=0045990) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 8.998239092154177, LR: 0.0003 +[2026-03-03 14:02:46] (step=0045991) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 8.998434748581492, LR: 0.0003 +[2026-03-03 14:02:54] (step=0045992) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 8.998630405008804, LR: 0.0003 +[2026-03-03 14:03:02] (step=0045993) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 8.998826061436118, LR: 0.0003 +[2026-03-03 14:03:09] (step=0045994) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 8.999021717863432, LR: 0.0003 +[2026-03-03 14:03:17] (step=0045995) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 8.999217374290746, LR: 0.0003 +[2026-03-03 14:03:25] (step=0045996) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 8.99941303071806, LR: 0.0003 +[2026-03-03 14:03:33] (step=0045997) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 8.999608687145372, LR: 0.0003 +[2026-03-03 14:03:41] (step=0045998) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 8.999804343572686, LR: 0.0003 +[2026-03-03 14:03:49] (step=0045999) Train Loss: 0.4471, Train Steps/Sec: 0.12, Epoch: 9.0, LR: 0.0003 +[2026-03-03 14:03:49] Beginning epoch 9... +[2026-03-03 14:03:59] (step=0046000) Train Loss: 0.4407, Train Steps/Sec: 0.10, Epoch: 9.000195656427314, LR: 0.0003 +[2026-03-03 14:03:59] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0046000/ +[2026-03-03 14:04:07] (step=0046001) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.000391312854628, LR: 0.0003 +[2026-03-03 14:04:15] (step=0046002) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.00058696928194, LR: 0.0003 +[2026-03-03 14:04:22] (step=0046003) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.000782625709254, LR: 0.0003 +[2026-03-03 14:04:30] (step=0046004) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 9.000978282136568, LR: 0.0003 +[2026-03-03 14:04:38] (step=0046005) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.001173938563882, LR: 0.0003 +[2026-03-03 14:04:46] (step=0046006) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.001369594991196, LR: 0.0003 +[2026-03-03 14:04:54] (step=0046007) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.001565251418508, LR: 0.0003 +[2026-03-03 14:05:02] (step=0046008) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.001760907845823, LR: 0.0003 +[2026-03-03 14:05:10] (step=0046009) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.001956564273137, LR: 0.0003 +[2026-03-03 14:05:17] (step=0046010) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 9.00215222070045, LR: 0.0003 +[2026-03-03 14:05:25] (step=0046011) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.002347877127765, LR: 0.0003 +[2026-03-03 14:05:33] (step=0046012) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 9.002543533555077, LR: 0.0003 +[2026-03-03 14:05:41] (step=0046013) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 9.00273918998239, LR: 0.0003 +[2026-03-03 14:05:49] (step=0046014) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.002934846409705, LR: 0.0003 +[2026-03-03 14:05:57] (step=0046015) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.003130502837019, LR: 0.0003 +[2026-03-03 14:06:05] (step=0046016) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.003326159264331, LR: 0.0003 +[2026-03-03 14:06:13] (step=0046017) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.003521815691645, LR: 0.0003 +[2026-03-03 14:06:20] (step=0046018) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.003717472118959, LR: 0.0003 +[2026-03-03 14:06:28] (step=0046019) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.003913128546273, LR: 0.0003 +[2026-03-03 14:06:36] (step=0046020) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.004108784973587, LR: 0.0003 +[2026-03-03 14:06:44] (step=0046021) Train Loss: 0.4486, Train Steps/Sec: 0.12, Epoch: 9.0043044414009, LR: 0.0003 +[2026-03-03 14:06:52] (step=0046022) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.004500097828213, LR: 0.0003 +[2026-03-03 14:07:00] (step=0046023) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.004695754255527, LR: 0.0003 +[2026-03-03 14:07:08] (step=0046024) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.004891410682841, LR: 0.0003 +[2026-03-03 14:07:16] (step=0046025) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.005087067110155, LR: 0.0003 +[2026-03-03 14:07:24] (step=0046026) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.005282723537468, LR: 0.0003 +[2026-03-03 14:07:32] (step=0046027) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 9.005478379964782, LR: 0.0003 +[2026-03-03 14:07:39] (step=0046028) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.005674036392096, LR: 0.0003 +[2026-03-03 14:07:47] (step=0046029) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.00586969281941, LR: 0.0003 +[2026-03-03 14:07:55] (step=0046030) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.006065349246724, LR: 0.0003 +[2026-03-03 14:08:03] (step=0046031) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.006261005674036, LR: 0.0003 +[2026-03-03 14:08:11] (step=0046032) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.00645666210135, LR: 0.0003 +[2026-03-03 14:08:19] (step=0046033) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.006652318528664, LR: 0.0003 +[2026-03-03 14:08:27] (step=0046034) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.006847974955978, LR: 0.0003 +[2026-03-03 14:08:34] (step=0046035) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.007043631383292, LR: 0.0003 +[2026-03-03 14:08:42] (step=0046036) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.007239287810604, LR: 0.0003 +[2026-03-03 14:08:50] (step=0046037) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.007434944237918, LR: 0.0003 +[2026-03-03 14:08:58] (step=0046038) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.007630600665232, LR: 0.0003 +[2026-03-03 14:09:06] (step=0046039) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.007826257092546, LR: 0.0003 +[2026-03-03 14:09:14] (step=0046040) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.008021913519858, LR: 0.0003 +[2026-03-03 14:09:22] (step=0046041) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 9.008217569947172, LR: 0.0003 +[2026-03-03 14:09:30] (step=0046042) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.008413226374486, LR: 0.0003 +[2026-03-03 14:09:37] (step=0046043) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.0086088828018, LR: 0.0003 +[2026-03-03 14:09:45] (step=0046044) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.008804539229114, LR: 0.0003 +[2026-03-03 14:09:53] (step=0046045) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.009000195656427, LR: 0.0003 +[2026-03-03 14:10:01] (step=0046046) Train Loss: 0.4411, Train Steps/Sec: 0.12, Epoch: 9.00919585208374, LR: 0.0003 +[2026-03-03 14:10:09] (step=0046047) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.009391508511055, LR: 0.0003 +[2026-03-03 14:10:17] (step=0046048) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.009587164938369, LR: 0.0003 +[2026-03-03 14:10:25] (step=0046049) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.009782821365683, LR: 0.0003 +[2026-03-03 14:10:33] (step=0046050) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.009978477792995, LR: 0.0003 +[2026-03-03 14:10:41] (step=0046051) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 9.010174134220309, LR: 0.0003 +[2026-03-03 14:10:48] (step=0046052) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 9.010369790647623, LR: 0.0003 +[2026-03-03 14:10:56] (step=0046053) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.010565447074937, LR: 0.0003 +[2026-03-03 14:11:04] (step=0046054) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.01076110350225, LR: 0.0003 +[2026-03-03 14:11:12] (step=0046055) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.010956759929563, LR: 0.0003 +[2026-03-03 14:11:20] (step=0046056) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.011152416356877, LR: 0.0003 +[2026-03-03 14:11:28] (step=0046057) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.011348072784191, LR: 0.0003 +[2026-03-03 14:11:36] (step=0046058) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.011543729211505, LR: 0.0003 +[2026-03-03 14:11:44] (step=0046059) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.011739385638819, LR: 0.0003 +[2026-03-03 14:11:51] (step=0046060) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 9.011935042066131, LR: 0.0003 +[2026-03-03 14:11:59] (step=0046061) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.012130698493445, LR: 0.0003 +[2026-03-03 14:12:07] (step=0046062) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.01232635492076, LR: 0.0003 +[2026-03-03 14:12:15] (step=0046063) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.012522011348073, LR: 0.0003 +[2026-03-03 14:12:23] (step=0046064) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.012717667775387, LR: 0.0003 +[2026-03-03 14:12:31] (step=0046065) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.0129133242027, LR: 0.0003 +[2026-03-03 14:12:39] (step=0046066) Train Loss: 0.4471, Train Steps/Sec: 0.12, Epoch: 9.013108980630014, LR: 0.0003 +[2026-03-03 14:12:47] (step=0046067) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.013304637057328, LR: 0.0003 +[2026-03-03 14:12:55] (step=0046068) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.013500293484642, LR: 0.0003 +[2026-03-03 14:13:02] (step=0046069) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.013695949911954, LR: 0.0003 +[2026-03-03 14:13:10] (step=0046070) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.013891606339268, LR: 0.0003 +[2026-03-03 14:13:18] (step=0046071) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 9.014087262766582, LR: 0.0003 +[2026-03-03 14:13:26] (step=0046072) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.014282919193896, LR: 0.0003 +[2026-03-03 14:13:34] (step=0046073) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.01447857562121, LR: 0.0003 +[2026-03-03 14:13:42] (step=0046074) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.014674232048522, LR: 0.0003 +[2026-03-03 14:13:50] (step=0046075) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.014869888475836, LR: 0.0003 +[2026-03-03 14:13:57] (step=0046076) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.01506554490315, LR: 0.0003 +[2026-03-03 14:14:05] (step=0046077) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.015261201330464, LR: 0.0003 +[2026-03-03 14:14:13] (step=0046078) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.015456857757778, LR: 0.0003 +[2026-03-03 14:14:21] (step=0046079) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.01565251418509, LR: 0.0003 +[2026-03-03 14:14:29] (step=0046080) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.015848170612404, LR: 0.0003 +[2026-03-03 14:14:37] (step=0046081) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.016043827039718, LR: 0.0003 +[2026-03-03 14:14:45] (step=0046082) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.016239483467032, LR: 0.0003 +[2026-03-03 14:14:53] (step=0046083) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 9.016435139894346, LR: 0.0003 +[2026-03-03 14:15:00] (step=0046084) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.016630796321659, LR: 0.0003 +[2026-03-03 14:15:08] (step=0046085) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.016826452748973, LR: 0.0003 +[2026-03-03 14:15:16] (step=0046086) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.017022109176287, LR: 0.0003 +[2026-03-03 14:15:24] (step=0046087) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.0172177656036, LR: 0.0003 +[2026-03-03 14:15:32] (step=0046088) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.017413422030915, LR: 0.0003 +[2026-03-03 14:15:40] (step=0046089) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.017609078458227, LR: 0.0003 +[2026-03-03 14:15:48] (step=0046090) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.01780473488554, LR: 0.0003 +[2026-03-03 14:15:56] (step=0046091) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.018000391312855, LR: 0.0003 +[2026-03-03 14:16:03] (step=0046092) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.018196047740169, LR: 0.0003 +[2026-03-03 14:16:11] (step=0046093) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.018391704167481, LR: 0.0003 +[2026-03-03 14:16:19] (step=0046094) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.018587360594795, LR: 0.0003 +[2026-03-03 14:16:27] (step=0046095) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.018783017022109, LR: 0.0003 +[2026-03-03 14:16:35] (step=0046096) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.018978673449423, LR: 0.0003 +[2026-03-03 14:16:43] (step=0046097) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.019174329876737, LR: 0.0003 +[2026-03-03 14:16:51] (step=0046098) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.01936998630405, LR: 0.0003 +[2026-03-03 14:16:59] (step=0046099) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.019565642731363, LR: 0.0003 +[2026-03-03 14:17:06] (step=0046100) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 9.019761299158677, LR: 0.0003 +[2026-03-03 14:17:14] (step=0046101) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.019956955585991, LR: 0.0003 +[2026-03-03 14:17:22] (step=0046102) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.020152612013305, LR: 0.0003 +[2026-03-03 14:17:30] (step=0046103) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.020348268440618, LR: 0.0003 +[2026-03-03 14:17:38] (step=0046104) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.020543924867932, LR: 0.0003 +[2026-03-03 14:17:46] (step=0046105) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.020739581295246, LR: 0.0003 +[2026-03-03 14:17:54] (step=0046106) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.02093523772256, LR: 0.0003 +[2026-03-03 14:18:02] (step=0046107) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.021130894149874, LR: 0.0003 +[2026-03-03 14:18:09] (step=0046108) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.021326550577186, LR: 0.0003 +[2026-03-03 14:18:17] (step=0046109) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 9.0215222070045, LR: 0.0003 +[2026-03-03 14:18:25] (step=0046110) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 9.021717863431814, LR: 0.0003 +[2026-03-03 14:18:34] (step=0046111) Train Loss: 0.4501, Train Steps/Sec: 0.12, Epoch: 9.021913519859128, LR: 0.0003 +[2026-03-03 14:18:45] (step=0046112) Train Loss: 0.4625, Train Steps/Sec: 0.08, Epoch: 9.022109176286442, LR: 0.0003 +[2026-03-03 14:18:53] (step=0046113) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.022304832713754, LR: 0.0003 +[2026-03-03 14:19:01] (step=0046114) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.022500489141068, LR: 0.0003 +[2026-03-03 14:19:09] (step=0046115) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.022696145568382, LR: 0.0003 +[2026-03-03 14:19:17] (step=0046116) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.022891801995696, LR: 0.0003 +[2026-03-03 14:19:25] (step=0046117) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.02308745842301, LR: 0.0003 +[2026-03-03 14:19:33] (step=0046118) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.023283114850322, LR: 0.0003 +[2026-03-03 14:19:41] (step=0046119) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.023478771277636, LR: 0.0003 +[2026-03-03 14:19:49] (step=0046120) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.02367442770495, LR: 0.0003 +[2026-03-03 14:19:56] (step=0046121) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.023870084132264, LR: 0.0003 +[2026-03-03 14:20:04] (step=0046122) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.024065740559577, LR: 0.0003 +[2026-03-03 14:20:12] (step=0046123) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 9.02426139698689, LR: 0.0003 +[2026-03-03 14:20:20] (step=0046124) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.024457053414205, LR: 0.0003 +[2026-03-03 14:20:28] (step=0046125) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.024652709841519, LR: 0.0003 +[2026-03-03 14:20:36] (step=0046126) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.024848366268833, LR: 0.0003 +[2026-03-03 14:20:44] (step=0046127) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.025044022696145, LR: 0.0003 +[2026-03-03 14:20:51] (step=0046128) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 9.025239679123459, LR: 0.0003 +[2026-03-03 14:20:59] (step=0046129) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.025435335550773, LR: 0.0003 +[2026-03-03 14:21:07] (step=0046130) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.025630991978087, LR: 0.0003 +[2026-03-03 14:21:15] (step=0046131) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.0258266484054, LR: 0.0003 +[2026-03-03 14:21:23] (step=0046132) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.026022304832713, LR: 0.0003 +[2026-03-03 14:21:31] (step=0046133) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.026217961260027, LR: 0.0003 +[2026-03-03 14:21:39] (step=0046134) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 9.026413617687341, LR: 0.0003 +[2026-03-03 14:21:46] (step=0046135) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.026609274114655, LR: 0.0003 +[2026-03-03 14:21:54] (step=0046136) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.026804930541969, LR: 0.0003 +[2026-03-03 14:22:02] (step=0046137) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.027000586969281, LR: 0.0003 +[2026-03-03 14:22:10] (step=0046138) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.027196243396595, LR: 0.0003 +[2026-03-03 14:22:18] (step=0046139) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.02739189982391, LR: 0.0003 +[2026-03-03 14:22:26] (step=0046140) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 9.027587556251223, LR: 0.0003 +[2026-03-03 14:22:34] (step=0046141) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 9.027783212678537, LR: 0.0003 +[2026-03-03 14:22:41] (step=0046142) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.02797886910585, LR: 0.0003 +[2026-03-03 14:22:49] (step=0046143) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.028174525533164, LR: 0.0003 +[2026-03-03 14:22:57] (step=0046144) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.028370181960478, LR: 0.0003 +[2026-03-03 14:23:05] (step=0046145) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.028565838387792, LR: 0.0003 +[2026-03-03 14:23:13] (step=0046146) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.028761494815104, LR: 0.0003 +[2026-03-03 14:23:21] (step=0046147) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.028957151242418, LR: 0.0003 +[2026-03-03 14:23:29] (step=0046148) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.029152807669732, LR: 0.0003 +[2026-03-03 14:23:36] (step=0046149) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.029348464097046, LR: 0.0003 +[2026-03-03 14:23:44] (step=0046150) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.02954412052436, LR: 0.0003 +[2026-03-03 14:23:52] (step=0046151) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.029739776951672, LR: 0.0003 +[2026-03-03 14:24:00] (step=0046152) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.029935433378986, LR: 0.0003 +[2026-03-03 14:24:08] (step=0046153) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.0301310898063, LR: 0.0003 +[2026-03-03 14:24:16] (step=0046154) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.030326746233614, LR: 0.0003 +[2026-03-03 14:24:24] (step=0046155) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.030522402660928, LR: 0.0003 +[2026-03-03 14:24:31] (step=0046156) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.03071805908824, LR: 0.0003 +[2026-03-03 14:24:39] (step=0046157) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.030913715515554, LR: 0.0003 +[2026-03-03 14:24:47] (step=0046158) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.031109371942868, LR: 0.0003 +[2026-03-03 14:24:55] (step=0046159) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.031305028370182, LR: 0.0003 +[2026-03-03 14:25:03] (step=0046160) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.031500684797496, LR: 0.0003 +[2026-03-03 14:25:11] (step=0046161) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.031696341224809, LR: 0.0003 +[2026-03-03 14:25:19] (step=0046162) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 9.031891997652123, LR: 0.0003 +[2026-03-03 14:25:26] (step=0046163) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.032087654079437, LR: 0.0003 +[2026-03-03 14:25:34] (step=0046164) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.03228331050675, LR: 0.0003 +[2026-03-03 14:25:42] (step=0046165) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.032478966934065, LR: 0.0003 +[2026-03-03 14:25:50] (step=0046166) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 9.032674623361377, LR: 0.0003 +[2026-03-03 14:25:58] (step=0046167) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.03287027978869, LR: 0.0003 +[2026-03-03 14:26:06] (step=0046168) Train Loss: 0.4504, Train Steps/Sec: 0.12, Epoch: 9.033065936216005, LR: 0.0003 +[2026-03-03 14:26:14] (step=0046169) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.033261592643319, LR: 0.0003 +[2026-03-03 14:26:22] (step=0046170) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.033457249070633, LR: 0.0003 +[2026-03-03 14:26:29] (step=0046171) Train Loss: 0.4697, Train Steps/Sec: 0.13, Epoch: 9.033652905497945, LR: 0.0003 +[2026-03-03 14:26:37] (step=0046172) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.033848561925259, LR: 0.0003 +[2026-03-03 14:26:45] (step=0046173) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.034044218352573, LR: 0.0003 +[2026-03-03 14:26:53] (step=0046174) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.034239874779887, LR: 0.0003 +[2026-03-03 14:27:01] (step=0046175) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.0344355312072, LR: 0.0003 +[2026-03-03 14:27:09] (step=0046176) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.034631187634513, LR: 0.0003 +[2026-03-03 14:27:17] (step=0046177) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.034826844061827, LR: 0.0003 +[2026-03-03 14:27:24] (step=0046178) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.035022500489141, LR: 0.0003 +[2026-03-03 14:27:32] (step=0046179) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.035218156916455, LR: 0.0003 +[2026-03-03 14:27:40] (step=0046180) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.035413813343768, LR: 0.0003 +[2026-03-03 14:27:48] (step=0046181) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.035609469771082, LR: 0.0003 +[2026-03-03 14:27:56] (step=0046182) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 9.035805126198396, LR: 0.0003 +[2026-03-03 14:28:04] (step=0046183) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.03600078262571, LR: 0.0003 +[2026-03-03 14:28:12] (step=0046184) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.036196439053024, LR: 0.0003 +[2026-03-03 14:28:20] (step=0046185) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.036392095480336, LR: 0.0003 +[2026-03-03 14:28:27] (step=0046186) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.03658775190765, LR: 0.0003 +[2026-03-03 14:28:35] (step=0046187) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.036783408334964, LR: 0.0003 +[2026-03-03 14:28:43] (step=0046188) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.036979064762278, LR: 0.0003 +[2026-03-03 14:28:51] (step=0046189) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.037174721189592, LR: 0.0003 +[2026-03-03 14:28:59] (step=0046190) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.037370377616904, LR: 0.0003 +[2026-03-03 14:29:07] (step=0046191) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.037566034044218, LR: 0.0003 +[2026-03-03 14:29:15] (step=0046192) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.037761690471532, LR: 0.0003 +[2026-03-03 14:29:22] (step=0046193) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 9.037957346898846, LR: 0.0003 +[2026-03-03 14:29:30] (step=0046194) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.03815300332616, LR: 0.0003 +[2026-03-03 14:29:38] (step=0046195) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.038348659753472, LR: 0.0003 +[2026-03-03 14:29:46] (step=0046196) Train Loss: 0.4600, Train Steps/Sec: 0.12, Epoch: 9.038544316180786, LR: 0.0003 +[2026-03-03 14:29:54] (step=0046197) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.0387399726081, LR: 0.0003 +[2026-03-03 14:30:02] (step=0046198) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.038935629035414, LR: 0.0003 +[2026-03-03 14:30:10] (step=0046199) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.039131285462727, LR: 0.0003 +[2026-03-03 14:30:18] (step=0046200) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.03932694189004, LR: 0.0003 +[2026-03-03 14:30:26] (step=0046201) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.039522598317355, LR: 0.0003 +[2026-03-03 14:30:33] (step=0046202) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.039718254744669, LR: 0.0003 +[2026-03-03 14:30:41] (step=0046203) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.039913911171983, LR: 0.0003 +[2026-03-03 14:30:49] (step=0046204) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.040109567599295, LR: 0.0003 +[2026-03-03 14:30:57] (step=0046205) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.040305224026609, LR: 0.0003 +[2026-03-03 14:31:05] (step=0046206) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.040500880453923, LR: 0.0003 +[2026-03-03 14:31:13] (step=0046207) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.040696536881237, LR: 0.0003 +[2026-03-03 14:31:21] (step=0046208) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.04089219330855, LR: 0.0003 +[2026-03-03 14:31:29] (step=0046209) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.041087849735863, LR: 0.0003 +[2026-03-03 14:31:36] (step=0046210) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 9.041283506163177, LR: 0.0003 +[2026-03-03 14:31:44] (step=0046211) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.041479162590491, LR: 0.0003 +[2026-03-03 14:31:52] (step=0046212) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 9.041674819017805, LR: 0.0003 +[2026-03-03 14:32:00] (step=0046213) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.04187047544512, LR: 0.0003 +[2026-03-03 14:32:08] (step=0046214) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.042066131872431, LR: 0.0003 +[2026-03-03 14:32:16] (step=0046215) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.042261788299745, LR: 0.0003 +[2026-03-03 14:32:24] (step=0046216) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.04245744472706, LR: 0.0003 +[2026-03-03 14:32:32] (step=0046217) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.042653101154373, LR: 0.0003 +[2026-03-03 14:32:39] (step=0046218) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.042848757581687, LR: 0.0003 +[2026-03-03 14:32:47] (step=0046219) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.043044414009, LR: 0.0003 +[2026-03-03 14:32:55] (step=0046220) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.043240070436314, LR: 0.0003 +[2026-03-03 14:33:03] (step=0046221) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.043435726863628, LR: 0.0003 +[2026-03-03 14:33:11] (step=0046222) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.043631383290942, LR: 0.0003 +[2026-03-03 14:33:19] (step=0046223) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.043827039718254, LR: 0.0003 +[2026-03-03 14:33:27] (step=0046224) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.044022696145568, LR: 0.0003 +[2026-03-03 14:33:35] (step=0046225) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.044218352572882, LR: 0.0003 +[2026-03-03 14:33:42] (step=0046226) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.044414009000196, LR: 0.0003 +[2026-03-03 14:33:50] (step=0046227) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.04460966542751, LR: 0.0003 +[2026-03-03 14:33:58] (step=0046228) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.044805321854822, LR: 0.0003 +[2026-03-03 14:34:06] (step=0046229) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.045000978282136, LR: 0.0003 +[2026-03-03 14:34:14] (step=0046230) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.04519663470945, LR: 0.0003 +[2026-03-03 14:34:22] (step=0046231) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.045392291136764, LR: 0.0003 +[2026-03-03 14:34:30] (step=0046232) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 9.045587947564078, LR: 0.0003 +[2026-03-03 14:34:37] (step=0046233) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.04578360399139, LR: 0.0003 +[2026-03-03 14:34:45] (step=0046234) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.045979260418704, LR: 0.0003 +[2026-03-03 14:34:53] (step=0046235) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.046174916846018, LR: 0.0003 +[2026-03-03 14:35:01] (step=0046236) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.046370573273332, LR: 0.0003 +[2026-03-03 14:35:09] (step=0046237) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.046566229700646, LR: 0.0003 +[2026-03-03 14:35:17] (step=0046238) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.046761886127959, LR: 0.0003 +[2026-03-03 14:35:25] (step=0046239) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 9.046957542555273, LR: 0.0003 +[2026-03-03 14:35:32] (step=0046240) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.047153198982587, LR: 0.0003 +[2026-03-03 14:35:40] (step=0046241) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.0473488554099, LR: 0.0003 +[2026-03-03 14:35:48] (step=0046242) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.047544511837215, LR: 0.0003 +[2026-03-03 14:35:56] (step=0046243) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.047740168264527, LR: 0.0003 +[2026-03-03 14:36:04] (step=0046244) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.04793582469184, LR: 0.0003 +[2026-03-03 14:36:12] (step=0046245) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 9.048131481119155, LR: 0.0003 +[2026-03-03 14:36:20] (step=0046246) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.048327137546469, LR: 0.0003 +[2026-03-03 14:36:28] (step=0046247) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.048522793973783, LR: 0.0003 +[2026-03-03 14:36:36] (step=0046248) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 9.048718450401095, LR: 0.0003 +[2026-03-03 14:36:43] (step=0046249) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.04891410682841, LR: 0.0003 +[2026-03-03 14:36:51] (step=0046250) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.049109763255723, LR: 0.0003 +[2026-03-03 14:36:59] (step=0046251) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 9.049305419683037, LR: 0.0003 +[2026-03-03 14:37:07] (step=0046252) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.04950107611035, LR: 0.0003 +[2026-03-03 14:37:15] (step=0046253) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.049696732537663, LR: 0.0003 +[2026-03-03 14:37:23] (step=0046254) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.049892388964977, LR: 0.0003 +[2026-03-03 14:37:31] (step=0046255) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 9.050088045392291, LR: 0.0003 +[2026-03-03 14:37:39] (step=0046256) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.050283701819605, LR: 0.0003 +[2026-03-03 14:37:46] (step=0046257) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.050479358246918, LR: 0.0003 +[2026-03-03 14:37:54] (step=0046258) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.050675014674232, LR: 0.0003 +[2026-03-03 14:38:02] (step=0046259) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.050870671101546, LR: 0.0003 +[2026-03-03 14:38:10] (step=0046260) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.05106632752886, LR: 0.0003 +[2026-03-03 14:38:18] (step=0046261) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.051261983956174, LR: 0.0003 +[2026-03-03 14:38:26] (step=0046262) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.051457640383486, LR: 0.0003 +[2026-03-03 14:38:34] (step=0046263) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.0516532968108, LR: 0.0003 +[2026-03-03 14:38:42] (step=0046264) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 9.051848953238114, LR: 0.0003 +[2026-03-03 14:38:49] (step=0046265) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.052044609665428, LR: 0.0003 +[2026-03-03 14:38:57] (step=0046266) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.052240266092742, LR: 0.0003 +[2026-03-03 14:39:05] (step=0046267) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.052435922520054, LR: 0.0003 +[2026-03-03 14:39:13] (step=0046268) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.052631578947368, LR: 0.0003 +[2026-03-03 14:39:21] (step=0046269) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.052827235374682, LR: 0.0003 +[2026-03-03 14:39:29] (step=0046270) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.053022891801996, LR: 0.0003 +[2026-03-03 14:39:37] (step=0046271) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.05321854822931, LR: 0.0003 +[2026-03-03 14:39:44] (step=0046272) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 9.053414204656622, LR: 0.0003 +[2026-03-03 14:39:52] (step=0046273) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.053609861083936, LR: 0.0003 +[2026-03-03 14:40:00] (step=0046274) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.05380551751125, LR: 0.0003 +[2026-03-03 14:40:08] (step=0046275) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.054001173938564, LR: 0.0003 +[2026-03-03 14:40:16] (step=0046276) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.054196830365877, LR: 0.0003 +[2026-03-03 14:40:24] (step=0046277) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.05439248679319, LR: 0.0003 +[2026-03-03 14:40:32] (step=0046278) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 9.054588143220505, LR: 0.0003 +[2026-03-03 14:40:39] (step=0046279) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.054783799647819, LR: 0.0003 +[2026-03-03 14:40:47] (step=0046280) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.054979456075133, LR: 0.0003 +[2026-03-03 14:40:55] (step=0046281) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 9.055175112502445, LR: 0.0003 +[2026-03-03 14:41:03] (step=0046282) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.055370768929759, LR: 0.0003 +[2026-03-03 14:41:11] (step=0046283) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.055566425357073, LR: 0.0003 +[2026-03-03 14:41:19] (step=0046284) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.055762081784387, LR: 0.0003 +[2026-03-03 14:41:27] (step=0046285) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.055957738211701, LR: 0.0003 +[2026-03-03 14:41:34] (step=0046286) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.056153394639013, LR: 0.0003 +[2026-03-03 14:41:42] (step=0046287) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.056349051066327, LR: 0.0003 +[2026-03-03 14:41:50] (step=0046288) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.056544707493641, LR: 0.0003 +[2026-03-03 14:41:58] (step=0046289) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.056740363920955, LR: 0.0003 +[2026-03-03 14:42:06] (step=0046290) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.05693602034827, LR: 0.0003 +[2026-03-03 14:42:14] (step=0046291) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 9.057131676775581, LR: 0.0003 +[2026-03-03 14:42:22] (step=0046292) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.057327333202895, LR: 0.0003 +[2026-03-03 14:42:30] (step=0046293) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 9.05752298963021, LR: 0.0003 +[2026-03-03 14:42:38] (step=0046294) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.057718646057523, LR: 0.0003 +[2026-03-03 14:42:45] (step=0046295) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.057914302484837, LR: 0.0003 +[2026-03-03 14:42:53] (step=0046296) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.05810995891215, LR: 0.0003 +[2026-03-03 14:43:01] (step=0046297) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.058305615339464, LR: 0.0003 +[2026-03-03 14:43:09] (step=0046298) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 9.058501271766778, LR: 0.0003 +[2026-03-03 14:43:17] (step=0046299) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.058696928194092, LR: 0.0003 +[2026-03-03 14:43:25] (step=0046300) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.058892584621406, LR: 0.0003 +[2026-03-03 14:43:33] (step=0046301) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.059088241048718, LR: 0.0003 +[2026-03-03 14:43:40] (step=0046302) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 9.059283897476032, LR: 0.0003 +[2026-03-03 14:43:48] (step=0046303) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.059479553903346, LR: 0.0003 +[2026-03-03 14:43:56] (step=0046304) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 9.05967521033066, LR: 0.0003 +[2026-03-03 14:44:04] (step=0046305) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 9.059870866757972, LR: 0.0003 +[2026-03-03 14:44:12] (step=0046306) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 9.060066523185286, LR: 0.0003 +[2026-03-03 14:44:20] (step=0046307) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.0602621796126, LR: 0.0003 +[2026-03-03 14:44:28] (step=0046308) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.060457836039914, LR: 0.0003 +[2026-03-03 14:44:36] (step=0046309) Train Loss: 0.4433, Train Steps/Sec: 0.12, Epoch: 9.060653492467228, LR: 0.0003 +[2026-03-03 14:44:44] (step=0046310) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.06084914889454, LR: 0.0003 +[2026-03-03 14:44:51] (step=0046311) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.061044805321854, LR: 0.0003 +[2026-03-03 14:44:59] (step=0046312) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.061240461749168, LR: 0.0003 +[2026-03-03 14:45:07] (step=0046313) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.061436118176482, LR: 0.0003 +[2026-03-03 14:45:15] (step=0046314) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.061631774603796, LR: 0.0003 +[2026-03-03 14:45:23] (step=0046315) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.061827431031109, LR: 0.0003 +[2026-03-03 14:45:31] (step=0046316) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 9.062023087458423, LR: 0.0003 +[2026-03-03 14:45:39] (step=0046317) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.062218743885737, LR: 0.0003 +[2026-03-03 14:45:46] (step=0046318) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 9.06241440031305, LR: 0.0003 +[2026-03-03 14:45:54] (step=0046319) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.062610056740365, LR: 0.0003 +[2026-03-03 14:46:02] (step=0046320) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.062805713167677, LR: 0.0003 +[2026-03-03 14:46:10] (step=0046321) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.063001369594991, LR: 0.0003 +[2026-03-03 14:46:18] (step=0046322) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 9.063197026022305, LR: 0.0003 +[2026-03-03 14:46:26] (step=0046323) Train Loss: 0.4250, Train Steps/Sec: 0.13, Epoch: 9.063392682449619, LR: 0.0003 +[2026-03-03 14:46:34] (step=0046324) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.063588338876933, LR: 0.0003 +[2026-03-03 14:46:41] (step=0046325) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.063783995304245, LR: 0.0003 +[2026-03-03 14:46:49] (step=0046326) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 9.06397965173156, LR: 0.0003 +[2026-03-03 14:46:57] (step=0046327) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.064175308158873, LR: 0.0003 +[2026-03-03 14:47:05] (step=0046328) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.064370964586187, LR: 0.0003 +[2026-03-03 14:47:13] (step=0046329) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.0645666210135, LR: 0.0003 +[2026-03-03 14:47:21] (step=0046330) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.064762277440813, LR: 0.0003 +[2026-03-03 14:47:29] (step=0046331) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.064957933868127, LR: 0.0003 +[2026-03-03 14:47:37] (step=0046332) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.065153590295441, LR: 0.0003 +[2026-03-03 14:47:44] (step=0046333) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.065349246722755, LR: 0.0003 +[2026-03-03 14:47:52] (step=0046334) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 9.065544903150068, LR: 0.0003 +[2026-03-03 14:48:00] (step=0046335) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 9.065740559577382, LR: 0.0003 +[2026-03-03 14:48:08] (step=0046336) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.065936216004696, LR: 0.0003 +[2026-03-03 14:48:16] (step=0046337) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.06613187243201, LR: 0.0003 +[2026-03-03 14:48:24] (step=0046338) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.066327528859324, LR: 0.0003 +[2026-03-03 14:48:32] (step=0046339) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.066523185286636, LR: 0.0003 +[2026-03-03 14:48:39] (step=0046340) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 9.06671884171395, LR: 0.0003 +[2026-03-03 14:48:47] (step=0046341) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.066914498141264, LR: 0.0003 +[2026-03-03 14:48:55] (step=0046342) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.067110154568578, LR: 0.0003 +[2026-03-03 14:49:03] (step=0046343) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.067305810995892, LR: 0.0003 +[2026-03-03 14:49:11] (step=0046344) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.067501467423204, LR: 0.0003 +[2026-03-03 14:49:19] (step=0046345) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.067697123850518, LR: 0.0003 +[2026-03-03 14:49:27] (step=0046346) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.067892780277832, LR: 0.0003 +[2026-03-03 14:49:35] (step=0046347) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.068088436705146, LR: 0.0003 +[2026-03-03 14:49:43] (step=0046348) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.06828409313246, LR: 0.0003 +[2026-03-03 14:49:50] (step=0046349) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.068479749559772, LR: 0.0003 +[2026-03-03 14:49:58] (step=0046350) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.068675405987086, LR: 0.0003 +[2026-03-03 14:50:06] (step=0046351) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.0688710624144, LR: 0.0003 +[2026-03-03 14:50:14] (step=0046352) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.069066718841714, LR: 0.0003 +[2026-03-03 14:50:22] (step=0046353) Train Loss: 0.4556, Train Steps/Sec: 0.12, Epoch: 9.069262375269028, LR: 0.0003 +[2026-03-03 14:50:30] (step=0046354) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.06945803169634, LR: 0.0003 +[2026-03-03 14:50:38] (step=0046355) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.069653688123655, LR: 0.0003 +[2026-03-03 14:50:46] (step=0046356) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.069849344550969, LR: 0.0003 +[2026-03-03 14:50:53] (step=0046357) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 9.070045000978283, LR: 0.0003 +[2026-03-03 14:51:01] (step=0046358) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.070240657405595, LR: 0.0003 +[2026-03-03 14:51:09] (step=0046359) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 9.070436313832909, LR: 0.0003 +[2026-03-03 14:51:17] (step=0046360) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 9.070631970260223, LR: 0.0003 +[2026-03-03 14:51:25] (step=0046361) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.070827626687537, LR: 0.0003 +[2026-03-03 14:51:33] (step=0046362) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.071023283114851, LR: 0.0003 +[2026-03-03 14:51:41] (step=0046363) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.071218939542163, LR: 0.0003 +[2026-03-03 14:51:49] (step=0046364) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.071414595969477, LR: 0.0003 +[2026-03-03 14:51:56] (step=0046365) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.071610252396791, LR: 0.0003 +[2026-03-03 14:52:04] (step=0046366) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.071805908824105, LR: 0.0003 +[2026-03-03 14:52:12] (step=0046367) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.07200156525142, LR: 0.0003 +[2026-03-03 14:52:20] (step=0046368) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.072197221678731, LR: 0.0003 +[2026-03-03 14:52:28] (step=0046369) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.072392878106045, LR: 0.0003 +[2026-03-03 14:52:36] (step=0046370) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.07258853453336, LR: 0.0003 +[2026-03-03 14:52:44] (step=0046371) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.072784190960673, LR: 0.0003 +[2026-03-03 14:52:51] (step=0046372) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.072979847387987, LR: 0.0003 +[2026-03-03 14:52:59] (step=0046373) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.0731755038153, LR: 0.0003 +[2026-03-03 14:53:07] (step=0046374) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.073371160242614, LR: 0.0003 +[2026-03-03 14:53:15] (step=0046375) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.073566816669928, LR: 0.0003 +[2026-03-03 14:53:23] (step=0046376) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 9.073762473097242, LR: 0.0003 +[2026-03-03 14:53:31] (step=0046377) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.073958129524556, LR: 0.0003 +[2026-03-03 14:53:39] (step=0046378) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.074153785951868, LR: 0.0003 +[2026-03-03 14:53:46] (step=0046379) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.074349442379182, LR: 0.0003 +[2026-03-03 14:53:54] (step=0046380) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.074545098806496, LR: 0.0003 +[2026-03-03 14:54:02] (step=0046381) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.07474075523381, LR: 0.0003 +[2026-03-03 14:54:10] (step=0046382) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.074936411661122, LR: 0.0003 +[2026-03-03 14:54:18] (step=0046383) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.075132068088436, LR: 0.0003 +[2026-03-03 14:54:26] (step=0046384) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.07532772451575, LR: 0.0003 +[2026-03-03 14:54:34] (step=0046385) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.075523380943064, LR: 0.0003 +[2026-03-03 14:54:41] (step=0046386) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.075719037370378, LR: 0.0003 +[2026-03-03 14:54:49] (step=0046387) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.07591469379769, LR: 0.0003 +[2026-03-03 14:54:57] (step=0046388) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 9.076110350225004, LR: 0.0003 +[2026-03-03 14:55:05] (step=0046389) Train Loss: 0.4382, Train Steps/Sec: 0.12, Epoch: 9.076306006652318, LR: 0.0003 +[2026-03-03 14:55:13] (step=0046390) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.076501663079632, LR: 0.0003 +[2026-03-03 14:55:21] (step=0046391) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.076697319506946, LR: 0.0003 +[2026-03-03 14:55:29] (step=0046392) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.076892975934259, LR: 0.0003 +[2026-03-03 14:55:37] (step=0046393) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.077088632361573, LR: 0.0003 +[2026-03-03 14:55:45] (step=0046394) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.077284288788887, LR: 0.0003 +[2026-03-03 14:55:52] (step=0046395) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 9.0774799452162, LR: 0.0003 +[2026-03-03 14:56:00] (step=0046396) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.077675601643515, LR: 0.0003 +[2026-03-03 14:56:08] (step=0046397) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.077871258070827, LR: 0.0003 +[2026-03-03 14:56:16] (step=0046398) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.078066914498141, LR: 0.0003 +[2026-03-03 14:56:24] (step=0046399) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.078262570925455, LR: 0.0003 +[2026-03-03 14:56:32] (step=0046400) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.078458227352769, LR: 0.0003 +[2026-03-03 14:56:39] (step=0046401) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.078653883780083, LR: 0.0003 +[2026-03-03 14:56:47] (step=0046402) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.078849540207395, LR: 0.0003 +[2026-03-03 14:56:55] (step=0046403) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.07904519663471, LR: 0.0003 +[2026-03-03 14:57:03] (step=0046404) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.079240853062023, LR: 0.0003 +[2026-03-03 14:57:11] (step=0046405) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.079436509489337, LR: 0.0003 +[2026-03-03 14:57:19] (step=0046406) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 9.079632165916651, LR: 0.0003 +[2026-03-03 14:57:27] (step=0046407) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.079827822343963, LR: 0.0003 +[2026-03-03 14:57:35] (step=0046408) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.080023478771277, LR: 0.0003 +[2026-03-03 14:57:43] (step=0046409) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.080219135198591, LR: 0.0003 +[2026-03-03 14:57:50] (step=0046410) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.080414791625905, LR: 0.0003 +[2026-03-03 14:57:58] (step=0046411) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.080610448053218, LR: 0.0003 +[2026-03-03 14:58:06] (step=0046412) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.080806104480532, LR: 0.0003 +[2026-03-03 14:58:14] (step=0046413) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.081001760907846, LR: 0.0003 +[2026-03-03 14:58:22] (step=0046414) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.08119741733516, LR: 0.0003 +[2026-03-03 14:58:30] (step=0046415) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.081393073762474, LR: 0.0003 +[2026-03-03 14:58:38] (step=0046416) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.081588730189786, LR: 0.0003 +[2026-03-03 14:58:45] (step=0046417) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.0817843866171, LR: 0.0003 +[2026-03-03 14:58:53] (step=0046418) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 9.081980043044414, LR: 0.0003 +[2026-03-03 14:59:01] (step=0046419) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.082175699471728, LR: 0.0003 +[2026-03-03 14:59:09] (step=0046420) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.082371355899042, LR: 0.0003 +[2026-03-03 14:59:17] (step=0046421) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.082567012326354, LR: 0.0003 +[2026-03-03 14:59:25] (step=0046422) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.082762668753668, LR: 0.0003 +[2026-03-03 14:59:33] (step=0046423) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.082958325180982, LR: 0.0003 +[2026-03-03 14:59:40] (step=0046424) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.083153981608296, LR: 0.0003 +[2026-03-03 14:59:48] (step=0046425) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.08334963803561, LR: 0.0003 +[2026-03-03 14:59:56] (step=0046426) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.083545294462922, LR: 0.0003 +[2026-03-03 15:00:04] (step=0046427) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.083740950890236, LR: 0.0003 +[2026-03-03 15:00:12] (step=0046428) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 9.08393660731755, LR: 0.0003 +[2026-03-03 15:00:20] (step=0046429) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.084132263744864, LR: 0.0003 +[2026-03-03 15:00:28] (step=0046430) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.084327920172178, LR: 0.0003 +[2026-03-03 15:00:35] (step=0046431) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 9.08452357659949, LR: 0.0003 +[2026-03-03 15:00:43] (step=0046432) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.084719233026805, LR: 0.0003 +[2026-03-03 15:00:51] (step=0046433) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.084914889454119, LR: 0.0003 +[2026-03-03 15:00:59] (step=0046434) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.085110545881433, LR: 0.0003 +[2026-03-03 15:01:07] (step=0046435) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.085306202308745, LR: 0.0003 +[2026-03-03 15:01:15] (step=0046436) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.085501858736059, LR: 0.0003 +[2026-03-03 15:01:23] (step=0046437) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 9.085697515163373, LR: 0.0003 +[2026-03-03 15:01:31] (step=0046438) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.085893171590687, LR: 0.0003 +[2026-03-03 15:01:38] (step=0046439) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 9.086088828018001, LR: 0.0003 +[2026-03-03 15:01:46] (step=0046440) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.086284484445313, LR: 0.0003 +[2026-03-03 15:01:54] (step=0046441) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.086480140872627, LR: 0.0003 +[2026-03-03 15:02:02] (step=0046442) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.086675797299941, LR: 0.0003 +[2026-03-03 15:02:10] (step=0046443) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.086871453727255, LR: 0.0003 +[2026-03-03 15:02:18] (step=0046444) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.08706711015457, LR: 0.0003 +[2026-03-03 15:02:26] (step=0046445) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.087262766581881, LR: 0.0003 +[2026-03-03 15:02:33] (step=0046446) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.087458423009195, LR: 0.0003 +[2026-03-03 15:02:41] (step=0046447) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.08765407943651, LR: 0.0003 +[2026-03-03 15:02:49] (step=0046448) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.087849735863823, LR: 0.0003 +[2026-03-03 15:02:57] (step=0046449) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.088045392291138, LR: 0.0003 +[2026-03-03 15:03:05] (step=0046450) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.08824104871845, LR: 0.0003 +[2026-03-03 15:03:13] (step=0046451) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 9.088436705145764, LR: 0.0003 +[2026-03-03 15:03:21] (step=0046452) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 9.088632361573078, LR: 0.0003 +[2026-03-03 15:03:29] (step=0046453) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 9.088828018000392, LR: 0.0003 +[2026-03-03 15:03:37] (step=0046454) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.089023674427706, LR: 0.0003 +[2026-03-03 15:03:44] (step=0046455) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.089219330855018, LR: 0.0003 +[2026-03-03 15:03:52] (step=0046456) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 9.089414987282332, LR: 0.0003 +[2026-03-03 15:04:00] (step=0046457) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.089610643709646, LR: 0.0003 +[2026-03-03 15:04:08] (step=0046458) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.08980630013696, LR: 0.0003 +[2026-03-03 15:04:16] (step=0046459) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 9.090001956564274, LR: 0.0003 +[2026-03-03 15:04:24] (step=0046460) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.090197612991586, LR: 0.0003 +[2026-03-03 15:04:32] (step=0046461) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.0903932694189, LR: 0.0003 +[2026-03-03 15:04:39] (step=0046462) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.090588925846214, LR: 0.0003 +[2026-03-03 15:04:47] (step=0046463) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.090784582273528, LR: 0.0003 +[2026-03-03 15:04:55] (step=0046464) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.09098023870084, LR: 0.0003 +[2026-03-03 15:05:03] (step=0046465) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.091175895128154, LR: 0.0003 +[2026-03-03 15:05:11] (step=0046466) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.091371551555468, LR: 0.0003 +[2026-03-03 15:05:19] (step=0046467) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.091567207982783, LR: 0.0003 +[2026-03-03 15:05:27] (step=0046468) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.091762864410097, LR: 0.0003 +[2026-03-03 15:05:34] (step=0046469) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.091958520837409, LR: 0.0003 +[2026-03-03 15:05:42] (step=0046470) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.092154177264723, LR: 0.0003 +[2026-03-03 15:05:50] (step=0046471) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.092349833692037, LR: 0.0003 +[2026-03-03 15:05:58] (step=0046472) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 9.09254549011935, LR: 0.0003 +[2026-03-03 15:06:06] (step=0046473) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.092741146546665, LR: 0.0003 +[2026-03-03 15:06:14] (step=0046474) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.092936802973977, LR: 0.0003 +[2026-03-03 15:06:22] (step=0046475) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.093132459401291, LR: 0.0003 +[2026-03-03 15:06:29] (step=0046476) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.093328115828605, LR: 0.0003 +[2026-03-03 15:06:37] (step=0046477) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.093523772255919, LR: 0.0003 +[2026-03-03 15:06:45] (step=0046478) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.093719428683233, LR: 0.0003 +[2026-03-03 15:06:53] (step=0046479) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.093915085110545, LR: 0.0003 +[2026-03-03 15:07:01] (step=0046480) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.09411074153786, LR: 0.0003 +[2026-03-03 15:07:09] (step=0046481) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 9.094306397965173, LR: 0.0003 +[2026-03-03 15:07:17] (step=0046482) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 9.094502054392487, LR: 0.0003 +[2026-03-03 15:07:25] (step=0046483) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.094697710819801, LR: 0.0003 +[2026-03-03 15:07:33] (step=0046484) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.094893367247114, LR: 0.0003 +[2026-03-03 15:07:40] (step=0046485) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.095089023674428, LR: 0.0003 +[2026-03-03 15:07:48] (step=0046486) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.095284680101742, LR: 0.0003 +[2026-03-03 15:07:56] (step=0046487) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.095480336529056, LR: 0.0003 +[2026-03-03 15:08:04] (step=0046488) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.095675992956368, LR: 0.0003 +[2026-03-03 15:08:12] (step=0046489) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.095871649383682, LR: 0.0003 +[2026-03-03 15:08:20] (step=0046490) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.096067305810996, LR: 0.0003 +[2026-03-03 15:08:28] (step=0046491) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.09626296223831, LR: 0.0003 +[2026-03-03 15:08:35] (step=0046492) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 9.096458618665624, LR: 0.0003 +[2026-03-03 15:08:43] (step=0046493) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.096654275092936, LR: 0.0003 +[2026-03-03 15:08:51] (step=0046494) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.09684993152025, LR: 0.0003 +[2026-03-03 15:08:59] (step=0046495) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.097045587947564, LR: 0.0003 +[2026-03-03 15:09:07] (step=0046496) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.097241244374878, LR: 0.0003 +[2026-03-03 15:09:15] (step=0046497) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.097436900802192, LR: 0.0003 +[2026-03-03 15:09:23] (step=0046498) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.097632557229504, LR: 0.0003 +[2026-03-03 15:09:31] (step=0046499) Train Loss: 0.4445, Train Steps/Sec: 0.12, Epoch: 9.097828213656818, LR: 0.0003 +[2026-03-03 15:09:38] (step=0046500) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.098023870084132, LR: 0.0003 +[2026-03-03 15:09:38] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0046500/ +[2026-03-03 15:09:46] (step=0046501) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.098219526511446, LR: 0.0003 +[2026-03-03 15:09:54] (step=0046502) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.09841518293876, LR: 0.0003 +[2026-03-03 15:10:02] (step=0046503) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.098610839366073, LR: 0.0003 +[2026-03-03 15:10:10] (step=0046504) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.098806495793387, LR: 0.0003 +[2026-03-03 15:10:18] (step=0046505) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 9.0990021522207, LR: 0.0003 +[2026-03-03 15:10:26] (step=0046506) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.099197808648015, LR: 0.0003 +[2026-03-03 15:10:34] (step=0046507) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.099393465075329, LR: 0.0003 +[2026-03-03 15:10:41] (step=0046508) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.09958912150264, LR: 0.0003 +[2026-03-03 15:10:49] (step=0046509) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.099784777929955, LR: 0.0003 +[2026-03-03 15:10:57] (step=0046510) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 9.099980434357269, LR: 0.0003 +[2026-03-03 15:11:05] (step=0046511) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.100176090784583, LR: 0.0003 +[2026-03-03 15:11:13] (step=0046512) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.100371747211897, LR: 0.0003 +[2026-03-03 15:11:21] (step=0046513) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.100567403639209, LR: 0.0003 +[2026-03-03 15:11:29] (step=0046514) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.100763060066523, LR: 0.0003 +[2026-03-03 15:11:36] (step=0046515) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.100958716493837, LR: 0.0003 +[2026-03-03 15:11:44] (step=0046516) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.101154372921151, LR: 0.0003 +[2026-03-03 15:11:52] (step=0046517) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.101350029348463, LR: 0.0003 +[2026-03-03 15:12:00] (step=0046518) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.101545685775777, LR: 0.0003 +[2026-03-03 15:12:08] (step=0046519) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 9.101741342203091, LR: 0.0003 +[2026-03-03 15:12:16] (step=0046520) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.101936998630405, LR: 0.0003 +[2026-03-03 15:12:24] (step=0046521) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.10213265505772, LR: 0.0003 +[2026-03-03 15:12:31] (step=0046522) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.102328311485032, LR: 0.0003 +[2026-03-03 15:12:39] (step=0046523) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.102523967912346, LR: 0.0003 +[2026-03-03 15:12:47] (step=0046524) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.10271962433966, LR: 0.0003 +[2026-03-03 15:12:55] (step=0046525) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.102915280766974, LR: 0.0003 +[2026-03-03 15:13:03] (step=0046526) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 9.103110937194288, LR: 0.0003 +[2026-03-03 15:13:11] (step=0046527) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.1033065936216, LR: 0.0003 +[2026-03-03 15:13:19] (step=0046528) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.103502250048914, LR: 0.0003 +[2026-03-03 15:13:27] (step=0046529) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.103697906476228, LR: 0.0003 +[2026-03-03 15:13:34] (step=0046530) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.103893562903542, LR: 0.0003 +[2026-03-03 15:13:42] (step=0046531) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.104089219330856, LR: 0.0003 +[2026-03-03 15:13:50] (step=0046532) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.104284875758168, LR: 0.0003 +[2026-03-03 15:13:58] (step=0046533) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.104480532185482, LR: 0.0003 +[2026-03-03 15:14:06] (step=0046534) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.104676188612796, LR: 0.0003 +[2026-03-03 15:14:14] (step=0046535) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.10487184504011, LR: 0.0003 +[2026-03-03 15:14:22] (step=0046536) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.105067501467424, LR: 0.0003 +[2026-03-03 15:14:29] (step=0046537) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.105263157894736, LR: 0.0003 +[2026-03-03 15:14:37] (step=0046538) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 9.10545881432205, LR: 0.0003 +[2026-03-03 15:14:45] (step=0046539) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.105654470749364, LR: 0.0003 +[2026-03-03 15:14:53] (step=0046540) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.105850127176678, LR: 0.0003 +[2026-03-03 15:15:01] (step=0046541) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.10604578360399, LR: 0.0003 +[2026-03-03 15:15:09] (step=0046542) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.106241440031305, LR: 0.0003 +[2026-03-03 15:15:17] (step=0046543) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.106437096458619, LR: 0.0003 +[2026-03-03 15:15:25] (step=0046544) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.106632752885933, LR: 0.0003 +[2026-03-03 15:15:32] (step=0046545) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.106828409313247, LR: 0.0003 +[2026-03-03 15:15:40] (step=0046546) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.107024065740559, LR: 0.0003 +[2026-03-03 15:15:48] (step=0046547) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.107219722167873, LR: 0.0003 +[2026-03-03 15:15:56] (step=0046548) Train Loss: 0.4433, Train Steps/Sec: 0.12, Epoch: 9.107415378595187, LR: 0.0003 +[2026-03-03 15:16:04] (step=0046549) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.1076110350225, LR: 0.0003 +[2026-03-03 15:16:12] (step=0046550) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.107806691449815, LR: 0.0003 +[2026-03-03 15:16:20] (step=0046551) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.108002347877127, LR: 0.0003 +[2026-03-03 15:16:28] (step=0046552) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.108198004304441, LR: 0.0003 +[2026-03-03 15:16:35] (step=0046553) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.108393660731755, LR: 0.0003 +[2026-03-03 15:16:43] (step=0046554) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.108589317159069, LR: 0.0003 +[2026-03-03 15:16:51] (step=0046555) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.108784973586383, LR: 0.0003 +[2026-03-03 15:16:59] (step=0046556) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.108980630013695, LR: 0.0003 +[2026-03-03 15:17:07] (step=0046557) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 9.10917628644101, LR: 0.0003 +[2026-03-03 15:17:15] (step=0046558) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.109371942868323, LR: 0.0003 +[2026-03-03 15:17:23] (step=0046559) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 9.109567599295637, LR: 0.0003 +[2026-03-03 15:17:30] (step=0046560) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.109763255722951, LR: 0.0003 +[2026-03-03 15:17:38] (step=0046561) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.109958912150264, LR: 0.0003 +[2026-03-03 15:17:46] (step=0046562) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.110154568577578, LR: 0.0003 +[2026-03-03 15:17:54] (step=0046563) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.110350225004892, LR: 0.0003 +[2026-03-03 15:18:02] (step=0046564) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.110545881432206, LR: 0.0003 +[2026-03-03 15:18:10] (step=0046565) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.11074153785952, LR: 0.0003 +[2026-03-03 15:18:18] (step=0046566) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.110937194286832, LR: 0.0003 +[2026-03-03 15:18:25] (step=0046567) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.111132850714146, LR: 0.0003 +[2026-03-03 15:18:33] (step=0046568) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 9.11132850714146, LR: 0.0003 +[2026-03-03 15:18:41] (step=0046569) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.111524163568774, LR: 0.0003 +[2026-03-03 15:18:49] (step=0046570) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.111719819996086, LR: 0.0003 +[2026-03-03 15:18:57] (step=0046571) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.1119154764234, LR: 0.0003 +[2026-03-03 15:19:05] (step=0046572) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.112111132850714, LR: 0.0003 +[2026-03-03 15:19:13] (step=0046573) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.112306789278028, LR: 0.0003 +[2026-03-03 15:19:20] (step=0046574) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 9.112502445705342, LR: 0.0003 +[2026-03-03 15:19:28] (step=0046575) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.112698102132654, LR: 0.0003 +[2026-03-03 15:19:36] (step=0046576) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.112893758559968, LR: 0.0003 +[2026-03-03 15:19:44] (step=0046577) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 9.113089414987282, LR: 0.0003 +[2026-03-03 15:19:52] (step=0046578) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 9.113285071414596, LR: 0.0003 +[2026-03-03 15:20:00] (step=0046579) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.11348072784191, LR: 0.0003 +[2026-03-03 15:20:08] (step=0046580) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.113676384269223, LR: 0.0003 +[2026-03-03 15:20:15] (step=0046581) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.113872040696537, LR: 0.0003 +[2026-03-03 15:20:23] (step=0046582) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.11406769712385, LR: 0.0003 +[2026-03-03 15:20:31] (step=0046583) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.114263353551165, LR: 0.0003 +[2026-03-03 15:20:39] (step=0046584) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.114459009978479, LR: 0.0003 +[2026-03-03 15:20:47] (step=0046585) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.11465466640579, LR: 0.0003 +[2026-03-03 15:20:55] (step=0046586) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.114850322833105, LR: 0.0003 +[2026-03-03 15:21:03] (step=0046587) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.115045979260419, LR: 0.0003 +[2026-03-03 15:21:11] (step=0046588) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.115241635687733, LR: 0.0003 +[2026-03-03 15:21:18] (step=0046589) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.115437292115047, LR: 0.0003 +[2026-03-03 15:21:26] (step=0046590) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.115632948542359, LR: 0.0003 +[2026-03-03 15:21:34] (step=0046591) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.115828604969673, LR: 0.0003 +[2026-03-03 15:21:42] (step=0046592) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 9.116024261396987, LR: 0.0003 +[2026-03-03 15:21:50] (step=0046593) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 9.116219917824301, LR: 0.0003 +[2026-03-03 15:21:58] (step=0046594) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.116415574251613, LR: 0.0003 +[2026-03-03 15:22:06] (step=0046595) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.116611230678927, LR: 0.0003 +[2026-03-03 15:22:14] (step=0046596) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.116806887106241, LR: 0.0003 +[2026-03-03 15:22:21] (step=0046597) Train Loss: 0.4240, Train Steps/Sec: 0.13, Epoch: 9.117002543533555, LR: 0.0003 +[2026-03-03 15:22:29] (step=0046598) Train Loss: 0.4398, Train Steps/Sec: 0.12, Epoch: 9.11719819996087, LR: 0.0003 +[2026-03-03 15:22:37] (step=0046599) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.117393856388182, LR: 0.0003 +[2026-03-03 15:22:45] (step=0046600) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.117589512815496, LR: 0.0003 +[2026-03-03 15:22:53] (step=0046601) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.11778516924281, LR: 0.0003 +[2026-03-03 15:23:01] (step=0046602) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.117980825670124, LR: 0.0003 +[2026-03-03 15:23:09] (step=0046603) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.118176482097438, LR: 0.0003 +[2026-03-03 15:23:17] (step=0046604) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.11837213852475, LR: 0.0003 +[2026-03-03 15:23:24] (step=0046605) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.118567794952064, LR: 0.0003 +[2026-03-03 15:23:32] (step=0046606) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.118763451379378, LR: 0.0003 +[2026-03-03 15:23:40] (step=0046607) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.118959107806692, LR: 0.0003 +[2026-03-03 15:23:48] (step=0046608) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.119154764234006, LR: 0.0003 +[2026-03-03 15:23:56] (step=0046609) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.119350420661318, LR: 0.0003 +[2026-03-03 15:24:04] (step=0046610) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.119546077088632, LR: 0.0003 +[2026-03-03 15:24:12] (step=0046611) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.119741733515946, LR: 0.0003 +[2026-03-03 15:24:19] (step=0046612) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.11993738994326, LR: 0.0003 +[2026-03-03 15:24:27] (step=0046613) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.120133046370574, LR: 0.0003 +[2026-03-03 15:24:35] (step=0046614) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.120328702797886, LR: 0.0003 +[2026-03-03 15:24:43] (step=0046615) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.1205243592252, LR: 0.0003 +[2026-03-03 15:24:51] (step=0046616) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.120720015652514, LR: 0.0003 +[2026-03-03 15:24:59] (step=0046617) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.120915672079828, LR: 0.0003 +[2026-03-03 15:25:07] (step=0046618) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.121111328507142, LR: 0.0003 +[2026-03-03 15:25:14] (step=0046619) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.121306984934455, LR: 0.0003 +[2026-03-03 15:25:22] (step=0046620) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.121502641361769, LR: 0.0003 +[2026-03-03 15:25:30] (step=0046621) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.121698297789083, LR: 0.0003 +[2026-03-03 15:25:38] (step=0046622) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.121893954216397, LR: 0.0003 +[2026-03-03 15:25:46] (step=0046623) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.122089610643709, LR: 0.0003 +[2026-03-03 15:25:54] (step=0046624) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.122285267071023, LR: 0.0003 +[2026-03-03 15:26:02] (step=0046625) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.122480923498337, LR: 0.0003 +[2026-03-03 15:26:09] (step=0046626) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.12267657992565, LR: 0.0003 +[2026-03-03 15:26:17] (step=0046627) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 9.122872236352965, LR: 0.0003 +[2026-03-03 15:26:25] (step=0046628) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.123067892780277, LR: 0.0003 +[2026-03-03 15:26:33] (step=0046629) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.123263549207591, LR: 0.0003 +[2026-03-03 15:26:41] (step=0046630) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.123459205634905, LR: 0.0003 +[2026-03-03 15:26:49] (step=0046631) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.123654862062219, LR: 0.0003 +[2026-03-03 15:26:57] (step=0046632) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.123850518489533, LR: 0.0003 +[2026-03-03 15:27:05] (step=0046633) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.124046174916845, LR: 0.0003 +[2026-03-03 15:27:12] (step=0046634) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.12424183134416, LR: 0.0003 +[2026-03-03 15:27:20] (step=0046635) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.124437487771473, LR: 0.0003 +[2026-03-03 15:27:28] (step=0046636) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.124633144198787, LR: 0.0003 +[2026-03-03 15:27:36] (step=0046637) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.124828800626101, LR: 0.0003 +[2026-03-03 15:27:44] (step=0046638) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.125024457053414, LR: 0.0003 +[2026-03-03 15:27:52] (step=0046639) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.125220113480728, LR: 0.0003 +[2026-03-03 15:28:00] (step=0046640) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.125415769908042, LR: 0.0003 +[2026-03-03 15:28:07] (step=0046641) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.125611426335356, LR: 0.0003 +[2026-03-03 15:28:15] (step=0046642) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 9.12580708276267, LR: 0.0003 +[2026-03-03 15:28:23] (step=0046643) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.126002739189982, LR: 0.0003 +[2026-03-03 15:28:31] (step=0046644) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.126198395617296, LR: 0.0003 +[2026-03-03 15:28:39] (step=0046645) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.12639405204461, LR: 0.0003 +[2026-03-03 15:28:47] (step=0046646) Train Loss: 0.4444, Train Steps/Sec: 0.12, Epoch: 9.126589708471924, LR: 0.0003 +[2026-03-03 15:28:55] (step=0046647) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.126785364899236, LR: 0.0003 +[2026-03-03 15:29:03] (step=0046648) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.12698102132655, LR: 0.0003 +[2026-03-03 15:29:11] (step=0046649) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.127176677753864, LR: 0.0003 +[2026-03-03 15:29:18] (step=0046650) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.127372334181178, LR: 0.0003 +[2026-03-03 15:29:26] (step=0046651) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.127567990608492, LR: 0.0003 +[2026-03-03 15:29:34] (step=0046652) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.127763647035804, LR: 0.0003 +[2026-03-03 15:29:42] (step=0046653) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.127959303463118, LR: 0.0003 +[2026-03-03 15:29:50] (step=0046654) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.128154959890432, LR: 0.0003 +[2026-03-03 15:29:58] (step=0046655) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.128350616317746, LR: 0.0003 +[2026-03-03 15:30:06] (step=0046656) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.12854627274506, LR: 0.0003 +[2026-03-03 15:30:13] (step=0046657) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.128741929172373, LR: 0.0003 +[2026-03-03 15:30:21] (step=0046658) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.128937585599687, LR: 0.0003 +[2026-03-03 15:30:29] (step=0046659) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.129133242027, LR: 0.0003 +[2026-03-03 15:30:37] (step=0046660) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.129328898454315, LR: 0.0003 +[2026-03-03 15:30:45] (step=0046661) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 9.129524554881629, LR: 0.0003 +[2026-03-03 15:30:53] (step=0046662) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.12972021130894, LR: 0.0003 +[2026-03-03 15:31:01] (step=0046663) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.129915867736255, LR: 0.0003 +[2026-03-03 15:31:08] (step=0046664) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 9.130111524163569, LR: 0.0003 +[2026-03-03 15:31:16] (step=0046665) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.130307180590883, LR: 0.0003 +[2026-03-03 15:31:24] (step=0046666) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.130502837018197, LR: 0.0003 +[2026-03-03 15:31:32] (step=0046667) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.130698493445509, LR: 0.0003 +[2026-03-03 15:31:40] (step=0046668) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.130894149872823, LR: 0.0003 +[2026-03-03 15:31:48] (step=0046669) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.131089806300137, LR: 0.0003 +[2026-03-03 15:31:56] (step=0046670) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.131285462727451, LR: 0.0003 +[2026-03-03 15:32:03] (step=0046671) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.131481119154763, LR: 0.0003 +[2026-03-03 15:32:11] (step=0046672) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.131676775582077, LR: 0.0003 +[2026-03-03 15:32:19] (step=0046673) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.131872432009391, LR: 0.0003 +[2026-03-03 15:32:27] (step=0046674) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.132068088436705, LR: 0.0003 +[2026-03-03 15:32:35] (step=0046675) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.13226374486402, LR: 0.0003 +[2026-03-03 15:32:43] (step=0046676) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.132459401291332, LR: 0.0003 +[2026-03-03 15:32:51] (step=0046677) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.132655057718646, LR: 0.0003 +[2026-03-03 15:32:58] (step=0046678) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.13285071414596, LR: 0.0003 +[2026-03-03 15:33:06] (step=0046679) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 9.133046370573274, LR: 0.0003 +[2026-03-03 15:33:14] (step=0046680) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.133242027000588, LR: 0.0003 +[2026-03-03 15:33:22] (step=0046681) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.1334376834279, LR: 0.0003 +[2026-03-03 15:33:30] (step=0046682) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.133633339855214, LR: 0.0003 +[2026-03-03 15:33:38] (step=0046683) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.133828996282528, LR: 0.0003 +[2026-03-03 15:33:46] (step=0046684) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.134024652709842, LR: 0.0003 +[2026-03-03 15:33:53] (step=0046685) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.134220309137156, LR: 0.0003 +[2026-03-03 15:34:01] (step=0046686) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.134415965564468, LR: 0.0003 +[2026-03-03 15:34:09] (step=0046687) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.134611621991782, LR: 0.0003 +[2026-03-03 15:34:17] (step=0046688) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.134807278419096, LR: 0.0003 +[2026-03-03 15:34:25] (step=0046689) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.13500293484641, LR: 0.0003 +[2026-03-03 15:34:33] (step=0046690) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.135198591273724, LR: 0.0003 +[2026-03-03 15:34:41] (step=0046691) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.135394247701036, LR: 0.0003 +[2026-03-03 15:34:49] (step=0046692) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.13558990412835, LR: 0.0003 +[2026-03-03 15:34:56] (step=0046693) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.135785560555664, LR: 0.0003 +[2026-03-03 15:35:04] (step=0046694) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.135981216982978, LR: 0.0003 +[2026-03-03 15:35:12] (step=0046695) Train Loss: 0.4376, Train Steps/Sec: 0.12, Epoch: 9.136176873410292, LR: 0.0003 +[2026-03-03 15:35:20] (step=0046696) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.136372529837605, LR: 0.0003 +[2026-03-03 15:35:28] (step=0046697) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 9.136568186264919, LR: 0.0003 +[2026-03-03 15:35:36] (step=0046698) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.136763842692233, LR: 0.0003 +[2026-03-03 15:35:44] (step=0046699) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.136959499119547, LR: 0.0003 +[2026-03-03 15:35:52] (step=0046700) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.137155155546859, LR: 0.0003 +[2026-03-03 15:35:59] (step=0046701) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.137350811974173, LR: 0.0003 +[2026-03-03 15:36:07] (step=0046702) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.137546468401487, LR: 0.0003 +[2026-03-03 15:36:15] (step=0046703) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.1377421248288, LR: 0.0003 +[2026-03-03 15:36:23] (step=0046704) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.137937781256115, LR: 0.0003 +[2026-03-03 15:36:31] (step=0046705) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.138133437683427, LR: 0.0003 +[2026-03-03 15:36:39] (step=0046706) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.138329094110741, LR: 0.0003 +[2026-03-03 15:36:47] (step=0046707) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.138524750538055, LR: 0.0003 +[2026-03-03 15:36:54] (step=0046708) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.13872040696537, LR: 0.0003 +[2026-03-03 15:37:02] (step=0046709) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.138916063392683, LR: 0.0003 +[2026-03-03 15:37:10] (step=0046710) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.139111719819995, LR: 0.0003 +[2026-03-03 15:37:18] (step=0046711) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.13930737624731, LR: 0.0003 +[2026-03-03 15:37:26] (step=0046712) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.139503032674623, LR: 0.0003 +[2026-03-03 15:37:34] (step=0046713) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.139698689101937, LR: 0.0003 +[2026-03-03 15:37:42] (step=0046714) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.139894345529251, LR: 0.0003 +[2026-03-03 15:37:49] (step=0046715) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.140090001956564, LR: 0.0003 +[2026-03-03 15:37:57] (step=0046716) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.140285658383878, LR: 0.0003 +[2026-03-03 15:38:05] (step=0046717) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.140481314811192, LR: 0.0003 +[2026-03-03 15:38:13] (step=0046718) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.140676971238506, LR: 0.0003 +[2026-03-03 15:38:21] (step=0046719) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.14087262766582, LR: 0.0003 +[2026-03-03 15:38:29] (step=0046720) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.141068284093132, LR: 0.0003 +[2026-03-03 15:38:37] (step=0046721) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.141263940520446, LR: 0.0003 +[2026-03-03 15:38:45] (step=0046722) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 9.14145959694776, LR: 0.0003 +[2026-03-03 15:38:52] (step=0046723) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 9.141655253375074, LR: 0.0003 +[2026-03-03 15:39:00] (step=0046724) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.141850909802386, LR: 0.0003 +[2026-03-03 15:39:08] (step=0046725) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.1420465662297, LR: 0.0003 +[2026-03-03 15:39:16] (step=0046726) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.142242222657014, LR: 0.0003 +[2026-03-03 15:39:24] (step=0046727) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.142437879084328, LR: 0.0003 +[2026-03-03 15:39:32] (step=0046728) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.142633535511642, LR: 0.0003 +[2026-03-03 15:39:40] (step=0046729) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.142829191938954, LR: 0.0003 +[2026-03-03 15:39:47] (step=0046730) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 9.143024848366268, LR: 0.0003 +[2026-03-03 15:39:55] (step=0046731) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.143220504793582, LR: 0.0003 +[2026-03-03 15:40:03] (step=0046732) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.143416161220896, LR: 0.0003 +[2026-03-03 15:40:11] (step=0046733) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.14361181764821, LR: 0.0003 +[2026-03-03 15:40:19] (step=0046734) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.143807474075523, LR: 0.0003 +[2026-03-03 15:40:27] (step=0046735) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.144003130502837, LR: 0.0003 +[2026-03-03 15:40:35] (step=0046736) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.14419878693015, LR: 0.0003 +[2026-03-03 15:40:43] (step=0046737) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 9.144394443357465, LR: 0.0003 +[2026-03-03 15:40:50] (step=0046738) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 9.144590099784779, LR: 0.0003 +[2026-03-03 15:40:58] (step=0046739) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.14478575621209, LR: 0.0003 +[2026-03-03 15:41:06] (step=0046740) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.144981412639405, LR: 0.0003 +[2026-03-03 15:41:14] (step=0046741) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.145177069066719, LR: 0.0003 +[2026-03-03 15:41:22] (step=0046742) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.145372725494033, LR: 0.0003 +[2026-03-03 15:41:30] (step=0046743) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 9.145568381921347, LR: 0.0003 +[2026-03-03 15:41:38] (step=0046744) Train Loss: 0.4568, Train Steps/Sec: 0.12, Epoch: 9.14576403834866, LR: 0.0003 +[2026-03-03 15:41:46] (step=0046745) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.145959694775973, LR: 0.0003 +[2026-03-03 15:41:54] (step=0046746) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.146155351203287, LR: 0.0003 +[2026-03-03 15:42:01] (step=0046747) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.146351007630601, LR: 0.0003 +[2026-03-03 15:42:09] (step=0046748) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.146546664057915, LR: 0.0003 +[2026-03-03 15:42:17] (step=0046749) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.146742320485227, LR: 0.0003 +[2026-03-03 15:42:25] (step=0046750) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.146937976912541, LR: 0.0003 +[2026-03-03 15:42:33] (step=0046751) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.147133633339855, LR: 0.0003 +[2026-03-03 15:42:41] (step=0046752) Train Loss: 0.4672, Train Steps/Sec: 0.13, Epoch: 9.14732928976717, LR: 0.0003 +[2026-03-03 15:42:49] (step=0046753) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.147524946194482, LR: 0.0003 +[2026-03-03 15:42:56] (step=0046754) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.147720602621796, LR: 0.0003 +[2026-03-03 15:43:04] (step=0046755) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.14791625904911, LR: 0.0003 +[2026-03-03 15:43:12] (step=0046756) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.148111915476424, LR: 0.0003 +[2026-03-03 15:43:20] (step=0046757) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.148307571903738, LR: 0.0003 +[2026-03-03 15:43:28] (step=0046758) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.14850322833105, LR: 0.0003 +[2026-03-03 15:43:36] (step=0046759) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.148698884758364, LR: 0.0003 +[2026-03-03 15:43:44] (step=0046760) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.148894541185678, LR: 0.0003 +[2026-03-03 15:43:51] (step=0046761) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.149090197612992, LR: 0.0003 +[2026-03-03 15:43:59] (step=0046762) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.149285854040306, LR: 0.0003 +[2026-03-03 15:44:07] (step=0046763) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 9.149481510467618, LR: 0.0003 +[2026-03-03 15:44:15] (step=0046764) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.149677166894932, LR: 0.0003 +[2026-03-03 15:44:23] (step=0046765) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.149872823322246, LR: 0.0003 +[2026-03-03 15:44:31] (step=0046766) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.15006847974956, LR: 0.0003 +[2026-03-03 15:44:39] (step=0046767) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.150264136176874, LR: 0.0003 +[2026-03-03 15:44:46] (step=0046768) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.150459792604186, LR: 0.0003 +[2026-03-03 15:44:54] (step=0046769) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 9.1506554490315, LR: 0.0003 +[2026-03-03 15:45:02] (step=0046770) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.150851105458814, LR: 0.0003 +[2026-03-03 15:45:10] (step=0046771) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.151046761886128, LR: 0.0003 +[2026-03-03 15:45:18] (step=0046772) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.151242418313442, LR: 0.0003 +[2026-03-03 15:45:26] (step=0046773) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.151438074740755, LR: 0.0003 +[2026-03-03 15:45:34] (step=0046774) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.151633731168069, LR: 0.0003 +[2026-03-03 15:45:42] (step=0046775) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.151829387595383, LR: 0.0003 +[2026-03-03 15:45:49] (step=0046776) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.152025044022697, LR: 0.0003 +[2026-03-03 15:45:57] (step=0046777) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 9.152220700450009, LR: 0.0003 +[2026-03-03 15:46:05] (step=0046778) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.152416356877323, LR: 0.0003 +[2026-03-03 15:46:13] (step=0046779) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.152612013304637, LR: 0.0003 +[2026-03-03 15:46:21] (step=0046780) Train Loss: 0.4330, Train Steps/Sec: 0.12, Epoch: 9.152807669731951, LR: 0.0003 +[2026-03-03 15:46:29] (step=0046781) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.153003326159265, LR: 0.0003 +[2026-03-03 15:46:37] (step=0046782) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.153198982586577, LR: 0.0003 +[2026-03-03 15:46:45] (step=0046783) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.153394639013891, LR: 0.0003 +[2026-03-03 15:46:52] (step=0046784) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.153590295441205, LR: 0.0003 +[2026-03-03 15:47:00] (step=0046785) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.15378595186852, LR: 0.0003 +[2026-03-03 15:47:08] (step=0046786) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.153981608295833, LR: 0.0003 +[2026-03-03 15:47:16] (step=0046787) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 9.154177264723145, LR: 0.0003 +[2026-03-03 15:47:24] (step=0046788) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.15437292115046, LR: 0.0003 +[2026-03-03 15:47:32] (step=0046789) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.154568577577773, LR: 0.0003 +[2026-03-03 15:47:40] (step=0046790) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 9.154764234005087, LR: 0.0003 +[2026-03-03 15:47:47] (step=0046791) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.154959890432401, LR: 0.0003 +[2026-03-03 15:47:55] (step=0046792) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.155155546859714, LR: 0.0003 +[2026-03-03 15:48:03] (step=0046793) Train Loss: 0.4489, Train Steps/Sec: 0.12, Epoch: 9.155351203287028, LR: 0.0003 +[2026-03-03 15:48:11] (step=0046794) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.155546859714342, LR: 0.0003 +[2026-03-03 15:48:19] (step=0046795) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.155742516141656, LR: 0.0003 +[2026-03-03 15:48:27] (step=0046796) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.15593817256897, LR: 0.0003 +[2026-03-03 15:48:35] (step=0046797) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.156133828996282, LR: 0.0003 +[2026-03-03 15:48:43] (step=0046798) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 9.156329485423596, LR: 0.0003 +[2026-03-03 15:48:50] (step=0046799) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.15652514185091, LR: 0.0003 +[2026-03-03 15:48:58] (step=0046800) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.156720798278224, LR: 0.0003 +[2026-03-03 15:49:06] (step=0046801) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.156916454705538, LR: 0.0003 +[2026-03-03 15:49:14] (step=0046802) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 9.15711211113285, LR: 0.0003 +[2026-03-03 15:49:22] (step=0046803) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.157307767560164, LR: 0.0003 +[2026-03-03 15:49:30] (step=0046804) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.157503423987478, LR: 0.0003 +[2026-03-03 15:49:38] (step=0046805) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.157699080414792, LR: 0.0003 +[2026-03-03 15:49:45] (step=0046806) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.157894736842104, LR: 0.0003 +[2026-03-03 15:49:53] (step=0046807) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.158090393269418, LR: 0.0003 +[2026-03-03 15:50:01] (step=0046808) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.158286049696732, LR: 0.0003 +[2026-03-03 15:50:09] (step=0046809) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.158481706124046, LR: 0.0003 +[2026-03-03 15:50:17] (step=0046810) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.15867736255136, LR: 0.0003 +[2026-03-03 15:50:25] (step=0046811) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.158873018978673, LR: 0.0003 +[2026-03-03 15:50:33] (step=0046812) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.159068675405987, LR: 0.0003 +[2026-03-03 15:50:40] (step=0046813) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 9.1592643318333, LR: 0.0003 +[2026-03-03 15:50:48] (step=0046814) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.159459988260615, LR: 0.0003 +[2026-03-03 15:50:56] (step=0046815) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.159655644687929, LR: 0.0003 +[2026-03-03 15:51:04] (step=0046816) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.159851301115241, LR: 0.0003 +[2026-03-03 15:51:12] (step=0046817) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.160046957542555, LR: 0.0003 +[2026-03-03 15:51:20] (step=0046818) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 9.160242613969869, LR: 0.0003 +[2026-03-03 15:51:28] (step=0046819) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.160438270397183, LR: 0.0003 +[2026-03-03 15:51:35] (step=0046820) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.160633926824497, LR: 0.0003 +[2026-03-03 15:51:43] (step=0046821) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 9.16082958325181, LR: 0.0003 +[2026-03-03 15:51:51] (step=0046822) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.161025239679123, LR: 0.0003 +[2026-03-03 15:51:59] (step=0046823) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.161220896106437, LR: 0.0003 +[2026-03-03 15:52:07] (step=0046824) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.161416552533751, LR: 0.0003 +[2026-03-03 15:52:15] (step=0046825) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.161612208961065, LR: 0.0003 +[2026-03-03 15:52:23] (step=0046826) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.161807865388377, LR: 0.0003 +[2026-03-03 15:52:31] (step=0046827) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.162003521815691, LR: 0.0003 +[2026-03-03 15:52:38] (step=0046828) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.162199178243005, LR: 0.0003 +[2026-03-03 15:52:46] (step=0046829) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.16239483467032, LR: 0.0003 +[2026-03-03 15:52:54] (step=0046830) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.162590491097632, LR: 0.0003 +[2026-03-03 15:53:02] (step=0046831) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.162786147524946, LR: 0.0003 +[2026-03-03 15:53:10] (step=0046832) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.16298180395226, LR: 0.0003 +[2026-03-03 15:53:18] (step=0046833) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.163177460379574, LR: 0.0003 +[2026-03-03 15:53:26] (step=0046834) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.163373116806888, LR: 0.0003 +[2026-03-03 15:53:33] (step=0046835) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.1635687732342, LR: 0.0003 +[2026-03-03 15:53:41] (step=0046836) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.163764429661514, LR: 0.0003 +[2026-03-03 15:53:49] (step=0046837) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.163960086088828, LR: 0.0003 +[2026-03-03 15:53:57] (step=0046838) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.164155742516142, LR: 0.0003 +[2026-03-03 15:54:05] (step=0046839) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.164351398943456, LR: 0.0003 +[2026-03-03 15:54:13] (step=0046840) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.164547055370768, LR: 0.0003 +[2026-03-03 15:54:21] (step=0046841) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 9.164742711798082, LR: 0.0003 +[2026-03-03 15:54:28] (step=0046842) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 9.164938368225396, LR: 0.0003 +[2026-03-03 15:54:37] (step=0046843) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 9.16513402465271, LR: 0.0003 +[2026-03-03 15:54:44] (step=0046844) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.165329681080024, LR: 0.0003 +[2026-03-03 15:54:52] (step=0046845) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.165525337507336, LR: 0.0003 +[2026-03-03 15:55:00] (step=0046846) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.16572099393465, LR: 0.0003 +[2026-03-03 15:55:08] (step=0046847) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.165916650361964, LR: 0.0003 +[2026-03-03 15:55:16] (step=0046848) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.166112306789278, LR: 0.0003 +[2026-03-03 15:55:24] (step=0046849) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.166307963216592, LR: 0.0003 +[2026-03-03 15:55:32] (step=0046850) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.166503619643905, LR: 0.0003 +[2026-03-03 15:55:39] (step=0046851) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.166699276071219, LR: 0.0003 +[2026-03-03 15:55:47] (step=0046852) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.166894932498533, LR: 0.0003 +[2026-03-03 15:55:55] (step=0046853) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.167090588925847, LR: 0.0003 +[2026-03-03 15:56:03] (step=0046854) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.16728624535316, LR: 0.0003 +[2026-03-03 15:56:11] (step=0046855) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.167481901780473, LR: 0.0003 +[2026-03-03 15:56:19] (step=0046856) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.167677558207787, LR: 0.0003 +[2026-03-03 15:56:27] (step=0046857) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 9.167873214635101, LR: 0.0003 +[2026-03-03 15:56:34] (step=0046858) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 9.168068871062415, LR: 0.0003 +[2026-03-03 15:56:42] (step=0046859) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 9.168264527489727, LR: 0.0003 +[2026-03-03 15:56:50] (step=0046860) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.168460183917041, LR: 0.0003 +[2026-03-03 15:56:58] (step=0046861) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.168655840344355, LR: 0.0003 +[2026-03-03 15:57:06] (step=0046862) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.16885149677167, LR: 0.0003 +[2026-03-03 15:57:14] (step=0046863) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 9.169047153198983, LR: 0.0003 +[2026-03-03 15:57:22] (step=0046864) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.169242809626295, LR: 0.0003 +[2026-03-03 15:57:29] (step=0046865) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.16943846605361, LR: 0.0003 +[2026-03-03 15:57:37] (step=0046866) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.169634122480923, LR: 0.0003 +[2026-03-03 15:57:45] (step=0046867) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.169829778908237, LR: 0.0003 +[2026-03-03 15:57:53] (step=0046868) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.170025435335551, LR: 0.0003 +[2026-03-03 15:58:01] (step=0046869) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.170221091762864, LR: 0.0003 +[2026-03-03 15:58:09] (step=0046870) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.170416748190178, LR: 0.0003 +[2026-03-03 15:58:17] (step=0046871) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.170612404617492, LR: 0.0003 +[2026-03-03 15:58:24] (step=0046872) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.170808061044806, LR: 0.0003 +[2026-03-03 15:58:32] (step=0046873) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.17100371747212, LR: 0.0003 +[2026-03-03 15:58:40] (step=0046874) Train Loss: 0.4465, Train Steps/Sec: 0.12, Epoch: 9.171199373899432, LR: 0.0003 +[2026-03-03 15:58:48] (step=0046875) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.171395030326746, LR: 0.0003 +[2026-03-03 15:58:56] (step=0046876) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.17159068675406, LR: 0.0003 +[2026-03-03 15:59:04] (step=0046877) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.171786343181374, LR: 0.0003 +[2026-03-03 15:59:12] (step=0046878) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.171981999608688, LR: 0.0003 +[2026-03-03 15:59:20] (step=0046879) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.172177656036, LR: 0.0003 +[2026-03-03 15:59:28] (step=0046880) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 9.172373312463314, LR: 0.0003 +[2026-03-03 15:59:35] (step=0046881) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.172568968890628, LR: 0.0003 +[2026-03-03 15:59:43] (step=0046882) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.172764625317942, LR: 0.0003 +[2026-03-03 15:59:51] (step=0046883) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 9.172960281745254, LR: 0.0003 +[2026-03-03 15:59:59] (step=0046884) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.173155938172568, LR: 0.0003 +[2026-03-03 16:00:07] (step=0046885) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.173351594599882, LR: 0.0003 +[2026-03-03 16:00:15] (step=0046886) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.173547251027196, LR: 0.0003 +[2026-03-03 16:00:23] (step=0046887) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.17374290745451, LR: 0.0003 +[2026-03-03 16:00:30] (step=0046888) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 9.173938563881823, LR: 0.0003 +[2026-03-03 16:00:38] (step=0046889) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.174134220309137, LR: 0.0003 +[2026-03-03 16:00:46] (step=0046890) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.17432987673645, LR: 0.0003 +[2026-03-03 16:00:54] (step=0046891) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.174525533163765, LR: 0.0003 +[2026-03-03 16:01:02] (step=0046892) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.174721189591079, LR: 0.0003 +[2026-03-03 16:01:10] (step=0046893) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 9.174916846018391, LR: 0.0003 +[2026-03-03 16:01:18] (step=0046894) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.175112502445705, LR: 0.0003 +[2026-03-03 16:01:26] (step=0046895) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.175308158873019, LR: 0.0003 +[2026-03-03 16:01:33] (step=0046896) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.175503815300333, LR: 0.0003 +[2026-03-03 16:01:41] (step=0046897) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.175699471727647, LR: 0.0003 +[2026-03-03 16:01:49] (step=0046898) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.17589512815496, LR: 0.0003 +[2026-03-03 16:01:57] (step=0046899) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.176090784582273, LR: 0.0003 +[2026-03-03 16:02:05] (step=0046900) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.176286441009587, LR: 0.0003 +[2026-03-03 16:02:13] (step=0046901) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.176482097436901, LR: 0.0003 +[2026-03-03 16:02:21] (step=0046902) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.176677753864215, LR: 0.0003 +[2026-03-03 16:02:28] (step=0046903) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.176873410291527, LR: 0.0003 +[2026-03-03 16:02:36] (step=0046904) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.177069066718841, LR: 0.0003 +[2026-03-03 16:02:44] (step=0046905) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.177264723146155, LR: 0.0003 +[2026-03-03 16:02:52] (step=0046906) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.17746037957347, LR: 0.0003 +[2026-03-03 16:03:00] (step=0046907) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.177656036000784, LR: 0.0003 +[2026-03-03 16:03:08] (step=0046908) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.177851692428096, LR: 0.0003 +[2026-03-03 16:03:16] (step=0046909) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.17804734885541, LR: 0.0003 +[2026-03-03 16:03:23] (step=0046910) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.178243005282724, LR: 0.0003 +[2026-03-03 16:03:31] (step=0046911) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.178438661710038, LR: 0.0003 +[2026-03-03 16:03:39] (step=0046912) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.17863431813735, LR: 0.0003 +[2026-03-03 16:03:47] (step=0046913) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.178829974564664, LR: 0.0003 +[2026-03-03 16:03:55] (step=0046914) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.179025630991978, LR: 0.0003 +[2026-03-03 16:04:03] (step=0046915) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.179221287419292, LR: 0.0003 +[2026-03-03 16:04:11] (step=0046916) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 9.179416943846606, LR: 0.0003 +[2026-03-03 16:04:18] (step=0046917) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.179612600273918, LR: 0.0003 +[2026-03-03 16:04:26] (step=0046918) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.179808256701232, LR: 0.0003 +[2026-03-03 16:04:34] (step=0046919) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.180003913128546, LR: 0.0003 +[2026-03-03 16:04:42] (step=0046920) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.18019956955586, LR: 0.0003 +[2026-03-03 16:04:50] (step=0046921) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.180395225983174, LR: 0.0003 +[2026-03-03 16:04:58] (step=0046922) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 9.180590882410486, LR: 0.0003 +[2026-03-03 16:05:06] (step=0046923) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.1807865388378, LR: 0.0003 +[2026-03-03 16:05:14] (step=0046924) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.180982195265114, LR: 0.0003 +[2026-03-03 16:05:21] (step=0046925) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.181177851692429, LR: 0.0003 +[2026-03-03 16:05:29] (step=0046926) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.181373508119743, LR: 0.0003 +[2026-03-03 16:05:37] (step=0046927) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.181569164547055, LR: 0.0003 +[2026-03-03 16:05:45] (step=0046928) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.181764820974369, LR: 0.0003 +[2026-03-03 16:05:53] (step=0046929) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.181960477401683, LR: 0.0003 +[2026-03-03 16:06:01] (step=0046930) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.182156133828997, LR: 0.0003 +[2026-03-03 16:06:09] (step=0046931) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.18235179025631, LR: 0.0003 +[2026-03-03 16:06:17] (step=0046932) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.182547446683623, LR: 0.0003 +[2026-03-03 16:06:24] (step=0046933) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.182743103110937, LR: 0.0003 +[2026-03-03 16:06:32] (step=0046934) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.182938759538251, LR: 0.0003 +[2026-03-03 16:06:40] (step=0046935) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.183134415965565, LR: 0.0003 +[2026-03-03 16:06:48] (step=0046936) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.183330072392877, LR: 0.0003 +[2026-03-03 16:06:56] (step=0046937) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.183525728820191, LR: 0.0003 +[2026-03-03 16:07:04] (step=0046938) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.183721385247505, LR: 0.0003 +[2026-03-03 16:07:12] (step=0046939) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.18391704167482, LR: 0.0003 +[2026-03-03 16:07:19] (step=0046940) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.184112698102133, LR: 0.0003 +[2026-03-03 16:07:27] (step=0046941) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.184308354529445, LR: 0.0003 +[2026-03-03 16:07:35] (step=0046942) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 9.18450401095676, LR: 0.0003 +[2026-03-03 16:07:43] (step=0046943) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.184699667384074, LR: 0.0003 +[2026-03-03 16:07:51] (step=0046944) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.184895323811388, LR: 0.0003 +[2026-03-03 16:07:59] (step=0046945) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.185090980238702, LR: 0.0003 +[2026-03-03 16:08:07] (step=0046946) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.185286636666014, LR: 0.0003 +[2026-03-03 16:08:15] (step=0046947) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.185482293093328, LR: 0.0003 +[2026-03-03 16:08:22] (step=0046948) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.185677949520642, LR: 0.0003 +[2026-03-03 16:08:30] (step=0046949) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.185873605947956, LR: 0.0003 +[2026-03-03 16:08:38] (step=0046950) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.18606926237527, LR: 0.0003 +[2026-03-03 16:08:46] (step=0046951) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.186264918802582, LR: 0.0003 +[2026-03-03 16:08:54] (step=0046952) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.186460575229896, LR: 0.0003 +[2026-03-03 16:09:02] (step=0046953) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.18665623165721, LR: 0.0003 +[2026-03-03 16:09:10] (step=0046954) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.186851888084524, LR: 0.0003 +[2026-03-03 16:09:18] (step=0046955) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.187047544511838, LR: 0.0003 +[2026-03-03 16:09:25] (step=0046956) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.18724320093915, LR: 0.0003 +[2026-03-03 16:09:33] (step=0046957) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.187438857366464, LR: 0.0003 +[2026-03-03 16:09:41] (step=0046958) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.187634513793778, LR: 0.0003 +[2026-03-03 16:09:49] (step=0046959) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.187830170221092, LR: 0.0003 +[2026-03-03 16:09:57] (step=0046960) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 9.188025826648406, LR: 0.0003 +[2026-03-03 16:10:05] (step=0046961) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.188221483075719, LR: 0.0003 +[2026-03-03 16:10:13] (step=0046962) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.188417139503033, LR: 0.0003 +[2026-03-03 16:10:20] (step=0046963) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.188612795930347, LR: 0.0003 +[2026-03-03 16:10:28] (step=0046964) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.18880845235766, LR: 0.0003 +[2026-03-03 16:10:36] (step=0046965) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.189004108784973, LR: 0.0003 +[2026-03-03 16:10:44] (step=0046966) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.189199765212287, LR: 0.0003 +[2026-03-03 16:10:52] (step=0046967) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.1893954216396, LR: 0.0003 +[2026-03-03 16:11:00] (step=0046968) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.189591078066915, LR: 0.0003 +[2026-03-03 16:11:08] (step=0046969) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.189786734494229, LR: 0.0003 +[2026-03-03 16:11:16] (step=0046970) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 9.189982390921541, LR: 0.0003 +[2026-03-03 16:11:23] (step=0046971) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.190178047348855, LR: 0.0003 +[2026-03-03 16:11:31] (step=0046972) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.190373703776169, LR: 0.0003 +[2026-03-03 16:11:39] (step=0046973) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.190569360203483, LR: 0.0003 +[2026-03-03 16:11:47] (step=0046974) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.190765016630797, LR: 0.0003 +[2026-03-03 16:11:55] (step=0046975) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.19096067305811, LR: 0.0003 +[2026-03-03 16:12:03] (step=0046976) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.191156329485423, LR: 0.0003 +[2026-03-03 16:12:11] (step=0046977) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.191351985912737, LR: 0.0003 +[2026-03-03 16:12:19] (step=0046978) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.191547642340051, LR: 0.0003 +[2026-03-03 16:12:26] (step=0046979) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.191743298767365, LR: 0.0003 +[2026-03-03 16:12:34] (step=0046980) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.191938955194678, LR: 0.0003 +[2026-03-03 16:12:42] (step=0046981) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.192134611621992, LR: 0.0003 +[2026-03-03 16:12:50] (step=0046982) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 9.192330268049306, LR: 0.0003 +[2026-03-03 16:12:58] (step=0046983) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.19252592447662, LR: 0.0003 +[2026-03-03 16:13:06] (step=0046984) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.192721580903934, LR: 0.0003 +[2026-03-03 16:13:13] (step=0046985) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 9.192917237331246, LR: 0.0003 +[2026-03-03 16:13:21] (step=0046986) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 9.19311289375856, LR: 0.0003 +[2026-03-03 16:13:29] (step=0046987) Train Loss: 0.4499, Train Steps/Sec: 0.12, Epoch: 9.193308550185874, LR: 0.0003 +[2026-03-03 16:13:37] (step=0046988) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.193504206613188, LR: 0.0003 +[2026-03-03 16:13:45] (step=0046989) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.1936998630405, LR: 0.0003 +[2026-03-03 16:13:53] (step=0046990) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.193895519467814, LR: 0.0003 +[2026-03-03 16:14:01] (step=0046991) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.194091175895128, LR: 0.0003 +[2026-03-03 16:14:09] (step=0046992) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.194286832322442, LR: 0.0003 +[2026-03-03 16:14:17] (step=0046993) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.194482488749756, LR: 0.0003 +[2026-03-03 16:14:24] (step=0046994) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.194678145177068, LR: 0.0003 +[2026-03-03 16:14:32] (step=0046995) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.194873801604382, LR: 0.0003 +[2026-03-03 16:14:40] (step=0046996) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.195069458031696, LR: 0.0003 +[2026-03-03 16:14:48] (step=0046997) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.19526511445901, LR: 0.0003 +[2026-03-03 16:14:56] (step=0046998) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.195460770886324, LR: 0.0003 +[2026-03-03 16:15:04] (step=0046999) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.195656427313637, LR: 0.0003 +[2026-03-03 16:15:12] (step=0047000) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.19585208374095, LR: 0.0003 +[2026-03-03 16:15:12] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0047000/ +[2026-03-03 16:15:20] (step=0047001) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.196047740168265, LR: 0.0003 +[2026-03-03 16:15:27] (step=0047002) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.196243396595579, LR: 0.0003 +[2026-03-03 16:15:35] (step=0047003) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.196439053022893, LR: 0.0003 +[2026-03-03 16:15:43] (step=0047004) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.196634709450205, LR: 0.0003 +[2026-03-03 16:15:51] (step=0047005) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 9.196830365877519, LR: 0.0003 +[2026-03-03 16:15:59] (step=0047006) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.197026022304833, LR: 0.0003 +[2026-03-03 16:16:07] (step=0047007) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.197221678732147, LR: 0.0003 +[2026-03-03 16:16:15] (step=0047008) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.19741733515946, LR: 0.0003 +[2026-03-03 16:16:22] (step=0047009) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.197612991586773, LR: 0.0003 +[2026-03-03 16:16:30] (step=0047010) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.197808648014087, LR: 0.0003 +[2026-03-03 16:16:38] (step=0047011) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.198004304441401, LR: 0.0003 +[2026-03-03 16:16:46] (step=0047012) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.198199960868715, LR: 0.0003 +[2026-03-03 16:16:54] (step=0047013) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.198395617296029, LR: 0.0003 +[2026-03-03 16:17:02] (step=0047014) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.198591273723341, LR: 0.0003 +[2026-03-03 16:17:09] (step=0047015) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.198786930150655, LR: 0.0003 +[2026-03-03 16:17:17] (step=0047016) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.19898258657797, LR: 0.0003 +[2026-03-03 16:17:25] (step=0047017) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.199178243005283, LR: 0.0003 +[2026-03-03 16:17:33] (step=0047018) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.199373899432596, LR: 0.0003 +[2026-03-03 16:17:41] (step=0047019) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.19956955585991, LR: 0.0003 +[2026-03-03 16:17:49] (step=0047020) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.199765212287224, LR: 0.0003 +[2026-03-03 16:17:57] (step=0047021) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.199960868714538, LR: 0.0003 +[2026-03-03 16:18:05] (step=0047022) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.200156525141852, LR: 0.0003 +[2026-03-03 16:18:12] (step=0047023) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.200352181569164, LR: 0.0003 +[2026-03-03 16:18:20] (step=0047024) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.200547837996478, LR: 0.0003 +[2026-03-03 16:18:28] (step=0047025) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.200743494423792, LR: 0.0003 +[2026-03-03 16:18:36] (step=0047026) Train Loss: 0.4642, Train Steps/Sec: 0.13, Epoch: 9.200939150851106, LR: 0.0003 +[2026-03-03 16:18:44] (step=0047027) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.20113480727842, LR: 0.0003 +[2026-03-03 16:18:52] (step=0047028) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.201330463705732, LR: 0.0003 +[2026-03-03 16:19:00] (step=0047029) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.201526120133046, LR: 0.0003 +[2026-03-03 16:19:08] (step=0047030) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.20172177656036, LR: 0.0003 +[2026-03-03 16:19:15] (step=0047031) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.201917432987674, LR: 0.0003 +[2026-03-03 16:19:23] (step=0047032) Train Loss: 0.4505, Train Steps/Sec: 0.12, Epoch: 9.202113089414988, LR: 0.0003 +[2026-03-03 16:19:31] (step=0047033) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 9.2023087458423, LR: 0.0003 +[2026-03-03 16:19:39] (step=0047034) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.202504402269614, LR: 0.0003 +[2026-03-03 16:19:47] (step=0047035) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.202700058696928, LR: 0.0003 +[2026-03-03 16:19:55] (step=0047036) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.202895715124242, LR: 0.0003 +[2026-03-03 16:20:03] (step=0047037) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.203091371551556, LR: 0.0003 +[2026-03-03 16:20:11] (step=0047038) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.203287027978869, LR: 0.0003 +[2026-03-03 16:20:18] (step=0047039) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.203482684406183, LR: 0.0003 +[2026-03-03 16:20:26] (step=0047040) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.203678340833497, LR: 0.0003 +[2026-03-03 16:20:34] (step=0047041) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.20387399726081, LR: 0.0003 +[2026-03-03 16:20:42] (step=0047042) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.204069653688123, LR: 0.0003 +[2026-03-03 16:20:50] (step=0047043) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.204265310115437, LR: 0.0003 +[2026-03-03 16:20:58] (step=0047044) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.20446096654275, LR: 0.0003 +[2026-03-03 16:21:06] (step=0047045) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 9.204656622970065, LR: 0.0003 +[2026-03-03 16:21:13] (step=0047046) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.204852279397379, LR: 0.0003 +[2026-03-03 16:21:21] (step=0047047) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.205047935824691, LR: 0.0003 +[2026-03-03 16:21:29] (step=0047048) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.205243592252005, LR: 0.0003 +[2026-03-03 16:21:37] (step=0047049) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.205439248679319, LR: 0.0003 +[2026-03-03 16:21:45] (step=0047050) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.205634905106633, LR: 0.0003 +[2026-03-03 16:21:53] (step=0047051) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.205830561533947, LR: 0.0003 +[2026-03-03 16:22:01] (step=0047052) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.20602621796126, LR: 0.0003 +[2026-03-03 16:22:08] (step=0047053) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 9.206221874388573, LR: 0.0003 +[2026-03-03 16:22:16] (step=0047054) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.206417530815887, LR: 0.0003 +[2026-03-03 16:22:24] (step=0047055) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.206613187243201, LR: 0.0003 +[2026-03-03 16:22:32] (step=0047056) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 9.206808843670515, LR: 0.0003 +[2026-03-03 16:22:40] (step=0047057) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 9.207004500097828, LR: 0.0003 +[2026-03-03 16:22:48] (step=0047058) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.207200156525142, LR: 0.0003 +[2026-03-03 16:22:56] (step=0047059) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.207395812952456, LR: 0.0003 +[2026-03-03 16:23:04] (step=0047060) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.20759146937977, LR: 0.0003 +[2026-03-03 16:23:11] (step=0047061) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.207787125807084, LR: 0.0003 +[2026-03-03 16:23:19] (step=0047062) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.207982782234396, LR: 0.0003 +[2026-03-03 16:23:27] (step=0047063) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.20817843866171, LR: 0.0003 +[2026-03-03 16:23:35] (step=0047064) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.208374095089024, LR: 0.0003 +[2026-03-03 16:23:43] (step=0047065) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.208569751516338, LR: 0.0003 +[2026-03-03 16:23:51] (step=0047066) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.20876540794365, LR: 0.0003 +[2026-03-03 16:23:58] (step=0047067) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.208961064370964, LR: 0.0003 +[2026-03-03 16:24:06] (step=0047068) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.209156720798278, LR: 0.0003 +[2026-03-03 16:24:14] (step=0047069) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.209352377225592, LR: 0.0003 +[2026-03-03 16:24:22] (step=0047070) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.209548033652906, LR: 0.0003 +[2026-03-03 16:24:30] (step=0047071) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.209743690080218, LR: 0.0003 +[2026-03-03 16:24:38] (step=0047072) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.209939346507532, LR: 0.0003 +[2026-03-03 16:24:46] (step=0047073) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 9.210135002934846, LR: 0.0003 +[2026-03-03 16:24:54] (step=0047074) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.21033065936216, LR: 0.0003 +[2026-03-03 16:25:01] (step=0047075) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.210526315789474, LR: 0.0003 +[2026-03-03 16:25:09] (step=0047076) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.210721972216787, LR: 0.0003 +[2026-03-03 16:25:17] (step=0047077) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.2109176286441, LR: 0.0003 +[2026-03-03 16:25:25] (step=0047078) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.211113285071415, LR: 0.0003 +[2026-03-03 16:25:33] (step=0047079) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.211308941498729, LR: 0.0003 +[2026-03-03 16:25:41] (step=0047080) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.211504597926043, LR: 0.0003 +[2026-03-03 16:25:49] (step=0047081) Train Loss: 0.4361, Train Steps/Sec: 0.12, Epoch: 9.211700254353355, LR: 0.0003 +[2026-03-03 16:25:57] (step=0047082) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.211895910780669, LR: 0.0003 +[2026-03-03 16:26:04] (step=0047083) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.212091567207983, LR: 0.0003 +[2026-03-03 16:26:12] (step=0047084) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 9.212287223635297, LR: 0.0003 +[2026-03-03 16:26:20] (step=0047085) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.21248288006261, LR: 0.0003 +[2026-03-03 16:26:28] (step=0047086) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.212678536489923, LR: 0.0003 +[2026-03-03 16:26:36] (step=0047087) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.212874192917237, LR: 0.0003 +[2026-03-03 16:26:44] (step=0047088) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.213069849344551, LR: 0.0003 +[2026-03-03 16:26:52] (step=0047089) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.213265505771865, LR: 0.0003 +[2026-03-03 16:27:00] (step=0047090) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.213461162199179, LR: 0.0003 +[2026-03-03 16:27:07] (step=0047091) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.213656818626491, LR: 0.0003 +[2026-03-03 16:27:15] (step=0047092) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.213852475053805, LR: 0.0003 +[2026-03-03 16:27:23] (step=0047093) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.21404813148112, LR: 0.0003 +[2026-03-03 16:27:31] (step=0047094) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.214243787908433, LR: 0.0003 +[2026-03-03 16:27:39] (step=0047095) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.214439444335746, LR: 0.0003 +[2026-03-03 16:27:47] (step=0047096) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.21463510076306, LR: 0.0003 +[2026-03-03 16:27:54] (step=0047097) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.214830757190374, LR: 0.0003 +[2026-03-03 16:28:02] (step=0047098) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.215026413617688, LR: 0.0003 +[2026-03-03 16:28:10] (step=0047099) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.215222070045002, LR: 0.0003 +[2026-03-03 16:28:18] (step=0047100) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.215417726472314, LR: 0.0003 +[2026-03-03 16:28:26] (step=0047101) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.215613382899628, LR: 0.0003 +[2026-03-03 16:28:34] (step=0047102) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.215809039326942, LR: 0.0003 +[2026-03-03 16:28:42] (step=0047103) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 9.216004695754256, LR: 0.0003 +[2026-03-03 16:28:49] (step=0047104) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.21620035218157, LR: 0.0003 +[2026-03-03 16:28:57] (step=0047105) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.216396008608882, LR: 0.0003 +[2026-03-03 16:29:05] (step=0047106) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.216591665036196, LR: 0.0003 +[2026-03-03 16:29:13] (step=0047107) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.21678732146351, LR: 0.0003 +[2026-03-03 16:29:21] (step=0047108) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.216982977890824, LR: 0.0003 +[2026-03-03 16:29:29] (step=0047109) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 9.217178634318138, LR: 0.0003 +[2026-03-03 16:29:36] (step=0047110) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.21737429074545, LR: 0.0003 +[2026-03-03 16:29:44] (step=0047111) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.217569947172764, LR: 0.0003 +[2026-03-03 16:29:52] (step=0047112) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.217765603600078, LR: 0.0003 +[2026-03-03 16:30:00] (step=0047113) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.217961260027392, LR: 0.0003 +[2026-03-03 16:30:08] (step=0047114) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.218156916454706, LR: 0.0003 +[2026-03-03 16:30:16] (step=0047115) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.218352572882019, LR: 0.0003 +[2026-03-03 16:30:24] (step=0047116) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.218548229309333, LR: 0.0003 +[2026-03-03 16:30:31] (step=0047117) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.218743885736647, LR: 0.0003 +[2026-03-03 16:30:39] (step=0047118) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.21893954216396, LR: 0.0003 +[2026-03-03 16:30:47] (step=0047119) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.219135198591273, LR: 0.0003 +[2026-03-03 16:30:55] (step=0047120) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.219330855018587, LR: 0.0003 +[2026-03-03 16:31:03] (step=0047121) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 9.2195265114459, LR: 0.0003 +[2026-03-03 16:31:11] (step=0047122) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.219722167873215, LR: 0.0003 +[2026-03-03 16:31:19] (step=0047123) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 9.219917824300529, LR: 0.0003 +[2026-03-03 16:31:27] (step=0047124) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.220113480727841, LR: 0.0003 +[2026-03-03 16:31:34] (step=0047125) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.220309137155155, LR: 0.0003 +[2026-03-03 16:31:42] (step=0047126) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.220504793582469, LR: 0.0003 +[2026-03-03 16:31:50] (step=0047127) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.220700450009783, LR: 0.0003 +[2026-03-03 16:31:58] (step=0047128) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.220896106437097, LR: 0.0003 +[2026-03-03 16:32:06] (step=0047129) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.22109176286441, LR: 0.0003 +[2026-03-03 16:32:14] (step=0047130) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.221287419291723, LR: 0.0003 +[2026-03-03 16:32:22] (step=0047131) Train Loss: 0.4404, Train Steps/Sec: 0.12, Epoch: 9.221483075719037, LR: 0.0003 +[2026-03-03 16:32:30] (step=0047132) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.221678732146351, LR: 0.0003 +[2026-03-03 16:32:37] (step=0047133) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.221874388573665, LR: 0.0003 +[2026-03-03 16:32:45] (step=0047134) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.222070045000978, LR: 0.0003 +[2026-03-03 16:32:53] (step=0047135) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.222265701428292, LR: 0.0003 +[2026-03-03 16:33:01] (step=0047136) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 9.222461357855606, LR: 0.0003 +[2026-03-03 16:33:09] (step=0047137) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.22265701428292, LR: 0.0003 +[2026-03-03 16:33:17] (step=0047138) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 9.222852670710234, LR: 0.0003 +[2026-03-03 16:33:25] (step=0047139) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 9.223048327137546, LR: 0.0003 +[2026-03-03 16:33:32] (step=0047140) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.22324398356486, LR: 0.0003 +[2026-03-03 16:33:40] (step=0047141) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.223439639992174, LR: 0.0003 +[2026-03-03 16:33:48] (step=0047142) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.223635296419488, LR: 0.0003 +[2026-03-03 16:33:56] (step=0047143) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.223830952846802, LR: 0.0003 +[2026-03-03 16:34:04] (step=0047144) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.224026609274114, LR: 0.0003 +[2026-03-03 16:34:12] (step=0047145) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.224222265701428, LR: 0.0003 +[2026-03-03 16:34:20] (step=0047146) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.224417922128742, LR: 0.0003 +[2026-03-03 16:34:27] (step=0047147) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.224613578556056, LR: 0.0003 +[2026-03-03 16:34:35] (step=0047148) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.224809234983368, LR: 0.0003 +[2026-03-03 16:34:43] (step=0047149) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.225004891410682, LR: 0.0003 +[2026-03-03 16:34:51] (step=0047150) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.225200547837996, LR: 0.0003 +[2026-03-03 16:34:59] (step=0047151) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.22539620426531, LR: 0.0003 +[2026-03-03 16:35:07] (step=0047152) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.225591860692624, LR: 0.0003 +[2026-03-03 16:35:15] (step=0047153) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.225787517119937, LR: 0.0003 +[2026-03-03 16:35:22] (step=0047154) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.22598317354725, LR: 0.0003 +[2026-03-03 16:35:30] (step=0047155) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.226178829974565, LR: 0.0003 +[2026-03-03 16:35:38] (step=0047156) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.226374486401879, LR: 0.0003 +[2026-03-03 16:35:46] (step=0047157) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.226570142829193, LR: 0.0003 +[2026-03-03 16:35:54] (step=0047158) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.226765799256505, LR: 0.0003 +[2026-03-03 16:36:02] (step=0047159) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.226961455683819, LR: 0.0003 +[2026-03-03 16:36:10] (step=0047160) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.227157112111133, LR: 0.0003 +[2026-03-03 16:36:17] (step=0047161) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 9.227352768538447, LR: 0.0003 +[2026-03-03 16:36:25] (step=0047162) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 9.22754842496576, LR: 0.0003 +[2026-03-03 16:36:33] (step=0047163) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.227744081393073, LR: 0.0003 +[2026-03-03 16:36:41] (step=0047164) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.227939737820387, LR: 0.0003 +[2026-03-03 16:36:49] (step=0047165) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.228135394247701, LR: 0.0003 +[2026-03-03 16:36:57] (step=0047166) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.228331050675015, LR: 0.0003 +[2026-03-03 16:37:05] (step=0047167) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.22852670710233, LR: 0.0003 +[2026-03-03 16:37:12] (step=0047168) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 9.228722363529641, LR: 0.0003 +[2026-03-03 16:37:20] (step=0047169) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.228918019956955, LR: 0.0003 +[2026-03-03 16:37:28] (step=0047170) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.22911367638427, LR: 0.0003 +[2026-03-03 16:37:36] (step=0047171) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.229309332811583, LR: 0.0003 +[2026-03-03 16:37:44] (step=0047172) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 9.229504989238896, LR: 0.0003 +[2026-03-03 16:37:52] (step=0047173) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.22970064566621, LR: 0.0003 +[2026-03-03 16:38:00] (step=0047174) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.229896302093524, LR: 0.0003 +[2026-03-03 16:38:08] (step=0047175) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 9.230091958520838, LR: 0.0003 +[2026-03-03 16:38:15] (step=0047176) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 9.230287614948152, LR: 0.0003 +[2026-03-03 16:38:23] (step=0047177) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.230483271375464, LR: 0.0003 +[2026-03-03 16:38:31] (step=0047178) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.230678927802778, LR: 0.0003 +[2026-03-03 16:38:39] (step=0047179) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.230874584230092, LR: 0.0003 +[2026-03-03 16:38:47] (step=0047180) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.231070240657406, LR: 0.0003 +[2026-03-03 16:38:55] (step=0047181) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 9.23126589708472, LR: 0.0003 +[2026-03-03 16:39:03] (step=0047182) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 9.231461553512032, LR: 0.0003 +[2026-03-03 16:39:11] (step=0047183) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.231657209939346, LR: 0.0003 +[2026-03-03 16:39:18] (step=0047184) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.23185286636666, LR: 0.0003 +[2026-03-03 16:39:26] (step=0047185) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.232048522793974, LR: 0.0003 +[2026-03-03 16:39:34] (step=0047186) Train Loss: 0.4392, Train Steps/Sec: 0.12, Epoch: 9.232244179221288, LR: 0.0003 +[2026-03-03 16:39:42] (step=0047187) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.2324398356486, LR: 0.0003 +[2026-03-03 16:39:50] (step=0047188) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.232635492075914, LR: 0.0003 +[2026-03-03 16:39:58] (step=0047189) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.232831148503228, LR: 0.0003 +[2026-03-03 16:40:06] (step=0047190) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 9.233026804930542, LR: 0.0003 +[2026-03-03 16:40:14] (step=0047191) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.233222461357856, LR: 0.0003 +[2026-03-03 16:40:21] (step=0047192) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.233418117785169, LR: 0.0003 +[2026-03-03 16:40:29] (step=0047193) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.233613774212483, LR: 0.0003 +[2026-03-03 16:40:37] (step=0047194) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.233809430639797, LR: 0.0003 +[2026-03-03 16:40:45] (step=0047195) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 9.23400508706711, LR: 0.0003 +[2026-03-03 16:40:53] (step=0047196) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.234200743494425, LR: 0.0003 +[2026-03-03 16:41:01] (step=0047197) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.234396399921737, LR: 0.0003 +[2026-03-03 16:41:09] (step=0047198) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.234592056349051, LR: 0.0003 +[2026-03-03 16:41:16] (step=0047199) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 9.234787712776365, LR: 0.0003 +[2026-03-03 16:41:24] (step=0047200) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 9.234983369203679, LR: 0.0003 +[2026-03-03 16:41:32] (step=0047201) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.235179025630991, LR: 0.0003 +[2026-03-03 16:41:40] (step=0047202) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.235374682058305, LR: 0.0003 +[2026-03-03 16:41:48] (step=0047203) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 9.23557033848562, LR: 0.0003 +[2026-03-03 16:41:56] (step=0047204) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 9.235765994912933, LR: 0.0003 +[2026-03-03 16:42:04] (step=0047205) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.235961651340247, LR: 0.0003 +[2026-03-03 16:42:12] (step=0047206) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.23615730776756, LR: 0.0003 +[2026-03-03 16:42:19] (step=0047207) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.236352964194873, LR: 0.0003 +[2026-03-03 16:42:27] (step=0047208) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 9.236548620622187, LR: 0.0003 +[2026-03-03 16:42:35] (step=0047209) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.236744277049501, LR: 0.0003 +[2026-03-03 16:42:43] (step=0047210) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.236939933476815, LR: 0.0003 +[2026-03-03 16:42:51] (step=0047211) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.237135589904128, LR: 0.0003 +[2026-03-03 16:42:59] (step=0047212) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.237331246331442, LR: 0.0003 +[2026-03-03 16:43:07] (step=0047213) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.237526902758756, LR: 0.0003 +[2026-03-03 16:43:14] (step=0047214) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.23772255918607, LR: 0.0003 +[2026-03-03 16:43:22] (step=0047215) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.237918215613384, LR: 0.0003 +[2026-03-03 16:43:30] (step=0047216) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.238113872040696, LR: 0.0003 +[2026-03-03 16:43:38] (step=0047217) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 9.23830952846801, LR: 0.0003 +[2026-03-03 16:43:46] (step=0047218) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.238505184895324, LR: 0.0003 +[2026-03-03 16:43:54] (step=0047219) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.238700841322638, LR: 0.0003 +[2026-03-03 16:44:02] (step=0047220) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 9.238896497749952, LR: 0.0003 +[2026-03-03 16:44:09] (step=0047221) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.239092154177264, LR: 0.0003 +[2026-03-03 16:44:17] (step=0047222) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.239287810604578, LR: 0.0003 +[2026-03-03 16:44:25] (step=0047223) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.239483467031892, LR: 0.0003 +[2026-03-03 16:44:33] (step=0047224) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.239679123459206, LR: 0.0003 +[2026-03-03 16:44:41] (step=0047225) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.239874779886518, LR: 0.0003 +[2026-03-03 16:44:49] (step=0047226) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.240070436313832, LR: 0.0003 +[2026-03-03 16:44:57] (step=0047227) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.240266092741146, LR: 0.0003 +[2026-03-03 16:45:05] (step=0047228) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.24046174916846, LR: 0.0003 +[2026-03-03 16:45:12] (step=0047229) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.240657405595774, LR: 0.0003 +[2026-03-03 16:45:20] (step=0047230) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.240853062023087, LR: 0.0003 +[2026-03-03 16:45:28] (step=0047231) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.2410487184504, LR: 0.0003 +[2026-03-03 16:45:36] (step=0047232) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.241244374877715, LR: 0.0003 +[2026-03-03 16:45:44] (step=0047233) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.241440031305029, LR: 0.0003 +[2026-03-03 16:45:52] (step=0047234) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.241635687732343, LR: 0.0003 +[2026-03-03 16:46:00] (step=0047235) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.241831344159655, LR: 0.0003 +[2026-03-03 16:46:07] (step=0047236) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.242027000586969, LR: 0.0003 +[2026-03-03 16:46:15] (step=0047237) Train Loss: 0.4506, Train Steps/Sec: 0.12, Epoch: 9.242222657014283, LR: 0.0003 +[2026-03-03 16:46:23] (step=0047238) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.242418313441597, LR: 0.0003 +[2026-03-03 16:46:31] (step=0047239) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.242613969868911, LR: 0.0003 +[2026-03-03 16:46:39] (step=0047240) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.242809626296223, LR: 0.0003 +[2026-03-03 16:46:47] (step=0047241) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.243005282723537, LR: 0.0003 +[2026-03-03 16:46:55] (step=0047242) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.243200939150851, LR: 0.0003 +[2026-03-03 16:47:03] (step=0047243) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.243396595578165, LR: 0.0003 +[2026-03-03 16:47:10] (step=0047244) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.24359225200548, LR: 0.0003 +[2026-03-03 16:47:18] (step=0047245) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.243787908432791, LR: 0.0003 +[2026-03-03 16:47:26] (step=0047246) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.243983564860105, LR: 0.0003 +[2026-03-03 16:47:34] (step=0047247) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.24417922128742, LR: 0.0003 +[2026-03-03 16:47:42] (step=0047248) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.244374877714733, LR: 0.0003 +[2026-03-03 16:47:50] (step=0047249) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.244570534142047, LR: 0.0003 +[2026-03-03 16:47:58] (step=0047250) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.24476619056936, LR: 0.0003 +[2026-03-03 16:48:05] (step=0047251) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 9.244961846996674, LR: 0.0003 +[2026-03-03 16:48:13] (step=0047252) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.245157503423988, LR: 0.0003 +[2026-03-03 16:48:21] (step=0047253) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.245353159851302, LR: 0.0003 +[2026-03-03 16:48:29] (step=0047254) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.245548816278614, LR: 0.0003 +[2026-03-03 16:48:37] (step=0047255) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.245744472705928, LR: 0.0003 +[2026-03-03 16:48:45] (step=0047256) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.245940129133242, LR: 0.0003 +[2026-03-03 16:48:53] (step=0047257) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.246135785560556, LR: 0.0003 +[2026-03-03 16:49:00] (step=0047258) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.24633144198787, LR: 0.0003 +[2026-03-03 16:49:08] (step=0047259) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.246527098415182, LR: 0.0003 +[2026-03-03 16:49:16] (step=0047260) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.246722754842496, LR: 0.0003 +[2026-03-03 16:49:24] (step=0047261) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.24691841126981, LR: 0.0003 +[2026-03-03 16:49:32] (step=0047262) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.247114067697124, LR: 0.0003 +[2026-03-03 16:49:40] (step=0047263) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 9.247309724124438, LR: 0.0003 +[2026-03-03 16:49:48] (step=0047264) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.24750538055175, LR: 0.0003 +[2026-03-03 16:49:55] (step=0047265) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.247701036979064, LR: 0.0003 +[2026-03-03 16:50:03] (step=0047266) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.247896693406378, LR: 0.0003 +[2026-03-03 16:50:11] (step=0047267) Train Loss: 0.4245, Train Steps/Sec: 0.13, Epoch: 9.248092349833692, LR: 0.0003 +[2026-03-03 16:50:19] (step=0047268) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.248288006261006, LR: 0.0003 +[2026-03-03 16:50:27] (step=0047269) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 9.248483662688319, LR: 0.0003 +[2026-03-03 16:50:35] (step=0047270) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.248679319115633, LR: 0.0003 +[2026-03-03 16:50:43] (step=0047271) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.248874975542947, LR: 0.0003 +[2026-03-03 16:50:51] (step=0047272) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.24907063197026, LR: 0.0003 +[2026-03-03 16:50:58] (step=0047273) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.249266288397575, LR: 0.0003 +[2026-03-03 16:51:06] (step=0047274) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.249461944824887, LR: 0.0003 +[2026-03-03 16:51:14] (step=0047275) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.249657601252201, LR: 0.0003 +[2026-03-03 16:51:22] (step=0047276) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.249853257679515, LR: 0.0003 +[2026-03-03 16:51:30] (step=0047277) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.250048914106829, LR: 0.0003 +[2026-03-03 16:51:38] (step=0047278) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.250244570534141, LR: 0.0003 +[2026-03-03 16:51:46] (step=0047279) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.250440226961455, LR: 0.0003 +[2026-03-03 16:51:53] (step=0047280) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.25063588338877, LR: 0.0003 +[2026-03-03 16:52:01] (step=0047281) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 9.250831539816083, LR: 0.0003 +[2026-03-03 16:52:09] (step=0047282) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.251027196243397, LR: 0.0003 +[2026-03-03 16:52:17] (step=0047283) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 9.25122285267071, LR: 0.0003 +[2026-03-03 16:52:25] (step=0047284) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.251418509098023, LR: 0.0003 +[2026-03-03 16:52:33] (step=0047285) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 9.251614165525337, LR: 0.0003 +[2026-03-03 16:52:40] (step=0047286) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 9.251809821952651, LR: 0.0003 +[2026-03-03 16:52:48] (step=0047287) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.252005478379965, LR: 0.0003 +[2026-03-03 16:52:56] (step=0047288) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.252201134807278, LR: 0.0003 +[2026-03-03 16:53:04] (step=0047289) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.252396791234592, LR: 0.0003 +[2026-03-03 16:53:12] (step=0047290) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.252592447661906, LR: 0.0003 +[2026-03-03 16:53:20] (step=0047291) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 9.25278810408922, LR: 0.0003 +[2026-03-03 16:53:28] (step=0047292) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.252983760516534, LR: 0.0003 +[2026-03-03 16:53:36] (step=0047293) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.253179416943846, LR: 0.0003 +[2026-03-03 16:53:44] (step=0047294) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.25337507337116, LR: 0.0003 +[2026-03-03 16:53:51] (step=0047295) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.253570729798474, LR: 0.0003 +[2026-03-03 16:53:59] (step=0047296) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.253766386225788, LR: 0.0003 +[2026-03-03 16:54:07] (step=0047297) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.253962042653102, LR: 0.0003 +[2026-03-03 16:54:15] (step=0047298) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.254157699080414, LR: 0.0003 +[2026-03-03 16:54:23] (step=0047299) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.254353355507728, LR: 0.0003 +[2026-03-03 16:54:31] (step=0047300) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 9.254549011935042, LR: 0.0003 +[2026-03-03 16:54:39] (step=0047301) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.254744668362356, LR: 0.0003 +[2026-03-03 16:54:46] (step=0047302) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.25494032478967, LR: 0.0003 +[2026-03-03 16:54:54] (step=0047303) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.255135981216982, LR: 0.0003 +[2026-03-03 16:55:02] (step=0047304) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.255331637644296, LR: 0.0003 +[2026-03-03 16:55:10] (step=0047305) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.25552729407161, LR: 0.0003 +[2026-03-03 16:55:18] (step=0047306) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.255722950498924, LR: 0.0003 +[2026-03-03 16:55:26] (step=0047307) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.255918606926237, LR: 0.0003 +[2026-03-03 16:55:34] (step=0047308) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.25611426335355, LR: 0.0003 +[2026-03-03 16:55:41] (step=0047309) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.256309919780865, LR: 0.0003 +[2026-03-03 16:55:49] (step=0047310) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.256505576208179, LR: 0.0003 +[2026-03-03 16:55:57] (step=0047311) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.256701232635493, LR: 0.0003 +[2026-03-03 16:56:05] (step=0047312) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.256896889062805, LR: 0.0003 +[2026-03-03 16:56:13] (step=0047313) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.257092545490119, LR: 0.0003 +[2026-03-03 16:56:21] (step=0047314) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.257288201917433, LR: 0.0003 +[2026-03-03 16:56:29] (step=0047315) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.257483858344747, LR: 0.0003 +[2026-03-03 16:56:36] (step=0047316) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.257679514772061, LR: 0.0003 +[2026-03-03 16:56:44] (step=0047317) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.257875171199373, LR: 0.0003 +[2026-03-03 16:56:52] (step=0047318) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 9.258070827626687, LR: 0.0003 +[2026-03-03 16:57:00] (step=0047319) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.258266484054001, LR: 0.0003 +[2026-03-03 16:57:08] (step=0047320) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.258462140481315, LR: 0.0003 +[2026-03-03 16:57:16] (step=0047321) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.25865779690863, LR: 0.0003 +[2026-03-03 16:57:24] (step=0047322) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.258853453335941, LR: 0.0003 +[2026-03-03 16:57:32] (step=0047323) Train Loss: 0.4595, Train Steps/Sec: 0.12, Epoch: 9.259049109763255, LR: 0.0003 +[2026-03-03 16:57:40] (step=0047324) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.25924476619057, LR: 0.0003 +[2026-03-03 16:57:47] (step=0047325) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.259440422617883, LR: 0.0003 +[2026-03-03 16:57:55] (step=0047326) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.259636079045197, LR: 0.0003 +[2026-03-03 16:58:03] (step=0047327) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.25983173547251, LR: 0.0003 +[2026-03-03 16:58:11] (step=0047328) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.260027391899824, LR: 0.0003 +[2026-03-03 16:58:19] (step=0047329) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.260223048327138, LR: 0.0003 +[2026-03-03 16:58:27] (step=0047330) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.260418704754452, LR: 0.0003 +[2026-03-03 16:58:35] (step=0047331) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.260614361181764, LR: 0.0003 +[2026-03-03 16:58:42] (step=0047332) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.260810017609078, LR: 0.0003 +[2026-03-03 16:58:50] (step=0047333) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.261005674036392, LR: 0.0003 +[2026-03-03 16:58:58] (step=0047334) Train Loss: 0.4454, Train Steps/Sec: 0.12, Epoch: 9.261201330463706, LR: 0.0003 +[2026-03-03 16:59:06] (step=0047335) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.26139698689102, LR: 0.0003 +[2026-03-03 16:59:14] (step=0047336) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 9.261592643318332, LR: 0.0003 +[2026-03-03 16:59:22] (step=0047337) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.261788299745646, LR: 0.0003 +[2026-03-03 16:59:30] (step=0047338) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.26198395617296, LR: 0.0003 +[2026-03-03 16:59:38] (step=0047339) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.262179612600274, LR: 0.0003 +[2026-03-03 16:59:45] (step=0047340) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 9.262375269027588, LR: 0.0003 +[2026-03-03 16:59:53] (step=0047341) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.2625709254549, LR: 0.0003 +[2026-03-03 17:00:01] (step=0047342) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 9.262766581882214, LR: 0.0003 +[2026-03-03 17:00:09] (step=0047343) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.262962238309528, LR: 0.0003 +[2026-03-03 17:00:17] (step=0047344) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.263157894736842, LR: 0.0003 +[2026-03-03 17:00:25] (step=0047345) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 9.263353551164156, LR: 0.0003 +[2026-03-03 17:00:33] (step=0047346) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 9.263549207591469, LR: 0.0003 +[2026-03-03 17:00:40] (step=0047347) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.263744864018783, LR: 0.0003 +[2026-03-03 17:00:48] (step=0047348) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.263940520446097, LR: 0.0003 +[2026-03-03 17:00:56] (step=0047349) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.26413617687341, LR: 0.0003 +[2026-03-03 17:01:04] (step=0047350) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.264331833300725, LR: 0.0003 +[2026-03-03 17:01:12] (step=0047351) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 9.264527489728037, LR: 0.0003 +[2026-03-03 17:01:20] (step=0047352) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 9.264723146155351, LR: 0.0003 +[2026-03-03 17:01:28] (step=0047353) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.264918802582665, LR: 0.0003 +[2026-03-03 17:01:36] (step=0047354) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.265114459009979, LR: 0.0003 +[2026-03-03 17:01:43] (step=0047355) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.265310115437293, LR: 0.0003 +[2026-03-03 17:01:51] (step=0047356) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.265505771864605, LR: 0.0003 +[2026-03-03 17:01:59] (step=0047357) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 9.26570142829192, LR: 0.0003 +[2026-03-03 17:02:07] (step=0047358) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 9.265897084719233, LR: 0.0003 +[2026-03-03 17:02:15] (step=0047359) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.266092741146547, LR: 0.0003 +[2026-03-03 17:02:23] (step=0047360) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 9.26628839757386, LR: 0.0003 +[2026-03-03 17:02:31] (step=0047361) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.266484054001173, LR: 0.0003 +[2026-03-03 17:02:38] (step=0047362) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.266679710428487, LR: 0.0003 +[2026-03-03 17:02:46] (step=0047363) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.266875366855801, LR: 0.0003 +[2026-03-03 17:02:54] (step=0047364) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.267071023283115, LR: 0.0003 +[2026-03-03 17:03:02] (step=0047365) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.267266679710428, LR: 0.0003 +[2026-03-03 17:03:10] (step=0047366) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.267462336137742, LR: 0.0003 +[2026-03-03 17:03:18] (step=0047367) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.267657992565056, LR: 0.0003 +[2026-03-03 17:03:25] (step=0047368) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.26785364899237, LR: 0.0003 +[2026-03-03 17:03:33] (step=0047369) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 9.268049305419684, LR: 0.0003 +[2026-03-03 17:03:41] (step=0047370) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.268244961846996, LR: 0.0003 +[2026-03-03 17:03:49] (step=0047371) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 9.26844061827431, LR: 0.0003 +[2026-03-03 17:03:57] (step=0047372) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.268636274701624, LR: 0.0003 +[2026-03-03 17:04:05] (step=0047373) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.268831931128938, LR: 0.0003 +[2026-03-03 17:04:13] (step=0047374) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.269027587556252, LR: 0.0003 +[2026-03-03 17:04:21] (step=0047375) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.269223243983564, LR: 0.0003 +[2026-03-03 17:04:28] (step=0047376) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.269418900410878, LR: 0.0003 +[2026-03-03 17:04:36] (step=0047377) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.269614556838192, LR: 0.0003 +[2026-03-03 17:04:44] (step=0047378) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.269810213265506, LR: 0.0003 +[2026-03-03 17:04:52] (step=0047379) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.27000586969282, LR: 0.0003 +[2026-03-03 17:05:00] (step=0047380) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.270201526120132, LR: 0.0003 +[2026-03-03 17:05:08] (step=0047381) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.270397182547446, LR: 0.0003 +[2026-03-03 17:05:16] (step=0047382) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.27059283897476, LR: 0.0003 +[2026-03-03 17:05:23] (step=0047383) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.270788495402075, LR: 0.0003 +[2026-03-03 17:05:31] (step=0047384) Train Loss: 0.4543, Train Steps/Sec: 0.12, Epoch: 9.270984151829387, LR: 0.0003 +[2026-03-03 17:05:39] (step=0047385) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.2711798082567, LR: 0.0003 +[2026-03-03 17:05:47] (step=0047386) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 9.271375464684015, LR: 0.0003 +[2026-03-03 17:05:55] (step=0047387) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.271571121111329, LR: 0.0003 +[2026-03-03 17:06:03] (step=0047388) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.271766777538643, LR: 0.0003 +[2026-03-03 17:06:11] (step=0047389) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.271962433965955, LR: 0.0003 +[2026-03-03 17:06:19] (step=0047390) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.272158090393269, LR: 0.0003 +[2026-03-03 17:06:26] (step=0047391) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.272353746820583, LR: 0.0003 +[2026-03-03 17:06:34] (step=0047392) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.272549403247897, LR: 0.0003 +[2026-03-03 17:06:42] (step=0047393) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.272745059675211, LR: 0.0003 +[2026-03-03 17:06:50] (step=0047394) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.272940716102523, LR: 0.0003 +[2026-03-03 17:06:58] (step=0047395) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.273136372529837, LR: 0.0003 +[2026-03-03 17:07:06] (step=0047396) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.273332028957151, LR: 0.0003 +[2026-03-03 17:07:14] (step=0047397) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.273527685384465, LR: 0.0003 +[2026-03-03 17:07:22] (step=0047398) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.27372334181178, LR: 0.0003 +[2026-03-03 17:07:29] (step=0047399) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.273918998239091, LR: 0.0003 +[2026-03-03 17:07:37] (step=0047400) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 9.274114654666405, LR: 0.0003 +[2026-03-03 17:07:45] (step=0047401) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 9.27431031109372, LR: 0.0003 +[2026-03-03 17:07:53] (step=0047402) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.274505967521034, LR: 0.0003 +[2026-03-03 17:08:01] (step=0047403) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.274701623948348, LR: 0.0003 +[2026-03-03 17:08:09] (step=0047404) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.27489728037566, LR: 0.0003 +[2026-03-03 17:08:17] (step=0047405) Train Loss: 0.4248, Train Steps/Sec: 0.13, Epoch: 9.275092936802974, LR: 0.0003 +[2026-03-03 17:08:24] (step=0047406) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.275288593230288, LR: 0.0003 +[2026-03-03 17:08:32] (step=0047407) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.275484249657602, LR: 0.0003 +[2026-03-03 17:08:40] (step=0047408) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.275679906084916, LR: 0.0003 +[2026-03-03 17:08:48] (step=0047409) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 9.275875562512228, LR: 0.0003 +[2026-03-03 17:08:56] (step=0047410) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 9.276071218939542, LR: 0.0003 +[2026-03-03 17:09:04] (step=0047411) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.276266875366856, LR: 0.0003 +[2026-03-03 17:09:12] (step=0047412) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.27646253179417, LR: 0.0003 +[2026-03-03 17:09:19] (step=0047413) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.276658188221482, LR: 0.0003 +[2026-03-03 17:09:27] (step=0047414) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.276853844648796, LR: 0.0003 +[2026-03-03 17:09:35] (step=0047415) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.27704950107611, LR: 0.0003 +[2026-03-03 17:09:43] (step=0047416) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.277245157503424, LR: 0.0003 +[2026-03-03 17:09:51] (step=0047417) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.277440813930738, LR: 0.0003 +[2026-03-03 17:09:59] (step=0047418) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.27763647035805, LR: 0.0003 +[2026-03-03 17:10:07] (step=0047419) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.277832126785365, LR: 0.0003 +[2026-03-03 17:10:15] (step=0047420) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 9.278027783212679, LR: 0.0003 +[2026-03-03 17:10:22] (step=0047421) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.278223439639993, LR: 0.0003 +[2026-03-03 17:10:30] (step=0047422) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 9.278419096067307, LR: 0.0003 +[2026-03-03 17:10:38] (step=0047423) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.278614752494619, LR: 0.0003 +[2026-03-03 17:10:46] (step=0047424) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.278810408921933, LR: 0.0003 +[2026-03-03 17:10:54] (step=0047425) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.279006065349247, LR: 0.0003 +[2026-03-03 17:11:02] (step=0047426) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.27920172177656, LR: 0.0003 +[2026-03-03 17:11:10] (step=0047427) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.279397378203875, LR: 0.0003 +[2026-03-03 17:11:18] (step=0047428) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.279593034631187, LR: 0.0003 +[2026-03-03 17:11:25] (step=0047429) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.279788691058501, LR: 0.0003 +[2026-03-03 17:11:33] (step=0047430) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.279984347485815, LR: 0.0003 +[2026-03-03 17:11:41] (step=0047431) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.280180003913129, LR: 0.0003 +[2026-03-03 17:11:49] (step=0047432) Train Loss: 0.4392, Train Steps/Sec: 0.12, Epoch: 9.280375660340443, LR: 0.0003 +[2026-03-03 17:11:57] (step=0047433) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.280571316767755, LR: 0.0003 +[2026-03-03 17:12:05] (step=0047434) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 9.28076697319507, LR: 0.0003 +[2026-03-03 17:12:13] (step=0047435) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.280962629622383, LR: 0.0003 +[2026-03-03 17:12:21] (step=0047436) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.281158286049697, LR: 0.0003 +[2026-03-03 17:12:28] (step=0047437) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.28135394247701, LR: 0.0003 +[2026-03-03 17:12:36] (step=0047438) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.281549598904324, LR: 0.0003 +[2026-03-03 17:12:44] (step=0047439) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 9.281745255331638, LR: 0.0003 +[2026-03-03 17:12:52] (step=0047440) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.281940911758952, LR: 0.0003 +[2026-03-03 17:13:00] (step=0047441) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.282136568186266, LR: 0.0003 +[2026-03-03 17:13:08] (step=0047442) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 9.282332224613578, LR: 0.0003 +[2026-03-03 17:13:16] (step=0047443) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.282527881040892, LR: 0.0003 +[2026-03-03 17:13:24] (step=0047444) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.282723537468206, LR: 0.0003 +[2026-03-03 17:13:31] (step=0047445) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.28291919389552, LR: 0.0003 +[2026-03-03 17:13:39] (step=0047446) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.283114850322834, LR: 0.0003 +[2026-03-03 17:13:47] (step=0047447) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.283310506750146, LR: 0.0003 +[2026-03-03 17:13:55] (step=0047448) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.28350616317746, LR: 0.0003 +[2026-03-03 17:14:03] (step=0047449) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.283701819604774, LR: 0.0003 +[2026-03-03 17:14:11] (step=0047450) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 9.283897476032088, LR: 0.0003 +[2026-03-03 17:14:19] (step=0047451) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.284093132459402, LR: 0.0003 +[2026-03-03 17:14:26] (step=0047452) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.284288788886714, LR: 0.0003 +[2026-03-03 17:14:34] (step=0047453) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.284484445314028, LR: 0.0003 +[2026-03-03 17:14:42] (step=0047454) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.284680101741342, LR: 0.0003 +[2026-03-03 17:14:50] (step=0047455) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.284875758168656, LR: 0.0003 +[2026-03-03 17:14:58] (step=0047456) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.28507141459597, LR: 0.0003 +[2026-03-03 17:15:06] (step=0047457) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.285267071023283, LR: 0.0003 +[2026-03-03 17:15:14] (step=0047458) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.285462727450597, LR: 0.0003 +[2026-03-03 17:15:21] (step=0047459) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.28565838387791, LR: 0.0003 +[2026-03-03 17:15:29] (step=0047460) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 9.285854040305225, LR: 0.0003 +[2026-03-03 17:15:37] (step=0047461) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.286049696732539, LR: 0.0003 +[2026-03-03 17:15:45] (step=0047462) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.28624535315985, LR: 0.0003 +[2026-03-03 17:15:53] (step=0047463) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 9.286441009587165, LR: 0.0003 +[2026-03-03 17:16:01] (step=0047464) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.286636666014479, LR: 0.0003 +[2026-03-03 17:16:09] (step=0047465) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.286832322441793, LR: 0.0003 +[2026-03-03 17:16:16] (step=0047466) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.287027978869105, LR: 0.0003 +[2026-03-03 17:16:24] (step=0047467) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.287223635296419, LR: 0.0003 +[2026-03-03 17:16:32] (step=0047468) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.287419291723733, LR: 0.0003 +[2026-03-03 17:16:40] (step=0047469) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.287614948151047, LR: 0.0003 +[2026-03-03 17:16:48] (step=0047470) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.287810604578361, LR: 0.0003 +[2026-03-03 17:16:56] (step=0047471) Train Loss: 0.4183, Train Steps/Sec: 0.13, Epoch: 9.288006261005673, LR: 0.0003 +[2026-03-03 17:17:04] (step=0047472) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.288201917432987, LR: 0.0003 +[2026-03-03 17:17:11] (step=0047473) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.288397573860301, LR: 0.0003 +[2026-03-03 17:17:19] (step=0047474) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.288593230287615, LR: 0.0003 +[2026-03-03 17:17:27] (step=0047475) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.28878888671493, LR: 0.0003 +[2026-03-03 17:17:35] (step=0047476) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.288984543142242, LR: 0.0003 +[2026-03-03 17:17:43] (step=0047477) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.289180199569556, LR: 0.0003 +[2026-03-03 17:17:51] (step=0047478) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.28937585599687, LR: 0.0003 +[2026-03-03 17:17:59] (step=0047479) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.289571512424184, LR: 0.0003 +[2026-03-03 17:18:07] (step=0047480) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.289767168851498, LR: 0.0003 +[2026-03-03 17:18:15] (step=0047481) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 9.28996282527881, LR: 0.0003 +[2026-03-03 17:18:22] (step=0047482) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.290158481706124, LR: 0.0003 +[2026-03-03 17:18:30] (step=0047483) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.290354138133438, LR: 0.0003 +[2026-03-03 17:18:38] (step=0047484) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.290549794560752, LR: 0.0003 +[2026-03-03 17:18:46] (step=0047485) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.290745450988066, LR: 0.0003 +[2026-03-03 17:18:54] (step=0047486) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.290941107415378, LR: 0.0003 +[2026-03-03 17:19:02] (step=0047487) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.291136763842692, LR: 0.0003 +[2026-03-03 17:19:10] (step=0047488) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.291332420270006, LR: 0.0003 +[2026-03-03 17:19:17] (step=0047489) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.29152807669732, LR: 0.0003 +[2026-03-03 17:19:25] (step=0047490) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 9.291723733124632, LR: 0.0003 +[2026-03-03 17:19:33] (step=0047491) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.291919389551946, LR: 0.0003 +[2026-03-03 17:19:41] (step=0047492) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.29211504597926, LR: 0.0003 +[2026-03-03 17:19:49] (step=0047493) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 9.292310702406574, LR: 0.0003 +[2026-03-03 17:19:57] (step=0047494) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.292506358833888, LR: 0.0003 +[2026-03-03 17:20:05] (step=0047495) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.2927020152612, LR: 0.0003 +[2026-03-03 17:20:12] (step=0047496) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.292897671688515, LR: 0.0003 +[2026-03-03 17:20:20] (step=0047497) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.293093328115829, LR: 0.0003 +[2026-03-03 17:20:28] (step=0047498) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.293288984543143, LR: 0.0003 +[2026-03-03 17:20:36] (step=0047499) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.293484640970457, LR: 0.0003 +[2026-03-03 17:20:44] (step=0047500) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.293680297397769, LR: 0.0003 +[2026-03-03 17:20:44] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0047500/ +[2026-03-03 17:20:52] (step=0047501) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.293875953825083, LR: 0.0003 +[2026-03-03 17:21:00] (step=0047502) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.294071610252397, LR: 0.0003 +[2026-03-03 17:21:07] (step=0047503) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.29426726667971, LR: 0.0003 +[2026-03-03 17:21:15] (step=0047504) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.294462923107025, LR: 0.0003 +[2026-03-03 17:21:23] (step=0047505) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 9.294658579534337, LR: 0.0003 +[2026-03-03 17:21:31] (step=0047506) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.294854235961651, LR: 0.0003 +[2026-03-03 17:21:39] (step=0047507) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.295049892388965, LR: 0.0003 +[2026-03-03 17:21:47] (step=0047508) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.295245548816279, LR: 0.0003 +[2026-03-03 17:21:55] (step=0047509) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.295441205243593, LR: 0.0003 +[2026-03-03 17:22:02] (step=0047510) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.295636861670905, LR: 0.0003 +[2026-03-03 17:22:10] (step=0047511) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.29583251809822, LR: 0.0003 +[2026-03-03 17:22:18] (step=0047512) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.296028174525533, LR: 0.0003 +[2026-03-03 17:22:26] (step=0047513) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.296223830952847, LR: 0.0003 +[2026-03-03 17:22:34] (step=0047514) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.29641948738016, LR: 0.0003 +[2026-03-03 17:22:42] (step=0047515) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.296615143807474, LR: 0.0003 +[2026-03-03 17:22:50] (step=0047516) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.296810800234788, LR: 0.0003 +[2026-03-03 17:22:57] (step=0047517) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.297006456662102, LR: 0.0003 +[2026-03-03 17:23:05] (step=0047518) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.297202113089416, LR: 0.0003 +[2026-03-03 17:23:13] (step=0047519) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.297397769516728, LR: 0.0003 +[2026-03-03 17:23:21] (step=0047520) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 9.297593425944042, LR: 0.0003 +[2026-03-03 17:23:29] (step=0047521) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.297789082371356, LR: 0.0003 +[2026-03-03 17:23:37] (step=0047522) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.29798473879867, LR: 0.0003 +[2026-03-03 17:23:45] (step=0047523) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.298180395225984, LR: 0.0003 +[2026-03-03 17:23:52] (step=0047524) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.298376051653296, LR: 0.0003 +[2026-03-03 17:24:00] (step=0047525) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 9.29857170808061, LR: 0.0003 +[2026-03-03 17:24:08] (step=0047526) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.298767364507924, LR: 0.0003 +[2026-03-03 17:24:16] (step=0047527) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.298963020935238, LR: 0.0003 +[2026-03-03 17:24:24] (step=0047528) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 9.299158677362552, LR: 0.0003 +[2026-03-03 17:24:32] (step=0047529) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.299354333789864, LR: 0.0003 +[2026-03-03 17:24:40] (step=0047530) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.299549990217178, LR: 0.0003 +[2026-03-03 17:24:48] (step=0047531) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.299745646644492, LR: 0.0003 +[2026-03-03 17:24:56] (step=0047532) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 9.299941303071806, LR: 0.0003 +[2026-03-03 17:25:04] (step=0047533) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.30013695949912, LR: 0.0003 +[2026-03-03 17:25:11] (step=0047534) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.300332615926433, LR: 0.0003 +[2026-03-03 17:25:19] (step=0047535) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.300528272353747, LR: 0.0003 +[2026-03-03 17:25:27] (step=0047536) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.30072392878106, LR: 0.0003 +[2026-03-03 17:25:35] (step=0047537) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.300919585208375, LR: 0.0003 +[2026-03-03 17:25:43] (step=0047538) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.301115241635689, LR: 0.0003 +[2026-03-03 17:25:51] (step=0047539) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.301310898063, LR: 0.0003 +[2026-03-03 17:25:59] (step=0047540) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.301506554490315, LR: 0.0003 +[2026-03-03 17:26:06] (step=0047541) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 9.301702210917629, LR: 0.0003 +[2026-03-03 17:26:14] (step=0047542) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.301897867344943, LR: 0.0003 +[2026-03-03 17:26:22] (step=0047543) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.302093523772255, LR: 0.0003 +[2026-03-03 17:26:30] (step=0047544) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.302289180199569, LR: 0.0003 +[2026-03-03 17:26:38] (step=0047545) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.302484836626883, LR: 0.0003 +[2026-03-03 17:26:46] (step=0047546) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.302680493054197, LR: 0.0003 +[2026-03-03 17:26:54] (step=0047547) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.302876149481511, LR: 0.0003 +[2026-03-03 17:27:01] (step=0047548) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.303071805908823, LR: 0.0003 +[2026-03-03 17:27:09] (step=0047549) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.303267462336137, LR: 0.0003 +[2026-03-03 17:27:17] (step=0047550) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 9.303463118763451, LR: 0.0003 +[2026-03-03 17:27:25] (step=0047551) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.303658775190765, LR: 0.0003 +[2026-03-03 17:27:33] (step=0047552) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.30385443161808, LR: 0.0003 +[2026-03-03 17:27:41] (step=0047553) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.304050088045392, LR: 0.0003 +[2026-03-03 17:27:49] (step=0047554) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.304245744472706, LR: 0.0003 +[2026-03-03 17:27:56] (step=0047555) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.30444140090002, LR: 0.0003 +[2026-03-03 17:28:04] (step=0047556) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.304637057327334, LR: 0.0003 +[2026-03-03 17:28:12] (step=0047557) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 9.304832713754648, LR: 0.0003 +[2026-03-03 17:28:20] (step=0047558) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.30502837018196, LR: 0.0003 +[2026-03-03 17:28:28] (step=0047559) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.305224026609274, LR: 0.0003 +[2026-03-03 17:28:36] (step=0047560) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.305419683036588, LR: 0.0003 +[2026-03-03 17:28:44] (step=0047561) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 9.305615339463902, LR: 0.0003 +[2026-03-03 17:28:51] (step=0047562) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.305810995891216, LR: 0.0003 +[2026-03-03 17:28:59] (step=0047563) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.306006652318528, LR: 0.0003 +[2026-03-03 17:29:07] (step=0047564) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.306202308745842, LR: 0.0003 +[2026-03-03 17:29:15] (step=0047565) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.306397965173156, LR: 0.0003 +[2026-03-03 17:29:23] (step=0047566) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.30659362160047, LR: 0.0003 +[2026-03-03 17:29:31] (step=0047567) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 9.306789278027782, LR: 0.0003 +[2026-03-03 17:29:39] (step=0047568) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 9.306984934455096, LR: 0.0003 +[2026-03-03 17:29:46] (step=0047569) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 9.30718059088241, LR: 0.0003 +[2026-03-03 17:29:54] (step=0047570) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.307376247309724, LR: 0.0003 +[2026-03-03 17:30:02] (step=0047571) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.307571903737038, LR: 0.0003 +[2026-03-03 17:30:10] (step=0047572) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.30776756016435, LR: 0.0003 +[2026-03-03 17:30:18] (step=0047573) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.307963216591665, LR: 0.0003 +[2026-03-03 17:30:26] (step=0047574) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.308158873018979, LR: 0.0003 +[2026-03-03 17:30:34] (step=0047575) Train Loss: 0.4456, Train Steps/Sec: 0.12, Epoch: 9.308354529446293, LR: 0.0003 +[2026-03-03 17:30:42] (step=0047576) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.308550185873607, LR: 0.0003 +[2026-03-03 17:30:50] (step=0047577) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 9.308745842300919, LR: 0.0003 +[2026-03-03 17:30:58] (step=0047578) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.308941498728233, LR: 0.0003 +[2026-03-03 17:31:05] (step=0047579) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.309137155155547, LR: 0.0003 +[2026-03-03 17:31:13] (step=0047580) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 9.30933281158286, LR: 0.0003 +[2026-03-03 17:31:21] (step=0047581) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 9.309528468010175, LR: 0.0003 +[2026-03-03 17:31:29] (step=0047582) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.309724124437487, LR: 0.0003 +[2026-03-03 17:31:37] (step=0047583) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.309919780864801, LR: 0.0003 +[2026-03-03 17:31:45] (step=0047584) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.310115437292115, LR: 0.0003 +[2026-03-03 17:31:53] (step=0047585) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.310311093719429, LR: 0.0003 +[2026-03-03 17:32:00] (step=0047586) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.310506750146743, LR: 0.0003 +[2026-03-03 17:32:08] (step=0047587) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.310702406574055, LR: 0.0003 +[2026-03-03 17:32:16] (step=0047588) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.31089806300137, LR: 0.0003 +[2026-03-03 17:32:24] (step=0047589) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.311093719428683, LR: 0.0003 +[2026-03-03 17:32:32] (step=0047590) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.311289375855997, LR: 0.0003 +[2026-03-03 17:32:40] (step=0047591) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.311485032283311, LR: 0.0003 +[2026-03-03 17:32:48] (step=0047592) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.311680688710624, LR: 0.0003 +[2026-03-03 17:32:55] (step=0047593) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.311876345137938, LR: 0.0003 +[2026-03-03 17:33:03] (step=0047594) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.312072001565252, LR: 0.0003 +[2026-03-03 17:33:11] (step=0047595) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.312267657992566, LR: 0.0003 +[2026-03-03 17:33:19] (step=0047596) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.312463314419878, LR: 0.0003 +[2026-03-03 17:33:27] (step=0047597) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.312658970847192, LR: 0.0003 +[2026-03-03 17:33:35] (step=0047598) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.312854627274506, LR: 0.0003 +[2026-03-03 17:33:43] (step=0047599) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.31305028370182, LR: 0.0003 +[2026-03-03 17:33:50] (step=0047600) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.313245940129134, LR: 0.0003 +[2026-03-03 17:33:58] (step=0047601) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.313441596556446, LR: 0.0003 +[2026-03-03 17:34:06] (step=0047602) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.31363725298376, LR: 0.0003 +[2026-03-03 17:34:14] (step=0047603) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.313832909411074, LR: 0.0003 +[2026-03-03 17:34:22] (step=0047604) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.314028565838388, LR: 0.0003 +[2026-03-03 17:34:30] (step=0047605) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.314224222265702, LR: 0.0003 +[2026-03-03 17:34:38] (step=0047606) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.314419878693014, LR: 0.0003 +[2026-03-03 17:34:46] (step=0047607) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.314615535120328, LR: 0.0003 +[2026-03-03 17:34:53] (step=0047608) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.314811191547642, LR: 0.0003 +[2026-03-03 17:35:01] (step=0047609) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.315006847974956, LR: 0.0003 +[2026-03-03 17:35:09] (step=0047610) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.31520250440227, LR: 0.0003 +[2026-03-03 17:35:17] (step=0047611) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.315398160829583, LR: 0.0003 +[2026-03-03 17:35:25] (step=0047612) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.315593817256897, LR: 0.0003 +[2026-03-03 17:35:33] (step=0047613) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.31578947368421, LR: 0.0003 +[2026-03-03 17:35:41] (step=0047614) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.315985130111525, LR: 0.0003 +[2026-03-03 17:35:48] (step=0047615) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.316180786538839, LR: 0.0003 +[2026-03-03 17:35:56] (step=0047616) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.31637644296615, LR: 0.0003 +[2026-03-03 17:36:04] (step=0047617) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.316572099393465, LR: 0.0003 +[2026-03-03 17:36:12] (step=0047618) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.316767755820779, LR: 0.0003 +[2026-03-03 17:36:20] (step=0047619) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.316963412248093, LR: 0.0003 +[2026-03-03 17:36:28] (step=0047620) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.317159068675405, LR: 0.0003 +[2026-03-03 17:36:36] (step=0047621) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.31735472510272, LR: 0.0003 +[2026-03-03 17:36:43] (step=0047622) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.317550381530033, LR: 0.0003 +[2026-03-03 17:36:51] (step=0047623) Train Loss: 0.4428, Train Steps/Sec: 0.12, Epoch: 9.317746037957347, LR: 0.0003 +[2026-03-03 17:36:59] (step=0047624) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.317941694384661, LR: 0.0003 +[2026-03-03 17:37:07] (step=0047625) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.318137350811973, LR: 0.0003 +[2026-03-03 17:37:15] (step=0047626) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.318333007239287, LR: 0.0003 +[2026-03-03 17:37:23] (step=0047627) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 9.318528663666601, LR: 0.0003 +[2026-03-03 17:37:31] (step=0047628) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.318724320093915, LR: 0.0003 +[2026-03-03 17:37:39] (step=0047629) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.31891997652123, LR: 0.0003 +[2026-03-03 17:37:47] (step=0047630) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.319115632948542, LR: 0.0003 +[2026-03-03 17:37:54] (step=0047631) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.319311289375856, LR: 0.0003 +[2026-03-03 17:38:02] (step=0047632) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.31950694580317, LR: 0.0003 +[2026-03-03 17:38:10] (step=0047633) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.319702602230484, LR: 0.0003 +[2026-03-03 17:38:18] (step=0047634) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.319898258657798, LR: 0.0003 +[2026-03-03 17:38:26] (step=0047635) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.32009391508511, LR: 0.0003 +[2026-03-03 17:38:34] (step=0047636) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.320289571512424, LR: 0.0003 +[2026-03-03 17:38:42] (step=0047637) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.320485227939738, LR: 0.0003 +[2026-03-03 17:38:49] (step=0047638) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.320680884367052, LR: 0.0003 +[2026-03-03 17:38:57] (step=0047639) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.320876540794366, LR: 0.0003 +[2026-03-03 17:39:05] (step=0047640) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.321072197221678, LR: 0.0003 +[2026-03-03 17:39:13] (step=0047641) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.321267853648992, LR: 0.0003 +[2026-03-03 17:39:21] (step=0047642) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.321463510076306, LR: 0.0003 +[2026-03-03 17:39:29] (step=0047643) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.32165916650362, LR: 0.0003 +[2026-03-03 17:39:37] (step=0047644) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.321854822930934, LR: 0.0003 +[2026-03-03 17:39:44] (step=0047645) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.322050479358246, LR: 0.0003 +[2026-03-03 17:39:52] (step=0047646) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.32224613578556, LR: 0.0003 +[2026-03-03 17:40:00] (step=0047647) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.322441792212874, LR: 0.0003 +[2026-03-03 17:40:08] (step=0047648) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.322637448640188, LR: 0.0003 +[2026-03-03 17:40:16] (step=0047649) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 9.3228331050675, LR: 0.0003 +[2026-03-03 17:40:24] (step=0047650) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.323028761494815, LR: 0.0003 +[2026-03-03 17:40:32] (step=0047651) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 9.323224417922129, LR: 0.0003 +[2026-03-03 17:40:39] (step=0047652) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.323420074349443, LR: 0.0003 +[2026-03-03 17:40:47] (step=0047653) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.323615730776757, LR: 0.0003 +[2026-03-03 17:40:55] (step=0047654) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.323811387204069, LR: 0.0003 +[2026-03-03 17:41:03] (step=0047655) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.324007043631383, LR: 0.0003 +[2026-03-03 17:41:11] (step=0047656) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.324202700058697, LR: 0.0003 +[2026-03-03 17:41:19] (step=0047657) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.324398356486011, LR: 0.0003 +[2026-03-03 17:41:27] (step=0047658) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.324594012913325, LR: 0.0003 +[2026-03-03 17:41:34] (step=0047659) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.324789669340637, LR: 0.0003 +[2026-03-03 17:41:42] (step=0047660) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.324985325767951, LR: 0.0003 +[2026-03-03 17:41:50] (step=0047661) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.325180982195265, LR: 0.0003 +[2026-03-03 17:41:58] (step=0047662) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 9.32537663862258, LR: 0.0003 +[2026-03-03 17:42:06] (step=0047663) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.325572295049893, LR: 0.0003 +[2026-03-03 17:42:14] (step=0047664) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.325767951477205, LR: 0.0003 +[2026-03-03 17:42:22] (step=0047665) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.32596360790452, LR: 0.0003 +[2026-03-03 17:42:29] (step=0047666) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.326159264331833, LR: 0.0003 +[2026-03-03 17:42:37] (step=0047667) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.326354920759147, LR: 0.0003 +[2026-03-03 17:42:45] (step=0047668) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 9.326550577186461, LR: 0.0003 +[2026-03-03 17:42:53] (step=0047669) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.326746233613774, LR: 0.0003 +[2026-03-03 17:43:01] (step=0047670) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.326941890041088, LR: 0.0003 +[2026-03-03 17:43:09] (step=0047671) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.327137546468402, LR: 0.0003 +[2026-03-03 17:43:17] (step=0047672) Train Loss: 0.4391, Train Steps/Sec: 0.12, Epoch: 9.327333202895716, LR: 0.0003 +[2026-03-03 17:43:25] (step=0047673) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.327528859323028, LR: 0.0003 +[2026-03-03 17:43:32] (step=0047674) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.327724515750342, LR: 0.0003 +[2026-03-03 17:43:40] (step=0047675) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.327920172177656, LR: 0.0003 +[2026-03-03 17:43:48] (step=0047676) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.32811582860497, LR: 0.0003 +[2026-03-03 17:43:56] (step=0047677) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.328311485032284, LR: 0.0003 +[2026-03-03 17:44:04] (step=0047678) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.328507141459596, LR: 0.0003 +[2026-03-03 17:44:12] (step=0047679) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.32870279788691, LR: 0.0003 +[2026-03-03 17:44:20] (step=0047680) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.328898454314224, LR: 0.0003 +[2026-03-03 17:44:28] (step=0047681) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.329094110741538, LR: 0.0003 +[2026-03-03 17:44:35] (step=0047682) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.329289767168852, LR: 0.0003 +[2026-03-03 17:44:43] (step=0047683) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 9.329485423596164, LR: 0.0003 +[2026-03-03 17:44:51] (step=0047684) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.329681080023478, LR: 0.0003 +[2026-03-03 17:44:59] (step=0047685) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.329876736450792, LR: 0.0003 +[2026-03-03 17:45:07] (step=0047686) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.330072392878106, LR: 0.0003 +[2026-03-03 17:45:15] (step=0047687) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.33026804930542, LR: 0.0003 +[2026-03-03 17:45:23] (step=0047688) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.330463705732733, LR: 0.0003 +[2026-03-03 17:45:30] (step=0047689) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.330659362160047, LR: 0.0003 +[2026-03-03 17:45:38] (step=0047690) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.33085501858736, LR: 0.0003 +[2026-03-03 17:45:46] (step=0047691) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.331050675014675, LR: 0.0003 +[2026-03-03 17:45:54] (step=0047692) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.331246331441989, LR: 0.0003 +[2026-03-03 17:46:02] (step=0047693) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 9.331441987869301, LR: 0.0003 +[2026-03-03 17:46:10] (step=0047694) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.331637644296615, LR: 0.0003 +[2026-03-03 17:46:17] (step=0047695) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.331833300723929, LR: 0.0003 +[2026-03-03 17:46:25] (step=0047696) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.332028957151243, LR: 0.0003 +[2026-03-03 17:46:33] (step=0047697) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.332224613578557, LR: 0.0003 +[2026-03-03 17:46:41] (step=0047698) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.33242027000587, LR: 0.0003 +[2026-03-03 17:46:49] (step=0047699) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.332615926433183, LR: 0.0003 +[2026-03-03 17:46:57] (step=0047700) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.332811582860497, LR: 0.0003 +[2026-03-03 17:47:05] (step=0047701) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.333007239287811, LR: 0.0003 +[2026-03-03 17:47:12] (step=0047702) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.333202895715123, LR: 0.0003 +[2026-03-03 17:47:20] (step=0047703) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.333398552142437, LR: 0.0003 +[2026-03-03 17:47:28] (step=0047704) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.333594208569751, LR: 0.0003 +[2026-03-03 17:47:36] (step=0047705) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 9.333789864997065, LR: 0.0003 +[2026-03-03 17:47:44] (step=0047706) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.33398552142438, LR: 0.0003 +[2026-03-03 17:47:52] (step=0047707) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.334181177851692, LR: 0.0003 +[2026-03-03 17:48:00] (step=0047708) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.334376834279006, LR: 0.0003 +[2026-03-03 17:48:07] (step=0047709) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.33457249070632, LR: 0.0003 +[2026-03-03 17:48:15] (step=0047710) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.334768147133634, LR: 0.0003 +[2026-03-03 17:48:23] (step=0047711) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.334963803560948, LR: 0.0003 +[2026-03-03 17:48:31] (step=0047712) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.33515945998826, LR: 0.0003 +[2026-03-03 17:48:39] (step=0047713) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.335355116415574, LR: 0.0003 +[2026-03-03 17:48:47] (step=0047714) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.335550772842888, LR: 0.0003 +[2026-03-03 17:48:55] (step=0047715) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.335746429270202, LR: 0.0003 +[2026-03-03 17:49:02] (step=0047716) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 9.335942085697516, LR: 0.0003 +[2026-03-03 17:49:10] (step=0047717) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.336137742124828, LR: 0.0003 +[2026-03-03 17:49:18] (step=0047718) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.336333398552142, LR: 0.0003 +[2026-03-03 17:49:26] (step=0047719) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.336529054979456, LR: 0.0003 +[2026-03-03 17:49:34] (step=0047720) Train Loss: 0.4406, Train Steps/Sec: 0.12, Epoch: 9.33672471140677, LR: 0.0003 +[2026-03-03 17:49:42] (step=0047721) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.336920367834084, LR: 0.0003 +[2026-03-03 17:49:50] (step=0047722) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.337116024261396, LR: 0.0003 +[2026-03-03 17:49:58] (step=0047723) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 9.33731168068871, LR: 0.0003 +[2026-03-03 17:50:06] (step=0047724) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.337507337116024, LR: 0.0003 +[2026-03-03 17:50:14] (step=0047725) Train Loss: 0.4354, Train Steps/Sec: 0.12, Epoch: 9.337702993543338, LR: 0.0003 +[2026-03-03 17:50:21] (step=0047726) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.33789864997065, LR: 0.0003 +[2026-03-03 17:50:29] (step=0047727) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.338094306397965, LR: 0.0003 +[2026-03-03 17:50:37] (step=0047728) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.338289962825279, LR: 0.0003 +[2026-03-03 17:50:45] (step=0047729) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 9.338485619252593, LR: 0.0003 +[2026-03-03 17:50:53] (step=0047730) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 9.338681275679907, LR: 0.0003 +[2026-03-03 17:51:01] (step=0047731) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.338876932107219, LR: 0.0003 +[2026-03-03 17:51:09] (step=0047732) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.339072588534533, LR: 0.0003 +[2026-03-03 17:51:16] (step=0047733) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.339268244961847, LR: 0.0003 +[2026-03-03 17:51:24] (step=0047734) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.339463901389161, LR: 0.0003 +[2026-03-03 17:51:32] (step=0047735) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.339659557816475, LR: 0.0003 +[2026-03-03 17:51:40] (step=0047736) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.339855214243787, LR: 0.0003 +[2026-03-03 17:51:48] (step=0047737) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 9.340050870671101, LR: 0.0003 +[2026-03-03 17:51:56] (step=0047738) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 9.340246527098415, LR: 0.0003 +[2026-03-03 17:52:03] (step=0047739) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.34044218352573, LR: 0.0003 +[2026-03-03 17:52:11] (step=0047740) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.340637839953043, LR: 0.0003 +[2026-03-03 17:52:19] (step=0047741) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.340833496380355, LR: 0.0003 +[2026-03-03 17:52:27] (step=0047742) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.34102915280767, LR: 0.0003 +[2026-03-03 17:52:35] (step=0047743) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 9.341224809234983, LR: 0.0003 +[2026-03-03 17:52:43] (step=0047744) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.341420465662297, LR: 0.0003 +[2026-03-03 17:52:51] (step=0047745) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.341616122089611, LR: 0.0003 +[2026-03-03 17:52:58] (step=0047746) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.341811778516924, LR: 0.0003 +[2026-03-03 17:53:06] (step=0047747) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.342007434944238, LR: 0.0003 +[2026-03-03 17:53:14] (step=0047748) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.342203091371552, LR: 0.0003 +[2026-03-03 17:53:22] (step=0047749) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.342398747798866, LR: 0.0003 +[2026-03-03 17:53:30] (step=0047750) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.34259440422618, LR: 0.0003 +[2026-03-03 17:53:38] (step=0047751) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.342790060653492, LR: 0.0003 +[2026-03-03 17:53:46] (step=0047752) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.342985717080806, LR: 0.0003 +[2026-03-03 17:53:53] (step=0047753) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.34318137350812, LR: 0.0003 +[2026-03-03 17:54:01] (step=0047754) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.343377029935434, LR: 0.0003 +[2026-03-03 17:54:09] (step=0047755) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 9.343572686362746, LR: 0.0003 +[2026-03-03 17:54:17] (step=0047756) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.34376834279006, LR: 0.0003 +[2026-03-03 17:54:25] (step=0047757) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.343963999217374, LR: 0.0003 +[2026-03-03 17:54:33] (step=0047758) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.344159655644688, LR: 0.0003 +[2026-03-03 17:54:41] (step=0047759) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 9.344355312072002, LR: 0.0003 +[2026-03-03 17:54:48] (step=0047760) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 9.344550968499314, LR: 0.0003 +[2026-03-03 17:54:56] (step=0047761) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.344746624926628, LR: 0.0003 +[2026-03-03 17:55:04] (step=0047762) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.344942281353942, LR: 0.0003 +[2026-03-03 17:55:12] (step=0047763) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.345137937781256, LR: 0.0003 +[2026-03-03 17:55:20] (step=0047764) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.34533359420857, LR: 0.0003 +[2026-03-03 17:55:28] (step=0047765) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.345529250635883, LR: 0.0003 +[2026-03-03 17:55:36] (step=0047766) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.345724907063197, LR: 0.0003 +[2026-03-03 17:55:43] (step=0047767) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.34592056349051, LR: 0.0003 +[2026-03-03 17:55:51] (step=0047768) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.346116219917825, LR: 0.0003 +[2026-03-03 17:55:59] (step=0047769) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.346311876345139, LR: 0.0003 +[2026-03-03 17:56:07] (step=0047770) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.346507532772451, LR: 0.0003 +[2026-03-03 17:56:15] (step=0047771) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.346703189199765, LR: 0.0003 +[2026-03-03 17:56:23] (step=0047772) Train Loss: 0.4422, Train Steps/Sec: 0.12, Epoch: 9.346898845627079, LR: 0.0003 +[2026-03-03 17:56:31] (step=0047773) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.347094502054393, LR: 0.0003 +[2026-03-03 17:56:39] (step=0047774) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.347290158481707, LR: 0.0003 +[2026-03-03 17:56:47] (step=0047775) Train Loss: 0.4446, Train Steps/Sec: 0.12, Epoch: 9.34748581490902, LR: 0.0003 +[2026-03-03 17:56:54] (step=0047776) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.347681471336333, LR: 0.0003 +[2026-03-03 17:57:02] (step=0047777) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.347877127763647, LR: 0.0003 +[2026-03-03 17:57:10] (step=0047778) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.348072784190961, LR: 0.0003 +[2026-03-03 17:57:18] (step=0047779) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.348268440618273, LR: 0.0003 +[2026-03-03 17:57:26] (step=0047780) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.348464097045587, LR: 0.0003 +[2026-03-03 17:57:34] (step=0047781) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.348659753472901, LR: 0.0003 +[2026-03-03 17:57:42] (step=0047782) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.348855409900215, LR: 0.0003 +[2026-03-03 17:57:49] (step=0047783) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.34905106632753, LR: 0.0003 +[2026-03-03 17:57:57] (step=0047784) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.349246722754842, LR: 0.0003 +[2026-03-03 17:58:05] (step=0047785) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.349442379182156, LR: 0.0003 +[2026-03-03 17:58:13] (step=0047786) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.34963803560947, LR: 0.0003 +[2026-03-03 17:58:21] (step=0047787) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.349833692036784, LR: 0.0003 +[2026-03-03 17:58:29] (step=0047788) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.350029348464098, LR: 0.0003 +[2026-03-03 17:58:37] (step=0047789) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.35022500489141, LR: 0.0003 +[2026-03-03 17:58:45] (step=0047790) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.350420661318724, LR: 0.0003 +[2026-03-03 17:58:52] (step=0047791) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.350616317746038, LR: 0.0003 +[2026-03-03 17:59:00] (step=0047792) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.350811974173352, LR: 0.0003 +[2026-03-03 17:59:08] (step=0047793) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.351007630600666, LR: 0.0003 +[2026-03-03 17:59:16] (step=0047794) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.351203287027978, LR: 0.0003 +[2026-03-03 17:59:24] (step=0047795) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.351398943455292, LR: 0.0003 +[2026-03-03 17:59:32] (step=0047796) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.351594599882606, LR: 0.0003 +[2026-03-03 17:59:39] (step=0047797) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.35179025630992, LR: 0.0003 +[2026-03-03 17:59:47] (step=0047798) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.351985912737234, LR: 0.0003 +[2026-03-03 17:59:55] (step=0047799) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.352181569164546, LR: 0.0003 +[2026-03-03 18:00:03] (step=0047800) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.35237722559186, LR: 0.0003 +[2026-03-03 18:00:11] (step=0047801) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.352572882019174, LR: 0.0003 +[2026-03-03 18:00:19] (step=0047802) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.352768538446488, LR: 0.0003 +[2026-03-03 18:00:27] (step=0047803) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.352964194873802, LR: 0.0003 +[2026-03-03 18:00:34] (step=0047804) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.353159851301115, LR: 0.0003 +[2026-03-03 18:00:42] (step=0047805) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.353355507728429, LR: 0.0003 +[2026-03-03 18:00:50] (step=0047806) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.353551164155743, LR: 0.0003 +[2026-03-03 18:00:58] (step=0047807) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.353746820583057, LR: 0.0003 +[2026-03-03 18:01:06] (step=0047808) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.353942477010369, LR: 0.0003 +[2026-03-03 18:01:14] (step=0047809) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.354138133437683, LR: 0.0003 +[2026-03-03 18:01:22] (step=0047810) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.354333789864997, LR: 0.0003 +[2026-03-03 18:01:29] (step=0047811) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.354529446292311, LR: 0.0003 +[2026-03-03 18:01:37] (step=0047812) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.354725102719625, LR: 0.0003 +[2026-03-03 18:01:45] (step=0047813) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.354920759146937, LR: 0.0003 +[2026-03-03 18:01:53] (step=0047814) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.355116415574251, LR: 0.0003 +[2026-03-03 18:02:01] (step=0047815) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.355312072001565, LR: 0.0003 +[2026-03-03 18:02:09] (step=0047816) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.35550772842888, LR: 0.0003 +[2026-03-03 18:02:17] (step=0047817) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 9.355703384856193, LR: 0.0003 +[2026-03-03 18:02:24] (step=0047818) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.355899041283505, LR: 0.0003 +[2026-03-03 18:02:32] (step=0047819) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 9.35609469771082, LR: 0.0003 +[2026-03-03 18:02:40] (step=0047820) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.356290354138133, LR: 0.0003 +[2026-03-03 18:02:48] (step=0047821) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.356486010565447, LR: 0.0003 +[2026-03-03 18:02:56] (step=0047822) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.356681666992761, LR: 0.0003 +[2026-03-03 18:03:04] (step=0047823) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.356877323420074, LR: 0.0003 +[2026-03-03 18:03:12] (step=0047824) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.357072979847388, LR: 0.0003 +[2026-03-03 18:03:20] (step=0047825) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.357268636274702, LR: 0.0003 +[2026-03-03 18:03:27] (step=0047826) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.357464292702016, LR: 0.0003 +[2026-03-03 18:03:35] (step=0047827) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.35765994912933, LR: 0.0003 +[2026-03-03 18:03:43] (step=0047828) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.357855605556642, LR: 0.0003 +[2026-03-03 18:03:51] (step=0047829) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.358051261983956, LR: 0.0003 +[2026-03-03 18:03:59] (step=0047830) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.35824691841127, LR: 0.0003 +[2026-03-03 18:04:07] (step=0047831) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.358442574838584, LR: 0.0003 +[2026-03-03 18:04:15] (step=0047832) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.358638231265896, LR: 0.0003 +[2026-03-03 18:04:23] (step=0047833) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.35883388769321, LR: 0.0003 +[2026-03-03 18:04:30] (step=0047834) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 9.359029544120524, LR: 0.0003 +[2026-03-03 18:04:38] (step=0047835) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 9.359225200547838, LR: 0.0003 +[2026-03-03 18:04:46] (step=0047836) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.359420856975152, LR: 0.0003 +[2026-03-03 18:04:54] (step=0047837) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.359616513402464, LR: 0.0003 +[2026-03-03 18:05:02] (step=0047838) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.359812169829778, LR: 0.0003 +[2026-03-03 18:05:10] (step=0047839) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.360007826257092, LR: 0.0003 +[2026-03-03 18:05:18] (step=0047840) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.360203482684406, LR: 0.0003 +[2026-03-03 18:05:25] (step=0047841) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.36039913911172, LR: 0.0003 +[2026-03-03 18:05:33] (step=0047842) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.360594795539033, LR: 0.0003 +[2026-03-03 18:05:41] (step=0047843) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.360790451966347, LR: 0.0003 +[2026-03-03 18:05:49] (step=0047844) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.36098610839366, LR: 0.0003 +[2026-03-03 18:05:57] (step=0047845) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.361181764820975, LR: 0.0003 +[2026-03-03 18:06:05] (step=0047846) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.361377421248289, LR: 0.0003 +[2026-03-03 18:06:13] (step=0047847) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.361573077675601, LR: 0.0003 +[2026-03-03 18:06:20] (step=0047848) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.361768734102915, LR: 0.0003 +[2026-03-03 18:06:28] (step=0047849) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.361964390530229, LR: 0.0003 +[2026-03-03 18:06:36] (step=0047850) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.362160046957543, LR: 0.0003 +[2026-03-03 18:06:44] (step=0047851) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.362355703384857, LR: 0.0003 +[2026-03-03 18:06:52] (step=0047852) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.36255135981217, LR: 0.0003 +[2026-03-03 18:07:00] (step=0047853) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.362747016239483, LR: 0.0003 +[2026-03-03 18:07:08] (step=0047854) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.362942672666797, LR: 0.0003 +[2026-03-03 18:07:15] (step=0047855) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.363138329094111, LR: 0.0003 +[2026-03-03 18:07:23] (step=0047856) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.363333985521425, LR: 0.0003 +[2026-03-03 18:07:31] (step=0047857) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.363529641948737, LR: 0.0003 +[2026-03-03 18:07:39] (step=0047858) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.363725298376051, LR: 0.0003 +[2026-03-03 18:07:47] (step=0047859) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.363920954803366, LR: 0.0003 +[2026-03-03 18:07:55] (step=0047860) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.36411661123068, LR: 0.0003 +[2026-03-03 18:08:03] (step=0047861) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 9.364312267657992, LR: 0.0003 +[2026-03-03 18:08:10] (step=0047862) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.364507924085306, LR: 0.0003 +[2026-03-03 18:08:18] (step=0047863) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.36470358051262, LR: 0.0003 +[2026-03-03 18:08:26] (step=0047864) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 9.364899236939934, LR: 0.0003 +[2026-03-03 18:08:34] (step=0047865) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 9.365094893367248, LR: 0.0003 +[2026-03-03 18:08:42] (step=0047866) Train Loss: 0.4402, Train Steps/Sec: 0.12, Epoch: 9.36529054979456, LR: 0.0003 +[2026-03-03 18:08:50] (step=0047867) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.365486206221874, LR: 0.0003 +[2026-03-03 18:08:58] (step=0047868) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.365681862649188, LR: 0.0003 +[2026-03-03 18:09:06] (step=0047869) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.365877519076502, LR: 0.0003 +[2026-03-03 18:09:13] (step=0047870) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.366073175503816, LR: 0.0003 +[2026-03-03 18:09:21] (step=0047871) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.366268831931128, LR: 0.0003 +[2026-03-03 18:09:29] (step=0047872) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.366464488358442, LR: 0.0003 +[2026-03-03 18:09:37] (step=0047873) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.366660144785756, LR: 0.0003 +[2026-03-03 18:09:45] (step=0047874) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.36685580121307, LR: 0.0003 +[2026-03-03 18:09:53] (step=0047875) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.367051457640384, LR: 0.0003 +[2026-03-03 18:10:01] (step=0047876) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.367247114067697, LR: 0.0003 +[2026-03-03 18:10:09] (step=0047877) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.36744277049501, LR: 0.0003 +[2026-03-03 18:10:16] (step=0047878) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.367638426922325, LR: 0.0003 +[2026-03-03 18:10:24] (step=0047879) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.367834083349639, LR: 0.0003 +[2026-03-03 18:10:32] (step=0047880) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.368029739776953, LR: 0.0003 +[2026-03-03 18:10:40] (step=0047881) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.368225396204265, LR: 0.0003 +[2026-03-03 18:10:48] (step=0047882) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.368421052631579, LR: 0.0003 +[2026-03-03 18:10:56] (step=0047883) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.368616709058893, LR: 0.0003 +[2026-03-03 18:11:04] (step=0047884) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.368812365486207, LR: 0.0003 +[2026-03-03 18:11:11] (step=0047885) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.369008021913519, LR: 0.0003 +[2026-03-03 18:11:19] (step=0047886) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.369203678340833, LR: 0.0003 +[2026-03-03 18:11:27] (step=0047887) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.369399334768147, LR: 0.0003 +[2026-03-03 18:11:35] (step=0047888) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.369594991195461, LR: 0.0003 +[2026-03-03 18:11:43] (step=0047889) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.369790647622775, LR: 0.0003 +[2026-03-03 18:11:51] (step=0047890) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.369986304050087, LR: 0.0003 +[2026-03-03 18:11:59] (step=0047891) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 9.370181960477401, LR: 0.0003 +[2026-03-03 18:12:06] (step=0047892) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.370377616904715, LR: 0.0003 +[2026-03-03 18:12:14] (step=0047893) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.37057327333203, LR: 0.0003 +[2026-03-03 18:12:22] (step=0047894) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.370768929759343, LR: 0.0003 +[2026-03-03 18:12:30] (step=0047895) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.370964586186656, LR: 0.0003 +[2026-03-03 18:12:38] (step=0047896) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.37116024261397, LR: 0.0003 +[2026-03-03 18:12:46] (step=0047897) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.371355899041284, LR: 0.0003 +[2026-03-03 18:12:54] (step=0047898) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.371551555468598, LR: 0.0003 +[2026-03-03 18:13:01] (step=0047899) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.371747211895912, LR: 0.0003 +[2026-03-03 18:13:09] (step=0047900) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.371942868323224, LR: 0.0003 +[2026-03-03 18:13:17] (step=0047901) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.372138524750538, LR: 0.0003 +[2026-03-03 18:13:25] (step=0047902) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.372334181177852, LR: 0.0003 +[2026-03-03 18:13:33] (step=0047903) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.372529837605166, LR: 0.0003 +[2026-03-03 18:13:41] (step=0047904) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 9.37272549403248, LR: 0.0003 +[2026-03-03 18:13:49] (step=0047905) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 9.372921150459792, LR: 0.0003 +[2026-03-03 18:13:56] (step=0047906) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.373116806887106, LR: 0.0003 +[2026-03-03 18:14:04] (step=0047907) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.37331246331442, LR: 0.0003 +[2026-03-03 18:14:12] (step=0047908) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.373508119741734, LR: 0.0003 +[2026-03-03 18:14:20] (step=0047909) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.373703776169048, LR: 0.0003 +[2026-03-03 18:14:28] (step=0047910) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.37389943259636, LR: 0.0003 +[2026-03-03 18:14:36] (step=0047911) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.374095089023674, LR: 0.0003 +[2026-03-03 18:14:44] (step=0047912) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.374290745450988, LR: 0.0003 +[2026-03-03 18:14:51] (step=0047913) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 9.374486401878302, LR: 0.0003 +[2026-03-03 18:14:59] (step=0047914) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.374682058305615, LR: 0.0003 +[2026-03-03 18:15:07] (step=0047915) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.374877714732929, LR: 0.0003 +[2026-03-03 18:15:15] (step=0047916) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.375073371160243, LR: 0.0003 +[2026-03-03 18:15:23] (step=0047917) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.375269027587557, LR: 0.0003 +[2026-03-03 18:15:31] (step=0047918) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.37546468401487, LR: 0.0003 +[2026-03-03 18:15:39] (step=0047919) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.375660340442183, LR: 0.0003 +[2026-03-03 18:15:47] (step=0047920) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.375855996869497, LR: 0.0003 +[2026-03-03 18:15:54] (step=0047921) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.37605165329681, LR: 0.0003 +[2026-03-03 18:16:02] (step=0047922) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.376247309724125, LR: 0.0003 +[2026-03-03 18:16:10] (step=0047923) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.376442966151439, LR: 0.0003 +[2026-03-03 18:16:18] (step=0047924) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.376638622578751, LR: 0.0003 +[2026-03-03 18:16:26] (step=0047925) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.376834279006065, LR: 0.0003 +[2026-03-03 18:16:34] (step=0047926) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 9.377029935433379, LR: 0.0003 +[2026-03-03 18:16:42] (step=0047927) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 9.377225591860693, LR: 0.0003 +[2026-03-03 18:16:49] (step=0047928) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.377421248288007, LR: 0.0003 +[2026-03-03 18:16:57] (step=0047929) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.37761690471532, LR: 0.0003 +[2026-03-03 18:17:05] (step=0047930) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.377812561142633, LR: 0.0003 +[2026-03-03 18:17:13] (step=0047931) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.378008217569947, LR: 0.0003 +[2026-03-03 18:17:21] (step=0047932) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.378203873997261, LR: 0.0003 +[2026-03-03 18:17:29] (step=0047933) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.378399530424575, LR: 0.0003 +[2026-03-03 18:17:37] (step=0047934) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.378595186851888, LR: 0.0003 +[2026-03-03 18:17:44] (step=0047935) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.378790843279202, LR: 0.0003 +[2026-03-03 18:17:52] (step=0047936) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.378986499706516, LR: 0.0003 +[2026-03-03 18:18:00] (step=0047937) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 9.37918215613383, LR: 0.0003 +[2026-03-03 18:18:08] (step=0047938) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.379377812561142, LR: 0.0003 +[2026-03-03 18:18:16] (step=0047939) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.379573468988456, LR: 0.0003 +[2026-03-03 18:18:24] (step=0047940) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.37976912541577, LR: 0.0003 +[2026-03-03 18:18:32] (step=0047941) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.379964781843084, LR: 0.0003 +[2026-03-03 18:18:39] (step=0047942) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 9.380160438270398, LR: 0.0003 +[2026-03-03 18:18:47] (step=0047943) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.38035609469771, LR: 0.0003 +[2026-03-03 18:18:55] (step=0047944) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.380551751125024, LR: 0.0003 +[2026-03-03 18:19:03] (step=0047945) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.380747407552338, LR: 0.0003 +[2026-03-03 18:19:11] (step=0047946) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.380943063979652, LR: 0.0003 +[2026-03-03 18:19:19] (step=0047947) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.381138720406966, LR: 0.0003 +[2026-03-03 18:19:27] (step=0047948) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 9.381334376834278, LR: 0.0003 +[2026-03-03 18:19:34] (step=0047949) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.381530033261592, LR: 0.0003 +[2026-03-03 18:19:42] (step=0047950) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.381725689688906, LR: 0.0003 +[2026-03-03 18:19:50] (step=0047951) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 9.38192134611622, LR: 0.0003 +[2026-03-03 18:19:58] (step=0047952) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.382117002543534, LR: 0.0003 +[2026-03-03 18:20:06] (step=0047953) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.382312658970847, LR: 0.0003 +[2026-03-03 18:20:14] (step=0047954) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.38250831539816, LR: 0.0003 +[2026-03-03 18:20:22] (step=0047955) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.382703971825475, LR: 0.0003 +[2026-03-03 18:20:29] (step=0047956) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 9.382899628252789, LR: 0.0003 +[2026-03-03 18:20:37] (step=0047957) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.383095284680103, LR: 0.0003 +[2026-03-03 18:20:45] (step=0047958) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.383290941107415, LR: 0.0003 +[2026-03-03 18:20:53] (step=0047959) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.383486597534729, LR: 0.0003 +[2026-03-03 18:21:01] (step=0047960) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.383682253962043, LR: 0.0003 +[2026-03-03 18:21:09] (step=0047961) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.383877910389357, LR: 0.0003 +[2026-03-03 18:21:17] (step=0047962) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.384073566816669, LR: 0.0003 +[2026-03-03 18:21:24] (step=0047963) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.384269223243983, LR: 0.0003 +[2026-03-03 18:21:32] (step=0047964) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.384464879671297, LR: 0.0003 +[2026-03-03 18:21:40] (step=0047965) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.384660536098611, LR: 0.0003 +[2026-03-03 18:21:48] (step=0047966) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.384856192525925, LR: 0.0003 +[2026-03-03 18:21:56] (step=0047967) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.385051848953237, LR: 0.0003 +[2026-03-03 18:22:04] (step=0047968) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.385247505380551, LR: 0.0003 +[2026-03-03 18:22:11] (step=0047969) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.385443161807865, LR: 0.0003 +[2026-03-03 18:22:19] (step=0047970) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.38563881823518, LR: 0.0003 +[2026-03-03 18:22:27] (step=0047971) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.385834474662493, LR: 0.0003 +[2026-03-03 18:22:35] (step=0047972) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.386030131089806, LR: 0.0003 +[2026-03-03 18:22:43] (step=0047973) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.38622578751712, LR: 0.0003 +[2026-03-03 18:22:51] (step=0047974) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.386421443944434, LR: 0.0003 +[2026-03-03 18:22:59] (step=0047975) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.386617100371748, LR: 0.0003 +[2026-03-03 18:23:07] (step=0047976) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.386812756799062, LR: 0.0003 +[2026-03-03 18:23:15] (step=0047977) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 9.387008413226374, LR: 0.0003 +[2026-03-03 18:23:22] (step=0047978) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.387204069653688, LR: 0.0003 +[2026-03-03 18:23:30] (step=0047979) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.387399726081002, LR: 0.0003 +[2026-03-03 18:23:38] (step=0047980) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.387595382508316, LR: 0.0003 +[2026-03-03 18:23:46] (step=0047981) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.38779103893563, LR: 0.0003 +[2026-03-03 18:23:54] (step=0047982) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.387986695362942, LR: 0.0003 +[2026-03-03 18:24:02] (step=0047983) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.388182351790256, LR: 0.0003 +[2026-03-03 18:24:09] (step=0047984) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.38837800821757, LR: 0.0003 +[2026-03-03 18:24:17] (step=0047985) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.388573664644884, LR: 0.0003 +[2026-03-03 18:24:25] (step=0047986) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.388769321072198, LR: 0.0003 +[2026-03-03 18:24:33] (step=0047987) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 9.38896497749951, LR: 0.0003 +[2026-03-03 18:24:41] (step=0047988) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.389160633926824, LR: 0.0003 +[2026-03-03 18:24:49] (step=0047989) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.389356290354138, LR: 0.0003 +[2026-03-03 18:24:57] (step=0047990) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.389551946781452, LR: 0.0003 +[2026-03-03 18:25:04] (step=0047991) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.389747603208765, LR: 0.0003 +[2026-03-03 18:25:12] (step=0047992) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.389943259636079, LR: 0.0003 +[2026-03-03 18:25:20] (step=0047993) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.390138916063393, LR: 0.0003 +[2026-03-03 18:25:28] (step=0047994) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.390334572490707, LR: 0.0003 +[2026-03-03 18:25:36] (step=0047995) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.39053022891802, LR: 0.0003 +[2026-03-03 18:25:44] (step=0047996) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.390725885345333, LR: 0.0003 +[2026-03-03 18:25:52] (step=0047997) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.390921541772647, LR: 0.0003 +[2026-03-03 18:25:59] (step=0047998) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.39111719819996, LR: 0.0003 +[2026-03-03 18:26:07] (step=0047999) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.391312854627275, LR: 0.0003 +[2026-03-03 18:26:15] (step=0048000) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 9.391508511054589, LR: 0.0003 +[2026-03-03 18:26:15] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0048000/ +[2026-03-03 18:26:23] (step=0048001) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 9.391704167481901, LR: 0.0003 +[2026-03-03 18:26:31] (step=0048002) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.391899823909215, LR: 0.0003 +[2026-03-03 18:26:39] (step=0048003) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.392095480336529, LR: 0.0003 +[2026-03-03 18:26:46] (step=0048004) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.392291136763843, LR: 0.0003 +[2026-03-03 18:26:54] (step=0048005) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.392486793191157, LR: 0.0003 +[2026-03-03 18:27:02] (step=0048006) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.39268244961847, LR: 0.0003 +[2026-03-03 18:27:10] (step=0048007) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.392878106045783, LR: 0.0003 +[2026-03-03 18:27:18] (step=0048008) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.393073762473097, LR: 0.0003 +[2026-03-03 18:27:26] (step=0048009) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.393269418900411, LR: 0.0003 +[2026-03-03 18:27:34] (step=0048010) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.393465075327725, LR: 0.0003 +[2026-03-03 18:27:41] (step=0048011) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.393660731755038, LR: 0.0003 +[2026-03-03 18:27:49] (step=0048012) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.393856388182352, LR: 0.0003 +[2026-03-03 18:27:57] (step=0048013) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.394052044609666, LR: 0.0003 +[2026-03-03 18:28:05] (step=0048014) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.39424770103698, LR: 0.0003 +[2026-03-03 18:28:13] (step=0048015) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.394443357464292, LR: 0.0003 +[2026-03-03 18:28:21] (step=0048016) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.394639013891606, LR: 0.0003 +[2026-03-03 18:28:29] (step=0048017) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.39483467031892, LR: 0.0003 +[2026-03-03 18:28:36] (step=0048018) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.395030326746234, LR: 0.0003 +[2026-03-03 18:28:44] (step=0048019) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.395225983173548, LR: 0.0003 +[2026-03-03 18:28:52] (step=0048020) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.39542163960086, LR: 0.0003 +[2026-03-03 18:29:00] (step=0048021) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.395617296028174, LR: 0.0003 +[2026-03-03 18:29:08] (step=0048022) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 9.395812952455488, LR: 0.0003 +[2026-03-03 18:29:16] (step=0048023) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.396008608882802, LR: 0.0003 +[2026-03-03 18:29:24] (step=0048024) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.396204265310116, LR: 0.0003 +[2026-03-03 18:29:32] (step=0048025) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.396399921737428, LR: 0.0003 +[2026-03-03 18:29:40] (step=0048026) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.396595578164742, LR: 0.0003 +[2026-03-03 18:29:47] (step=0048027) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.396791234592056, LR: 0.0003 +[2026-03-03 18:29:55] (step=0048028) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.39698689101937, LR: 0.0003 +[2026-03-03 18:30:03] (step=0048029) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.397182547446684, LR: 0.0003 +[2026-03-03 18:30:11] (step=0048030) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.397378203873997, LR: 0.0003 +[2026-03-03 18:30:19] (step=0048031) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.39757386030131, LR: 0.0003 +[2026-03-03 18:30:27] (step=0048032) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.397769516728625, LR: 0.0003 +[2026-03-03 18:30:34] (step=0048033) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 9.397965173155939, LR: 0.0003 +[2026-03-03 18:30:42] (step=0048034) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.398160829583253, LR: 0.0003 +[2026-03-03 18:30:50] (step=0048035) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.398356486010565, LR: 0.0003 +[2026-03-03 18:30:58] (step=0048036) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.398552142437879, LR: 0.0003 +[2026-03-03 18:31:06] (step=0048037) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.398747798865193, LR: 0.0003 +[2026-03-03 18:31:14] (step=0048038) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.398943455292507, LR: 0.0003 +[2026-03-03 18:31:22] (step=0048039) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.39913911171982, LR: 0.0003 +[2026-03-03 18:31:29] (step=0048040) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.399334768147133, LR: 0.0003 +[2026-03-03 18:31:37] (step=0048041) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.399530424574447, LR: 0.0003 +[2026-03-03 18:31:45] (step=0048042) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 9.399726081001761, LR: 0.0003 +[2026-03-03 18:31:53] (step=0048043) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 9.399921737429075, LR: 0.0003 +[2026-03-03 18:32:01] (step=0048044) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.400117393856387, LR: 0.0003 +[2026-03-03 18:32:09] (step=0048045) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.400313050283701, LR: 0.0003 +[2026-03-03 18:32:17] (step=0048046) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.400508706711015, LR: 0.0003 +[2026-03-03 18:32:24] (step=0048047) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.40070436313833, LR: 0.0003 +[2026-03-03 18:32:32] (step=0048048) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.400900019565643, LR: 0.0003 +[2026-03-03 18:32:40] (step=0048049) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.401095675992956, LR: 0.0003 +[2026-03-03 18:32:48] (step=0048050) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.40129133242027, LR: 0.0003 +[2026-03-03 18:32:56] (step=0048051) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.401486988847584, LR: 0.0003 +[2026-03-03 18:33:04] (step=0048052) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.401682645274898, LR: 0.0003 +[2026-03-03 18:33:12] (step=0048053) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.401878301702212, LR: 0.0003 +[2026-03-03 18:33:19] (step=0048054) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.402073958129524, LR: 0.0003 +[2026-03-03 18:33:27] (step=0048055) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.402269614556838, LR: 0.0003 +[2026-03-03 18:33:35] (step=0048056) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.402465270984152, LR: 0.0003 +[2026-03-03 18:33:43] (step=0048057) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.402660927411466, LR: 0.0003 +[2026-03-03 18:33:51] (step=0048058) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 9.40285658383878, LR: 0.0003 +[2026-03-03 18:33:59] (step=0048059) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.403052240266092, LR: 0.0003 +[2026-03-03 18:34:06] (step=0048060) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.403247896693406, LR: 0.0003 +[2026-03-03 18:34:14] (step=0048061) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.40344355312072, LR: 0.0003 +[2026-03-03 18:34:22] (step=0048062) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 9.403639209548034, LR: 0.0003 +[2026-03-03 18:34:30] (step=0048063) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.403834865975348, LR: 0.0003 +[2026-03-03 18:34:38] (step=0048064) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.40403052240266, LR: 0.0003 +[2026-03-03 18:34:46] (step=0048065) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.404226178829974, LR: 0.0003 +[2026-03-03 18:34:54] (step=0048066) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.404421835257288, LR: 0.0003 +[2026-03-03 18:35:01] (step=0048067) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.404617491684602, LR: 0.0003 +[2026-03-03 18:35:09] (step=0048068) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.404813148111915, LR: 0.0003 +[2026-03-03 18:35:17] (step=0048069) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 9.405008804539229, LR: 0.0003 +[2026-03-03 18:35:25] (step=0048070) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.405204460966543, LR: 0.0003 +[2026-03-03 18:35:33] (step=0048071) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.405400117393857, LR: 0.0003 +[2026-03-03 18:35:41] (step=0048072) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.40559577382117, LR: 0.0003 +[2026-03-03 18:35:49] (step=0048073) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.405791430248483, LR: 0.0003 +[2026-03-03 18:35:56] (step=0048074) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 9.405987086675797, LR: 0.0003 +[2026-03-03 18:36:04] (step=0048075) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.40618274310311, LR: 0.0003 +[2026-03-03 18:36:12] (step=0048076) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.406378399530425, LR: 0.0003 +[2026-03-03 18:36:20] (step=0048077) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.406574055957739, LR: 0.0003 +[2026-03-03 18:36:28] (step=0048078) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.406769712385051, LR: 0.0003 +[2026-03-03 18:36:36] (step=0048079) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.406965368812365, LR: 0.0003 +[2026-03-03 18:36:44] (step=0048080) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 9.40716102523968, LR: 0.0003 +[2026-03-03 18:36:51] (step=0048081) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.407356681666993, LR: 0.0003 +[2026-03-03 18:36:59] (step=0048082) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.407552338094307, LR: 0.0003 +[2026-03-03 18:37:07] (step=0048083) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.40774799452162, LR: 0.0003 +[2026-03-03 18:37:15] (step=0048084) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 9.407943650948933, LR: 0.0003 +[2026-03-03 18:37:23] (step=0048085) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.408139307376247, LR: 0.0003 +[2026-03-03 18:37:31] (step=0048086) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.408334963803561, LR: 0.0003 +[2026-03-03 18:37:39] (step=0048087) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.408530620230875, LR: 0.0003 +[2026-03-03 18:37:46] (step=0048088) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 9.408726276658188, LR: 0.0003 +[2026-03-03 18:37:54] (step=0048089) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.408921933085502, LR: 0.0003 +[2026-03-03 18:38:02] (step=0048090) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.409117589512816, LR: 0.0003 +[2026-03-03 18:38:10] (step=0048091) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.40931324594013, LR: 0.0003 +[2026-03-03 18:38:18] (step=0048092) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.409508902367444, LR: 0.0003 +[2026-03-03 18:38:26] (step=0048093) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.409704558794756, LR: 0.0003 +[2026-03-03 18:38:34] (step=0048094) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.40990021522207, LR: 0.0003 +[2026-03-03 18:38:41] (step=0048095) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.410095871649384, LR: 0.0003 +[2026-03-03 18:38:49] (step=0048096) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.410291528076698, LR: 0.0003 +[2026-03-03 18:38:57] (step=0048097) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.41048718450401, LR: 0.0003 +[2026-03-03 18:39:05] (step=0048098) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 9.410682840931324, LR: 0.0003 +[2026-03-03 18:39:13] (step=0048099) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.410878497358638, LR: 0.0003 +[2026-03-03 18:39:21] (step=0048100) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.411074153785952, LR: 0.0003 +[2026-03-03 18:39:29] (step=0048101) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.411269810213266, LR: 0.0003 +[2026-03-03 18:39:36] (step=0048102) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.411465466640578, LR: 0.0003 +[2026-03-03 18:39:44] (step=0048103) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.411661123067892, LR: 0.0003 +[2026-03-03 18:39:52] (step=0048104) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.411856779495206, LR: 0.0003 +[2026-03-03 18:40:00] (step=0048105) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.41205243592252, LR: 0.0003 +[2026-03-03 18:40:08] (step=0048106) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 9.412248092349834, LR: 0.0003 +[2026-03-03 18:40:16] (step=0048107) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.412443748777147, LR: 0.0003 +[2026-03-03 18:40:24] (step=0048108) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.41263940520446, LR: 0.0003 +[2026-03-03 18:40:31] (step=0048109) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.412835061631775, LR: 0.0003 +[2026-03-03 18:40:39] (step=0048110) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.413030718059089, LR: 0.0003 +[2026-03-03 18:40:47] (step=0048111) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 9.413226374486403, LR: 0.0003 +[2026-03-03 18:40:55] (step=0048112) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.413422030913715, LR: 0.0003 +[2026-03-03 18:41:03] (step=0048113) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.413617687341029, LR: 0.0003 +[2026-03-03 18:41:11] (step=0048114) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.413813343768343, LR: 0.0003 +[2026-03-03 18:41:18] (step=0048115) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.414009000195657, LR: 0.0003 +[2026-03-03 18:41:26] (step=0048116) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.414204656622971, LR: 0.0003 +[2026-03-03 18:41:34] (step=0048117) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.414400313050283, LR: 0.0003 +[2026-03-03 18:41:42] (step=0048118) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.414595969477597, LR: 0.0003 +[2026-03-03 18:41:50] (step=0048119) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 9.414791625904911, LR: 0.0003 +[2026-03-03 18:41:58] (step=0048120) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.414987282332225, LR: 0.0003 +[2026-03-03 18:42:06] (step=0048121) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.415182938759537, LR: 0.0003 +[2026-03-03 18:42:14] (step=0048122) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.415378595186851, LR: 0.0003 +[2026-03-03 18:42:21] (step=0048123) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.415574251614165, LR: 0.0003 +[2026-03-03 18:42:29] (step=0048124) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.41576990804148, LR: 0.0003 +[2026-03-03 18:42:37] (step=0048125) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.415965564468793, LR: 0.0003 +[2026-03-03 18:42:45] (step=0048126) Train Loss: 0.4587, Train Steps/Sec: 0.12, Epoch: 9.416161220896106, LR: 0.0003 +[2026-03-03 18:42:53] (step=0048127) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 9.41635687732342, LR: 0.0003 +[2026-03-03 18:43:01] (step=0048128) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.416552533750734, LR: 0.0003 +[2026-03-03 18:43:09] (step=0048129) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 9.416748190178048, LR: 0.0003 +[2026-03-03 18:43:17] (step=0048130) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.416943846605362, LR: 0.0003 +[2026-03-03 18:43:24] (step=0048131) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.417139503032674, LR: 0.0003 +[2026-03-03 18:43:32] (step=0048132) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.417335159459988, LR: 0.0003 +[2026-03-03 18:43:40] (step=0048133) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.417530815887302, LR: 0.0003 +[2026-03-03 18:43:48] (step=0048134) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.417726472314616, LR: 0.0003 +[2026-03-03 18:43:56] (step=0048135) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.41792212874193, LR: 0.0003 +[2026-03-03 18:44:04] (step=0048136) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.418117785169242, LR: 0.0003 +[2026-03-03 18:44:11] (step=0048137) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.418313441596556, LR: 0.0003 +[2026-03-03 18:44:19] (step=0048138) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.41850909802387, LR: 0.0003 +[2026-03-03 18:44:27] (step=0048139) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 9.418704754451184, LR: 0.0003 +[2026-03-03 18:44:35] (step=0048140) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.418900410878498, LR: 0.0003 +[2026-03-03 18:44:43] (step=0048141) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.41909606730581, LR: 0.0003 +[2026-03-03 18:44:51] (step=0048142) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.419291723733124, LR: 0.0003 +[2026-03-03 18:44:59] (step=0048143) Train Loss: 0.4189, Train Steps/Sec: 0.13, Epoch: 9.419487380160438, LR: 0.0003 +[2026-03-03 18:45:06] (step=0048144) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.419683036587752, LR: 0.0003 +[2026-03-03 18:45:14] (step=0048145) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.419878693015066, LR: 0.0003 +[2026-03-03 18:45:22] (step=0048146) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.420074349442379, LR: 0.0003 +[2026-03-03 18:45:30] (step=0048147) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 9.420270005869693, LR: 0.0003 +[2026-03-03 18:45:38] (step=0048148) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.420465662297007, LR: 0.0003 +[2026-03-03 18:45:46] (step=0048149) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 9.42066131872432, LR: 0.0003 +[2026-03-03 18:45:54] (step=0048150) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.420856975151633, LR: 0.0003 +[2026-03-03 18:46:01] (step=0048151) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.421052631578947, LR: 0.0003 +[2026-03-03 18:46:09] (step=0048152) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.421248288006261, LR: 0.0003 +[2026-03-03 18:46:17] (step=0048153) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.421443944433575, LR: 0.0003 +[2026-03-03 18:46:25] (step=0048154) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 9.421639600860889, LR: 0.0003 +[2026-03-03 18:46:33] (step=0048155) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.421835257288201, LR: 0.0003 +[2026-03-03 18:46:41] (step=0048156) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.422030913715515, LR: 0.0003 +[2026-03-03 18:46:49] (step=0048157) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.42222657014283, LR: 0.0003 +[2026-03-03 18:46:56] (step=0048158) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.422422226570143, LR: 0.0003 +[2026-03-03 18:47:04] (step=0048159) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.422617882997457, LR: 0.0003 +[2026-03-03 18:47:12] (step=0048160) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.42281353942477, LR: 0.0003 +[2026-03-03 18:47:20] (step=0048161) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 9.423009195852083, LR: 0.0003 +[2026-03-03 18:47:28] (step=0048162) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.423204852279397, LR: 0.0003 +[2026-03-03 18:47:36] (step=0048163) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.423400508706711, LR: 0.0003 +[2026-03-03 18:47:44] (step=0048164) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 9.423596165134025, LR: 0.0003 +[2026-03-03 18:47:51] (step=0048165) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.423791821561338, LR: 0.0003 +[2026-03-03 18:47:59] (step=0048166) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.423987477988652, LR: 0.0003 +[2026-03-03 18:48:07] (step=0048167) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.424183134415966, LR: 0.0003 +[2026-03-03 18:48:15] (step=0048168) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.42437879084328, LR: 0.0003 +[2026-03-03 18:48:23] (step=0048169) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.424574447270594, LR: 0.0003 +[2026-03-03 18:48:31] (step=0048170) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.424770103697906, LR: 0.0003 +[2026-03-03 18:48:39] (step=0048171) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.42496576012522, LR: 0.0003 +[2026-03-03 18:48:46] (step=0048172) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 9.425161416552534, LR: 0.0003 +[2026-03-03 18:48:54] (step=0048173) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.425357072979848, LR: 0.0003 +[2026-03-03 18:49:02] (step=0048174) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.42555272940716, LR: 0.0003 +[2026-03-03 18:49:10] (step=0048175) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 9.425748385834474, LR: 0.0003 +[2026-03-03 18:49:18] (step=0048176) Train Loss: 0.4435, Train Steps/Sec: 0.12, Epoch: 9.425944042261788, LR: 0.0003 +[2026-03-03 18:49:26] (step=0048177) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.426139698689102, LR: 0.0003 +[2026-03-03 18:49:34] (step=0048178) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.426335355116416, LR: 0.0003 +[2026-03-03 18:49:42] (step=0048179) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.426531011543728, LR: 0.0003 +[2026-03-03 18:49:49] (step=0048180) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.426726667971042, LR: 0.0003 +[2026-03-03 18:49:57] (step=0048181) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.426922324398356, LR: 0.0003 +[2026-03-03 18:50:05] (step=0048182) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.42711798082567, LR: 0.0003 +[2026-03-03 18:50:13] (step=0048183) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.427313637252984, LR: 0.0003 +[2026-03-03 18:50:21] (step=0048184) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.427509293680297, LR: 0.0003 +[2026-03-03 18:50:29] (step=0048185) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.42770495010761, LR: 0.0003 +[2026-03-03 18:50:37] (step=0048186) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.427900606534925, LR: 0.0003 +[2026-03-03 18:50:44] (step=0048187) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.428096262962239, LR: 0.0003 +[2026-03-03 18:50:52] (step=0048188) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.428291919389553, LR: 0.0003 +[2026-03-03 18:51:00] (step=0048189) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.428487575816865, LR: 0.0003 +[2026-03-03 18:51:08] (step=0048190) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.428683232244179, LR: 0.0003 +[2026-03-03 18:51:16] (step=0048191) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.428878888671493, LR: 0.0003 +[2026-03-03 18:51:24] (step=0048192) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.429074545098807, LR: 0.0003 +[2026-03-03 18:51:31] (step=0048193) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.429270201526121, LR: 0.0003 +[2026-03-03 18:51:39] (step=0048194) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.429465857953433, LR: 0.0003 +[2026-03-03 18:51:47] (step=0048195) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.429661514380747, LR: 0.0003 +[2026-03-03 18:51:55] (step=0048196) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.429857170808061, LR: 0.0003 +[2026-03-03 18:52:03] (step=0048197) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.430052827235375, LR: 0.0003 +[2026-03-03 18:52:11] (step=0048198) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.43024848366269, LR: 0.0003 +[2026-03-03 18:52:19] (step=0048199) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.430444140090001, LR: 0.0003 +[2026-03-03 18:52:26] (step=0048200) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.430639796517315, LR: 0.0003 +[2026-03-03 18:52:34] (step=0048201) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.43083545294463, LR: 0.0003 +[2026-03-03 18:52:42] (step=0048202) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.431031109371943, LR: 0.0003 +[2026-03-03 18:52:50] (step=0048203) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.431226765799256, LR: 0.0003 +[2026-03-03 18:52:58] (step=0048204) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.43142242222657, LR: 0.0003 +[2026-03-03 18:53:06] (step=0048205) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.431618078653884, LR: 0.0003 +[2026-03-03 18:53:14] (step=0048206) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 9.431813735081198, LR: 0.0003 +[2026-03-03 18:53:21] (step=0048207) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.432009391508512, LR: 0.0003 +[2026-03-03 18:53:29] (step=0048208) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.432205047935824, LR: 0.0003 +[2026-03-03 18:53:37] (step=0048209) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.432400704363138, LR: 0.0003 +[2026-03-03 18:53:45] (step=0048210) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.432596360790452, LR: 0.0003 +[2026-03-03 18:53:53] (step=0048211) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.432792017217766, LR: 0.0003 +[2026-03-03 18:54:01] (step=0048212) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.43298767364508, LR: 0.0003 +[2026-03-03 18:54:09] (step=0048213) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.433183330072392, LR: 0.0003 +[2026-03-03 18:54:16] (step=0048214) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.433378986499706, LR: 0.0003 +[2026-03-03 18:54:24] (step=0048215) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.43357464292702, LR: 0.0003 +[2026-03-03 18:54:32] (step=0048216) Train Loss: 0.4275, Train Steps/Sec: 0.12, Epoch: 9.433770299354334, LR: 0.0003 +[2026-03-03 18:54:40] (step=0048217) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.433965955781648, LR: 0.0003 +[2026-03-03 18:54:48] (step=0048218) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.43416161220896, LR: 0.0003 +[2026-03-03 18:54:56] (step=0048219) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.434357268636274, LR: 0.0003 +[2026-03-03 18:55:04] (step=0048220) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.434552925063588, LR: 0.0003 +[2026-03-03 18:55:11] (step=0048221) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.434748581490902, LR: 0.0003 +[2026-03-03 18:55:19] (step=0048222) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.434944237918216, LR: 0.0003 +[2026-03-03 18:55:27] (step=0048223) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.435139894345529, LR: 0.0003 +[2026-03-03 18:55:35] (step=0048224) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.435335550772843, LR: 0.0003 +[2026-03-03 18:55:43] (step=0048225) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.435531207200157, LR: 0.0003 +[2026-03-03 18:55:51] (step=0048226) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.43572686362747, LR: 0.0003 +[2026-03-03 18:55:59] (step=0048227) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 9.435922520054783, LR: 0.0003 +[2026-03-03 18:56:06] (step=0048228) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.436118176482097, LR: 0.0003 +[2026-03-03 18:56:14] (step=0048229) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.436313832909411, LR: 0.0003 +[2026-03-03 18:56:22] (step=0048230) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.436509489336725, LR: 0.0003 +[2026-03-03 18:56:30] (step=0048231) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 9.436705145764039, LR: 0.0003 +[2026-03-03 18:56:38] (step=0048232) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.436900802191351, LR: 0.0003 +[2026-03-03 18:56:46] (step=0048233) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.437096458618665, LR: 0.0003 +[2026-03-03 18:56:54] (step=0048234) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.43729211504598, LR: 0.0003 +[2026-03-03 18:57:01] (step=0048235) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 9.437487771473293, LR: 0.0003 +[2026-03-03 18:57:09] (step=0048236) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.437683427900607, LR: 0.0003 +[2026-03-03 18:57:17] (step=0048237) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.43787908432792, LR: 0.0003 +[2026-03-03 18:57:25] (step=0048238) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.438074740755233, LR: 0.0003 +[2026-03-03 18:57:33] (step=0048239) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.438270397182547, LR: 0.0003 +[2026-03-03 18:57:41] (step=0048240) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.438466053609861, LR: 0.0003 +[2026-03-03 18:57:49] (step=0048241) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.438661710037175, LR: 0.0003 +[2026-03-03 18:57:56] (step=0048242) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 9.438857366464488, LR: 0.0003 +[2026-03-03 18:58:04] (step=0048243) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.439053022891802, LR: 0.0003 +[2026-03-03 18:58:12] (step=0048244) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.439248679319116, LR: 0.0003 +[2026-03-03 18:58:20] (step=0048245) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.43944433574643, LR: 0.0003 +[2026-03-03 18:58:28] (step=0048246) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.439639992173744, LR: 0.0003 +[2026-03-03 18:58:36] (step=0048247) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 9.439835648601056, LR: 0.0003 +[2026-03-03 18:58:44] (step=0048248) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.44003130502837, LR: 0.0003 +[2026-03-03 18:58:51] (step=0048249) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 9.440226961455684, LR: 0.0003 +[2026-03-03 18:58:59] (step=0048250) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.440422617882998, LR: 0.0003 +[2026-03-03 18:59:07] (step=0048251) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.440618274310312, LR: 0.0003 +[2026-03-03 18:59:15] (step=0048252) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.440813930737624, LR: 0.0003 +[2026-03-03 18:59:23] (step=0048253) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.441009587164938, LR: 0.0003 +[2026-03-03 18:59:31] (step=0048254) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.441205243592252, LR: 0.0003 +[2026-03-03 18:59:39] (step=0048255) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.441400900019566, LR: 0.0003 +[2026-03-03 18:59:46] (step=0048256) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.441596556446878, LR: 0.0003 +[2026-03-03 18:59:54] (step=0048257) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.441792212874192, LR: 0.0003 +[2026-03-03 19:00:02] (step=0048258) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.441987869301506, LR: 0.0003 +[2026-03-03 19:00:10] (step=0048259) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.44218352572882, LR: 0.0003 +[2026-03-03 19:00:18] (step=0048260) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.442379182156134, LR: 0.0003 +[2026-03-03 19:00:26] (step=0048261) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.442574838583447, LR: 0.0003 +[2026-03-03 19:00:34] (step=0048262) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.44277049501076, LR: 0.0003 +[2026-03-03 19:00:41] (step=0048263) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.442966151438075, LR: 0.0003 +[2026-03-03 19:00:49] (step=0048264) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.443161807865389, LR: 0.0003 +[2026-03-03 19:00:57] (step=0048265) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 9.443357464292703, LR: 0.0003 +[2026-03-03 19:01:05] (step=0048266) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.443553120720015, LR: 0.0003 +[2026-03-03 19:01:13] (step=0048267) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.443748777147329, LR: 0.0003 +[2026-03-03 19:01:21] (step=0048268) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.443944433574643, LR: 0.0003 +[2026-03-03 19:01:29] (step=0048269) Train Loss: 0.4357, Train Steps/Sec: 0.12, Epoch: 9.444140090001957, LR: 0.0003 +[2026-03-03 19:01:37] (step=0048270) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.444335746429271, LR: 0.0003 +[2026-03-03 19:01:44] (step=0048271) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.444531402856583, LR: 0.0003 +[2026-03-03 19:01:52] (step=0048272) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.444727059283897, LR: 0.0003 +[2026-03-03 19:02:00] (step=0048273) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.444922715711211, LR: 0.0003 +[2026-03-03 19:02:08] (step=0048274) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.445118372138525, LR: 0.0003 +[2026-03-03 19:02:16] (step=0048275) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 9.44531402856584, LR: 0.0003 +[2026-03-03 19:02:24] (step=0048276) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.445509684993151, LR: 0.0003 +[2026-03-03 19:02:32] (step=0048277) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.445705341420465, LR: 0.0003 +[2026-03-03 19:02:40] (step=0048278) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.44590099784778, LR: 0.0003 +[2026-03-03 19:02:47] (step=0048279) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.446096654275093, LR: 0.0003 +[2026-03-03 19:02:55] (step=0048280) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 9.446292310702406, LR: 0.0003 +[2026-03-03 19:03:03] (step=0048281) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.44648796712972, LR: 0.0003 +[2026-03-03 19:03:11] (step=0048282) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.446683623557034, LR: 0.0003 +[2026-03-03 19:03:19] (step=0048283) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 9.446879279984348, LR: 0.0003 +[2026-03-03 19:03:27] (step=0048284) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.447074936411662, LR: 0.0003 +[2026-03-03 19:03:34] (step=0048285) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.447270592838974, LR: 0.0003 +[2026-03-03 19:03:42] (step=0048286) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.447466249266288, LR: 0.0003 +[2026-03-03 19:03:50] (step=0048287) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.447661905693602, LR: 0.0003 +[2026-03-03 19:03:58] (step=0048288) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.447857562120916, LR: 0.0003 +[2026-03-03 19:04:06] (step=0048289) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.44805321854823, LR: 0.0003 +[2026-03-03 19:04:14] (step=0048290) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 9.448248874975542, LR: 0.0003 +[2026-03-03 19:04:22] (step=0048291) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.448444531402856, LR: 0.0003 +[2026-03-03 19:04:29] (step=0048292) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.44864018783017, LR: 0.0003 +[2026-03-03 19:04:37] (step=0048293) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.448835844257484, LR: 0.0003 +[2026-03-03 19:04:45] (step=0048294) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.449031500684798, LR: 0.0003 +[2026-03-03 19:04:53] (step=0048295) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.44922715711211, LR: 0.0003 +[2026-03-03 19:05:01] (step=0048296) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.449422813539424, LR: 0.0003 +[2026-03-03 19:05:09] (step=0048297) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.449618469966738, LR: 0.0003 +[2026-03-03 19:05:17] (step=0048298) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 9.449814126394052, LR: 0.0003 +[2026-03-03 19:05:24] (step=0048299) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.450009782821366, LR: 0.0003 +[2026-03-03 19:05:32] (step=0048300) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.450205439248679, LR: 0.0003 +[2026-03-03 19:05:40] (step=0048301) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.450401095675993, LR: 0.0003 +[2026-03-03 19:05:48] (step=0048302) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.450596752103307, LR: 0.0003 +[2026-03-03 19:05:56] (step=0048303) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 9.45079240853062, LR: 0.0003 +[2026-03-03 19:06:04] (step=0048304) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.450988064957935, LR: 0.0003 +[2026-03-03 19:06:11] (step=0048305) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.451183721385247, LR: 0.0003 +[2026-03-03 19:06:19] (step=0048306) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.451379377812561, LR: 0.0003 +[2026-03-03 19:06:27] (step=0048307) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.451575034239875, LR: 0.0003 +[2026-03-03 19:06:35] (step=0048308) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.451770690667189, LR: 0.0003 +[2026-03-03 19:06:43] (step=0048309) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.451966347094501, LR: 0.0003 +[2026-03-03 19:06:51] (step=0048310) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.452162003521815, LR: 0.0003 +[2026-03-03 19:06:59] (step=0048311) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 9.45235765994913, LR: 0.0003 +[2026-03-03 19:07:06] (step=0048312) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.452553316376443, LR: 0.0003 +[2026-03-03 19:07:14] (step=0048313) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.452748972803757, LR: 0.0003 +[2026-03-03 19:07:22] (step=0048314) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.45294462923107, LR: 0.0003 +[2026-03-03 19:07:30] (step=0048315) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.453140285658383, LR: 0.0003 +[2026-03-03 19:07:38] (step=0048316) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.453335942085697, LR: 0.0003 +[2026-03-03 19:07:46] (step=0048317) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.453531598513012, LR: 0.0003 +[2026-03-03 19:07:54] (step=0048318) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.453727254940326, LR: 0.0003 +[2026-03-03 19:08:02] (step=0048319) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.453922911367638, LR: 0.0003 +[2026-03-03 19:08:09] (step=0048320) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.454118567794952, LR: 0.0003 +[2026-03-03 19:08:17] (step=0048321) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.454314224222266, LR: 0.0003 +[2026-03-03 19:08:25] (step=0048322) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.45450988064958, LR: 0.0003 +[2026-03-03 19:08:33] (step=0048323) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.454705537076894, LR: 0.0003 +[2026-03-03 19:08:41] (step=0048324) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.454901193504206, LR: 0.0003 +[2026-03-03 19:08:49] (step=0048325) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.45509684993152, LR: 0.0003 +[2026-03-03 19:08:57] (step=0048326) Train Loss: 0.4717, Train Steps/Sec: 0.13, Epoch: 9.455292506358834, LR: 0.0003 +[2026-03-03 19:09:05] (step=0048327) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.455488162786148, LR: 0.0003 +[2026-03-03 19:09:12] (step=0048328) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.455683819213462, LR: 0.0003 +[2026-03-03 19:09:20] (step=0048329) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.455879475640774, LR: 0.0003 +[2026-03-03 19:09:28] (step=0048330) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 9.456075132068088, LR: 0.0003 +[2026-03-03 19:09:36] (step=0048331) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.456270788495402, LR: 0.0003 +[2026-03-03 19:09:44] (step=0048332) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.456466444922716, LR: 0.0003 +[2026-03-03 19:09:52] (step=0048333) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.456662101350028, LR: 0.0003 +[2026-03-03 19:10:00] (step=0048334) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.456857757777342, LR: 0.0003 +[2026-03-03 19:10:07] (step=0048335) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.457053414204657, LR: 0.0003 +[2026-03-03 19:10:15] (step=0048336) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.45724907063197, LR: 0.0003 +[2026-03-03 19:10:23] (step=0048337) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.457444727059285, LR: 0.0003 +[2026-03-03 19:10:31] (step=0048338) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.457640383486597, LR: 0.0003 +[2026-03-03 19:10:39] (step=0048339) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.45783603991391, LR: 0.0003 +[2026-03-03 19:10:47] (step=0048340) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.458031696341225, LR: 0.0003 +[2026-03-03 19:10:54] (step=0048341) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.458227352768539, LR: 0.0003 +[2026-03-03 19:11:02] (step=0048342) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.458423009195853, LR: 0.0003 +[2026-03-03 19:11:10] (step=0048343) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.458618665623165, LR: 0.0003 +[2026-03-03 19:11:18] (step=0048344) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.458814322050479, LR: 0.0003 +[2026-03-03 19:11:26] (step=0048345) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 9.459009978477793, LR: 0.0003 +[2026-03-03 19:11:34] (step=0048346) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.459205634905107, LR: 0.0003 +[2026-03-03 19:11:42] (step=0048347) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.459401291332421, LR: 0.0003 +[2026-03-03 19:11:49] (step=0048348) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.459596947759733, LR: 0.0003 +[2026-03-03 19:11:57] (step=0048349) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.459792604187047, LR: 0.0003 +[2026-03-03 19:12:05] (step=0048350) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 9.459988260614361, LR: 0.0003 +[2026-03-03 19:12:13] (step=0048351) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.460183917041675, LR: 0.0003 +[2026-03-03 19:12:21] (step=0048352) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.46037957346899, LR: 0.0003 +[2026-03-03 19:12:29] (step=0048353) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.460575229896302, LR: 0.0003 +[2026-03-03 19:12:37] (step=0048354) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.460770886323616, LR: 0.0003 +[2026-03-03 19:12:44] (step=0048355) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.46096654275093, LR: 0.0003 +[2026-03-03 19:12:52] (step=0048356) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.461162199178244, LR: 0.0003 +[2026-03-03 19:13:00] (step=0048357) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 9.461357855605556, LR: 0.0003 +[2026-03-03 19:13:08] (step=0048358) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.46155351203287, LR: 0.0003 +[2026-03-03 19:13:16] (step=0048359) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.461749168460184, LR: 0.0003 +[2026-03-03 19:13:24] (step=0048360) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.461944824887498, LR: 0.0003 +[2026-03-03 19:13:32] (step=0048361) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.462140481314812, LR: 0.0003 +[2026-03-03 19:13:39] (step=0048362) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.462336137742124, LR: 0.0003 +[2026-03-03 19:13:47] (step=0048363) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.462531794169438, LR: 0.0003 +[2026-03-03 19:13:55] (step=0048364) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 9.462727450596752, LR: 0.0003 +[2026-03-03 19:14:03] (step=0048365) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.462923107024066, LR: 0.0003 +[2026-03-03 19:14:11] (step=0048366) Train Loss: 0.4359, Train Steps/Sec: 0.12, Epoch: 9.46311876345138, LR: 0.0003 +[2026-03-03 19:14:19] (step=0048367) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.463314419878692, LR: 0.0003 +[2026-03-03 19:14:27] (step=0048368) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.463510076306006, LR: 0.0003 +[2026-03-03 19:14:35] (step=0048369) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 9.46370573273332, LR: 0.0003 +[2026-03-03 19:14:42] (step=0048370) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.463901389160634, LR: 0.0003 +[2026-03-03 19:14:51] (step=0048371) Train Loss: 0.4421, Train Steps/Sec: 0.12, Epoch: 9.464097045587948, LR: 0.0003 +[2026-03-03 19:14:58] (step=0048372) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.46429270201526, LR: 0.0003 +[2026-03-03 19:15:06] (step=0048373) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.464488358442575, LR: 0.0003 +[2026-03-03 19:15:14] (step=0048374) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.464684014869889, LR: 0.0003 +[2026-03-03 19:15:22] (step=0048375) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 9.464879671297203, LR: 0.0003 +[2026-03-03 19:15:30] (step=0048376) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.465075327724517, LR: 0.0003 +[2026-03-03 19:15:38] (step=0048377) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 9.465270984151829, LR: 0.0003 +[2026-03-03 19:15:45] (step=0048378) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.465466640579143, LR: 0.0003 +[2026-03-03 19:15:53] (step=0048379) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.465662297006457, LR: 0.0003 +[2026-03-03 19:16:01] (step=0048380) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.46585795343377, LR: 0.0003 +[2026-03-03 19:16:09] (step=0048381) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.466053609861085, LR: 0.0003 +[2026-03-03 19:16:17] (step=0048382) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.466249266288397, LR: 0.0003 +[2026-03-03 19:16:25] (step=0048383) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.466444922715711, LR: 0.0003 +[2026-03-03 19:16:33] (step=0048384) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.466640579143025, LR: 0.0003 +[2026-03-03 19:16:41] (step=0048385) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.466836235570339, LR: 0.0003 +[2026-03-03 19:16:48] (step=0048386) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 9.467031891997651, LR: 0.0003 +[2026-03-03 19:16:56] (step=0048387) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.467227548424965, LR: 0.0003 +[2026-03-03 19:17:04] (step=0048388) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.46742320485228, LR: 0.0003 +[2026-03-03 19:17:12] (step=0048389) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.467618861279593, LR: 0.0003 +[2026-03-03 19:17:20] (step=0048390) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.467814517706907, LR: 0.0003 +[2026-03-03 19:17:28] (step=0048391) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.46801017413422, LR: 0.0003 +[2026-03-03 19:17:36] (step=0048392) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.468205830561534, LR: 0.0003 +[2026-03-03 19:17:43] (step=0048393) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.468401486988848, LR: 0.0003 +[2026-03-03 19:17:51] (step=0048394) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 9.468597143416162, LR: 0.0003 +[2026-03-03 19:17:59] (step=0048395) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.468792799843476, LR: 0.0003 +[2026-03-03 19:18:07] (step=0048396) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.468988456270788, LR: 0.0003 +[2026-03-03 19:18:15] (step=0048397) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.469184112698102, LR: 0.0003 +[2026-03-03 19:18:23] (step=0048398) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.469379769125416, LR: 0.0003 +[2026-03-03 19:18:31] (step=0048399) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.46957542555273, LR: 0.0003 +[2026-03-03 19:18:38] (step=0048400) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.469771081980044, LR: 0.0003 +[2026-03-03 19:18:46] (step=0048401) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.469966738407356, LR: 0.0003 +[2026-03-03 19:18:54] (step=0048402) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.47016239483467, LR: 0.0003 +[2026-03-03 19:19:02] (step=0048403) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 9.470358051261984, LR: 0.0003 +[2026-03-03 19:19:10] (step=0048404) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.470553707689298, LR: 0.0003 +[2026-03-03 19:19:18] (step=0048405) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.470749364116612, LR: 0.0003 +[2026-03-03 19:19:26] (step=0048406) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.470945020543924, LR: 0.0003 +[2026-03-03 19:19:33] (step=0048407) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.471140676971238, LR: 0.0003 +[2026-03-03 19:19:41] (step=0048408) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.471336333398552, LR: 0.0003 +[2026-03-03 19:19:49] (step=0048409) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.471531989825866, LR: 0.0003 +[2026-03-03 19:19:57] (step=0048410) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.471727646253179, LR: 0.0003 +[2026-03-03 19:20:05] (step=0048411) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.471923302680493, LR: 0.0003 +[2026-03-03 19:20:13] (step=0048412) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.472118959107807, LR: 0.0003 +[2026-03-03 19:20:21] (step=0048413) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.47231461553512, LR: 0.0003 +[2026-03-03 19:20:28] (step=0048414) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.472510271962435, LR: 0.0003 +[2026-03-03 19:20:36] (step=0048415) Train Loss: 0.4430, Train Steps/Sec: 0.12, Epoch: 9.472705928389747, LR: 0.0003 +[2026-03-03 19:20:44] (step=0048416) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.47290158481706, LR: 0.0003 +[2026-03-03 19:20:52] (step=0048417) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.473097241244375, LR: 0.0003 +[2026-03-03 19:21:00] (step=0048418) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 9.473292897671689, LR: 0.0003 +[2026-03-03 19:21:08] (step=0048419) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.473488554099003, LR: 0.0003 +[2026-03-03 19:21:16] (step=0048420) Train Loss: 0.4532, Train Steps/Sec: 0.12, Epoch: 9.473684210526315, LR: 0.0003 +[2026-03-03 19:21:24] (step=0048421) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.473879866953629, LR: 0.0003 +[2026-03-03 19:21:32] (step=0048422) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.474075523380943, LR: 0.0003 +[2026-03-03 19:21:39] (step=0048423) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.474271179808257, LR: 0.0003 +[2026-03-03 19:21:47] (step=0048424) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.474466836235571, LR: 0.0003 +[2026-03-03 19:21:55] (step=0048425) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.474662492662883, LR: 0.0003 +[2026-03-03 19:22:03] (step=0048426) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.474858149090197, LR: 0.0003 +[2026-03-03 19:22:11] (step=0048427) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.475053805517511, LR: 0.0003 +[2026-03-03 19:22:19] (step=0048428) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.475249461944825, LR: 0.0003 +[2026-03-03 19:22:27] (step=0048429) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.47544511837214, LR: 0.0003 +[2026-03-03 19:22:34] (step=0048430) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.475640774799452, LR: 0.0003 +[2026-03-03 19:22:42] (step=0048431) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.475836431226766, LR: 0.0003 +[2026-03-03 19:22:50] (step=0048432) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.47603208765408, LR: 0.0003 +[2026-03-03 19:22:58] (step=0048433) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.476227744081394, LR: 0.0003 +[2026-03-03 19:23:06] (step=0048434) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.476423400508708, LR: 0.0003 +[2026-03-03 19:23:14] (step=0048435) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.47661905693602, LR: 0.0003 +[2026-03-03 19:23:22] (step=0048436) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.476814713363334, LR: 0.0003 +[2026-03-03 19:23:29] (step=0048437) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.477010369790648, LR: 0.0003 +[2026-03-03 19:23:37] (step=0048438) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 9.477206026217962, LR: 0.0003 +[2026-03-03 19:23:45] (step=0048439) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.477401682645274, LR: 0.0003 +[2026-03-03 19:23:53] (step=0048440) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 9.477597339072588, LR: 0.0003 +[2026-03-03 19:24:01] (step=0048441) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.477792995499902, LR: 0.0003 +[2026-03-03 19:24:09] (step=0048442) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.477988651927216, LR: 0.0003 +[2026-03-03 19:24:16] (step=0048443) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.47818430835453, LR: 0.0003 +[2026-03-03 19:24:24] (step=0048444) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.478379964781842, LR: 0.0003 +[2026-03-03 19:24:32] (step=0048445) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.478575621209156, LR: 0.0003 +[2026-03-03 19:24:40] (step=0048446) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.47877127763647, LR: 0.0003 +[2026-03-03 19:24:48] (step=0048447) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 9.478966934063784, LR: 0.0003 +[2026-03-03 19:24:56] (step=0048448) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.479162590491098, LR: 0.0003 +[2026-03-03 19:25:04] (step=0048449) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.47935824691841, LR: 0.0003 +[2026-03-03 19:25:11] (step=0048450) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.479553903345725, LR: 0.0003 +[2026-03-03 19:25:19] (step=0048451) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.479749559773039, LR: 0.0003 +[2026-03-03 19:25:27] (step=0048452) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.479945216200353, LR: 0.0003 +[2026-03-03 19:25:35] (step=0048453) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 9.480140872627667, LR: 0.0003 +[2026-03-03 19:25:43] (step=0048454) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.480336529054979, LR: 0.0003 +[2026-03-03 19:25:51] (step=0048455) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.480532185482293, LR: 0.0003 +[2026-03-03 19:25:59] (step=0048456) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.480727841909607, LR: 0.0003 +[2026-03-03 19:26:06] (step=0048457) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.48092349833692, LR: 0.0003 +[2026-03-03 19:26:14] (step=0048458) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.481119154764235, LR: 0.0003 +[2026-03-03 19:26:22] (step=0048459) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.481314811191547, LR: 0.0003 +[2026-03-03 19:26:30] (step=0048460) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 9.481510467618861, LR: 0.0003 +[2026-03-03 19:26:38] (step=0048461) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.481706124046175, LR: 0.0003 +[2026-03-03 19:26:46] (step=0048462) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 9.481901780473489, LR: 0.0003 +[2026-03-03 19:26:54] (step=0048463) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 9.482097436900801, LR: 0.0003 +[2026-03-03 19:27:02] (step=0048464) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.482293093328115, LR: 0.0003 +[2026-03-03 19:27:09] (step=0048465) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 9.48248874975543, LR: 0.0003 +[2026-03-03 19:27:17] (step=0048466) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 9.482684406182743, LR: 0.0003 +[2026-03-03 19:27:25] (step=0048467) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.482880062610057, LR: 0.0003 +[2026-03-03 19:27:33] (step=0048468) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.48307571903737, LR: 0.0003 +[2026-03-03 19:27:41] (step=0048469) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.483271375464684, LR: 0.0003 +[2026-03-03 19:27:49] (step=0048470) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.483467031891998, LR: 0.0003 +[2026-03-03 19:27:57] (step=0048471) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.483662688319312, LR: 0.0003 +[2026-03-03 19:28:05] (step=0048472) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.483858344746626, LR: 0.0003 +[2026-03-03 19:28:12] (step=0048473) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.484054001173938, LR: 0.0003 +[2026-03-03 19:28:20] (step=0048474) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.484249657601252, LR: 0.0003 +[2026-03-03 19:28:28] (step=0048475) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.484445314028566, LR: 0.0003 +[2026-03-03 19:28:36] (step=0048476) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.48464097045588, LR: 0.0003 +[2026-03-03 19:28:44] (step=0048477) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.484836626883194, LR: 0.0003 +[2026-03-03 19:28:52] (step=0048478) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.485032283310506, LR: 0.0003 +[2026-03-03 19:28:59] (step=0048479) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.48522793973782, LR: 0.0003 +[2026-03-03 19:29:07] (step=0048480) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.485423596165134, LR: 0.0003 +[2026-03-03 19:29:15] (step=0048481) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.485619252592448, LR: 0.0003 +[2026-03-03 19:29:23] (step=0048482) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.485814909019762, LR: 0.0003 +[2026-03-03 19:29:31] (step=0048483) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 9.486010565447074, LR: 0.0003 +[2026-03-03 19:29:39] (step=0048484) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 9.486206221874388, LR: 0.0003 +[2026-03-03 19:29:47] (step=0048485) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.486401878301702, LR: 0.0003 +[2026-03-03 19:29:55] (step=0048486) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.486597534729016, LR: 0.0003 +[2026-03-03 19:30:02] (step=0048487) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.48679319115633, LR: 0.0003 +[2026-03-03 19:30:10] (step=0048488) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.486988847583643, LR: 0.0003 +[2026-03-03 19:30:18] (step=0048489) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.487184504010957, LR: 0.0003 +[2026-03-03 19:30:26] (step=0048490) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.48738016043827, LR: 0.0003 +[2026-03-03 19:30:34] (step=0048491) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.487575816865585, LR: 0.0003 +[2026-03-03 19:30:42] (step=0048492) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.487771473292897, LR: 0.0003 +[2026-03-03 19:30:49] (step=0048493) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.48796712972021, LR: 0.0003 +[2026-03-03 19:30:57] (step=0048494) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 9.488162786147525, LR: 0.0003 +[2026-03-03 19:31:05] (step=0048495) Train Loss: 0.4242, Train Steps/Sec: 0.13, Epoch: 9.488358442574839, LR: 0.0003 +[2026-03-03 19:31:13] (step=0048496) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.488554099002153, LR: 0.0003 +[2026-03-03 19:31:21] (step=0048497) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 9.488749755429465, LR: 0.0003 +[2026-03-03 19:31:29] (step=0048498) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.488945411856779, LR: 0.0003 +[2026-03-03 19:31:37] (step=0048499) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.489141068284093, LR: 0.0003 +[2026-03-03 19:31:44] (step=0048500) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.489336724711407, LR: 0.0003 +[2026-03-03 19:31:45] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0048500/ +[2026-03-03 19:31:52] (step=0048501) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.489532381138721, LR: 0.0003 +[2026-03-03 19:32:00] (step=0048502) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.489728037566033, LR: 0.0003 +[2026-03-03 19:32:08] (step=0048503) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 9.489923693993347, LR: 0.0003 +[2026-03-03 19:32:16] (step=0048504) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.490119350420661, LR: 0.0003 +[2026-03-03 19:32:24] (step=0048505) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.490315006847975, LR: 0.0003 +[2026-03-03 19:32:32] (step=0048506) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 9.49051066327529, LR: 0.0003 +[2026-03-03 19:32:39] (step=0048507) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.490706319702602, LR: 0.0003 +[2026-03-03 19:32:47] (step=0048508) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.490901976129916, LR: 0.0003 +[2026-03-03 19:32:55] (step=0048509) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.49109763255723, LR: 0.0003 +[2026-03-03 19:33:03] (step=0048510) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.491293288984544, LR: 0.0003 +[2026-03-03 19:33:11] (step=0048511) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.491488945411858, LR: 0.0003 +[2026-03-03 19:33:19] (step=0048512) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.49168460183917, LR: 0.0003 +[2026-03-03 19:33:27] (step=0048513) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.491880258266484, LR: 0.0003 +[2026-03-03 19:33:35] (step=0048514) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.492075914693798, LR: 0.0003 +[2026-03-03 19:33:42] (step=0048515) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.492271571121112, LR: 0.0003 +[2026-03-03 19:33:50] (step=0048516) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.492467227548424, LR: 0.0003 +[2026-03-03 19:33:58] (step=0048517) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.492662883975738, LR: 0.0003 +[2026-03-03 19:34:06] (step=0048518) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 9.492858540403052, LR: 0.0003 +[2026-03-03 19:34:14] (step=0048519) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.493054196830366, LR: 0.0003 +[2026-03-03 19:34:22] (step=0048520) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 9.49324985325768, LR: 0.0003 +[2026-03-03 19:34:30] (step=0048521) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.493445509684992, LR: 0.0003 +[2026-03-03 19:34:38] (step=0048522) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.493641166112306, LR: 0.0003 +[2026-03-03 19:34:45] (step=0048523) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.49383682253962, LR: 0.0003 +[2026-03-03 19:34:53] (step=0048524) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.494032478966934, LR: 0.0003 +[2026-03-03 19:35:01] (step=0048525) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.494228135394248, LR: 0.0003 +[2026-03-03 19:35:09] (step=0048526) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.49442379182156, LR: 0.0003 +[2026-03-03 19:35:17] (step=0048527) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.494619448248875, LR: 0.0003 +[2026-03-03 19:35:25] (step=0048528) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.494815104676189, LR: 0.0003 +[2026-03-03 19:35:33] (step=0048529) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.495010761103503, LR: 0.0003 +[2026-03-03 19:35:40] (step=0048530) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.495206417530817, LR: 0.0003 +[2026-03-03 19:35:48] (step=0048531) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 9.495402073958129, LR: 0.0003 +[2026-03-03 19:35:56] (step=0048532) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.495597730385443, LR: 0.0003 +[2026-03-03 19:36:04] (step=0048533) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.495793386812757, LR: 0.0003 +[2026-03-03 19:36:12] (step=0048534) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.49598904324007, LR: 0.0003 +[2026-03-03 19:36:20] (step=0048535) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.496184699667385, LR: 0.0003 +[2026-03-03 19:36:28] (step=0048536) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.496380356094697, LR: 0.0003 +[2026-03-03 19:36:35] (step=0048537) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.496576012522011, LR: 0.0003 +[2026-03-03 19:36:43] (step=0048538) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.496771668949325, LR: 0.0003 +[2026-03-03 19:36:51] (step=0048539) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.49696732537664, LR: 0.0003 +[2026-03-03 19:36:59] (step=0048540) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.497162981803953, LR: 0.0003 +[2026-03-03 19:37:07] (step=0048541) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.497358638231265, LR: 0.0003 +[2026-03-03 19:37:15] (step=0048542) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.49755429465858, LR: 0.0003 +[2026-03-03 19:37:23] (step=0048543) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.497749951085893, LR: 0.0003 +[2026-03-03 19:37:30] (step=0048544) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.497945607513207, LR: 0.0003 +[2026-03-03 19:37:38] (step=0048545) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.49814126394052, LR: 0.0003 +[2026-03-03 19:37:46] (step=0048546) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.498336920367834, LR: 0.0003 +[2026-03-03 19:37:54] (step=0048547) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.498532576795148, LR: 0.0003 +[2026-03-03 19:38:02] (step=0048548) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.498728233222462, LR: 0.0003 +[2026-03-03 19:38:10] (step=0048549) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.498923889649776, LR: 0.0003 +[2026-03-03 19:38:18] (step=0048550) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.499119546077088, LR: 0.0003 +[2026-03-03 19:38:25] (step=0048551) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.499315202504402, LR: 0.0003 +[2026-03-03 19:38:33] (step=0048552) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.499510858931716, LR: 0.0003 +[2026-03-03 19:38:41] (step=0048553) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 9.49970651535903, LR: 0.0003 +[2026-03-03 19:38:49] (step=0048554) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.499902171786344, LR: 0.0003 +[2026-03-03 19:38:57] (step=0048555) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 9.500097828213656, LR: 0.0003 +[2026-03-03 19:39:05] (step=0048556) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.50029348464097, LR: 0.0003 +[2026-03-03 19:39:13] (step=0048557) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.500489141068284, LR: 0.0003 +[2026-03-03 19:39:20] (step=0048558) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.500684797495598, LR: 0.0003 +[2026-03-03 19:39:28] (step=0048559) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.500880453922912, LR: 0.0003 +[2026-03-03 19:39:36] (step=0048560) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 9.501076110350224, LR: 0.0003 +[2026-03-03 19:39:44] (step=0048561) Train Loss: 0.4369, Train Steps/Sec: 0.12, Epoch: 9.501271766777538, LR: 0.0003 +[2026-03-03 19:39:52] (step=0048562) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.501467423204852, LR: 0.0003 +[2026-03-03 19:40:00] (step=0048563) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.501663079632166, LR: 0.0003 +[2026-03-03 19:40:08] (step=0048564) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.50185873605948, LR: 0.0003 +[2026-03-03 19:40:16] (step=0048565) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.502054392486793, LR: 0.0003 +[2026-03-03 19:40:23] (step=0048566) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.502250048914107, LR: 0.0003 +[2026-03-03 19:40:31] (step=0048567) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.50244570534142, LR: 0.0003 +[2026-03-03 19:40:39] (step=0048568) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.502641361768735, LR: 0.0003 +[2026-03-03 19:40:47] (step=0048569) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.502837018196047, LR: 0.0003 +[2026-03-03 19:40:55] (step=0048570) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.50303267462336, LR: 0.0003 +[2026-03-03 19:41:03] (step=0048571) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.503228331050675, LR: 0.0003 +[2026-03-03 19:41:11] (step=0048572) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.503423987477989, LR: 0.0003 +[2026-03-03 19:41:19] (step=0048573) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.503619643905303, LR: 0.0003 +[2026-03-03 19:41:26] (step=0048574) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.503815300332615, LR: 0.0003 +[2026-03-03 19:41:34] (step=0048575) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.50401095675993, LR: 0.0003 +[2026-03-03 19:41:42] (step=0048576) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.504206613187243, LR: 0.0003 +[2026-03-03 19:41:50] (step=0048577) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.504402269614557, LR: 0.0003 +[2026-03-03 19:41:58] (step=0048578) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.504597926041871, LR: 0.0003 +[2026-03-03 19:42:06] (step=0048579) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.504793582469183, LR: 0.0003 +[2026-03-03 19:42:14] (step=0048580) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.504989238896497, LR: 0.0003 +[2026-03-03 19:42:21] (step=0048581) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.505184895323811, LR: 0.0003 +[2026-03-03 19:42:29] (step=0048582) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.505380551751125, LR: 0.0003 +[2026-03-03 19:42:37] (step=0048583) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 9.50557620817844, LR: 0.0003 +[2026-03-03 19:42:45] (step=0048584) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 9.505771864605752, LR: 0.0003 +[2026-03-03 19:42:53] (step=0048585) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.505967521033066, LR: 0.0003 +[2026-03-03 19:43:01] (step=0048586) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 9.50616317746038, LR: 0.0003 +[2026-03-03 19:43:08] (step=0048587) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 9.506358833887694, LR: 0.0003 +[2026-03-03 19:43:16] (step=0048588) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.506554490315008, LR: 0.0003 +[2026-03-03 19:43:24] (step=0048589) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.50675014674232, LR: 0.0003 +[2026-03-03 19:43:32] (step=0048590) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.506945803169634, LR: 0.0003 +[2026-03-03 19:43:40] (step=0048591) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.507141459596948, LR: 0.0003 +[2026-03-03 19:43:48] (step=0048592) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 9.507337116024262, LR: 0.0003 +[2026-03-03 19:43:56] (step=0048593) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.507532772451576, LR: 0.0003 +[2026-03-03 19:44:03] (step=0048594) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.507728428878888, LR: 0.0003 +[2026-03-03 19:44:11] (step=0048595) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.507924085306202, LR: 0.0003 +[2026-03-03 19:44:19] (step=0048596) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.508119741733516, LR: 0.0003 +[2026-03-03 19:44:27] (step=0048597) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.50831539816083, LR: 0.0003 +[2026-03-03 19:44:35] (step=0048598) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.508511054588142, LR: 0.0003 +[2026-03-03 19:44:43] (step=0048599) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.508706711015456, LR: 0.0003 +[2026-03-03 19:44:51] (step=0048600) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.50890236744277, LR: 0.0003 +[2026-03-03 19:44:58] (step=0048601) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.509098023870084, LR: 0.0003 +[2026-03-03 19:45:06] (step=0048602) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.509293680297398, LR: 0.0003 +[2026-03-03 19:45:14] (step=0048603) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.50948933672471, LR: 0.0003 +[2026-03-03 19:45:22] (step=0048604) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.509684993152025, LR: 0.0003 +[2026-03-03 19:45:30] (step=0048605) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.509880649579339, LR: 0.0003 +[2026-03-03 19:45:38] (step=0048606) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 9.510076306006653, LR: 0.0003 +[2026-03-03 19:45:46] (step=0048607) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.510271962433967, LR: 0.0003 +[2026-03-03 19:45:53] (step=0048608) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.510467618861279, LR: 0.0003 +[2026-03-03 19:46:02] (step=0048609) Train Loss: 0.4480, Train Steps/Sec: 0.12, Epoch: 9.510663275288593, LR: 0.0003 +[2026-03-03 19:46:09] (step=0048610) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.510858931715907, LR: 0.0003 +[2026-03-03 19:46:17] (step=0048611) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.511054588143221, LR: 0.0003 +[2026-03-03 19:46:25] (step=0048612) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.511250244570535, LR: 0.0003 +[2026-03-03 19:46:33] (step=0048613) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.511445900997847, LR: 0.0003 +[2026-03-03 19:46:41] (step=0048614) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.511641557425161, LR: 0.0003 +[2026-03-03 19:46:49] (step=0048615) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.511837213852475, LR: 0.0003 +[2026-03-03 19:46:57] (step=0048616) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.51203287027979, LR: 0.0003 +[2026-03-03 19:47:04] (step=0048617) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.512228526707103, LR: 0.0003 +[2026-03-03 19:47:12] (step=0048618) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.512424183134415, LR: 0.0003 +[2026-03-03 19:47:20] (step=0048619) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 9.51261983956173, LR: 0.0003 +[2026-03-03 19:47:28] (step=0048620) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 9.512815495989043, LR: 0.0003 +[2026-03-03 19:47:36] (step=0048621) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.513011152416357, LR: 0.0003 +[2026-03-03 19:47:44] (step=0048622) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.51320680884367, LR: 0.0003 +[2026-03-03 19:47:52] (step=0048623) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.513402465270984, LR: 0.0003 +[2026-03-03 19:48:00] (step=0048624) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.513598121698298, LR: 0.0003 +[2026-03-03 19:48:07] (step=0048625) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.513793778125612, LR: 0.0003 +[2026-03-03 19:48:15] (step=0048626) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.513989434552926, LR: 0.0003 +[2026-03-03 19:48:23] (step=0048627) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.514185090980238, LR: 0.0003 +[2026-03-03 19:48:31] (step=0048628) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.514380747407552, LR: 0.0003 +[2026-03-03 19:48:39] (step=0048629) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 9.514576403834866, LR: 0.0003 +[2026-03-03 19:48:47] (step=0048630) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.51477206026218, LR: 0.0003 +[2026-03-03 19:48:55] (step=0048631) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.514967716689494, LR: 0.0003 +[2026-03-03 19:49:02] (step=0048632) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.515163373116806, LR: 0.0003 +[2026-03-03 19:49:10] (step=0048633) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.51535902954412, LR: 0.0003 +[2026-03-03 19:49:18] (step=0048634) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.515554685971434, LR: 0.0003 +[2026-03-03 19:49:26] (step=0048635) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 9.515750342398748, LR: 0.0003 +[2026-03-03 19:49:34] (step=0048636) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.515945998826062, LR: 0.0003 +[2026-03-03 19:49:42] (step=0048637) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.516141655253374, LR: 0.0003 +[2026-03-03 19:49:50] (step=0048638) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.516337311680688, LR: 0.0003 +[2026-03-03 19:49:57] (step=0048639) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.516532968108002, LR: 0.0003 +[2026-03-03 19:50:05] (step=0048640) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.516728624535316, LR: 0.0003 +[2026-03-03 19:50:13] (step=0048641) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.51692428096263, LR: 0.0003 +[2026-03-03 19:50:21] (step=0048642) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.517119937389943, LR: 0.0003 +[2026-03-03 19:50:29] (step=0048643) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.517315593817257, LR: 0.0003 +[2026-03-03 19:50:37] (step=0048644) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.51751125024457, LR: 0.0003 +[2026-03-03 19:50:45] (step=0048645) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.517706906671885, LR: 0.0003 +[2026-03-03 19:50:52] (step=0048646) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.517902563099199, LR: 0.0003 +[2026-03-03 19:51:00] (step=0048647) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.518098219526511, LR: 0.0003 +[2026-03-03 19:51:08] (step=0048648) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.518293875953825, LR: 0.0003 +[2026-03-03 19:51:16] (step=0048649) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.518489532381139, LR: 0.0003 +[2026-03-03 19:51:24] (step=0048650) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.518685188808453, LR: 0.0003 +[2026-03-03 19:51:32] (step=0048651) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.518880845235765, LR: 0.0003 +[2026-03-03 19:51:40] (step=0048652) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 9.51907650166308, LR: 0.0003 +[2026-03-03 19:51:48] (step=0048653) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 9.519272158090393, LR: 0.0003 +[2026-03-03 19:51:56] (step=0048654) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 9.519467814517707, LR: 0.0003 +[2026-03-03 19:52:03] (step=0048655) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 9.519663470945021, LR: 0.0003 +[2026-03-03 19:52:11] (step=0048656) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.519859127372333, LR: 0.0003 +[2026-03-03 19:52:19] (step=0048657) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 9.520054783799647, LR: 0.0003 +[2026-03-03 19:52:27] (step=0048658) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.520250440226961, LR: 0.0003 +[2026-03-03 19:52:35] (step=0048659) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.520446096654275, LR: 0.0003 +[2026-03-03 19:52:43] (step=0048660) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.52064175308159, LR: 0.0003 +[2026-03-03 19:52:51] (step=0048661) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.520837409508902, LR: 0.0003 +[2026-03-03 19:52:58] (step=0048662) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.521033065936216, LR: 0.0003 +[2026-03-03 19:53:06] (step=0048663) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.52122872236353, LR: 0.0003 +[2026-03-03 19:53:14] (step=0048664) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 9.521424378790844, LR: 0.0003 +[2026-03-03 19:53:22] (step=0048665) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.521620035218158, LR: 0.0003 +[2026-03-03 19:53:30] (step=0048666) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.52181569164547, LR: 0.0003 +[2026-03-03 19:53:38] (step=0048667) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.522011348072784, LR: 0.0003 +[2026-03-03 19:53:46] (step=0048668) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.522207004500098, LR: 0.0003 +[2026-03-03 19:53:53] (step=0048669) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.522402660927412, LR: 0.0003 +[2026-03-03 19:54:01] (step=0048670) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 9.522598317354726, LR: 0.0003 +[2026-03-03 19:54:09] (step=0048671) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 9.522793973782038, LR: 0.0003 +[2026-03-03 19:54:17] (step=0048672) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.522989630209352, LR: 0.0003 +[2026-03-03 19:54:25] (step=0048673) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.523185286636666, LR: 0.0003 +[2026-03-03 19:54:33] (step=0048674) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.52338094306398, LR: 0.0003 +[2026-03-03 19:54:41] (step=0048675) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.523576599491292, LR: 0.0003 +[2026-03-03 19:54:49] (step=0048676) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.523772255918606, LR: 0.0003 +[2026-03-03 19:54:56] (step=0048677) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.52396791234592, LR: 0.0003 +[2026-03-03 19:55:04] (step=0048678) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.524163568773234, LR: 0.0003 +[2026-03-03 19:55:12] (step=0048679) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.524359225200548, LR: 0.0003 +[2026-03-03 19:55:20] (step=0048680) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.52455488162786, LR: 0.0003 +[2026-03-03 19:55:28] (step=0048681) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.524750538055175, LR: 0.0003 +[2026-03-03 19:55:36] (step=0048682) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.524946194482489, LR: 0.0003 +[2026-03-03 19:55:44] (step=0048683) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.525141850909803, LR: 0.0003 +[2026-03-03 19:55:51] (step=0048684) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.525337507337117, LR: 0.0003 +[2026-03-03 19:55:59] (step=0048685) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.525533163764429, LR: 0.0003 +[2026-03-03 19:56:07] (step=0048686) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.525728820191743, LR: 0.0003 +[2026-03-03 19:56:15] (step=0048687) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.525924476619057, LR: 0.0003 +[2026-03-03 19:56:23] (step=0048688) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.526120133046371, LR: 0.0003 +[2026-03-03 19:56:31] (step=0048689) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.526315789473685, LR: 0.0003 +[2026-03-03 19:56:39] (step=0048690) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.526511445900997, LR: 0.0003 +[2026-03-03 19:56:46] (step=0048691) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.526707102328311, LR: 0.0003 +[2026-03-03 19:56:54] (step=0048692) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.526902758755625, LR: 0.0003 +[2026-03-03 19:57:02] (step=0048693) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.52709841518294, LR: 0.0003 +[2026-03-03 19:57:10] (step=0048694) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.527294071610253, LR: 0.0003 +[2026-03-03 19:57:18] (step=0048695) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.527489728037565, LR: 0.0003 +[2026-03-03 19:57:26] (step=0048696) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.52768538446488, LR: 0.0003 +[2026-03-03 19:57:34] (step=0048697) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.527881040892193, LR: 0.0003 +[2026-03-03 19:57:41] (step=0048698) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.528076697319507, LR: 0.0003 +[2026-03-03 19:57:49] (step=0048699) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.528272353746821, LR: 0.0003 +[2026-03-03 19:57:57] (step=0048700) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.528468010174134, LR: 0.0003 +[2026-03-03 19:58:05] (step=0048701) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.528663666601448, LR: 0.0003 +[2026-03-03 19:58:13] (step=0048702) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.528859323028762, LR: 0.0003 +[2026-03-03 19:58:21] (step=0048703) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.529054979456076, LR: 0.0003 +[2026-03-03 19:58:29] (step=0048704) Train Loss: 0.4421, Train Steps/Sec: 0.12, Epoch: 9.529250635883388, LR: 0.0003 +[2026-03-03 19:58:37] (step=0048705) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.529446292310702, LR: 0.0003 +[2026-03-03 19:58:44] (step=0048706) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.529641948738016, LR: 0.0003 +[2026-03-03 19:58:52] (step=0048707) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.52983760516533, LR: 0.0003 +[2026-03-03 19:59:00] (step=0048708) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.530033261592644, LR: 0.0003 +[2026-03-03 19:59:08] (step=0048709) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.530228918019956, LR: 0.0003 +[2026-03-03 19:59:16] (step=0048710) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.53042457444727, LR: 0.0003 +[2026-03-03 19:59:24] (step=0048711) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.530620230874584, LR: 0.0003 +[2026-03-03 19:59:32] (step=0048712) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.530815887301898, LR: 0.0003 +[2026-03-03 19:59:39] (step=0048713) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.531011543729212, LR: 0.0003 +[2026-03-03 19:59:47] (step=0048714) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.531207200156524, LR: 0.0003 +[2026-03-03 19:59:55] (step=0048715) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.531402856583838, LR: 0.0003 +[2026-03-03 20:00:03] (step=0048716) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.531598513011152, LR: 0.0003 +[2026-03-03 20:00:11] (step=0048717) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.531794169438466, LR: 0.0003 +[2026-03-03 20:00:19] (step=0048718) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.53198982586578, LR: 0.0003 +[2026-03-03 20:00:27] (step=0048719) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.532185482293093, LR: 0.0003 +[2026-03-03 20:00:34] (step=0048720) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.532381138720407, LR: 0.0003 +[2026-03-03 20:00:42] (step=0048721) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.53257679514772, LR: 0.0003 +[2026-03-03 20:00:50] (step=0048722) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.532772451575035, LR: 0.0003 +[2026-03-03 20:00:58] (step=0048723) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.532968108002349, LR: 0.0003 +[2026-03-03 20:01:06] (step=0048724) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.533163764429661, LR: 0.0003 +[2026-03-03 20:01:14] (step=0048725) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 9.533359420856975, LR: 0.0003 +[2026-03-03 20:01:22] (step=0048726) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.533555077284289, LR: 0.0003 +[2026-03-03 20:01:29] (step=0048727) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.533750733711603, LR: 0.0003 +[2026-03-03 20:01:37] (step=0048728) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 9.533946390138915, LR: 0.0003 +[2026-03-03 20:01:45] (step=0048729) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.53414204656623, LR: 0.0003 +[2026-03-03 20:01:53] (step=0048730) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.534337702993543, LR: 0.0003 +[2026-03-03 20:02:01] (step=0048731) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.534533359420857, LR: 0.0003 +[2026-03-03 20:02:09] (step=0048732) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.534729015848171, LR: 0.0003 +[2026-03-03 20:02:17] (step=0048733) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.534924672275483, LR: 0.0003 +[2026-03-03 20:02:24] (step=0048734) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.535120328702797, LR: 0.0003 +[2026-03-03 20:02:32] (step=0048735) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.535315985130111, LR: 0.0003 +[2026-03-03 20:02:40] (step=0048736) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.535511641557425, LR: 0.0003 +[2026-03-03 20:02:48] (step=0048737) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 9.53570729798474, LR: 0.0003 +[2026-03-03 20:02:56] (step=0048738) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.535902954412052, LR: 0.0003 +[2026-03-03 20:03:04] (step=0048739) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.536098610839366, LR: 0.0003 +[2026-03-03 20:03:12] (step=0048740) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 9.53629426726668, LR: 0.0003 +[2026-03-03 20:03:20] (step=0048741) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.536489923693994, LR: 0.0003 +[2026-03-03 20:03:27] (step=0048742) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.536685580121308, LR: 0.0003 +[2026-03-03 20:03:35] (step=0048743) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.53688123654862, LR: 0.0003 +[2026-03-03 20:03:43] (step=0048744) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.537076892975934, LR: 0.0003 +[2026-03-03 20:03:51] (step=0048745) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.537272549403248, LR: 0.0003 +[2026-03-03 20:03:59] (step=0048746) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 9.537468205830562, LR: 0.0003 +[2026-03-03 20:04:07] (step=0048747) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.537663862257876, LR: 0.0003 +[2026-03-03 20:04:15] (step=0048748) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.537859518685188, LR: 0.0003 +[2026-03-03 20:04:22] (step=0048749) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.538055175112502, LR: 0.0003 +[2026-03-03 20:04:30] (step=0048750) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.538250831539816, LR: 0.0003 +[2026-03-03 20:04:38] (step=0048751) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.53844648796713, LR: 0.0003 +[2026-03-03 20:04:46] (step=0048752) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.538642144394444, LR: 0.0003 +[2026-03-03 20:04:54] (step=0048753) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.538837800821756, LR: 0.0003 +[2026-03-03 20:05:02] (step=0048754) Train Loss: 0.4401, Train Steps/Sec: 0.12, Epoch: 9.53903345724907, LR: 0.0003 +[2026-03-03 20:05:10] (step=0048755) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.539229113676384, LR: 0.0003 +[2026-03-03 20:05:18] (step=0048756) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.539424770103698, LR: 0.0003 +[2026-03-03 20:05:25] (step=0048757) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.53962042653101, LR: 0.0003 +[2026-03-03 20:05:33] (step=0048758) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.539816082958325, LR: 0.0003 +[2026-03-03 20:05:41] (step=0048759) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 9.540011739385639, LR: 0.0003 +[2026-03-03 20:05:49] (step=0048760) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.540207395812953, LR: 0.0003 +[2026-03-03 20:05:57] (step=0048761) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.540403052240267, LR: 0.0003 +[2026-03-03 20:06:05] (step=0048762) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.540598708667579, LR: 0.0003 +[2026-03-03 20:06:13] (step=0048763) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.540794365094893, LR: 0.0003 +[2026-03-03 20:06:20] (step=0048764) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.540990021522207, LR: 0.0003 +[2026-03-03 20:06:28] (step=0048765) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.541185677949521, LR: 0.0003 +[2026-03-03 20:06:36] (step=0048766) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.541381334376835, LR: 0.0003 +[2026-03-03 20:06:44] (step=0048767) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.541576990804147, LR: 0.0003 +[2026-03-03 20:06:52] (step=0048768) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.541772647231461, LR: 0.0003 +[2026-03-03 20:07:00] (step=0048769) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.541968303658775, LR: 0.0003 +[2026-03-03 20:07:08] (step=0048770) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.54216396008609, LR: 0.0003 +[2026-03-03 20:07:16] (step=0048771) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.542359616513403, LR: 0.0003 +[2026-03-03 20:07:23] (step=0048772) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.542555272940715, LR: 0.0003 +[2026-03-03 20:07:31] (step=0048773) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.54275092936803, LR: 0.0003 +[2026-03-03 20:07:39] (step=0048774) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.542946585795343, LR: 0.0003 +[2026-03-03 20:07:47] (step=0048775) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.543142242222658, LR: 0.0003 +[2026-03-03 20:07:55] (step=0048776) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.543337898649972, LR: 0.0003 +[2026-03-03 20:08:03] (step=0048777) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.543533555077284, LR: 0.0003 +[2026-03-03 20:08:11] (step=0048778) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 9.543729211504598, LR: 0.0003 +[2026-03-03 20:08:18] (step=0048779) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.543924867931912, LR: 0.0003 +[2026-03-03 20:08:26] (step=0048780) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.544120524359226, LR: 0.0003 +[2026-03-03 20:08:34] (step=0048781) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.544316180786538, LR: 0.0003 +[2026-03-03 20:08:42] (step=0048782) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.544511837213852, LR: 0.0003 +[2026-03-03 20:08:50] (step=0048783) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.544707493641166, LR: 0.0003 +[2026-03-03 20:08:58] (step=0048784) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.54490315006848, LR: 0.0003 +[2026-03-03 20:09:06] (step=0048785) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.545098806495794, LR: 0.0003 +[2026-03-03 20:09:14] (step=0048786) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.545294462923106, LR: 0.0003 +[2026-03-03 20:09:21] (step=0048787) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.54549011935042, LR: 0.0003 +[2026-03-03 20:09:29] (step=0048788) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.545685775777734, LR: 0.0003 +[2026-03-03 20:09:37] (step=0048789) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 9.545881432205048, LR: 0.0003 +[2026-03-03 20:09:45] (step=0048790) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.546077088632362, LR: 0.0003 +[2026-03-03 20:09:53] (step=0048791) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.546272745059674, LR: 0.0003 +[2026-03-03 20:10:01] (step=0048792) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.546468401486988, LR: 0.0003 +[2026-03-03 20:10:09] (step=0048793) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.546664057914303, LR: 0.0003 +[2026-03-03 20:10:16] (step=0048794) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 9.546859714341617, LR: 0.0003 +[2026-03-03 20:10:24] (step=0048795) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.54705537076893, LR: 0.0003 +[2026-03-03 20:10:32] (step=0048796) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 9.547251027196243, LR: 0.0003 +[2026-03-03 20:10:40] (step=0048797) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.547446683623557, LR: 0.0003 +[2026-03-03 20:10:48] (step=0048798) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.54764234005087, LR: 0.0003 +[2026-03-03 20:10:56] (step=0048799) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.547837996478185, LR: 0.0003 +[2026-03-03 20:11:03] (step=0048800) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.548033652905499, LR: 0.0003 +[2026-03-03 20:11:11] (step=0048801) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.548229309332811, LR: 0.0003 +[2026-03-03 20:11:19] (step=0048802) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 9.548424965760125, LR: 0.0003 +[2026-03-03 20:11:27] (step=0048803) Train Loss: 0.4360, Train Steps/Sec: 0.12, Epoch: 9.548620622187439, LR: 0.0003 +[2026-03-03 20:11:35] (step=0048804) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.548816278614753, LR: 0.0003 +[2026-03-03 20:11:43] (step=0048805) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.549011935042065, LR: 0.0003 +[2026-03-03 20:11:51] (step=0048806) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.54920759146938, LR: 0.0003 +[2026-03-03 20:11:59] (step=0048807) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.549403247896693, LR: 0.0003 +[2026-03-03 20:12:07] (step=0048808) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 9.549598904324007, LR: 0.0003 +[2026-03-03 20:12:14] (step=0048809) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.549794560751321, LR: 0.0003 +[2026-03-03 20:12:22] (step=0048810) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.549990217178634, LR: 0.0003 +[2026-03-03 20:12:30] (step=0048811) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.550185873605948, LR: 0.0003 +[2026-03-03 20:12:38] (step=0048812) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.550381530033262, LR: 0.0003 +[2026-03-03 20:12:46] (step=0048813) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.550577186460576, LR: 0.0003 +[2026-03-03 20:12:54] (step=0048814) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.55077284288789, LR: 0.0003 +[2026-03-03 20:13:02] (step=0048815) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 9.550968499315202, LR: 0.0003 +[2026-03-03 20:13:09] (step=0048816) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.551164155742516, LR: 0.0003 +[2026-03-03 20:13:17] (step=0048817) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.55135981216983, LR: 0.0003 +[2026-03-03 20:13:25] (step=0048818) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.551555468597144, LR: 0.0003 +[2026-03-03 20:13:33] (step=0048819) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 9.551751125024458, LR: 0.0003 +[2026-03-03 20:13:41] (step=0048820) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.55194678145177, LR: 0.0003 +[2026-03-03 20:13:49] (step=0048821) Train Loss: 0.4348, Train Steps/Sec: 0.12, Epoch: 9.552142437879084, LR: 0.0003 +[2026-03-03 20:13:57] (step=0048822) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.552338094306398, LR: 0.0003 +[2026-03-03 20:14:05] (step=0048823) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 9.552533750733712, LR: 0.0003 +[2026-03-03 20:14:13] (step=0048824) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.552729407161026, LR: 0.0003 +[2026-03-03 20:14:20] (step=0048825) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.552925063588338, LR: 0.0003 +[2026-03-03 20:14:28] (step=0048826) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.553120720015652, LR: 0.0003 +[2026-03-03 20:14:36] (step=0048827) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.553316376442966, LR: 0.0003 +[2026-03-03 20:14:44] (step=0048828) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 9.55351203287028, LR: 0.0003 +[2026-03-03 20:14:52] (step=0048829) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 9.553707689297594, LR: 0.0003 +[2026-03-03 20:15:00] (step=0048830) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.553903345724907, LR: 0.0003 +[2026-03-03 20:15:08] (step=0048831) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.55409900215222, LR: 0.0003 +[2026-03-03 20:15:15] (step=0048832) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.554294658579535, LR: 0.0003 +[2026-03-03 20:15:23] (step=0048833) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.554490315006849, LR: 0.0003 +[2026-03-03 20:15:31] (step=0048834) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.55468597143416, LR: 0.0003 +[2026-03-03 20:15:39] (step=0048835) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.554881627861475, LR: 0.0003 +[2026-03-03 20:15:47] (step=0048836) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.555077284288789, LR: 0.0003 +[2026-03-03 20:15:55] (step=0048837) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.555272940716103, LR: 0.0003 +[2026-03-03 20:16:03] (step=0048838) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.555468597143417, LR: 0.0003 +[2026-03-03 20:16:11] (step=0048839) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.555664253570729, LR: 0.0003 +[2026-03-03 20:16:18] (step=0048840) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 9.555859909998043, LR: 0.0003 +[2026-03-03 20:16:26] (step=0048841) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.556055566425357, LR: 0.0003 +[2026-03-03 20:16:34] (step=0048842) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.556251222852671, LR: 0.0003 +[2026-03-03 20:16:42] (step=0048843) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.556446879279985, LR: 0.0003 +[2026-03-03 20:16:50] (step=0048844) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.556642535707297, LR: 0.0003 +[2026-03-03 20:16:58] (step=0048845) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.556838192134611, LR: 0.0003 +[2026-03-03 20:17:06] (step=0048846) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 9.557033848561925, LR: 0.0003 +[2026-03-03 20:17:13] (step=0048847) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.55722950498924, LR: 0.0003 +[2026-03-03 20:17:21] (step=0048848) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.557425161416553, LR: 0.0003 +[2026-03-03 20:17:29] (step=0048849) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.557620817843866, LR: 0.0003 +[2026-03-03 20:17:37] (step=0048850) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.55781647427118, LR: 0.0003 +[2026-03-03 20:17:45] (step=0048851) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.558012130698494, LR: 0.0003 +[2026-03-03 20:17:53] (step=0048852) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 9.558207787125808, LR: 0.0003 +[2026-03-03 20:18:01] (step=0048853) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 9.558403443553122, LR: 0.0003 +[2026-03-03 20:18:09] (step=0048854) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.558599099980434, LR: 0.0003 +[2026-03-03 20:18:16] (step=0048855) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 9.558794756407748, LR: 0.0003 +[2026-03-03 20:18:24] (step=0048856) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.558990412835062, LR: 0.0003 +[2026-03-03 20:18:32] (step=0048857) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.559186069262376, LR: 0.0003 +[2026-03-03 20:18:40] (step=0048858) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.559381725689688, LR: 0.0003 +[2026-03-03 20:18:48] (step=0048859) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.559577382117002, LR: 0.0003 +[2026-03-03 20:18:56] (step=0048860) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.559773038544316, LR: 0.0003 +[2026-03-03 20:19:04] (step=0048861) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.55996869497163, LR: 0.0003 +[2026-03-03 20:19:11] (step=0048862) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.560164351398944, LR: 0.0003 +[2026-03-03 20:19:19] (step=0048863) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.560360007826256, LR: 0.0003 +[2026-03-03 20:19:27] (step=0048864) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.56055566425357, LR: 0.0003 +[2026-03-03 20:19:35] (step=0048865) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.560751320680884, LR: 0.0003 +[2026-03-03 20:19:43] (step=0048866) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.560946977108198, LR: 0.0003 +[2026-03-03 20:19:51] (step=0048867) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.561142633535512, LR: 0.0003 +[2026-03-03 20:19:59] (step=0048868) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.561338289962825, LR: 0.0003 +[2026-03-03 20:20:06] (step=0048869) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.561533946390139, LR: 0.0003 +[2026-03-03 20:20:14] (step=0048870) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.561729602817453, LR: 0.0003 +[2026-03-03 20:20:22] (step=0048871) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.561925259244767, LR: 0.0003 +[2026-03-03 20:20:30] (step=0048872) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.56212091567208, LR: 0.0003 +[2026-03-03 20:20:38] (step=0048873) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.562316572099393, LR: 0.0003 +[2026-03-03 20:20:46] (step=0048874) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.562512228526707, LR: 0.0003 +[2026-03-03 20:20:54] (step=0048875) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.56270788495402, LR: 0.0003 +[2026-03-03 20:21:02] (step=0048876) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.562903541381335, LR: 0.0003 +[2026-03-03 20:21:09] (step=0048877) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.563099197808649, LR: 0.0003 +[2026-03-03 20:21:17] (step=0048878) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.563294854235961, LR: 0.0003 +[2026-03-03 20:21:25] (step=0048879) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.563490510663275, LR: 0.0003 +[2026-03-03 20:21:33] (step=0048880) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.563686167090589, LR: 0.0003 +[2026-03-03 20:21:41] (step=0048881) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.563881823517903, LR: 0.0003 +[2026-03-03 20:21:49] (step=0048882) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 9.564077479945217, LR: 0.0003 +[2026-03-03 20:21:57] (step=0048883) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.56427313637253, LR: 0.0003 +[2026-03-03 20:22:04] (step=0048884) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 9.564468792799843, LR: 0.0003 +[2026-03-03 20:22:12] (step=0048885) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.564664449227157, LR: 0.0003 +[2026-03-03 20:22:20] (step=0048886) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.564860105654471, LR: 0.0003 +[2026-03-03 20:22:28] (step=0048887) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.565055762081784, LR: 0.0003 +[2026-03-03 20:22:36] (step=0048888) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.565251418509098, LR: 0.0003 +[2026-03-03 20:22:44] (step=0048889) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.565447074936412, LR: 0.0003 +[2026-03-03 20:22:52] (step=0048890) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.565642731363726, LR: 0.0003 +[2026-03-03 20:22:59] (step=0048891) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 9.56583838779104, LR: 0.0003 +[2026-03-03 20:23:07] (step=0048892) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.566034044218352, LR: 0.0003 +[2026-03-03 20:23:15] (step=0048893) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.566229700645666, LR: 0.0003 +[2026-03-03 20:23:23] (step=0048894) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.56642535707298, LR: 0.0003 +[2026-03-03 20:23:31] (step=0048895) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.566621013500294, LR: 0.0003 +[2026-03-03 20:23:39] (step=0048896) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.566816669927608, LR: 0.0003 +[2026-03-03 20:23:47] (step=0048897) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.56701232635492, LR: 0.0003 +[2026-03-03 20:23:54] (step=0048898) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.567207982782234, LR: 0.0003 +[2026-03-03 20:24:02] (step=0048899) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.567403639209548, LR: 0.0003 +[2026-03-03 20:24:10] (step=0048900) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.567599295636862, LR: 0.0003 +[2026-03-03 20:24:18] (step=0048901) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.567794952064176, LR: 0.0003 +[2026-03-03 20:24:26] (step=0048902) Train Loss: 0.4586, Train Steps/Sec: 0.12, Epoch: 9.567990608491488, LR: 0.0003 +[2026-03-03 20:24:34] (step=0048903) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.568186264918802, LR: 0.0003 +[2026-03-03 20:24:42] (step=0048904) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.568381921346116, LR: 0.0003 +[2026-03-03 20:24:50] (step=0048905) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.56857757777343, LR: 0.0003 +[2026-03-03 20:24:58] (step=0048906) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.568773234200744, LR: 0.0003 +[2026-03-03 20:25:05] (step=0048907) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 9.568968890628057, LR: 0.0003 +[2026-03-03 20:25:13] (step=0048908) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.56916454705537, LR: 0.0003 +[2026-03-03 20:25:21] (step=0048909) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.569360203482685, LR: 0.0003 +[2026-03-03 20:25:29] (step=0048910) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.569555859909999, LR: 0.0003 +[2026-03-03 20:25:37] (step=0048911) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.56975151633731, LR: 0.0003 +[2026-03-03 20:25:45] (step=0048912) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.569947172764625, LR: 0.0003 +[2026-03-03 20:25:53] (step=0048913) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.570142829191939, LR: 0.0003 +[2026-03-03 20:26:00] (step=0048914) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.570338485619253, LR: 0.0003 +[2026-03-03 20:26:08] (step=0048915) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.570534142046567, LR: 0.0003 +[2026-03-03 20:26:16] (step=0048916) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.570729798473879, LR: 0.0003 +[2026-03-03 20:26:24] (step=0048917) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.570925454901193, LR: 0.0003 +[2026-03-03 20:26:32] (step=0048918) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 9.571121111328507, LR: 0.0003 +[2026-03-03 20:26:40] (step=0048919) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.571316767755821, LR: 0.0003 +[2026-03-03 20:26:48] (step=0048920) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.571512424183135, LR: 0.0003 +[2026-03-03 20:26:56] (step=0048921) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.571708080610447, LR: 0.0003 +[2026-03-03 20:27:03] (step=0048922) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.571903737037761, LR: 0.0003 +[2026-03-03 20:27:11] (step=0048923) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.572099393465075, LR: 0.0003 +[2026-03-03 20:27:19] (step=0048924) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.57229504989239, LR: 0.0003 +[2026-03-03 20:27:27] (step=0048925) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.572490706319703, LR: 0.0003 +[2026-03-03 20:27:35] (step=0048926) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.572686362747016, LR: 0.0003 +[2026-03-03 20:27:43] (step=0048927) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.57288201917433, LR: 0.0003 +[2026-03-03 20:27:51] (step=0048928) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 9.573077675601644, LR: 0.0003 +[2026-03-03 20:27:58] (step=0048929) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.573273332028958, LR: 0.0003 +[2026-03-03 20:28:06] (step=0048930) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.573468988456272, LR: 0.0003 +[2026-03-03 20:28:14] (step=0048931) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.573664644883584, LR: 0.0003 +[2026-03-03 20:28:22] (step=0048932) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.573860301310898, LR: 0.0003 +[2026-03-03 20:28:30] (step=0048933) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.574055957738212, LR: 0.0003 +[2026-03-03 20:28:38] (step=0048934) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.574251614165526, LR: 0.0003 +[2026-03-03 20:28:46] (step=0048935) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.57444727059284, LR: 0.0003 +[2026-03-03 20:28:53] (step=0048936) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.574642927020152, LR: 0.0003 +[2026-03-03 20:29:01] (step=0048937) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.574838583447466, LR: 0.0003 +[2026-03-03 20:29:09] (step=0048938) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.57503423987478, LR: 0.0003 +[2026-03-03 20:29:17] (step=0048939) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.575229896302094, LR: 0.0003 +[2026-03-03 20:29:25] (step=0048940) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.575425552729406, LR: 0.0003 +[2026-03-03 20:29:33] (step=0048941) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.57562120915672, LR: 0.0003 +[2026-03-03 20:29:41] (step=0048942) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.575816865584034, LR: 0.0003 +[2026-03-03 20:29:49] (step=0048943) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.576012522011348, LR: 0.0003 +[2026-03-03 20:29:56] (step=0048944) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.576208178438662, LR: 0.0003 +[2026-03-03 20:30:04] (step=0048945) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.576403834865975, LR: 0.0003 +[2026-03-03 20:30:12] (step=0048946) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.576599491293289, LR: 0.0003 +[2026-03-03 20:30:20] (step=0048947) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.576795147720603, LR: 0.0003 +[2026-03-03 20:30:28] (step=0048948) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.576990804147917, LR: 0.0003 +[2026-03-03 20:30:36] (step=0048949) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.57718646057523, LR: 0.0003 +[2026-03-03 20:30:44] (step=0048950) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.577382117002543, LR: 0.0003 +[2026-03-03 20:30:52] (step=0048951) Train Loss: 0.4396, Train Steps/Sec: 0.12, Epoch: 9.577577773429857, LR: 0.0003 +[2026-03-03 20:30:59] (step=0048952) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 9.57777342985717, LR: 0.0003 +[2026-03-03 20:31:07] (step=0048953) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 9.577969086284485, LR: 0.0003 +[2026-03-03 20:31:15] (step=0048954) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.578164742711799, LR: 0.0003 +[2026-03-03 20:31:23] (step=0048955) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.578360399139111, LR: 0.0003 +[2026-03-03 20:31:31] (step=0048956) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 9.578556055566425, LR: 0.0003 +[2026-03-03 20:31:39] (step=0048957) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.578751711993739, LR: 0.0003 +[2026-03-03 20:31:47] (step=0048958) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.578947368421053, LR: 0.0003 +[2026-03-03 20:31:54] (step=0048959) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 9.579143024848367, LR: 0.0003 +[2026-03-03 20:32:02] (step=0048960) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.57933868127568, LR: 0.0003 +[2026-03-03 20:32:10] (step=0048961) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.579534337702993, LR: 0.0003 +[2026-03-03 20:32:18] (step=0048962) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.579729994130307, LR: 0.0003 +[2026-03-03 20:32:26] (step=0048963) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.579925650557621, LR: 0.0003 +[2026-03-03 20:32:34] (step=0048964) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.580121306984934, LR: 0.0003 +[2026-03-03 20:32:42] (step=0048965) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.580316963412248, LR: 0.0003 +[2026-03-03 20:32:50] (step=0048966) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.580512619839562, LR: 0.0003 +[2026-03-03 20:32:57] (step=0048967) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.580708276266876, LR: 0.0003 +[2026-03-03 20:33:05] (step=0048968) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.58090393269419, LR: 0.0003 +[2026-03-03 20:33:13] (step=0048969) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.581099589121502, LR: 0.0003 +[2026-03-03 20:33:21] (step=0048970) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.581295245548816, LR: 0.0003 +[2026-03-03 20:33:29] (step=0048971) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.58149090197613, LR: 0.0003 +[2026-03-03 20:33:37] (step=0048972) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.581686558403444, LR: 0.0003 +[2026-03-03 20:33:45] (step=0048973) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.581882214830758, LR: 0.0003 +[2026-03-03 20:33:53] (step=0048974) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.58207787125807, LR: 0.0003 +[2026-03-03 20:34:00] (step=0048975) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 9.582273527685384, LR: 0.0003 +[2026-03-03 20:34:08] (step=0048976) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.582469184112698, LR: 0.0003 +[2026-03-03 20:34:16] (step=0048977) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.582664840540012, LR: 0.0003 +[2026-03-03 20:34:24] (step=0048978) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.582860496967326, LR: 0.0003 +[2026-03-03 20:34:32] (step=0048979) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.583056153394638, LR: 0.0003 +[2026-03-03 20:34:40] (step=0048980) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.583251809821952, LR: 0.0003 +[2026-03-03 20:34:48] (step=0048981) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.583447466249266, LR: 0.0003 +[2026-03-03 20:34:55] (step=0048982) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.58364312267658, LR: 0.0003 +[2026-03-03 20:35:03] (step=0048983) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.583838779103894, LR: 0.0003 +[2026-03-03 20:35:11] (step=0048984) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.584034435531207, LR: 0.0003 +[2026-03-03 20:35:19] (step=0048985) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.58423009195852, LR: 0.0003 +[2026-03-03 20:35:27] (step=0048986) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.584425748385835, LR: 0.0003 +[2026-03-03 20:35:35] (step=0048987) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 9.584621404813149, LR: 0.0003 +[2026-03-03 20:35:43] (step=0048988) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 9.584817061240463, LR: 0.0003 +[2026-03-03 20:35:50] (step=0048989) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.585012717667775, LR: 0.0003 +[2026-03-03 20:35:58] (step=0048990) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.585208374095089, LR: 0.0003 +[2026-03-03 20:36:06] (step=0048991) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.585404030522403, LR: 0.0003 +[2026-03-03 20:36:14] (step=0048992) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 9.585599686949717, LR: 0.0003 +[2026-03-03 20:36:22] (step=0048993) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.585795343377029, LR: 0.0003 +[2026-03-03 20:36:30] (step=0048994) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.585990999804343, LR: 0.0003 +[2026-03-03 20:36:38] (step=0048995) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.586186656231657, LR: 0.0003 +[2026-03-03 20:36:45] (step=0048996) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.586382312658971, LR: 0.0003 +[2026-03-03 20:36:53] (step=0048997) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.586577969086285, LR: 0.0003 +[2026-03-03 20:37:01] (step=0048998) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 9.586773625513597, LR: 0.0003 +[2026-03-03 20:37:09] (step=0048999) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.586969281940911, LR: 0.0003 +[2026-03-03 20:37:17] (step=0049000) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.587164938368225, LR: 0.0003 +[2026-03-03 20:37:17] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0049000/ +[2026-03-03 20:37:25] (step=0049001) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.58736059479554, LR: 0.0003 +[2026-03-03 20:37:33] (step=0049002) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.587556251222853, LR: 0.0003 +[2026-03-03 20:37:41] (step=0049003) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.587751907650166, LR: 0.0003 +[2026-03-03 20:37:48] (step=0049004) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.58794756407748, LR: 0.0003 +[2026-03-03 20:37:56] (step=0049005) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 9.588143220504794, LR: 0.0003 +[2026-03-03 20:38:04] (step=0049006) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 9.588338876932108, LR: 0.0003 +[2026-03-03 20:38:12] (step=0049007) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.588534533359422, LR: 0.0003 +[2026-03-03 20:38:20] (step=0049008) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.588730189786734, LR: 0.0003 +[2026-03-03 20:38:28] (step=0049009) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.588925846214048, LR: 0.0003 +[2026-03-03 20:38:36] (step=0049010) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.589121502641362, LR: 0.0003 +[2026-03-03 20:38:43] (step=0049011) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.589317159068676, LR: 0.0003 +[2026-03-03 20:38:51] (step=0049012) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.58951281549599, LR: 0.0003 +[2026-03-03 20:38:59] (step=0049013) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.589708471923302, LR: 0.0003 +[2026-03-03 20:39:07] (step=0049014) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.589904128350616, LR: 0.0003 +[2026-03-03 20:39:15] (step=0049015) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.59009978477793, LR: 0.0003 +[2026-03-03 20:39:23] (step=0049016) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.590295441205244, LR: 0.0003 +[2026-03-03 20:39:31] (step=0049017) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.590491097632556, LR: 0.0003 +[2026-03-03 20:39:39] (step=0049018) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.59068675405987, LR: 0.0003 +[2026-03-03 20:39:46] (step=0049019) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 9.590882410487184, LR: 0.0003 +[2026-03-03 20:39:54] (step=0049020) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.591078066914498, LR: 0.0003 +[2026-03-03 20:40:02] (step=0049021) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.591273723341812, LR: 0.0003 +[2026-03-03 20:40:10] (step=0049022) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.591469379769125, LR: 0.0003 +[2026-03-03 20:40:18] (step=0049023) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.591665036196439, LR: 0.0003 +[2026-03-03 20:40:26] (step=0049024) Train Loss: 0.4498, Train Steps/Sec: 0.12, Epoch: 9.591860692623753, LR: 0.0003 +[2026-03-03 20:40:34] (step=0049025) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.592056349051067, LR: 0.0003 +[2026-03-03 20:40:42] (step=0049026) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.59225200547838, LR: 0.0003 +[2026-03-03 20:40:49] (step=0049027) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.592447661905693, LR: 0.0003 +[2026-03-03 20:40:57] (step=0049028) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.592643318333007, LR: 0.0003 +[2026-03-03 20:41:05] (step=0049029) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.59283897476032, LR: 0.0003 +[2026-03-03 20:41:13] (step=0049030) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.593034631187635, LR: 0.0003 +[2026-03-03 20:41:21] (step=0049031) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.593230287614949, LR: 0.0003 +[2026-03-03 20:41:29] (step=0049032) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.593425944042261, LR: 0.0003 +[2026-03-03 20:41:37] (step=0049033) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 9.593621600469575, LR: 0.0003 +[2026-03-03 20:41:44] (step=0049034) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 9.59381725689689, LR: 0.0003 +[2026-03-03 20:41:52] (step=0049035) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.594012913324203, LR: 0.0003 +[2026-03-03 20:42:00] (step=0049036) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.594208569751517, LR: 0.0003 +[2026-03-03 20:42:08] (step=0049037) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.59440422617883, LR: 0.0003 +[2026-03-03 20:42:16] (step=0049038) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 9.594599882606143, LR: 0.0003 +[2026-03-03 20:42:24] (step=0049039) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.594795539033457, LR: 0.0003 +[2026-03-03 20:42:32] (step=0049040) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.594991195460771, LR: 0.0003 +[2026-03-03 20:42:40] (step=0049041) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.595186851888085, LR: 0.0003 +[2026-03-03 20:42:47] (step=0049042) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.595382508315398, LR: 0.0003 +[2026-03-03 20:42:55] (step=0049043) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.595578164742712, LR: 0.0003 +[2026-03-03 20:43:03] (step=0049044) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.595773821170026, LR: 0.0003 +[2026-03-03 20:43:11] (step=0049045) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.59596947759734, LR: 0.0003 +[2026-03-03 20:43:19] (step=0049046) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.596165134024652, LR: 0.0003 +[2026-03-03 20:43:27] (step=0049047) Train Loss: 0.4471, Train Steps/Sec: 0.12, Epoch: 9.596360790451966, LR: 0.0003 +[2026-03-03 20:43:35] (step=0049048) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.59655644687928, LR: 0.0003 +[2026-03-03 20:43:43] (step=0049049) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.596752103306594, LR: 0.0003 +[2026-03-03 20:43:51] (step=0049050) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.596947759733908, LR: 0.0003 +[2026-03-03 20:43:58] (step=0049051) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.59714341616122, LR: 0.0003 +[2026-03-03 20:44:06] (step=0049052) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.597339072588534, LR: 0.0003 +[2026-03-03 20:44:14] (step=0049053) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.597534729015848, LR: 0.0003 +[2026-03-03 20:44:22] (step=0049054) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.597730385443162, LR: 0.0003 +[2026-03-03 20:44:30] (step=0049055) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.597926041870476, LR: 0.0003 +[2026-03-03 20:44:38] (step=0049056) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.598121698297788, LR: 0.0003 +[2026-03-03 20:44:46] (step=0049057) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 9.598317354725102, LR: 0.0003 +[2026-03-03 20:44:53] (step=0049058) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.598513011152416, LR: 0.0003 +[2026-03-03 20:45:01] (step=0049059) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.59870866757973, LR: 0.0003 +[2026-03-03 20:45:09] (step=0049060) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.598904324007044, LR: 0.0003 +[2026-03-03 20:45:17] (step=0049061) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.599099980434357, LR: 0.0003 +[2026-03-03 20:45:25] (step=0049062) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.59929563686167, LR: 0.0003 +[2026-03-03 20:45:33] (step=0049063) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.599491293288985, LR: 0.0003 +[2026-03-03 20:45:40] (step=0049064) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.599686949716299, LR: 0.0003 +[2026-03-03 20:45:48] (step=0049065) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.599882606143613, LR: 0.0003 +[2026-03-03 20:45:56] (step=0049066) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.600078262570925, LR: 0.0003 +[2026-03-03 20:46:04] (step=0049067) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.600273918998239, LR: 0.0003 +[2026-03-03 20:46:12] (step=0049068) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 9.600469575425553, LR: 0.0003 +[2026-03-03 20:46:20] (step=0049069) Train Loss: 0.4217, Train Steps/Sec: 0.13, Epoch: 9.600665231852867, LR: 0.0003 +[2026-03-03 20:46:28] (step=0049070) Train Loss: 0.4421, Train Steps/Sec: 0.12, Epoch: 9.60086088828018, LR: 0.0003 +[2026-03-03 20:46:36] (step=0049071) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.601056544707493, LR: 0.0003 +[2026-03-03 20:46:43] (step=0049072) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 9.601252201134807, LR: 0.0003 +[2026-03-03 20:46:51] (step=0049073) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.601447857562121, LR: 0.0003 +[2026-03-03 20:46:59] (step=0049074) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.601643513989435, LR: 0.0003 +[2026-03-03 20:47:07] (step=0049075) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.601839170416747, LR: 0.0003 +[2026-03-03 20:47:15] (step=0049076) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.602034826844061, LR: 0.0003 +[2026-03-03 20:47:23] (step=0049077) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.602230483271375, LR: 0.0003 +[2026-03-03 20:47:31] (step=0049078) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.60242613969869, LR: 0.0003 +[2026-03-03 20:47:38] (step=0049079) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.602621796126003, LR: 0.0003 +[2026-03-03 20:47:46] (step=0049080) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.602817452553316, LR: 0.0003 +[2026-03-03 20:47:54] (step=0049081) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.60301310898063, LR: 0.0003 +[2026-03-03 20:48:02] (step=0049082) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 9.603208765407944, LR: 0.0003 +[2026-03-03 20:48:10] (step=0049083) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.603404421835258, LR: 0.0003 +[2026-03-03 20:48:18] (step=0049084) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.603600078262572, LR: 0.0003 +[2026-03-03 20:48:26] (step=0049085) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.603795734689884, LR: 0.0003 +[2026-03-03 20:48:33] (step=0049086) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.603991391117198, LR: 0.0003 +[2026-03-03 20:48:41] (step=0049087) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.604187047544512, LR: 0.0003 +[2026-03-03 20:48:49] (step=0049088) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 9.604382703971826, LR: 0.0003 +[2026-03-03 20:48:57] (step=0049089) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.60457836039914, LR: 0.0003 +[2026-03-03 20:49:05] (step=0049090) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.604774016826452, LR: 0.0003 +[2026-03-03 20:49:13] (step=0049091) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.604969673253766, LR: 0.0003 +[2026-03-03 20:49:21] (step=0049092) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.60516532968108, LR: 0.0003 +[2026-03-03 20:49:29] (step=0049093) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.605360986108394, LR: 0.0003 +[2026-03-03 20:49:36] (step=0049094) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.605556642535708, LR: 0.0003 +[2026-03-03 20:49:44] (step=0049095) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.60575229896302, LR: 0.0003 +[2026-03-03 20:49:52] (step=0049096) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.605947955390334, LR: 0.0003 +[2026-03-03 20:50:00] (step=0049097) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.606143611817648, LR: 0.0003 +[2026-03-03 20:50:08] (step=0049098) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.606339268244962, LR: 0.0003 +[2026-03-03 20:50:16] (step=0049099) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.606534924672275, LR: 0.0003 +[2026-03-03 20:50:24] (step=0049100) Train Loss: 0.4503, Train Steps/Sec: 0.12, Epoch: 9.606730581099589, LR: 0.0003 +[2026-03-03 20:50:32] (step=0049101) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.606926237526903, LR: 0.0003 +[2026-03-03 20:50:39] (step=0049102) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.607121893954217, LR: 0.0003 +[2026-03-03 20:50:47] (step=0049103) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 9.60731755038153, LR: 0.0003 +[2026-03-03 20:50:55] (step=0049104) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.607513206808843, LR: 0.0003 +[2026-03-03 20:51:03] (step=0049105) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.607708863236157, LR: 0.0003 +[2026-03-03 20:51:11] (step=0049106) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.607904519663471, LR: 0.0003 +[2026-03-03 20:51:19] (step=0049107) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.608100176090785, LR: 0.0003 +[2026-03-03 20:51:27] (step=0049108) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.608295832518099, LR: 0.0003 +[2026-03-03 20:51:34] (step=0049109) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.608491488945411, LR: 0.0003 +[2026-03-03 20:51:42] (step=0049110) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.608687145372725, LR: 0.0003 +[2026-03-03 20:51:50] (step=0049111) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.60888280180004, LR: 0.0003 +[2026-03-03 20:51:58] (step=0049112) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.609078458227353, LR: 0.0003 +[2026-03-03 20:52:06] (step=0049113) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.609274114654667, LR: 0.0003 +[2026-03-03 20:52:14] (step=0049114) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.60946977108198, LR: 0.0003 +[2026-03-03 20:52:22] (step=0049115) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.609665427509293, LR: 0.0003 +[2026-03-03 20:52:29] (step=0049116) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.609861083936607, LR: 0.0003 +[2026-03-03 20:52:37] (step=0049117) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 9.610056740363921, LR: 0.0003 +[2026-03-03 20:52:45] (step=0049118) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.610252396791235, LR: 0.0003 +[2026-03-03 20:52:53] (step=0049119) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.610448053218548, LR: 0.0003 +[2026-03-03 20:53:01] (step=0049120) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.610643709645862, LR: 0.0003 +[2026-03-03 20:53:09] (step=0049121) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.610839366073176, LR: 0.0003 +[2026-03-03 20:53:17] (step=0049122) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.61103502250049, LR: 0.0003 +[2026-03-03 20:53:25] (step=0049123) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 9.611230678927802, LR: 0.0003 +[2026-03-03 20:53:32] (step=0049124) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.611426335355116, LR: 0.0003 +[2026-03-03 20:53:40] (step=0049125) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.61162199178243, LR: 0.0003 +[2026-03-03 20:53:48] (step=0049126) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.611817648209744, LR: 0.0003 +[2026-03-03 20:53:56] (step=0049127) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 9.612013304637058, LR: 0.0003 +[2026-03-03 20:54:04] (step=0049128) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.61220896106437, LR: 0.0003 +[2026-03-03 20:54:12] (step=0049129) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 9.612404617491684, LR: 0.0003 +[2026-03-03 20:54:20] (step=0049130) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.612600273918998, LR: 0.0003 +[2026-03-03 20:54:27] (step=0049131) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.612795930346312, LR: 0.0003 +[2026-03-03 20:54:35] (step=0049132) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.612991586773626, LR: 0.0003 +[2026-03-03 20:54:43] (step=0049133) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.613187243200938, LR: 0.0003 +[2026-03-03 20:54:51] (step=0049134) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.613382899628252, LR: 0.0003 +[2026-03-03 20:54:59] (step=0049135) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.613578556055566, LR: 0.0003 +[2026-03-03 20:55:07] (step=0049136) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 9.61377421248288, LR: 0.0003 +[2026-03-03 20:55:15] (step=0049137) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.613969868910194, LR: 0.0003 +[2026-03-03 20:55:22] (step=0049138) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.614165525337507, LR: 0.0003 +[2026-03-03 20:55:30] (step=0049139) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.61436118176482, LR: 0.0003 +[2026-03-03 20:55:38] (step=0049140) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.614556838192135, LR: 0.0003 +[2026-03-03 20:55:46] (step=0049141) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.614752494619449, LR: 0.0003 +[2026-03-03 20:55:54] (step=0049142) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.614948151046763, LR: 0.0003 +[2026-03-03 20:56:02] (step=0049143) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.615143807474075, LR: 0.0003 +[2026-03-03 20:56:10] (step=0049144) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.615339463901389, LR: 0.0003 +[2026-03-03 20:56:17] (step=0049145) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.615535120328703, LR: 0.0003 +[2026-03-03 20:56:25] (step=0049146) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 9.615730776756017, LR: 0.0003 +[2026-03-03 20:56:33] (step=0049147) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.615926433183331, LR: 0.0003 +[2026-03-03 20:56:41] (step=0049148) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.616122089610643, LR: 0.0003 +[2026-03-03 20:56:49] (step=0049149) Train Loss: 0.4475, Train Steps/Sec: 0.12, Epoch: 9.616317746037957, LR: 0.0003 +[2026-03-03 20:56:57] (step=0049150) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 9.616513402465271, LR: 0.0003 +[2026-03-03 20:57:05] (step=0049151) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.616709058892585, LR: 0.0003 +[2026-03-03 20:57:13] (step=0049152) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.616904715319897, LR: 0.0003 +[2026-03-03 20:57:21] (step=0049153) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 9.617100371747211, LR: 0.0003 +[2026-03-03 20:57:29] (step=0049154) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.617296028174525, LR: 0.0003 +[2026-03-03 20:57:36] (step=0049155) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.61749168460184, LR: 0.0003 +[2026-03-03 20:57:44] (step=0049156) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.617687341029153, LR: 0.0003 +[2026-03-03 20:57:52] (step=0049157) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.617882997456466, LR: 0.0003 +[2026-03-03 20:58:00] (step=0049158) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.61807865388378, LR: 0.0003 +[2026-03-03 20:58:08] (step=0049159) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.618274310311094, LR: 0.0003 +[2026-03-03 20:58:16] (step=0049160) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.618469966738408, LR: 0.0003 +[2026-03-03 20:58:24] (step=0049161) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.618665623165722, LR: 0.0003 +[2026-03-03 20:58:31] (step=0049162) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 9.618861279593034, LR: 0.0003 +[2026-03-03 20:58:39] (step=0049163) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.619056936020348, LR: 0.0003 +[2026-03-03 20:58:47] (step=0049164) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.619252592447662, LR: 0.0003 +[2026-03-03 20:58:55] (step=0049165) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 9.619448248874976, LR: 0.0003 +[2026-03-03 20:59:03] (step=0049166) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.61964390530229, LR: 0.0003 +[2026-03-03 20:59:11] (step=0049167) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.619839561729602, LR: 0.0003 +[2026-03-03 20:59:19] (step=0049168) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 9.620035218156916, LR: 0.0003 +[2026-03-03 20:59:27] (step=0049169) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.62023087458423, LR: 0.0003 +[2026-03-03 20:59:34] (step=0049170) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 9.620426531011544, LR: 0.0003 +[2026-03-03 20:59:42] (step=0049171) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.620622187438858, LR: 0.0003 +[2026-03-03 20:59:50] (step=0049172) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.62081784386617, LR: 0.0003 +[2026-03-03 20:59:58] (step=0049173) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.621013500293484, LR: 0.0003 +[2026-03-03 21:00:06] (step=0049174) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.621209156720798, LR: 0.0003 +[2026-03-03 21:00:14] (step=0049175) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.621404813148112, LR: 0.0003 +[2026-03-03 21:00:22] (step=0049176) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.621600469575425, LR: 0.0003 +[2026-03-03 21:00:29] (step=0049177) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.621796126002739, LR: 0.0003 +[2026-03-03 21:00:37] (step=0049178) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.621991782430053, LR: 0.0003 +[2026-03-03 21:00:45] (step=0049179) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.622187438857367, LR: 0.0003 +[2026-03-03 21:00:53] (step=0049180) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.62238309528468, LR: 0.0003 +[2026-03-03 21:01:01] (step=0049181) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 9.622578751711993, LR: 0.0003 +[2026-03-03 21:01:09] (step=0049182) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.622774408139307, LR: 0.0003 +[2026-03-03 21:01:17] (step=0049183) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.622970064566621, LR: 0.0003 +[2026-03-03 21:01:24] (step=0049184) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.623165720993935, LR: 0.0003 +[2026-03-03 21:01:32] (step=0049185) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.623361377421249, LR: 0.0003 +[2026-03-03 21:01:40] (step=0049186) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.623557033848561, LR: 0.0003 +[2026-03-03 21:01:48] (step=0049187) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.623752690275875, LR: 0.0003 +[2026-03-03 21:01:56] (step=0049188) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.62394834670319, LR: 0.0003 +[2026-03-03 21:02:04] (step=0049189) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.624144003130503, LR: 0.0003 +[2026-03-03 21:02:12] (step=0049190) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.624339659557817, LR: 0.0003 +[2026-03-03 21:02:19] (step=0049191) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 9.62453531598513, LR: 0.0003 +[2026-03-03 21:02:27] (step=0049192) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.624730972412443, LR: 0.0003 +[2026-03-03 21:02:35] (step=0049193) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.624926628839757, LR: 0.0003 +[2026-03-03 21:02:43] (step=0049194) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.625122285267071, LR: 0.0003 +[2026-03-03 21:02:51] (step=0049195) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.625317941694385, LR: 0.0003 +[2026-03-03 21:02:59] (step=0049196) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.625513598121698, LR: 0.0003 +[2026-03-03 21:03:07] (step=0049197) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.625709254549012, LR: 0.0003 +[2026-03-03 21:03:14] (step=0049198) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.625904910976326, LR: 0.0003 +[2026-03-03 21:03:22] (step=0049199) Train Loss: 0.4490, Train Steps/Sec: 0.12, Epoch: 9.62610056740364, LR: 0.0003 +[2026-03-03 21:03:30] (step=0049200) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.626296223830952, LR: 0.0003 +[2026-03-03 21:03:38] (step=0049201) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.626491880258266, LR: 0.0003 +[2026-03-03 21:03:46] (step=0049202) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.62668753668558, LR: 0.0003 +[2026-03-03 21:03:54] (step=0049203) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.626883193112894, LR: 0.0003 +[2026-03-03 21:04:02] (step=0049204) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.627078849540208, LR: 0.0003 +[2026-03-03 21:04:10] (step=0049205) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.62727450596752, LR: 0.0003 +[2026-03-03 21:04:17] (step=0049206) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.627470162394834, LR: 0.0003 +[2026-03-03 21:04:25] (step=0049207) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.627665818822148, LR: 0.0003 +[2026-03-03 21:04:33] (step=0049208) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.627861475249462, LR: 0.0003 +[2026-03-03 21:04:41] (step=0049209) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 9.628057131676776, LR: 0.0003 +[2026-03-03 21:04:49] (step=0049210) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.628252788104088, LR: 0.0003 +[2026-03-03 21:04:57] (step=0049211) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.628448444531402, LR: 0.0003 +[2026-03-03 21:05:05] (step=0049212) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.628644100958716, LR: 0.0003 +[2026-03-03 21:05:12] (step=0049213) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.62883975738603, LR: 0.0003 +[2026-03-03 21:05:20] (step=0049214) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.629035413813344, LR: 0.0003 +[2026-03-03 21:05:28] (step=0049215) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 9.629231070240657, LR: 0.0003 +[2026-03-03 21:05:36] (step=0049216) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.62942672666797, LR: 0.0003 +[2026-03-03 21:05:44] (step=0049217) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.629622383095285, LR: 0.0003 +[2026-03-03 21:05:52] (step=0049218) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.629818039522599, LR: 0.0003 +[2026-03-03 21:06:00] (step=0049219) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 9.630013695949913, LR: 0.0003 +[2026-03-03 21:06:07] (step=0049220) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.630209352377225, LR: 0.0003 +[2026-03-03 21:06:15] (step=0049221) Train Loss: 0.4479, Train Steps/Sec: 0.12, Epoch: 9.630405008804539, LR: 0.0003 +[2026-03-03 21:06:23] (step=0049222) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.630600665231853, LR: 0.0003 +[2026-03-03 21:06:31] (step=0049223) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.630796321659167, LR: 0.0003 +[2026-03-03 21:06:39] (step=0049224) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.630991978086481, LR: 0.0003 +[2026-03-03 21:06:47] (step=0049225) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.631187634513793, LR: 0.0003 +[2026-03-03 21:06:55] (step=0049226) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.631383290941107, LR: 0.0003 +[2026-03-03 21:07:03] (step=0049227) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.631578947368421, LR: 0.0003 +[2026-03-03 21:07:11] (step=0049228) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.631774603795735, LR: 0.0003 +[2026-03-03 21:07:18] (step=0049229) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.631970260223047, LR: 0.0003 +[2026-03-03 21:07:26] (step=0049230) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.632165916650361, LR: 0.0003 +[2026-03-03 21:07:34] (step=0049231) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.632361573077675, LR: 0.0003 +[2026-03-03 21:07:42] (step=0049232) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.63255722950499, LR: 0.0003 +[2026-03-03 21:07:50] (step=0049233) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.632752885932303, LR: 0.0003 +[2026-03-03 21:07:58] (step=0049234) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.632948542359616, LR: 0.0003 +[2026-03-03 21:08:06] (step=0049235) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.63314419878693, LR: 0.0003 +[2026-03-03 21:08:13] (step=0049236) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 9.633339855214244, LR: 0.0003 +[2026-03-03 21:08:21] (step=0049237) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.633535511641558, LR: 0.0003 +[2026-03-03 21:08:29] (step=0049238) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.633731168068872, LR: 0.0003 +[2026-03-03 21:08:37] (step=0049239) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.633926824496184, LR: 0.0003 +[2026-03-03 21:08:45] (step=0049240) Train Loss: 0.4266, Train Steps/Sec: 0.13, Epoch: 9.634122480923498, LR: 0.0003 +[2026-03-03 21:08:53] (step=0049241) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.634318137350812, LR: 0.0003 +[2026-03-03 21:09:01] (step=0049242) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.634513793778126, LR: 0.0003 +[2026-03-03 21:09:08] (step=0049243) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.63470945020544, LR: 0.0003 +[2026-03-03 21:09:16] (step=0049244) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.634905106632752, LR: 0.0003 +[2026-03-03 21:09:24] (step=0049245) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.635100763060066, LR: 0.0003 +[2026-03-03 21:09:32] (step=0049246) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.63529641948738, LR: 0.0003 +[2026-03-03 21:09:40] (step=0049247) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.635492075914694, LR: 0.0003 +[2026-03-03 21:09:48] (step=0049248) Train Loss: 0.4382, Train Steps/Sec: 0.12, Epoch: 9.635687732342008, LR: 0.0003 +[2026-03-03 21:09:56] (step=0049249) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.63588338876932, LR: 0.0003 +[2026-03-03 21:10:04] (step=0049250) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 9.636079045196634, LR: 0.0003 +[2026-03-03 21:10:12] (step=0049251) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.636274701623949, LR: 0.0003 +[2026-03-03 21:10:19] (step=0049252) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.636470358051263, LR: 0.0003 +[2026-03-03 21:10:27] (step=0049253) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.636666014478575, LR: 0.0003 +[2026-03-03 21:10:35] (step=0049254) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.636861670905889, LR: 0.0003 +[2026-03-03 21:10:43] (step=0049255) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.637057327333203, LR: 0.0003 +[2026-03-03 21:10:51] (step=0049256) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.637252983760517, LR: 0.0003 +[2026-03-03 21:10:59] (step=0049257) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.63744864018783, LR: 0.0003 +[2026-03-03 21:11:06] (step=0049258) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.637644296615143, LR: 0.0003 +[2026-03-03 21:11:14] (step=0049259) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.637839953042457, LR: 0.0003 +[2026-03-03 21:11:22] (step=0049260) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.638035609469771, LR: 0.0003 +[2026-03-03 21:11:30] (step=0049261) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.638231265897085, LR: 0.0003 +[2026-03-03 21:11:38] (step=0049262) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.638426922324399, LR: 0.0003 +[2026-03-03 21:11:46] (step=0049263) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.638622578751711, LR: 0.0003 +[2026-03-03 21:11:54] (step=0049264) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.638818235179025, LR: 0.0003 +[2026-03-03 21:12:01] (step=0049265) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.63901389160634, LR: 0.0003 +[2026-03-03 21:12:09] (step=0049266) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.639209548033653, LR: 0.0003 +[2026-03-03 21:12:17] (step=0049267) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.639405204460967, LR: 0.0003 +[2026-03-03 21:12:25] (step=0049268) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.63960086088828, LR: 0.0003 +[2026-03-03 21:12:33] (step=0049269) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.639796517315594, LR: 0.0003 +[2026-03-03 21:12:41] (step=0049270) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.639992173742908, LR: 0.0003 +[2026-03-03 21:12:49] (step=0049271) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.640187830170222, LR: 0.0003 +[2026-03-03 21:12:57] (step=0049272) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.640383486597536, LR: 0.0003 +[2026-03-03 21:13:04] (step=0049273) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.640579143024848, LR: 0.0003 +[2026-03-03 21:13:12] (step=0049274) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.640774799452162, LR: 0.0003 +[2026-03-03 21:13:20] (step=0049275) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.640970455879476, LR: 0.0003 +[2026-03-03 21:13:28] (step=0049276) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.64116611230679, LR: 0.0003 +[2026-03-03 21:13:36] (step=0049277) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.641361768734104, LR: 0.0003 +[2026-03-03 21:13:44] (step=0049278) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 9.641557425161416, LR: 0.0003 +[2026-03-03 21:13:52] (step=0049279) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.64175308158873, LR: 0.0003 +[2026-03-03 21:13:59] (step=0049280) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.641948738016044, LR: 0.0003 +[2026-03-03 21:14:07] (step=0049281) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.642144394443358, LR: 0.0003 +[2026-03-03 21:14:15] (step=0049282) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.64234005087067, LR: 0.0003 +[2026-03-03 21:14:23] (step=0049283) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.642535707297984, LR: 0.0003 +[2026-03-03 21:14:31] (step=0049284) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.642731363725298, LR: 0.0003 +[2026-03-03 21:14:39] (step=0049285) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.642927020152612, LR: 0.0003 +[2026-03-03 21:14:47] (step=0049286) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.643122676579926, LR: 0.0003 +[2026-03-03 21:14:55] (step=0049287) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.643318333007239, LR: 0.0003 +[2026-03-03 21:15:02] (step=0049288) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.643513989434553, LR: 0.0003 +[2026-03-03 21:15:10] (step=0049289) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.643709645861867, LR: 0.0003 +[2026-03-03 21:15:18] (step=0049290) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 9.64390530228918, LR: 0.0003 +[2026-03-03 21:15:26] (step=0049291) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.644100958716495, LR: 0.0003 +[2026-03-03 21:15:34] (step=0049292) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 9.644296615143807, LR: 0.0003 +[2026-03-03 21:15:42] (step=0049293) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.64449227157112, LR: 0.0003 +[2026-03-03 21:15:50] (step=0049294) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.644687927998435, LR: 0.0003 +[2026-03-03 21:15:57] (step=0049295) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.644883584425749, LR: 0.0003 +[2026-03-03 21:16:05] (step=0049296) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.645079240853063, LR: 0.0003 +[2026-03-03 21:16:13] (step=0049297) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.645274897280375, LR: 0.0003 +[2026-03-03 21:16:21] (step=0049298) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.645470553707689, LR: 0.0003 +[2026-03-03 21:16:29] (step=0049299) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.645666210135003, LR: 0.0003 +[2026-03-03 21:16:37] (step=0049300) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.645861866562317, LR: 0.0003 +[2026-03-03 21:16:45] (step=0049301) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.646057522989631, LR: 0.0003 +[2026-03-03 21:16:53] (step=0049302) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 9.646253179416943, LR: 0.0003 +[2026-03-03 21:17:00] (step=0049303) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.646448835844257, LR: 0.0003 +[2026-03-03 21:17:08] (step=0049304) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.646644492271571, LR: 0.0003 +[2026-03-03 21:17:16] (step=0049305) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 9.646840148698885, LR: 0.0003 +[2026-03-03 21:17:24] (step=0049306) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.647035805126198, LR: 0.0003 +[2026-03-03 21:17:32] (step=0049307) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.647231461553512, LR: 0.0003 +[2026-03-03 21:17:40] (step=0049308) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.647427117980826, LR: 0.0003 +[2026-03-03 21:17:48] (step=0049309) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.64762277440814, LR: 0.0003 +[2026-03-03 21:17:55] (step=0049310) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.647818430835454, LR: 0.0003 +[2026-03-03 21:18:03] (step=0049311) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.648014087262766, LR: 0.0003 +[2026-03-03 21:18:11] (step=0049312) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.64820974369008, LR: 0.0003 +[2026-03-03 21:18:19] (step=0049313) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.648405400117394, LR: 0.0003 +[2026-03-03 21:18:27] (step=0049314) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.648601056544708, LR: 0.0003 +[2026-03-03 21:18:35] (step=0049315) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.648796712972022, LR: 0.0003 +[2026-03-03 21:18:43] (step=0049316) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.648992369399334, LR: 0.0003 +[2026-03-03 21:18:51] (step=0049317) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.649188025826648, LR: 0.0003 +[2026-03-03 21:18:58] (step=0049318) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.649383682253962, LR: 0.0003 +[2026-03-03 21:19:06] (step=0049319) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.649579338681276, LR: 0.0003 +[2026-03-03 21:19:14] (step=0049320) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.64977499510859, LR: 0.0003 +[2026-03-03 21:19:22] (step=0049321) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 9.649970651535902, LR: 0.0003 +[2026-03-03 21:19:30] (step=0049322) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 9.650166307963216, LR: 0.0003 +[2026-03-03 21:19:38] (step=0049323) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.65036196439053, LR: 0.0003 +[2026-03-03 21:19:46] (step=0049324) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.650557620817844, LR: 0.0003 +[2026-03-03 21:19:53] (step=0049325) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.650753277245158, LR: 0.0003 +[2026-03-03 21:20:01] (step=0049326) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.65094893367247, LR: 0.0003 +[2026-03-03 21:20:09] (step=0049327) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.651144590099785, LR: 0.0003 +[2026-03-03 21:20:17] (step=0049328) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.651340246527099, LR: 0.0003 +[2026-03-03 21:20:25] (step=0049329) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.651535902954413, LR: 0.0003 +[2026-03-03 21:20:33] (step=0049330) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.651731559381727, LR: 0.0003 +[2026-03-03 21:20:41] (step=0049331) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 9.651927215809039, LR: 0.0003 +[2026-03-03 21:20:48] (step=0049332) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.652122872236353, LR: 0.0003 +[2026-03-03 21:20:56] (step=0049333) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.652318528663667, LR: 0.0003 +[2026-03-03 21:21:04] (step=0049334) Train Loss: 0.4262, Train Steps/Sec: 0.13, Epoch: 9.65251418509098, LR: 0.0003 +[2026-03-03 21:21:12] (step=0049335) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.652709841518293, LR: 0.0003 +[2026-03-03 21:21:20] (step=0049336) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.652905497945607, LR: 0.0003 +[2026-03-03 21:21:28] (step=0049337) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.653101154372921, LR: 0.0003 +[2026-03-03 21:21:36] (step=0049338) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 9.653296810800235, LR: 0.0003 +[2026-03-03 21:21:43] (step=0049339) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.653492467227549, LR: 0.0003 +[2026-03-03 21:21:51] (step=0049340) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 9.653688123654861, LR: 0.0003 +[2026-03-03 21:21:59] (step=0049341) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 9.653883780082175, LR: 0.0003 +[2026-03-03 21:22:07] (step=0049342) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.65407943650949, LR: 0.0003 +[2026-03-03 21:22:15] (step=0049343) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.654275092936803, LR: 0.0003 +[2026-03-03 21:22:23] (step=0049344) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.654470749364117, LR: 0.0003 +[2026-03-03 21:22:31] (step=0049345) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.65466640579143, LR: 0.0003 +[2026-03-03 21:22:38] (step=0049346) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.654862062218744, LR: 0.0003 +[2026-03-03 21:22:46] (step=0049347) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.655057718646058, LR: 0.0003 +[2026-03-03 21:22:54] (step=0049348) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 9.655253375073372, LR: 0.0003 +[2026-03-03 21:23:02] (step=0049349) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.655449031500686, LR: 0.0003 +[2026-03-03 21:23:10] (step=0049350) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.655644687927998, LR: 0.0003 +[2026-03-03 21:23:18] (step=0049351) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.655840344355312, LR: 0.0003 +[2026-03-03 21:23:26] (step=0049352) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 9.656036000782626, LR: 0.0003 +[2026-03-03 21:23:34] (step=0049353) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 9.65623165720994, LR: 0.0003 +[2026-03-03 21:23:42] (step=0049354) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.656427313637254, LR: 0.0003 +[2026-03-03 21:23:49] (step=0049355) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.656622970064566, LR: 0.0003 +[2026-03-03 21:23:57] (step=0049356) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.65681862649188, LR: 0.0003 +[2026-03-03 21:24:05] (step=0049357) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.657014282919194, LR: 0.0003 +[2026-03-03 21:24:13] (step=0049358) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.657209939346508, LR: 0.0003 +[2026-03-03 21:24:21] (step=0049359) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.65740559577382, LR: 0.0003 +[2026-03-03 21:24:29] (step=0049360) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.657601252201134, LR: 0.0003 +[2026-03-03 21:24:37] (step=0049361) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.657796908628448, LR: 0.0003 +[2026-03-03 21:24:44] (step=0049362) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.657992565055762, LR: 0.0003 +[2026-03-03 21:24:52] (step=0049363) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 9.658188221483076, LR: 0.0003 +[2026-03-03 21:25:00] (step=0049364) Train Loss: 0.4287, Train Steps/Sec: 0.12, Epoch: 9.658383877910389, LR: 0.0003 +[2026-03-03 21:25:08] (step=0049365) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 9.658579534337703, LR: 0.0003 +[2026-03-03 21:25:16] (step=0049366) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.658775190765017, LR: 0.0003 +[2026-03-03 21:25:24] (step=0049367) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.65897084719233, LR: 0.0003 +[2026-03-03 21:25:32] (step=0049368) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.659166503619645, LR: 0.0003 +[2026-03-03 21:25:40] (step=0049369) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.659362160046957, LR: 0.0003 +[2026-03-03 21:25:47] (step=0049370) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.65955781647427, LR: 0.0003 +[2026-03-03 21:25:55] (step=0049371) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 9.659753472901585, LR: 0.0003 +[2026-03-03 21:26:03] (step=0049372) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.659949129328899, LR: 0.0003 +[2026-03-03 21:26:11] (step=0049373) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 9.660144785756213, LR: 0.0003 +[2026-03-03 21:26:19] (step=0049374) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.660340442183525, LR: 0.0003 +[2026-03-03 21:26:27] (step=0049375) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.660536098610839, LR: 0.0003 +[2026-03-03 21:26:35] (step=0049376) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.660731755038153, LR: 0.0003 +[2026-03-03 21:26:42] (step=0049377) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.660927411465467, LR: 0.0003 +[2026-03-03 21:26:50] (step=0049378) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.661123067892781, LR: 0.0003 +[2026-03-03 21:26:58] (step=0049379) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.661318724320093, LR: 0.0003 +[2026-03-03 21:27:06] (step=0049380) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.661514380747407, LR: 0.0003 +[2026-03-03 21:27:14] (step=0049381) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 9.661710037174721, LR: 0.0003 +[2026-03-03 21:27:22] (step=0049382) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.661905693602035, LR: 0.0003 +[2026-03-03 21:27:30] (step=0049383) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.66210135002935, LR: 0.0003 +[2026-03-03 21:27:38] (step=0049384) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.662297006456662, LR: 0.0003 +[2026-03-03 21:27:45] (step=0049385) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.662492662883976, LR: 0.0003 +[2026-03-03 21:27:53] (step=0049386) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.66268831931129, LR: 0.0003 +[2026-03-03 21:28:01] (step=0049387) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.662883975738604, LR: 0.0003 +[2026-03-03 21:28:09] (step=0049388) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.663079632165916, LR: 0.0003 +[2026-03-03 21:28:17] (step=0049389) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.66327528859323, LR: 0.0003 +[2026-03-03 21:28:25] (step=0049390) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.663470945020544, LR: 0.0003 +[2026-03-03 21:28:33] (step=0049391) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.663666601447858, LR: 0.0003 +[2026-03-03 21:28:40] (step=0049392) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.663862257875172, LR: 0.0003 +[2026-03-03 21:28:48] (step=0049393) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.664057914302484, LR: 0.0003 +[2026-03-03 21:28:56] (step=0049394) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.664253570729798, LR: 0.0003 +[2026-03-03 21:29:04] (step=0049395) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.664449227157112, LR: 0.0003 +[2026-03-03 21:29:12] (step=0049396) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.664644883584426, LR: 0.0003 +[2026-03-03 21:29:20] (step=0049397) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.66484054001174, LR: 0.0003 +[2026-03-03 21:29:28] (step=0049398) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.665036196439052, LR: 0.0003 +[2026-03-03 21:29:35] (step=0049399) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.665231852866366, LR: 0.0003 +[2026-03-03 21:29:43] (step=0049400) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.66542750929368, LR: 0.0003 +[2026-03-03 21:29:51] (step=0049401) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 9.665623165720994, LR: 0.0003 +[2026-03-03 21:29:59] (step=0049402) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.665818822148308, LR: 0.0003 +[2026-03-03 21:30:07] (step=0049403) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.66601447857562, LR: 0.0003 +[2026-03-03 21:30:15] (step=0049404) Train Loss: 0.4525, Train Steps/Sec: 0.12, Epoch: 9.666210135002935, LR: 0.0003 +[2026-03-03 21:30:23] (step=0049405) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.666405791430249, LR: 0.0003 +[2026-03-03 21:30:31] (step=0049406) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.666601447857563, LR: 0.0003 +[2026-03-03 21:30:39] (step=0049407) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.666797104284877, LR: 0.0003 +[2026-03-03 21:30:46] (step=0049408) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.666992760712189, LR: 0.0003 +[2026-03-03 21:30:54] (step=0049409) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.667188417139503, LR: 0.0003 +[2026-03-03 21:31:02] (step=0049410) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.667384073566817, LR: 0.0003 +[2026-03-03 21:31:10] (step=0049411) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.66757972999413, LR: 0.0003 +[2026-03-03 21:31:18] (step=0049412) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.667775386421443, LR: 0.0003 +[2026-03-03 21:31:26] (step=0049413) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.667971042848757, LR: 0.0003 +[2026-03-03 21:31:34] (step=0049414) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.668166699276071, LR: 0.0003 +[2026-03-03 21:31:42] (step=0049415) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.668362355703385, LR: 0.0003 +[2026-03-03 21:31:49] (step=0049416) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.668558012130699, LR: 0.0003 +[2026-03-03 21:31:57] (step=0049417) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 9.668753668558011, LR: 0.0003 +[2026-03-03 21:32:05] (step=0049418) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.668949324985325, LR: 0.0003 +[2026-03-03 21:32:13] (step=0049419) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.66914498141264, LR: 0.0003 +[2026-03-03 21:32:21] (step=0049420) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.669340637839953, LR: 0.0003 +[2026-03-03 21:32:29] (step=0049421) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.669536294267267, LR: 0.0003 +[2026-03-03 21:32:37] (step=0049422) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 9.66973195069458, LR: 0.0003 +[2026-03-03 21:32:44] (step=0049423) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.669927607121894, LR: 0.0003 +[2026-03-03 21:32:52] (step=0049424) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.670123263549208, LR: 0.0003 +[2026-03-03 21:33:00] (step=0049425) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.670318919976522, LR: 0.0003 +[2026-03-03 21:33:08] (step=0049426) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.670514576403836, LR: 0.0003 +[2026-03-03 21:33:16] (step=0049427) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.670710232831148, LR: 0.0003 +[2026-03-03 21:33:24] (step=0049428) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.670905889258462, LR: 0.0003 +[2026-03-03 21:33:32] (step=0049429) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.671101545685776, LR: 0.0003 +[2026-03-03 21:33:39] (step=0049430) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.67129720211309, LR: 0.0003 +[2026-03-03 21:33:47] (step=0049431) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 9.671492858540404, LR: 0.0003 +[2026-03-03 21:33:55] (step=0049432) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.671688514967716, LR: 0.0003 +[2026-03-03 21:34:03] (step=0049433) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.67188417139503, LR: 0.0003 +[2026-03-03 21:34:11] (step=0049434) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.672079827822344, LR: 0.0003 +[2026-03-03 21:34:19] (step=0049435) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.672275484249658, LR: 0.0003 +[2026-03-03 21:34:27] (step=0049436) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.672471140676972, LR: 0.0003 +[2026-03-03 21:34:34] (step=0049437) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.672666797104284, LR: 0.0003 +[2026-03-03 21:34:42] (step=0049438) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.672862453531598, LR: 0.0003 +[2026-03-03 21:34:50] (step=0049439) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.673058109958912, LR: 0.0003 +[2026-03-03 21:34:58] (step=0049440) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.673253766386226, LR: 0.0003 +[2026-03-03 21:35:06] (step=0049441) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.673449422813539, LR: 0.0003 +[2026-03-03 21:35:14] (step=0049442) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.673645079240853, LR: 0.0003 +[2026-03-03 21:35:22] (step=0049443) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.673840735668167, LR: 0.0003 +[2026-03-03 21:35:29] (step=0049444) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.67403639209548, LR: 0.0003 +[2026-03-03 21:35:37] (step=0049445) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.674232048522795, LR: 0.0003 +[2026-03-03 21:35:45] (step=0049446) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.674427704950107, LR: 0.0003 +[2026-03-03 21:35:53] (step=0049447) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 9.67462336137742, LR: 0.0003 +[2026-03-03 21:36:01] (step=0049448) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 9.674819017804735, LR: 0.0003 +[2026-03-03 21:36:09] (step=0049449) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.675014674232049, LR: 0.0003 +[2026-03-03 21:36:17] (step=0049450) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.675210330659363, LR: 0.0003 +[2026-03-03 21:36:24] (step=0049451) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 9.675405987086675, LR: 0.0003 +[2026-03-03 21:36:32] (step=0049452) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 9.675601643513989, LR: 0.0003 +[2026-03-03 21:36:40] (step=0049453) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.675797299941303, LR: 0.0003 +[2026-03-03 21:36:48] (step=0049454) Train Loss: 0.4245, Train Steps/Sec: 0.13, Epoch: 9.675992956368617, LR: 0.0003 +[2026-03-03 21:36:56] (step=0049455) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 9.676188612795931, LR: 0.0003 +[2026-03-03 21:37:04] (step=0049456) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.676384269223243, LR: 0.0003 +[2026-03-03 21:37:12] (step=0049457) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.676579925650557, LR: 0.0003 +[2026-03-03 21:37:20] (step=0049458) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 9.676775582077871, LR: 0.0003 +[2026-03-03 21:37:27] (step=0049459) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.676971238505185, LR: 0.0003 +[2026-03-03 21:37:35] (step=0049460) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.6771668949325, LR: 0.0003 +[2026-03-03 21:37:43] (step=0049461) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.677362551359812, LR: 0.0003 +[2026-03-03 21:37:51] (step=0049462) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.677558207787126, LR: 0.0003 +[2026-03-03 21:37:59] (step=0049463) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.67775386421444, LR: 0.0003 +[2026-03-03 21:38:07] (step=0049464) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.677949520641754, LR: 0.0003 +[2026-03-03 21:38:15] (step=0049465) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.678145177069066, LR: 0.0003 +[2026-03-03 21:38:23] (step=0049466) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.67834083349638, LR: 0.0003 +[2026-03-03 21:38:31] (step=0049467) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.678536489923694, LR: 0.0003 +[2026-03-03 21:38:38] (step=0049468) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.678732146351008, LR: 0.0003 +[2026-03-03 21:38:46] (step=0049469) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.678927802778322, LR: 0.0003 +[2026-03-03 21:38:54] (step=0049470) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.679123459205634, LR: 0.0003 +[2026-03-03 21:39:02] (step=0049471) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.679319115632948, LR: 0.0003 +[2026-03-03 21:39:10] (step=0049472) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.679514772060262, LR: 0.0003 +[2026-03-03 21:39:18] (step=0049473) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 9.679710428487576, LR: 0.0003 +[2026-03-03 21:39:26] (step=0049474) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.67990608491489, LR: 0.0003 +[2026-03-03 21:39:33] (step=0049475) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 9.680101741342202, LR: 0.0003 +[2026-03-03 21:39:41] (step=0049476) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.680297397769516, LR: 0.0003 +[2026-03-03 21:39:49] (step=0049477) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.68049305419683, LR: 0.0003 +[2026-03-03 21:39:57] (step=0049478) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.680688710624144, LR: 0.0003 +[2026-03-03 21:40:05] (step=0049479) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.680884367051458, LR: 0.0003 +[2026-03-03 21:40:13] (step=0049480) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.68108002347877, LR: 0.0003 +[2026-03-03 21:40:21] (step=0049481) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.681275679906085, LR: 0.0003 +[2026-03-03 21:40:28] (step=0049482) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.681471336333399, LR: 0.0003 +[2026-03-03 21:40:36] (step=0049483) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.681666992760713, LR: 0.0003 +[2026-03-03 21:40:44] (step=0049484) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.681862649188027, LR: 0.0003 +[2026-03-03 21:40:52] (step=0049485) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.682058305615339, LR: 0.0003 +[2026-03-03 21:41:00] (step=0049486) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.682253962042653, LR: 0.0003 +[2026-03-03 21:41:08] (step=0049487) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.682449618469967, LR: 0.0003 +[2026-03-03 21:41:16] (step=0049488) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.68264527489728, LR: 0.0003 +[2026-03-03 21:41:23] (step=0049489) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.682840931324595, LR: 0.0003 +[2026-03-03 21:41:31] (step=0049490) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.683036587751907, LR: 0.0003 +[2026-03-03 21:41:39] (step=0049491) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 9.683232244179221, LR: 0.0003 +[2026-03-03 21:41:47] (step=0049492) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.683427900606535, LR: 0.0003 +[2026-03-03 21:41:55] (step=0049493) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 9.68362355703385, LR: 0.0003 +[2026-03-03 21:42:03] (step=0049494) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.683819213461161, LR: 0.0003 +[2026-03-03 21:42:11] (step=0049495) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.684014869888475, LR: 0.0003 +[2026-03-03 21:42:18] (step=0049496) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.68421052631579, LR: 0.0003 +[2026-03-03 21:42:26] (step=0049497) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.684406182743103, LR: 0.0003 +[2026-03-03 21:42:34] (step=0049498) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.684601839170417, LR: 0.0003 +[2026-03-03 21:42:42] (step=0049499) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.68479749559773, LR: 0.0003 +[2026-03-03 21:42:50] (step=0049500) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.684993152025044, LR: 0.0003 +[2026-03-03 21:42:50] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0049500/ +[2026-03-03 21:42:58] (step=0049501) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 9.685188808452358, LR: 0.0003 +[2026-03-03 21:43:06] (step=0049502) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.685384464879672, LR: 0.0003 +[2026-03-03 21:43:13] (step=0049503) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.685580121306986, LR: 0.0003 +[2026-03-03 21:43:21] (step=0049504) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.685775777734298, LR: 0.0003 +[2026-03-03 21:43:29] (step=0049505) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.685971434161612, LR: 0.0003 +[2026-03-03 21:43:37] (step=0049506) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.686167090588926, LR: 0.0003 +[2026-03-03 21:43:45] (step=0049507) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.68636274701624, LR: 0.0003 +[2026-03-03 21:43:53] (step=0049508) Train Loss: 0.4514, Train Steps/Sec: 0.12, Epoch: 9.686558403443554, LR: 0.0003 +[2026-03-03 21:44:01] (step=0049509) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.686754059870866, LR: 0.0003 +[2026-03-03 21:44:09] (step=0049510) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.68694971629818, LR: 0.0003 +[2026-03-03 21:44:17] (step=0049511) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.687145372725494, LR: 0.0003 +[2026-03-03 21:44:25] (step=0049512) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.687341029152808, LR: 0.0003 +[2026-03-03 21:44:32] (step=0049513) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 9.687536685580122, LR: 0.0003 +[2026-03-03 21:44:40] (step=0049514) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.687732342007434, LR: 0.0003 +[2026-03-03 21:44:48] (step=0049515) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.687927998434748, LR: 0.0003 +[2026-03-03 21:44:56] (step=0049516) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.688123654862062, LR: 0.0003 +[2026-03-03 21:45:04] (step=0049517) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 9.688319311289376, LR: 0.0003 +[2026-03-03 21:45:12] (step=0049518) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.688514967716689, LR: 0.0003 +[2026-03-03 21:45:19] (step=0049519) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.688710624144003, LR: 0.0003 +[2026-03-03 21:45:27] (step=0049520) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.688906280571317, LR: 0.0003 +[2026-03-03 21:45:35] (step=0049521) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.68910193699863, LR: 0.0003 +[2026-03-03 21:45:43] (step=0049522) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.689297593425945, LR: 0.0003 +[2026-03-03 21:45:51] (step=0049523) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.689493249853257, LR: 0.0003 +[2026-03-03 21:45:59] (step=0049524) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.689688906280571, LR: 0.0003 +[2026-03-03 21:46:07] (step=0049525) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.689884562707885, LR: 0.0003 +[2026-03-03 21:46:14] (step=0049526) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.690080219135199, LR: 0.0003 +[2026-03-03 21:46:22] (step=0049527) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 9.690275875562513, LR: 0.0003 +[2026-03-03 21:46:30] (step=0049528) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 9.690471531989825, LR: 0.0003 +[2026-03-03 21:46:38] (step=0049529) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.69066718841714, LR: 0.0003 +[2026-03-03 21:46:46] (step=0049530) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.690862844844453, LR: 0.0003 +[2026-03-03 21:46:54] (step=0049531) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.691058501271767, LR: 0.0003 +[2026-03-03 21:47:02] (step=0049532) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.691254157699081, LR: 0.0003 +[2026-03-03 21:47:09] (step=0049533) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.691449814126393, LR: 0.0003 +[2026-03-03 21:47:17] (step=0049534) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.691645470553707, LR: 0.0003 +[2026-03-03 21:47:25] (step=0049535) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.691841126981021, LR: 0.0003 +[2026-03-03 21:47:33] (step=0049536) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.692036783408335, LR: 0.0003 +[2026-03-03 21:47:41] (step=0049537) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.69223243983565, LR: 0.0003 +[2026-03-03 21:47:49] (step=0049538) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.692428096262962, LR: 0.0003 +[2026-03-03 21:47:57] (step=0049539) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.692623752690276, LR: 0.0003 +[2026-03-03 21:48:04] (step=0049540) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 9.69281940911759, LR: 0.0003 +[2026-03-03 21:48:12] (step=0049541) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.693015065544904, LR: 0.0003 +[2026-03-03 21:48:20] (step=0049542) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.693210721972218, LR: 0.0003 +[2026-03-03 21:48:28] (step=0049543) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.69340637839953, LR: 0.0003 +[2026-03-03 21:48:36] (step=0049544) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.693602034826844, LR: 0.0003 +[2026-03-03 21:48:44] (step=0049545) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.693797691254158, LR: 0.0003 +[2026-03-03 21:48:52] (step=0049546) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.693993347681472, LR: 0.0003 +[2026-03-03 21:48:59] (step=0049547) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.694189004108784, LR: 0.0003 +[2026-03-03 21:49:07] (step=0049548) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.694384660536098, LR: 0.0003 +[2026-03-03 21:49:15] (step=0049549) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.694580316963412, LR: 0.0003 +[2026-03-03 21:49:23] (step=0049550) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.694775973390726, LR: 0.0003 +[2026-03-03 21:49:31] (step=0049551) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.69497162981804, LR: 0.0003 +[2026-03-03 21:49:39] (step=0049552) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.695167286245352, LR: 0.0003 +[2026-03-03 21:49:47] (step=0049553) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 9.695362942672666, LR: 0.0003 +[2026-03-03 21:49:54] (step=0049554) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.69555859909998, LR: 0.0003 +[2026-03-03 21:50:02] (step=0049555) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.695754255527294, LR: 0.0003 +[2026-03-03 21:50:10] (step=0049556) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.695949911954608, LR: 0.0003 +[2026-03-03 21:50:18] (step=0049557) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 9.69614556838192, LR: 0.0003 +[2026-03-03 21:50:26] (step=0049558) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 9.696341224809235, LR: 0.0003 +[2026-03-03 21:50:34] (step=0049559) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.696536881236549, LR: 0.0003 +[2026-03-03 21:50:42] (step=0049560) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.696732537663863, LR: 0.0003 +[2026-03-03 21:50:50] (step=0049561) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.696928194091177, LR: 0.0003 +[2026-03-03 21:50:58] (step=0049562) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.697123850518489, LR: 0.0003 +[2026-03-03 21:51:05] (step=0049563) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.697319506945803, LR: 0.0003 +[2026-03-03 21:51:13] (step=0049564) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.697515163373117, LR: 0.0003 +[2026-03-03 21:51:21] (step=0049565) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.697710819800431, LR: 0.0003 +[2026-03-03 21:51:29] (step=0049566) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.697906476227745, LR: 0.0003 +[2026-03-03 21:51:37] (step=0049567) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 9.698102132655057, LR: 0.0003 +[2026-03-03 21:51:45] (step=0049568) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.698297789082371, LR: 0.0003 +[2026-03-03 21:51:53] (step=0049569) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.698493445509685, LR: 0.0003 +[2026-03-03 21:52:01] (step=0049570) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 9.698689101937, LR: 0.0003 +[2026-03-03 21:52:08] (step=0049571) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.698884758364311, LR: 0.0003 +[2026-03-03 21:52:16] (step=0049572) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.699080414791625, LR: 0.0003 +[2026-03-03 21:52:24] (step=0049573) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.69927607121894, LR: 0.0003 +[2026-03-03 21:52:32] (step=0049574) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.699471727646253, LR: 0.0003 +[2026-03-03 21:52:40] (step=0049575) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.699667384073567, LR: 0.0003 +[2026-03-03 21:52:48] (step=0049576) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 9.69986304050088, LR: 0.0003 +[2026-03-03 21:52:56] (step=0049577) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.700058696928194, LR: 0.0003 +[2026-03-03 21:53:03] (step=0049578) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.700254353355508, LR: 0.0003 +[2026-03-03 21:53:11] (step=0049579) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.700450009782822, LR: 0.0003 +[2026-03-03 21:53:19] (step=0049580) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 9.700645666210136, LR: 0.0003 +[2026-03-03 21:53:27] (step=0049581) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.700841322637448, LR: 0.0003 +[2026-03-03 21:53:35] (step=0049582) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.701036979064762, LR: 0.0003 +[2026-03-03 21:53:43] (step=0049583) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.701232635492076, LR: 0.0003 +[2026-03-03 21:53:51] (step=0049584) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.70142829191939, LR: 0.0003 +[2026-03-03 21:53:58] (step=0049585) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.701623948346704, LR: 0.0003 +[2026-03-03 21:54:06] (step=0049586) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.701819604774016, LR: 0.0003 +[2026-03-03 21:54:14] (step=0049587) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.70201526120133, LR: 0.0003 +[2026-03-03 21:54:22] (step=0049588) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.702210917628644, LR: 0.0003 +[2026-03-03 21:54:30] (step=0049589) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.702406574055958, LR: 0.0003 +[2026-03-03 21:54:38] (step=0049590) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.702602230483272, LR: 0.0003 +[2026-03-03 21:54:46] (step=0049591) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.702797886910584, LR: 0.0003 +[2026-03-03 21:54:53] (step=0049592) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.702993543337898, LR: 0.0003 +[2026-03-03 21:55:01] (step=0049593) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.703189199765212, LR: 0.0003 +[2026-03-03 21:55:09] (step=0049594) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.703384856192526, LR: 0.0003 +[2026-03-03 21:55:17] (step=0049595) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.70358051261984, LR: 0.0003 +[2026-03-03 21:55:25] (step=0049596) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.703776169047153, LR: 0.0003 +[2026-03-03 21:55:33] (step=0049597) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.703971825474467, LR: 0.0003 +[2026-03-03 21:55:41] (step=0049598) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.70416748190178, LR: 0.0003 +[2026-03-03 21:55:49] (step=0049599) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.704363138329095, LR: 0.0003 +[2026-03-03 21:55:56] (step=0049600) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.704558794756407, LR: 0.0003 +[2026-03-03 21:56:04] (step=0049601) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.704754451183721, LR: 0.0003 +[2026-03-03 21:56:12] (step=0049602) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.704950107611035, LR: 0.0003 +[2026-03-03 21:56:20] (step=0049603) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.705145764038349, LR: 0.0003 +[2026-03-03 21:56:28] (step=0049604) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.705341420465663, LR: 0.0003 +[2026-03-03 21:56:36] (step=0049605) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.705537076892975, LR: 0.0003 +[2026-03-03 21:56:44] (step=0049606) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.70573273332029, LR: 0.0003 +[2026-03-03 21:56:52] (step=0049607) Train Loss: 0.4418, Train Steps/Sec: 0.12, Epoch: 9.705928389747603, LR: 0.0003 +[2026-03-03 21:57:00] (step=0049608) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.706124046174917, LR: 0.0003 +[2026-03-03 21:57:07] (step=0049609) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.706319702602231, LR: 0.0003 +[2026-03-03 21:57:15] (step=0049610) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.706515359029543, LR: 0.0003 +[2026-03-03 21:57:23] (step=0049611) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.706711015456857, LR: 0.0003 +[2026-03-03 21:57:31] (step=0049612) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.706906671884171, LR: 0.0003 +[2026-03-03 21:57:39] (step=0049613) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 9.707102328311485, LR: 0.0003 +[2026-03-03 21:57:47] (step=0049614) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 9.7072979847388, LR: 0.0003 +[2026-03-03 21:57:55] (step=0049615) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.707493641166112, LR: 0.0003 +[2026-03-03 21:58:02] (step=0049616) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 9.707689297593426, LR: 0.0003 +[2026-03-03 21:58:10] (step=0049617) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.70788495402074, LR: 0.0003 +[2026-03-03 21:58:18] (step=0049618) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 9.708080610448054, LR: 0.0003 +[2026-03-03 21:58:26] (step=0049619) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.708276266875368, LR: 0.0003 +[2026-03-03 21:58:34] (step=0049620) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.70847192330268, LR: 0.0003 +[2026-03-03 21:58:42] (step=0049621) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.708667579729994, LR: 0.0003 +[2026-03-03 21:58:50] (step=0049622) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.708863236157308, LR: 0.0003 +[2026-03-03 21:58:58] (step=0049623) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.709058892584622, LR: 0.0003 +[2026-03-03 21:59:05] (step=0049624) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.709254549011934, LR: 0.0003 +[2026-03-03 21:59:13] (step=0049625) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.709450205439248, LR: 0.0003 +[2026-03-03 21:59:21] (step=0049626) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.709645861866562, LR: 0.0003 +[2026-03-03 21:59:29] (step=0049627) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.709841518293876, LR: 0.0003 +[2026-03-03 21:59:37] (step=0049628) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.71003717472119, LR: 0.0003 +[2026-03-03 21:59:45] (step=0049629) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.710232831148502, LR: 0.0003 +[2026-03-03 21:59:53] (step=0049630) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.710428487575816, LR: 0.0003 +[2026-03-03 22:00:00] (step=0049631) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.71062414400313, LR: 0.0003 +[2026-03-03 22:00:08] (step=0049632) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.710819800430444, LR: 0.0003 +[2026-03-03 22:00:16] (step=0049633) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.711015456857758, LR: 0.0003 +[2026-03-03 22:00:24] (step=0049634) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.71121111328507, LR: 0.0003 +[2026-03-03 22:00:32] (step=0049635) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.711406769712385, LR: 0.0003 +[2026-03-03 22:00:40] (step=0049636) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.711602426139699, LR: 0.0003 +[2026-03-03 22:00:48] (step=0049637) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 9.711798082567013, LR: 0.0003 +[2026-03-03 22:00:55] (step=0049638) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.711993738994327, LR: 0.0003 +[2026-03-03 22:01:03] (step=0049639) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 9.712189395421639, LR: 0.0003 +[2026-03-03 22:01:11] (step=0049640) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.712385051848953, LR: 0.0003 +[2026-03-03 22:01:19] (step=0049641) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.712580708276267, LR: 0.0003 +[2026-03-03 22:01:27] (step=0049642) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.712776364703581, LR: 0.0003 +[2026-03-03 22:01:35] (step=0049643) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.712972021130895, LR: 0.0003 +[2026-03-03 22:01:43] (step=0049644) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.713167677558207, LR: 0.0003 +[2026-03-03 22:01:50] (step=0049645) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.713363333985521, LR: 0.0003 +[2026-03-03 22:01:58] (step=0049646) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.713558990412835, LR: 0.0003 +[2026-03-03 22:02:06] (step=0049647) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.71375464684015, LR: 0.0003 +[2026-03-03 22:02:14] (step=0049648) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.713950303267461, LR: 0.0003 +[2026-03-03 22:02:22] (step=0049649) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.714145959694775, LR: 0.0003 +[2026-03-03 22:02:30] (step=0049650) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.71434161612209, LR: 0.0003 +[2026-03-03 22:02:38] (step=0049651) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.714537272549403, LR: 0.0003 +[2026-03-03 22:02:45] (step=0049652) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.714732928976717, LR: 0.0003 +[2026-03-03 22:02:53] (step=0049653) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.71492858540403, LR: 0.0003 +[2026-03-03 22:03:01] (step=0049654) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.715124241831344, LR: 0.0003 +[2026-03-03 22:03:09] (step=0049655) Train Loss: 0.4481, Train Steps/Sec: 0.12, Epoch: 9.715319898258658, LR: 0.0003 +[2026-03-03 22:03:17] (step=0049656) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.715515554685972, LR: 0.0003 +[2026-03-03 22:03:25] (step=0049657) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.715711211113286, LR: 0.0003 +[2026-03-03 22:03:33] (step=0049658) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.715906867540598, LR: 0.0003 +[2026-03-03 22:03:41] (step=0049659) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.716102523967912, LR: 0.0003 +[2026-03-03 22:03:49] (step=0049660) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.716298180395226, LR: 0.0003 +[2026-03-03 22:03:56] (step=0049661) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.71649383682254, LR: 0.0003 +[2026-03-03 22:04:04] (step=0049662) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.716689493249854, LR: 0.0003 +[2026-03-03 22:04:12] (step=0049663) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.716885149677166, LR: 0.0003 +[2026-03-03 22:04:20] (step=0049664) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.71708080610448, LR: 0.0003 +[2026-03-03 22:04:28] (step=0049665) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.717276462531794, LR: 0.0003 +[2026-03-03 22:04:36] (step=0049666) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.717472118959108, LR: 0.0003 +[2026-03-03 22:04:44] (step=0049667) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.717667775386422, LR: 0.0003 +[2026-03-03 22:04:51] (step=0049668) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 9.717863431813734, LR: 0.0003 +[2026-03-03 22:04:59] (step=0049669) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.718059088241048, LR: 0.0003 +[2026-03-03 22:05:07] (step=0049670) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.718254744668362, LR: 0.0003 +[2026-03-03 22:05:15] (step=0049671) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.718450401095676, LR: 0.0003 +[2026-03-03 22:05:23] (step=0049672) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.71864605752299, LR: 0.0003 +[2026-03-03 22:05:31] (step=0049673) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.718841713950303, LR: 0.0003 +[2026-03-03 22:05:39] (step=0049674) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.719037370377617, LR: 0.0003 +[2026-03-03 22:05:47] (step=0049675) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.71923302680493, LR: 0.0003 +[2026-03-03 22:05:54] (step=0049676) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.719428683232245, LR: 0.0003 +[2026-03-03 22:06:02] (step=0049677) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.719624339659557, LR: 0.0003 +[2026-03-03 22:06:10] (step=0049678) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.719819996086871, LR: 0.0003 +[2026-03-03 22:06:18] (step=0049679) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.720015652514185, LR: 0.0003 +[2026-03-03 22:06:26] (step=0049680) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.720211308941499, LR: 0.0003 +[2026-03-03 22:06:34] (step=0049681) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 9.720406965368813, LR: 0.0003 +[2026-03-03 22:06:42] (step=0049682) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.720602621796125, LR: 0.0003 +[2026-03-03 22:06:49] (step=0049683) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.72079827822344, LR: 0.0003 +[2026-03-03 22:06:57] (step=0049684) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.720993934650753, LR: 0.0003 +[2026-03-03 22:07:05] (step=0049685) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.721189591078067, LR: 0.0003 +[2026-03-03 22:07:13] (step=0049686) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 9.721385247505381, LR: 0.0003 +[2026-03-03 22:07:21] (step=0049687) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.721580903932693, LR: 0.0003 +[2026-03-03 22:07:29] (step=0049688) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.721776560360007, LR: 0.0003 +[2026-03-03 22:07:37] (step=0049689) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.721972216787321, LR: 0.0003 +[2026-03-03 22:07:44] (step=0049690) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.722167873214635, LR: 0.0003 +[2026-03-03 22:07:52] (step=0049691) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.72236352964195, LR: 0.0003 +[2026-03-03 22:08:00] (step=0049692) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 9.722559186069262, LR: 0.0003 +[2026-03-03 22:08:08] (step=0049693) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.722754842496576, LR: 0.0003 +[2026-03-03 22:08:16] (step=0049694) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.72295049892389, LR: 0.0003 +[2026-03-03 22:08:24] (step=0049695) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.723146155351204, LR: 0.0003 +[2026-03-03 22:08:32] (step=0049696) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.723341811778518, LR: 0.0003 +[2026-03-03 22:08:39] (step=0049697) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 9.72353746820583, LR: 0.0003 +[2026-03-03 22:08:47] (step=0049698) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.723733124633144, LR: 0.0003 +[2026-03-03 22:08:55] (step=0049699) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.723928781060458, LR: 0.0003 +[2026-03-03 22:09:03] (step=0049700) Train Loss: 0.4524, Train Steps/Sec: 0.12, Epoch: 9.724124437487772, LR: 0.0003 +[2026-03-03 22:09:11] (step=0049701) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.724320093915084, LR: 0.0003 +[2026-03-03 22:09:19] (step=0049702) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 9.724515750342398, LR: 0.0003 +[2026-03-03 22:09:27] (step=0049703) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.724711406769712, LR: 0.0003 +[2026-03-03 22:09:35] (step=0049704) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.724907063197026, LR: 0.0003 +[2026-03-03 22:09:43] (step=0049705) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.72510271962434, LR: 0.0003 +[2026-03-03 22:09:50] (step=0049706) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.725298376051652, LR: 0.0003 +[2026-03-03 22:09:58] (step=0049707) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.725494032478966, LR: 0.0003 +[2026-03-03 22:10:06] (step=0049708) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.72568968890628, LR: 0.0003 +[2026-03-03 22:10:14] (step=0049709) Train Loss: 0.4368, Train Steps/Sec: 0.12, Epoch: 9.725885345333595, LR: 0.0003 +[2026-03-03 22:10:22] (step=0049710) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.726081001760909, LR: 0.0003 +[2026-03-03 22:10:30] (step=0049711) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.72627665818822, LR: 0.0003 +[2026-03-03 22:10:38] (step=0049712) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.726472314615535, LR: 0.0003 +[2026-03-03 22:10:46] (step=0049713) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.726667971042849, LR: 0.0003 +[2026-03-03 22:10:53] (step=0049714) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.726863627470163, LR: 0.0003 +[2026-03-03 22:11:01] (step=0049715) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.727059283897477, LR: 0.0003 +[2026-03-03 22:11:09] (step=0049716) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.727254940324789, LR: 0.0003 +[2026-03-03 22:11:17] (step=0049717) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.727450596752103, LR: 0.0003 +[2026-03-03 22:11:25] (step=0049718) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.727646253179417, LR: 0.0003 +[2026-03-03 22:11:33] (step=0049719) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.727841909606731, LR: 0.0003 +[2026-03-03 22:11:41] (step=0049720) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 9.728037566034045, LR: 0.0003 +[2026-03-03 22:11:49] (step=0049721) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.728233222461357, LR: 0.0003 +[2026-03-03 22:11:56] (step=0049722) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.728428878888671, LR: 0.0003 +[2026-03-03 22:12:04] (step=0049723) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.728624535315985, LR: 0.0003 +[2026-03-03 22:12:12] (step=0049724) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 9.7288201917433, LR: 0.0003 +[2026-03-03 22:12:20] (step=0049725) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 9.729015848170613, LR: 0.0003 +[2026-03-03 22:12:28] (step=0049726) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.729211504597925, LR: 0.0003 +[2026-03-03 22:12:36] (step=0049727) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.72940716102524, LR: 0.0003 +[2026-03-03 22:12:44] (step=0049728) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.729602817452554, LR: 0.0003 +[2026-03-03 22:12:51] (step=0049729) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.729798473879868, LR: 0.0003 +[2026-03-03 22:12:59] (step=0049730) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.72999413030718, LR: 0.0003 +[2026-03-03 22:13:07] (step=0049731) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.730189786734494, LR: 0.0003 +[2026-03-03 22:13:15] (step=0049732) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.730385443161808, LR: 0.0003 +[2026-03-03 22:13:23] (step=0049733) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.730581099589122, LR: 0.0003 +[2026-03-03 22:13:31] (step=0049734) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.730776756016436, LR: 0.0003 +[2026-03-03 22:13:39] (step=0049735) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.730972412443748, LR: 0.0003 +[2026-03-03 22:13:46] (step=0049736) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.731168068871062, LR: 0.0003 +[2026-03-03 22:13:54] (step=0049737) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.731363725298376, LR: 0.0003 +[2026-03-03 22:14:02] (step=0049738) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.73155938172569, LR: 0.0003 +[2026-03-03 22:14:10] (step=0049739) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.731755038153004, LR: 0.0003 +[2026-03-03 22:14:18] (step=0049740) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.731950694580316, LR: 0.0003 +[2026-03-03 22:14:26] (step=0049741) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.73214635100763, LR: 0.0003 +[2026-03-03 22:14:34] (step=0049742) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.732342007434944, LR: 0.0003 +[2026-03-03 22:14:41] (step=0049743) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.732537663862258, LR: 0.0003 +[2026-03-03 22:14:49] (step=0049744) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.732733320289572, LR: 0.0003 +[2026-03-03 22:14:57] (step=0049745) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.732928976716885, LR: 0.0003 +[2026-03-03 22:15:05] (step=0049746) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.733124633144199, LR: 0.0003 +[2026-03-03 22:15:13] (step=0049747) Train Loss: 0.4516, Train Steps/Sec: 0.12, Epoch: 9.733320289571513, LR: 0.0003 +[2026-03-03 22:15:21] (step=0049748) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 9.733515945998827, LR: 0.0003 +[2026-03-03 22:15:29] (step=0049749) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.73371160242614, LR: 0.0003 +[2026-03-03 22:15:37] (step=0049750) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.733907258853453, LR: 0.0003 +[2026-03-03 22:15:44] (step=0049751) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.734102915280767, LR: 0.0003 +[2026-03-03 22:15:52] (step=0049752) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.73429857170808, LR: 0.0003 +[2026-03-03 22:16:00] (step=0049753) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.734494228135395, LR: 0.0003 +[2026-03-03 22:16:08] (step=0049754) Train Loss: 0.4624, Train Steps/Sec: 0.13, Epoch: 9.734689884562707, LR: 0.0003 +[2026-03-03 22:16:16] (step=0049755) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.734885540990021, LR: 0.0003 +[2026-03-03 22:16:24] (step=0049756) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.735081197417335, LR: 0.0003 +[2026-03-03 22:16:32] (step=0049757) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.735276853844649, LR: 0.0003 +[2026-03-03 22:16:39] (step=0049758) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.735472510271963, LR: 0.0003 +[2026-03-03 22:16:47] (step=0049759) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 9.735668166699275, LR: 0.0003 +[2026-03-03 22:16:55] (step=0049760) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.73586382312659, LR: 0.0003 +[2026-03-03 22:17:03] (step=0049761) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 9.736059479553903, LR: 0.0003 +[2026-03-03 22:17:11] (step=0049762) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 9.736255135981217, LR: 0.0003 +[2026-03-03 22:17:19] (step=0049763) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.736450792408531, LR: 0.0003 +[2026-03-03 22:17:27] (step=0049764) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.736646448835844, LR: 0.0003 +[2026-03-03 22:17:35] (step=0049765) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.736842105263158, LR: 0.0003 +[2026-03-03 22:17:42] (step=0049766) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 9.737037761690472, LR: 0.0003 +[2026-03-03 22:17:50] (step=0049767) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.737233418117786, LR: 0.0003 +[2026-03-03 22:17:58] (step=0049768) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 9.7374290745451, LR: 0.0003 +[2026-03-03 22:18:06] (step=0049769) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.737624730972412, LR: 0.0003 +[2026-03-03 22:18:14] (step=0049770) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.737820387399726, LR: 0.0003 +[2026-03-03 22:18:22] (step=0049771) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.73801604382704, LR: 0.0003 +[2026-03-03 22:18:30] (step=0049772) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.738211700254354, LR: 0.0003 +[2026-03-03 22:18:38] (step=0049773) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.738407356681668, LR: 0.0003 +[2026-03-03 22:18:45] (step=0049774) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.73860301310898, LR: 0.0003 +[2026-03-03 22:18:53] (step=0049775) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.738798669536294, LR: 0.0003 +[2026-03-03 22:19:01] (step=0049776) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.738994325963608, LR: 0.0003 +[2026-03-03 22:19:09] (step=0049777) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.739189982390922, LR: 0.0003 +[2026-03-03 22:19:17] (step=0049778) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.739385638818236, LR: 0.0003 +[2026-03-03 22:19:25] (step=0049779) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.739581295245548, LR: 0.0003 +[2026-03-03 22:19:33] (step=0049780) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 9.739776951672862, LR: 0.0003 +[2026-03-03 22:19:40] (step=0049781) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.739972608100176, LR: 0.0003 +[2026-03-03 22:19:48] (step=0049782) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.74016826452749, LR: 0.0003 +[2026-03-03 22:19:56] (step=0049783) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.740363920954803, LR: 0.0003 +[2026-03-03 22:20:04] (step=0049784) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 9.740559577382117, LR: 0.0003 +[2026-03-03 22:20:12] (step=0049785) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.74075523380943, LR: 0.0003 +[2026-03-03 22:20:20] (step=0049786) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.740950890236745, LR: 0.0003 +[2026-03-03 22:20:28] (step=0049787) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.741146546664059, LR: 0.0003 +[2026-03-03 22:20:35] (step=0049788) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.74134220309137, LR: 0.0003 +[2026-03-03 22:20:43] (step=0049789) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 9.741537859518685, LR: 0.0003 +[2026-03-03 22:20:51] (step=0049790) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.741733515945999, LR: 0.0003 +[2026-03-03 22:20:59] (step=0049791) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.741929172373313, LR: 0.0003 +[2026-03-03 22:21:07] (step=0049792) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.742124828800627, LR: 0.0003 +[2026-03-03 22:21:15] (step=0049793) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.742320485227939, LR: 0.0003 +[2026-03-03 22:21:23] (step=0049794) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.742516141655253, LR: 0.0003 +[2026-03-03 22:21:30] (step=0049795) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.742711798082567, LR: 0.0003 +[2026-03-03 22:21:38] (step=0049796) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.742907454509881, LR: 0.0003 +[2026-03-03 22:21:46] (step=0049797) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 9.743103110937195, LR: 0.0003 +[2026-03-03 22:21:54] (step=0049798) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.743298767364507, LR: 0.0003 +[2026-03-03 22:22:02] (step=0049799) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.743494423791821, LR: 0.0003 +[2026-03-03 22:22:10] (step=0049800) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.743690080219135, LR: 0.0003 +[2026-03-03 22:22:18] (step=0049801) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.74388573664645, LR: 0.0003 +[2026-03-03 22:22:26] (step=0049802) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.744081393073763, LR: 0.0003 +[2026-03-03 22:22:33] (step=0049803) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.744277049501076, LR: 0.0003 +[2026-03-03 22:22:41] (step=0049804) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.74447270592839, LR: 0.0003 +[2026-03-03 22:22:49] (step=0049805) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.744668362355704, LR: 0.0003 +[2026-03-03 22:22:57] (step=0049806) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.744864018783018, LR: 0.0003 +[2026-03-03 22:23:05] (step=0049807) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.74505967521033, LR: 0.0003 +[2026-03-03 22:23:13] (step=0049808) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.745255331637644, LR: 0.0003 +[2026-03-03 22:23:21] (step=0049809) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.745450988064958, LR: 0.0003 +[2026-03-03 22:23:28] (step=0049810) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 9.745646644492272, LR: 0.0003 +[2026-03-03 22:23:36] (step=0049811) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.745842300919586, LR: 0.0003 +[2026-03-03 22:23:44] (step=0049812) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.746037957346898, LR: 0.0003 +[2026-03-03 22:23:52] (step=0049813) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.746233613774212, LR: 0.0003 +[2026-03-03 22:24:00] (step=0049814) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.746429270201526, LR: 0.0003 +[2026-03-03 22:24:08] (step=0049815) Train Loss: 0.4592, Train Steps/Sec: 0.12, Epoch: 9.74662492662884, LR: 0.0003 +[2026-03-03 22:24:16] (step=0049816) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.746820583056154, LR: 0.0003 +[2026-03-03 22:24:24] (step=0049817) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.747016239483466, LR: 0.0003 +[2026-03-03 22:24:31] (step=0049818) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.74721189591078, LR: 0.0003 +[2026-03-03 22:24:39] (step=0049819) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.747407552338094, LR: 0.0003 +[2026-03-03 22:24:47] (step=0049820) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.747603208765408, LR: 0.0003 +[2026-03-03 22:24:55] (step=0049821) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.747798865192722, LR: 0.0003 +[2026-03-03 22:25:03] (step=0049822) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.747994521620035, LR: 0.0003 +[2026-03-03 22:25:11] (step=0049823) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.748190178047349, LR: 0.0003 +[2026-03-03 22:25:19] (step=0049824) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.748385834474663, LR: 0.0003 +[2026-03-03 22:25:26] (step=0049825) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.748581490901977, LR: 0.0003 +[2026-03-03 22:25:34] (step=0049826) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 9.74877714732929, LR: 0.0003 +[2026-03-03 22:25:42] (step=0049827) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.748972803756603, LR: 0.0003 +[2026-03-03 22:25:50] (step=0049828) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.749168460183917, LR: 0.0003 +[2026-03-03 22:25:58] (step=0049829) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.74936411661123, LR: 0.0003 +[2026-03-03 22:26:06] (step=0049830) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.749559773038545, LR: 0.0003 +[2026-03-03 22:26:14] (step=0049831) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.749755429465859, LR: 0.0003 +[2026-03-03 22:26:21] (step=0049832) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.749951085893171, LR: 0.0003 +[2026-03-03 22:26:29] (step=0049833) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.750146742320485, LR: 0.0003 +[2026-03-03 22:26:37] (step=0049834) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.750342398747799, LR: 0.0003 +[2026-03-03 22:26:45] (step=0049835) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.750538055175113, LR: 0.0003 +[2026-03-03 22:26:53] (step=0049836) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.750733711602425, LR: 0.0003 +[2026-03-03 22:27:01] (step=0049837) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.75092936802974, LR: 0.0003 +[2026-03-03 22:27:09] (step=0049838) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.751125024457053, LR: 0.0003 +[2026-03-03 22:27:17] (step=0049839) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.751320680884367, LR: 0.0003 +[2026-03-03 22:27:24] (step=0049840) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.751516337311681, LR: 0.0003 +[2026-03-03 22:27:32] (step=0049841) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.751711993738994, LR: 0.0003 +[2026-03-03 22:27:40] (step=0049842) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.751907650166308, LR: 0.0003 +[2026-03-03 22:27:48] (step=0049843) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.752103306593622, LR: 0.0003 +[2026-03-03 22:27:56] (step=0049844) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 9.752298963020936, LR: 0.0003 +[2026-03-03 22:28:04] (step=0049845) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 9.75249461944825, LR: 0.0003 +[2026-03-03 22:28:12] (step=0049846) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.752690275875562, LR: 0.0003 +[2026-03-03 22:28:20] (step=0049847) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 9.752885932302876, LR: 0.0003 +[2026-03-03 22:28:27] (step=0049848) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.75308158873019, LR: 0.0003 +[2026-03-03 22:28:35] (step=0049849) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.753277245157504, LR: 0.0003 +[2026-03-03 22:28:43] (step=0049850) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.753472901584818, LR: 0.0003 +[2026-03-03 22:28:51] (step=0049851) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.75366855801213, LR: 0.0003 +[2026-03-03 22:28:59] (step=0049852) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.753864214439444, LR: 0.0003 +[2026-03-03 22:29:07] (step=0049853) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.754059870866758, LR: 0.0003 +[2026-03-03 22:29:15] (step=0049854) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.754255527294072, LR: 0.0003 +[2026-03-03 22:29:22] (step=0049855) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.754451183721386, LR: 0.0003 +[2026-03-03 22:29:30] (step=0049856) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.754646840148698, LR: 0.0003 +[2026-03-03 22:29:38] (step=0049857) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.754842496576012, LR: 0.0003 +[2026-03-03 22:29:46] (step=0049858) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.755038153003326, LR: 0.0003 +[2026-03-03 22:29:54] (step=0049859) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.75523380943064, LR: 0.0003 +[2026-03-03 22:30:02] (step=0049860) Train Loss: 0.4362, Train Steps/Sec: 0.12, Epoch: 9.755429465857953, LR: 0.0003 +[2026-03-03 22:30:10] (step=0049861) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.755625122285267, LR: 0.0003 +[2026-03-03 22:30:18] (step=0049862) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.75582077871258, LR: 0.0003 +[2026-03-03 22:30:26] (step=0049863) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.756016435139895, LR: 0.0003 +[2026-03-03 22:30:33] (step=0049864) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.756212091567209, LR: 0.0003 +[2026-03-03 22:30:41] (step=0049865) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.75640774799452, LR: 0.0003 +[2026-03-03 22:30:49] (step=0049866) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.756603404421835, LR: 0.0003 +[2026-03-03 22:30:57] (step=0049867) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.756799060849149, LR: 0.0003 +[2026-03-03 22:31:05] (step=0049868) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.756994717276463, LR: 0.0003 +[2026-03-03 22:31:13] (step=0049869) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.757190373703777, LR: 0.0003 +[2026-03-03 22:31:21] (step=0049870) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.757386030131089, LR: 0.0003 +[2026-03-03 22:31:28] (step=0049871) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.757581686558403, LR: 0.0003 +[2026-03-03 22:31:36] (step=0049872) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 9.757777342985717, LR: 0.0003 +[2026-03-03 22:31:44] (step=0049873) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 9.757972999413031, LR: 0.0003 +[2026-03-03 22:31:52] (step=0049874) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.758168655840345, LR: 0.0003 +[2026-03-03 22:32:00] (step=0049875) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.758364312267657, LR: 0.0003 +[2026-03-03 22:32:08] (step=0049876) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.758559968694971, LR: 0.0003 +[2026-03-03 22:32:16] (step=0049877) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.758755625122285, LR: 0.0003 +[2026-03-03 22:32:23] (step=0049878) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.7589512815496, LR: 0.0003 +[2026-03-03 22:32:31] (step=0049879) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.759146937976913, LR: 0.0003 +[2026-03-03 22:32:39] (step=0049880) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.759342594404226, LR: 0.0003 +[2026-03-03 22:32:47] (step=0049881) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.75953825083154, LR: 0.0003 +[2026-03-03 22:32:55] (step=0049882) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.759733907258854, LR: 0.0003 +[2026-03-03 22:33:03] (step=0049883) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.759929563686168, LR: 0.0003 +[2026-03-03 22:33:11] (step=0049884) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.760125220113482, LR: 0.0003 +[2026-03-03 22:33:18] (step=0049885) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 9.760320876540794, LR: 0.0003 +[2026-03-03 22:33:26] (step=0049886) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 9.760516532968108, LR: 0.0003 +[2026-03-03 22:33:34] (step=0049887) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.760712189395422, LR: 0.0003 +[2026-03-03 22:33:42] (step=0049888) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.760907845822736, LR: 0.0003 +[2026-03-03 22:33:50] (step=0049889) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.761103502250048, LR: 0.0003 +[2026-03-03 22:33:58] (step=0049890) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 9.761299158677362, LR: 0.0003 +[2026-03-03 22:34:06] (step=0049891) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.761494815104676, LR: 0.0003 +[2026-03-03 22:34:13] (step=0049892) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.76169047153199, LR: 0.0003 +[2026-03-03 22:34:21] (step=0049893) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.761886127959304, LR: 0.0003 +[2026-03-03 22:34:29] (step=0049894) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.762081784386616, LR: 0.0003 +[2026-03-03 22:34:37] (step=0049895) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.76227744081393, LR: 0.0003 +[2026-03-03 22:34:45] (step=0049896) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.762473097241244, LR: 0.0003 +[2026-03-03 22:34:53] (step=0049897) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.762668753668558, LR: 0.0003 +[2026-03-03 22:35:01] (step=0049898) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.762864410095872, LR: 0.0003 +[2026-03-03 22:35:08] (step=0049899) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.763060066523185, LR: 0.0003 +[2026-03-03 22:35:16] (step=0049900) Train Loss: 0.4474, Train Steps/Sec: 0.12, Epoch: 9.763255722950499, LR: 0.0003 +[2026-03-03 22:35:24] (step=0049901) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.763451379377813, LR: 0.0003 +[2026-03-03 22:35:32] (step=0049902) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.763647035805127, LR: 0.0003 +[2026-03-03 22:35:40] (step=0049903) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.76384269223244, LR: 0.0003 +[2026-03-03 22:35:48] (step=0049904) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.764038348659753, LR: 0.0003 +[2026-03-03 22:35:56] (step=0049905) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.764234005087067, LR: 0.0003 +[2026-03-03 22:36:04] (step=0049906) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 9.76442966151438, LR: 0.0003 +[2026-03-03 22:36:12] (step=0049907) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.764625317941695, LR: 0.0003 +[2026-03-03 22:36:19] (step=0049908) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.764820974369009, LR: 0.0003 +[2026-03-03 22:36:27] (step=0049909) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.765016630796321, LR: 0.0003 +[2026-03-03 22:36:35] (step=0049910) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.765212287223635, LR: 0.0003 +[2026-03-03 22:36:43] (step=0049911) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.765407943650949, LR: 0.0003 +[2026-03-03 22:36:51] (step=0049912) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.765603600078263, LR: 0.0003 +[2026-03-03 22:36:59] (step=0049913) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.765799256505575, LR: 0.0003 +[2026-03-03 22:37:07] (step=0049914) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.76599491293289, LR: 0.0003 +[2026-03-03 22:37:15] (step=0049915) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 9.766190569360203, LR: 0.0003 +[2026-03-03 22:37:22] (step=0049916) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.766386225787517, LR: 0.0003 +[2026-03-03 22:37:30] (step=0049917) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.766581882214831, LR: 0.0003 +[2026-03-03 22:37:38] (step=0049918) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.766777538642144, LR: 0.0003 +[2026-03-03 22:37:46] (step=0049919) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.766973195069458, LR: 0.0003 +[2026-03-03 22:37:54] (step=0049920) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.767168851496772, LR: 0.0003 +[2026-03-03 22:38:02] (step=0049921) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.767364507924086, LR: 0.0003 +[2026-03-03 22:38:10] (step=0049922) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.7675601643514, LR: 0.0003 +[2026-03-03 22:38:17] (step=0049923) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.767755820778712, LR: 0.0003 +[2026-03-03 22:38:25] (step=0049924) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.767951477206026, LR: 0.0003 +[2026-03-03 22:38:33] (step=0049925) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.76814713363334, LR: 0.0003 +[2026-03-03 22:38:41] (step=0049926) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.768342790060654, LR: 0.0003 +[2026-03-03 22:38:49] (step=0049927) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.768538446487968, LR: 0.0003 +[2026-03-03 22:38:57] (step=0049928) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.76873410291528, LR: 0.0003 +[2026-03-03 22:39:05] (step=0049929) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.768929759342594, LR: 0.0003 +[2026-03-03 22:39:12] (step=0049930) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.769125415769908, LR: 0.0003 +[2026-03-03 22:39:20] (step=0049931) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 9.769321072197222, LR: 0.0003 +[2026-03-03 22:39:28] (step=0049932) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.769516728624536, LR: 0.0003 +[2026-03-03 22:39:36] (step=0049933) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.769712385051848, LR: 0.0003 +[2026-03-03 22:39:44] (step=0049934) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 9.769908041479162, LR: 0.0003 +[2026-03-03 22:39:52] (step=0049935) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.770103697906476, LR: 0.0003 +[2026-03-03 22:40:00] (step=0049936) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.77029935433379, LR: 0.0003 +[2026-03-03 22:40:07] (step=0049937) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.770495010761104, LR: 0.0003 +[2026-03-03 22:40:15] (step=0049938) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.770690667188417, LR: 0.0003 +[2026-03-03 22:40:23] (step=0049939) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.77088632361573, LR: 0.0003 +[2026-03-03 22:40:31] (step=0049940) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.771081980043045, LR: 0.0003 +[2026-03-03 22:40:39] (step=0049941) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.771277636470359, LR: 0.0003 +[2026-03-03 22:40:47] (step=0049942) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.77147329289767, LR: 0.0003 +[2026-03-03 22:40:55] (step=0049943) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.771668949324985, LR: 0.0003 +[2026-03-03 22:41:02] (step=0049944) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.771864605752299, LR: 0.0003 +[2026-03-03 22:41:10] (step=0049945) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.772060262179613, LR: 0.0003 +[2026-03-03 22:41:18] (step=0049946) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.772255918606927, LR: 0.0003 +[2026-03-03 22:41:26] (step=0049947) Train Loss: 0.4446, Train Steps/Sec: 0.12, Epoch: 9.77245157503424, LR: 0.0003 +[2026-03-03 22:41:34] (step=0049948) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.772647231461553, LR: 0.0003 +[2026-03-03 22:41:42] (step=0049949) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.772842887888867, LR: 0.0003 +[2026-03-03 22:41:50] (step=0049950) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.773038544316181, LR: 0.0003 +[2026-03-03 22:41:58] (step=0049951) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 9.773234200743495, LR: 0.0003 +[2026-03-03 22:42:05] (step=0049952) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 9.773429857170807, LR: 0.0003 +[2026-03-03 22:42:13] (step=0049953) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.773625513598121, LR: 0.0003 +[2026-03-03 22:42:21] (step=0049954) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.773821170025435, LR: 0.0003 +[2026-03-03 22:42:29] (step=0049955) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.77401682645275, LR: 0.0003 +[2026-03-03 22:42:37] (step=0049956) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.774212482880063, LR: 0.0003 +[2026-03-03 22:42:45] (step=0049957) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.774408139307376, LR: 0.0003 +[2026-03-03 22:42:53] (step=0049958) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.77460379573469, LR: 0.0003 +[2026-03-03 22:43:01] (step=0049959) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.774799452162004, LR: 0.0003 +[2026-03-03 22:43:08] (step=0049960) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 9.774995108589318, LR: 0.0003 +[2026-03-03 22:43:16] (step=0049961) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.775190765016632, LR: 0.0003 +[2026-03-03 22:43:24] (step=0049962) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.775386421443944, LR: 0.0003 +[2026-03-03 22:43:32] (step=0049963) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.775582077871258, LR: 0.0003 +[2026-03-03 22:43:40] (step=0049964) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.775777734298572, LR: 0.0003 +[2026-03-03 22:43:48] (step=0049965) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.775973390725886, LR: 0.0003 +[2026-03-03 22:43:56] (step=0049966) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.776169047153198, LR: 0.0003 +[2026-03-03 22:44:03] (step=0049967) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.776364703580512, LR: 0.0003 +[2026-03-03 22:44:11] (step=0049968) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.776560360007826, LR: 0.0003 +[2026-03-03 22:44:19] (step=0049969) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.77675601643514, LR: 0.0003 +[2026-03-03 22:44:27] (step=0049970) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.776951672862454, LR: 0.0003 +[2026-03-03 22:44:35] (step=0049971) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.777147329289766, LR: 0.0003 +[2026-03-03 22:44:43] (step=0049972) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.77734298571708, LR: 0.0003 +[2026-03-03 22:44:51] (step=0049973) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.777538642144394, LR: 0.0003 +[2026-03-03 22:44:58] (step=0049974) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.777734298571708, LR: 0.0003 +[2026-03-03 22:45:06] (step=0049975) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.777929954999022, LR: 0.0003 +[2026-03-03 22:45:14] (step=0049976) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.778125611426335, LR: 0.0003 +[2026-03-03 22:45:22] (step=0049977) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.778321267853649, LR: 0.0003 +[2026-03-03 22:45:30] (step=0049978) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.778516924280963, LR: 0.0003 +[2026-03-03 22:45:38] (step=0049979) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.778712580708277, LR: 0.0003 +[2026-03-03 22:45:46] (step=0049980) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.77890823713559, LR: 0.0003 +[2026-03-03 22:45:54] (step=0049981) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.779103893562903, LR: 0.0003 +[2026-03-03 22:46:01] (step=0049982) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.779299549990217, LR: 0.0003 +[2026-03-03 22:46:09] (step=0049983) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.779495206417531, LR: 0.0003 +[2026-03-03 22:46:17] (step=0049984) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.779690862844845, LR: 0.0003 +[2026-03-03 22:46:25] (step=0049985) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.779886519272159, LR: 0.0003 +[2026-03-03 22:46:33] (step=0049986) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 9.780082175699471, LR: 0.0003 +[2026-03-03 22:46:41] (step=0049987) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.780277832126785, LR: 0.0003 +[2026-03-03 22:46:49] (step=0049988) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.7804734885541, LR: 0.0003 +[2026-03-03 22:46:56] (step=0049989) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.780669144981413, LR: 0.0003 +[2026-03-03 22:47:04] (step=0049990) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.780864801408727, LR: 0.0003 +[2026-03-03 22:47:12] (step=0049991) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.78106045783604, LR: 0.0003 +[2026-03-03 22:47:20] (step=0049992) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.781256114263353, LR: 0.0003 +[2026-03-03 22:47:28] (step=0049993) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.781451770690667, LR: 0.0003 +[2026-03-03 22:47:36] (step=0049994) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.781647427117981, LR: 0.0003 +[2026-03-03 22:47:44] (step=0049995) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.781843083545294, LR: 0.0003 +[2026-03-03 22:47:52] (step=0049996) Train Loss: 0.4495, Train Steps/Sec: 0.12, Epoch: 9.782038739972608, LR: 0.0003 +[2026-03-03 22:48:00] (step=0049997) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.782234396399922, LR: 0.0003 +[2026-03-03 22:48:07] (step=0049998) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.782430052827236, LR: 0.0003 +[2026-03-03 22:48:15] (step=0049999) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.78262570925455, LR: 0.0003 +[2026-03-03 22:48:23] (step=0050000) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 9.782821365681862, LR: 0.0003 +[2026-03-03 22:48:23] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0050000/ +[2026-03-03 22:48:31] (step=0050001) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.783017022109176, LR: 0.0003 +[2026-03-03 22:48:39] (step=0050002) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.78321267853649, LR: 0.0003 +[2026-03-03 22:48:47] (step=0050003) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.783408334963804, LR: 0.0003 +[2026-03-03 22:48:55] (step=0050004) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.783603991391118, LR: 0.0003 +[2026-03-03 22:49:02] (step=0050005) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.78379964781843, LR: 0.0003 +[2026-03-03 22:49:10] (step=0050006) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 9.783995304245744, LR: 0.0003 +[2026-03-03 22:49:18] (step=0050007) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.784190960673058, LR: 0.0003 +[2026-03-03 22:49:26] (step=0050008) Train Loss: 0.4262, Train Steps/Sec: 0.13, Epoch: 9.784386617100372, LR: 0.0003 +[2026-03-03 22:49:34] (step=0050009) Train Loss: 0.4605, Train Steps/Sec: 0.12, Epoch: 9.784582273527686, LR: 0.0003 +[2026-03-03 22:49:42] (step=0050010) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.784777929954998, LR: 0.0003 +[2026-03-03 22:49:50] (step=0050011) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 9.784973586382312, LR: 0.0003 +[2026-03-03 22:49:58] (step=0050012) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.785169242809626, LR: 0.0003 +[2026-03-03 22:50:05] (step=0050013) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.78536489923694, LR: 0.0003 +[2026-03-03 22:50:13] (step=0050014) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.785560555664254, LR: 0.0003 +[2026-03-03 22:50:21] (step=0050015) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 9.785756212091567, LR: 0.0003 +[2026-03-03 22:50:29] (step=0050016) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 9.78595186851888, LR: 0.0003 +[2026-03-03 22:50:37] (step=0050017) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.786147524946195, LR: 0.0003 +[2026-03-03 22:50:45] (step=0050018) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.786343181373509, LR: 0.0003 +[2026-03-03 22:50:53] (step=0050019) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.786538837800821, LR: 0.0003 +[2026-03-03 22:51:01] (step=0050020) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.786734494228135, LR: 0.0003 +[2026-03-03 22:51:08] (step=0050021) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.786930150655449, LR: 0.0003 +[2026-03-03 22:51:16] (step=0050022) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.787125807082763, LR: 0.0003 +[2026-03-03 22:51:24] (step=0050023) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 9.787321463510077, LR: 0.0003 +[2026-03-03 22:51:32] (step=0050024) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.78751711993739, LR: 0.0003 +[2026-03-03 22:51:40] (step=0050025) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.787712776364703, LR: 0.0003 +[2026-03-03 22:51:48] (step=0050026) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.787908432792017, LR: 0.0003 +[2026-03-03 22:51:56] (step=0050027) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.788104089219331, LR: 0.0003 +[2026-03-03 22:52:03] (step=0050028) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 9.788299745646645, LR: 0.0003 +[2026-03-03 22:52:11] (step=0050029) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.788495402073957, LR: 0.0003 +[2026-03-03 22:52:19] (step=0050030) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.788691058501271, LR: 0.0003 +[2026-03-03 22:52:27] (step=0050031) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.788886714928585, LR: 0.0003 +[2026-03-03 22:52:35] (step=0050032) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.7890823713559, LR: 0.0003 +[2026-03-03 22:52:43] (step=0050033) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.789278027783213, LR: 0.0003 +[2026-03-03 22:52:51] (step=0050034) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 9.789473684210526, LR: 0.0003 +[2026-03-03 22:52:58] (step=0050035) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.78966934063784, LR: 0.0003 +[2026-03-03 22:53:06] (step=0050036) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.789864997065154, LR: 0.0003 +[2026-03-03 22:53:14] (step=0050037) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.790060653492468, LR: 0.0003 +[2026-03-03 22:53:22] (step=0050038) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.790256309919782, LR: 0.0003 +[2026-03-03 22:53:30] (step=0050039) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.790451966347094, LR: 0.0003 +[2026-03-03 22:53:38] (step=0050040) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.790647622774408, LR: 0.0003 +[2026-03-03 22:53:46] (step=0050041) Train Loss: 0.4369, Train Steps/Sec: 0.12, Epoch: 9.790843279201722, LR: 0.0003 +[2026-03-03 22:53:54] (step=0050042) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.791038935629036, LR: 0.0003 +[2026-03-03 22:54:01] (step=0050043) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.79123459205635, LR: 0.0003 +[2026-03-03 22:54:09] (step=0050044) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.791430248483662, LR: 0.0003 +[2026-03-03 22:54:17] (step=0050045) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.791625904910976, LR: 0.0003 +[2026-03-03 22:54:25] (step=0050046) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 9.79182156133829, LR: 0.0003 +[2026-03-03 22:54:33] (step=0050047) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.792017217765604, LR: 0.0003 +[2026-03-03 22:54:41] (step=0050048) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.792212874192916, LR: 0.0003 +[2026-03-03 22:54:49] (step=0050049) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.79240853062023, LR: 0.0003 +[2026-03-03 22:54:57] (step=0050050) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.792604187047544, LR: 0.0003 +[2026-03-03 22:55:04] (step=0050051) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 9.792799843474858, LR: 0.0003 +[2026-03-03 22:55:12] (step=0050052) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.792995499902172, LR: 0.0003 +[2026-03-03 22:55:20] (step=0050053) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.793191156329485, LR: 0.0003 +[2026-03-03 22:55:28] (step=0050054) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.793386812756799, LR: 0.0003 +[2026-03-03 22:55:36] (step=0050055) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 9.793582469184113, LR: 0.0003 +[2026-03-03 22:55:44] (step=0050056) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.793778125611427, LR: 0.0003 +[2026-03-03 22:55:51] (step=0050057) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.79397378203874, LR: 0.0003 +[2026-03-03 22:55:59] (step=0050058) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 9.794169438466053, LR: 0.0003 +[2026-03-03 22:56:07] (step=0050059) Train Loss: 0.4415, Train Steps/Sec: 0.12, Epoch: 9.794365094893367, LR: 0.0003 +[2026-03-03 22:56:15] (step=0050060) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 9.794560751320681, LR: 0.0003 +[2026-03-03 22:56:23] (step=0050061) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.794756407747995, LR: 0.0003 +[2026-03-03 22:56:31] (step=0050062) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.794952064175309, LR: 0.0003 +[2026-03-03 22:56:39] (step=0050063) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.795147720602621, LR: 0.0003 +[2026-03-03 22:56:47] (step=0050064) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.795343377029935, LR: 0.0003 +[2026-03-03 22:56:54] (step=0050065) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 9.79553903345725, LR: 0.0003 +[2026-03-03 22:57:02] (step=0050066) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.795734689884563, LR: 0.0003 +[2026-03-03 22:57:10] (step=0050067) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.795930346311877, LR: 0.0003 +[2026-03-03 22:57:18] (step=0050068) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.79612600273919, LR: 0.0003 +[2026-03-03 22:57:26] (step=0050069) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.796321659166503, LR: 0.0003 +[2026-03-03 22:57:34] (step=0050070) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.796517315593817, LR: 0.0003 +[2026-03-03 22:57:41] (step=0050071) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.796712972021131, LR: 0.0003 +[2026-03-03 22:57:49] (step=0050072) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.796908628448444, LR: 0.0003 +[2026-03-03 22:57:57] (step=0050073) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.797104284875758, LR: 0.0003 +[2026-03-03 22:58:05] (step=0050074) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.797299941303072, LR: 0.0003 +[2026-03-03 22:58:13] (step=0050075) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.797495597730386, LR: 0.0003 +[2026-03-03 22:58:21] (step=0050076) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.7976912541577, LR: 0.0003 +[2026-03-03 22:58:29] (step=0050077) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.797886910585012, LR: 0.0003 +[2026-03-03 22:58:36] (step=0050078) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.798082567012326, LR: 0.0003 +[2026-03-03 22:58:44] (step=0050079) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 9.79827822343964, LR: 0.0003 +[2026-03-03 22:58:52] (step=0050080) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.798473879866954, LR: 0.0003 +[2026-03-03 22:59:00] (step=0050081) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.798669536294268, LR: 0.0003 +[2026-03-03 22:59:08] (step=0050082) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.79886519272158, LR: 0.0003 +[2026-03-03 22:59:16] (step=0050083) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 9.799060849148894, LR: 0.0003 +[2026-03-03 22:59:24] (step=0050084) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.799256505576208, LR: 0.0003 +[2026-03-03 22:59:31] (step=0050085) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.799452162003522, LR: 0.0003 +[2026-03-03 22:59:39] (step=0050086) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.799647818430836, LR: 0.0003 +[2026-03-03 22:59:47] (step=0050087) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.799843474858148, LR: 0.0003 +[2026-03-03 22:59:55] (step=0050088) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.800039131285462, LR: 0.0003 +[2026-03-03 23:00:03] (step=0050089) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.800234787712776, LR: 0.0003 +[2026-03-03 23:00:11] (step=0050090) Train Loss: 0.4335, Train Steps/Sec: 0.12, Epoch: 9.80043044414009, LR: 0.0003 +[2026-03-03 23:00:19] (step=0050091) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.800626100567404, LR: 0.0003 +[2026-03-03 23:00:27] (step=0050092) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.800821756994717, LR: 0.0003 +[2026-03-03 23:00:34] (step=0050093) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.80101741342203, LR: 0.0003 +[2026-03-03 23:00:42] (step=0050094) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.801213069849345, LR: 0.0003 +[2026-03-03 23:00:50] (step=0050095) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 9.801408726276659, LR: 0.0003 +[2026-03-03 23:00:58] (step=0050096) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.801604382703971, LR: 0.0003 +[2026-03-03 23:01:06] (step=0050097) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.801800039131285, LR: 0.0003 +[2026-03-03 23:01:14] (step=0050098) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.801995695558599, LR: 0.0003 +[2026-03-03 23:01:21] (step=0050099) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.802191351985913, LR: 0.0003 +[2026-03-03 23:01:29] (step=0050100) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.802387008413227, LR: 0.0003 +[2026-03-03 23:01:37] (step=0050101) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.80258266484054, LR: 0.0003 +[2026-03-03 23:01:45] (step=0050102) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.802778321267853, LR: 0.0003 +[2026-03-03 23:01:53] (step=0050103) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.802973977695167, LR: 0.0003 +[2026-03-03 23:02:01] (step=0050104) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.803169634122481, LR: 0.0003 +[2026-03-03 23:02:09] (step=0050105) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.803365290549795, LR: 0.0003 +[2026-03-03 23:02:16] (step=0050106) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.803560946977107, LR: 0.0003 +[2026-03-03 23:02:24] (step=0050107) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.803756603404421, LR: 0.0003 +[2026-03-03 23:02:32] (step=0050108) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.803952259831735, LR: 0.0003 +[2026-03-03 23:02:40] (step=0050109) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.80414791625905, LR: 0.0003 +[2026-03-03 23:02:48] (step=0050110) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.804343572686363, LR: 0.0003 +[2026-03-03 23:02:56] (step=0050111) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.804539229113676, LR: 0.0003 +[2026-03-03 23:03:04] (step=0050112) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.80473488554099, LR: 0.0003 +[2026-03-03 23:03:12] (step=0050113) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.804930541968304, LR: 0.0003 +[2026-03-03 23:03:20] (step=0050114) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.805126198395618, LR: 0.0003 +[2026-03-03 23:03:27] (step=0050115) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.805321854822932, LR: 0.0003 +[2026-03-03 23:03:35] (step=0050116) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.805517511250244, LR: 0.0003 +[2026-03-03 23:03:43] (step=0050117) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.805713167677558, LR: 0.0003 +[2026-03-03 23:03:51] (step=0050118) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.805908824104872, LR: 0.0003 +[2026-03-03 23:03:59] (step=0050119) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.806104480532186, LR: 0.0003 +[2026-03-03 23:04:07] (step=0050120) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.8063001369595, LR: 0.0003 +[2026-03-03 23:04:15] (step=0050121) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.806495793386812, LR: 0.0003 +[2026-03-03 23:04:22] (step=0050122) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.806691449814126, LR: 0.0003 +[2026-03-03 23:04:30] (step=0050123) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 9.80688710624144, LR: 0.0003 +[2026-03-03 23:04:38] (step=0050124) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.807082762668754, LR: 0.0003 +[2026-03-03 23:04:46] (step=0050125) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.807278419096066, LR: 0.0003 +[2026-03-03 23:04:54] (step=0050126) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.80747407552338, LR: 0.0003 +[2026-03-03 23:05:02] (step=0050127) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.807669731950694, LR: 0.0003 +[2026-03-03 23:05:10] (step=0050128) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.807865388378008, LR: 0.0003 +[2026-03-03 23:05:17] (step=0050129) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.808061044805322, LR: 0.0003 +[2026-03-03 23:05:25] (step=0050130) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.808256701232635, LR: 0.0003 +[2026-03-03 23:05:33] (step=0050131) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.808452357659949, LR: 0.0003 +[2026-03-03 23:05:41] (step=0050132) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 9.808648014087263, LR: 0.0003 +[2026-03-03 23:05:49] (step=0050133) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 9.808843670514577, LR: 0.0003 +[2026-03-03 23:05:57] (step=0050134) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 9.80903932694189, LR: 0.0003 +[2026-03-03 23:06:05] (step=0050135) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.809234983369203, LR: 0.0003 +[2026-03-03 23:06:12] (step=0050136) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.809430639796517, LR: 0.0003 +[2026-03-03 23:06:20] (step=0050137) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 9.809626296223831, LR: 0.0003 +[2026-03-03 23:06:28] (step=0050138) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.809821952651145, LR: 0.0003 +[2026-03-03 23:06:36] (step=0050139) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.810017609078459, LR: 0.0003 +[2026-03-03 23:06:44] (step=0050140) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.810213265505771, LR: 0.0003 +[2026-03-03 23:06:52] (step=0050141) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.810408921933085, LR: 0.0003 +[2026-03-03 23:07:00] (step=0050142) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.8106045783604, LR: 0.0003 +[2026-03-03 23:07:08] (step=0050143) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.810800234787713, LR: 0.0003 +[2026-03-03 23:07:15] (step=0050144) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.810995891215027, LR: 0.0003 +[2026-03-03 23:07:23] (step=0050145) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.81119154764234, LR: 0.0003 +[2026-03-03 23:07:31] (step=0050146) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.811387204069653, LR: 0.0003 +[2026-03-03 23:07:39] (step=0050147) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.811582860496967, LR: 0.0003 +[2026-03-03 23:07:47] (step=0050148) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.811778516924281, LR: 0.0003 +[2026-03-03 23:07:55] (step=0050149) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.811974173351594, LR: 0.0003 +[2026-03-03 23:08:03] (step=0050150) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.812169829778908, LR: 0.0003 +[2026-03-03 23:08:10] (step=0050151) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.812365486206222, LR: 0.0003 +[2026-03-03 23:08:18] (step=0050152) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.812561142633536, LR: 0.0003 +[2026-03-03 23:08:26] (step=0050153) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.81275679906085, LR: 0.0003 +[2026-03-03 23:08:34] (step=0050154) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.812952455488162, LR: 0.0003 +[2026-03-03 23:08:42] (step=0050155) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.813148111915476, LR: 0.0003 +[2026-03-03 23:08:50] (step=0050156) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.81334376834279, LR: 0.0003 +[2026-03-03 23:08:58] (step=0050157) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.813539424770104, LR: 0.0003 +[2026-03-03 23:09:06] (step=0050158) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.813735081197418, LR: 0.0003 +[2026-03-03 23:09:13] (step=0050159) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.81393073762473, LR: 0.0003 +[2026-03-03 23:09:21] (step=0050160) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.814126394052044, LR: 0.0003 +[2026-03-03 23:09:29] (step=0050161) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.814322050479358, LR: 0.0003 +[2026-03-03 23:09:37] (step=0050162) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.814517706906672, LR: 0.0003 +[2026-03-03 23:09:45] (step=0050163) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.814713363333986, LR: 0.0003 +[2026-03-03 23:09:53] (step=0050164) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.814909019761298, LR: 0.0003 +[2026-03-03 23:10:01] (step=0050165) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.815104676188612, LR: 0.0003 +[2026-03-03 23:10:09] (step=0050166) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.815300332615926, LR: 0.0003 +[2026-03-03 23:10:16] (step=0050167) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.81549598904324, LR: 0.0003 +[2026-03-03 23:10:24] (step=0050168) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 9.815691645470555, LR: 0.0003 +[2026-03-03 23:10:32] (step=0050169) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 9.815887301897867, LR: 0.0003 +[2026-03-03 23:10:40] (step=0050170) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.81608295832518, LR: 0.0003 +[2026-03-03 23:10:48] (step=0050171) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.816278614752495, LR: 0.0003 +[2026-03-03 23:10:56] (step=0050172) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.816474271179809, LR: 0.0003 +[2026-03-03 23:11:04] (step=0050173) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.816669927607123, LR: 0.0003 +[2026-03-03 23:11:11] (step=0050174) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.816865584034435, LR: 0.0003 +[2026-03-03 23:11:19] (step=0050175) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.817061240461749, LR: 0.0003 +[2026-03-03 23:11:27] (step=0050176) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.817256896889063, LR: 0.0003 +[2026-03-03 23:11:35] (step=0050177) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.817452553316377, LR: 0.0003 +[2026-03-03 23:11:43] (step=0050178) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.81764820974369, LR: 0.0003 +[2026-03-03 23:11:51] (step=0050179) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.817843866171003, LR: 0.0003 +[2026-03-03 23:11:59] (step=0050180) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 9.818039522598317, LR: 0.0003 +[2026-03-03 23:12:07] (step=0050181) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.818235179025631, LR: 0.0003 +[2026-03-03 23:12:14] (step=0050182) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.818430835452945, LR: 0.0003 +[2026-03-03 23:12:22] (step=0050183) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.818626491880257, LR: 0.0003 +[2026-03-03 23:12:30] (step=0050184) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.818822148307571, LR: 0.0003 +[2026-03-03 23:12:38] (step=0050185) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.819017804734886, LR: 0.0003 +[2026-03-03 23:12:46] (step=0050186) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.8192134611622, LR: 0.0003 +[2026-03-03 23:12:54] (step=0050187) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 9.819409117589514, LR: 0.0003 +[2026-03-03 23:13:02] (step=0050188) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.819604774016826, LR: 0.0003 +[2026-03-03 23:13:09] (step=0050189) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.81980043044414, LR: 0.0003 +[2026-03-03 23:13:17] (step=0050190) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.819996086871454, LR: 0.0003 +[2026-03-03 23:13:25] (step=0050191) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.820191743298768, LR: 0.0003 +[2026-03-03 23:13:33] (step=0050192) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 9.820387399726082, LR: 0.0003 +[2026-03-03 23:13:41] (step=0050193) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.820583056153394, LR: 0.0003 +[2026-03-03 23:13:49] (step=0050194) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.820778712580708, LR: 0.0003 +[2026-03-03 23:13:57] (step=0050195) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.820974369008022, LR: 0.0003 +[2026-03-03 23:14:04] (step=0050196) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.821170025435336, LR: 0.0003 +[2026-03-03 23:14:12] (step=0050197) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.82136568186265, LR: 0.0003 +[2026-03-03 23:14:20] (step=0050198) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.821561338289962, LR: 0.0003 +[2026-03-03 23:14:28] (step=0050199) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.821756994717276, LR: 0.0003 +[2026-03-03 23:14:36] (step=0050200) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.82195265114459, LR: 0.0003 +[2026-03-03 23:14:44] (step=0050201) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.822148307571904, LR: 0.0003 +[2026-03-03 23:14:52] (step=0050202) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 9.822343963999216, LR: 0.0003 +[2026-03-03 23:15:00] (step=0050203) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.82253962042653, LR: 0.0003 +[2026-03-03 23:15:07] (step=0050204) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.822735276853845, LR: 0.0003 +[2026-03-03 23:15:15] (step=0050205) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.822930933281159, LR: 0.0003 +[2026-03-03 23:15:23] (step=0050206) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.823126589708473, LR: 0.0003 +[2026-03-03 23:15:31] (step=0050207) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.823322246135785, LR: 0.0003 +[2026-03-03 23:15:39] (step=0050208) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.823517902563099, LR: 0.0003 +[2026-03-03 23:15:47] (step=0050209) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.823713558990413, LR: 0.0003 +[2026-03-03 23:15:55] (step=0050210) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.823909215417727, LR: 0.0003 +[2026-03-03 23:16:03] (step=0050211) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.82410487184504, LR: 0.0003 +[2026-03-03 23:16:10] (step=0050212) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.824300528272353, LR: 0.0003 +[2026-03-03 23:16:18] (step=0050213) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.824496184699667, LR: 0.0003 +[2026-03-03 23:16:26] (step=0050214) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.824691841126981, LR: 0.0003 +[2026-03-03 23:16:34] (step=0050215) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 9.824887497554295, LR: 0.0003 +[2026-03-03 23:16:42] (step=0050216) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.825083153981609, LR: 0.0003 +[2026-03-03 23:16:50] (step=0050217) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 9.825278810408921, LR: 0.0003 +[2026-03-03 23:16:57] (step=0050218) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.825474466836235, LR: 0.0003 +[2026-03-03 23:17:05] (step=0050219) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.82567012326355, LR: 0.0003 +[2026-03-03 23:17:13] (step=0050220) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.825865779690863, LR: 0.0003 +[2026-03-03 23:17:21] (step=0050221) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.826061436118177, LR: 0.0003 +[2026-03-03 23:17:29] (step=0050222) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.82625709254549, LR: 0.0003 +[2026-03-03 23:17:37] (step=0050223) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.826452748972804, LR: 0.0003 +[2026-03-03 23:17:45] (step=0050224) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.826648405400118, LR: 0.0003 +[2026-03-03 23:17:52] (step=0050225) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.826844061827432, LR: 0.0003 +[2026-03-03 23:18:00] (step=0050226) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.827039718254746, LR: 0.0003 +[2026-03-03 23:18:08] (step=0050227) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.827235374682058, LR: 0.0003 +[2026-03-03 23:18:16] (step=0050228) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.827431031109372, LR: 0.0003 +[2026-03-03 23:18:24] (step=0050229) Train Loss: 0.4467, Train Steps/Sec: 0.12, Epoch: 9.827626687536686, LR: 0.0003 +[2026-03-03 23:18:32] (step=0050230) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.827822343964, LR: 0.0003 +[2026-03-03 23:18:40] (step=0050231) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.828018000391312, LR: 0.0003 +[2026-03-03 23:18:48] (step=0050232) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.828213656818626, LR: 0.0003 +[2026-03-03 23:18:56] (step=0050233) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.82840931324594, LR: 0.0003 +[2026-03-03 23:19:03] (step=0050234) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 9.828604969673254, LR: 0.0003 +[2026-03-03 23:19:11] (step=0050235) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.828800626100568, LR: 0.0003 +[2026-03-03 23:19:19] (step=0050236) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.82899628252788, LR: 0.0003 +[2026-03-03 23:19:27] (step=0050237) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.829191938955194, LR: 0.0003 +[2026-03-03 23:19:35] (step=0050238) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.829387595382508, LR: 0.0003 +[2026-03-03 23:19:43] (step=0050239) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.829583251809822, LR: 0.0003 +[2026-03-03 23:19:51] (step=0050240) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.829778908237136, LR: 0.0003 +[2026-03-03 23:19:58] (step=0050241) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.829974564664449, LR: 0.0003 +[2026-03-03 23:20:06] (step=0050242) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.830170221091763, LR: 0.0003 +[2026-03-03 23:20:14] (step=0050243) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 9.830365877519077, LR: 0.0003 +[2026-03-03 23:20:22] (step=0050244) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.83056153394639, LR: 0.0003 +[2026-03-03 23:20:30] (step=0050245) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 9.830757190373705, LR: 0.0003 +[2026-03-03 23:20:38] (step=0050246) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.830952846801017, LR: 0.0003 +[2026-03-03 23:20:46] (step=0050247) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 9.83114850322833, LR: 0.0003 +[2026-03-03 23:20:54] (step=0050248) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 9.831344159655645, LR: 0.0003 +[2026-03-03 23:21:01] (step=0050249) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.831539816082959, LR: 0.0003 +[2026-03-03 23:21:09] (step=0050250) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.831735472510273, LR: 0.0003 +[2026-03-03 23:21:17] (step=0050251) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 9.831931128937585, LR: 0.0003 +[2026-03-03 23:21:25] (step=0050252) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 9.832126785364899, LR: 0.0003 +[2026-03-03 23:21:33] (step=0050253) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 9.832322441792213, LR: 0.0003 +[2026-03-03 23:21:41] (step=0050254) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.832518098219527, LR: 0.0003 +[2026-03-03 23:21:49] (step=0050255) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.83271375464684, LR: 0.0003 +[2026-03-03 23:21:56] (step=0050256) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 9.832909411074153, LR: 0.0003 +[2026-03-03 23:22:04] (step=0050257) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.833105067501467, LR: 0.0003 +[2026-03-03 23:22:12] (step=0050258) Train Loss: 0.4361, Train Steps/Sec: 0.12, Epoch: 9.833300723928781, LR: 0.0003 +[2026-03-03 23:22:20] (step=0050259) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 9.833496380356095, LR: 0.0003 +[2026-03-03 23:22:28] (step=0050260) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.833692036783408, LR: 0.0003 +[2026-03-03 23:22:36] (step=0050261) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.833887693210722, LR: 0.0003 +[2026-03-03 23:22:44] (step=0050262) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.834083349638036, LR: 0.0003 +[2026-03-03 23:22:52] (step=0050263) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.83427900606535, LR: 0.0003 +[2026-03-03 23:23:00] (step=0050264) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.834474662492664, LR: 0.0003 +[2026-03-03 23:23:07] (step=0050265) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.834670318919976, LR: 0.0003 +[2026-03-03 23:23:15] (step=0050266) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 9.83486597534729, LR: 0.0003 +[2026-03-03 23:23:23] (step=0050267) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.835061631774604, LR: 0.0003 +[2026-03-03 23:23:31] (step=0050268) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.835257288201918, LR: 0.0003 +[2026-03-03 23:23:39] (step=0050269) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.835452944629232, LR: 0.0003 +[2026-03-03 23:23:47] (step=0050270) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.835648601056544, LR: 0.0003 +[2026-03-03 23:23:55] (step=0050271) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.835844257483858, LR: 0.0003 +[2026-03-03 23:24:02] (step=0050272) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.836039913911172, LR: 0.0003 +[2026-03-03 23:24:10] (step=0050273) Train Loss: 0.4487, Train Steps/Sec: 0.12, Epoch: 9.836235570338486, LR: 0.0003 +[2026-03-03 23:24:18] (step=0050274) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.8364312267658, LR: 0.0003 +[2026-03-03 23:24:26] (step=0050275) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.836626883193112, LR: 0.0003 +[2026-03-03 23:24:34] (step=0050276) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.836822539620426, LR: 0.0003 +[2026-03-03 23:24:42] (step=0050277) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 9.83701819604774, LR: 0.0003 +[2026-03-03 23:24:50] (step=0050278) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 9.837213852475054, LR: 0.0003 +[2026-03-03 23:24:58] (step=0050279) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.837409508902368, LR: 0.0003 +[2026-03-03 23:25:05] (step=0050280) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 9.83760516532968, LR: 0.0003 +[2026-03-03 23:25:13] (step=0050281) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.837800821756995, LR: 0.0003 +[2026-03-03 23:25:21] (step=0050282) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.837996478184309, LR: 0.0003 +[2026-03-03 23:25:29] (step=0050283) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.838192134611623, LR: 0.0003 +[2026-03-03 23:25:37] (step=0050284) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.838387791038935, LR: 0.0003 +[2026-03-03 23:25:45] (step=0050285) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.838583447466249, LR: 0.0003 +[2026-03-03 23:25:53] (step=0050286) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.838779103893563, LR: 0.0003 +[2026-03-03 23:26:01] (step=0050287) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.838974760320877, LR: 0.0003 +[2026-03-03 23:26:08] (step=0050288) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.83917041674819, LR: 0.0003 +[2026-03-03 23:26:16] (step=0050289) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.839366073175503, LR: 0.0003 +[2026-03-03 23:26:24] (step=0050290) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.839561729602817, LR: 0.0003 +[2026-03-03 23:26:32] (step=0050291) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.839757386030131, LR: 0.0003 +[2026-03-03 23:26:40] (step=0050292) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.839953042457445, LR: 0.0003 +[2026-03-03 23:26:48] (step=0050293) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.840148698884759, LR: 0.0003 +[2026-03-03 23:26:56] (step=0050294) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 9.840344355312071, LR: 0.0003 +[2026-03-03 23:27:03] (step=0050295) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 9.840540011739385, LR: 0.0003 +[2026-03-03 23:27:11] (step=0050296) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.8407356681667, LR: 0.0003 +[2026-03-03 23:27:19] (step=0050297) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.840931324594013, LR: 0.0003 +[2026-03-03 23:27:27] (step=0050298) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 9.841126981021327, LR: 0.0003 +[2026-03-03 23:27:35] (step=0050299) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.84132263744864, LR: 0.0003 +[2026-03-03 23:27:43] (step=0050300) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.841518293875954, LR: 0.0003 +[2026-03-03 23:27:51] (step=0050301) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.841713950303268, LR: 0.0003 +[2026-03-03 23:27:58] (step=0050302) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.841909606730582, LR: 0.0003 +[2026-03-03 23:28:06] (step=0050303) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.842105263157896, LR: 0.0003 +[2026-03-03 23:28:14] (step=0050304) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.842300919585208, LR: 0.0003 +[2026-03-03 23:28:22] (step=0050305) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.842496576012522, LR: 0.0003 +[2026-03-03 23:28:30] (step=0050306) Train Loss: 0.4260, Train Steps/Sec: 0.13, Epoch: 9.842692232439836, LR: 0.0003 +[2026-03-03 23:28:38] (step=0050307) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.84288788886715, LR: 0.0003 +[2026-03-03 23:28:46] (step=0050308) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.843083545294462, LR: 0.0003 +[2026-03-03 23:28:54] (step=0050309) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 9.843279201721776, LR: 0.0003 +[2026-03-03 23:29:01] (step=0050310) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.84347485814909, LR: 0.0003 +[2026-03-03 23:29:09] (step=0050311) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.843670514576404, LR: 0.0003 +[2026-03-03 23:29:17] (step=0050312) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.843866171003718, LR: 0.0003 +[2026-03-03 23:29:25] (step=0050313) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.84406182743103, LR: 0.0003 +[2026-03-03 23:29:33] (step=0050314) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.844257483858344, LR: 0.0003 +[2026-03-03 23:29:41] (step=0050315) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.844453140285658, LR: 0.0003 +[2026-03-03 23:29:49] (step=0050316) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 9.844648796712972, LR: 0.0003 +[2026-03-03 23:29:56] (step=0050317) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.844844453140286, LR: 0.0003 +[2026-03-03 23:30:04] (step=0050318) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.845040109567599, LR: 0.0003 +[2026-03-03 23:30:12] (step=0050319) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.845235765994913, LR: 0.0003 +[2026-03-03 23:30:20] (step=0050320) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.845431422422227, LR: 0.0003 +[2026-03-03 23:30:28] (step=0050321) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.84562707884954, LR: 0.0003 +[2026-03-03 23:30:36] (step=0050322) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.845822735276855, LR: 0.0003 +[2026-03-03 23:30:44] (step=0050323) Train Loss: 0.4420, Train Steps/Sec: 0.12, Epoch: 9.846018391704167, LR: 0.0003 +[2026-03-03 23:30:52] (step=0050324) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.84621404813148, LR: 0.0003 +[2026-03-03 23:30:59] (step=0050325) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.846409704558795, LR: 0.0003 +[2026-03-03 23:31:07] (step=0050326) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.846605360986109, LR: 0.0003 +[2026-03-03 23:31:15] (step=0050327) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.846801017413423, LR: 0.0003 +[2026-03-03 23:31:23] (step=0050328) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 9.846996673840735, LR: 0.0003 +[2026-03-03 23:31:31] (step=0050329) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.847192330268049, LR: 0.0003 +[2026-03-03 23:31:39] (step=0050330) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.847387986695363, LR: 0.0003 +[2026-03-03 23:31:47] (step=0050331) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.847583643122677, LR: 0.0003 +[2026-03-03 23:31:55] (step=0050332) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.847779299549991, LR: 0.0003 +[2026-03-03 23:32:02] (step=0050333) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 9.847974955977303, LR: 0.0003 +[2026-03-03 23:32:10] (step=0050334) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.848170612404617, LR: 0.0003 +[2026-03-03 23:32:18] (step=0050335) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.848366268831931, LR: 0.0003 +[2026-03-03 23:32:26] (step=0050336) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.848561925259245, LR: 0.0003 +[2026-03-03 23:32:34] (step=0050337) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.848757581686558, LR: 0.0003 +[2026-03-03 23:32:42] (step=0050338) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 9.848953238113872, LR: 0.0003 +[2026-03-03 23:32:49] (step=0050339) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.849148894541186, LR: 0.0003 +[2026-03-03 23:32:57] (step=0050340) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.8493445509685, LR: 0.0003 +[2026-03-03 23:33:05] (step=0050341) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 9.849540207395814, LR: 0.0003 +[2026-03-03 23:33:13] (step=0050342) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.849735863823126, LR: 0.0003 +[2026-03-03 23:33:21] (step=0050343) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.84993152025044, LR: 0.0003 +[2026-03-03 23:33:29] (step=0050344) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.850127176677754, LR: 0.0003 +[2026-03-03 23:33:37] (step=0050345) Train Loss: 0.4646, Train Steps/Sec: 0.13, Epoch: 9.850322833105068, LR: 0.0003 +[2026-03-03 23:33:44] (step=0050346) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 9.850518489532382, LR: 0.0003 +[2026-03-03 23:33:52] (step=0050347) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.850714145959694, LR: 0.0003 +[2026-03-03 23:34:00] (step=0050348) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.850909802387008, LR: 0.0003 +[2026-03-03 23:34:08] (step=0050349) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.851105458814322, LR: 0.0003 +[2026-03-03 23:34:16] (step=0050350) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.851301115241636, LR: 0.0003 +[2026-03-03 23:34:24] (step=0050351) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.85149677166895, LR: 0.0003 +[2026-03-03 23:34:32] (step=0050352) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.851692428096262, LR: 0.0003 +[2026-03-03 23:34:40] (step=0050353) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.851888084523576, LR: 0.0003 +[2026-03-03 23:34:48] (step=0050354) Train Loss: 0.4563, Train Steps/Sec: 0.12, Epoch: 9.85208374095089, LR: 0.0003 +[2026-03-03 23:34:55] (step=0050355) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 9.852279397378204, LR: 0.0003 +[2026-03-03 23:35:03] (step=0050356) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.852475053805518, LR: 0.0003 +[2026-03-03 23:35:11] (step=0050357) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.85267071023283, LR: 0.0003 +[2026-03-03 23:35:19] (step=0050358) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.852866366660145, LR: 0.0003 +[2026-03-03 23:35:27] (step=0050359) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.853062023087459, LR: 0.0003 +[2026-03-03 23:35:35] (step=0050360) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.853257679514773, LR: 0.0003 +[2026-03-03 23:35:43] (step=0050361) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 9.853453335942085, LR: 0.0003 +[2026-03-03 23:35:50] (step=0050362) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.853648992369399, LR: 0.0003 +[2026-03-03 23:35:58] (step=0050363) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.853844648796713, LR: 0.0003 +[2026-03-03 23:36:06] (step=0050364) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.854040305224027, LR: 0.0003 +[2026-03-03 23:36:14] (step=0050365) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.85423596165134, LR: 0.0003 +[2026-03-03 23:36:22] (step=0050366) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.854431618078653, LR: 0.0003 +[2026-03-03 23:36:30] (step=0050367) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.854627274505967, LR: 0.0003 +[2026-03-03 23:36:38] (step=0050368) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.854822930933281, LR: 0.0003 +[2026-03-03 23:36:45] (step=0050369) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 9.855018587360595, LR: 0.0003 +[2026-03-03 23:36:53] (step=0050370) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 9.85521424378791, LR: 0.0003 +[2026-03-03 23:37:01] (step=0050371) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.855409900215221, LR: 0.0003 +[2026-03-03 23:37:09] (step=0050372) Train Loss: 0.4657, Train Steps/Sec: 0.13, Epoch: 9.855605556642535, LR: 0.0003 +[2026-03-03 23:37:17] (step=0050373) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.85580121306985, LR: 0.0003 +[2026-03-03 23:37:25] (step=0050374) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.855996869497163, LR: 0.0003 +[2026-03-03 23:37:33] (step=0050375) Train Loss: 0.4412, Train Steps/Sec: 0.12, Epoch: 9.856192525924477, LR: 0.0003 +[2026-03-03 23:37:41] (step=0050376) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.85638818235179, LR: 0.0003 +[2026-03-03 23:37:49] (step=0050377) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.856583838779104, LR: 0.0003 +[2026-03-03 23:37:56] (step=0050378) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 9.856779495206418, LR: 0.0003 +[2026-03-03 23:38:04] (step=0050379) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.856975151633732, LR: 0.0003 +[2026-03-03 23:38:12] (step=0050380) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.857170808061046, LR: 0.0003 +[2026-03-03 23:38:20] (step=0050381) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.857366464488358, LR: 0.0003 +[2026-03-03 23:38:28] (step=0050382) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 9.857562120915672, LR: 0.0003 +[2026-03-03 23:38:36] (step=0050383) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.857757777342986, LR: 0.0003 +[2026-03-03 23:38:44] (step=0050384) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.8579534337703, LR: 0.0003 +[2026-03-03 23:38:51] (step=0050385) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.858149090197614, LR: 0.0003 +[2026-03-03 23:38:59] (step=0050386) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.858344746624926, LR: 0.0003 +[2026-03-03 23:39:07] (step=0050387) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.85854040305224, LR: 0.0003 +[2026-03-03 23:39:15] (step=0050388) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.858736059479554, LR: 0.0003 +[2026-03-03 23:39:23] (step=0050389) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.858931715906868, LR: 0.0003 +[2026-03-03 23:39:31] (step=0050390) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.85912737233418, LR: 0.0003 +[2026-03-03 23:39:39] (step=0050391) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.859323028761494, LR: 0.0003 +[2026-03-03 23:39:46] (step=0050392) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 9.859518685188808, LR: 0.0003 +[2026-03-03 23:39:54] (step=0050393) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.859714341616122, LR: 0.0003 +[2026-03-03 23:40:02] (step=0050394) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 9.859909998043436, LR: 0.0003 +[2026-03-03 23:40:10] (step=0050395) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.860105654470749, LR: 0.0003 +[2026-03-03 23:40:18] (step=0050396) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.860301310898063, LR: 0.0003 +[2026-03-03 23:40:26] (step=0050397) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.860496967325377, LR: 0.0003 +[2026-03-03 23:40:34] (step=0050398) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.86069262375269, LR: 0.0003 +[2026-03-03 23:40:41] (step=0050399) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.860888280180005, LR: 0.0003 +[2026-03-03 23:40:49] (step=0050400) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 9.861083936607317, LR: 0.0003 +[2026-03-03 23:40:57] (step=0050401) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.86127959303463, LR: 0.0003 +[2026-03-03 23:41:05] (step=0050402) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.861475249461945, LR: 0.0003 +[2026-03-03 23:41:13] (step=0050403) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.861670905889259, LR: 0.0003 +[2026-03-03 23:41:21] (step=0050404) Train Loss: 0.4341, Train Steps/Sec: 0.12, Epoch: 9.861866562316573, LR: 0.0003 +[2026-03-03 23:41:29] (step=0050405) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.862062218743885, LR: 0.0003 +[2026-03-03 23:41:37] (step=0050406) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.8622578751712, LR: 0.0003 +[2026-03-03 23:41:44] (step=0050407) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.862453531598513, LR: 0.0003 +[2026-03-03 23:41:52] (step=0050408) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.862649188025827, LR: 0.0003 +[2026-03-03 23:42:00] (step=0050409) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.862844844453141, LR: 0.0003 +[2026-03-03 23:42:08] (step=0050410) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.863040500880453, LR: 0.0003 +[2026-03-03 23:42:16] (step=0050411) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.863236157307767, LR: 0.0003 +[2026-03-03 23:42:24] (step=0050412) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.863431813735081, LR: 0.0003 +[2026-03-03 23:42:32] (step=0050413) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 9.863627470162395, LR: 0.0003 +[2026-03-03 23:42:40] (step=0050414) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.863823126589708, LR: 0.0003 +[2026-03-03 23:42:48] (step=0050415) Train Loss: 0.4382, Train Steps/Sec: 0.12, Epoch: 9.864018783017022, LR: 0.0003 +[2026-03-03 23:42:55] (step=0050416) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.864214439444336, LR: 0.0003 +[2026-03-03 23:43:03] (step=0050417) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.86441009587165, LR: 0.0003 +[2026-03-03 23:43:11] (step=0050418) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 9.864605752298964, LR: 0.0003 +[2026-03-03 23:43:19] (step=0050419) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.864801408726276, LR: 0.0003 +[2026-03-03 23:43:27] (step=0050420) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 9.86499706515359, LR: 0.0003 +[2026-03-03 23:43:35] (step=0050421) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.865192721580904, LR: 0.0003 +[2026-03-03 23:43:43] (step=0050422) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.865388378008218, LR: 0.0003 +[2026-03-03 23:43:50] (step=0050423) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.865584034435532, LR: 0.0003 +[2026-03-03 23:43:58] (step=0050424) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 9.865779690862844, LR: 0.0003 +[2026-03-03 23:44:06] (step=0050425) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.865975347290158, LR: 0.0003 +[2026-03-03 23:44:14] (step=0050426) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 9.866171003717472, LR: 0.0003 +[2026-03-03 23:44:22] (step=0050427) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.866366660144786, LR: 0.0003 +[2026-03-03 23:44:30] (step=0050428) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.8665623165721, LR: 0.0003 +[2026-03-03 23:44:38] (step=0050429) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.866757972999412, LR: 0.0003 +[2026-03-03 23:44:45] (step=0050430) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.866953629426726, LR: 0.0003 +[2026-03-03 23:44:53] (step=0050431) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.86714928585404, LR: 0.0003 +[2026-03-03 23:45:01] (step=0050432) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.867344942281354, LR: 0.0003 +[2026-03-03 23:45:09] (step=0050433) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.867540598708668, LR: 0.0003 +[2026-03-03 23:45:17] (step=0050434) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.86773625513598, LR: 0.0003 +[2026-03-03 23:45:25] (step=0050435) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.867931911563295, LR: 0.0003 +[2026-03-03 23:45:33] (step=0050436) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.868127567990609, LR: 0.0003 +[2026-03-03 23:45:40] (step=0050437) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.868323224417923, LR: 0.0003 +[2026-03-03 23:45:48] (step=0050438) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.868518880845237, LR: 0.0003 +[2026-03-03 23:45:56] (step=0050439) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.868714537272549, LR: 0.0003 +[2026-03-03 23:46:04] (step=0050440) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.868910193699863, LR: 0.0003 +[2026-03-03 23:46:12] (step=0050441) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.869105850127177, LR: 0.0003 +[2026-03-03 23:46:20] (step=0050442) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.869301506554491, LR: 0.0003 +[2026-03-03 23:46:28] (step=0050443) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 9.869497162981803, LR: 0.0003 +[2026-03-03 23:46:35] (step=0050444) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.869692819409117, LR: 0.0003 +[2026-03-03 23:46:43] (step=0050445) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.869888475836431, LR: 0.0003 +[2026-03-03 23:46:51] (step=0050446) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 9.870084132263745, LR: 0.0003 +[2026-03-03 23:46:59] (step=0050447) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 9.87027978869106, LR: 0.0003 +[2026-03-03 23:47:07] (step=0050448) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.870475445118371, LR: 0.0003 +[2026-03-03 23:47:15] (step=0050449) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.870671101545685, LR: 0.0003 +[2026-03-03 23:47:23] (step=0050450) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.870866757973, LR: 0.0003 +[2026-03-03 23:47:31] (step=0050451) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.871062414400313, LR: 0.0003 +[2026-03-03 23:47:38] (step=0050452) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.871258070827627, LR: 0.0003 +[2026-03-03 23:47:46] (step=0050453) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.87145372725494, LR: 0.0003 +[2026-03-03 23:47:54] (step=0050454) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.871649383682254, LR: 0.0003 +[2026-03-03 23:48:02] (step=0050455) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.871845040109568, LR: 0.0003 +[2026-03-03 23:48:10] (step=0050456) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.872040696536882, LR: 0.0003 +[2026-03-03 23:48:18] (step=0050457) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.872236352964196, LR: 0.0003 +[2026-03-03 23:48:26] (step=0050458) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 9.872432009391508, LR: 0.0003 +[2026-03-03 23:48:34] (step=0050459) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.872627665818822, LR: 0.0003 +[2026-03-03 23:48:41] (step=0050460) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.872823322246136, LR: 0.0003 +[2026-03-03 23:48:49] (step=0050461) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.87301897867345, LR: 0.0003 +[2026-03-03 23:48:57] (step=0050462) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.873214635100764, LR: 0.0003 +[2026-03-03 23:49:05] (step=0050463) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.873410291528076, LR: 0.0003 +[2026-03-03 23:49:13] (step=0050464) Train Loss: 0.4373, Train Steps/Sec: 0.12, Epoch: 9.87360594795539, LR: 0.0003 +[2026-03-03 23:49:21] (step=0050465) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.873801604382704, LR: 0.0003 +[2026-03-03 23:49:29] (step=0050466) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.873997260810018, LR: 0.0003 +[2026-03-03 23:49:37] (step=0050467) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 9.87419291723733, LR: 0.0003 +[2026-03-03 23:49:44] (step=0050468) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.874388573664644, LR: 0.0003 +[2026-03-03 23:49:52] (step=0050469) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.874584230091958, LR: 0.0003 +[2026-03-03 23:50:00] (step=0050470) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 9.874779886519272, LR: 0.0003 +[2026-03-03 23:50:08] (step=0050471) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 9.874975542946586, LR: 0.0003 +[2026-03-03 23:50:16] (step=0050472) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.875171199373899, LR: 0.0003 +[2026-03-03 23:50:24] (step=0050473) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.875366855801213, LR: 0.0003 +[2026-03-03 23:50:32] (step=0050474) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.875562512228527, LR: 0.0003 +[2026-03-03 23:50:39] (step=0050475) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 9.87575816865584, LR: 0.0003 +[2026-03-03 23:50:47] (step=0050476) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.875953825083155, LR: 0.0003 +[2026-03-03 23:50:55] (step=0050477) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.876149481510467, LR: 0.0003 +[2026-03-03 23:51:03] (step=0050478) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.876345137937781, LR: 0.0003 +[2026-03-03 23:51:11] (step=0050479) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 9.876540794365095, LR: 0.0003 +[2026-03-03 23:51:19] (step=0050480) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.876736450792409, LR: 0.0003 +[2026-03-03 23:51:27] (step=0050481) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.876932107219723, LR: 0.0003 +[2026-03-03 23:51:34] (step=0050482) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 9.877127763647035, LR: 0.0003 +[2026-03-03 23:51:42] (step=0050483) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.87732342007435, LR: 0.0003 +[2026-03-03 23:51:50] (step=0050484) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 9.877519076501663, LR: 0.0003 +[2026-03-03 23:51:58] (step=0050485) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 9.877714732928977, LR: 0.0003 +[2026-03-03 23:52:06] (step=0050486) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.877910389356291, LR: 0.0003 +[2026-03-03 23:52:14] (step=0050487) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.878106045783603, LR: 0.0003 +[2026-03-03 23:52:22] (step=0050488) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.878301702210917, LR: 0.0003 +[2026-03-03 23:52:29] (step=0050489) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.878497358638231, LR: 0.0003 +[2026-03-03 23:52:37] (step=0050490) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.878693015065545, LR: 0.0003 +[2026-03-03 23:52:45] (step=0050491) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.878888671492858, LR: 0.0003 +[2026-03-03 23:52:53] (step=0050492) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 9.879084327920172, LR: 0.0003 +[2026-03-03 23:53:01] (step=0050493) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.879279984347486, LR: 0.0003 +[2026-03-03 23:53:09] (step=0050494) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.8794756407748, LR: 0.0003 +[2026-03-03 23:53:17] (step=0050495) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 9.879671297202114, LR: 0.0003 +[2026-03-03 23:53:24] (step=0050496) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 9.879866953629426, LR: 0.0003 +[2026-03-03 23:53:32] (step=0050497) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.88006261005674, LR: 0.0003 +[2026-03-03 23:53:40] (step=0050498) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.880258266484054, LR: 0.0003 +[2026-03-03 23:53:48] (step=0050499) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.880453922911368, LR: 0.0003 +[2026-03-03 23:53:56] (step=0050500) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 9.880649579338682, LR: 0.0003 +[2026-03-03 23:53:56] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0050500/ +[2026-03-03 23:54:04] (step=0050501) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 9.880845235765994, LR: 0.0003 +[2026-03-03 23:54:12] (step=0050502) Train Loss: 0.4389, Train Steps/Sec: 0.12, Epoch: 9.881040892193308, LR: 0.0003 +[2026-03-03 23:54:20] (step=0050503) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 9.881236548620622, LR: 0.0003 +[2026-03-03 23:54:27] (step=0050504) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.881432205047936, LR: 0.0003 +[2026-03-03 23:54:35] (step=0050505) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.88162786147525, LR: 0.0003 +[2026-03-03 23:54:43] (step=0050506) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.881823517902562, LR: 0.0003 +[2026-03-03 23:54:51] (step=0050507) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 9.882019174329876, LR: 0.0003 +[2026-03-03 23:54:59] (step=0050508) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.88221483075719, LR: 0.0003 +[2026-03-03 23:55:07] (step=0050509) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.882410487184504, LR: 0.0003 +[2026-03-03 23:55:15] (step=0050510) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.882606143611818, LR: 0.0003 +[2026-03-03 23:55:23] (step=0050511) Train Loss: 0.4415, Train Steps/Sec: 0.12, Epoch: 9.88280180003913, LR: 0.0003 +[2026-03-03 23:55:31] (step=0050512) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.882997456466445, LR: 0.0003 +[2026-03-03 23:55:38] (step=0050513) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 9.883193112893759, LR: 0.0003 +[2026-03-03 23:55:46] (step=0050514) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 9.883388769321073, LR: 0.0003 +[2026-03-03 23:55:54] (step=0050515) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.883584425748387, LR: 0.0003 +[2026-03-03 23:56:02] (step=0050516) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.883780082175699, LR: 0.0003 +[2026-03-03 23:56:10] (step=0050517) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.883975738603013, LR: 0.0003 +[2026-03-03 23:56:18] (step=0050518) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.884171395030327, LR: 0.0003 +[2026-03-03 23:56:26] (step=0050519) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.884367051457641, LR: 0.0003 +[2026-03-03 23:56:33] (step=0050520) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.884562707884953, LR: 0.0003 +[2026-03-03 23:56:41] (step=0050521) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 9.884758364312267, LR: 0.0003 +[2026-03-03 23:56:49] (step=0050522) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.884954020739581, LR: 0.0003 +[2026-03-03 23:56:57] (step=0050523) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.885149677166895, LR: 0.0003 +[2026-03-03 23:57:05] (step=0050524) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 9.88534533359421, LR: 0.0003 +[2026-03-03 23:57:13] (step=0050525) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.885540990021521, LR: 0.0003 +[2026-03-03 23:57:21] (step=0050526) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.885736646448835, LR: 0.0003 +[2026-03-03 23:57:28] (step=0050527) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.88593230287615, LR: 0.0003 +[2026-03-03 23:57:36] (step=0050528) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.886127959303463, LR: 0.0003 +[2026-03-03 23:57:44] (step=0050529) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.886323615730777, LR: 0.0003 +[2026-03-03 23:57:52] (step=0050530) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.88651927215809, LR: 0.0003 +[2026-03-03 23:58:00] (step=0050531) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 9.886714928585404, LR: 0.0003 +[2026-03-03 23:58:08] (step=0050532) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.886910585012718, LR: 0.0003 +[2026-03-03 23:58:16] (step=0050533) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.887106241440032, LR: 0.0003 +[2026-03-03 23:58:23] (step=0050534) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 9.887301897867346, LR: 0.0003 +[2026-03-03 23:58:31] (step=0050535) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.887497554294658, LR: 0.0003 +[2026-03-03 23:58:39] (step=0050536) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.887693210721972, LR: 0.0003 +[2026-03-03 23:58:47] (step=0050537) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.887888867149286, LR: 0.0003 +[2026-03-03 23:58:55] (step=0050538) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.8880845235766, LR: 0.0003 +[2026-03-03 23:59:03] (step=0050539) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 9.888280180003914, LR: 0.0003 +[2026-03-03 23:59:11] (step=0050540) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 9.888475836431226, LR: 0.0003 +[2026-03-03 23:59:19] (step=0050541) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.88867149285854, LR: 0.0003 +[2026-03-03 23:59:26] (step=0050542) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.888867149285854, LR: 0.0003 +[2026-03-03 23:59:34] (step=0050543) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.889062805713168, LR: 0.0003 +[2026-03-03 23:59:42] (step=0050544) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.88925846214048, LR: 0.0003 +[2026-03-03 23:59:50] (step=0050545) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.889454118567794, LR: 0.0003 +[2026-03-03 23:59:58] (step=0050546) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.889649774995108, LR: 0.0003 +[2026-03-04 00:00:06] (step=0050547) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.889845431422422, LR: 0.0003 +[2026-03-04 00:00:14] (step=0050548) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 9.890041087849736, LR: 0.0003 +[2026-03-04 00:00:21] (step=0050549) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.890236744277049, LR: 0.0003 +[2026-03-04 00:00:29] (step=0050550) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 9.890432400704363, LR: 0.0003 +[2026-03-04 00:00:37] (step=0050551) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.890628057131677, LR: 0.0003 +[2026-03-04 00:00:45] (step=0050552) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.89082371355899, LR: 0.0003 +[2026-03-04 00:00:53] (step=0050553) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.891019369986305, LR: 0.0003 +[2026-03-04 00:01:01] (step=0050554) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.891215026413617, LR: 0.0003 +[2026-03-04 00:01:09] (step=0050555) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.891410682840931, LR: 0.0003 +[2026-03-04 00:01:17] (step=0050556) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 9.891606339268245, LR: 0.0003 +[2026-03-04 00:01:24] (step=0050557) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.891801995695559, LR: 0.0003 +[2026-03-04 00:01:32] (step=0050558) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.891997652122873, LR: 0.0003 +[2026-03-04 00:01:40] (step=0050559) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.892193308550185, LR: 0.0003 +[2026-03-04 00:01:48] (step=0050560) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.8923889649775, LR: 0.0003 +[2026-03-04 00:01:56] (step=0050561) Train Loss: 0.4489, Train Steps/Sec: 0.12, Epoch: 9.892584621404813, LR: 0.0003 +[2026-03-04 00:02:04] (step=0050562) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.892780277832127, LR: 0.0003 +[2026-03-04 00:02:12] (step=0050563) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.892975934259441, LR: 0.0003 +[2026-03-04 00:02:20] (step=0050564) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 9.893171590686753, LR: 0.0003 +[2026-03-04 00:02:28] (step=0050565) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.893367247114067, LR: 0.0003 +[2026-03-04 00:02:35] (step=0050566) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 9.893562903541381, LR: 0.0003 +[2026-03-04 00:02:43] (step=0050567) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.893758559968695, LR: 0.0003 +[2026-03-04 00:02:51] (step=0050568) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 9.89395421639601, LR: 0.0003 +[2026-03-04 00:02:59] (step=0050569) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.894149872823322, LR: 0.0003 +[2026-03-04 00:03:07] (step=0050570) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 9.894345529250636, LR: 0.0003 +[2026-03-04 00:03:15] (step=0050571) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 9.89454118567795, LR: 0.0003 +[2026-03-04 00:03:23] (step=0050572) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 9.894736842105264, LR: 0.0003 +[2026-03-04 00:03:30] (step=0050573) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.894932498532576, LR: 0.0003 +[2026-03-04 00:03:38] (step=0050574) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.89512815495989, LR: 0.0003 +[2026-03-04 00:03:46] (step=0050575) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.895323811387204, LR: 0.0003 +[2026-03-04 00:03:54] (step=0050576) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.895519467814518, LR: 0.0003 +[2026-03-04 00:04:02] (step=0050577) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.895715124241832, LR: 0.0003 +[2026-03-04 00:04:10] (step=0050578) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 9.895910780669144, LR: 0.0003 +[2026-03-04 00:04:18] (step=0050579) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.896106437096458, LR: 0.0003 +[2026-03-04 00:04:25] (step=0050580) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.896302093523772, LR: 0.0003 +[2026-03-04 00:04:33] (step=0050581) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.896497749951086, LR: 0.0003 +[2026-03-04 00:04:41] (step=0050582) Train Loss: 0.4192, Train Steps/Sec: 0.13, Epoch: 9.8966934063784, LR: 0.0003 +[2026-03-04 00:04:49] (step=0050583) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.896889062805712, LR: 0.0003 +[2026-03-04 00:04:57] (step=0050584) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.897084719233026, LR: 0.0003 +[2026-03-04 00:05:05] (step=0050585) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 9.89728037566034, LR: 0.0003 +[2026-03-04 00:05:13] (step=0050586) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.897476032087654, LR: 0.0003 +[2026-03-04 00:05:20] (step=0050587) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.897671688514968, LR: 0.0003 +[2026-03-04 00:05:28] (step=0050588) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 9.89786734494228, LR: 0.0003 +[2026-03-04 00:05:36] (step=0050589) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 9.898063001369595, LR: 0.0003 +[2026-03-04 00:05:44] (step=0050590) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.898258657796909, LR: 0.0003 +[2026-03-04 00:05:52] (step=0050591) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 9.898454314224223, LR: 0.0003 +[2026-03-04 00:06:00] (step=0050592) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.898649970651537, LR: 0.0003 +[2026-03-04 00:06:08] (step=0050593) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.898845627078849, LR: 0.0003 +[2026-03-04 00:06:15] (step=0050594) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 9.899041283506163, LR: 0.0003 +[2026-03-04 00:06:23] (step=0050595) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 9.899236939933477, LR: 0.0003 +[2026-03-04 00:06:31] (step=0050596) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.899432596360791, LR: 0.0003 +[2026-03-04 00:06:39] (step=0050597) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 9.899628252788103, LR: 0.0003 +[2026-03-04 00:06:47] (step=0050598) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.899823909215417, LR: 0.0003 +[2026-03-04 00:06:55] (step=0050599) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.900019565642731, LR: 0.0003 +[2026-03-04 00:07:03] (step=0050600) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.900215222070045, LR: 0.0003 +[2026-03-04 00:07:11] (step=0050601) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.90041087849736, LR: 0.0003 +[2026-03-04 00:07:18] (step=0050602) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.900606534924671, LR: 0.0003 +[2026-03-04 00:07:26] (step=0050603) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.900802191351985, LR: 0.0003 +[2026-03-04 00:07:34] (step=0050604) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.9009978477793, LR: 0.0003 +[2026-03-04 00:07:42] (step=0050605) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 9.901193504206613, LR: 0.0003 +[2026-03-04 00:07:50] (step=0050606) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 9.901389160633927, LR: 0.0003 +[2026-03-04 00:07:58] (step=0050607) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.90158481706124, LR: 0.0003 +[2026-03-04 00:08:06] (step=0050608) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.901780473488554, LR: 0.0003 +[2026-03-04 00:08:14] (step=0050609) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 9.901976129915868, LR: 0.0003 +[2026-03-04 00:08:21] (step=0050610) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.902171786343182, LR: 0.0003 +[2026-03-04 00:08:29] (step=0050611) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.902367442770496, LR: 0.0003 +[2026-03-04 00:08:37] (step=0050612) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.902563099197808, LR: 0.0003 +[2026-03-04 00:08:45] (step=0050613) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 9.902758755625122, LR: 0.0003 +[2026-03-04 00:08:53] (step=0050614) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.902954412052436, LR: 0.0003 +[2026-03-04 00:09:01] (step=0050615) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.90315006847975, LR: 0.0003 +[2026-03-04 00:09:09] (step=0050616) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 9.903345724907064, LR: 0.0003 +[2026-03-04 00:09:17] (step=0050617) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.903541381334376, LR: 0.0003 +[2026-03-04 00:09:24] (step=0050618) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.90373703776169, LR: 0.0003 +[2026-03-04 00:09:32] (step=0050619) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.903932694189004, LR: 0.0003 +[2026-03-04 00:09:40] (step=0050620) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.904128350616318, LR: 0.0003 +[2026-03-04 00:09:48] (step=0050621) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.904324007043632, LR: 0.0003 +[2026-03-04 00:09:56] (step=0050622) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.904519663470944, LR: 0.0003 +[2026-03-04 00:10:04] (step=0050623) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.904715319898258, LR: 0.0003 +[2026-03-04 00:10:12] (step=0050624) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.904910976325572, LR: 0.0003 +[2026-03-04 00:10:20] (step=0050625) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.905106632752886, LR: 0.0003 +[2026-03-04 00:10:27] (step=0050626) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.905302289180199, LR: 0.0003 +[2026-03-04 00:10:35] (step=0050627) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 9.905497945607513, LR: 0.0003 +[2026-03-04 00:10:43] (step=0050628) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.905693602034827, LR: 0.0003 +[2026-03-04 00:10:51] (step=0050629) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.90588925846214, LR: 0.0003 +[2026-03-04 00:10:59] (step=0050630) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.906084914889455, LR: 0.0003 +[2026-03-04 00:11:07] (step=0050631) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.906280571316767, LR: 0.0003 +[2026-03-04 00:11:15] (step=0050632) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.906476227744081, LR: 0.0003 +[2026-03-04 00:11:22] (step=0050633) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.906671884171395, LR: 0.0003 +[2026-03-04 00:11:30] (step=0050634) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.906867540598709, LR: 0.0003 +[2026-03-04 00:11:38] (step=0050635) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 9.907063197026023, LR: 0.0003 +[2026-03-04 00:11:46] (step=0050636) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.907258853453335, LR: 0.0003 +[2026-03-04 00:11:54] (step=0050637) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.90745450988065, LR: 0.0003 +[2026-03-04 00:12:02] (step=0050638) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.907650166307963, LR: 0.0003 +[2026-03-04 00:12:10] (step=0050639) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 9.907845822735277, LR: 0.0003 +[2026-03-04 00:12:17] (step=0050640) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.908041479162591, LR: 0.0003 +[2026-03-04 00:12:25] (step=0050641) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.908237135589903, LR: 0.0003 +[2026-03-04 00:12:33] (step=0050642) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.908432792017217, LR: 0.0003 +[2026-03-04 00:12:41] (step=0050643) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.908628448444532, LR: 0.0003 +[2026-03-04 00:12:49] (step=0050644) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.908824104871846, LR: 0.0003 +[2026-03-04 00:12:57] (step=0050645) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.90901976129916, LR: 0.0003 +[2026-03-04 00:13:05] (step=0050646) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.909215417726472, LR: 0.0003 +[2026-03-04 00:13:12] (step=0050647) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 9.909411074153786, LR: 0.0003 +[2026-03-04 00:13:20] (step=0050648) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.9096067305811, LR: 0.0003 +[2026-03-04 00:13:28] (step=0050649) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.909802387008414, LR: 0.0003 +[2026-03-04 00:13:36] (step=0050650) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.909998043435726, LR: 0.0003 +[2026-03-04 00:13:44] (step=0050651) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 9.91019369986304, LR: 0.0003 +[2026-03-04 00:13:52] (step=0050652) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 9.910389356290354, LR: 0.0003 +[2026-03-04 00:14:00] (step=0050653) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.910585012717668, LR: 0.0003 +[2026-03-04 00:14:07] (step=0050654) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.910780669144982, LR: 0.0003 +[2026-03-04 00:14:15] (step=0050655) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.910976325572294, LR: 0.0003 +[2026-03-04 00:14:23] (step=0050656) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.911171981999608, LR: 0.0003 +[2026-03-04 00:14:31] (step=0050657) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.911367638426922, LR: 0.0003 +[2026-03-04 00:14:39] (step=0050658) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.911563294854236, LR: 0.0003 +[2026-03-04 00:14:47] (step=0050659) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 9.91175895128155, LR: 0.0003 +[2026-03-04 00:14:55] (step=0050660) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.911954607708862, LR: 0.0003 +[2026-03-04 00:15:03] (step=0050661) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.912150264136177, LR: 0.0003 +[2026-03-04 00:15:10] (step=0050662) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 9.91234592056349, LR: 0.0003 +[2026-03-04 00:15:18] (step=0050663) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 9.912541576990805, LR: 0.0003 +[2026-03-04 00:15:26] (step=0050664) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.912737233418119, LR: 0.0003 +[2026-03-04 00:15:34] (step=0050665) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 9.91293288984543, LR: 0.0003 +[2026-03-04 00:15:42] (step=0050666) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 9.913128546272745, LR: 0.0003 +[2026-03-04 00:15:50] (step=0050667) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 9.913324202700059, LR: 0.0003 +[2026-03-04 00:15:58] (step=0050668) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 9.913519859127373, LR: 0.0003 +[2026-03-04 00:16:06] (step=0050669) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.913715515554687, LR: 0.0003 +[2026-03-04 00:16:13] (step=0050670) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.913911171981999, LR: 0.0003 +[2026-03-04 00:16:21] (step=0050671) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 9.914106828409313, LR: 0.0003 +[2026-03-04 00:16:29] (step=0050672) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 9.914302484836627, LR: 0.0003 +[2026-03-04 00:16:37] (step=0050673) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 9.914498141263941, LR: 0.0003 +[2026-03-04 00:16:45] (step=0050674) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.914693797691255, LR: 0.0003 +[2026-03-04 00:16:53] (step=0050675) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 9.914889454118567, LR: 0.0003 +[2026-03-04 00:17:01] (step=0050676) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.915085110545881, LR: 0.0003 +[2026-03-04 00:17:08] (step=0050677) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.915280766973195, LR: 0.0003 +[2026-03-04 00:17:16] (step=0050678) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.91547642340051, LR: 0.0003 +[2026-03-04 00:17:24] (step=0050679) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 9.915672079827822, LR: 0.0003 +[2026-03-04 00:17:32] (step=0050680) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.915867736255136, LR: 0.0003 +[2026-03-04 00:17:40] (step=0050681) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 9.91606339268245, LR: 0.0003 +[2026-03-04 00:17:48] (step=0050682) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.916259049109764, LR: 0.0003 +[2026-03-04 00:17:56] (step=0050683) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.916454705537078, LR: 0.0003 +[2026-03-04 00:18:03] (step=0050684) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.91665036196439, LR: 0.0003 +[2026-03-04 00:18:11] (step=0050685) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.916846018391704, LR: 0.0003 +[2026-03-04 00:18:19] (step=0050686) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 9.917041674819018, LR: 0.0003 +[2026-03-04 00:18:27] (step=0050687) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 9.917237331246332, LR: 0.0003 +[2026-03-04 00:18:35] (step=0050688) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.917432987673646, LR: 0.0003 +[2026-03-04 00:18:43] (step=0050689) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.917628644100958, LR: 0.0003 +[2026-03-04 00:18:51] (step=0050690) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.917824300528272, LR: 0.0003 +[2026-03-04 00:18:58] (step=0050691) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.918019956955586, LR: 0.0003 +[2026-03-04 00:19:06] (step=0050692) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.9182156133829, LR: 0.0003 +[2026-03-04 00:19:14] (step=0050693) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.918411269810214, LR: 0.0003 +[2026-03-04 00:19:22] (step=0050694) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.918606926237526, LR: 0.0003 +[2026-03-04 00:19:30] (step=0050695) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 9.91880258266484, LR: 0.0003 +[2026-03-04 00:19:38] (step=0050696) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 9.918998239092154, LR: 0.0003 +[2026-03-04 00:19:46] (step=0050697) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.919193895519468, LR: 0.0003 +[2026-03-04 00:19:54] (step=0050698) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 9.919389551946782, LR: 0.0003 +[2026-03-04 00:20:01] (step=0050699) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.919585208374095, LR: 0.0003 +[2026-03-04 00:20:09] (step=0050700) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.919780864801409, LR: 0.0003 +[2026-03-04 00:20:17] (step=0050701) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.919976521228723, LR: 0.0003 +[2026-03-04 00:20:25] (step=0050702) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 9.920172177656037, LR: 0.0003 +[2026-03-04 00:20:33] (step=0050703) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.920367834083349, LR: 0.0003 +[2026-03-04 00:20:41] (step=0050704) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.920563490510663, LR: 0.0003 +[2026-03-04 00:20:49] (step=0050705) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.920759146937977, LR: 0.0003 +[2026-03-04 00:20:57] (step=0050706) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.92095480336529, LR: 0.0003 +[2026-03-04 00:21:04] (step=0050707) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.921150459792605, LR: 0.0003 +[2026-03-04 00:21:12] (step=0050708) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 9.921346116219917, LR: 0.0003 +[2026-03-04 00:21:20] (step=0050709) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.921541772647231, LR: 0.0003 +[2026-03-04 00:21:28] (step=0050710) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 9.921737429074545, LR: 0.0003 +[2026-03-04 00:21:36] (step=0050711) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 9.921933085501859, LR: 0.0003 +[2026-03-04 00:21:44] (step=0050712) Train Loss: 0.4386, Train Steps/Sec: 0.12, Epoch: 9.922128741929173, LR: 0.0003 +[2026-03-04 00:21:52] (step=0050713) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.922324398356485, LR: 0.0003 +[2026-03-04 00:22:00] (step=0050714) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.9225200547838, LR: 0.0003 +[2026-03-04 00:22:07] (step=0050715) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.922715711211113, LR: 0.0003 +[2026-03-04 00:22:15] (step=0050716) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 9.922911367638427, LR: 0.0003 +[2026-03-04 00:22:23] (step=0050717) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 9.923107024065741, LR: 0.0003 +[2026-03-04 00:22:31] (step=0050718) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.923302680493054, LR: 0.0003 +[2026-03-04 00:22:39] (step=0050719) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 9.923498336920368, LR: 0.0003 +[2026-03-04 00:22:47] (step=0050720) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.923693993347682, LR: 0.0003 +[2026-03-04 00:22:55] (step=0050721) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.923889649774996, LR: 0.0003 +[2026-03-04 00:23:02] (step=0050722) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.92408530620231, LR: 0.0003 +[2026-03-04 00:23:10] (step=0050723) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.924280962629622, LR: 0.0003 +[2026-03-04 00:23:18] (step=0050724) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.924476619056936, LR: 0.0003 +[2026-03-04 00:23:26] (step=0050725) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.92467227548425, LR: 0.0003 +[2026-03-04 00:23:34] (step=0050726) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.924867931911564, LR: 0.0003 +[2026-03-04 00:23:42] (step=0050727) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 9.925063588338878, LR: 0.0003 +[2026-03-04 00:23:50] (step=0050728) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 9.92525924476619, LR: 0.0003 +[2026-03-04 00:23:57] (step=0050729) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 9.925454901193504, LR: 0.0003 +[2026-03-04 00:24:05] (step=0050730) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.925650557620818, LR: 0.0003 +[2026-03-04 00:24:13] (step=0050731) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.925846214048132, LR: 0.0003 +[2026-03-04 00:24:21] (step=0050732) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.926041870475444, LR: 0.0003 +[2026-03-04 00:24:29] (step=0050733) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.926237526902758, LR: 0.0003 +[2026-03-04 00:24:37] (step=0050734) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 9.926433183330072, LR: 0.0003 +[2026-03-04 00:24:45] (step=0050735) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.926628839757386, LR: 0.0003 +[2026-03-04 00:24:52] (step=0050736) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 9.9268244961847, LR: 0.0003 +[2026-03-04 00:25:00] (step=0050737) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.927020152612013, LR: 0.0003 +[2026-03-04 00:25:08] (step=0050738) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.927215809039327, LR: 0.0003 +[2026-03-04 00:25:16] (step=0050739) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 9.92741146546664, LR: 0.0003 +[2026-03-04 00:25:24] (step=0050740) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 9.927607121893955, LR: 0.0003 +[2026-03-04 00:25:32] (step=0050741) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.927802778321269, LR: 0.0003 +[2026-03-04 00:25:40] (step=0050742) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 9.92799843474858, LR: 0.0003 +[2026-03-04 00:25:47] (step=0050743) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.928194091175895, LR: 0.0003 +[2026-03-04 00:25:55] (step=0050744) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.928389747603209, LR: 0.0003 +[2026-03-04 00:26:03] (step=0050745) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.928585404030523, LR: 0.0003 +[2026-03-04 00:26:11] (step=0050746) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 9.928781060457837, LR: 0.0003 +[2026-03-04 00:26:19] (step=0050747) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.928976716885149, LR: 0.0003 +[2026-03-04 00:26:27] (step=0050748) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.929172373312463, LR: 0.0003 +[2026-03-04 00:26:35] (step=0050749) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.929368029739777, LR: 0.0003 +[2026-03-04 00:26:42] (step=0050750) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.929563686167091, LR: 0.0003 +[2026-03-04 00:26:50] (step=0050751) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.929759342594405, LR: 0.0003 +[2026-03-04 00:26:58] (step=0050752) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 9.929954999021717, LR: 0.0003 +[2026-03-04 00:27:06] (step=0050753) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 9.930150655449031, LR: 0.0003 +[2026-03-04 00:27:14] (step=0050754) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.930346311876345, LR: 0.0003 +[2026-03-04 00:27:22] (step=0050755) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.93054196830366, LR: 0.0003 +[2026-03-04 00:27:30] (step=0050756) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.930737624730972, LR: 0.0003 +[2026-03-04 00:27:38] (step=0050757) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.930933281158286, LR: 0.0003 +[2026-03-04 00:27:46] (step=0050758) Train Loss: 0.4313, Train Steps/Sec: 0.12, Epoch: 9.9311289375856, LR: 0.0003 +[2026-03-04 00:27:53] (step=0050759) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.931324594012914, LR: 0.0003 +[2026-03-04 00:28:01] (step=0050760) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.931520250440228, LR: 0.0003 +[2026-03-04 00:28:09] (step=0050761) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.93171590686754, LR: 0.0003 +[2026-03-04 00:28:17] (step=0050762) Train Loss: 0.4522, Train Steps/Sec: 0.12, Epoch: 9.931911563294854, LR: 0.0003 +[2026-03-04 00:28:25] (step=0050763) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.932107219722168, LR: 0.0003 +[2026-03-04 00:28:33] (step=0050764) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.932302876149482, LR: 0.0003 +[2026-03-04 00:28:41] (step=0050765) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 9.932498532576796, LR: 0.0003 +[2026-03-04 00:28:49] (step=0050766) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 9.932694189004108, LR: 0.0003 +[2026-03-04 00:28:56] (step=0050767) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.932889845431422, LR: 0.0003 +[2026-03-04 00:29:04] (step=0050768) Train Loss: 0.4248, Train Steps/Sec: 0.13, Epoch: 9.933085501858736, LR: 0.0003 +[2026-03-04 00:29:12] (step=0050769) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 9.93328115828605, LR: 0.0003 +[2026-03-04 00:29:20] (step=0050770) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 9.933476814713364, LR: 0.0003 +[2026-03-04 00:29:28] (step=0050771) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.933672471140676, LR: 0.0003 +[2026-03-04 00:29:36] (step=0050772) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.93386812756799, LR: 0.0003 +[2026-03-04 00:29:44] (step=0050773) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.934063783995304, LR: 0.0003 +[2026-03-04 00:29:51] (step=0050774) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.934259440422618, LR: 0.0003 +[2026-03-04 00:29:59] (step=0050775) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.934455096849932, LR: 0.0003 +[2026-03-04 00:30:07] (step=0050776) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 9.934650753277245, LR: 0.0003 +[2026-03-04 00:30:15] (step=0050777) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.934846409704559, LR: 0.0003 +[2026-03-04 00:30:23] (step=0050778) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.935042066131873, LR: 0.0003 +[2026-03-04 00:30:31] (step=0050779) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.935237722559187, LR: 0.0003 +[2026-03-04 00:30:39] (step=0050780) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.9354333789865, LR: 0.0003 +[2026-03-04 00:30:46] (step=0050781) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.935629035413813, LR: 0.0003 +[2026-03-04 00:30:54] (step=0050782) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 9.935824691841127, LR: 0.0003 +[2026-03-04 00:31:02] (step=0050783) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.93602034826844, LR: 0.0003 +[2026-03-04 00:31:10] (step=0050784) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 9.936216004695755, LR: 0.0003 +[2026-03-04 00:31:18] (step=0050785) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.936411661123067, LR: 0.0003 +[2026-03-04 00:31:26] (step=0050786) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.936607317550381, LR: 0.0003 +[2026-03-04 00:31:34] (step=0050787) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.936802973977695, LR: 0.0003 +[2026-03-04 00:31:41] (step=0050788) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 9.936998630405009, LR: 0.0003 +[2026-03-04 00:31:49] (step=0050789) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 9.937194286832323, LR: 0.0003 +[2026-03-04 00:31:57] (step=0050790) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.937389943259635, LR: 0.0003 +[2026-03-04 00:32:05] (step=0050791) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 9.93758559968695, LR: 0.0003 +[2026-03-04 00:32:13] (step=0050792) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.937781256114263, LR: 0.0003 +[2026-03-04 00:32:21] (step=0050793) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 9.937976912541577, LR: 0.0003 +[2026-03-04 00:32:29] (step=0050794) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.938172568968891, LR: 0.0003 +[2026-03-04 00:32:36] (step=0050795) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.938368225396204, LR: 0.0003 +[2026-03-04 00:32:44] (step=0050796) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.938563881823518, LR: 0.0003 +[2026-03-04 00:32:52] (step=0050797) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 9.938759538250832, LR: 0.0003 +[2026-03-04 00:33:00] (step=0050798) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.938955194678146, LR: 0.0003 +[2026-03-04 00:33:08] (step=0050799) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.93915085110546, LR: 0.0003 +[2026-03-04 00:33:16] (step=0050800) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 9.939346507532772, LR: 0.0003 +[2026-03-04 00:33:24] (step=0050801) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.939542163960086, LR: 0.0003 +[2026-03-04 00:33:31] (step=0050802) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.9397378203874, LR: 0.0003 +[2026-03-04 00:33:39] (step=0050803) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.939933476814714, LR: 0.0003 +[2026-03-04 00:33:47] (step=0050804) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.940129133242028, LR: 0.0003 +[2026-03-04 00:33:55] (step=0050805) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.94032478966934, LR: 0.0003 +[2026-03-04 00:34:03] (step=0050806) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 9.940520446096654, LR: 0.0003 +[2026-03-04 00:34:11] (step=0050807) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 9.940716102523968, LR: 0.0003 +[2026-03-04 00:34:19] (step=0050808) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.940911758951282, LR: 0.0003 +[2026-03-04 00:34:27] (step=0050809) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 9.941107415378594, LR: 0.0003 +[2026-03-04 00:34:34] (step=0050810) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 9.941303071805908, LR: 0.0003 +[2026-03-04 00:34:42] (step=0050811) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 9.941498728233222, LR: 0.0003 +[2026-03-04 00:34:50] (step=0050812) Train Loss: 0.4464, Train Steps/Sec: 0.12, Epoch: 9.941694384660536, LR: 0.0003 +[2026-03-04 00:34:58] (step=0050813) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.94189004108785, LR: 0.0003 +[2026-03-04 00:35:06] (step=0050814) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 9.942085697515163, LR: 0.0003 +[2026-03-04 00:35:14] (step=0050815) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.942281353942477, LR: 0.0003 +[2026-03-04 00:35:22] (step=0050816) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.94247701036979, LR: 0.0003 +[2026-03-04 00:35:30] (step=0050817) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.942672666797105, LR: 0.0003 +[2026-03-04 00:35:37] (step=0050818) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.942868323224419, LR: 0.0003 +[2026-03-04 00:35:45] (step=0050819) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.94306397965173, LR: 0.0003 +[2026-03-04 00:35:53] (step=0050820) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 9.943259636079045, LR: 0.0003 +[2026-03-04 00:36:01] (step=0050821) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.943455292506359, LR: 0.0003 +[2026-03-04 00:36:09] (step=0050822) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 9.943650948933673, LR: 0.0003 +[2026-03-04 00:36:17] (step=0050823) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 9.943846605360987, LR: 0.0003 +[2026-03-04 00:36:25] (step=0050824) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 9.944042261788299, LR: 0.0003 +[2026-03-04 00:36:32] (step=0050825) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 9.944237918215613, LR: 0.0003 +[2026-03-04 00:36:40] (step=0050826) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 9.944433574642927, LR: 0.0003 +[2026-03-04 00:36:48] (step=0050827) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 9.944629231070241, LR: 0.0003 +[2026-03-04 00:36:56] (step=0050828) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.944824887497555, LR: 0.0003 +[2026-03-04 00:37:04] (step=0050829) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 9.945020543924867, LR: 0.0003 +[2026-03-04 00:37:12] (step=0050830) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.945216200352181, LR: 0.0003 +[2026-03-04 00:37:20] (step=0050831) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 9.945411856779495, LR: 0.0003 +[2026-03-04 00:37:27] (step=0050832) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 9.94560751320681, LR: 0.0003 +[2026-03-04 00:37:35] (step=0050833) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.945803169634123, LR: 0.0003 +[2026-03-04 00:37:43] (step=0050834) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 9.945998826061436, LR: 0.0003 +[2026-03-04 00:37:51] (step=0050835) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.94619448248875, LR: 0.0003 +[2026-03-04 00:37:59] (step=0050836) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 9.946390138916064, LR: 0.0003 +[2026-03-04 00:38:07] (step=0050837) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.946585795343378, LR: 0.0003 +[2026-03-04 00:38:15] (step=0050838) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.94678145177069, LR: 0.0003 +[2026-03-04 00:38:22] (step=0050839) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.946977108198004, LR: 0.0003 +[2026-03-04 00:38:30] (step=0050840) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 9.947172764625318, LR: 0.0003 +[2026-03-04 00:38:38] (step=0050841) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.947368421052632, LR: 0.0003 +[2026-03-04 00:38:46] (step=0050842) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.947564077479946, LR: 0.0003 +[2026-03-04 00:38:54] (step=0050843) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 9.947759733907258, LR: 0.0003 +[2026-03-04 00:39:02] (step=0050844) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.947955390334572, LR: 0.0003 +[2026-03-04 00:39:10] (step=0050845) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 9.948151046761886, LR: 0.0003 +[2026-03-04 00:39:17] (step=0050846) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.9483467031892, LR: 0.0003 +[2026-03-04 00:39:25] (step=0050847) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.948542359616514, LR: 0.0003 +[2026-03-04 00:39:33] (step=0050848) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 9.948738016043826, LR: 0.0003 +[2026-03-04 00:39:41] (step=0050849) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 9.94893367247114, LR: 0.0003 +[2026-03-04 00:39:49] (step=0050850) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.949129328898454, LR: 0.0003 +[2026-03-04 00:39:57] (step=0050851) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.949324985325768, LR: 0.0003 +[2026-03-04 00:40:05] (step=0050852) Train Loss: 0.4475, Train Steps/Sec: 0.12, Epoch: 9.949520641753082, LR: 0.0003 +[2026-03-04 00:40:13] (step=0050853) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 9.949716298180395, LR: 0.0003 +[2026-03-04 00:40:20] (step=0050854) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 9.949911954607709, LR: 0.0003 +[2026-03-04 00:40:28] (step=0050855) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.950107611035023, LR: 0.0003 +[2026-03-04 00:40:36] (step=0050856) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.950303267462337, LR: 0.0003 +[2026-03-04 00:40:44] (step=0050857) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.95049892388965, LR: 0.0003 +[2026-03-04 00:40:52] (step=0050858) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.950694580316963, LR: 0.0003 +[2026-03-04 00:41:00] (step=0050859) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.950890236744277, LR: 0.0003 +[2026-03-04 00:41:08] (step=0050860) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 9.95108589317159, LR: 0.0003 +[2026-03-04 00:41:15] (step=0050861) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.951281549598905, LR: 0.0003 +[2026-03-04 00:41:23] (step=0050862) Train Loss: 0.4615, Train Steps/Sec: 0.12, Epoch: 9.951477206026217, LR: 0.0003 +[2026-03-04 00:41:31] (step=0050863) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.951672862453531, LR: 0.0003 +[2026-03-04 00:41:39] (step=0050864) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.951868518880845, LR: 0.0003 +[2026-03-04 00:41:47] (step=0050865) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.95206417530816, LR: 0.0003 +[2026-03-04 00:41:55] (step=0050866) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.952259831735473, LR: 0.0003 +[2026-03-04 00:42:03] (step=0050867) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.952455488162785, LR: 0.0003 +[2026-03-04 00:42:11] (step=0050868) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.9526511445901, LR: 0.0003 +[2026-03-04 00:42:18] (step=0050869) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.952846801017413, LR: 0.0003 +[2026-03-04 00:42:26] (step=0050870) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 9.953042457444727, LR: 0.0003 +[2026-03-04 00:42:34] (step=0050871) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 9.953238113872041, LR: 0.0003 +[2026-03-04 00:42:42] (step=0050872) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 9.953433770299354, LR: 0.0003 +[2026-03-04 00:42:50] (step=0050873) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 9.953629426726668, LR: 0.0003 +[2026-03-04 00:42:58] (step=0050874) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.953825083153982, LR: 0.0003 +[2026-03-04 00:43:06] (step=0050875) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 9.954020739581296, LR: 0.0003 +[2026-03-04 00:43:13] (step=0050876) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 9.95421639600861, LR: 0.0003 +[2026-03-04 00:43:21] (step=0050877) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.954412052435922, LR: 0.0003 +[2026-03-04 00:43:29] (step=0050878) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.954607708863236, LR: 0.0003 +[2026-03-04 00:43:37] (step=0050879) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.95480336529055, LR: 0.0003 +[2026-03-04 00:43:45] (step=0050880) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 9.954999021717864, LR: 0.0003 +[2026-03-04 00:43:53] (step=0050881) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.955194678145178, LR: 0.0003 +[2026-03-04 00:44:01] (step=0050882) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.95539033457249, LR: 0.0003 +[2026-03-04 00:44:08] (step=0050883) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.955585990999804, LR: 0.0003 +[2026-03-04 00:44:16] (step=0050884) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 9.955781647427118, LR: 0.0003 +[2026-03-04 00:44:24] (step=0050885) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.955977303854432, LR: 0.0003 +[2026-03-04 00:44:32] (step=0050886) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 9.956172960281746, LR: 0.0003 +[2026-03-04 00:44:40] (step=0050887) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.956368616709058, LR: 0.0003 +[2026-03-04 00:44:48] (step=0050888) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.956564273136372, LR: 0.0003 +[2026-03-04 00:44:55] (step=0050889) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 9.956759929563686, LR: 0.0003 +[2026-03-04 00:45:03] (step=0050890) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.956955585991, LR: 0.0003 +[2026-03-04 00:45:11] (step=0050891) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 9.957151242418313, LR: 0.0003 +[2026-03-04 00:45:19] (step=0050892) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 9.957346898845627, LR: 0.0003 +[2026-03-04 00:45:27] (step=0050893) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.95754255527294, LR: 0.0003 +[2026-03-04 00:45:35] (step=0050894) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.957738211700255, LR: 0.0003 +[2026-03-04 00:45:42] (step=0050895) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 9.957933868127569, LR: 0.0003 +[2026-03-04 00:45:50] (step=0050896) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.95812952455488, LR: 0.0003 +[2026-03-04 00:45:58] (step=0050897) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.958325180982195, LR: 0.0003 +[2026-03-04 00:46:06] (step=0050898) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 9.958520837409509, LR: 0.0003 +[2026-03-04 00:46:14] (step=0050899) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.958716493836823, LR: 0.0003 +[2026-03-04 00:46:22] (step=0050900) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.958912150264137, LR: 0.0003 +[2026-03-04 00:46:30] (step=0050901) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 9.95910780669145, LR: 0.0003 +[2026-03-04 00:46:38] (step=0050902) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.959303463118763, LR: 0.0003 +[2026-03-04 00:46:45] (step=0050903) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.959499119546077, LR: 0.0003 +[2026-03-04 00:46:53] (step=0050904) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.959694775973391, LR: 0.0003 +[2026-03-04 00:47:01] (step=0050905) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 9.959890432400705, LR: 0.0003 +[2026-03-04 00:47:09] (step=0050906) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.960086088828017, LR: 0.0003 +[2026-03-04 00:47:17] (step=0050907) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.960281745255331, LR: 0.0003 +[2026-03-04 00:47:25] (step=0050908) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 9.960477401682645, LR: 0.0003 +[2026-03-04 00:47:33] (step=0050909) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 9.96067305810996, LR: 0.0003 +[2026-03-04 00:47:40] (step=0050910) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.960868714537273, LR: 0.0003 +[2026-03-04 00:47:48] (step=0050911) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 9.961064370964586, LR: 0.0003 +[2026-03-04 00:47:56] (step=0050912) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.9612600273919, LR: 0.0003 +[2026-03-04 00:48:04] (step=0050913) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 9.961455683819214, LR: 0.0003 +[2026-03-04 00:48:12] (step=0050914) Train Loss: 0.4579, Train Steps/Sec: 0.12, Epoch: 9.961651340246528, LR: 0.0003 +[2026-03-04 00:48:20] (step=0050915) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.96184699667384, LR: 0.0003 +[2026-03-04 00:48:28] (step=0050916) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.962042653101154, LR: 0.0003 +[2026-03-04 00:48:36] (step=0050917) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.962238309528468, LR: 0.0003 +[2026-03-04 00:48:43] (step=0050918) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.962433965955782, LR: 0.0003 +[2026-03-04 00:48:51] (step=0050919) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 9.962629622383096, LR: 0.0003 +[2026-03-04 00:48:59] (step=0050920) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.962825278810408, LR: 0.0003 +[2026-03-04 00:49:07] (step=0050921) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 9.963020935237722, LR: 0.0003 +[2026-03-04 00:49:15] (step=0050922) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 9.963216591665036, LR: 0.0003 +[2026-03-04 00:49:23] (step=0050923) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 9.96341224809235, LR: 0.0003 +[2026-03-04 00:49:31] (step=0050924) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 9.963607904519664, LR: 0.0003 +[2026-03-04 00:49:38] (step=0050925) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 9.963803560946976, LR: 0.0003 +[2026-03-04 00:49:46] (step=0050926) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 9.96399921737429, LR: 0.0003 +[2026-03-04 00:49:54] (step=0050927) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.964194873801604, LR: 0.0003 +[2026-03-04 00:50:02] (step=0050928) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.964390530228918, LR: 0.0003 +[2026-03-04 00:50:10] (step=0050929) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.964586186656232, LR: 0.0003 +[2026-03-04 00:50:18] (step=0050930) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.964781843083545, LR: 0.0003 +[2026-03-04 00:50:25] (step=0050931) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.964977499510859, LR: 0.0003 +[2026-03-04 00:50:33] (step=0050932) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.965173155938173, LR: 0.0003 +[2026-03-04 00:50:41] (step=0050933) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 9.965368812365487, LR: 0.0003 +[2026-03-04 00:50:49] (step=0050934) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 9.9655644687928, LR: 0.0003 +[2026-03-04 00:50:57] (step=0050935) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 9.965760125220113, LR: 0.0003 +[2026-03-04 00:51:05] (step=0050936) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.965955781647427, LR: 0.0003 +[2026-03-04 00:51:12] (step=0050937) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 9.966151438074741, LR: 0.0003 +[2026-03-04 00:51:20] (step=0050938) Train Loss: 0.4236, Train Steps/Sec: 0.13, Epoch: 9.966347094502055, LR: 0.0003 +[2026-03-04 00:51:28] (step=0050939) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.966542750929367, LR: 0.0003 +[2026-03-04 00:51:36] (step=0050940) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.966738407356681, LR: 0.0003 +[2026-03-04 00:51:44] (step=0050941) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.966934063783995, LR: 0.0003 +[2026-03-04 00:51:52] (step=0050942) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.96712972021131, LR: 0.0003 +[2026-03-04 00:52:00] (step=0050943) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 9.967325376638623, LR: 0.0003 +[2026-03-04 00:52:07] (step=0050944) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.967521033065935, LR: 0.0003 +[2026-03-04 00:52:15] (step=0050945) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 9.96771668949325, LR: 0.0003 +[2026-03-04 00:52:23] (step=0050946) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 9.967912345920563, LR: 0.0003 +[2026-03-04 00:52:31] (step=0050947) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 9.968108002347877, LR: 0.0003 +[2026-03-04 00:52:39] (step=0050948) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 9.968303658775191, LR: 0.0003 +[2026-03-04 00:52:47] (step=0050949) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.968499315202504, LR: 0.0003 +[2026-03-04 00:52:54] (step=0050950) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 9.968694971629818, LR: 0.0003 +[2026-03-04 00:53:02] (step=0050951) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.968890628057132, LR: 0.0003 +[2026-03-04 00:53:10] (step=0050952) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 9.969086284484446, LR: 0.0003 +[2026-03-04 00:53:18] (step=0050953) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.96928194091176, LR: 0.0003 +[2026-03-04 00:53:26] (step=0050954) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 9.969477597339072, LR: 0.0003 +[2026-03-04 00:53:34] (step=0050955) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.969673253766386, LR: 0.0003 +[2026-03-04 00:53:42] (step=0050956) Train Loss: 0.4338, Train Steps/Sec: 0.12, Epoch: 9.9698689101937, LR: 0.0003 +[2026-03-04 00:53:50] (step=0050957) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 9.970064566621014, LR: 0.0003 +[2026-03-04 00:53:57] (step=0050958) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.970260223048328, LR: 0.0003 +[2026-03-04 00:54:05] (step=0050959) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 9.97045587947564, LR: 0.0003 +[2026-03-04 00:54:13] (step=0050960) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 9.970651535902954, LR: 0.0003 +[2026-03-04 00:54:21] (step=0050961) Train Loss: 0.4459, Train Steps/Sec: 0.12, Epoch: 9.970847192330268, LR: 0.0003 +[2026-03-04 00:54:29] (step=0050962) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.971042848757582, LR: 0.0003 +[2026-03-04 00:54:37] (step=0050963) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 9.971238505184896, LR: 0.0003 +[2026-03-04 00:54:45] (step=0050964) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 9.971434161612208, LR: 0.0003 +[2026-03-04 00:54:53] (step=0050965) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 9.971629818039522, LR: 0.0003 +[2026-03-04 00:55:00] (step=0050966) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 9.971825474466836, LR: 0.0003 +[2026-03-04 00:55:08] (step=0050967) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.97202113089415, LR: 0.0003 +[2026-03-04 00:55:16] (step=0050968) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.972216787321463, LR: 0.0003 +[2026-03-04 00:55:24] (step=0050969) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.972412443748777, LR: 0.0003 +[2026-03-04 00:55:32] (step=0050970) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.97260810017609, LR: 0.0003 +[2026-03-04 00:55:40] (step=0050971) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 9.972803756603405, LR: 0.0003 +[2026-03-04 00:55:48] (step=0050972) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 9.972999413030719, LR: 0.0003 +[2026-03-04 00:55:56] (step=0050973) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 9.973195069458031, LR: 0.0003 +[2026-03-04 00:56:03] (step=0050974) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 9.973390725885345, LR: 0.0003 +[2026-03-04 00:56:11] (step=0050975) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 9.973586382312659, LR: 0.0003 +[2026-03-04 00:56:19] (step=0050976) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 9.973782038739973, LR: 0.0003 +[2026-03-04 00:56:27] (step=0050977) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 9.973977695167287, LR: 0.0003 +[2026-03-04 00:56:35] (step=0050978) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 9.9741733515946, LR: 0.0003 +[2026-03-04 00:56:43] (step=0050979) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 9.974369008021913, LR: 0.0003 +[2026-03-04 00:56:51] (step=0050980) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 9.974564664449227, LR: 0.0003 +[2026-03-04 00:56:58] (step=0050981) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 9.974760320876541, LR: 0.0003 +[2026-03-04 00:57:06] (step=0050982) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 9.974955977303855, LR: 0.0003 +[2026-03-04 00:57:14] (step=0050983) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.975151633731167, LR: 0.0003 +[2026-03-04 00:57:22] (step=0050984) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 9.975347290158481, LR: 0.0003 +[2026-03-04 00:57:30] (step=0050985) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.975542946585795, LR: 0.0003 +[2026-03-04 00:57:38] (step=0050986) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 9.97573860301311, LR: 0.0003 +[2026-03-04 00:57:46] (step=0050987) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.975934259440423, LR: 0.0003 +[2026-03-04 00:57:54] (step=0050988) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.976129915867736, LR: 0.0003 +[2026-03-04 00:58:01] (step=0050989) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 9.97632557229505, LR: 0.0003 +[2026-03-04 00:58:09] (step=0050990) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 9.976521228722364, LR: 0.0003 +[2026-03-04 00:58:17] (step=0050991) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 9.976716885149678, LR: 0.0003 +[2026-03-04 00:58:25] (step=0050992) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.97691254157699, LR: 0.0003 +[2026-03-04 00:58:33] (step=0050993) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 9.977108198004304, LR: 0.0003 +[2026-03-04 00:58:41] (step=0050994) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 9.977303854431618, LR: 0.0003 +[2026-03-04 00:58:49] (step=0050995) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 9.977499510858932, LR: 0.0003 +[2026-03-04 00:58:56] (step=0050996) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 9.977695167286246, LR: 0.0003 +[2026-03-04 00:59:04] (step=0050997) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.977890823713558, LR: 0.0003 +[2026-03-04 00:59:12] (step=0050998) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 9.978086480140872, LR: 0.0003 +[2026-03-04 00:59:20] (step=0050999) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 9.978282136568186, LR: 0.0003 +[2026-03-04 00:59:28] (step=0051000) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.9784777929955, LR: 0.0003 +[2026-03-04 00:59:28] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0051000/ +[2026-03-04 00:59:36] (step=0051001) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 9.978673449422814, LR: 0.0003 +[2026-03-04 00:59:44] (step=0051002) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 9.978869105850126, LR: 0.0003 +[2026-03-04 00:59:52] (step=0051003) Train Loss: 0.4250, Train Steps/Sec: 0.13, Epoch: 9.97906476227744, LR: 0.0003 +[2026-03-04 01:00:00] (step=0051004) Train Loss: 0.4428, Train Steps/Sec: 0.12, Epoch: 9.979260418704754, LR: 0.0003 +[2026-03-04 01:00:07] (step=0051005) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.979456075132068, LR: 0.0003 +[2026-03-04 01:00:15] (step=0051006) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 9.979651731559382, LR: 0.0003 +[2026-03-04 01:00:23] (step=0051007) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 9.979847387986695, LR: 0.0003 +[2026-03-04 01:00:31] (step=0051008) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 9.980043044414009, LR: 0.0003 +[2026-03-04 01:00:39] (step=0051009) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 9.980238700841323, LR: 0.0003 +[2026-03-04 01:00:47] (step=0051010) Train Loss: 0.4479, Train Steps/Sec: 0.12, Epoch: 9.980434357268637, LR: 0.0003 +[2026-03-04 01:00:55] (step=0051011) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 9.98063001369595, LR: 0.0003 +[2026-03-04 01:01:03] (step=0051012) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 9.980825670123263, LR: 0.0003 +[2026-03-04 01:01:10] (step=0051013) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 9.981021326550577, LR: 0.0003 +[2026-03-04 01:01:18] (step=0051014) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 9.981216982977891, LR: 0.0003 +[2026-03-04 01:01:26] (step=0051015) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 9.981412639405205, LR: 0.0003 +[2026-03-04 01:01:34] (step=0051016) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 9.981608295832519, LR: 0.0003 +[2026-03-04 01:01:42] (step=0051017) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 9.981803952259831, LR: 0.0003 +[2026-03-04 01:01:50] (step=0051018) Train Loss: 0.4187, Train Steps/Sec: 0.13, Epoch: 9.981999608687145, LR: 0.0003 +[2026-03-04 01:01:58] (step=0051019) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 9.98219526511446, LR: 0.0003 +[2026-03-04 01:02:05] (step=0051020) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 9.982390921541773, LR: 0.0003 +[2026-03-04 01:02:13] (step=0051021) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 9.982586577969085, LR: 0.0003 +[2026-03-04 01:02:21] (step=0051022) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 9.9827822343964, LR: 0.0003 +[2026-03-04 01:02:29] (step=0051023) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 9.982977890823713, LR: 0.0003 +[2026-03-04 01:02:37] (step=0051024) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.983173547251027, LR: 0.0003 +[2026-03-04 01:02:45] (step=0051025) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 9.983369203678341, LR: 0.0003 +[2026-03-04 01:02:53] (step=0051026) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 9.983564860105654, LR: 0.0003 +[2026-03-04 01:03:00] (step=0051027) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 9.983760516532968, LR: 0.0003 +[2026-03-04 01:03:08] (step=0051028) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 9.983956172960282, LR: 0.0003 +[2026-03-04 01:03:16] (step=0051029) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 9.984151829387596, LR: 0.0003 +[2026-03-04 01:03:24] (step=0051030) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 9.98434748581491, LR: 0.0003 +[2026-03-04 01:03:32] (step=0051031) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.984543142242222, LR: 0.0003 +[2026-03-04 01:03:40] (step=0051032) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 9.984738798669536, LR: 0.0003 +[2026-03-04 01:03:48] (step=0051033) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.98493445509685, LR: 0.0003 +[2026-03-04 01:03:55] (step=0051034) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 9.985130111524164, LR: 0.0003 +[2026-03-04 01:04:03] (step=0051035) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 9.985325767951478, LR: 0.0003 +[2026-03-04 01:04:11] (step=0051036) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.98552142437879, LR: 0.0003 +[2026-03-04 01:04:19] (step=0051037) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 9.985717080806104, LR: 0.0003 +[2026-03-04 01:04:27] (step=0051038) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 9.985912737233418, LR: 0.0003 +[2026-03-04 01:04:35] (step=0051039) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 9.986108393660732, LR: 0.0003 +[2026-03-04 01:04:43] (step=0051040) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 9.986304050088046, LR: 0.0003 +[2026-03-04 01:04:50] (step=0051041) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.986499706515358, LR: 0.0003 +[2026-03-04 01:04:58] (step=0051042) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 9.986695362942672, LR: 0.0003 +[2026-03-04 01:05:06] (step=0051043) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 9.986891019369986, LR: 0.0003 +[2026-03-04 01:05:14] (step=0051044) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 9.9870866757973, LR: 0.0003 +[2026-03-04 01:05:22] (step=0051045) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.987282332224613, LR: 0.0003 +[2026-03-04 01:05:30] (step=0051046) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 9.987477988651927, LR: 0.0003 +[2026-03-04 01:05:38] (step=0051047) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.98767364507924, LR: 0.0003 +[2026-03-04 01:05:46] (step=0051048) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 9.987869301506555, LR: 0.0003 +[2026-03-04 01:05:53] (step=0051049) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 9.988064957933869, LR: 0.0003 +[2026-03-04 01:06:01] (step=0051050) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 9.988260614361181, LR: 0.0003 +[2026-03-04 01:06:09] (step=0051051) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.988456270788495, LR: 0.0003 +[2026-03-04 01:06:17] (step=0051052) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 9.988651927215809, LR: 0.0003 +[2026-03-04 01:06:25] (step=0051053) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 9.988847583643123, LR: 0.0003 +[2026-03-04 01:06:33] (step=0051054) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 9.989043240070437, LR: 0.0003 +[2026-03-04 01:06:41] (step=0051055) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 9.98923889649775, LR: 0.0003 +[2026-03-04 01:06:49] (step=0051056) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 9.989434552925063, LR: 0.0003 +[2026-03-04 01:06:56] (step=0051057) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 9.989630209352377, LR: 0.0003 +[2026-03-04 01:07:04] (step=0051058) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.989825865779691, LR: 0.0003 +[2026-03-04 01:07:12] (step=0051059) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 9.990021522207005, LR: 0.0003 +[2026-03-04 01:07:20] (step=0051060) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 9.990217178634317, LR: 0.0003 +[2026-03-04 01:07:28] (step=0051061) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 9.990412835061631, LR: 0.0003 +[2026-03-04 01:07:36] (step=0051062) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 9.990608491488945, LR: 0.0003 +[2026-03-04 01:07:44] (step=0051063) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 9.99080414791626, LR: 0.0003 +[2026-03-04 01:07:52] (step=0051064) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.990999804343573, LR: 0.0003 +[2026-03-04 01:07:59] (step=0051065) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 9.991195460770886, LR: 0.0003 +[2026-03-04 01:08:07] (step=0051066) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.9913911171982, LR: 0.0003 +[2026-03-04 01:08:15] (step=0051067) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 9.991586773625514, LR: 0.0003 +[2026-03-04 01:08:23] (step=0051068) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 9.991782430052828, LR: 0.0003 +[2026-03-04 01:08:31] (step=0051069) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 9.991978086480142, LR: 0.0003 +[2026-03-04 01:08:39] (step=0051070) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 9.992173742907454, LR: 0.0003 +[2026-03-04 01:08:47] (step=0051071) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 9.992369399334768, LR: 0.0003 +[2026-03-04 01:08:54] (step=0051072) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 9.992565055762082, LR: 0.0003 +[2026-03-04 01:09:02] (step=0051073) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 9.992760712189396, LR: 0.0003 +[2026-03-04 01:09:10] (step=0051074) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 9.992956368616708, LR: 0.0003 +[2026-03-04 01:09:18] (step=0051075) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 9.993152025044022, LR: 0.0003 +[2026-03-04 01:09:26] (step=0051076) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 9.993347681471336, LR: 0.0003 +[2026-03-04 01:09:34] (step=0051077) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.99354333789865, LR: 0.0003 +[2026-03-04 01:09:42] (step=0051078) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 9.993738994325964, LR: 0.0003 +[2026-03-04 01:09:49] (step=0051079) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 9.993934650753276, LR: 0.0003 +[2026-03-04 01:09:57] (step=0051080) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 9.99413030718059, LR: 0.0003 +[2026-03-04 01:10:05] (step=0051081) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 9.994325963607904, LR: 0.0003 +[2026-03-04 01:10:13] (step=0051082) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 9.994521620035218, LR: 0.0003 +[2026-03-04 01:10:21] (step=0051083) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 9.994717276462532, LR: 0.0003 +[2026-03-04 01:10:29] (step=0051084) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.994912932889845, LR: 0.0003 +[2026-03-04 01:10:37] (step=0051085) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 9.995108589317159, LR: 0.0003 +[2026-03-04 01:10:44] (step=0051086) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 9.995304245744473, LR: 0.0003 +[2026-03-04 01:10:52] (step=0051087) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.995499902171787, LR: 0.0003 +[2026-03-04 01:11:00] (step=0051088) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 9.9956955585991, LR: 0.0003 +[2026-03-04 01:11:08] (step=0051089) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 9.995891215026413, LR: 0.0003 +[2026-03-04 01:11:16] (step=0051090) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 9.996086871453727, LR: 0.0003 +[2026-03-04 01:11:24] (step=0051091) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 9.996282527881041, LR: 0.0003 +[2026-03-04 01:11:32] (step=0051092) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 9.996478184308355, LR: 0.0003 +[2026-03-04 01:11:39] (step=0051093) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 9.996673840735669, LR: 0.0003 +[2026-03-04 01:11:47] (step=0051094) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 9.996869497162981, LR: 0.0003 +[2026-03-04 01:11:55] (step=0051095) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 9.997065153590295, LR: 0.0003 +[2026-03-04 01:12:03] (step=0051096) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 9.99726081001761, LR: 0.0003 +[2026-03-04 01:12:11] (step=0051097) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 9.997456466444923, LR: 0.0003 +[2026-03-04 01:12:19] (step=0051098) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.997652122872235, LR: 0.0003 +[2026-03-04 01:12:27] (step=0051099) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 9.99784777929955, LR: 0.0003 +[2026-03-04 01:12:35] (step=0051100) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 9.998043435726863, LR: 0.0003 +[2026-03-04 01:12:43] (step=0051101) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 9.998239092154177, LR: 0.0003 +[2026-03-04 01:12:50] (step=0051102) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 9.998434748581492, LR: 0.0003 +[2026-03-04 01:12:58] (step=0051103) Train Loss: 0.4384, Train Steps/Sec: 0.12, Epoch: 9.998630405008804, LR: 0.0003 +[2026-03-04 01:13:06] (step=0051104) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 9.998826061436118, LR: 0.0003 +[2026-03-04 01:13:14] (step=0051105) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 9.999021717863432, LR: 0.0003 +[2026-03-04 01:13:22] (step=0051106) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 9.999217374290746, LR: 0.0003 +[2026-03-04 01:13:30] (step=0051107) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 9.99941303071806, LR: 0.0003 +[2026-03-04 01:13:38] (step=0051108) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 9.999608687145372, LR: 0.0003 +[2026-03-04 01:13:46] (step=0051109) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 9.999804343572686, LR: 0.0003 +[2026-03-04 01:13:54] (step=0051110) Train Loss: 0.4397, Train Steps/Sec: 0.12, Epoch: 10.0, LR: 0.0003 +[2026-03-04 01:13:54] Beginning epoch 10... +[2026-03-04 01:14:03] (step=0051111) Train Loss: 0.4358, Train Steps/Sec: 0.10, Epoch: 10.000195656427314, LR: 0.0003 +[2026-03-04 01:14:11] (step=0051112) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.000391312854628, LR: 0.0003 +[2026-03-04 01:14:19] (step=0051113) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.00058696928194, LR: 0.0003 +[2026-03-04 01:14:27] (step=0051114) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.000782625709254, LR: 0.0003 +[2026-03-04 01:14:35] (step=0051115) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.000978282136568, LR: 0.0003 +[2026-03-04 01:14:43] (step=0051116) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.001173938563882, LR: 0.0003 +[2026-03-04 01:14:51] (step=0051117) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.001369594991196, LR: 0.0003 +[2026-03-04 01:14:58] (step=0051118) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.001565251418508, LR: 0.0003 +[2026-03-04 01:15:06] (step=0051119) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.001760907845823, LR: 0.0003 +[2026-03-04 01:15:14] (step=0051120) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.001956564273137, LR: 0.0003 +[2026-03-04 01:15:22] (step=0051121) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.00215222070045, LR: 0.0003 +[2026-03-04 01:15:30] (step=0051122) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.002347877127765, LR: 0.0003 +[2026-03-04 01:15:38] (step=0051123) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.002543533555077, LR: 0.0003 +[2026-03-04 01:15:46] (step=0051124) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.00273918998239, LR: 0.0003 +[2026-03-04 01:15:53] (step=0051125) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.002934846409705, LR: 0.0003 +[2026-03-04 01:16:01] (step=0051126) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.003130502837019, LR: 0.0003 +[2026-03-04 01:16:09] (step=0051127) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.003326159264331, LR: 0.0003 +[2026-03-04 01:16:17] (step=0051128) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.003521815691645, LR: 0.0003 +[2026-03-04 01:16:25] (step=0051129) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.003717472118959, LR: 0.0003 +[2026-03-04 01:16:33] (step=0051130) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.003913128546273, LR: 0.0003 +[2026-03-04 01:16:41] (step=0051131) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.004108784973587, LR: 0.0003 +[2026-03-04 01:16:49] (step=0051132) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 10.0043044414009, LR: 0.0003 +[2026-03-04 01:16:56] (step=0051133) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.004500097828213, LR: 0.0003 +[2026-03-04 01:17:04] (step=0051134) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.004695754255527, LR: 0.0003 +[2026-03-04 01:17:12] (step=0051135) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.004891410682841, LR: 0.0003 +[2026-03-04 01:17:20] (step=0051136) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.005087067110155, LR: 0.0003 +[2026-03-04 01:17:28] (step=0051137) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.005282723537468, LR: 0.0003 +[2026-03-04 01:17:36] (step=0051138) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.005478379964782, LR: 0.0003 +[2026-03-04 01:17:44] (step=0051139) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.005674036392096, LR: 0.0003 +[2026-03-04 01:17:51] (step=0051140) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.00586969281941, LR: 0.0003 +[2026-03-04 01:17:59] (step=0051141) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.006065349246724, LR: 0.0003 +[2026-03-04 01:18:07] (step=0051142) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.006261005674036, LR: 0.0003 +[2026-03-04 01:18:15] (step=0051143) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 10.00645666210135, LR: 0.0003 +[2026-03-04 01:18:23] (step=0051144) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 10.006652318528664, LR: 0.0003 +[2026-03-04 01:18:31] (step=0051145) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.006847974955978, LR: 0.0003 +[2026-03-04 01:18:39] (step=0051146) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 10.007043631383292, LR: 0.0003 +[2026-03-04 01:18:47] (step=0051147) Train Loss: 0.4473, Train Steps/Sec: 0.12, Epoch: 10.007239287810604, LR: 0.0003 +[2026-03-04 01:18:54] (step=0051148) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 10.007434944237918, LR: 0.0003 +[2026-03-04 01:19:02] (step=0051149) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.007630600665232, LR: 0.0003 +[2026-03-04 01:19:10] (step=0051150) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.007826257092546, LR: 0.0003 +[2026-03-04 01:19:18] (step=0051151) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.008021913519858, LR: 0.0003 +[2026-03-04 01:19:26] (step=0051152) Train Loss: 0.4373, Train Steps/Sec: 0.12, Epoch: 10.008217569947172, LR: 0.0003 +[2026-03-04 01:19:34] (step=0051153) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.008413226374486, LR: 0.0003 +[2026-03-04 01:19:42] (step=0051154) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.0086088828018, LR: 0.0003 +[2026-03-04 01:19:50] (step=0051155) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.008804539229114, LR: 0.0003 +[2026-03-04 01:19:58] (step=0051156) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.009000195656427, LR: 0.0003 +[2026-03-04 01:20:05] (step=0051157) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.00919585208374, LR: 0.0003 +[2026-03-04 01:20:13] (step=0051158) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.009391508511055, LR: 0.0003 +[2026-03-04 01:20:21] (step=0051159) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.009587164938369, LR: 0.0003 +[2026-03-04 01:20:29] (step=0051160) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.009782821365683, LR: 0.0003 +[2026-03-04 01:20:37] (step=0051161) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 10.009978477792995, LR: 0.0003 +[2026-03-04 01:20:45] (step=0051162) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 10.010174134220309, LR: 0.0003 +[2026-03-04 01:20:53] (step=0051163) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.010369790647623, LR: 0.0003 +[2026-03-04 01:21:00] (step=0051164) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.010565447074937, LR: 0.0003 +[2026-03-04 01:21:08] (step=0051165) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 10.01076110350225, LR: 0.0003 +[2026-03-04 01:21:16] (step=0051166) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.010956759929563, LR: 0.0003 +[2026-03-04 01:21:24] (step=0051167) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.011152416356877, LR: 0.0003 +[2026-03-04 01:21:32] (step=0051168) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.011348072784191, LR: 0.0003 +[2026-03-04 01:21:40] (step=0051169) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.011543729211505, LR: 0.0003 +[2026-03-04 01:21:48] (step=0051170) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 10.011739385638819, LR: 0.0003 +[2026-03-04 01:21:55] (step=0051171) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.011935042066131, LR: 0.0003 +[2026-03-04 01:22:03] (step=0051172) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.012130698493445, LR: 0.0003 +[2026-03-04 01:22:11] (step=0051173) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.01232635492076, LR: 0.0003 +[2026-03-04 01:22:19] (step=0051174) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.012522011348073, LR: 0.0003 +[2026-03-04 01:22:27] (step=0051175) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.012717667775387, LR: 0.0003 +[2026-03-04 01:22:35] (step=0051176) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.0129133242027, LR: 0.0003 +[2026-03-04 01:22:43] (step=0051177) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.013108980630014, LR: 0.0003 +[2026-03-04 01:22:50] (step=0051178) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.013304637057328, LR: 0.0003 +[2026-03-04 01:22:58] (step=0051179) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.013500293484642, LR: 0.0003 +[2026-03-04 01:23:06] (step=0051180) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.013695949911954, LR: 0.0003 +[2026-03-04 01:23:14] (step=0051181) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.013891606339268, LR: 0.0003 +[2026-03-04 01:23:22] (step=0051182) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.014087262766582, LR: 0.0003 +[2026-03-04 01:23:30] (step=0051183) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.014282919193896, LR: 0.0003 +[2026-03-04 01:23:38] (step=0051184) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.01447857562121, LR: 0.0003 +[2026-03-04 01:23:45] (step=0051185) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 10.014674232048522, LR: 0.0003 +[2026-03-04 01:23:53] (step=0051186) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.014869888475836, LR: 0.0003 +[2026-03-04 01:24:01] (step=0051187) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 10.01506554490315, LR: 0.0003 +[2026-03-04 01:24:09] (step=0051188) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.015261201330464, LR: 0.0003 +[2026-03-04 01:24:17] (step=0051189) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.015456857757778, LR: 0.0003 +[2026-03-04 01:24:25] (step=0051190) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.01565251418509, LR: 0.0003 +[2026-03-04 01:24:33] (step=0051191) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.015848170612404, LR: 0.0003 +[2026-03-04 01:24:40] (step=0051192) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.016043827039718, LR: 0.0003 +[2026-03-04 01:24:48] (step=0051193) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.016239483467032, LR: 0.0003 +[2026-03-04 01:24:56] (step=0051194) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 10.016435139894346, LR: 0.0003 +[2026-03-04 01:25:04] (step=0051195) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.016630796321659, LR: 0.0003 +[2026-03-04 01:25:12] (step=0051196) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.016826452748973, LR: 0.0003 +[2026-03-04 01:25:20] (step=0051197) Train Loss: 0.4630, Train Steps/Sec: 0.12, Epoch: 10.017022109176287, LR: 0.0003 +[2026-03-04 01:25:28] (step=0051198) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.0172177656036, LR: 0.0003 +[2026-03-04 01:25:36] (step=0051199) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.017413422030915, LR: 0.0003 +[2026-03-04 01:25:44] (step=0051200) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.017609078458227, LR: 0.0003 +[2026-03-04 01:25:51] (step=0051201) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.01780473488554, LR: 0.0003 +[2026-03-04 01:25:59] (step=0051202) Train Loss: 0.4396, Train Steps/Sec: 0.12, Epoch: 10.018000391312855, LR: 0.0003 +[2026-03-04 01:26:07] (step=0051203) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.018196047740169, LR: 0.0003 +[2026-03-04 01:26:15] (step=0051204) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.018391704167481, LR: 0.0003 +[2026-03-04 01:26:23] (step=0051205) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 10.018587360594795, LR: 0.0003 +[2026-03-04 01:26:31] (step=0051206) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.018783017022109, LR: 0.0003 +[2026-03-04 01:26:39] (step=0051207) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.018978673449423, LR: 0.0003 +[2026-03-04 01:26:47] (step=0051208) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.019174329876737, LR: 0.0003 +[2026-03-04 01:26:55] (step=0051209) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 10.01936998630405, LR: 0.0003 +[2026-03-04 01:27:02] (step=0051210) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.019565642731363, LR: 0.0003 +[2026-03-04 01:27:10] (step=0051211) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.019761299158677, LR: 0.0003 +[2026-03-04 01:27:18] (step=0051212) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.019956955585991, LR: 0.0003 +[2026-03-04 01:27:26] (step=0051213) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.020152612013305, LR: 0.0003 +[2026-03-04 01:27:34] (step=0051214) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.020348268440618, LR: 0.0003 +[2026-03-04 01:27:42] (step=0051215) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.020543924867932, LR: 0.0003 +[2026-03-04 01:27:50] (step=0051216) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.020739581295246, LR: 0.0003 +[2026-03-04 01:27:57] (step=0051217) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.02093523772256, LR: 0.0003 +[2026-03-04 01:28:05] (step=0051218) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.021130894149874, LR: 0.0003 +[2026-03-04 01:28:13] (step=0051219) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 10.021326550577186, LR: 0.0003 +[2026-03-04 01:28:21] (step=0051220) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.0215222070045, LR: 0.0003 +[2026-03-04 01:28:29] (step=0051221) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.021717863431814, LR: 0.0003 +[2026-03-04 01:28:37] (step=0051222) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.021913519859128, LR: 0.0003 +[2026-03-04 01:28:45] (step=0051223) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.022109176286442, LR: 0.0003 +[2026-03-04 01:28:52] (step=0051224) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.022304832713754, LR: 0.0003 +[2026-03-04 01:29:00] (step=0051225) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.022500489141068, LR: 0.0003 +[2026-03-04 01:29:08] (step=0051226) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.022696145568382, LR: 0.0003 +[2026-03-04 01:29:16] (step=0051227) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.022891801995696, LR: 0.0003 +[2026-03-04 01:29:24] (step=0051228) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.02308745842301, LR: 0.0003 +[2026-03-04 01:29:32] (step=0051229) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.023283114850322, LR: 0.0003 +[2026-03-04 01:29:40] (step=0051230) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.023478771277636, LR: 0.0003 +[2026-03-04 01:29:48] (step=0051231) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.02367442770495, LR: 0.0003 +[2026-03-04 01:29:55] (step=0051232) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.023870084132264, LR: 0.0003 +[2026-03-04 01:30:03] (step=0051233) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.024065740559577, LR: 0.0003 +[2026-03-04 01:30:11] (step=0051234) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 10.02426139698689, LR: 0.0003 +[2026-03-04 01:30:19] (step=0051235) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.024457053414205, LR: 0.0003 +[2026-03-04 01:30:27] (step=0051236) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.024652709841519, LR: 0.0003 +[2026-03-04 01:30:35] (step=0051237) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.024848366268833, LR: 0.0003 +[2026-03-04 01:30:43] (step=0051238) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 10.025044022696145, LR: 0.0003 +[2026-03-04 01:30:50] (step=0051239) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.025239679123459, LR: 0.0003 +[2026-03-04 01:30:58] (step=0051240) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.025435335550773, LR: 0.0003 +[2026-03-04 01:31:06] (step=0051241) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.025630991978087, LR: 0.0003 +[2026-03-04 01:31:14] (step=0051242) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.0258266484054, LR: 0.0003 +[2026-03-04 01:31:22] (step=0051243) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.026022304832713, LR: 0.0003 +[2026-03-04 01:31:30] (step=0051244) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.026217961260027, LR: 0.0003 +[2026-03-04 01:31:38] (step=0051245) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 10.026413617687341, LR: 0.0003 +[2026-03-04 01:31:45] (step=0051246) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.026609274114655, LR: 0.0003 +[2026-03-04 01:31:53] (step=0051247) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.026804930541969, LR: 0.0003 +[2026-03-04 01:32:01] (step=0051248) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.027000586969281, LR: 0.0003 +[2026-03-04 01:32:09] (step=0051249) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 10.027196243396595, LR: 0.0003 +[2026-03-04 01:32:17] (step=0051250) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.02739189982391, LR: 0.0003 +[2026-03-04 01:32:25] (step=0051251) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.027587556251223, LR: 0.0003 +[2026-03-04 01:32:33] (step=0051252) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.027783212678537, LR: 0.0003 +[2026-03-04 01:32:41] (step=0051253) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.02797886910585, LR: 0.0003 +[2026-03-04 01:32:48] (step=0051254) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.028174525533164, LR: 0.0003 +[2026-03-04 01:32:56] (step=0051255) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.028370181960478, LR: 0.0003 +[2026-03-04 01:33:04] (step=0051256) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.028565838387792, LR: 0.0003 +[2026-03-04 01:33:12] (step=0051257) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.028761494815104, LR: 0.0003 +[2026-03-04 01:33:20] (step=0051258) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.028957151242418, LR: 0.0003 +[2026-03-04 01:33:28] (step=0051259) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.029152807669732, LR: 0.0003 +[2026-03-04 01:33:36] (step=0051260) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.029348464097046, LR: 0.0003 +[2026-03-04 01:33:44] (step=0051261) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.02954412052436, LR: 0.0003 +[2026-03-04 01:33:51] (step=0051262) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.029739776951672, LR: 0.0003 +[2026-03-04 01:33:59] (step=0051263) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.029935433378986, LR: 0.0003 +[2026-03-04 01:34:07] (step=0051264) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.0301310898063, LR: 0.0003 +[2026-03-04 01:34:15] (step=0051265) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.030326746233614, LR: 0.0003 +[2026-03-04 01:34:23] (step=0051266) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.030522402660928, LR: 0.0003 +[2026-03-04 01:34:31] (step=0051267) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.03071805908824, LR: 0.0003 +[2026-03-04 01:34:39] (step=0051268) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.030913715515554, LR: 0.0003 +[2026-03-04 01:34:46] (step=0051269) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.031109371942868, LR: 0.0003 +[2026-03-04 01:34:54] (step=0051270) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.031305028370182, LR: 0.0003 +[2026-03-04 01:35:02] (step=0051271) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.031500684797496, LR: 0.0003 +[2026-03-04 01:35:10] (step=0051272) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.031696341224809, LR: 0.0003 +[2026-03-04 01:35:18] (step=0051273) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.031891997652123, LR: 0.0003 +[2026-03-04 01:35:26] (step=0051274) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.032087654079437, LR: 0.0003 +[2026-03-04 01:35:34] (step=0051275) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.03228331050675, LR: 0.0003 +[2026-03-04 01:35:41] (step=0051276) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.032478966934065, LR: 0.0003 +[2026-03-04 01:35:49] (step=0051277) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.032674623361377, LR: 0.0003 +[2026-03-04 01:35:57] (step=0051278) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.03287027978869, LR: 0.0003 +[2026-03-04 01:36:05] (step=0051279) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.033065936216005, LR: 0.0003 +[2026-03-04 01:36:13] (step=0051280) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.033261592643319, LR: 0.0003 +[2026-03-04 01:36:21] (step=0051281) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.033457249070633, LR: 0.0003 +[2026-03-04 01:36:29] (step=0051282) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.033652905497945, LR: 0.0003 +[2026-03-04 01:36:36] (step=0051283) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.033848561925259, LR: 0.0003 +[2026-03-04 01:36:44] (step=0051284) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.034044218352573, LR: 0.0003 +[2026-03-04 01:36:52] (step=0051285) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.034239874779887, LR: 0.0003 +[2026-03-04 01:37:00] (step=0051286) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.0344355312072, LR: 0.0003 +[2026-03-04 01:37:08] (step=0051287) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.034631187634513, LR: 0.0003 +[2026-03-04 01:37:16] (step=0051288) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 10.034826844061827, LR: 0.0003 +[2026-03-04 01:37:24] (step=0051289) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.035022500489141, LR: 0.0003 +[2026-03-04 01:37:31] (step=0051290) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.035218156916455, LR: 0.0003 +[2026-03-04 01:37:39] (step=0051291) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.035413813343768, LR: 0.0003 +[2026-03-04 01:37:47] (step=0051292) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.035609469771082, LR: 0.0003 +[2026-03-04 01:37:55] (step=0051293) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.035805126198396, LR: 0.0003 +[2026-03-04 01:38:03] (step=0051294) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.03600078262571, LR: 0.0003 +[2026-03-04 01:38:11] (step=0051295) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.036196439053024, LR: 0.0003 +[2026-03-04 01:38:19] (step=0051296) Train Loss: 0.4417, Train Steps/Sec: 0.12, Epoch: 10.036392095480336, LR: 0.0003 +[2026-03-04 01:38:27] (step=0051297) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.03658775190765, LR: 0.0003 +[2026-03-04 01:38:34] (step=0051298) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.036783408334964, LR: 0.0003 +[2026-03-04 01:38:42] (step=0051299) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.036979064762278, LR: 0.0003 +[2026-03-04 01:38:50] (step=0051300) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.037174721189592, LR: 0.0003 +[2026-03-04 01:38:58] (step=0051301) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 10.037370377616904, LR: 0.0003 +[2026-03-04 01:39:06] (step=0051302) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.037566034044218, LR: 0.0003 +[2026-03-04 01:39:14] (step=0051303) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.037761690471532, LR: 0.0003 +[2026-03-04 01:39:22] (step=0051304) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.037957346898846, LR: 0.0003 +[2026-03-04 01:39:30] (step=0051305) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.03815300332616, LR: 0.0003 +[2026-03-04 01:39:37] (step=0051306) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.038348659753472, LR: 0.0003 +[2026-03-04 01:39:45] (step=0051307) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.038544316180786, LR: 0.0003 +[2026-03-04 01:39:53] (step=0051308) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.0387399726081, LR: 0.0003 +[2026-03-04 01:40:01] (step=0051309) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.038935629035414, LR: 0.0003 +[2026-03-04 01:40:09] (step=0051310) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.039131285462727, LR: 0.0003 +[2026-03-04 01:40:17] (step=0051311) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.03932694189004, LR: 0.0003 +[2026-03-04 01:40:25] (step=0051312) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.039522598317355, LR: 0.0003 +[2026-03-04 01:40:33] (step=0051313) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 10.039718254744669, LR: 0.0003 +[2026-03-04 01:40:40] (step=0051314) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.039913911171983, LR: 0.0003 +[2026-03-04 01:40:48] (step=0051315) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 10.040109567599295, LR: 0.0003 +[2026-03-04 01:40:56] (step=0051316) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.040305224026609, LR: 0.0003 +[2026-03-04 01:41:04] (step=0051317) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.040500880453923, LR: 0.0003 +[2026-03-04 01:41:12] (step=0051318) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.040696536881237, LR: 0.0003 +[2026-03-04 01:41:20] (step=0051319) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.04089219330855, LR: 0.0003 +[2026-03-04 01:41:28] (step=0051320) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.041087849735863, LR: 0.0003 +[2026-03-04 01:41:35] (step=0051321) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.041283506163177, LR: 0.0003 +[2026-03-04 01:41:43] (step=0051322) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 10.041479162590491, LR: 0.0003 +[2026-03-04 01:41:51] (step=0051323) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.041674819017805, LR: 0.0003 +[2026-03-04 01:41:59] (step=0051324) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.04187047544512, LR: 0.0003 +[2026-03-04 01:42:07] (step=0051325) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.042066131872431, LR: 0.0003 +[2026-03-04 01:42:15] (step=0051326) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.042261788299745, LR: 0.0003 +[2026-03-04 01:42:23] (step=0051327) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.04245744472706, LR: 0.0003 +[2026-03-04 01:42:30] (step=0051328) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.042653101154373, LR: 0.0003 +[2026-03-04 01:42:38] (step=0051329) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.042848757581687, LR: 0.0003 +[2026-03-04 01:42:46] (step=0051330) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.043044414009, LR: 0.0003 +[2026-03-04 01:42:54] (step=0051331) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.043240070436314, LR: 0.0003 +[2026-03-04 01:43:02] (step=0051332) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.043435726863628, LR: 0.0003 +[2026-03-04 01:43:10] (step=0051333) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.043631383290942, LR: 0.0003 +[2026-03-04 01:43:18] (step=0051334) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.043827039718254, LR: 0.0003 +[2026-03-04 01:43:25] (step=0051335) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.044022696145568, LR: 0.0003 +[2026-03-04 01:43:33] (step=0051336) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.044218352572882, LR: 0.0003 +[2026-03-04 01:43:41] (step=0051337) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.044414009000196, LR: 0.0003 +[2026-03-04 01:43:49] (step=0051338) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 10.04460966542751, LR: 0.0003 +[2026-03-04 01:43:57] (step=0051339) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.044805321854822, LR: 0.0003 +[2026-03-04 01:44:05] (step=0051340) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.045000978282136, LR: 0.0003 +[2026-03-04 01:44:13] (step=0051341) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.04519663470945, LR: 0.0003 +[2026-03-04 01:44:21] (step=0051342) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.045392291136764, LR: 0.0003 +[2026-03-04 01:44:28] (step=0051343) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.045587947564078, LR: 0.0003 +[2026-03-04 01:44:36] (step=0051344) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.04578360399139, LR: 0.0003 +[2026-03-04 01:44:44] (step=0051345) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 10.045979260418704, LR: 0.0003 +[2026-03-04 01:44:52] (step=0051346) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.046174916846018, LR: 0.0003 +[2026-03-04 01:45:00] (step=0051347) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.046370573273332, LR: 0.0003 +[2026-03-04 01:45:08] (step=0051348) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.046566229700646, LR: 0.0003 +[2026-03-04 01:45:16] (step=0051349) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.046761886127959, LR: 0.0003 +[2026-03-04 01:45:24] (step=0051350) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.046957542555273, LR: 0.0003 +[2026-03-04 01:45:32] (step=0051351) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.047153198982587, LR: 0.0003 +[2026-03-04 01:45:40] (step=0051352) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 10.0473488554099, LR: 0.0003 +[2026-03-04 01:45:47] (step=0051353) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.047544511837215, LR: 0.0003 +[2026-03-04 01:45:55] (step=0051354) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.047740168264527, LR: 0.0003 +[2026-03-04 01:46:03] (step=0051355) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.04793582469184, LR: 0.0003 +[2026-03-04 01:46:11] (step=0051356) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.048131481119155, LR: 0.0003 +[2026-03-04 01:46:19] (step=0051357) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.048327137546469, LR: 0.0003 +[2026-03-04 01:46:27] (step=0051358) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.048522793973783, LR: 0.0003 +[2026-03-04 01:46:35] (step=0051359) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.048718450401095, LR: 0.0003 +[2026-03-04 01:46:42] (step=0051360) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.04891410682841, LR: 0.0003 +[2026-03-04 01:46:50] (step=0051361) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.049109763255723, LR: 0.0003 +[2026-03-04 01:46:58] (step=0051362) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.049305419683037, LR: 0.0003 +[2026-03-04 01:47:06] (step=0051363) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 10.04950107611035, LR: 0.0003 +[2026-03-04 01:47:14] (step=0051364) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 10.049696732537663, LR: 0.0003 +[2026-03-04 01:47:22] (step=0051365) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 10.049892388964977, LR: 0.0003 +[2026-03-04 01:47:29] (step=0051366) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.050088045392291, LR: 0.0003 +[2026-03-04 01:47:37] (step=0051367) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.050283701819605, LR: 0.0003 +[2026-03-04 01:47:45] (step=0051368) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.050479358246918, LR: 0.0003 +[2026-03-04 01:47:53] (step=0051369) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.050675014674232, LR: 0.0003 +[2026-03-04 01:48:01] (step=0051370) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.050870671101546, LR: 0.0003 +[2026-03-04 01:48:09] (step=0051371) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.05106632752886, LR: 0.0003 +[2026-03-04 01:48:17] (step=0051372) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.051261983956174, LR: 0.0003 +[2026-03-04 01:48:24] (step=0051373) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.051457640383486, LR: 0.0003 +[2026-03-04 01:48:32] (step=0051374) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.0516532968108, LR: 0.0003 +[2026-03-04 01:48:40] (step=0051375) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 10.051848953238114, LR: 0.0003 +[2026-03-04 01:48:48] (step=0051376) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.052044609665428, LR: 0.0003 +[2026-03-04 01:48:56] (step=0051377) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.052240266092742, LR: 0.0003 +[2026-03-04 01:49:04] (step=0051378) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.052435922520054, LR: 0.0003 +[2026-03-04 01:49:12] (step=0051379) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 10.052631578947368, LR: 0.0003 +[2026-03-04 01:49:20] (step=0051380) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.052827235374682, LR: 0.0003 +[2026-03-04 01:49:27] (step=0051381) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.053022891801996, LR: 0.0003 +[2026-03-04 01:49:35] (step=0051382) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 10.05321854822931, LR: 0.0003 +[2026-03-04 01:49:43] (step=0051383) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.053414204656622, LR: 0.0003 +[2026-03-04 01:49:51] (step=0051384) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.053609861083936, LR: 0.0003 +[2026-03-04 01:49:59] (step=0051385) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.05380551751125, LR: 0.0003 +[2026-03-04 01:50:07] (step=0051386) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.054001173938564, LR: 0.0003 +[2026-03-04 01:50:15] (step=0051387) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.054196830365877, LR: 0.0003 +[2026-03-04 01:50:22] (step=0051388) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.05439248679319, LR: 0.0003 +[2026-03-04 01:50:30] (step=0051389) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.054588143220505, LR: 0.0003 +[2026-03-04 01:50:38] (step=0051390) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.054783799647819, LR: 0.0003 +[2026-03-04 01:50:46] (step=0051391) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.054979456075133, LR: 0.0003 +[2026-03-04 01:50:54] (step=0051392) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.055175112502445, LR: 0.0003 +[2026-03-04 01:51:02] (step=0051393) Train Loss: 0.4466, Train Steps/Sec: 0.12, Epoch: 10.055370768929759, LR: 0.0003 +[2026-03-04 01:51:10] (step=0051394) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 10.055566425357073, LR: 0.0003 +[2026-03-04 01:51:18] (step=0051395) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 10.055762081784387, LR: 0.0003 +[2026-03-04 01:51:26] (step=0051396) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.055957738211701, LR: 0.0003 +[2026-03-04 01:51:34] (step=0051397) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.056153394639013, LR: 0.0003 +[2026-03-04 01:51:41] (step=0051398) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.056349051066327, LR: 0.0003 +[2026-03-04 01:51:49] (step=0051399) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.056544707493641, LR: 0.0003 +[2026-03-04 01:51:57] (step=0051400) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.056740363920955, LR: 0.0003 +[2026-03-04 01:52:05] (step=0051401) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.05693602034827, LR: 0.0003 +[2026-03-04 01:52:13] (step=0051402) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 10.057131676775581, LR: 0.0003 +[2026-03-04 01:52:21] (step=0051403) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.057327333202895, LR: 0.0003 +[2026-03-04 01:52:29] (step=0051404) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.05752298963021, LR: 0.0003 +[2026-03-04 01:52:36] (step=0051405) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.057718646057523, LR: 0.0003 +[2026-03-04 01:52:44] (step=0051406) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.057914302484837, LR: 0.0003 +[2026-03-04 01:52:52] (step=0051407) Train Loss: 0.4211, Train Steps/Sec: 0.13, Epoch: 10.05810995891215, LR: 0.0003 +[2026-03-04 01:53:00] (step=0051408) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.058305615339464, LR: 0.0003 +[2026-03-04 01:53:08] (step=0051409) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.058501271766778, LR: 0.0003 +[2026-03-04 01:53:16] (step=0051410) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.058696928194092, LR: 0.0003 +[2026-03-04 01:53:24] (step=0051411) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.058892584621406, LR: 0.0003 +[2026-03-04 01:53:31] (step=0051412) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.059088241048718, LR: 0.0003 +[2026-03-04 01:53:39] (step=0051413) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.059283897476032, LR: 0.0003 +[2026-03-04 01:53:47] (step=0051414) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.059479553903346, LR: 0.0003 +[2026-03-04 01:53:55] (step=0051415) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.05967521033066, LR: 0.0003 +[2026-03-04 01:54:03] (step=0051416) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.059870866757972, LR: 0.0003 +[2026-03-04 01:54:11] (step=0051417) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.060066523185286, LR: 0.0003 +[2026-03-04 01:54:19] (step=0051418) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.0602621796126, LR: 0.0003 +[2026-03-04 01:54:26] (step=0051419) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.060457836039914, LR: 0.0003 +[2026-03-04 01:54:34] (step=0051420) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.060653492467228, LR: 0.0003 +[2026-03-04 01:54:42] (step=0051421) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.06084914889454, LR: 0.0003 +[2026-03-04 01:54:50] (step=0051422) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.061044805321854, LR: 0.0003 +[2026-03-04 01:54:58] (step=0051423) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.061240461749168, LR: 0.0003 +[2026-03-04 01:55:06] (step=0051424) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.061436118176482, LR: 0.0003 +[2026-03-04 01:55:14] (step=0051425) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.061631774603796, LR: 0.0003 +[2026-03-04 01:55:21] (step=0051426) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.061827431031109, LR: 0.0003 +[2026-03-04 01:55:29] (step=0051427) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.062023087458423, LR: 0.0003 +[2026-03-04 01:55:37] (step=0051428) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.062218743885737, LR: 0.0003 +[2026-03-04 01:55:45] (step=0051429) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.06241440031305, LR: 0.0003 +[2026-03-04 01:55:53] (step=0051430) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.062610056740365, LR: 0.0003 +[2026-03-04 01:56:01] (step=0051431) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.062805713167677, LR: 0.0003 +[2026-03-04 01:56:09] (step=0051432) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.063001369594991, LR: 0.0003 +[2026-03-04 01:56:17] (step=0051433) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.063197026022305, LR: 0.0003 +[2026-03-04 01:56:24] (step=0051434) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.063392682449619, LR: 0.0003 +[2026-03-04 01:56:32] (step=0051435) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 10.063588338876933, LR: 0.0003 +[2026-03-04 01:56:40] (step=0051436) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.063783995304245, LR: 0.0003 +[2026-03-04 01:56:48] (step=0051437) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.06397965173156, LR: 0.0003 +[2026-03-04 01:56:56] (step=0051438) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.064175308158873, LR: 0.0003 +[2026-03-04 01:57:04] (step=0051439) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.064370964586187, LR: 0.0003 +[2026-03-04 01:57:12] (step=0051440) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.0645666210135, LR: 0.0003 +[2026-03-04 01:57:19] (step=0051441) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.064762277440813, LR: 0.0003 +[2026-03-04 01:57:27] (step=0051442) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.064957933868127, LR: 0.0003 +[2026-03-04 01:57:35] (step=0051443) Train Loss: 0.4614, Train Steps/Sec: 0.12, Epoch: 10.065153590295441, LR: 0.0003 +[2026-03-04 01:57:43] (step=0051444) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.065349246722755, LR: 0.0003 +[2026-03-04 01:57:51] (step=0051445) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.065544903150068, LR: 0.0003 +[2026-03-04 01:57:59] (step=0051446) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.065740559577382, LR: 0.0003 +[2026-03-04 01:58:07] (step=0051447) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.065936216004696, LR: 0.0003 +[2026-03-04 01:58:15] (step=0051448) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.06613187243201, LR: 0.0003 +[2026-03-04 01:58:23] (step=0051449) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.066327528859324, LR: 0.0003 +[2026-03-04 01:58:30] (step=0051450) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.066523185286636, LR: 0.0003 +[2026-03-04 01:58:38] (step=0051451) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.06671884171395, LR: 0.0003 +[2026-03-04 01:58:46] (step=0051452) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.066914498141264, LR: 0.0003 +[2026-03-04 01:58:54] (step=0051453) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 10.067110154568578, LR: 0.0003 +[2026-03-04 01:59:02] (step=0051454) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.067305810995892, LR: 0.0003 +[2026-03-04 01:59:10] (step=0051455) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.067501467423204, LR: 0.0003 +[2026-03-04 01:59:18] (step=0051456) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.067697123850518, LR: 0.0003 +[2026-03-04 01:59:25] (step=0051457) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.067892780277832, LR: 0.0003 +[2026-03-04 01:59:33] (step=0051458) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.068088436705146, LR: 0.0003 +[2026-03-04 01:59:41] (step=0051459) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 10.06828409313246, LR: 0.0003 +[2026-03-04 01:59:49] (step=0051460) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.068479749559772, LR: 0.0003 +[2026-03-04 01:59:57] (step=0051461) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.068675405987086, LR: 0.0003 +[2026-03-04 02:00:05] (step=0051462) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.0688710624144, LR: 0.0003 +[2026-03-04 02:00:13] (step=0051463) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.069066718841714, LR: 0.0003 +[2026-03-04 02:00:21] (step=0051464) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 10.069262375269028, LR: 0.0003 +[2026-03-04 02:00:28] (step=0051465) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.06945803169634, LR: 0.0003 +[2026-03-04 02:00:36] (step=0051466) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.069653688123655, LR: 0.0003 +[2026-03-04 02:00:44] (step=0051467) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.069849344550969, LR: 0.0003 +[2026-03-04 02:00:52] (step=0051468) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.070045000978283, LR: 0.0003 +[2026-03-04 02:01:00] (step=0051469) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 10.070240657405595, LR: 0.0003 +[2026-03-04 02:01:08] (step=0051470) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.070436313832909, LR: 0.0003 +[2026-03-04 02:01:16] (step=0051471) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.070631970260223, LR: 0.0003 +[2026-03-04 02:01:23] (step=0051472) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.070827626687537, LR: 0.0003 +[2026-03-04 02:01:31] (step=0051473) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 10.071023283114851, LR: 0.0003 +[2026-03-04 02:01:39] (step=0051474) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.071218939542163, LR: 0.0003 +[2026-03-04 02:01:47] (step=0051475) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.071414595969477, LR: 0.0003 +[2026-03-04 02:01:55] (step=0051476) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.071610252396791, LR: 0.0003 +[2026-03-04 02:02:03] (step=0051477) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 10.071805908824105, LR: 0.0003 +[2026-03-04 02:02:11] (step=0051478) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.07200156525142, LR: 0.0003 +[2026-03-04 02:02:18] (step=0051479) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.072197221678731, LR: 0.0003 +[2026-03-04 02:02:26] (step=0051480) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.072392878106045, LR: 0.0003 +[2026-03-04 02:02:34] (step=0051481) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.07258853453336, LR: 0.0003 +[2026-03-04 02:02:42] (step=0051482) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 10.072784190960673, LR: 0.0003 +[2026-03-04 02:02:50] (step=0051483) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.072979847387987, LR: 0.0003 +[2026-03-04 02:02:58] (step=0051484) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.0731755038153, LR: 0.0003 +[2026-03-04 02:03:06] (step=0051485) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.073371160242614, LR: 0.0003 +[2026-03-04 02:03:13] (step=0051486) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 10.073566816669928, LR: 0.0003 +[2026-03-04 02:03:21] (step=0051487) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.073762473097242, LR: 0.0003 +[2026-03-04 02:03:29] (step=0051488) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 10.073958129524556, LR: 0.0003 +[2026-03-04 02:03:37] (step=0051489) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.074153785951868, LR: 0.0003 +[2026-03-04 02:03:45] (step=0051490) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 10.074349442379182, LR: 0.0003 +[2026-03-04 02:03:53] (step=0051491) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 10.074545098806496, LR: 0.0003 +[2026-03-04 02:04:01] (step=0051492) Train Loss: 0.4509, Train Steps/Sec: 0.12, Epoch: 10.07474075523381, LR: 0.0003 +[2026-03-04 02:04:09] (step=0051493) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.074936411661122, LR: 0.0003 +[2026-03-04 02:04:17] (step=0051494) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 10.075132068088436, LR: 0.0003 +[2026-03-04 02:04:24] (step=0051495) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.07532772451575, LR: 0.0003 +[2026-03-04 02:04:32] (step=0051496) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.075523380943064, LR: 0.0003 +[2026-03-04 02:04:40] (step=0051497) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.075719037370378, LR: 0.0003 +[2026-03-04 02:04:48] (step=0051498) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.07591469379769, LR: 0.0003 +[2026-03-04 02:04:56] (step=0051499) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.076110350225004, LR: 0.0003 +[2026-03-04 02:05:04] (step=0051500) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.076306006652318, LR: 0.0003 +[2026-03-04 02:05:04] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0051500/ +[2026-03-04 02:05:12] (step=0051501) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 10.076501663079632, LR: 0.0003 +[2026-03-04 02:05:19] (step=0051502) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.076697319506946, LR: 0.0003 +[2026-03-04 02:05:27] (step=0051503) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.076892975934259, LR: 0.0003 +[2026-03-04 02:05:35] (step=0051504) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.077088632361573, LR: 0.0003 +[2026-03-04 02:05:43] (step=0051505) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.077284288788887, LR: 0.0003 +[2026-03-04 02:05:51] (step=0051506) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.0774799452162, LR: 0.0003 +[2026-03-04 02:05:59] (step=0051507) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.077675601643515, LR: 0.0003 +[2026-03-04 02:06:07] (step=0051508) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.077871258070827, LR: 0.0003 +[2026-03-04 02:06:14] (step=0051509) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.078066914498141, LR: 0.0003 +[2026-03-04 02:06:22] (step=0051510) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.078262570925455, LR: 0.0003 +[2026-03-04 02:06:30] (step=0051511) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.078458227352769, LR: 0.0003 +[2026-03-04 02:06:38] (step=0051512) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.078653883780083, LR: 0.0003 +[2026-03-04 02:06:46] (step=0051513) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.078849540207395, LR: 0.0003 +[2026-03-04 02:06:54] (step=0051514) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.07904519663471, LR: 0.0003 +[2026-03-04 02:07:02] (step=0051515) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.079240853062023, LR: 0.0003 +[2026-03-04 02:07:10] (step=0051516) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 10.079436509489337, LR: 0.0003 +[2026-03-04 02:07:17] (step=0051517) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.079632165916651, LR: 0.0003 +[2026-03-04 02:07:25] (step=0051518) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.079827822343963, LR: 0.0003 +[2026-03-04 02:07:33] (step=0051519) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.080023478771277, LR: 0.0003 +[2026-03-04 02:07:41] (step=0051520) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.080219135198591, LR: 0.0003 +[2026-03-04 02:07:49] (step=0051521) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.080414791625905, LR: 0.0003 +[2026-03-04 02:07:57] (step=0051522) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.080610448053218, LR: 0.0003 +[2026-03-04 02:08:05] (step=0051523) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.080806104480532, LR: 0.0003 +[2026-03-04 02:08:12] (step=0051524) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.081001760907846, LR: 0.0003 +[2026-03-04 02:08:20] (step=0051525) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.08119741733516, LR: 0.0003 +[2026-03-04 02:08:28] (step=0051526) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.081393073762474, LR: 0.0003 +[2026-03-04 02:08:36] (step=0051527) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.081588730189786, LR: 0.0003 +[2026-03-04 02:08:44] (step=0051528) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.0817843866171, LR: 0.0003 +[2026-03-04 02:08:52] (step=0051529) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.081980043044414, LR: 0.0003 +[2026-03-04 02:09:00] (step=0051530) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.082175699471728, LR: 0.0003 +[2026-03-04 02:09:08] (step=0051531) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.082371355899042, LR: 0.0003 +[2026-03-04 02:09:15] (step=0051532) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.082567012326354, LR: 0.0003 +[2026-03-04 02:09:23] (step=0051533) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.082762668753668, LR: 0.0003 +[2026-03-04 02:09:31] (step=0051534) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 10.082958325180982, LR: 0.0003 +[2026-03-04 02:09:39] (step=0051535) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.083153981608296, LR: 0.0003 +[2026-03-04 02:09:47] (step=0051536) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.08334963803561, LR: 0.0003 +[2026-03-04 02:09:55] (step=0051537) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.083545294462922, LR: 0.0003 +[2026-03-04 02:10:03] (step=0051538) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.083740950890236, LR: 0.0003 +[2026-03-04 02:10:11] (step=0051539) Train Loss: 0.4405, Train Steps/Sec: 0.12, Epoch: 10.08393660731755, LR: 0.0003 +[2026-03-04 02:10:19] (step=0051540) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 10.084132263744864, LR: 0.0003 +[2026-03-04 02:10:27] (step=0051541) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 10.084327920172178, LR: 0.0003 +[2026-03-04 02:10:34] (step=0051542) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.08452357659949, LR: 0.0003 +[2026-03-04 02:10:42] (step=0051543) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.084719233026805, LR: 0.0003 +[2026-03-04 02:10:50] (step=0051544) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.084914889454119, LR: 0.0003 +[2026-03-04 02:10:58] (step=0051545) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.085110545881433, LR: 0.0003 +[2026-03-04 02:11:06] (step=0051546) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 10.085306202308745, LR: 0.0003 +[2026-03-04 02:11:14] (step=0051547) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.085501858736059, LR: 0.0003 +[2026-03-04 02:11:22] (step=0051548) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.085697515163373, LR: 0.0003 +[2026-03-04 02:11:29] (step=0051549) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.085893171590687, LR: 0.0003 +[2026-03-04 02:11:37] (step=0051550) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.086088828018001, LR: 0.0003 +[2026-03-04 02:11:45] (step=0051551) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.086284484445313, LR: 0.0003 +[2026-03-04 02:11:53] (step=0051552) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.086480140872627, LR: 0.0003 +[2026-03-04 02:12:01] (step=0051553) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 10.086675797299941, LR: 0.0003 +[2026-03-04 02:12:09] (step=0051554) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 10.086871453727255, LR: 0.0003 +[2026-03-04 02:12:17] (step=0051555) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.08706711015457, LR: 0.0003 +[2026-03-04 02:12:24] (step=0051556) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.087262766581881, LR: 0.0003 +[2026-03-04 02:12:32] (step=0051557) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.087458423009195, LR: 0.0003 +[2026-03-04 02:12:40] (step=0051558) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.08765407943651, LR: 0.0003 +[2026-03-04 02:12:48] (step=0051559) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.087849735863823, LR: 0.0003 +[2026-03-04 02:12:56] (step=0051560) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.088045392291138, LR: 0.0003 +[2026-03-04 02:13:04] (step=0051561) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.08824104871845, LR: 0.0003 +[2026-03-04 02:13:12] (step=0051562) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.088436705145764, LR: 0.0003 +[2026-03-04 02:13:19] (step=0051563) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.088632361573078, LR: 0.0003 +[2026-03-04 02:13:27] (step=0051564) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.088828018000392, LR: 0.0003 +[2026-03-04 02:13:35] (step=0051565) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.089023674427706, LR: 0.0003 +[2026-03-04 02:13:43] (step=0051566) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.089219330855018, LR: 0.0003 +[2026-03-04 02:13:51] (step=0051567) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.089414987282332, LR: 0.0003 +[2026-03-04 02:13:59] (step=0051568) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 10.089610643709646, LR: 0.0003 +[2026-03-04 02:14:07] (step=0051569) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.08980630013696, LR: 0.0003 +[2026-03-04 02:14:15] (step=0051570) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.090001956564274, LR: 0.0003 +[2026-03-04 02:14:22] (step=0051571) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.090197612991586, LR: 0.0003 +[2026-03-04 02:14:30] (step=0051572) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.0903932694189, LR: 0.0003 +[2026-03-04 02:14:38] (step=0051573) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.090588925846214, LR: 0.0003 +[2026-03-04 02:14:46] (step=0051574) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.090784582273528, LR: 0.0003 +[2026-03-04 02:14:54] (step=0051575) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.09098023870084, LR: 0.0003 +[2026-03-04 02:15:02] (step=0051576) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.091175895128154, LR: 0.0003 +[2026-03-04 02:15:10] (step=0051577) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.091371551555468, LR: 0.0003 +[2026-03-04 02:15:17] (step=0051578) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.091567207982783, LR: 0.0003 +[2026-03-04 02:15:25] (step=0051579) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.091762864410097, LR: 0.0003 +[2026-03-04 02:15:33] (step=0051580) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.091958520837409, LR: 0.0003 +[2026-03-04 02:15:41] (step=0051581) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.092154177264723, LR: 0.0003 +[2026-03-04 02:15:49] (step=0051582) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.092349833692037, LR: 0.0003 +[2026-03-04 02:15:57] (step=0051583) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.09254549011935, LR: 0.0003 +[2026-03-04 02:16:05] (step=0051584) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.092741146546665, LR: 0.0003 +[2026-03-04 02:16:13] (step=0051585) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.092936802973977, LR: 0.0003 +[2026-03-04 02:16:20] (step=0051586) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.093132459401291, LR: 0.0003 +[2026-03-04 02:16:28] (step=0051587) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.093328115828605, LR: 0.0003 +[2026-03-04 02:16:36] (step=0051588) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.093523772255919, LR: 0.0003 +[2026-03-04 02:16:44] (step=0051589) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 10.093719428683233, LR: 0.0003 +[2026-03-04 02:16:52] (step=0051590) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.093915085110545, LR: 0.0003 +[2026-03-04 02:17:00] (step=0051591) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.09411074153786, LR: 0.0003 +[2026-03-04 02:17:08] (step=0051592) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.094306397965173, LR: 0.0003 +[2026-03-04 02:17:16] (step=0051593) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.094502054392487, LR: 0.0003 +[2026-03-04 02:17:23] (step=0051594) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.094697710819801, LR: 0.0003 +[2026-03-04 02:17:31] (step=0051595) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.094893367247114, LR: 0.0003 +[2026-03-04 02:17:39] (step=0051596) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.095089023674428, LR: 0.0003 +[2026-03-04 02:17:47] (step=0051597) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.095284680101742, LR: 0.0003 +[2026-03-04 02:17:55] (step=0051598) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.095480336529056, LR: 0.0003 +[2026-03-04 02:18:03] (step=0051599) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.095675992956368, LR: 0.0003 +[2026-03-04 02:18:10] (step=0051600) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.095871649383682, LR: 0.0003 +[2026-03-04 02:18:18] (step=0051601) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.096067305810996, LR: 0.0003 +[2026-03-04 02:18:26] (step=0051602) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.09626296223831, LR: 0.0003 +[2026-03-04 02:18:34] (step=0051603) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.096458618665624, LR: 0.0003 +[2026-03-04 02:18:42] (step=0051604) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.096654275092936, LR: 0.0003 +[2026-03-04 02:18:50] (step=0051605) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.09684993152025, LR: 0.0003 +[2026-03-04 02:18:58] (step=0051606) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.097045587947564, LR: 0.0003 +[2026-03-04 02:19:05] (step=0051607) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.097241244374878, LR: 0.0003 +[2026-03-04 02:19:13] (step=0051608) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.097436900802192, LR: 0.0003 +[2026-03-04 02:19:21] (step=0051609) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 10.097632557229504, LR: 0.0003 +[2026-03-04 02:19:29] (step=0051610) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 10.097828213656818, LR: 0.0003 +[2026-03-04 02:19:37] (step=0051611) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.098023870084132, LR: 0.0003 +[2026-03-04 02:19:45] (step=0051612) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.098219526511446, LR: 0.0003 +[2026-03-04 02:19:53] (step=0051613) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.09841518293876, LR: 0.0003 +[2026-03-04 02:20:00] (step=0051614) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.098610839366073, LR: 0.0003 +[2026-03-04 02:20:08] (step=0051615) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.098806495793387, LR: 0.0003 +[2026-03-04 02:20:16] (step=0051616) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.0990021522207, LR: 0.0003 +[2026-03-04 02:20:24] (step=0051617) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.099197808648015, LR: 0.0003 +[2026-03-04 02:20:32] (step=0051618) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.099393465075329, LR: 0.0003 +[2026-03-04 02:20:40] (step=0051619) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.09958912150264, LR: 0.0003 +[2026-03-04 02:20:48] (step=0051620) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.099784777929955, LR: 0.0003 +[2026-03-04 02:20:56] (step=0051621) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.099980434357269, LR: 0.0003 +[2026-03-04 02:21:03] (step=0051622) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.100176090784583, LR: 0.0003 +[2026-03-04 02:21:11] (step=0051623) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.100371747211897, LR: 0.0003 +[2026-03-04 02:21:19] (step=0051624) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.100567403639209, LR: 0.0003 +[2026-03-04 02:21:27] (step=0051625) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.100763060066523, LR: 0.0003 +[2026-03-04 02:21:35] (step=0051626) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.100958716493837, LR: 0.0003 +[2026-03-04 02:21:43] (step=0051627) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.101154372921151, LR: 0.0003 +[2026-03-04 02:21:51] (step=0051628) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.101350029348463, LR: 0.0003 +[2026-03-04 02:21:58] (step=0051629) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.101545685775777, LR: 0.0003 +[2026-03-04 02:22:06] (step=0051630) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.101741342203091, LR: 0.0003 +[2026-03-04 02:22:14] (step=0051631) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.101936998630405, LR: 0.0003 +[2026-03-04 02:22:22] (step=0051632) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.10213265505772, LR: 0.0003 +[2026-03-04 02:22:30] (step=0051633) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.102328311485032, LR: 0.0003 +[2026-03-04 02:22:38] (step=0051634) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.102523967912346, LR: 0.0003 +[2026-03-04 02:22:46] (step=0051635) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 10.10271962433966, LR: 0.0003 +[2026-03-04 02:22:54] (step=0051636) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.102915280766974, LR: 0.0003 +[2026-03-04 02:23:01] (step=0051637) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.103110937194288, LR: 0.0003 +[2026-03-04 02:23:09] (step=0051638) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.1033065936216, LR: 0.0003 +[2026-03-04 02:23:17] (step=0051639) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.103502250048914, LR: 0.0003 +[2026-03-04 02:23:25] (step=0051640) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.103697906476228, LR: 0.0003 +[2026-03-04 02:23:33] (step=0051641) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.103893562903542, LR: 0.0003 +[2026-03-04 02:23:41] (step=0051642) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.104089219330856, LR: 0.0003 +[2026-03-04 02:23:49] (step=0051643) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.104284875758168, LR: 0.0003 +[2026-03-04 02:23:57] (step=0051644) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.104480532185482, LR: 0.0003 +[2026-03-04 02:24:05] (step=0051645) Train Loss: 0.4197, Train Steps/Sec: 0.13, Epoch: 10.104676188612796, LR: 0.0003 +[2026-03-04 02:24:12] (step=0051646) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.10487184504011, LR: 0.0003 +[2026-03-04 02:24:20] (step=0051647) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 10.105067501467424, LR: 0.0003 +[2026-03-04 02:24:28] (step=0051648) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.105263157894736, LR: 0.0003 +[2026-03-04 02:24:36] (step=0051649) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.10545881432205, LR: 0.0003 +[2026-03-04 02:24:44] (step=0051650) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.105654470749364, LR: 0.0003 +[2026-03-04 02:24:52] (step=0051651) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.105850127176678, LR: 0.0003 +[2026-03-04 02:25:00] (step=0051652) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.10604578360399, LR: 0.0003 +[2026-03-04 02:25:07] (step=0051653) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.106241440031305, LR: 0.0003 +[2026-03-04 02:25:15] (step=0051654) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 10.106437096458619, LR: 0.0003 +[2026-03-04 02:25:23] (step=0051655) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.106632752885933, LR: 0.0003 +[2026-03-04 02:25:31] (step=0051656) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.106828409313247, LR: 0.0003 +[2026-03-04 02:25:39] (step=0051657) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.107024065740559, LR: 0.0003 +[2026-03-04 02:25:47] (step=0051658) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.107219722167873, LR: 0.0003 +[2026-03-04 02:25:55] (step=0051659) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 10.107415378595187, LR: 0.0003 +[2026-03-04 02:26:02] (step=0051660) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.1076110350225, LR: 0.0003 +[2026-03-04 02:26:10] (step=0051661) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.107806691449815, LR: 0.0003 +[2026-03-04 02:26:18] (step=0051662) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.108002347877127, LR: 0.0003 +[2026-03-04 02:26:26] (step=0051663) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.108198004304441, LR: 0.0003 +[2026-03-04 02:26:34] (step=0051664) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.108393660731755, LR: 0.0003 +[2026-03-04 02:26:42] (step=0051665) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.108589317159069, LR: 0.0003 +[2026-03-04 02:26:50] (step=0051666) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.108784973586383, LR: 0.0003 +[2026-03-04 02:26:57] (step=0051667) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.108980630013695, LR: 0.0003 +[2026-03-04 02:27:05] (step=0051668) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.10917628644101, LR: 0.0003 +[2026-03-04 02:27:13] (step=0051669) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.109371942868323, LR: 0.0003 +[2026-03-04 02:27:21] (step=0051670) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.109567599295637, LR: 0.0003 +[2026-03-04 02:27:29] (step=0051671) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.109763255722951, LR: 0.0003 +[2026-03-04 02:27:37] (step=0051672) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.109958912150264, LR: 0.0003 +[2026-03-04 02:27:45] (step=0051673) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.110154568577578, LR: 0.0003 +[2026-03-04 02:27:53] (step=0051674) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.110350225004892, LR: 0.0003 +[2026-03-04 02:28:00] (step=0051675) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.110545881432206, LR: 0.0003 +[2026-03-04 02:28:08] (step=0051676) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.11074153785952, LR: 0.0003 +[2026-03-04 02:28:16] (step=0051677) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.110937194286832, LR: 0.0003 +[2026-03-04 02:28:24] (step=0051678) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.111132850714146, LR: 0.0003 +[2026-03-04 02:28:32] (step=0051679) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.11132850714146, LR: 0.0003 +[2026-03-04 02:28:40] (step=0051680) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.111524163568774, LR: 0.0003 +[2026-03-04 02:28:48] (step=0051681) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.111719819996086, LR: 0.0003 +[2026-03-04 02:28:55] (step=0051682) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.1119154764234, LR: 0.0003 +[2026-03-04 02:29:03] (step=0051683) Train Loss: 0.4559, Train Steps/Sec: 0.12, Epoch: 10.112111132850714, LR: 0.0003 +[2026-03-04 02:29:11] (step=0051684) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.112306789278028, LR: 0.0003 +[2026-03-04 02:29:19] (step=0051685) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.112502445705342, LR: 0.0003 +[2026-03-04 02:29:27] (step=0051686) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.112698102132654, LR: 0.0003 +[2026-03-04 02:29:35] (step=0051687) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.112893758559968, LR: 0.0003 +[2026-03-04 02:29:43] (step=0051688) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.113089414987282, LR: 0.0003 +[2026-03-04 02:29:51] (step=0051689) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.113285071414596, LR: 0.0003 +[2026-03-04 02:29:59] (step=0051690) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.11348072784191, LR: 0.0003 +[2026-03-04 02:30:07] (step=0051691) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.113676384269223, LR: 0.0003 +[2026-03-04 02:30:14] (step=0051692) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.113872040696537, LR: 0.0003 +[2026-03-04 02:30:22] (step=0051693) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.11406769712385, LR: 0.0003 +[2026-03-04 02:30:30] (step=0051694) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.114263353551165, LR: 0.0003 +[2026-03-04 02:30:38] (step=0051695) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.114459009978479, LR: 0.0003 +[2026-03-04 02:30:46] (step=0051696) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.11465466640579, LR: 0.0003 +[2026-03-04 02:30:54] (step=0051697) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.114850322833105, LR: 0.0003 +[2026-03-04 02:31:02] (step=0051698) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.115045979260419, LR: 0.0003 +[2026-03-04 02:31:09] (step=0051699) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.115241635687733, LR: 0.0003 +[2026-03-04 02:31:17] (step=0051700) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.115437292115047, LR: 0.0003 +[2026-03-04 02:31:25] (step=0051701) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.115632948542359, LR: 0.0003 +[2026-03-04 02:31:33] (step=0051702) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.115828604969673, LR: 0.0003 +[2026-03-04 02:31:41] (step=0051703) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.116024261396987, LR: 0.0003 +[2026-03-04 02:31:49] (step=0051704) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.116219917824301, LR: 0.0003 +[2026-03-04 02:31:57] (step=0051705) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 10.116415574251613, LR: 0.0003 +[2026-03-04 02:32:04] (step=0051706) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.116611230678927, LR: 0.0003 +[2026-03-04 02:32:12] (step=0051707) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.116806887106241, LR: 0.0003 +[2026-03-04 02:32:20] (step=0051708) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.117002543533555, LR: 0.0003 +[2026-03-04 02:32:28] (step=0051709) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.11719819996087, LR: 0.0003 +[2026-03-04 02:32:36] (step=0051710) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.117393856388182, LR: 0.0003 +[2026-03-04 02:32:44] (step=0051711) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.117589512815496, LR: 0.0003 +[2026-03-04 02:32:52] (step=0051712) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.11778516924281, LR: 0.0003 +[2026-03-04 02:32:59] (step=0051713) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.117980825670124, LR: 0.0003 +[2026-03-04 02:33:07] (step=0051714) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.118176482097438, LR: 0.0003 +[2026-03-04 02:33:15] (step=0051715) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.11837213852475, LR: 0.0003 +[2026-03-04 02:33:23] (step=0051716) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.118567794952064, LR: 0.0003 +[2026-03-04 02:33:31] (step=0051717) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.118763451379378, LR: 0.0003 +[2026-03-04 02:33:39] (step=0051718) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.118959107806692, LR: 0.0003 +[2026-03-04 02:33:47] (step=0051719) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.119154764234006, LR: 0.0003 +[2026-03-04 02:33:55] (step=0051720) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.119350420661318, LR: 0.0003 +[2026-03-04 02:34:02] (step=0051721) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.119546077088632, LR: 0.0003 +[2026-03-04 02:34:10] (step=0051722) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.119741733515946, LR: 0.0003 +[2026-03-04 02:34:18] (step=0051723) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.11993738994326, LR: 0.0003 +[2026-03-04 02:34:26] (step=0051724) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.120133046370574, LR: 0.0003 +[2026-03-04 02:34:34] (step=0051725) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.120328702797886, LR: 0.0003 +[2026-03-04 02:34:42] (step=0051726) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.1205243592252, LR: 0.0003 +[2026-03-04 02:34:50] (step=0051727) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 10.120720015652514, LR: 0.0003 +[2026-03-04 02:34:57] (step=0051728) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.120915672079828, LR: 0.0003 +[2026-03-04 02:35:05] (step=0051729) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.121111328507142, LR: 0.0003 +[2026-03-04 02:35:13] (step=0051730) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.121306984934455, LR: 0.0003 +[2026-03-04 02:35:21] (step=0051731) Train Loss: 0.4425, Train Steps/Sec: 0.12, Epoch: 10.121502641361769, LR: 0.0003 +[2026-03-04 02:35:29] (step=0051732) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.121698297789083, LR: 0.0003 +[2026-03-04 02:35:37] (step=0051733) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.121893954216397, LR: 0.0003 +[2026-03-04 02:35:45] (step=0051734) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.122089610643709, LR: 0.0003 +[2026-03-04 02:35:53] (step=0051735) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.122285267071023, LR: 0.0003 +[2026-03-04 02:36:00] (step=0051736) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.122480923498337, LR: 0.0003 +[2026-03-04 02:36:08] (step=0051737) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.12267657992565, LR: 0.0003 +[2026-03-04 02:36:16] (step=0051738) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.122872236352965, LR: 0.0003 +[2026-03-04 02:36:24] (step=0051739) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.123067892780277, LR: 0.0003 +[2026-03-04 02:36:32] (step=0051740) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.123263549207591, LR: 0.0003 +[2026-03-04 02:36:40] (step=0051741) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.123459205634905, LR: 0.0003 +[2026-03-04 02:36:48] (step=0051742) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.123654862062219, LR: 0.0003 +[2026-03-04 02:36:56] (step=0051743) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.123850518489533, LR: 0.0003 +[2026-03-04 02:37:03] (step=0051744) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.124046174916845, LR: 0.0003 +[2026-03-04 02:37:11] (step=0051745) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.12424183134416, LR: 0.0003 +[2026-03-04 02:37:19] (step=0051746) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.124437487771473, LR: 0.0003 +[2026-03-04 02:37:27] (step=0051747) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.124633144198787, LR: 0.0003 +[2026-03-04 02:37:35] (step=0051748) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.124828800626101, LR: 0.0003 +[2026-03-04 02:37:43] (step=0051749) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.125024457053414, LR: 0.0003 +[2026-03-04 02:37:51] (step=0051750) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.125220113480728, LR: 0.0003 +[2026-03-04 02:37:59] (step=0051751) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.125415769908042, LR: 0.0003 +[2026-03-04 02:38:06] (step=0051752) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.125611426335356, LR: 0.0003 +[2026-03-04 02:38:14] (step=0051753) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.12580708276267, LR: 0.0003 +[2026-03-04 02:38:22] (step=0051754) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.126002739189982, LR: 0.0003 +[2026-03-04 02:38:30] (step=0051755) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.126198395617296, LR: 0.0003 +[2026-03-04 02:38:38] (step=0051756) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 10.12639405204461, LR: 0.0003 +[2026-03-04 02:38:46] (step=0051757) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.126589708471924, LR: 0.0003 +[2026-03-04 02:38:54] (step=0051758) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 10.126785364899236, LR: 0.0003 +[2026-03-04 02:39:02] (step=0051759) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.12698102132655, LR: 0.0003 +[2026-03-04 02:39:09] (step=0051760) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 10.127176677753864, LR: 0.0003 +[2026-03-04 02:39:17] (step=0051761) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.127372334181178, LR: 0.0003 +[2026-03-04 02:39:25] (step=0051762) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.127567990608492, LR: 0.0003 +[2026-03-04 02:39:33] (step=0051763) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.127763647035804, LR: 0.0003 +[2026-03-04 02:39:41] (step=0051764) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 10.127959303463118, LR: 0.0003 +[2026-03-04 02:39:49] (step=0051765) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.128154959890432, LR: 0.0003 +[2026-03-04 02:39:57] (step=0051766) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.128350616317746, LR: 0.0003 +[2026-03-04 02:40:04] (step=0051767) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.12854627274506, LR: 0.0003 +[2026-03-04 02:40:12] (step=0051768) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.128741929172373, LR: 0.0003 +[2026-03-04 02:40:20] (step=0051769) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.128937585599687, LR: 0.0003 +[2026-03-04 02:40:28] (step=0051770) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.129133242027, LR: 0.0003 +[2026-03-04 02:40:36] (step=0051771) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.129328898454315, LR: 0.0003 +[2026-03-04 02:40:44] (step=0051772) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.129524554881629, LR: 0.0003 +[2026-03-04 02:40:52] (step=0051773) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.12972021130894, LR: 0.0003 +[2026-03-04 02:40:59] (step=0051774) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.129915867736255, LR: 0.0003 +[2026-03-04 02:41:07] (step=0051775) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.130111524163569, LR: 0.0003 +[2026-03-04 02:41:15] (step=0051776) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.130307180590883, LR: 0.0003 +[2026-03-04 02:41:23] (step=0051777) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 10.130502837018197, LR: 0.0003 +[2026-03-04 02:41:31] (step=0051778) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.130698493445509, LR: 0.0003 +[2026-03-04 02:41:39] (step=0051779) Train Loss: 0.4352, Train Steps/Sec: 0.12, Epoch: 10.130894149872823, LR: 0.0003 +[2026-03-04 02:41:47] (step=0051780) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 10.131089806300137, LR: 0.0003 +[2026-03-04 02:41:55] (step=0051781) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 10.131285462727451, LR: 0.0003 +[2026-03-04 02:42:03] (step=0051782) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.131481119154763, LR: 0.0003 +[2026-03-04 02:42:10] (step=0051783) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.131676775582077, LR: 0.0003 +[2026-03-04 02:42:18] (step=0051784) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.131872432009391, LR: 0.0003 +[2026-03-04 02:42:26] (step=0051785) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.132068088436705, LR: 0.0003 +[2026-03-04 02:42:34] (step=0051786) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.13226374486402, LR: 0.0003 +[2026-03-04 02:42:42] (step=0051787) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.132459401291332, LR: 0.0003 +[2026-03-04 02:42:50] (step=0051788) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.132655057718646, LR: 0.0003 +[2026-03-04 02:42:58] (step=0051789) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.13285071414596, LR: 0.0003 +[2026-03-04 02:43:06] (step=0051790) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.133046370573274, LR: 0.0003 +[2026-03-04 02:43:13] (step=0051791) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.133242027000588, LR: 0.0003 +[2026-03-04 02:43:21] (step=0051792) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.1334376834279, LR: 0.0003 +[2026-03-04 02:43:29] (step=0051793) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.133633339855214, LR: 0.0003 +[2026-03-04 02:43:37] (step=0051794) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.133828996282528, LR: 0.0003 +[2026-03-04 02:43:45] (step=0051795) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.134024652709842, LR: 0.0003 +[2026-03-04 02:43:53] (step=0051796) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.134220309137156, LR: 0.0003 +[2026-03-04 02:44:01] (step=0051797) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.134415965564468, LR: 0.0003 +[2026-03-04 02:44:09] (step=0051798) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.134611621991782, LR: 0.0003 +[2026-03-04 02:44:16] (step=0051799) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 10.134807278419096, LR: 0.0003 +[2026-03-04 02:44:24] (step=0051800) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.13500293484641, LR: 0.0003 +[2026-03-04 02:44:32] (step=0051801) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.135198591273724, LR: 0.0003 +[2026-03-04 02:44:40] (step=0051802) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.135394247701036, LR: 0.0003 +[2026-03-04 02:44:48] (step=0051803) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.13558990412835, LR: 0.0003 +[2026-03-04 02:44:56] (step=0051804) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.135785560555664, LR: 0.0003 +[2026-03-04 02:45:04] (step=0051805) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.135981216982978, LR: 0.0003 +[2026-03-04 02:45:11] (step=0051806) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 10.136176873410292, LR: 0.0003 +[2026-03-04 02:45:19] (step=0051807) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.136372529837605, LR: 0.0003 +[2026-03-04 02:45:27] (step=0051808) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.136568186264919, LR: 0.0003 +[2026-03-04 02:45:35] (step=0051809) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.136763842692233, LR: 0.0003 +[2026-03-04 02:45:43] (step=0051810) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.136959499119547, LR: 0.0003 +[2026-03-04 02:45:51] (step=0051811) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.137155155546859, LR: 0.0003 +[2026-03-04 02:45:59] (step=0051812) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.137350811974173, LR: 0.0003 +[2026-03-04 02:46:06] (step=0051813) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.137546468401487, LR: 0.0003 +[2026-03-04 02:46:14] (step=0051814) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.1377421248288, LR: 0.0003 +[2026-03-04 02:46:22] (step=0051815) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.137937781256115, LR: 0.0003 +[2026-03-04 02:46:30] (step=0051816) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.138133437683427, LR: 0.0003 +[2026-03-04 02:46:38] (step=0051817) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.138329094110741, LR: 0.0003 +[2026-03-04 02:46:46] (step=0051818) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.138524750538055, LR: 0.0003 +[2026-03-04 02:46:54] (step=0051819) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.13872040696537, LR: 0.0003 +[2026-03-04 02:47:02] (step=0051820) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.138916063392683, LR: 0.0003 +[2026-03-04 02:47:09] (step=0051821) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.139111719819995, LR: 0.0003 +[2026-03-04 02:47:17] (step=0051822) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.13930737624731, LR: 0.0003 +[2026-03-04 02:47:25] (step=0051823) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.139503032674623, LR: 0.0003 +[2026-03-04 02:47:33] (step=0051824) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.139698689101937, LR: 0.0003 +[2026-03-04 02:47:41] (step=0051825) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.139894345529251, LR: 0.0003 +[2026-03-04 02:47:49] (step=0051826) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.140090001956564, LR: 0.0003 +[2026-03-04 02:47:57] (step=0051827) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.140285658383878, LR: 0.0003 +[2026-03-04 02:48:04] (step=0051828) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 10.140481314811192, LR: 0.0003 +[2026-03-04 02:48:12] (step=0051829) Train Loss: 0.4297, Train Steps/Sec: 0.12, Epoch: 10.140676971238506, LR: 0.0003 +[2026-03-04 02:48:20] (step=0051830) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.14087262766582, LR: 0.0003 +[2026-03-04 02:48:28] (step=0051831) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.141068284093132, LR: 0.0003 +[2026-03-04 02:48:36] (step=0051832) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 10.141263940520446, LR: 0.0003 +[2026-03-04 02:48:44] (step=0051833) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.14145959694776, LR: 0.0003 +[2026-03-04 02:48:52] (step=0051834) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.141655253375074, LR: 0.0003 +[2026-03-04 02:49:00] (step=0051835) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.141850909802386, LR: 0.0003 +[2026-03-04 02:49:07] (step=0051836) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.1420465662297, LR: 0.0003 +[2026-03-04 02:49:15] (step=0051837) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.142242222657014, LR: 0.0003 +[2026-03-04 02:49:23] (step=0051838) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.142437879084328, LR: 0.0003 +[2026-03-04 02:49:31] (step=0051839) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.142633535511642, LR: 0.0003 +[2026-03-04 02:49:39] (step=0051840) Train Loss: 0.4523, Train Steps/Sec: 0.12, Epoch: 10.142829191938954, LR: 0.0003 +[2026-03-04 02:49:47] (step=0051841) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.143024848366268, LR: 0.0003 +[2026-03-04 02:49:55] (step=0051842) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.143220504793582, LR: 0.0003 +[2026-03-04 02:50:03] (step=0051843) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 10.143416161220896, LR: 0.0003 +[2026-03-04 02:50:10] (step=0051844) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.14361181764821, LR: 0.0003 +[2026-03-04 02:50:18] (step=0051845) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.143807474075523, LR: 0.0003 +[2026-03-04 02:50:26] (step=0051846) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.144003130502837, LR: 0.0003 +[2026-03-04 02:50:34] (step=0051847) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.14419878693015, LR: 0.0003 +[2026-03-04 02:50:42] (step=0051848) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.144394443357465, LR: 0.0003 +[2026-03-04 02:50:50] (step=0051849) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.144590099784779, LR: 0.0003 +[2026-03-04 02:50:58] (step=0051850) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.14478575621209, LR: 0.0003 +[2026-03-04 02:51:06] (step=0051851) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 10.144981412639405, LR: 0.0003 +[2026-03-04 02:51:13] (step=0051852) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.145177069066719, LR: 0.0003 +[2026-03-04 02:51:21] (step=0051853) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.145372725494033, LR: 0.0003 +[2026-03-04 02:51:29] (step=0051854) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.145568381921347, LR: 0.0003 +[2026-03-04 02:51:37] (step=0051855) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.14576403834866, LR: 0.0003 +[2026-03-04 02:51:45] (step=0051856) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.145959694775973, LR: 0.0003 +[2026-03-04 02:51:53] (step=0051857) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.146155351203287, LR: 0.0003 +[2026-03-04 02:52:01] (step=0051858) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.146351007630601, LR: 0.0003 +[2026-03-04 02:52:08] (step=0051859) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.146546664057915, LR: 0.0003 +[2026-03-04 02:52:16] (step=0051860) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.146742320485227, LR: 0.0003 +[2026-03-04 02:52:24] (step=0051861) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.146937976912541, LR: 0.0003 +[2026-03-04 02:52:32] (step=0051862) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.147133633339855, LR: 0.0003 +[2026-03-04 02:52:40] (step=0051863) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.14732928976717, LR: 0.0003 +[2026-03-04 02:52:48] (step=0051864) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.147524946194482, LR: 0.0003 +[2026-03-04 02:52:56] (step=0051865) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 10.147720602621796, LR: 0.0003 +[2026-03-04 02:53:03] (step=0051866) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.14791625904911, LR: 0.0003 +[2026-03-04 02:53:11] (step=0051867) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.148111915476424, LR: 0.0003 +[2026-03-04 02:53:19] (step=0051868) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.148307571903738, LR: 0.0003 +[2026-03-04 02:53:27] (step=0051869) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 10.14850322833105, LR: 0.0003 +[2026-03-04 02:53:35] (step=0051870) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.148698884758364, LR: 0.0003 +[2026-03-04 02:53:43] (step=0051871) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.148894541185678, LR: 0.0003 +[2026-03-04 02:53:51] (step=0051872) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.149090197612992, LR: 0.0003 +[2026-03-04 02:53:58] (step=0051873) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.149285854040306, LR: 0.0003 +[2026-03-04 02:54:06] (step=0051874) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.149481510467618, LR: 0.0003 +[2026-03-04 02:54:14] (step=0051875) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.149677166894932, LR: 0.0003 +[2026-03-04 02:54:22] (step=0051876) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.149872823322246, LR: 0.0003 +[2026-03-04 02:54:30] (step=0051877) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.15006847974956, LR: 0.0003 +[2026-03-04 02:54:38] (step=0051878) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.150264136176874, LR: 0.0003 +[2026-03-04 02:54:46] (step=0051879) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.150459792604186, LR: 0.0003 +[2026-03-04 02:54:54] (step=0051880) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.1506554490315, LR: 0.0003 +[2026-03-04 02:55:02] (step=0051881) Train Loss: 0.4503, Train Steps/Sec: 0.12, Epoch: 10.150851105458814, LR: 0.0003 +[2026-03-04 02:55:09] (step=0051882) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.151046761886128, LR: 0.0003 +[2026-03-04 02:55:17] (step=0051883) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.151242418313442, LR: 0.0003 +[2026-03-04 02:55:25] (step=0051884) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.151438074740755, LR: 0.0003 +[2026-03-04 02:55:33] (step=0051885) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.151633731168069, LR: 0.0003 +[2026-03-04 02:55:41] (step=0051886) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.151829387595383, LR: 0.0003 +[2026-03-04 02:55:49] (step=0051887) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.152025044022697, LR: 0.0003 +[2026-03-04 02:55:57] (step=0051888) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.152220700450009, LR: 0.0003 +[2026-03-04 02:56:04] (step=0051889) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.152416356877323, LR: 0.0003 +[2026-03-04 02:56:12] (step=0051890) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.152612013304637, LR: 0.0003 +[2026-03-04 02:56:20] (step=0051891) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.152807669731951, LR: 0.0003 +[2026-03-04 02:56:28] (step=0051892) Train Loss: 0.4454, Train Steps/Sec: 0.12, Epoch: 10.153003326159265, LR: 0.0003 +[2026-03-04 02:56:36] (step=0051893) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.153198982586577, LR: 0.0003 +[2026-03-04 02:56:44] (step=0051894) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.153394639013891, LR: 0.0003 +[2026-03-04 02:56:52] (step=0051895) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.153590295441205, LR: 0.0003 +[2026-03-04 02:57:00] (step=0051896) Train Loss: 0.4262, Train Steps/Sec: 0.13, Epoch: 10.15378595186852, LR: 0.0003 +[2026-03-04 02:57:07] (step=0051897) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 10.153981608295833, LR: 0.0003 +[2026-03-04 02:57:15] (step=0051898) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.154177264723145, LR: 0.0003 +[2026-03-04 02:57:23] (step=0051899) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.15437292115046, LR: 0.0003 +[2026-03-04 02:57:31] (step=0051900) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.154568577577773, LR: 0.0003 +[2026-03-04 02:57:39] (step=0051901) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.154764234005087, LR: 0.0003 +[2026-03-04 02:57:47] (step=0051902) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.154959890432401, LR: 0.0003 +[2026-03-04 02:57:55] (step=0051903) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 10.155155546859714, LR: 0.0003 +[2026-03-04 02:58:02] (step=0051904) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.155351203287028, LR: 0.0003 +[2026-03-04 02:58:10] (step=0051905) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.155546859714342, LR: 0.0003 +[2026-03-04 02:58:18] (step=0051906) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.155742516141656, LR: 0.0003 +[2026-03-04 02:58:26] (step=0051907) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 10.15593817256897, LR: 0.0003 +[2026-03-04 02:58:34] (step=0051908) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.156133828996282, LR: 0.0003 +[2026-03-04 02:58:42] (step=0051909) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.156329485423596, LR: 0.0003 +[2026-03-04 02:58:50] (step=0051910) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.15652514185091, LR: 0.0003 +[2026-03-04 02:58:58] (step=0051911) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.156720798278224, LR: 0.0003 +[2026-03-04 02:59:05] (step=0051912) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.156916454705538, LR: 0.0003 +[2026-03-04 02:59:13] (step=0051913) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.15711211113285, LR: 0.0003 +[2026-03-04 02:59:21] (step=0051914) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.157307767560164, LR: 0.0003 +[2026-03-04 02:59:29] (step=0051915) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.157503423987478, LR: 0.0003 +[2026-03-04 02:59:37] (step=0051916) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.157699080414792, LR: 0.0003 +[2026-03-04 02:59:45] (step=0051917) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.157894736842104, LR: 0.0003 +[2026-03-04 02:59:53] (step=0051918) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.158090393269418, LR: 0.0003 +[2026-03-04 03:00:00] (step=0051919) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.158286049696732, LR: 0.0003 +[2026-03-04 03:00:08] (step=0051920) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.158481706124046, LR: 0.0003 +[2026-03-04 03:00:16] (step=0051921) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.15867736255136, LR: 0.0003 +[2026-03-04 03:00:24] (step=0051922) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.158873018978673, LR: 0.0003 +[2026-03-04 03:00:32] (step=0051923) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.159068675405987, LR: 0.0003 +[2026-03-04 03:00:40] (step=0051924) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.1592643318333, LR: 0.0003 +[2026-03-04 03:00:48] (step=0051925) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.159459988260615, LR: 0.0003 +[2026-03-04 03:00:55] (step=0051926) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.159655644687929, LR: 0.0003 +[2026-03-04 03:01:03] (step=0051927) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.159851301115241, LR: 0.0003 +[2026-03-04 03:01:11] (step=0051928) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.160046957542555, LR: 0.0003 +[2026-03-04 03:01:19] (step=0051929) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.160242613969869, LR: 0.0003 +[2026-03-04 03:01:27] (step=0051930) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.160438270397183, LR: 0.0003 +[2026-03-04 03:01:35] (step=0051931) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.160633926824497, LR: 0.0003 +[2026-03-04 03:01:43] (step=0051932) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 10.16082958325181, LR: 0.0003 +[2026-03-04 03:01:51] (step=0051933) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.161025239679123, LR: 0.0003 +[2026-03-04 03:01:58] (step=0051934) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 10.161220896106437, LR: 0.0003 +[2026-03-04 03:02:06] (step=0051935) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.161416552533751, LR: 0.0003 +[2026-03-04 03:02:14] (step=0051936) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 10.161612208961065, LR: 0.0003 +[2026-03-04 03:02:22] (step=0051937) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.161807865388377, LR: 0.0003 +[2026-03-04 03:02:30] (step=0051938) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.162003521815691, LR: 0.0003 +[2026-03-04 03:02:38] (step=0051939) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.162199178243005, LR: 0.0003 +[2026-03-04 03:02:46] (step=0051940) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.16239483467032, LR: 0.0003 +[2026-03-04 03:02:54] (step=0051941) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.162590491097632, LR: 0.0003 +[2026-03-04 03:03:01] (step=0051942) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 10.162786147524946, LR: 0.0003 +[2026-03-04 03:03:09] (step=0051943) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 10.16298180395226, LR: 0.0003 +[2026-03-04 03:03:17] (step=0051944) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.163177460379574, LR: 0.0003 +[2026-03-04 03:03:25] (step=0051945) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 10.163373116806888, LR: 0.0003 +[2026-03-04 03:03:33] (step=0051946) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.1635687732342, LR: 0.0003 +[2026-03-04 03:03:41] (step=0051947) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.163764429661514, LR: 0.0003 +[2026-03-04 03:03:49] (step=0051948) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.163960086088828, LR: 0.0003 +[2026-03-04 03:03:57] (step=0051949) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.164155742516142, LR: 0.0003 +[2026-03-04 03:04:04] (step=0051950) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.164351398943456, LR: 0.0003 +[2026-03-04 03:04:12] (step=0051951) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.164547055370768, LR: 0.0003 +[2026-03-04 03:04:20] (step=0051952) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.164742711798082, LR: 0.0003 +[2026-03-04 03:04:28] (step=0051953) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.164938368225396, LR: 0.0003 +[2026-03-04 03:04:36] (step=0051954) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.16513402465271, LR: 0.0003 +[2026-03-04 03:04:44] (step=0051955) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.165329681080024, LR: 0.0003 +[2026-03-04 03:04:52] (step=0051956) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.165525337507336, LR: 0.0003 +[2026-03-04 03:04:59] (step=0051957) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.16572099393465, LR: 0.0003 +[2026-03-04 03:05:07] (step=0051958) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 10.165916650361964, LR: 0.0003 +[2026-03-04 03:05:15] (step=0051959) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.166112306789278, LR: 0.0003 +[2026-03-04 03:05:23] (step=0051960) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.166307963216592, LR: 0.0003 +[2026-03-04 03:05:31] (step=0051961) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.166503619643905, LR: 0.0003 +[2026-03-04 03:05:39] (step=0051962) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 10.166699276071219, LR: 0.0003 +[2026-03-04 03:05:46] (step=0051963) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 10.166894932498533, LR: 0.0003 +[2026-03-04 03:05:54] (step=0051964) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.167090588925847, LR: 0.0003 +[2026-03-04 03:06:02] (step=0051965) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.16728624535316, LR: 0.0003 +[2026-03-04 03:06:10] (step=0051966) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.167481901780473, LR: 0.0003 +[2026-03-04 03:06:18] (step=0051967) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.167677558207787, LR: 0.0003 +[2026-03-04 03:06:26] (step=0051968) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.167873214635101, LR: 0.0003 +[2026-03-04 03:06:34] (step=0051969) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.168068871062415, LR: 0.0003 +[2026-03-04 03:06:42] (step=0051970) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.168264527489727, LR: 0.0003 +[2026-03-04 03:06:49] (step=0051971) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.168460183917041, LR: 0.0003 +[2026-03-04 03:06:57] (step=0051972) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.168655840344355, LR: 0.0003 +[2026-03-04 03:07:05] (step=0051973) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.16885149677167, LR: 0.0003 +[2026-03-04 03:07:13] (step=0051974) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.169047153198983, LR: 0.0003 +[2026-03-04 03:07:21] (step=0051975) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.169242809626295, LR: 0.0003 +[2026-03-04 03:07:29] (step=0051976) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.16943846605361, LR: 0.0003 +[2026-03-04 03:07:37] (step=0051977) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.169634122480923, LR: 0.0003 +[2026-03-04 03:07:45] (step=0051978) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.169829778908237, LR: 0.0003 +[2026-03-04 03:07:53] (step=0051979) Train Loss: 0.4475, Train Steps/Sec: 0.12, Epoch: 10.170025435335551, LR: 0.0003 +[2026-03-04 03:08:00] (step=0051980) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.170221091762864, LR: 0.0003 +[2026-03-04 03:08:08] (step=0051981) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.170416748190178, LR: 0.0003 +[2026-03-04 03:08:16] (step=0051982) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.170612404617492, LR: 0.0003 +[2026-03-04 03:08:24] (step=0051983) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.170808061044806, LR: 0.0003 +[2026-03-04 03:08:32] (step=0051984) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.17100371747212, LR: 0.0003 +[2026-03-04 03:08:40] (step=0051985) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 10.171199373899432, LR: 0.0003 +[2026-03-04 03:08:48] (step=0051986) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.171395030326746, LR: 0.0003 +[2026-03-04 03:08:56] (step=0051987) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.17159068675406, LR: 0.0003 +[2026-03-04 03:09:03] (step=0051988) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 10.171786343181374, LR: 0.0003 +[2026-03-04 03:09:11] (step=0051989) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 10.171981999608688, LR: 0.0003 +[2026-03-04 03:09:19] (step=0051990) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.172177656036, LR: 0.0003 +[2026-03-04 03:09:27] (step=0051991) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.172373312463314, LR: 0.0003 +[2026-03-04 03:09:35] (step=0051992) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.172568968890628, LR: 0.0003 +[2026-03-04 03:09:43] (step=0051993) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.172764625317942, LR: 0.0003 +[2026-03-04 03:09:51] (step=0051994) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 10.172960281745254, LR: 0.0003 +[2026-03-04 03:09:59] (step=0051995) Train Loss: 0.4491, Train Steps/Sec: 0.12, Epoch: 10.173155938172568, LR: 0.0003 +[2026-03-04 03:10:06] (step=0051996) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.173351594599882, LR: 0.0003 +[2026-03-04 03:10:14] (step=0051997) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.173547251027196, LR: 0.0003 +[2026-03-04 03:10:22] (step=0051998) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.17374290745451, LR: 0.0003 +[2026-03-04 03:10:30] (step=0051999) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.173938563881823, LR: 0.0003 +[2026-03-04 03:10:38] (step=0052000) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.174134220309137, LR: 0.0003 +[2026-03-04 03:10:38] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0052000/ +[2026-03-04 03:10:46] (step=0052001) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.17432987673645, LR: 0.0003 +[2026-03-04 03:10:54] (step=0052002) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 10.174525533163765, LR: 0.0003 +[2026-03-04 03:11:02] (step=0052003) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.174721189591079, LR: 0.0003 +[2026-03-04 03:11:09] (step=0052004) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.174916846018391, LR: 0.0003 +[2026-03-04 03:11:17] (step=0052005) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.175112502445705, LR: 0.0003 +[2026-03-04 03:11:25] (step=0052006) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.175308158873019, LR: 0.0003 +[2026-03-04 03:11:33] (step=0052007) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.175503815300333, LR: 0.0003 +[2026-03-04 03:11:41] (step=0052008) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.175699471727647, LR: 0.0003 +[2026-03-04 03:11:49] (step=0052009) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 10.17589512815496, LR: 0.0003 +[2026-03-04 03:11:57] (step=0052010) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 10.176090784582273, LR: 0.0003 +[2026-03-04 03:12:04] (step=0052011) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.176286441009587, LR: 0.0003 +[2026-03-04 03:12:12] (step=0052012) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.176482097436901, LR: 0.0003 +[2026-03-04 03:12:20] (step=0052013) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.176677753864215, LR: 0.0003 +[2026-03-04 03:12:28] (step=0052014) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.176873410291527, LR: 0.0003 +[2026-03-04 03:12:36] (step=0052015) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.177069066718841, LR: 0.0003 +[2026-03-04 03:12:44] (step=0052016) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 10.177264723146155, LR: 0.0003 +[2026-03-04 03:12:52] (step=0052017) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.17746037957347, LR: 0.0003 +[2026-03-04 03:13:00] (step=0052018) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.177656036000784, LR: 0.0003 +[2026-03-04 03:13:07] (step=0052019) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.177851692428096, LR: 0.0003 +[2026-03-04 03:13:15] (step=0052020) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.17804734885541, LR: 0.0003 +[2026-03-04 03:13:23] (step=0052021) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.178243005282724, LR: 0.0003 +[2026-03-04 03:13:31] (step=0052022) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.178438661710038, LR: 0.0003 +[2026-03-04 03:13:39] (step=0052023) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.17863431813735, LR: 0.0003 +[2026-03-04 03:13:47] (step=0052024) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.178829974564664, LR: 0.0003 +[2026-03-04 03:13:55] (step=0052025) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.179025630991978, LR: 0.0003 +[2026-03-04 03:14:02] (step=0052026) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.179221287419292, LR: 0.0003 +[2026-03-04 03:14:10] (step=0052027) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.179416943846606, LR: 0.0003 +[2026-03-04 03:14:18] (step=0052028) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 10.179612600273918, LR: 0.0003 +[2026-03-04 03:14:26] (step=0052029) Train Loss: 0.4538, Train Steps/Sec: 0.12, Epoch: 10.179808256701232, LR: 0.0003 +[2026-03-04 03:14:34] (step=0052030) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.180003913128546, LR: 0.0003 +[2026-03-04 03:14:42] (step=0052031) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 10.18019956955586, LR: 0.0003 +[2026-03-04 03:14:50] (step=0052032) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.180395225983174, LR: 0.0003 +[2026-03-04 03:14:58] (step=0052033) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 10.180590882410486, LR: 0.0003 +[2026-03-04 03:15:05] (step=0052034) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 10.1807865388378, LR: 0.0003 +[2026-03-04 03:15:13] (step=0052035) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.180982195265114, LR: 0.0003 +[2026-03-04 03:15:21] (step=0052036) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.181177851692429, LR: 0.0003 +[2026-03-04 03:15:29] (step=0052037) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.181373508119743, LR: 0.0003 +[2026-03-04 03:15:37] (step=0052038) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.181569164547055, LR: 0.0003 +[2026-03-04 03:15:45] (step=0052039) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.181764820974369, LR: 0.0003 +[2026-03-04 03:15:53] (step=0052040) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.181960477401683, LR: 0.0003 +[2026-03-04 03:16:01] (step=0052041) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.182156133828997, LR: 0.0003 +[2026-03-04 03:16:08] (step=0052042) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.18235179025631, LR: 0.0003 +[2026-03-04 03:16:16] (step=0052043) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.182547446683623, LR: 0.0003 +[2026-03-04 03:16:24] (step=0052044) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.182743103110937, LR: 0.0003 +[2026-03-04 03:16:32] (step=0052045) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.182938759538251, LR: 0.0003 +[2026-03-04 03:16:40] (step=0052046) Train Loss: 0.4417, Train Steps/Sec: 0.12, Epoch: 10.183134415965565, LR: 0.0003 +[2026-03-04 03:16:48] (step=0052047) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.183330072392877, LR: 0.0003 +[2026-03-04 03:16:56] (step=0052048) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.183525728820191, LR: 0.0003 +[2026-03-04 03:17:04] (step=0052049) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.183721385247505, LR: 0.0003 +[2026-03-04 03:17:11] (step=0052050) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.18391704167482, LR: 0.0003 +[2026-03-04 03:17:19] (step=0052051) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.184112698102133, LR: 0.0003 +[2026-03-04 03:17:27] (step=0052052) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.184308354529445, LR: 0.0003 +[2026-03-04 03:17:35] (step=0052053) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.18450401095676, LR: 0.0003 +[2026-03-04 03:17:43] (step=0052054) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.184699667384074, LR: 0.0003 +[2026-03-04 03:17:51] (step=0052055) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.184895323811388, LR: 0.0003 +[2026-03-04 03:17:59] (step=0052056) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.185090980238702, LR: 0.0003 +[2026-03-04 03:18:07] (step=0052057) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 10.185286636666014, LR: 0.0003 +[2026-03-04 03:18:14] (step=0052058) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.185482293093328, LR: 0.0003 +[2026-03-04 03:18:22] (step=0052059) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.185677949520642, LR: 0.0003 +[2026-03-04 03:18:30] (step=0052060) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.185873605947956, LR: 0.0003 +[2026-03-04 03:18:38] (step=0052061) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.18606926237527, LR: 0.0003 +[2026-03-04 03:18:46] (step=0052062) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.186264918802582, LR: 0.0003 +[2026-03-04 03:18:54] (step=0052063) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.186460575229896, LR: 0.0003 +[2026-03-04 03:19:02] (step=0052064) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.18665623165721, LR: 0.0003 +[2026-03-04 03:19:09] (step=0052065) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 10.186851888084524, LR: 0.0003 +[2026-03-04 03:19:17] (step=0052066) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.187047544511838, LR: 0.0003 +[2026-03-04 03:19:25] (step=0052067) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.18724320093915, LR: 0.0003 +[2026-03-04 03:19:33] (step=0052068) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.187438857366464, LR: 0.0003 +[2026-03-04 03:19:41] (step=0052069) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 10.187634513793778, LR: 0.0003 +[2026-03-04 03:19:49] (step=0052070) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.187830170221092, LR: 0.0003 +[2026-03-04 03:19:57] (step=0052071) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 10.188025826648406, LR: 0.0003 +[2026-03-04 03:20:04] (step=0052072) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.188221483075719, LR: 0.0003 +[2026-03-04 03:20:12] (step=0052073) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.188417139503033, LR: 0.0003 +[2026-03-04 03:20:20] (step=0052074) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.188612795930347, LR: 0.0003 +[2026-03-04 03:20:28] (step=0052075) Train Loss: 0.4443, Train Steps/Sec: 0.12, Epoch: 10.18880845235766, LR: 0.0003 +[2026-03-04 03:20:36] (step=0052076) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.189004108784973, LR: 0.0003 +[2026-03-04 03:20:44] (step=0052077) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.189199765212287, LR: 0.0003 +[2026-03-04 03:20:52] (step=0052078) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.1893954216396, LR: 0.0003 +[2026-03-04 03:21:00] (step=0052079) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.189591078066915, LR: 0.0003 +[2026-03-04 03:21:08] (step=0052080) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.189786734494229, LR: 0.0003 +[2026-03-04 03:21:15] (step=0052081) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.189982390921541, LR: 0.0003 +[2026-03-04 03:21:23] (step=0052082) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.190178047348855, LR: 0.0003 +[2026-03-04 03:21:31] (step=0052083) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.190373703776169, LR: 0.0003 +[2026-03-04 03:21:39] (step=0052084) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.190569360203483, LR: 0.0003 +[2026-03-04 03:21:47] (step=0052085) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.190765016630797, LR: 0.0003 +[2026-03-04 03:21:55] (step=0052086) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.19096067305811, LR: 0.0003 +[2026-03-04 03:22:03] (step=0052087) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.191156329485423, LR: 0.0003 +[2026-03-04 03:22:10] (step=0052088) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.191351985912737, LR: 0.0003 +[2026-03-04 03:22:18] (step=0052089) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.191547642340051, LR: 0.0003 +[2026-03-04 03:22:26] (step=0052090) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.191743298767365, LR: 0.0003 +[2026-03-04 03:22:34] (step=0052091) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.191938955194678, LR: 0.0003 +[2026-03-04 03:22:42] (step=0052092) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.192134611621992, LR: 0.0003 +[2026-03-04 03:22:50] (step=0052093) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 10.192330268049306, LR: 0.0003 +[2026-03-04 03:22:58] (step=0052094) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.19252592447662, LR: 0.0003 +[2026-03-04 03:23:05] (step=0052095) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.192721580903934, LR: 0.0003 +[2026-03-04 03:23:13] (step=0052096) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 10.192917237331246, LR: 0.0003 +[2026-03-04 03:23:21] (step=0052097) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.19311289375856, LR: 0.0003 +[2026-03-04 03:23:29] (step=0052098) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.193308550185874, LR: 0.0003 +[2026-03-04 03:23:37] (step=0052099) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.193504206613188, LR: 0.0003 +[2026-03-04 03:23:45] (step=0052100) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.1936998630405, LR: 0.0003 +[2026-03-04 03:23:53] (step=0052101) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.193895519467814, LR: 0.0003 +[2026-03-04 03:24:01] (step=0052102) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.194091175895128, LR: 0.0003 +[2026-03-04 03:24:08] (step=0052103) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.194286832322442, LR: 0.0003 +[2026-03-04 03:24:16] (step=0052104) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.194482488749756, LR: 0.0003 +[2026-03-04 03:24:24] (step=0052105) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.194678145177068, LR: 0.0003 +[2026-03-04 03:24:32] (step=0052106) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.194873801604382, LR: 0.0003 +[2026-03-04 03:24:40] (step=0052107) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.195069458031696, LR: 0.0003 +[2026-03-04 03:24:48] (step=0052108) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.19526511445901, LR: 0.0003 +[2026-03-04 03:24:56] (step=0052109) Train Loss: 0.4257, Train Steps/Sec: 0.13, Epoch: 10.195460770886324, LR: 0.0003 +[2026-03-04 03:25:04] (step=0052110) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.195656427313637, LR: 0.0003 +[2026-03-04 03:25:11] (step=0052111) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.19585208374095, LR: 0.0003 +[2026-03-04 03:25:19] (step=0052112) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.196047740168265, LR: 0.0003 +[2026-03-04 03:25:27] (step=0052113) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.196243396595579, LR: 0.0003 +[2026-03-04 03:25:35] (step=0052114) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.196439053022893, LR: 0.0003 +[2026-03-04 03:25:43] (step=0052115) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.196634709450205, LR: 0.0003 +[2026-03-04 03:25:51] (step=0052116) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 10.196830365877519, LR: 0.0003 +[2026-03-04 03:25:59] (step=0052117) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.197026022304833, LR: 0.0003 +[2026-03-04 03:26:06] (step=0052118) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.197221678732147, LR: 0.0003 +[2026-03-04 03:26:14] (step=0052119) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.19741733515946, LR: 0.0003 +[2026-03-04 03:26:22] (step=0052120) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.197612991586773, LR: 0.0003 +[2026-03-04 03:26:30] (step=0052121) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.197808648014087, LR: 0.0003 +[2026-03-04 03:26:38] (step=0052122) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.198004304441401, LR: 0.0003 +[2026-03-04 03:26:46] (step=0052123) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.198199960868715, LR: 0.0003 +[2026-03-04 03:26:54] (step=0052124) Train Loss: 0.4441, Train Steps/Sec: 0.12, Epoch: 10.198395617296029, LR: 0.0003 +[2026-03-04 03:27:02] (step=0052125) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.198591273723341, LR: 0.0003 +[2026-03-04 03:27:10] (step=0052126) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.198786930150655, LR: 0.0003 +[2026-03-04 03:27:17] (step=0052127) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.19898258657797, LR: 0.0003 +[2026-03-04 03:27:25] (step=0052128) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.199178243005283, LR: 0.0003 +[2026-03-04 03:27:33] (step=0052129) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.199373899432596, LR: 0.0003 +[2026-03-04 03:27:41] (step=0052130) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.19956955585991, LR: 0.0003 +[2026-03-04 03:27:49] (step=0052131) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.199765212287224, LR: 0.0003 +[2026-03-04 03:27:57] (step=0052132) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.199960868714538, LR: 0.0003 +[2026-03-04 03:28:05] (step=0052133) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.200156525141852, LR: 0.0003 +[2026-03-04 03:28:12] (step=0052134) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.200352181569164, LR: 0.0003 +[2026-03-04 03:28:20] (step=0052135) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.200547837996478, LR: 0.0003 +[2026-03-04 03:28:28] (step=0052136) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 10.200743494423792, LR: 0.0003 +[2026-03-04 03:28:36] (step=0052137) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.200939150851106, LR: 0.0003 +[2026-03-04 03:28:44] (step=0052138) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 10.20113480727842, LR: 0.0003 +[2026-03-04 03:28:52] (step=0052139) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.201330463705732, LR: 0.0003 +[2026-03-04 03:29:00] (step=0052140) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.201526120133046, LR: 0.0003 +[2026-03-04 03:29:07] (step=0052141) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.20172177656036, LR: 0.0003 +[2026-03-04 03:29:15] (step=0052142) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.201917432987674, LR: 0.0003 +[2026-03-04 03:29:23] (step=0052143) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.202113089414988, LR: 0.0003 +[2026-03-04 03:29:31] (step=0052144) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.2023087458423, LR: 0.0003 +[2026-03-04 03:29:39] (step=0052145) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.202504402269614, LR: 0.0003 +[2026-03-04 03:29:47] (step=0052146) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.202700058696928, LR: 0.0003 +[2026-03-04 03:29:55] (step=0052147) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.202895715124242, LR: 0.0003 +[2026-03-04 03:30:03] (step=0052148) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.203091371551556, LR: 0.0003 +[2026-03-04 03:30:11] (step=0052149) Train Loss: 0.4462, Train Steps/Sec: 0.12, Epoch: 10.203287027978869, LR: 0.0003 +[2026-03-04 03:30:18] (step=0052150) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.203482684406183, LR: 0.0003 +[2026-03-04 03:30:26] (step=0052151) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.203678340833497, LR: 0.0003 +[2026-03-04 03:30:34] (step=0052152) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.20387399726081, LR: 0.0003 +[2026-03-04 03:30:42] (step=0052153) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.204069653688123, LR: 0.0003 +[2026-03-04 03:30:50] (step=0052154) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.204265310115437, LR: 0.0003 +[2026-03-04 03:30:58] (step=0052155) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.20446096654275, LR: 0.0003 +[2026-03-04 03:31:06] (step=0052156) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.204656622970065, LR: 0.0003 +[2026-03-04 03:31:13] (step=0052157) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.204852279397379, LR: 0.0003 +[2026-03-04 03:31:21] (step=0052158) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.205047935824691, LR: 0.0003 +[2026-03-04 03:31:29] (step=0052159) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.205243592252005, LR: 0.0003 +[2026-03-04 03:31:37] (step=0052160) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.205439248679319, LR: 0.0003 +[2026-03-04 03:31:45] (step=0052161) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.205634905106633, LR: 0.0003 +[2026-03-04 03:31:53] (step=0052162) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.205830561533947, LR: 0.0003 +[2026-03-04 03:32:01] (step=0052163) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.20602621796126, LR: 0.0003 +[2026-03-04 03:32:09] (step=0052164) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.206221874388573, LR: 0.0003 +[2026-03-04 03:32:16] (step=0052165) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.206417530815887, LR: 0.0003 +[2026-03-04 03:32:24] (step=0052166) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.206613187243201, LR: 0.0003 +[2026-03-04 03:32:32] (step=0052167) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.206808843670515, LR: 0.0003 +[2026-03-04 03:32:40] (step=0052168) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.207004500097828, LR: 0.0003 +[2026-03-04 03:32:48] (step=0052169) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.207200156525142, LR: 0.0003 +[2026-03-04 03:32:56] (step=0052170) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.207395812952456, LR: 0.0003 +[2026-03-04 03:33:04] (step=0052171) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 10.20759146937977, LR: 0.0003 +[2026-03-04 03:33:12] (step=0052172) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.207787125807084, LR: 0.0003 +[2026-03-04 03:33:19] (step=0052173) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.207982782234396, LR: 0.0003 +[2026-03-04 03:33:27] (step=0052174) Train Loss: 0.4572, Train Steps/Sec: 0.12, Epoch: 10.20817843866171, LR: 0.0003 +[2026-03-04 03:33:35] (step=0052175) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.208374095089024, LR: 0.0003 +[2026-03-04 03:33:43] (step=0052176) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.208569751516338, LR: 0.0003 +[2026-03-04 03:33:51] (step=0052177) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.20876540794365, LR: 0.0003 +[2026-03-04 03:33:59] (step=0052178) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 10.208961064370964, LR: 0.0003 +[2026-03-04 03:34:07] (step=0052179) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.209156720798278, LR: 0.0003 +[2026-03-04 03:34:15] (step=0052180) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.209352377225592, LR: 0.0003 +[2026-03-04 03:34:22] (step=0052181) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.209548033652906, LR: 0.0003 +[2026-03-04 03:34:30] (step=0052182) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.209743690080218, LR: 0.0003 +[2026-03-04 03:34:38] (step=0052183) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 10.209939346507532, LR: 0.0003 +[2026-03-04 03:34:46] (step=0052184) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.210135002934846, LR: 0.0003 +[2026-03-04 03:34:54] (step=0052185) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.21033065936216, LR: 0.0003 +[2026-03-04 03:35:02] (step=0052186) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 10.210526315789474, LR: 0.0003 +[2026-03-04 03:35:10] (step=0052187) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.210721972216787, LR: 0.0003 +[2026-03-04 03:35:18] (step=0052188) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.2109176286441, LR: 0.0003 +[2026-03-04 03:35:25] (step=0052189) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.211113285071415, LR: 0.0003 +[2026-03-04 03:35:33] (step=0052190) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.211308941498729, LR: 0.0003 +[2026-03-04 03:35:41] (step=0052191) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.211504597926043, LR: 0.0003 +[2026-03-04 03:35:49] (step=0052192) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.211700254353355, LR: 0.0003 +[2026-03-04 03:35:57] (step=0052193) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.211895910780669, LR: 0.0003 +[2026-03-04 03:36:05] (step=0052194) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.212091567207983, LR: 0.0003 +[2026-03-04 03:36:13] (step=0052195) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.212287223635297, LR: 0.0003 +[2026-03-04 03:36:20] (step=0052196) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.21248288006261, LR: 0.0003 +[2026-03-04 03:36:28] (step=0052197) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.212678536489923, LR: 0.0003 +[2026-03-04 03:36:36] (step=0052198) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.212874192917237, LR: 0.0003 +[2026-03-04 03:36:44] (step=0052199) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.213069849344551, LR: 0.0003 +[2026-03-04 03:36:52] (step=0052200) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 10.213265505771865, LR: 0.0003 +[2026-03-04 03:37:00] (step=0052201) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.213461162199179, LR: 0.0003 +[2026-03-04 03:37:08] (step=0052202) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.213656818626491, LR: 0.0003 +[2026-03-04 03:37:16] (step=0052203) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.213852475053805, LR: 0.0003 +[2026-03-04 03:37:23] (step=0052204) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.21404813148112, LR: 0.0003 +[2026-03-04 03:37:31] (step=0052205) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.214243787908433, LR: 0.0003 +[2026-03-04 03:37:39] (step=0052206) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.214439444335746, LR: 0.0003 +[2026-03-04 03:37:47] (step=0052207) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.21463510076306, LR: 0.0003 +[2026-03-04 03:37:55] (step=0052208) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.214830757190374, LR: 0.0003 +[2026-03-04 03:38:03] (step=0052209) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.215026413617688, LR: 0.0003 +[2026-03-04 03:38:10] (step=0052210) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.215222070045002, LR: 0.0003 +[2026-03-04 03:38:18] (step=0052211) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.215417726472314, LR: 0.0003 +[2026-03-04 03:38:26] (step=0052212) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.215613382899628, LR: 0.0003 +[2026-03-04 03:38:34] (step=0052213) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.215809039326942, LR: 0.0003 +[2026-03-04 03:38:42] (step=0052214) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.216004695754256, LR: 0.0003 +[2026-03-04 03:38:50] (step=0052215) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.21620035218157, LR: 0.0003 +[2026-03-04 03:38:58] (step=0052216) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 10.216396008608882, LR: 0.0003 +[2026-03-04 03:39:06] (step=0052217) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 10.216591665036196, LR: 0.0003 +[2026-03-04 03:39:13] (step=0052218) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.21678732146351, LR: 0.0003 +[2026-03-04 03:39:21] (step=0052219) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.216982977890824, LR: 0.0003 +[2026-03-04 03:39:29] (step=0052220) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.217178634318138, LR: 0.0003 +[2026-03-04 03:39:37] (step=0052221) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.21737429074545, LR: 0.0003 +[2026-03-04 03:39:45] (step=0052222) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 10.217569947172764, LR: 0.0003 +[2026-03-04 03:39:53] (step=0052223) Train Loss: 0.4309, Train Steps/Sec: 0.12, Epoch: 10.217765603600078, LR: 0.0003 +[2026-03-04 03:40:01] (step=0052224) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.217961260027392, LR: 0.0003 +[2026-03-04 03:40:09] (step=0052225) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.218156916454706, LR: 0.0003 +[2026-03-04 03:40:17] (step=0052226) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.218352572882019, LR: 0.0003 +[2026-03-04 03:40:24] (step=0052227) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.218548229309333, LR: 0.0003 +[2026-03-04 03:40:32] (step=0052228) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.218743885736647, LR: 0.0003 +[2026-03-04 03:40:40] (step=0052229) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.21893954216396, LR: 0.0003 +[2026-03-04 03:40:48] (step=0052230) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.219135198591273, LR: 0.0003 +[2026-03-04 03:40:56] (step=0052231) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.219330855018587, LR: 0.0003 +[2026-03-04 03:41:04] (step=0052232) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.2195265114459, LR: 0.0003 +[2026-03-04 03:41:12] (step=0052233) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.219722167873215, LR: 0.0003 +[2026-03-04 03:41:19] (step=0052234) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.219917824300529, LR: 0.0003 +[2026-03-04 03:41:27] (step=0052235) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.220113480727841, LR: 0.0003 +[2026-03-04 03:41:35] (step=0052236) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.220309137155155, LR: 0.0003 +[2026-03-04 03:41:43] (step=0052237) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.220504793582469, LR: 0.0003 +[2026-03-04 03:41:51] (step=0052238) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 10.220700450009783, LR: 0.0003 +[2026-03-04 03:41:59] (step=0052239) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.220896106437097, LR: 0.0003 +[2026-03-04 03:42:07] (step=0052240) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 10.22109176286441, LR: 0.0003 +[2026-03-04 03:42:14] (step=0052241) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.221287419291723, LR: 0.0003 +[2026-03-04 03:42:22] (step=0052242) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.221483075719037, LR: 0.0003 +[2026-03-04 03:42:30] (step=0052243) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.221678732146351, LR: 0.0003 +[2026-03-04 03:42:38] (step=0052244) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.221874388573665, LR: 0.0003 +[2026-03-04 03:42:46] (step=0052245) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.222070045000978, LR: 0.0003 +[2026-03-04 03:42:54] (step=0052246) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.222265701428292, LR: 0.0003 +[2026-03-04 03:43:02] (step=0052247) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.222461357855606, LR: 0.0003 +[2026-03-04 03:43:10] (step=0052248) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.22265701428292, LR: 0.0003 +[2026-03-04 03:43:18] (step=0052249) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.222852670710234, LR: 0.0003 +[2026-03-04 03:43:25] (step=0052250) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.223048327137546, LR: 0.0003 +[2026-03-04 03:43:33] (step=0052251) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.22324398356486, LR: 0.0003 +[2026-03-04 03:43:41] (step=0052252) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.223439639992174, LR: 0.0003 +[2026-03-04 03:43:49] (step=0052253) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 10.223635296419488, LR: 0.0003 +[2026-03-04 03:43:57] (step=0052254) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.223830952846802, LR: 0.0003 +[2026-03-04 03:44:05] (step=0052255) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.224026609274114, LR: 0.0003 +[2026-03-04 03:44:13] (step=0052256) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.224222265701428, LR: 0.0003 +[2026-03-04 03:44:20] (step=0052257) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.224417922128742, LR: 0.0003 +[2026-03-04 03:44:28] (step=0052258) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.224613578556056, LR: 0.0003 +[2026-03-04 03:44:36] (step=0052259) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.224809234983368, LR: 0.0003 +[2026-03-04 03:44:44] (step=0052260) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.225004891410682, LR: 0.0003 +[2026-03-04 03:44:52] (step=0052261) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.225200547837996, LR: 0.0003 +[2026-03-04 03:45:00] (step=0052262) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.22539620426531, LR: 0.0003 +[2026-03-04 03:45:08] (step=0052263) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.225591860692624, LR: 0.0003 +[2026-03-04 03:45:15] (step=0052264) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.225787517119937, LR: 0.0003 +[2026-03-04 03:45:23] (step=0052265) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.22598317354725, LR: 0.0003 +[2026-03-04 03:45:31] (step=0052266) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.226178829974565, LR: 0.0003 +[2026-03-04 03:45:39] (step=0052267) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.226374486401879, LR: 0.0003 +[2026-03-04 03:45:47] (step=0052268) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 10.226570142829193, LR: 0.0003 +[2026-03-04 03:45:55] (step=0052269) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 10.226765799256505, LR: 0.0003 +[2026-03-04 03:46:03] (step=0052270) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.226961455683819, LR: 0.0003 +[2026-03-04 03:46:10] (step=0052271) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.227157112111133, LR: 0.0003 +[2026-03-04 03:46:18] (step=0052272) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 10.227352768538447, LR: 0.0003 +[2026-03-04 03:46:26] (step=0052273) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.22754842496576, LR: 0.0003 +[2026-03-04 03:46:34] (step=0052274) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.227744081393073, LR: 0.0003 +[2026-03-04 03:46:42] (step=0052275) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.227939737820387, LR: 0.0003 +[2026-03-04 03:46:50] (step=0052276) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.228135394247701, LR: 0.0003 +[2026-03-04 03:46:58] (step=0052277) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.228331050675015, LR: 0.0003 +[2026-03-04 03:47:06] (step=0052278) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.22852670710233, LR: 0.0003 +[2026-03-04 03:47:14] (step=0052279) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.228722363529641, LR: 0.0003 +[2026-03-04 03:47:21] (step=0052280) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.228918019956955, LR: 0.0003 +[2026-03-04 03:47:29] (step=0052281) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.22911367638427, LR: 0.0003 +[2026-03-04 03:47:37] (step=0052282) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.229309332811583, LR: 0.0003 +[2026-03-04 03:47:45] (step=0052283) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.229504989238896, LR: 0.0003 +[2026-03-04 03:47:53] (step=0052284) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.22970064566621, LR: 0.0003 +[2026-03-04 03:48:01] (step=0052285) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 10.229896302093524, LR: 0.0003 +[2026-03-04 03:48:09] (step=0052286) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.230091958520838, LR: 0.0003 +[2026-03-04 03:48:16] (step=0052287) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.230287614948152, LR: 0.0003 +[2026-03-04 03:48:24] (step=0052288) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.230483271375464, LR: 0.0003 +[2026-03-04 03:48:32] (step=0052289) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.230678927802778, LR: 0.0003 +[2026-03-04 03:48:40] (step=0052290) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.230874584230092, LR: 0.0003 +[2026-03-04 03:48:48] (step=0052291) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.231070240657406, LR: 0.0003 +[2026-03-04 03:48:56] (step=0052292) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.23126589708472, LR: 0.0003 +[2026-03-04 03:49:04] (step=0052293) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.231461553512032, LR: 0.0003 +[2026-03-04 03:49:12] (step=0052294) Train Loss: 0.4674, Train Steps/Sec: 0.13, Epoch: 10.231657209939346, LR: 0.0003 +[2026-03-04 03:49:19] (step=0052295) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.23185286636666, LR: 0.0003 +[2026-03-04 03:49:27] (step=0052296) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.232048522793974, LR: 0.0003 +[2026-03-04 03:49:35] (step=0052297) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.232244179221288, LR: 0.0003 +[2026-03-04 03:49:43] (step=0052298) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.2324398356486, LR: 0.0003 +[2026-03-04 03:49:51] (step=0052299) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.232635492075914, LR: 0.0003 +[2026-03-04 03:49:59] (step=0052300) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.232831148503228, LR: 0.0003 +[2026-03-04 03:50:07] (step=0052301) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.233026804930542, LR: 0.0003 +[2026-03-04 03:50:14] (step=0052302) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 10.233222461357856, LR: 0.0003 +[2026-03-04 03:50:22] (step=0052303) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.233418117785169, LR: 0.0003 +[2026-03-04 03:50:30] (step=0052304) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.233613774212483, LR: 0.0003 +[2026-03-04 03:50:38] (step=0052305) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.233809430639797, LR: 0.0003 +[2026-03-04 03:50:46] (step=0052306) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.23400508706711, LR: 0.0003 +[2026-03-04 03:50:54] (step=0052307) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 10.234200743494425, LR: 0.0003 +[2026-03-04 03:51:02] (step=0052308) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.234396399921737, LR: 0.0003 +[2026-03-04 03:51:09] (step=0052309) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.234592056349051, LR: 0.0003 +[2026-03-04 03:51:17] (step=0052310) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.234787712776365, LR: 0.0003 +[2026-03-04 03:51:25] (step=0052311) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.234983369203679, LR: 0.0003 +[2026-03-04 03:51:33] (step=0052312) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.235179025630991, LR: 0.0003 +[2026-03-04 03:51:41] (step=0052313) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.235374682058305, LR: 0.0003 +[2026-03-04 03:51:49] (step=0052314) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.23557033848562, LR: 0.0003 +[2026-03-04 03:51:57] (step=0052315) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.235765994912933, LR: 0.0003 +[2026-03-04 03:52:04] (step=0052316) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.235961651340247, LR: 0.0003 +[2026-03-04 03:52:12] (step=0052317) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.23615730776756, LR: 0.0003 +[2026-03-04 03:52:20] (step=0052318) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.236352964194873, LR: 0.0003 +[2026-03-04 03:52:28] (step=0052319) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.236548620622187, LR: 0.0003 +[2026-03-04 03:52:36] (step=0052320) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.236744277049501, LR: 0.0003 +[2026-03-04 03:52:44] (step=0052321) Train Loss: 0.4400, Train Steps/Sec: 0.12, Epoch: 10.236939933476815, LR: 0.0003 +[2026-03-04 03:52:52] (step=0052322) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 10.237135589904128, LR: 0.0003 +[2026-03-04 03:53:00] (step=0052323) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.237331246331442, LR: 0.0003 +[2026-03-04 03:53:07] (step=0052324) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 10.237526902758756, LR: 0.0003 +[2026-03-04 03:53:15] (step=0052325) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 10.23772255918607, LR: 0.0003 +[2026-03-04 03:53:23] (step=0052326) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.237918215613384, LR: 0.0003 +[2026-03-04 03:53:31] (step=0052327) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.238113872040696, LR: 0.0003 +[2026-03-04 03:53:39] (step=0052328) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 10.23830952846801, LR: 0.0003 +[2026-03-04 03:53:47] (step=0052329) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 10.238505184895324, LR: 0.0003 +[2026-03-04 03:53:55] (step=0052330) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.238700841322638, LR: 0.0003 +[2026-03-04 03:54:03] (step=0052331) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.238896497749952, LR: 0.0003 +[2026-03-04 03:54:10] (step=0052332) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.239092154177264, LR: 0.0003 +[2026-03-04 03:54:18] (step=0052333) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.239287810604578, LR: 0.0003 +[2026-03-04 03:54:26] (step=0052334) Train Loss: 0.4435, Train Steps/Sec: 0.12, Epoch: 10.239483467031892, LR: 0.0003 +[2026-03-04 03:54:34] (step=0052335) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.239679123459206, LR: 0.0003 +[2026-03-04 03:54:42] (step=0052336) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.239874779886518, LR: 0.0003 +[2026-03-04 03:54:50] (step=0052337) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.240070436313832, LR: 0.0003 +[2026-03-04 03:54:58] (step=0052338) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 10.240266092741146, LR: 0.0003 +[2026-03-04 03:55:06] (step=0052339) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.24046174916846, LR: 0.0003 +[2026-03-04 03:55:14] (step=0052340) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.240657405595774, LR: 0.0003 +[2026-03-04 03:55:21] (step=0052341) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.240853062023087, LR: 0.0003 +[2026-03-04 03:55:29] (step=0052342) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.2410487184504, LR: 0.0003 +[2026-03-04 03:55:37] (step=0052343) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.241244374877715, LR: 0.0003 +[2026-03-04 03:55:45] (step=0052344) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.241440031305029, LR: 0.0003 +[2026-03-04 03:55:53] (step=0052345) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 10.241635687732343, LR: 0.0003 +[2026-03-04 03:56:01] (step=0052346) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.241831344159655, LR: 0.0003 +[2026-03-04 03:56:09] (step=0052347) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.242027000586969, LR: 0.0003 +[2026-03-04 03:56:16] (step=0052348) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.242222657014283, LR: 0.0003 +[2026-03-04 03:56:24] (step=0052349) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 10.242418313441597, LR: 0.0003 +[2026-03-04 03:56:32] (step=0052350) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.242613969868911, LR: 0.0003 +[2026-03-04 03:56:40] (step=0052351) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.242809626296223, LR: 0.0003 +[2026-03-04 03:56:48] (step=0052352) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 10.243005282723537, LR: 0.0003 +[2026-03-04 03:56:56] (step=0052353) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.243200939150851, LR: 0.0003 +[2026-03-04 03:57:04] (step=0052354) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.243396595578165, LR: 0.0003 +[2026-03-04 03:57:11] (step=0052355) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 10.24359225200548, LR: 0.0003 +[2026-03-04 03:57:19] (step=0052356) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.243787908432791, LR: 0.0003 +[2026-03-04 03:57:27] (step=0052357) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.243983564860105, LR: 0.0003 +[2026-03-04 03:57:35] (step=0052358) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.24417922128742, LR: 0.0003 +[2026-03-04 03:57:43] (step=0052359) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.244374877714733, LR: 0.0003 +[2026-03-04 03:57:51] (step=0052360) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.244570534142047, LR: 0.0003 +[2026-03-04 03:57:59] (step=0052361) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.24476619056936, LR: 0.0003 +[2026-03-04 03:58:07] (step=0052362) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.244961846996674, LR: 0.0003 +[2026-03-04 03:58:14] (step=0052363) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.245157503423988, LR: 0.0003 +[2026-03-04 03:58:22] (step=0052364) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.245353159851302, LR: 0.0003 +[2026-03-04 03:58:30] (step=0052365) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.245548816278614, LR: 0.0003 +[2026-03-04 03:58:38] (step=0052366) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.245744472705928, LR: 0.0003 +[2026-03-04 03:58:46] (step=0052367) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.245940129133242, LR: 0.0003 +[2026-03-04 03:58:54] (step=0052368) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.246135785560556, LR: 0.0003 +[2026-03-04 03:59:02] (step=0052369) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.24633144198787, LR: 0.0003 +[2026-03-04 03:59:10] (step=0052370) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 10.246527098415182, LR: 0.0003 +[2026-03-04 03:59:17] (step=0052371) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.246722754842496, LR: 0.0003 +[2026-03-04 03:59:25] (step=0052372) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.24691841126981, LR: 0.0003 +[2026-03-04 03:59:33] (step=0052373) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.247114067697124, LR: 0.0003 +[2026-03-04 03:59:41] (step=0052374) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.247309724124438, LR: 0.0003 +[2026-03-04 03:59:49] (step=0052375) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.24750538055175, LR: 0.0003 +[2026-03-04 03:59:57] (step=0052376) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.247701036979064, LR: 0.0003 +[2026-03-04 04:00:05] (step=0052377) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.247896693406378, LR: 0.0003 +[2026-03-04 04:00:13] (step=0052378) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.248092349833692, LR: 0.0003 +[2026-03-04 04:00:20] (step=0052379) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.248288006261006, LR: 0.0003 +[2026-03-04 04:00:28] (step=0052380) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.248483662688319, LR: 0.0003 +[2026-03-04 04:00:36] (step=0052381) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.248679319115633, LR: 0.0003 +[2026-03-04 04:00:44] (step=0052382) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.248874975542947, LR: 0.0003 +[2026-03-04 04:00:52] (step=0052383) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.24907063197026, LR: 0.0003 +[2026-03-04 04:01:00] (step=0052384) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.249266288397575, LR: 0.0003 +[2026-03-04 04:01:08] (step=0052385) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.249461944824887, LR: 0.0003 +[2026-03-04 04:01:16] (step=0052386) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.249657601252201, LR: 0.0003 +[2026-03-04 04:01:23] (step=0052387) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.249853257679515, LR: 0.0003 +[2026-03-04 04:01:31] (step=0052388) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.250048914106829, LR: 0.0003 +[2026-03-04 04:01:39] (step=0052389) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.250244570534141, LR: 0.0003 +[2026-03-04 04:01:47] (step=0052390) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.250440226961455, LR: 0.0003 +[2026-03-04 04:01:55] (step=0052391) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.25063588338877, LR: 0.0003 +[2026-03-04 04:02:03] (step=0052392) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.250831539816083, LR: 0.0003 +[2026-03-04 04:02:11] (step=0052393) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.251027196243397, LR: 0.0003 +[2026-03-04 04:02:19] (step=0052394) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.25122285267071, LR: 0.0003 +[2026-03-04 04:02:26] (step=0052395) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.251418509098023, LR: 0.0003 +[2026-03-04 04:02:34] (step=0052396) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.251614165525337, LR: 0.0003 +[2026-03-04 04:02:42] (step=0052397) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.251809821952651, LR: 0.0003 +[2026-03-04 04:02:50] (step=0052398) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 10.252005478379965, LR: 0.0003 +[2026-03-04 04:02:58] (step=0052399) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 10.252201134807278, LR: 0.0003 +[2026-03-04 04:03:06] (step=0052400) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.252396791234592, LR: 0.0003 +[2026-03-04 04:03:14] (step=0052401) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.252592447661906, LR: 0.0003 +[2026-03-04 04:03:21] (step=0052402) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 10.25278810408922, LR: 0.0003 +[2026-03-04 04:03:29] (step=0052403) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.252983760516534, LR: 0.0003 +[2026-03-04 04:03:37] (step=0052404) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.253179416943846, LR: 0.0003 +[2026-03-04 04:03:45] (step=0052405) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.25337507337116, LR: 0.0003 +[2026-03-04 04:03:53] (step=0052406) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.253570729798474, LR: 0.0003 +[2026-03-04 04:04:01] (step=0052407) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.253766386225788, LR: 0.0003 +[2026-03-04 04:04:09] (step=0052408) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.253962042653102, LR: 0.0003 +[2026-03-04 04:04:17] (step=0052409) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.254157699080414, LR: 0.0003 +[2026-03-04 04:04:24] (step=0052410) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.254353355507728, LR: 0.0003 +[2026-03-04 04:04:32] (step=0052411) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.254549011935042, LR: 0.0003 +[2026-03-04 04:04:40] (step=0052412) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.254744668362356, LR: 0.0003 +[2026-03-04 04:04:48] (step=0052413) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.25494032478967, LR: 0.0003 +[2026-03-04 04:04:56] (step=0052414) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.255135981216982, LR: 0.0003 +[2026-03-04 04:05:04] (step=0052415) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.255331637644296, LR: 0.0003 +[2026-03-04 04:05:12] (step=0052416) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.25552729407161, LR: 0.0003 +[2026-03-04 04:05:19] (step=0052417) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.255722950498924, LR: 0.0003 +[2026-03-04 04:05:27] (step=0052418) Train Loss: 0.4369, Train Steps/Sec: 0.12, Epoch: 10.255918606926237, LR: 0.0003 +[2026-03-04 04:05:35] (step=0052419) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.25611426335355, LR: 0.0003 +[2026-03-04 04:05:43] (step=0052420) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 10.256309919780865, LR: 0.0003 +[2026-03-04 04:05:51] (step=0052421) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.256505576208179, LR: 0.0003 +[2026-03-04 04:05:59] (step=0052422) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 10.256701232635493, LR: 0.0003 +[2026-03-04 04:06:07] (step=0052423) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.256896889062805, LR: 0.0003 +[2026-03-04 04:06:15] (step=0052424) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 10.257092545490119, LR: 0.0003 +[2026-03-04 04:06:23] (step=0052425) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.257288201917433, LR: 0.0003 +[2026-03-04 04:06:30] (step=0052426) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 10.257483858344747, LR: 0.0003 +[2026-03-04 04:06:38] (step=0052427) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.257679514772061, LR: 0.0003 +[2026-03-04 04:06:46] (step=0052428) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.257875171199373, LR: 0.0003 +[2026-03-04 04:06:54] (step=0052429) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 10.258070827626687, LR: 0.0003 +[2026-03-04 04:07:02] (step=0052430) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.258266484054001, LR: 0.0003 +[2026-03-04 04:07:10] (step=0052431) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 10.258462140481315, LR: 0.0003 +[2026-03-04 04:07:18] (step=0052432) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.25865779690863, LR: 0.0003 +[2026-03-04 04:07:25] (step=0052433) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.258853453335941, LR: 0.0003 +[2026-03-04 04:07:33] (step=0052434) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.259049109763255, LR: 0.0003 +[2026-03-04 04:07:41] (step=0052435) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.25924476619057, LR: 0.0003 +[2026-03-04 04:07:49] (step=0052436) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.259440422617883, LR: 0.0003 +[2026-03-04 04:07:57] (step=0052437) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.259636079045197, LR: 0.0003 +[2026-03-04 04:08:05] (step=0052438) Train Loss: 0.4380, Train Steps/Sec: 0.12, Epoch: 10.25983173547251, LR: 0.0003 +[2026-03-04 04:08:13] (step=0052439) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.260027391899824, LR: 0.0003 +[2026-03-04 04:08:21] (step=0052440) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.260223048327138, LR: 0.0003 +[2026-03-04 04:08:29] (step=0052441) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.260418704754452, LR: 0.0003 +[2026-03-04 04:08:36] (step=0052442) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.260614361181764, LR: 0.0003 +[2026-03-04 04:08:44] (step=0052443) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.260810017609078, LR: 0.0003 +[2026-03-04 04:08:52] (step=0052444) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.261005674036392, LR: 0.0003 +[2026-03-04 04:09:00] (step=0052445) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.261201330463706, LR: 0.0003 +[2026-03-04 04:09:08] (step=0052446) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 10.26139698689102, LR: 0.0003 +[2026-03-04 04:09:16] (step=0052447) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.261592643318332, LR: 0.0003 +[2026-03-04 04:09:24] (step=0052448) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.261788299745646, LR: 0.0003 +[2026-03-04 04:09:31] (step=0052449) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.26198395617296, LR: 0.0003 +[2026-03-04 04:09:39] (step=0052450) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.262179612600274, LR: 0.0003 +[2026-03-04 04:09:47] (step=0052451) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.262375269027588, LR: 0.0003 +[2026-03-04 04:09:55] (step=0052452) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.2625709254549, LR: 0.0003 +[2026-03-04 04:10:03] (step=0052453) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.262766581882214, LR: 0.0003 +[2026-03-04 04:10:11] (step=0052454) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.262962238309528, LR: 0.0003 +[2026-03-04 04:10:19] (step=0052455) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 10.263157894736842, LR: 0.0003 +[2026-03-04 04:10:27] (step=0052456) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.263353551164156, LR: 0.0003 +[2026-03-04 04:10:34] (step=0052457) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.263549207591469, LR: 0.0003 +[2026-03-04 04:10:42] (step=0052458) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.263744864018783, LR: 0.0003 +[2026-03-04 04:10:50] (step=0052459) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.263940520446097, LR: 0.0003 +[2026-03-04 04:10:58] (step=0052460) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 10.26413617687341, LR: 0.0003 +[2026-03-04 04:11:06] (step=0052461) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.264331833300725, LR: 0.0003 +[2026-03-04 04:11:14] (step=0052462) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.264527489728037, LR: 0.0003 +[2026-03-04 04:11:22] (step=0052463) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.264723146155351, LR: 0.0003 +[2026-03-04 04:11:30] (step=0052464) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.264918802582665, LR: 0.0003 +[2026-03-04 04:11:37] (step=0052465) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.265114459009979, LR: 0.0003 +[2026-03-04 04:11:45] (step=0052466) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 10.265310115437293, LR: 0.0003 +[2026-03-04 04:11:53] (step=0052467) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.265505771864605, LR: 0.0003 +[2026-03-04 04:12:01] (step=0052468) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.26570142829192, LR: 0.0003 +[2026-03-04 04:12:09] (step=0052469) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 10.265897084719233, LR: 0.0003 +[2026-03-04 04:12:17] (step=0052470) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.266092741146547, LR: 0.0003 +[2026-03-04 04:12:25] (step=0052471) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.26628839757386, LR: 0.0003 +[2026-03-04 04:12:32] (step=0052472) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.266484054001173, LR: 0.0003 +[2026-03-04 04:12:40] (step=0052473) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.266679710428487, LR: 0.0003 +[2026-03-04 04:12:48] (step=0052474) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.266875366855801, LR: 0.0003 +[2026-03-04 04:12:56] (step=0052475) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.267071023283115, LR: 0.0003 +[2026-03-04 04:13:04] (step=0052476) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.267266679710428, LR: 0.0003 +[2026-03-04 04:13:12] (step=0052477) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 10.267462336137742, LR: 0.0003 +[2026-03-04 04:13:20] (step=0052478) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.267657992565056, LR: 0.0003 +[2026-03-04 04:13:28] (step=0052479) Train Loss: 0.4659, Train Steps/Sec: 0.13, Epoch: 10.26785364899237, LR: 0.0003 +[2026-03-04 04:13:35] (step=0052480) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 10.268049305419684, LR: 0.0003 +[2026-03-04 04:13:43] (step=0052481) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.268244961846996, LR: 0.0003 +[2026-03-04 04:13:51] (step=0052482) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 10.26844061827431, LR: 0.0003 +[2026-03-04 04:13:59] (step=0052483) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.268636274701624, LR: 0.0003 +[2026-03-04 04:14:07] (step=0052484) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.268831931128938, LR: 0.0003 +[2026-03-04 04:14:15] (step=0052485) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.269027587556252, LR: 0.0003 +[2026-03-04 04:14:23] (step=0052486) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.269223243983564, LR: 0.0003 +[2026-03-04 04:14:31] (step=0052487) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.269418900410878, LR: 0.0003 +[2026-03-04 04:14:38] (step=0052488) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.269614556838192, LR: 0.0003 +[2026-03-04 04:14:46] (step=0052489) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.269810213265506, LR: 0.0003 +[2026-03-04 04:14:54] (step=0052490) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 10.27000586969282, LR: 0.0003 +[2026-03-04 04:15:02] (step=0052491) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.270201526120132, LR: 0.0003 +[2026-03-04 04:15:10] (step=0052492) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.270397182547446, LR: 0.0003 +[2026-03-04 04:15:18] (step=0052493) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.27059283897476, LR: 0.0003 +[2026-03-04 04:15:26] (step=0052494) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.270788495402075, LR: 0.0003 +[2026-03-04 04:15:33] (step=0052495) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.270984151829387, LR: 0.0003 +[2026-03-04 04:15:41] (step=0052496) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.2711798082567, LR: 0.0003 +[2026-03-04 04:15:49] (step=0052497) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.271375464684015, LR: 0.0003 +[2026-03-04 04:15:57] (step=0052498) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.271571121111329, LR: 0.0003 +[2026-03-04 04:16:05] (step=0052499) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.271766777538643, LR: 0.0003 +[2026-03-04 04:16:13] (step=0052500) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.271962433965955, LR: 0.0003 +[2026-03-04 04:16:13] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0052500/ +[2026-03-04 04:16:21] (step=0052501) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.272158090393269, LR: 0.0003 +[2026-03-04 04:16:29] (step=0052502) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.272353746820583, LR: 0.0003 +[2026-03-04 04:16:36] (step=0052503) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.272549403247897, LR: 0.0003 +[2026-03-04 04:16:44] (step=0052504) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.272745059675211, LR: 0.0003 +[2026-03-04 04:16:52] (step=0052505) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.272940716102523, LR: 0.0003 +[2026-03-04 04:17:00] (step=0052506) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.273136372529837, LR: 0.0003 +[2026-03-04 04:17:08] (step=0052507) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.273332028957151, LR: 0.0003 +[2026-03-04 04:17:16] (step=0052508) Train Loss: 0.4416, Train Steps/Sec: 0.12, Epoch: 10.273527685384465, LR: 0.0003 +[2026-03-04 04:17:24] (step=0052509) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.27372334181178, LR: 0.0003 +[2026-03-04 04:17:32] (step=0052510) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.273918998239091, LR: 0.0003 +[2026-03-04 04:17:39] (step=0052511) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.274114654666405, LR: 0.0003 +[2026-03-04 04:17:47] (step=0052512) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.27431031109372, LR: 0.0003 +[2026-03-04 04:17:55] (step=0052513) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.274505967521034, LR: 0.0003 +[2026-03-04 04:18:03] (step=0052514) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.274701623948348, LR: 0.0003 +[2026-03-04 04:18:11] (step=0052515) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.27489728037566, LR: 0.0003 +[2026-03-04 04:18:19] (step=0052516) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.275092936802974, LR: 0.0003 +[2026-03-04 04:18:27] (step=0052517) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.275288593230288, LR: 0.0003 +[2026-03-04 04:18:34] (step=0052518) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.275484249657602, LR: 0.0003 +[2026-03-04 04:18:42] (step=0052519) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 10.275679906084916, LR: 0.0003 +[2026-03-04 04:18:50] (step=0052520) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 10.275875562512228, LR: 0.0003 +[2026-03-04 04:18:58] (step=0052521) Train Loss: 0.4217, Train Steps/Sec: 0.13, Epoch: 10.276071218939542, LR: 0.0003 +[2026-03-04 04:19:06] (step=0052522) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.276266875366856, LR: 0.0003 +[2026-03-04 04:19:14] (step=0052523) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.27646253179417, LR: 0.0003 +[2026-03-04 04:19:22] (step=0052524) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.276658188221482, LR: 0.0003 +[2026-03-04 04:19:30] (step=0052525) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.276853844648796, LR: 0.0003 +[2026-03-04 04:19:37] (step=0052526) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.27704950107611, LR: 0.0003 +[2026-03-04 04:19:45] (step=0052527) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.277245157503424, LR: 0.0003 +[2026-03-04 04:19:53] (step=0052528) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 10.277440813930738, LR: 0.0003 +[2026-03-04 04:20:01] (step=0052529) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.27763647035805, LR: 0.0003 +[2026-03-04 04:20:09] (step=0052530) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.277832126785365, LR: 0.0003 +[2026-03-04 04:20:17] (step=0052531) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.278027783212679, LR: 0.0003 +[2026-03-04 04:20:25] (step=0052532) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.278223439639993, LR: 0.0003 +[2026-03-04 04:20:32] (step=0052533) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.278419096067307, LR: 0.0003 +[2026-03-04 04:20:40] (step=0052534) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.278614752494619, LR: 0.0003 +[2026-03-04 04:20:48] (step=0052535) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.278810408921933, LR: 0.0003 +[2026-03-04 04:20:56] (step=0052536) Train Loss: 0.4563, Train Steps/Sec: 0.12, Epoch: 10.279006065349247, LR: 0.0003 +[2026-03-04 04:21:04] (step=0052537) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.27920172177656, LR: 0.0003 +[2026-03-04 04:21:12] (step=0052538) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.279397378203875, LR: 0.0003 +[2026-03-04 04:21:20] (step=0052539) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.279593034631187, LR: 0.0003 +[2026-03-04 04:21:28] (step=0052540) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.279788691058501, LR: 0.0003 +[2026-03-04 04:21:35] (step=0052541) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.279984347485815, LR: 0.0003 +[2026-03-04 04:21:43] (step=0052542) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 10.280180003913129, LR: 0.0003 +[2026-03-04 04:21:51] (step=0052543) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.280375660340443, LR: 0.0003 +[2026-03-04 04:21:59] (step=0052544) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.280571316767755, LR: 0.0003 +[2026-03-04 04:22:07] (step=0052545) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.28076697319507, LR: 0.0003 +[2026-03-04 04:22:15] (step=0052546) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.280962629622383, LR: 0.0003 +[2026-03-04 04:22:23] (step=0052547) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.281158286049697, LR: 0.0003 +[2026-03-04 04:22:31] (step=0052548) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.28135394247701, LR: 0.0003 +[2026-03-04 04:22:38] (step=0052549) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.281549598904324, LR: 0.0003 +[2026-03-04 04:22:46] (step=0052550) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.281745255331638, LR: 0.0003 +[2026-03-04 04:22:54] (step=0052551) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.281940911758952, LR: 0.0003 +[2026-03-04 04:23:02] (step=0052552) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.282136568186266, LR: 0.0003 +[2026-03-04 04:23:10] (step=0052553) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.282332224613578, LR: 0.0003 +[2026-03-04 04:23:18] (step=0052554) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.282527881040892, LR: 0.0003 +[2026-03-04 04:23:26] (step=0052555) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 10.282723537468206, LR: 0.0003 +[2026-03-04 04:23:34] (step=0052556) Train Loss: 0.4391, Train Steps/Sec: 0.12, Epoch: 10.28291919389552, LR: 0.0003 +[2026-03-04 04:23:42] (step=0052557) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.283114850322834, LR: 0.0003 +[2026-03-04 04:23:49] (step=0052558) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.283310506750146, LR: 0.0003 +[2026-03-04 04:23:57] (step=0052559) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.28350616317746, LR: 0.0003 +[2026-03-04 04:24:05] (step=0052560) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.283701819604774, LR: 0.0003 +[2026-03-04 04:24:13] (step=0052561) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 10.283897476032088, LR: 0.0003 +[2026-03-04 04:24:21] (step=0052562) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.284093132459402, LR: 0.0003 +[2026-03-04 04:24:29] (step=0052563) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.284288788886714, LR: 0.0003 +[2026-03-04 04:24:37] (step=0052564) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.284484445314028, LR: 0.0003 +[2026-03-04 04:24:44] (step=0052565) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.284680101741342, LR: 0.0003 +[2026-03-04 04:24:52] (step=0052566) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.284875758168656, LR: 0.0003 +[2026-03-04 04:25:00] (step=0052567) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.28507141459597, LR: 0.0003 +[2026-03-04 04:25:08] (step=0052568) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.285267071023283, LR: 0.0003 +[2026-03-04 04:25:16] (step=0052569) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 10.285462727450597, LR: 0.0003 +[2026-03-04 04:25:24] (step=0052570) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.28565838387791, LR: 0.0003 +[2026-03-04 04:25:32] (step=0052571) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.285854040305225, LR: 0.0003 +[2026-03-04 04:25:40] (step=0052572) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 10.286049696732539, LR: 0.0003 +[2026-03-04 04:25:47] (step=0052573) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.28624535315985, LR: 0.0003 +[2026-03-04 04:25:55] (step=0052574) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.286441009587165, LR: 0.0003 +[2026-03-04 04:26:03] (step=0052575) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.286636666014479, LR: 0.0003 +[2026-03-04 04:26:11] (step=0052576) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 10.286832322441793, LR: 0.0003 +[2026-03-04 04:26:19] (step=0052577) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.287027978869105, LR: 0.0003 +[2026-03-04 04:26:27] (step=0052578) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.287223635296419, LR: 0.0003 +[2026-03-04 04:26:35] (step=0052579) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 10.287419291723733, LR: 0.0003 +[2026-03-04 04:26:42] (step=0052580) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.287614948151047, LR: 0.0003 +[2026-03-04 04:26:50] (step=0052581) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 10.287810604578361, LR: 0.0003 +[2026-03-04 04:26:58] (step=0052582) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.288006261005673, LR: 0.0003 +[2026-03-04 04:27:06] (step=0052583) Train Loss: 0.4561, Train Steps/Sec: 0.12, Epoch: 10.288201917432987, LR: 0.0003 +[2026-03-04 04:27:14] (step=0052584) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 10.288397573860301, LR: 0.0003 +[2026-03-04 04:27:22] (step=0052585) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.288593230287615, LR: 0.0003 +[2026-03-04 04:27:30] (step=0052586) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.28878888671493, LR: 0.0003 +[2026-03-04 04:27:38] (step=0052587) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.288984543142242, LR: 0.0003 +[2026-03-04 04:27:46] (step=0052588) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.289180199569556, LR: 0.0003 +[2026-03-04 04:27:53] (step=0052589) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.28937585599687, LR: 0.0003 +[2026-03-04 04:28:01] (step=0052590) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.289571512424184, LR: 0.0003 +[2026-03-04 04:28:09] (step=0052591) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.289767168851498, LR: 0.0003 +[2026-03-04 04:28:17] (step=0052592) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.28996282527881, LR: 0.0003 +[2026-03-04 04:28:25] (step=0052593) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.290158481706124, LR: 0.0003 +[2026-03-04 04:28:33] (step=0052594) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.290354138133438, LR: 0.0003 +[2026-03-04 04:28:41] (step=0052595) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.290549794560752, LR: 0.0003 +[2026-03-04 04:28:48] (step=0052596) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.290745450988066, LR: 0.0003 +[2026-03-04 04:28:56] (step=0052597) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 10.290941107415378, LR: 0.0003 +[2026-03-04 04:29:04] (step=0052598) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.291136763842692, LR: 0.0003 +[2026-03-04 04:29:12] (step=0052599) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.291332420270006, LR: 0.0003 +[2026-03-04 04:29:20] (step=0052600) Train Loss: 0.4459, Train Steps/Sec: 0.12, Epoch: 10.29152807669732, LR: 0.0003 +[2026-03-04 04:29:28] (step=0052601) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.291723733124632, LR: 0.0003 +[2026-03-04 04:29:36] (step=0052602) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.291919389551946, LR: 0.0003 +[2026-03-04 04:29:44] (step=0052603) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 10.29211504597926, LR: 0.0003 +[2026-03-04 04:29:51] (step=0052604) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.292310702406574, LR: 0.0003 +[2026-03-04 04:29:59] (step=0052605) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.292506358833888, LR: 0.0003 +[2026-03-04 04:30:07] (step=0052606) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.2927020152612, LR: 0.0003 +[2026-03-04 04:30:15] (step=0052607) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.292897671688515, LR: 0.0003 +[2026-03-04 04:30:23] (step=0052608) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.293093328115829, LR: 0.0003 +[2026-03-04 04:30:31] (step=0052609) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.293288984543143, LR: 0.0003 +[2026-03-04 04:30:39] (step=0052610) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.293484640970457, LR: 0.0003 +[2026-03-04 04:30:46] (step=0052611) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.293680297397769, LR: 0.0003 +[2026-03-04 04:30:54] (step=0052612) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.293875953825083, LR: 0.0003 +[2026-03-04 04:31:02] (step=0052613) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.294071610252397, LR: 0.0003 +[2026-03-04 04:31:10] (step=0052614) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.29426726667971, LR: 0.0003 +[2026-03-04 04:31:18] (step=0052615) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 10.294462923107025, LR: 0.0003 +[2026-03-04 04:31:26] (step=0052616) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.294658579534337, LR: 0.0003 +[2026-03-04 04:31:34] (step=0052617) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.294854235961651, LR: 0.0003 +[2026-03-04 04:31:42] (step=0052618) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.295049892388965, LR: 0.0003 +[2026-03-04 04:31:49] (step=0052619) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.295245548816279, LR: 0.0003 +[2026-03-04 04:31:57] (step=0052620) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.295441205243593, LR: 0.0003 +[2026-03-04 04:32:05] (step=0052621) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.295636861670905, LR: 0.0003 +[2026-03-04 04:32:13] (step=0052622) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.29583251809822, LR: 0.0003 +[2026-03-04 04:32:21] (step=0052623) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.296028174525533, LR: 0.0003 +[2026-03-04 04:32:29] (step=0052624) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.296223830952847, LR: 0.0003 +[2026-03-04 04:32:37] (step=0052625) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.29641948738016, LR: 0.0003 +[2026-03-04 04:32:45] (step=0052626) Train Loss: 0.4488, Train Steps/Sec: 0.12, Epoch: 10.296615143807474, LR: 0.0003 +[2026-03-04 04:32:52] (step=0052627) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.296810800234788, LR: 0.0003 +[2026-03-04 04:33:00] (step=0052628) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.297006456662102, LR: 0.0003 +[2026-03-04 04:33:08] (step=0052629) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 10.297202113089416, LR: 0.0003 +[2026-03-04 04:33:16] (step=0052630) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.297397769516728, LR: 0.0003 +[2026-03-04 04:33:24] (step=0052631) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.297593425944042, LR: 0.0003 +[2026-03-04 04:33:32] (step=0052632) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.297789082371356, LR: 0.0003 +[2026-03-04 04:33:40] (step=0052633) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.29798473879867, LR: 0.0003 +[2026-03-04 04:33:48] (step=0052634) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.298180395225984, LR: 0.0003 +[2026-03-04 04:33:55] (step=0052635) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.298376051653296, LR: 0.0003 +[2026-03-04 04:34:03] (step=0052636) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.29857170808061, LR: 0.0003 +[2026-03-04 04:34:11] (step=0052637) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 10.298767364507924, LR: 0.0003 +[2026-03-04 04:34:19] (step=0052638) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.298963020935238, LR: 0.0003 +[2026-03-04 04:34:27] (step=0052639) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.299158677362552, LR: 0.0003 +[2026-03-04 04:34:35] (step=0052640) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 10.299354333789864, LR: 0.0003 +[2026-03-04 04:34:43] (step=0052641) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 10.299549990217178, LR: 0.0003 +[2026-03-04 04:34:50] (step=0052642) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.299745646644492, LR: 0.0003 +[2026-03-04 04:34:58] (step=0052643) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.299941303071806, LR: 0.0003 +[2026-03-04 04:35:06] (step=0052644) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 10.30013695949912, LR: 0.0003 +[2026-03-04 04:35:14] (step=0052645) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.300332615926433, LR: 0.0003 +[2026-03-04 04:35:22] (step=0052646) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.300528272353747, LR: 0.0003 +[2026-03-04 04:35:30] (step=0052647) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.30072392878106, LR: 0.0003 +[2026-03-04 04:35:38] (step=0052648) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.300919585208375, LR: 0.0003 +[2026-03-04 04:35:46] (step=0052649) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.301115241635689, LR: 0.0003 +[2026-03-04 04:35:53] (step=0052650) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.301310898063, LR: 0.0003 +[2026-03-04 04:36:01] (step=0052651) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 10.301506554490315, LR: 0.0003 +[2026-03-04 04:36:09] (step=0052652) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.301702210917629, LR: 0.0003 +[2026-03-04 04:36:17] (step=0052653) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.301897867344943, LR: 0.0003 +[2026-03-04 04:36:25] (step=0052654) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.302093523772255, LR: 0.0003 +[2026-03-04 04:36:33] (step=0052655) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 10.302289180199569, LR: 0.0003 +[2026-03-04 04:36:41] (step=0052656) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.302484836626883, LR: 0.0003 +[2026-03-04 04:36:49] (step=0052657) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.302680493054197, LR: 0.0003 +[2026-03-04 04:36:56] (step=0052658) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.302876149481511, LR: 0.0003 +[2026-03-04 04:37:04] (step=0052659) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.303071805908823, LR: 0.0003 +[2026-03-04 04:37:12] (step=0052660) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.303267462336137, LR: 0.0003 +[2026-03-04 04:37:20] (step=0052661) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.303463118763451, LR: 0.0003 +[2026-03-04 04:37:28] (step=0052662) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.303658775190765, LR: 0.0003 +[2026-03-04 04:37:36] (step=0052663) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.30385443161808, LR: 0.0003 +[2026-03-04 04:37:44] (step=0052664) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.304050088045392, LR: 0.0003 +[2026-03-04 04:37:52] (step=0052665) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.304245744472706, LR: 0.0003 +[2026-03-04 04:37:59] (step=0052666) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.30444140090002, LR: 0.0003 +[2026-03-04 04:38:07] (step=0052667) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.304637057327334, LR: 0.0003 +[2026-03-04 04:38:15] (step=0052668) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.304832713754648, LR: 0.0003 +[2026-03-04 04:38:23] (step=0052669) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.30502837018196, LR: 0.0003 +[2026-03-04 04:38:31] (step=0052670) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 10.305224026609274, LR: 0.0003 +[2026-03-04 04:38:39] (step=0052671) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.305419683036588, LR: 0.0003 +[2026-03-04 04:38:47] (step=0052672) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.305615339463902, LR: 0.0003 +[2026-03-04 04:38:54] (step=0052673) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.305810995891216, LR: 0.0003 +[2026-03-04 04:39:02] (step=0052674) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.306006652318528, LR: 0.0003 +[2026-03-04 04:39:10] (step=0052675) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.306202308745842, LR: 0.0003 +[2026-03-04 04:39:18] (step=0052676) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.306397965173156, LR: 0.0003 +[2026-03-04 04:39:26] (step=0052677) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 10.30659362160047, LR: 0.0003 +[2026-03-04 04:39:34] (step=0052678) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.306789278027782, LR: 0.0003 +[2026-03-04 04:39:42] (step=0052679) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.306984934455096, LR: 0.0003 +[2026-03-04 04:39:50] (step=0052680) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.30718059088241, LR: 0.0003 +[2026-03-04 04:39:57] (step=0052681) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.307376247309724, LR: 0.0003 +[2026-03-04 04:40:05] (step=0052682) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.307571903737038, LR: 0.0003 +[2026-03-04 04:40:13] (step=0052683) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.30776756016435, LR: 0.0003 +[2026-03-04 04:40:21] (step=0052684) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.307963216591665, LR: 0.0003 +[2026-03-04 04:40:29] (step=0052685) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.308158873018979, LR: 0.0003 +[2026-03-04 04:40:37] (step=0052686) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.308354529446293, LR: 0.0003 +[2026-03-04 04:40:45] (step=0052687) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.308550185873607, LR: 0.0003 +[2026-03-04 04:40:52] (step=0052688) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.308745842300919, LR: 0.0003 +[2026-03-04 04:41:00] (step=0052689) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.308941498728233, LR: 0.0003 +[2026-03-04 04:41:08] (step=0052690) Train Loss: 0.4260, Train Steps/Sec: 0.13, Epoch: 10.309137155155547, LR: 0.0003 +[2026-03-04 04:41:16] (step=0052691) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.30933281158286, LR: 0.0003 +[2026-03-04 04:41:24] (step=0052692) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.309528468010175, LR: 0.0003 +[2026-03-04 04:41:32] (step=0052693) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.309724124437487, LR: 0.0003 +[2026-03-04 04:41:40] (step=0052694) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.309919780864801, LR: 0.0003 +[2026-03-04 04:41:47] (step=0052695) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.310115437292115, LR: 0.0003 +[2026-03-04 04:41:55] (step=0052696) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.310311093719429, LR: 0.0003 +[2026-03-04 04:42:03] (step=0052697) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.310506750146743, LR: 0.0003 +[2026-03-04 04:42:11] (step=0052698) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.310702406574055, LR: 0.0003 +[2026-03-04 04:42:19] (step=0052699) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.31089806300137, LR: 0.0003 +[2026-03-04 04:42:27] (step=0052700) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.311093719428683, LR: 0.0003 +[2026-03-04 04:42:35] (step=0052701) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.311289375855997, LR: 0.0003 +[2026-03-04 04:42:43] (step=0052702) Train Loss: 0.4441, Train Steps/Sec: 0.12, Epoch: 10.311485032283311, LR: 0.0003 +[2026-03-04 04:42:51] (step=0052703) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 10.311680688710624, LR: 0.0003 +[2026-03-04 04:42:58] (step=0052704) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.311876345137938, LR: 0.0003 +[2026-03-04 04:43:06] (step=0052705) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.312072001565252, LR: 0.0003 +[2026-03-04 04:43:14] (step=0052706) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.312267657992566, LR: 0.0003 +[2026-03-04 04:43:22] (step=0052707) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.312463314419878, LR: 0.0003 +[2026-03-04 04:43:30] (step=0052708) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.312658970847192, LR: 0.0003 +[2026-03-04 04:43:38] (step=0052709) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.312854627274506, LR: 0.0003 +[2026-03-04 04:43:46] (step=0052710) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.31305028370182, LR: 0.0003 +[2026-03-04 04:43:54] (step=0052711) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.313245940129134, LR: 0.0003 +[2026-03-04 04:44:01] (step=0052712) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.313441596556446, LR: 0.0003 +[2026-03-04 04:44:09] (step=0052713) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.31363725298376, LR: 0.0003 +[2026-03-04 04:44:17] (step=0052714) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.313832909411074, LR: 0.0003 +[2026-03-04 04:44:25] (step=0052715) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.314028565838388, LR: 0.0003 +[2026-03-04 04:44:33] (step=0052716) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.314224222265702, LR: 0.0003 +[2026-03-04 04:44:41] (step=0052717) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.314419878693014, LR: 0.0003 +[2026-03-04 04:44:49] (step=0052718) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.314615535120328, LR: 0.0003 +[2026-03-04 04:44:56] (step=0052719) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.314811191547642, LR: 0.0003 +[2026-03-04 04:45:04] (step=0052720) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 10.315006847974956, LR: 0.0003 +[2026-03-04 04:45:12] (step=0052721) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.31520250440227, LR: 0.0003 +[2026-03-04 04:45:20] (step=0052722) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.315398160829583, LR: 0.0003 +[2026-03-04 04:45:28] (step=0052723) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.315593817256897, LR: 0.0003 +[2026-03-04 04:45:36] (step=0052724) Train Loss: 0.4551, Train Steps/Sec: 0.12, Epoch: 10.31578947368421, LR: 0.0003 +[2026-03-04 04:45:44] (step=0052725) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.315985130111525, LR: 0.0003 +[2026-03-04 04:45:52] (step=0052726) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.316180786538839, LR: 0.0003 +[2026-03-04 04:45:59] (step=0052727) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.31637644296615, LR: 0.0003 +[2026-03-04 04:46:07] (step=0052728) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.316572099393465, LR: 0.0003 +[2026-03-04 04:46:15] (step=0052729) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.316767755820779, LR: 0.0003 +[2026-03-04 04:46:23] (step=0052730) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.316963412248093, LR: 0.0003 +[2026-03-04 04:46:31] (step=0052731) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.317159068675405, LR: 0.0003 +[2026-03-04 04:46:39] (step=0052732) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.31735472510272, LR: 0.0003 +[2026-03-04 04:46:47] (step=0052733) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 10.317550381530033, LR: 0.0003 +[2026-03-04 04:46:54] (step=0052734) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.317746037957347, LR: 0.0003 +[2026-03-04 04:47:02] (step=0052735) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.317941694384661, LR: 0.0003 +[2026-03-04 04:47:10] (step=0052736) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.318137350811973, LR: 0.0003 +[2026-03-04 04:47:18] (step=0052737) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.318333007239287, LR: 0.0003 +[2026-03-04 04:47:26] (step=0052738) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.318528663666601, LR: 0.0003 +[2026-03-04 04:47:34] (step=0052739) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.318724320093915, LR: 0.0003 +[2026-03-04 04:47:42] (step=0052740) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.31891997652123, LR: 0.0003 +[2026-03-04 04:47:49] (step=0052741) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.319115632948542, LR: 0.0003 +[2026-03-04 04:47:57] (step=0052742) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.319311289375856, LR: 0.0003 +[2026-03-04 04:48:05] (step=0052743) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.31950694580317, LR: 0.0003 +[2026-03-04 04:48:13] (step=0052744) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.319702602230484, LR: 0.0003 +[2026-03-04 04:48:21] (step=0052745) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 10.319898258657798, LR: 0.0003 +[2026-03-04 04:48:29] (step=0052746) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.32009391508511, LR: 0.0003 +[2026-03-04 04:48:37] (step=0052747) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.320289571512424, LR: 0.0003 +[2026-03-04 04:48:45] (step=0052748) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.320485227939738, LR: 0.0003 +[2026-03-04 04:48:52] (step=0052749) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.320680884367052, LR: 0.0003 +[2026-03-04 04:49:00] (step=0052750) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.320876540794366, LR: 0.0003 +[2026-03-04 04:49:08] (step=0052751) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.321072197221678, LR: 0.0003 +[2026-03-04 04:49:16] (step=0052752) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 10.321267853648992, LR: 0.0003 +[2026-03-04 04:49:24] (step=0052753) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.321463510076306, LR: 0.0003 +[2026-03-04 04:49:32] (step=0052754) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.32165916650362, LR: 0.0003 +[2026-03-04 04:49:40] (step=0052755) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.321854822930934, LR: 0.0003 +[2026-03-04 04:49:48] (step=0052756) Train Loss: 0.4536, Train Steps/Sec: 0.12, Epoch: 10.322050479358246, LR: 0.0003 +[2026-03-04 04:49:56] (step=0052757) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.32224613578556, LR: 0.0003 +[2026-03-04 04:50:03] (step=0052758) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.322441792212874, LR: 0.0003 +[2026-03-04 04:50:11] (step=0052759) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 10.322637448640188, LR: 0.0003 +[2026-03-04 04:50:19] (step=0052760) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.3228331050675, LR: 0.0003 +[2026-03-04 04:50:27] (step=0052761) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.323028761494815, LR: 0.0003 +[2026-03-04 04:50:35] (step=0052762) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.323224417922129, LR: 0.0003 +[2026-03-04 04:50:43] (step=0052763) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 10.323420074349443, LR: 0.0003 +[2026-03-04 04:50:51] (step=0052764) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.323615730776757, LR: 0.0003 +[2026-03-04 04:50:58] (step=0052765) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.323811387204069, LR: 0.0003 +[2026-03-04 04:51:06] (step=0052766) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.324007043631383, LR: 0.0003 +[2026-03-04 04:51:14] (step=0052767) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.324202700058697, LR: 0.0003 +[2026-03-04 04:51:22] (step=0052768) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.324398356486011, LR: 0.0003 +[2026-03-04 04:51:30] (step=0052769) Train Loss: 0.4379, Train Steps/Sec: 0.12, Epoch: 10.324594012913325, LR: 0.0003 +[2026-03-04 04:51:38] (step=0052770) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 10.324789669340637, LR: 0.0003 +[2026-03-04 04:51:46] (step=0052771) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.324985325767951, LR: 0.0003 +[2026-03-04 04:51:54] (step=0052772) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.325180982195265, LR: 0.0003 +[2026-03-04 04:52:02] (step=0052773) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.32537663862258, LR: 0.0003 +[2026-03-04 04:52:09] (step=0052774) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.325572295049893, LR: 0.0003 +[2026-03-04 04:52:17] (step=0052775) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.325767951477205, LR: 0.0003 +[2026-03-04 04:52:25] (step=0052776) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 10.32596360790452, LR: 0.0003 +[2026-03-04 04:52:33] (step=0052777) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.326159264331833, LR: 0.0003 +[2026-03-04 04:52:41] (step=0052778) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.326354920759147, LR: 0.0003 +[2026-03-04 04:52:49] (step=0052779) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.326550577186461, LR: 0.0003 +[2026-03-04 04:52:57] (step=0052780) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.326746233613774, LR: 0.0003 +[2026-03-04 04:53:04] (step=0052781) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.326941890041088, LR: 0.0003 +[2026-03-04 04:53:12] (step=0052782) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.327137546468402, LR: 0.0003 +[2026-03-04 04:53:20] (step=0052783) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.327333202895716, LR: 0.0003 +[2026-03-04 04:53:28] (step=0052784) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.327528859323028, LR: 0.0003 +[2026-03-04 04:53:36] (step=0052785) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.327724515750342, LR: 0.0003 +[2026-03-04 04:53:44] (step=0052786) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.327920172177656, LR: 0.0003 +[2026-03-04 04:53:52] (step=0052787) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.32811582860497, LR: 0.0003 +[2026-03-04 04:54:00] (step=0052788) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 10.328311485032284, LR: 0.0003 +[2026-03-04 04:54:07] (step=0052789) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 10.328507141459596, LR: 0.0003 +[2026-03-04 04:54:15] (step=0052790) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.32870279788691, LR: 0.0003 +[2026-03-04 04:54:23] (step=0052791) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.328898454314224, LR: 0.0003 +[2026-03-04 04:54:31] (step=0052792) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.329094110741538, LR: 0.0003 +[2026-03-04 04:54:39] (step=0052793) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.329289767168852, LR: 0.0003 +[2026-03-04 04:54:47] (step=0052794) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.329485423596164, LR: 0.0003 +[2026-03-04 04:54:55] (step=0052795) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.329681080023478, LR: 0.0003 +[2026-03-04 04:55:02] (step=0052796) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.329876736450792, LR: 0.0003 +[2026-03-04 04:55:10] (step=0052797) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.330072392878106, LR: 0.0003 +[2026-03-04 04:55:18] (step=0052798) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.33026804930542, LR: 0.0003 +[2026-03-04 04:55:26] (step=0052799) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.330463705732733, LR: 0.0003 +[2026-03-04 04:55:34] (step=0052800) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.330659362160047, LR: 0.0003 +[2026-03-04 04:55:42] (step=0052801) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.33085501858736, LR: 0.0003 +[2026-03-04 04:55:50] (step=0052802) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.331050675014675, LR: 0.0003 +[2026-03-04 04:55:57] (step=0052803) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.331246331441989, LR: 0.0003 +[2026-03-04 04:56:05] (step=0052804) Train Loss: 0.4226, Train Steps/Sec: 0.13, Epoch: 10.331441987869301, LR: 0.0003 +[2026-03-04 04:56:13] (step=0052805) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 10.331637644296615, LR: 0.0003 +[2026-03-04 04:56:21] (step=0052806) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.331833300723929, LR: 0.0003 +[2026-03-04 04:56:29] (step=0052807) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.332028957151243, LR: 0.0003 +[2026-03-04 04:56:37] (step=0052808) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 10.332224613578557, LR: 0.0003 +[2026-03-04 04:56:45] (step=0052809) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.33242027000587, LR: 0.0003 +[2026-03-04 04:56:53] (step=0052810) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.332615926433183, LR: 0.0003 +[2026-03-04 04:57:00] (step=0052811) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.332811582860497, LR: 0.0003 +[2026-03-04 04:57:08] (step=0052812) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.333007239287811, LR: 0.0003 +[2026-03-04 04:57:16] (step=0052813) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.333202895715123, LR: 0.0003 +[2026-03-04 04:57:24] (step=0052814) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.333398552142437, LR: 0.0003 +[2026-03-04 04:57:32] (step=0052815) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.333594208569751, LR: 0.0003 +[2026-03-04 04:57:40] (step=0052816) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.333789864997065, LR: 0.0003 +[2026-03-04 04:57:48] (step=0052817) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.33398552142438, LR: 0.0003 +[2026-03-04 04:57:56] (step=0052818) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.334181177851692, LR: 0.0003 +[2026-03-04 04:58:03] (step=0052819) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.334376834279006, LR: 0.0003 +[2026-03-04 04:58:11] (step=0052820) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.33457249070632, LR: 0.0003 +[2026-03-04 04:58:19] (step=0052821) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.334768147133634, LR: 0.0003 +[2026-03-04 04:58:27] (step=0052822) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.334963803560948, LR: 0.0003 +[2026-03-04 04:58:35] (step=0052823) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.33515945998826, LR: 0.0003 +[2026-03-04 04:58:43] (step=0052824) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.335355116415574, LR: 0.0003 +[2026-03-04 04:58:51] (step=0052825) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.335550772842888, LR: 0.0003 +[2026-03-04 04:58:59] (step=0052826) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.335746429270202, LR: 0.0003 +[2026-03-04 04:59:06] (step=0052827) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.335942085697516, LR: 0.0003 +[2026-03-04 04:59:14] (step=0052828) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.336137742124828, LR: 0.0003 +[2026-03-04 04:59:22] (step=0052829) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.336333398552142, LR: 0.0003 +[2026-03-04 04:59:30] (step=0052830) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.336529054979456, LR: 0.0003 +[2026-03-04 04:59:38] (step=0052831) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.33672471140677, LR: 0.0003 +[2026-03-04 04:59:46] (step=0052832) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.336920367834084, LR: 0.0003 +[2026-03-04 04:59:54] (step=0052833) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.337116024261396, LR: 0.0003 +[2026-03-04 05:00:01] (step=0052834) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.33731168068871, LR: 0.0003 +[2026-03-04 05:00:09] (step=0052835) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 10.337507337116024, LR: 0.0003 +[2026-03-04 05:00:17] (step=0052836) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 10.337702993543338, LR: 0.0003 +[2026-03-04 05:00:25] (step=0052837) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.33789864997065, LR: 0.0003 +[2026-03-04 05:00:33] (step=0052838) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 10.338094306397965, LR: 0.0003 +[2026-03-04 05:00:41] (step=0052839) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.338289962825279, LR: 0.0003 +[2026-03-04 05:00:49] (step=0052840) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.338485619252593, LR: 0.0003 +[2026-03-04 05:00:57] (step=0052841) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.338681275679907, LR: 0.0003 +[2026-03-04 05:01:04] (step=0052842) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.338876932107219, LR: 0.0003 +[2026-03-04 05:01:12] (step=0052843) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.339072588534533, LR: 0.0003 +[2026-03-04 05:01:20] (step=0052844) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.339268244961847, LR: 0.0003 +[2026-03-04 05:01:28] (step=0052845) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.339463901389161, LR: 0.0003 +[2026-03-04 05:01:36] (step=0052846) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.339659557816475, LR: 0.0003 +[2026-03-04 05:01:44] (step=0052847) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.339855214243787, LR: 0.0003 +[2026-03-04 05:01:52] (step=0052848) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.340050870671101, LR: 0.0003 +[2026-03-04 05:01:59] (step=0052849) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.340246527098415, LR: 0.0003 +[2026-03-04 05:02:07] (step=0052850) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 10.34044218352573, LR: 0.0003 +[2026-03-04 05:02:15] (step=0052851) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.340637839953043, LR: 0.0003 +[2026-03-04 05:02:23] (step=0052852) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.340833496380355, LR: 0.0003 +[2026-03-04 05:02:31] (step=0052853) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.34102915280767, LR: 0.0003 +[2026-03-04 05:02:39] (step=0052854) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.341224809234983, LR: 0.0003 +[2026-03-04 05:02:47] (step=0052855) Train Loss: 0.4414, Train Steps/Sec: 0.12, Epoch: 10.341420465662297, LR: 0.0003 +[2026-03-04 05:02:55] (step=0052856) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.341616122089611, LR: 0.0003 +[2026-03-04 05:03:02] (step=0052857) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 10.341811778516924, LR: 0.0003 +[2026-03-04 05:03:10] (step=0052858) Train Loss: 0.4248, Train Steps/Sec: 0.13, Epoch: 10.342007434944238, LR: 0.0003 +[2026-03-04 05:03:18] (step=0052859) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.342203091371552, LR: 0.0003 +[2026-03-04 05:03:26] (step=0052860) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.342398747798866, LR: 0.0003 +[2026-03-04 05:03:34] (step=0052861) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.34259440422618, LR: 0.0003 +[2026-03-04 05:03:42] (step=0052862) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.342790060653492, LR: 0.0003 +[2026-03-04 05:03:50] (step=0052863) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.342985717080806, LR: 0.0003 +[2026-03-04 05:03:57] (step=0052864) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.34318137350812, LR: 0.0003 +[2026-03-04 05:04:05] (step=0052865) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.343377029935434, LR: 0.0003 +[2026-03-04 05:04:13] (step=0052866) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.343572686362746, LR: 0.0003 +[2026-03-04 05:04:21] (step=0052867) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.34376834279006, LR: 0.0003 +[2026-03-04 05:04:29] (step=0052868) Train Loss: 0.4398, Train Steps/Sec: 0.12, Epoch: 10.343963999217374, LR: 0.0003 +[2026-03-04 05:04:37] (step=0052869) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.344159655644688, LR: 0.0003 +[2026-03-04 05:04:45] (step=0052870) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.344355312072002, LR: 0.0003 +[2026-03-04 05:04:53] (step=0052871) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.344550968499314, LR: 0.0003 +[2026-03-04 05:05:01] (step=0052872) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.344746624926628, LR: 0.0003 +[2026-03-04 05:05:08] (step=0052873) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.344942281353942, LR: 0.0003 +[2026-03-04 05:05:16] (step=0052874) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.345137937781256, LR: 0.0003 +[2026-03-04 05:05:24] (step=0052875) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.34533359420857, LR: 0.0003 +[2026-03-04 05:05:32] (step=0052876) Train Loss: 0.4671, Train Steps/Sec: 0.13, Epoch: 10.345529250635883, LR: 0.0003 +[2026-03-04 05:05:40] (step=0052877) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.345724907063197, LR: 0.0003 +[2026-03-04 05:05:48] (step=0052878) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.34592056349051, LR: 0.0003 +[2026-03-04 05:05:56] (step=0052879) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 10.346116219917825, LR: 0.0003 +[2026-03-04 05:06:03] (step=0052880) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.346311876345139, LR: 0.0003 +[2026-03-04 05:06:11] (step=0052881) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.346507532772451, LR: 0.0003 +[2026-03-04 05:06:19] (step=0052882) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 10.346703189199765, LR: 0.0003 +[2026-03-04 05:06:27] (step=0052883) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.346898845627079, LR: 0.0003 +[2026-03-04 05:06:35] (step=0052884) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.347094502054393, LR: 0.0003 +[2026-03-04 05:06:43] (step=0052885) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.347290158481707, LR: 0.0003 +[2026-03-04 05:06:51] (step=0052886) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 10.34748581490902, LR: 0.0003 +[2026-03-04 05:06:58] (step=0052887) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.347681471336333, LR: 0.0003 +[2026-03-04 05:07:06] (step=0052888) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.347877127763647, LR: 0.0003 +[2026-03-04 05:07:14] (step=0052889) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.348072784190961, LR: 0.0003 +[2026-03-04 05:07:22] (step=0052890) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.348268440618273, LR: 0.0003 +[2026-03-04 05:07:30] (step=0052891) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 10.348464097045587, LR: 0.0003 +[2026-03-04 05:07:38] (step=0052892) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.348659753472901, LR: 0.0003 +[2026-03-04 05:07:46] (step=0052893) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.348855409900215, LR: 0.0003 +[2026-03-04 05:07:54] (step=0052894) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.34905106632753, LR: 0.0003 +[2026-03-04 05:08:01] (step=0052895) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.349246722754842, LR: 0.0003 +[2026-03-04 05:08:09] (step=0052896) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.349442379182156, LR: 0.0003 +[2026-03-04 05:08:17] (step=0052897) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.34963803560947, LR: 0.0003 +[2026-03-04 05:08:25] (step=0052898) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.349833692036784, LR: 0.0003 +[2026-03-04 05:08:33] (step=0052899) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.350029348464098, LR: 0.0003 +[2026-03-04 05:08:41] (step=0052900) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.35022500489141, LR: 0.0003 +[2026-03-04 05:08:49] (step=0052901) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.350420661318724, LR: 0.0003 +[2026-03-04 05:08:56] (step=0052902) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.350616317746038, LR: 0.0003 +[2026-03-04 05:09:04] (step=0052903) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.350811974173352, LR: 0.0003 +[2026-03-04 05:09:12] (step=0052904) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.351007630600666, LR: 0.0003 +[2026-03-04 05:09:20] (step=0052905) Train Loss: 0.4584, Train Steps/Sec: 0.12, Epoch: 10.351203287027978, LR: 0.0003 +[2026-03-04 05:09:28] (step=0052906) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.351398943455292, LR: 0.0003 +[2026-03-04 05:09:36] (step=0052907) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.351594599882606, LR: 0.0003 +[2026-03-04 05:09:44] (step=0052908) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.35179025630992, LR: 0.0003 +[2026-03-04 05:09:52] (step=0052909) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.351985912737234, LR: 0.0003 +[2026-03-04 05:09:59] (step=0052910) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 10.352181569164546, LR: 0.0003 +[2026-03-04 05:10:07] (step=0052911) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.35237722559186, LR: 0.0003 +[2026-03-04 05:10:15] (step=0052912) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.352572882019174, LR: 0.0003 +[2026-03-04 05:10:23] (step=0052913) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 10.352768538446488, LR: 0.0003 +[2026-03-04 05:10:31] (step=0052914) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.352964194873802, LR: 0.0003 +[2026-03-04 05:10:39] (step=0052915) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.353159851301115, LR: 0.0003 +[2026-03-04 05:10:47] (step=0052916) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.353355507728429, LR: 0.0003 +[2026-03-04 05:10:55] (step=0052917) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.353551164155743, LR: 0.0003 +[2026-03-04 05:11:02] (step=0052918) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.353746820583057, LR: 0.0003 +[2026-03-04 05:11:10] (step=0052919) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.353942477010369, LR: 0.0003 +[2026-03-04 05:11:18] (step=0052920) Train Loss: 0.4474, Train Steps/Sec: 0.12, Epoch: 10.354138133437683, LR: 0.0003 +[2026-03-04 05:11:26] (step=0052921) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.354333789864997, LR: 0.0003 +[2026-03-04 05:11:34] (step=0052922) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.354529446292311, LR: 0.0003 +[2026-03-04 05:11:42] (step=0052923) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.354725102719625, LR: 0.0003 +[2026-03-04 05:11:50] (step=0052924) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.354920759146937, LR: 0.0003 +[2026-03-04 05:11:58] (step=0052925) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.355116415574251, LR: 0.0003 +[2026-03-04 05:12:06] (step=0052926) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.355312072001565, LR: 0.0003 +[2026-03-04 05:12:13] (step=0052927) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.35550772842888, LR: 0.0003 +[2026-03-04 05:12:21] (step=0052928) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.355703384856193, LR: 0.0003 +[2026-03-04 05:12:29] (step=0052929) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.355899041283505, LR: 0.0003 +[2026-03-04 05:12:37] (step=0052930) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.35609469771082, LR: 0.0003 +[2026-03-04 05:12:45] (step=0052931) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.356290354138133, LR: 0.0003 +[2026-03-04 05:12:53] (step=0052932) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.356486010565447, LR: 0.0003 +[2026-03-04 05:13:01] (step=0052933) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.356681666992761, LR: 0.0003 +[2026-03-04 05:13:08] (step=0052934) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.356877323420074, LR: 0.0003 +[2026-03-04 05:13:16] (step=0052935) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.357072979847388, LR: 0.0003 +[2026-03-04 05:13:24] (step=0052936) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.357268636274702, LR: 0.0003 +[2026-03-04 05:13:32] (step=0052937) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.357464292702016, LR: 0.0003 +[2026-03-04 05:13:40] (step=0052938) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.35765994912933, LR: 0.0003 +[2026-03-04 05:13:48] (step=0052939) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.357855605556642, LR: 0.0003 +[2026-03-04 05:13:56] (step=0052940) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.358051261983956, LR: 0.0003 +[2026-03-04 05:14:03] (step=0052941) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.35824691841127, LR: 0.0003 +[2026-03-04 05:14:11] (step=0052942) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.358442574838584, LR: 0.0003 +[2026-03-04 05:14:19] (step=0052943) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.358638231265896, LR: 0.0003 +[2026-03-04 05:14:27] (step=0052944) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.35883388769321, LR: 0.0003 +[2026-03-04 05:14:35] (step=0052945) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.359029544120524, LR: 0.0003 +[2026-03-04 05:14:43] (step=0052946) Train Loss: 0.4261, Train Steps/Sec: 0.13, Epoch: 10.359225200547838, LR: 0.0003 +[2026-03-04 05:14:51] (step=0052947) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.359420856975152, LR: 0.0003 +[2026-03-04 05:14:58] (step=0052948) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.359616513402464, LR: 0.0003 +[2026-03-04 05:15:06] (step=0052949) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.359812169829778, LR: 0.0003 +[2026-03-04 05:15:14] (step=0052950) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.360007826257092, LR: 0.0003 +[2026-03-04 05:15:22] (step=0052951) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.360203482684406, LR: 0.0003 +[2026-03-04 05:15:30] (step=0052952) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.36039913911172, LR: 0.0003 +[2026-03-04 05:15:38] (step=0052953) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.360594795539033, LR: 0.0003 +[2026-03-04 05:15:46] (step=0052954) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.360790451966347, LR: 0.0003 +[2026-03-04 05:15:54] (step=0052955) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.36098610839366, LR: 0.0003 +[2026-03-04 05:16:01] (step=0052956) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.361181764820975, LR: 0.0003 +[2026-03-04 05:16:09] (step=0052957) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.361377421248289, LR: 0.0003 +[2026-03-04 05:16:17] (step=0052958) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.361573077675601, LR: 0.0003 +[2026-03-04 05:16:25] (step=0052959) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.361768734102915, LR: 0.0003 +[2026-03-04 05:16:33] (step=0052960) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.361964390530229, LR: 0.0003 +[2026-03-04 05:16:41] (step=0052961) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.362160046957543, LR: 0.0003 +[2026-03-04 05:16:49] (step=0052962) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.362355703384857, LR: 0.0003 +[2026-03-04 05:16:57] (step=0052963) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.36255135981217, LR: 0.0003 +[2026-03-04 05:17:04] (step=0052964) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.362747016239483, LR: 0.0003 +[2026-03-04 05:17:12] (step=0052965) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.362942672666797, LR: 0.0003 +[2026-03-04 05:17:20] (step=0052966) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.363138329094111, LR: 0.0003 +[2026-03-04 05:17:28] (step=0052967) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 10.363333985521425, LR: 0.0003 +[2026-03-04 05:17:36] (step=0052968) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.363529641948737, LR: 0.0003 +[2026-03-04 05:17:44] (step=0052969) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.363725298376051, LR: 0.0003 +[2026-03-04 05:17:52] (step=0052970) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.363920954803366, LR: 0.0003 +[2026-03-04 05:18:00] (step=0052971) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.36411661123068, LR: 0.0003 +[2026-03-04 05:18:08] (step=0052972) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.364312267657992, LR: 0.0003 +[2026-03-04 05:18:15] (step=0052973) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 10.364507924085306, LR: 0.0003 +[2026-03-04 05:18:23] (step=0052974) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.36470358051262, LR: 0.0003 +[2026-03-04 05:18:31] (step=0052975) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.364899236939934, LR: 0.0003 +[2026-03-04 05:18:39] (step=0052976) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.365094893367248, LR: 0.0003 +[2026-03-04 05:18:47] (step=0052977) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 10.36529054979456, LR: 0.0003 +[2026-03-04 05:18:55] (step=0052978) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.365486206221874, LR: 0.0003 +[2026-03-04 05:19:03] (step=0052979) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.365681862649188, LR: 0.0003 +[2026-03-04 05:19:10] (step=0052980) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.365877519076502, LR: 0.0003 +[2026-03-04 05:19:18] (step=0052981) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.366073175503816, LR: 0.0003 +[2026-03-04 05:19:26] (step=0052982) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.366268831931128, LR: 0.0003 +[2026-03-04 05:19:34] (step=0052983) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.366464488358442, LR: 0.0003 +[2026-03-04 05:19:42] (step=0052984) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.366660144785756, LR: 0.0003 +[2026-03-04 05:19:50] (step=0052985) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.36685580121307, LR: 0.0003 +[2026-03-04 05:19:58] (step=0052986) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 10.367051457640384, LR: 0.0003 +[2026-03-04 05:20:05] (step=0052987) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.367247114067697, LR: 0.0003 +[2026-03-04 05:20:13] (step=0052988) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.36744277049501, LR: 0.0003 +[2026-03-04 05:20:21] (step=0052989) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.367638426922325, LR: 0.0003 +[2026-03-04 05:20:29] (step=0052990) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.367834083349639, LR: 0.0003 +[2026-03-04 05:20:37] (step=0052991) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.368029739776953, LR: 0.0003 +[2026-03-04 05:20:45] (step=0052992) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.368225396204265, LR: 0.0003 +[2026-03-04 05:20:53] (step=0052993) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.368421052631579, LR: 0.0003 +[2026-03-04 05:21:00] (step=0052994) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.368616709058893, LR: 0.0003 +[2026-03-04 05:21:08] (step=0052995) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.368812365486207, LR: 0.0003 +[2026-03-04 05:21:16] (step=0052996) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.369008021913519, LR: 0.0003 +[2026-03-04 05:21:24] (step=0052997) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.369203678340833, LR: 0.0003 +[2026-03-04 05:21:32] (step=0052998) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.369399334768147, LR: 0.0003 +[2026-03-04 05:21:40] (step=0052999) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.369594991195461, LR: 0.0003 +[2026-03-04 05:21:48] (step=0053000) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.369790647622775, LR: 0.0003 +[2026-03-04 05:21:48] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0053000/ +[2026-03-04 05:21:56] (step=0053001) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.369986304050087, LR: 0.0003 +[2026-03-04 05:22:03] (step=0053002) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.370181960477401, LR: 0.0003 +[2026-03-04 05:22:11] (step=0053003) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.370377616904715, LR: 0.0003 +[2026-03-04 05:22:19] (step=0053004) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.37057327333203, LR: 0.0003 +[2026-03-04 05:22:27] (step=0053005) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.370768929759343, LR: 0.0003 +[2026-03-04 05:22:35] (step=0053006) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.370964586186656, LR: 0.0003 +[2026-03-04 05:22:43] (step=0053007) Train Loss: 0.4379, Train Steps/Sec: 0.12, Epoch: 10.37116024261397, LR: 0.0003 +[2026-03-04 05:22:51] (step=0053008) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.371355899041284, LR: 0.0003 +[2026-03-04 05:22:59] (step=0053009) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.371551555468598, LR: 0.0003 +[2026-03-04 05:23:07] (step=0053010) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.371747211895912, LR: 0.0003 +[2026-03-04 05:23:14] (step=0053011) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 10.371942868323224, LR: 0.0003 +[2026-03-04 05:23:22] (step=0053012) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.372138524750538, LR: 0.0003 +[2026-03-04 05:23:30] (step=0053013) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 10.372334181177852, LR: 0.0003 +[2026-03-04 05:23:38] (step=0053014) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.372529837605166, LR: 0.0003 +[2026-03-04 05:23:46] (step=0053015) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.37272549403248, LR: 0.0003 +[2026-03-04 05:23:54] (step=0053016) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.372921150459792, LR: 0.0003 +[2026-03-04 05:24:02] (step=0053017) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.373116806887106, LR: 0.0003 +[2026-03-04 05:24:10] (step=0053018) Train Loss: 0.4499, Train Steps/Sec: 0.12, Epoch: 10.37331246331442, LR: 0.0003 +[2026-03-04 05:24:17] (step=0053019) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.373508119741734, LR: 0.0003 +[2026-03-04 05:24:25] (step=0053020) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.373703776169048, LR: 0.0003 +[2026-03-04 05:24:33] (step=0053021) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.37389943259636, LR: 0.0003 +[2026-03-04 05:24:41] (step=0053022) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.374095089023674, LR: 0.0003 +[2026-03-04 05:24:49] (step=0053023) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.374290745450988, LR: 0.0003 +[2026-03-04 05:24:57] (step=0053024) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 10.374486401878302, LR: 0.0003 +[2026-03-04 05:25:05] (step=0053025) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.374682058305615, LR: 0.0003 +[2026-03-04 05:25:12] (step=0053026) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.374877714732929, LR: 0.0003 +[2026-03-04 05:25:20] (step=0053027) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.375073371160243, LR: 0.0003 +[2026-03-04 05:25:28] (step=0053028) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.375269027587557, LR: 0.0003 +[2026-03-04 05:25:36] (step=0053029) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.37546468401487, LR: 0.0003 +[2026-03-04 05:25:44] (step=0053030) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.375660340442183, LR: 0.0003 +[2026-03-04 05:25:52] (step=0053031) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.375855996869497, LR: 0.0003 +[2026-03-04 05:26:00] (step=0053032) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 10.37605165329681, LR: 0.0003 +[2026-03-04 05:26:08] (step=0053033) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.376247309724125, LR: 0.0003 +[2026-03-04 05:26:15] (step=0053034) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.376442966151439, LR: 0.0003 +[2026-03-04 05:26:23] (step=0053035) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.376638622578751, LR: 0.0003 +[2026-03-04 05:26:31] (step=0053036) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.376834279006065, LR: 0.0003 +[2026-03-04 05:26:39] (step=0053037) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.377029935433379, LR: 0.0003 +[2026-03-04 05:26:47] (step=0053038) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.377225591860693, LR: 0.0003 +[2026-03-04 05:26:55] (step=0053039) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 10.377421248288007, LR: 0.0003 +[2026-03-04 05:27:02] (step=0053040) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.37761690471532, LR: 0.0003 +[2026-03-04 05:27:10] (step=0053041) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.377812561142633, LR: 0.0003 +[2026-03-04 05:27:18] (step=0053042) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.378008217569947, LR: 0.0003 +[2026-03-04 05:27:26] (step=0053043) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.378203873997261, LR: 0.0003 +[2026-03-04 05:27:34] (step=0053044) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.378399530424575, LR: 0.0003 +[2026-03-04 05:27:42] (step=0053045) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.378595186851888, LR: 0.0003 +[2026-03-04 05:27:50] (step=0053046) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.378790843279202, LR: 0.0003 +[2026-03-04 05:27:57] (step=0053047) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.378986499706516, LR: 0.0003 +[2026-03-04 05:28:05] (step=0053048) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.37918215613383, LR: 0.0003 +[2026-03-04 05:28:13] (step=0053049) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.379377812561142, LR: 0.0003 +[2026-03-04 05:28:21] (step=0053050) Train Loss: 0.4404, Train Steps/Sec: 0.12, Epoch: 10.379573468988456, LR: 0.0003 +[2026-03-04 05:28:29] (step=0053051) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.37976912541577, LR: 0.0003 +[2026-03-04 05:28:37] (step=0053052) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.379964781843084, LR: 0.0003 +[2026-03-04 05:28:45] (step=0053053) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.380160438270398, LR: 0.0003 +[2026-03-04 05:28:53] (step=0053054) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.38035609469771, LR: 0.0003 +[2026-03-04 05:29:00] (step=0053055) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.380551751125024, LR: 0.0003 +[2026-03-04 05:29:08] (step=0053056) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.380747407552338, LR: 0.0003 +[2026-03-04 05:29:16] (step=0053057) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.380943063979652, LR: 0.0003 +[2026-03-04 05:29:24] (step=0053058) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 10.381138720406966, LR: 0.0003 +[2026-03-04 05:29:32] (step=0053059) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.381334376834278, LR: 0.0003 +[2026-03-04 05:29:40] (step=0053060) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.381530033261592, LR: 0.0003 +[2026-03-04 05:29:47] (step=0053061) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.381725689688906, LR: 0.0003 +[2026-03-04 05:29:55] (step=0053062) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.38192134611622, LR: 0.0003 +[2026-03-04 05:30:03] (step=0053063) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.382117002543534, LR: 0.0003 +[2026-03-04 05:30:11] (step=0053064) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.382312658970847, LR: 0.0003 +[2026-03-04 05:30:19] (step=0053065) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.38250831539816, LR: 0.0003 +[2026-03-04 05:30:27] (step=0053066) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.382703971825475, LR: 0.0003 +[2026-03-04 05:30:35] (step=0053067) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.382899628252789, LR: 0.0003 +[2026-03-04 05:30:42] (step=0053068) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.383095284680103, LR: 0.0003 +[2026-03-04 05:30:50] (step=0053069) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.383290941107415, LR: 0.0003 +[2026-03-04 05:30:58] (step=0053070) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.383486597534729, LR: 0.0003 +[2026-03-04 05:31:06] (step=0053071) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.383682253962043, LR: 0.0003 +[2026-03-04 05:31:14] (step=0053072) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.383877910389357, LR: 0.0003 +[2026-03-04 05:31:22] (step=0053073) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 10.384073566816669, LR: 0.0003 +[2026-03-04 05:31:30] (step=0053074) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.384269223243983, LR: 0.0003 +[2026-03-04 05:31:38] (step=0053075) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.384464879671297, LR: 0.0003 +[2026-03-04 05:31:45] (step=0053076) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.384660536098611, LR: 0.0003 +[2026-03-04 05:31:53] (step=0053077) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.384856192525925, LR: 0.0003 +[2026-03-04 05:32:01] (step=0053078) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.385051848953237, LR: 0.0003 +[2026-03-04 05:32:09] (step=0053079) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.385247505380551, LR: 0.0003 +[2026-03-04 05:32:17] (step=0053080) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 10.385443161807865, LR: 0.0003 +[2026-03-04 05:32:25] (step=0053081) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.38563881823518, LR: 0.0003 +[2026-03-04 05:32:33] (step=0053082) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 10.385834474662493, LR: 0.0003 +[2026-03-04 05:32:41] (step=0053083) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.386030131089806, LR: 0.0003 +[2026-03-04 05:32:48] (step=0053084) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.38622578751712, LR: 0.0003 +[2026-03-04 05:32:56] (step=0053085) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.386421443944434, LR: 0.0003 +[2026-03-04 05:33:04] (step=0053086) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.386617100371748, LR: 0.0003 +[2026-03-04 05:33:12] (step=0053087) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.386812756799062, LR: 0.0003 +[2026-03-04 05:33:20] (step=0053088) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.387008413226374, LR: 0.0003 +[2026-03-04 05:33:28] (step=0053089) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.387204069653688, LR: 0.0003 +[2026-03-04 05:33:36] (step=0053090) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.387399726081002, LR: 0.0003 +[2026-03-04 05:33:43] (step=0053091) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 10.387595382508316, LR: 0.0003 +[2026-03-04 05:33:51] (step=0053092) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.38779103893563, LR: 0.0003 +[2026-03-04 05:33:59] (step=0053093) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.387986695362942, LR: 0.0003 +[2026-03-04 05:34:07] (step=0053094) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.388182351790256, LR: 0.0003 +[2026-03-04 05:34:15] (step=0053095) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.38837800821757, LR: 0.0003 +[2026-03-04 05:34:23] (step=0053096) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.388573664644884, LR: 0.0003 +[2026-03-04 05:34:31] (step=0053097) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.388769321072198, LR: 0.0003 +[2026-03-04 05:34:38] (step=0053098) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.38896497749951, LR: 0.0003 +[2026-03-04 05:34:46] (step=0053099) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 10.389160633926824, LR: 0.0003 +[2026-03-04 05:34:54] (step=0053100) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.389356290354138, LR: 0.0003 +[2026-03-04 05:35:02] (step=0053101) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.389551946781452, LR: 0.0003 +[2026-03-04 05:35:10] (step=0053102) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.389747603208765, LR: 0.0003 +[2026-03-04 05:35:18] (step=0053103) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.389943259636079, LR: 0.0003 +[2026-03-04 05:35:26] (step=0053104) Train Loss: 0.4392, Train Steps/Sec: 0.12, Epoch: 10.390138916063393, LR: 0.0003 +[2026-03-04 05:35:34] (step=0053105) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.390334572490707, LR: 0.0003 +[2026-03-04 05:35:42] (step=0053106) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.39053022891802, LR: 0.0003 +[2026-03-04 05:35:49] (step=0053107) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.390725885345333, LR: 0.0003 +[2026-03-04 05:35:57] (step=0053108) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.390921541772647, LR: 0.0003 +[2026-03-04 05:36:05] (step=0053109) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.39111719819996, LR: 0.0003 +[2026-03-04 05:36:13] (step=0053110) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.391312854627275, LR: 0.0003 +[2026-03-04 05:36:21] (step=0053111) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.391508511054589, LR: 0.0003 +[2026-03-04 05:36:29] (step=0053112) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.391704167481901, LR: 0.0003 +[2026-03-04 05:36:37] (step=0053113) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.391899823909215, LR: 0.0003 +[2026-03-04 05:36:44] (step=0053114) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.392095480336529, LR: 0.0003 +[2026-03-04 05:36:52] (step=0053115) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 10.392291136763843, LR: 0.0003 +[2026-03-04 05:37:00] (step=0053116) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.392486793191157, LR: 0.0003 +[2026-03-04 05:37:08] (step=0053117) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.39268244961847, LR: 0.0003 +[2026-03-04 05:37:16] (step=0053118) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.392878106045783, LR: 0.0003 +[2026-03-04 05:37:24] (step=0053119) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.393073762473097, LR: 0.0003 +[2026-03-04 05:37:32] (step=0053120) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.393269418900411, LR: 0.0003 +[2026-03-04 05:37:39] (step=0053121) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.393465075327725, LR: 0.0003 +[2026-03-04 05:37:47] (step=0053122) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.393660731755038, LR: 0.0003 +[2026-03-04 05:37:55] (step=0053123) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.393856388182352, LR: 0.0003 +[2026-03-04 05:38:03] (step=0053124) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.394052044609666, LR: 0.0003 +[2026-03-04 05:38:11] (step=0053125) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.39424770103698, LR: 0.0003 +[2026-03-04 05:38:19] (step=0053126) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.394443357464292, LR: 0.0003 +[2026-03-04 05:38:27] (step=0053127) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.394639013891606, LR: 0.0003 +[2026-03-04 05:38:35] (step=0053128) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.39483467031892, LR: 0.0003 +[2026-03-04 05:38:42] (step=0053129) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.395030326746234, LR: 0.0003 +[2026-03-04 05:38:50] (step=0053130) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 10.395225983173548, LR: 0.0003 +[2026-03-04 05:38:58] (step=0053131) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.39542163960086, LR: 0.0003 +[2026-03-04 05:39:06] (step=0053132) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.395617296028174, LR: 0.0003 +[2026-03-04 05:39:14] (step=0053133) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.395812952455488, LR: 0.0003 +[2026-03-04 05:39:22] (step=0053134) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 10.396008608882802, LR: 0.0003 +[2026-03-04 05:39:30] (step=0053135) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.396204265310116, LR: 0.0003 +[2026-03-04 05:39:38] (step=0053136) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.396399921737428, LR: 0.0003 +[2026-03-04 05:39:45] (step=0053137) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.396595578164742, LR: 0.0003 +[2026-03-04 05:39:53] (step=0053138) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.396791234592056, LR: 0.0003 +[2026-03-04 05:40:01] (step=0053139) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.39698689101937, LR: 0.0003 +[2026-03-04 05:40:09] (step=0053140) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.397182547446684, LR: 0.0003 +[2026-03-04 05:40:17] (step=0053141) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.397378203873997, LR: 0.0003 +[2026-03-04 05:40:25] (step=0053142) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.39757386030131, LR: 0.0003 +[2026-03-04 05:40:33] (step=0053143) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.397769516728625, LR: 0.0003 +[2026-03-04 05:40:40] (step=0053144) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.397965173155939, LR: 0.0003 +[2026-03-04 05:40:48] (step=0053145) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.398160829583253, LR: 0.0003 +[2026-03-04 05:40:56] (step=0053146) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.398356486010565, LR: 0.0003 +[2026-03-04 05:41:04] (step=0053147) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.398552142437879, LR: 0.0003 +[2026-03-04 05:41:12] (step=0053148) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.398747798865193, LR: 0.0003 +[2026-03-04 05:41:20] (step=0053149) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.398943455292507, LR: 0.0003 +[2026-03-04 05:41:28] (step=0053150) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.39913911171982, LR: 0.0003 +[2026-03-04 05:41:36] (step=0053151) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.399334768147133, LR: 0.0003 +[2026-03-04 05:41:43] (step=0053152) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.399530424574447, LR: 0.0003 +[2026-03-04 05:41:51] (step=0053153) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.399726081001761, LR: 0.0003 +[2026-03-04 05:41:59] (step=0053154) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.399921737429075, LR: 0.0003 +[2026-03-04 05:42:07] (step=0053155) Train Loss: 0.4437, Train Steps/Sec: 0.12, Epoch: 10.400117393856387, LR: 0.0003 +[2026-03-04 05:42:15] (step=0053156) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.400313050283701, LR: 0.0003 +[2026-03-04 05:42:23] (step=0053157) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 10.400508706711015, LR: 0.0003 +[2026-03-04 05:42:31] (step=0053158) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.40070436313833, LR: 0.0003 +[2026-03-04 05:42:39] (step=0053159) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.400900019565643, LR: 0.0003 +[2026-03-04 05:42:46] (step=0053160) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.401095675992956, LR: 0.0003 +[2026-03-04 05:42:54] (step=0053161) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.40129133242027, LR: 0.0003 +[2026-03-04 05:43:02] (step=0053162) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 10.401486988847584, LR: 0.0003 +[2026-03-04 05:43:10] (step=0053163) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.401682645274898, LR: 0.0003 +[2026-03-04 05:43:18] (step=0053164) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.401878301702212, LR: 0.0003 +[2026-03-04 05:43:26] (step=0053165) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.402073958129524, LR: 0.0003 +[2026-03-04 05:43:34] (step=0053166) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.402269614556838, LR: 0.0003 +[2026-03-04 05:43:41] (step=0053167) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 10.402465270984152, LR: 0.0003 +[2026-03-04 05:43:49] (step=0053168) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.402660927411466, LR: 0.0003 +[2026-03-04 05:43:57] (step=0053169) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.40285658383878, LR: 0.0003 +[2026-03-04 05:44:05] (step=0053170) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.403052240266092, LR: 0.0003 +[2026-03-04 05:44:13] (step=0053171) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.403247896693406, LR: 0.0003 +[2026-03-04 05:44:21] (step=0053172) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.40344355312072, LR: 0.0003 +[2026-03-04 05:44:29] (step=0053173) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.403639209548034, LR: 0.0003 +[2026-03-04 05:44:37] (step=0053174) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.403834865975348, LR: 0.0003 +[2026-03-04 05:44:44] (step=0053175) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.40403052240266, LR: 0.0003 +[2026-03-04 05:44:52] (step=0053176) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.404226178829974, LR: 0.0003 +[2026-03-04 05:45:00] (step=0053177) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.404421835257288, LR: 0.0003 +[2026-03-04 05:45:08] (step=0053178) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 10.404617491684602, LR: 0.0003 +[2026-03-04 05:45:16] (step=0053179) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.404813148111915, LR: 0.0003 +[2026-03-04 05:45:24] (step=0053180) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.405008804539229, LR: 0.0003 +[2026-03-04 05:45:32] (step=0053181) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.405204460966543, LR: 0.0003 +[2026-03-04 05:45:39] (step=0053182) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.405400117393857, LR: 0.0003 +[2026-03-04 05:45:47] (step=0053183) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 10.40559577382117, LR: 0.0003 +[2026-03-04 05:45:55] (step=0053184) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.405791430248483, LR: 0.0003 +[2026-03-04 05:46:03] (step=0053185) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.405987086675797, LR: 0.0003 +[2026-03-04 05:46:11] (step=0053186) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.40618274310311, LR: 0.0003 +[2026-03-04 05:46:19] (step=0053187) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.406378399530425, LR: 0.0003 +[2026-03-04 05:46:27] (step=0053188) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.406574055957739, LR: 0.0003 +[2026-03-04 05:46:35] (step=0053189) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.406769712385051, LR: 0.0003 +[2026-03-04 05:46:42] (step=0053190) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.406965368812365, LR: 0.0003 +[2026-03-04 05:46:50] (step=0053191) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.40716102523968, LR: 0.0003 +[2026-03-04 05:46:58] (step=0053192) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.407356681666993, LR: 0.0003 +[2026-03-04 05:47:06] (step=0053193) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.407552338094307, LR: 0.0003 +[2026-03-04 05:47:14] (step=0053194) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 10.40774799452162, LR: 0.0003 +[2026-03-04 05:47:22] (step=0053195) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.407943650948933, LR: 0.0003 +[2026-03-04 05:47:30] (step=0053196) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.408139307376247, LR: 0.0003 +[2026-03-04 05:47:37] (step=0053197) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.408334963803561, LR: 0.0003 +[2026-03-04 05:47:45] (step=0053198) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.408530620230875, LR: 0.0003 +[2026-03-04 05:47:53] (step=0053199) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.408726276658188, LR: 0.0003 +[2026-03-04 05:48:01] (step=0053200) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.408921933085502, LR: 0.0003 +[2026-03-04 05:48:09] (step=0053201) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.409117589512816, LR: 0.0003 +[2026-03-04 05:48:17] (step=0053202) Train Loss: 0.4569, Train Steps/Sec: 0.12, Epoch: 10.40931324594013, LR: 0.0003 +[2026-03-04 05:48:25] (step=0053203) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.409508902367444, LR: 0.0003 +[2026-03-04 05:48:33] (step=0053204) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.409704558794756, LR: 0.0003 +[2026-03-04 05:48:41] (step=0053205) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.40990021522207, LR: 0.0003 +[2026-03-04 05:48:48] (step=0053206) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.410095871649384, LR: 0.0003 +[2026-03-04 05:48:56] (step=0053207) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.410291528076698, LR: 0.0003 +[2026-03-04 05:49:04] (step=0053208) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.41048718450401, LR: 0.0003 +[2026-03-04 05:49:12] (step=0053209) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.410682840931324, LR: 0.0003 +[2026-03-04 05:49:20] (step=0053210) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.410878497358638, LR: 0.0003 +[2026-03-04 05:49:28] (step=0053211) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.411074153785952, LR: 0.0003 +[2026-03-04 05:49:36] (step=0053212) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.411269810213266, LR: 0.0003 +[2026-03-04 05:49:44] (step=0053213) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.411465466640578, LR: 0.0003 +[2026-03-04 05:49:51] (step=0053214) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.411661123067892, LR: 0.0003 +[2026-03-04 05:49:59] (step=0053215) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.411856779495206, LR: 0.0003 +[2026-03-04 05:50:07] (step=0053216) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.41205243592252, LR: 0.0003 +[2026-03-04 05:50:15] (step=0053217) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.412248092349834, LR: 0.0003 +[2026-03-04 05:50:23] (step=0053218) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.412443748777147, LR: 0.0003 +[2026-03-04 05:50:31] (step=0053219) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.41263940520446, LR: 0.0003 +[2026-03-04 05:50:39] (step=0053220) Train Loss: 0.4493, Train Steps/Sec: 0.12, Epoch: 10.412835061631775, LR: 0.0003 +[2026-03-04 05:50:47] (step=0053221) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.413030718059089, LR: 0.0003 +[2026-03-04 05:50:54] (step=0053222) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.413226374486403, LR: 0.0003 +[2026-03-04 05:51:02] (step=0053223) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 10.413422030913715, LR: 0.0003 +[2026-03-04 05:51:10] (step=0053224) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.413617687341029, LR: 0.0003 +[2026-03-04 05:51:18] (step=0053225) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.413813343768343, LR: 0.0003 +[2026-03-04 05:51:26] (step=0053226) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 10.414009000195657, LR: 0.0003 +[2026-03-04 05:51:34] (step=0053227) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.414204656622971, LR: 0.0003 +[2026-03-04 05:51:42] (step=0053228) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.414400313050283, LR: 0.0003 +[2026-03-04 05:51:50] (step=0053229) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.414595969477597, LR: 0.0003 +[2026-03-04 05:51:57] (step=0053230) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.414791625904911, LR: 0.0003 +[2026-03-04 05:52:05] (step=0053231) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.414987282332225, LR: 0.0003 +[2026-03-04 05:52:13] (step=0053232) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.415182938759537, LR: 0.0003 +[2026-03-04 05:52:21] (step=0053233) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.415378595186851, LR: 0.0003 +[2026-03-04 05:52:29] (step=0053234) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.415574251614165, LR: 0.0003 +[2026-03-04 05:52:37] (step=0053235) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.41576990804148, LR: 0.0003 +[2026-03-04 05:52:45] (step=0053236) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.415965564468793, LR: 0.0003 +[2026-03-04 05:52:52] (step=0053237) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.416161220896106, LR: 0.0003 +[2026-03-04 05:53:00] (step=0053238) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.41635687732342, LR: 0.0003 +[2026-03-04 05:53:08] (step=0053239) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.416552533750734, LR: 0.0003 +[2026-03-04 05:53:16] (step=0053240) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.416748190178048, LR: 0.0003 +[2026-03-04 05:53:24] (step=0053241) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.416943846605362, LR: 0.0003 +[2026-03-04 05:53:32] (step=0053242) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.417139503032674, LR: 0.0003 +[2026-03-04 05:53:40] (step=0053243) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.417335159459988, LR: 0.0003 +[2026-03-04 05:53:48] (step=0053244) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.417530815887302, LR: 0.0003 +[2026-03-04 05:53:55] (step=0053245) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.417726472314616, LR: 0.0003 +[2026-03-04 05:54:03] (step=0053246) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.41792212874193, LR: 0.0003 +[2026-03-04 05:54:11] (step=0053247) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.418117785169242, LR: 0.0003 +[2026-03-04 05:54:19] (step=0053248) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.418313441596556, LR: 0.0003 +[2026-03-04 05:54:27] (step=0053249) Train Loss: 0.4512, Train Steps/Sec: 0.12, Epoch: 10.41850909802387, LR: 0.0003 +[2026-03-04 05:54:35] (step=0053250) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.418704754451184, LR: 0.0003 +[2026-03-04 05:54:43] (step=0053251) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.418900410878498, LR: 0.0003 +[2026-03-04 05:54:51] (step=0053252) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.41909606730581, LR: 0.0003 +[2026-03-04 05:54:58] (step=0053253) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.419291723733124, LR: 0.0003 +[2026-03-04 05:55:06] (step=0053254) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 10.419487380160438, LR: 0.0003 +[2026-03-04 05:55:14] (step=0053255) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.419683036587752, LR: 0.0003 +[2026-03-04 05:55:22] (step=0053256) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.419878693015066, LR: 0.0003 +[2026-03-04 05:55:30] (step=0053257) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.420074349442379, LR: 0.0003 +[2026-03-04 05:55:38] (step=0053258) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.420270005869693, LR: 0.0003 +[2026-03-04 05:55:46] (step=0053259) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.420465662297007, LR: 0.0003 +[2026-03-04 05:55:54] (step=0053260) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 10.42066131872432, LR: 0.0003 +[2026-03-04 05:56:01] (step=0053261) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 10.420856975151633, LR: 0.0003 +[2026-03-04 05:56:09] (step=0053262) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.421052631578947, LR: 0.0003 +[2026-03-04 05:56:17] (step=0053263) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.421248288006261, LR: 0.0003 +[2026-03-04 05:56:25] (step=0053264) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.421443944433575, LR: 0.0003 +[2026-03-04 05:56:33] (step=0053265) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.421639600860889, LR: 0.0003 +[2026-03-04 05:56:41] (step=0053266) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.421835257288201, LR: 0.0003 +[2026-03-04 05:56:49] (step=0053267) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.422030913715515, LR: 0.0003 +[2026-03-04 05:56:56] (step=0053268) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.42222657014283, LR: 0.0003 +[2026-03-04 05:57:05] (step=0053269) Train Loss: 0.4426, Train Steps/Sec: 0.12, Epoch: 10.422422226570143, LR: 0.0003 +[2026-03-04 05:57:12] (step=0053270) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.422617882997457, LR: 0.0003 +[2026-03-04 05:57:20] (step=0053271) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.42281353942477, LR: 0.0003 +[2026-03-04 05:57:28] (step=0053272) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.423009195852083, LR: 0.0003 +[2026-03-04 05:57:36] (step=0053273) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.423204852279397, LR: 0.0003 +[2026-03-04 05:57:44] (step=0053274) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.423400508706711, LR: 0.0003 +[2026-03-04 05:57:52] (step=0053275) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.423596165134025, LR: 0.0003 +[2026-03-04 05:58:00] (step=0053276) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 10.423791821561338, LR: 0.0003 +[2026-03-04 05:58:07] (step=0053277) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.423987477988652, LR: 0.0003 +[2026-03-04 05:58:15] (step=0053278) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.424183134415966, LR: 0.0003 +[2026-03-04 05:58:23] (step=0053279) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.42437879084328, LR: 0.0003 +[2026-03-04 05:58:31] (step=0053280) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 10.424574447270594, LR: 0.0003 +[2026-03-04 05:58:39] (step=0053281) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.424770103697906, LR: 0.0003 +[2026-03-04 05:58:47] (step=0053282) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.42496576012522, LR: 0.0003 +[2026-03-04 05:58:55] (step=0053283) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.425161416552534, LR: 0.0003 +[2026-03-04 05:59:02] (step=0053284) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.425357072979848, LR: 0.0003 +[2026-03-04 05:59:10] (step=0053285) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.42555272940716, LR: 0.0003 +[2026-03-04 05:59:18] (step=0053286) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.425748385834474, LR: 0.0003 +[2026-03-04 05:59:26] (step=0053287) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.425944042261788, LR: 0.0003 +[2026-03-04 05:59:34] (step=0053288) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.426139698689102, LR: 0.0003 +[2026-03-04 05:59:42] (step=0053289) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.426335355116416, LR: 0.0003 +[2026-03-04 05:59:50] (step=0053290) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.426531011543728, LR: 0.0003 +[2026-03-04 05:59:57] (step=0053291) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.426726667971042, LR: 0.0003 +[2026-03-04 06:00:05] (step=0053292) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.426922324398356, LR: 0.0003 +[2026-03-04 06:00:13] (step=0053293) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.42711798082567, LR: 0.0003 +[2026-03-04 06:00:21] (step=0053294) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.427313637252984, LR: 0.0003 +[2026-03-04 06:00:29] (step=0053295) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.427509293680297, LR: 0.0003 +[2026-03-04 06:00:37] (step=0053296) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.42770495010761, LR: 0.0003 +[2026-03-04 06:00:45] (step=0053297) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.427900606534925, LR: 0.0003 +[2026-03-04 06:00:52] (step=0053298) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.428096262962239, LR: 0.0003 +[2026-03-04 06:01:00] (step=0053299) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.428291919389553, LR: 0.0003 +[2026-03-04 06:01:08] (step=0053300) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 10.428487575816865, LR: 0.0003 +[2026-03-04 06:01:16] (step=0053301) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.428683232244179, LR: 0.0003 +[2026-03-04 06:01:24] (step=0053302) Train Loss: 0.4541, Train Steps/Sec: 0.12, Epoch: 10.428878888671493, LR: 0.0003 +[2026-03-04 06:01:32] (step=0053303) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.429074545098807, LR: 0.0003 +[2026-03-04 06:01:40] (step=0053304) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.429270201526121, LR: 0.0003 +[2026-03-04 06:01:48] (step=0053305) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.429465857953433, LR: 0.0003 +[2026-03-04 06:01:55] (step=0053306) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.429661514380747, LR: 0.0003 +[2026-03-04 06:02:03] (step=0053307) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.429857170808061, LR: 0.0003 +[2026-03-04 06:02:11] (step=0053308) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.430052827235375, LR: 0.0003 +[2026-03-04 06:02:19] (step=0053309) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 10.43024848366269, LR: 0.0003 +[2026-03-04 06:02:27] (step=0053310) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.430444140090001, LR: 0.0003 +[2026-03-04 06:02:35] (step=0053311) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 10.430639796517315, LR: 0.0003 +[2026-03-04 06:02:43] (step=0053312) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.43083545294463, LR: 0.0003 +[2026-03-04 06:02:50] (step=0053313) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.431031109371943, LR: 0.0003 +[2026-03-04 06:02:58] (step=0053314) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.431226765799256, LR: 0.0003 +[2026-03-04 06:03:06] (step=0053315) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.43142242222657, LR: 0.0003 +[2026-03-04 06:03:14] (step=0053316) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 10.431618078653884, LR: 0.0003 +[2026-03-04 06:03:22] (step=0053317) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 10.431813735081198, LR: 0.0003 +[2026-03-04 06:03:30] (step=0053318) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 10.432009391508512, LR: 0.0003 +[2026-03-04 06:03:38] (step=0053319) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.432205047935824, LR: 0.0003 +[2026-03-04 06:03:46] (step=0053320) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.432400704363138, LR: 0.0003 +[2026-03-04 06:03:53] (step=0053321) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.432596360790452, LR: 0.0003 +[2026-03-04 06:04:01] (step=0053322) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 10.432792017217766, LR: 0.0003 +[2026-03-04 06:04:09] (step=0053323) Train Loss: 0.4428, Train Steps/Sec: 0.12, Epoch: 10.43298767364508, LR: 0.0003 +[2026-03-04 06:04:17] (step=0053324) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.433183330072392, LR: 0.0003 +[2026-03-04 06:04:25] (step=0053325) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 10.433378986499706, LR: 0.0003 +[2026-03-04 06:04:33] (step=0053326) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.43357464292702, LR: 0.0003 +[2026-03-04 06:04:41] (step=0053327) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.433770299354334, LR: 0.0003 +[2026-03-04 06:04:49] (step=0053328) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.433965955781648, LR: 0.0003 +[2026-03-04 06:04:56] (step=0053329) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.43416161220896, LR: 0.0003 +[2026-03-04 06:05:04] (step=0053330) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.434357268636274, LR: 0.0003 +[2026-03-04 06:05:12] (step=0053331) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.434552925063588, LR: 0.0003 +[2026-03-04 06:05:20] (step=0053332) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.434748581490902, LR: 0.0003 +[2026-03-04 06:05:28] (step=0053333) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 10.434944237918216, LR: 0.0003 +[2026-03-04 06:05:36] (step=0053334) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.435139894345529, LR: 0.0003 +[2026-03-04 06:05:44] (step=0053335) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.435335550772843, LR: 0.0003 +[2026-03-04 06:05:51] (step=0053336) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.435531207200157, LR: 0.0003 +[2026-03-04 06:05:59] (step=0053337) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.43572686362747, LR: 0.0003 +[2026-03-04 06:06:07] (step=0053338) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.435922520054783, LR: 0.0003 +[2026-03-04 06:06:15] (step=0053339) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.436118176482097, LR: 0.0003 +[2026-03-04 06:06:23] (step=0053340) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.436313832909411, LR: 0.0003 +[2026-03-04 06:06:31] (step=0053341) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.436509489336725, LR: 0.0003 +[2026-03-04 06:06:39] (step=0053342) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.436705145764039, LR: 0.0003 +[2026-03-04 06:06:47] (step=0053343) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.436900802191351, LR: 0.0003 +[2026-03-04 06:06:54] (step=0053344) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.437096458618665, LR: 0.0003 +[2026-03-04 06:07:02] (step=0053345) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.43729211504598, LR: 0.0003 +[2026-03-04 06:07:10] (step=0053346) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.437487771473293, LR: 0.0003 +[2026-03-04 06:07:18] (step=0053347) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.437683427900607, LR: 0.0003 +[2026-03-04 06:07:26] (step=0053348) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.43787908432792, LR: 0.0003 +[2026-03-04 06:07:34] (step=0053349) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.438074740755233, LR: 0.0003 +[2026-03-04 06:07:42] (step=0053350) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.438270397182547, LR: 0.0003 +[2026-03-04 06:07:49] (step=0053351) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.438466053609861, LR: 0.0003 +[2026-03-04 06:07:57] (step=0053352) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 10.438661710037175, LR: 0.0003 +[2026-03-04 06:08:05] (step=0053353) Train Loss: 0.4539, Train Steps/Sec: 0.12, Epoch: 10.438857366464488, LR: 0.0003 +[2026-03-04 06:08:13] (step=0053354) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.439053022891802, LR: 0.0003 +[2026-03-04 06:08:21] (step=0053355) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 10.439248679319116, LR: 0.0003 +[2026-03-04 06:08:29] (step=0053356) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.43944433574643, LR: 0.0003 +[2026-03-04 06:08:37] (step=0053357) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 10.439639992173744, LR: 0.0003 +[2026-03-04 06:08:45] (step=0053358) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.439835648601056, LR: 0.0003 +[2026-03-04 06:08:52] (step=0053359) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.44003130502837, LR: 0.0003 +[2026-03-04 06:09:00] (step=0053360) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.440226961455684, LR: 0.0003 +[2026-03-04 06:09:08] (step=0053361) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.440422617882998, LR: 0.0003 +[2026-03-04 06:09:16] (step=0053362) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.440618274310312, LR: 0.0003 +[2026-03-04 06:09:24] (step=0053363) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.440813930737624, LR: 0.0003 +[2026-03-04 06:09:32] (step=0053364) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.441009587164938, LR: 0.0003 +[2026-03-04 06:09:40] (step=0053365) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.441205243592252, LR: 0.0003 +[2026-03-04 06:09:47] (step=0053366) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.441400900019566, LR: 0.0003 +[2026-03-04 06:09:55] (step=0053367) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.441596556446878, LR: 0.0003 +[2026-03-04 06:10:03] (step=0053368) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.441792212874192, LR: 0.0003 +[2026-03-04 06:10:11] (step=0053369) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.441987869301506, LR: 0.0003 +[2026-03-04 06:10:19] (step=0053370) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.44218352572882, LR: 0.0003 +[2026-03-04 06:10:27] (step=0053371) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.442379182156134, LR: 0.0003 +[2026-03-04 06:10:35] (step=0053372) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.442574838583447, LR: 0.0003 +[2026-03-04 06:10:43] (step=0053373) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.44277049501076, LR: 0.0003 +[2026-03-04 06:10:50] (step=0053374) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.442966151438075, LR: 0.0003 +[2026-03-04 06:10:58] (step=0053375) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.443161807865389, LR: 0.0003 +[2026-03-04 06:11:06] (step=0053376) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.443357464292703, LR: 0.0003 +[2026-03-04 06:11:14] (step=0053377) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.443553120720015, LR: 0.0003 +[2026-03-04 06:11:22] (step=0053378) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.443748777147329, LR: 0.0003 +[2026-03-04 06:11:30] (step=0053379) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.443944433574643, LR: 0.0003 +[2026-03-04 06:11:38] (step=0053380) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.444140090001957, LR: 0.0003 +[2026-03-04 06:11:45] (step=0053381) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.444335746429271, LR: 0.0003 +[2026-03-04 06:11:53] (step=0053382) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.444531402856583, LR: 0.0003 +[2026-03-04 06:12:01] (step=0053383) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.444727059283897, LR: 0.0003 +[2026-03-04 06:12:09] (step=0053384) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.444922715711211, LR: 0.0003 +[2026-03-04 06:12:17] (step=0053385) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.445118372138525, LR: 0.0003 +[2026-03-04 06:12:25] (step=0053386) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.44531402856584, LR: 0.0003 +[2026-03-04 06:12:33] (step=0053387) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.445509684993151, LR: 0.0003 +[2026-03-04 06:12:40] (step=0053388) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.445705341420465, LR: 0.0003 +[2026-03-04 06:12:48] (step=0053389) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.44590099784778, LR: 0.0003 +[2026-03-04 06:12:56] (step=0053390) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.446096654275093, LR: 0.0003 +[2026-03-04 06:13:04] (step=0053391) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.446292310702406, LR: 0.0003 +[2026-03-04 06:13:12] (step=0053392) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.44648796712972, LR: 0.0003 +[2026-03-04 06:13:20] (step=0053393) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.446683623557034, LR: 0.0003 +[2026-03-04 06:13:28] (step=0053394) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.446879279984348, LR: 0.0003 +[2026-03-04 06:13:35] (step=0053395) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.447074936411662, LR: 0.0003 +[2026-03-04 06:13:43] (step=0053396) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.447270592838974, LR: 0.0003 +[2026-03-04 06:13:51] (step=0053397) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.447466249266288, LR: 0.0003 +[2026-03-04 06:13:59] (step=0053398) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.447661905693602, LR: 0.0003 +[2026-03-04 06:14:07] (step=0053399) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.447857562120916, LR: 0.0003 +[2026-03-04 06:14:15] (step=0053400) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.44805321854823, LR: 0.0003 +[2026-03-04 06:14:23] (step=0053401) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.448248874975542, LR: 0.0003 +[2026-03-04 06:14:31] (step=0053402) Train Loss: 0.4446, Train Steps/Sec: 0.12, Epoch: 10.448444531402856, LR: 0.0003 +[2026-03-04 06:14:38] (step=0053403) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.44864018783017, LR: 0.0003 +[2026-03-04 06:14:46] (step=0053404) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.448835844257484, LR: 0.0003 +[2026-03-04 06:14:54] (step=0053405) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.449031500684798, LR: 0.0003 +[2026-03-04 06:15:02] (step=0053406) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.44922715711211, LR: 0.0003 +[2026-03-04 06:15:10] (step=0053407) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.449422813539424, LR: 0.0003 +[2026-03-04 06:15:18] (step=0053408) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.449618469966738, LR: 0.0003 +[2026-03-04 06:15:26] (step=0053409) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.449814126394052, LR: 0.0003 +[2026-03-04 06:15:33] (step=0053410) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.450009782821366, LR: 0.0003 +[2026-03-04 06:15:41] (step=0053411) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.450205439248679, LR: 0.0003 +[2026-03-04 06:15:49] (step=0053412) Train Loss: 0.4209, Train Steps/Sec: 0.13, Epoch: 10.450401095675993, LR: 0.0003 +[2026-03-04 06:15:57] (step=0053413) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.450596752103307, LR: 0.0003 +[2026-03-04 06:16:05] (step=0053414) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.45079240853062, LR: 0.0003 +[2026-03-04 06:16:13] (step=0053415) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.450988064957935, LR: 0.0003 +[2026-03-04 06:16:21] (step=0053416) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.451183721385247, LR: 0.0003 +[2026-03-04 06:16:29] (step=0053417) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.451379377812561, LR: 0.0003 +[2026-03-04 06:16:36] (step=0053418) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 10.451575034239875, LR: 0.0003 +[2026-03-04 06:16:44] (step=0053419) Train Loss: 0.4466, Train Steps/Sec: 0.12, Epoch: 10.451770690667189, LR: 0.0003 +[2026-03-04 06:16:52] (step=0053420) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.451966347094501, LR: 0.0003 +[2026-03-04 06:17:00] (step=0053421) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.452162003521815, LR: 0.0003 +[2026-03-04 06:17:08] (step=0053422) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.45235765994913, LR: 0.0003 +[2026-03-04 06:17:16] (step=0053423) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.452553316376443, LR: 0.0003 +[2026-03-04 06:17:24] (step=0053424) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.452748972803757, LR: 0.0003 +[2026-03-04 06:17:32] (step=0053425) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.45294462923107, LR: 0.0003 +[2026-03-04 06:17:39] (step=0053426) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 10.453140285658383, LR: 0.0003 +[2026-03-04 06:17:47] (step=0053427) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 10.453335942085697, LR: 0.0003 +[2026-03-04 06:17:55] (step=0053428) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.453531598513012, LR: 0.0003 +[2026-03-04 06:18:03] (step=0053429) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.453727254940326, LR: 0.0003 +[2026-03-04 06:18:11] (step=0053430) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.453922911367638, LR: 0.0003 +[2026-03-04 06:18:19] (step=0053431) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.454118567794952, LR: 0.0003 +[2026-03-04 06:18:27] (step=0053432) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.454314224222266, LR: 0.0003 +[2026-03-04 06:18:34] (step=0053433) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.45450988064958, LR: 0.0003 +[2026-03-04 06:18:42] (step=0053434) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.454705537076894, LR: 0.0003 +[2026-03-04 06:18:50] (step=0053435) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.454901193504206, LR: 0.0003 +[2026-03-04 06:18:58] (step=0053436) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.45509684993152, LR: 0.0003 +[2026-03-04 06:19:06] (step=0053437) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.455292506358834, LR: 0.0003 +[2026-03-04 06:19:14] (step=0053438) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.455488162786148, LR: 0.0003 +[2026-03-04 06:19:22] (step=0053439) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.455683819213462, LR: 0.0003 +[2026-03-04 06:19:29] (step=0053440) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 10.455879475640774, LR: 0.0003 +[2026-03-04 06:19:37] (step=0053441) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 10.456075132068088, LR: 0.0003 +[2026-03-04 06:19:45] (step=0053442) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.456270788495402, LR: 0.0003 +[2026-03-04 06:19:53] (step=0053443) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.456466444922716, LR: 0.0003 +[2026-03-04 06:20:01] (step=0053444) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 10.456662101350028, LR: 0.0003 +[2026-03-04 06:20:09] (step=0053445) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.456857757777342, LR: 0.0003 +[2026-03-04 06:20:17] (step=0053446) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.457053414204657, LR: 0.0003 +[2026-03-04 06:20:25] (step=0053447) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.45724907063197, LR: 0.0003 +[2026-03-04 06:20:32] (step=0053448) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.457444727059285, LR: 0.0003 +[2026-03-04 06:20:40] (step=0053449) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 10.457640383486597, LR: 0.0003 +[2026-03-04 06:20:48] (step=0053450) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.45783603991391, LR: 0.0003 +[2026-03-04 06:20:56] (step=0053451) Train Loss: 0.4357, Train Steps/Sec: 0.12, Epoch: 10.458031696341225, LR: 0.0003 +[2026-03-04 06:21:04] (step=0053452) Train Loss: 0.4266, Train Steps/Sec: 0.13, Epoch: 10.458227352768539, LR: 0.0003 +[2026-03-04 06:21:12] (step=0053453) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.458423009195853, LR: 0.0003 +[2026-03-04 06:21:20] (step=0053454) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.458618665623165, LR: 0.0003 +[2026-03-04 06:21:28] (step=0053455) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.458814322050479, LR: 0.0003 +[2026-03-04 06:21:35] (step=0053456) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.459009978477793, LR: 0.0003 +[2026-03-04 06:21:43] (step=0053457) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.459205634905107, LR: 0.0003 +[2026-03-04 06:21:51] (step=0053458) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.459401291332421, LR: 0.0003 +[2026-03-04 06:21:59] (step=0053459) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.459596947759733, LR: 0.0003 +[2026-03-04 06:22:07] (step=0053460) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.459792604187047, LR: 0.0003 +[2026-03-04 06:22:15] (step=0053461) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 10.459988260614361, LR: 0.0003 +[2026-03-04 06:22:23] (step=0053462) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.460183917041675, LR: 0.0003 +[2026-03-04 06:22:31] (step=0053463) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 10.46037957346899, LR: 0.0003 +[2026-03-04 06:22:38] (step=0053464) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.460575229896302, LR: 0.0003 +[2026-03-04 06:22:46] (step=0053465) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.460770886323616, LR: 0.0003 +[2026-03-04 06:22:54] (step=0053466) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.46096654275093, LR: 0.0003 +[2026-03-04 06:23:02] (step=0053467) Train Loss: 0.4329, Train Steps/Sec: 0.12, Epoch: 10.461162199178244, LR: 0.0003 +[2026-03-04 06:23:10] (step=0053468) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.461357855605556, LR: 0.0003 +[2026-03-04 06:23:18] (step=0053469) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.46155351203287, LR: 0.0003 +[2026-03-04 06:23:26] (step=0053470) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.461749168460184, LR: 0.0003 +[2026-03-04 06:23:34] (step=0053471) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 10.461944824887498, LR: 0.0003 +[2026-03-04 06:23:41] (step=0053472) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.462140481314812, LR: 0.0003 +[2026-03-04 06:23:49] (step=0053473) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.462336137742124, LR: 0.0003 +[2026-03-04 06:23:57] (step=0053474) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.462531794169438, LR: 0.0003 +[2026-03-04 06:24:05] (step=0053475) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.462727450596752, LR: 0.0003 +[2026-03-04 06:24:13] (step=0053476) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.462923107024066, LR: 0.0003 +[2026-03-04 06:24:21] (step=0053477) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.46311876345138, LR: 0.0003 +[2026-03-04 06:24:29] (step=0053478) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.463314419878692, LR: 0.0003 +[2026-03-04 06:24:36] (step=0053479) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.463510076306006, LR: 0.0003 +[2026-03-04 06:24:44] (step=0053480) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.46370573273332, LR: 0.0003 +[2026-03-04 06:24:52] (step=0053481) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.463901389160634, LR: 0.0003 +[2026-03-04 06:25:00] (step=0053482) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.464097045587948, LR: 0.0003 +[2026-03-04 06:25:08] (step=0053483) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.46429270201526, LR: 0.0003 +[2026-03-04 06:25:16] (step=0053484) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.464488358442575, LR: 0.0003 +[2026-03-04 06:25:24] (step=0053485) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 10.464684014869889, LR: 0.0003 +[2026-03-04 06:25:31] (step=0053486) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.464879671297203, LR: 0.0003 +[2026-03-04 06:25:39] (step=0053487) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.465075327724517, LR: 0.0003 +[2026-03-04 06:25:47] (step=0053488) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.465270984151829, LR: 0.0003 +[2026-03-04 06:25:55] (step=0053489) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.465466640579143, LR: 0.0003 +[2026-03-04 06:26:03] (step=0053490) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.465662297006457, LR: 0.0003 +[2026-03-04 06:26:11] (step=0053491) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.46585795343377, LR: 0.0003 +[2026-03-04 06:26:19] (step=0053492) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 10.466053609861085, LR: 0.0003 +[2026-03-04 06:26:27] (step=0053493) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.466249266288397, LR: 0.0003 +[2026-03-04 06:26:34] (step=0053494) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.466444922715711, LR: 0.0003 +[2026-03-04 06:26:42] (step=0053495) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.466640579143025, LR: 0.0003 +[2026-03-04 06:26:50] (step=0053496) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.466836235570339, LR: 0.0003 +[2026-03-04 06:26:58] (step=0053497) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.467031891997651, LR: 0.0003 +[2026-03-04 06:27:06] (step=0053498) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 10.467227548424965, LR: 0.0003 +[2026-03-04 06:27:14] (step=0053499) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.46742320485228, LR: 0.0003 +[2026-03-04 06:27:22] (step=0053500) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.467618861279593, LR: 0.0003 +[2026-03-04 06:27:22] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0053500/ +[2026-03-04 06:27:30] (step=0053501) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.467814517706907, LR: 0.0003 +[2026-03-04 06:27:37] (step=0053502) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.46801017413422, LR: 0.0003 +[2026-03-04 06:27:45] (step=0053503) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 10.468205830561534, LR: 0.0003 +[2026-03-04 06:27:53] (step=0053504) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.468401486988848, LR: 0.0003 +[2026-03-04 06:28:01] (step=0053505) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.468597143416162, LR: 0.0003 +[2026-03-04 06:28:09] (step=0053506) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.468792799843476, LR: 0.0003 +[2026-03-04 06:28:17] (step=0053507) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.468988456270788, LR: 0.0003 +[2026-03-04 06:28:25] (step=0053508) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.469184112698102, LR: 0.0003 +[2026-03-04 06:28:33] (step=0053509) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.469379769125416, LR: 0.0003 +[2026-03-04 06:28:40] (step=0053510) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.46957542555273, LR: 0.0003 +[2026-03-04 06:28:48] (step=0053511) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.469771081980044, LR: 0.0003 +[2026-03-04 06:28:56] (step=0053512) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.469966738407356, LR: 0.0003 +[2026-03-04 06:29:04] (step=0053513) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.47016239483467, LR: 0.0003 +[2026-03-04 06:29:12] (step=0053514) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.470358051261984, LR: 0.0003 +[2026-03-04 06:29:20] (step=0053515) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 10.470553707689298, LR: 0.0003 +[2026-03-04 06:29:28] (step=0053516) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.470749364116612, LR: 0.0003 +[2026-03-04 06:29:36] (step=0053517) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 10.470945020543924, LR: 0.0003 +[2026-03-04 06:29:43] (step=0053518) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.471140676971238, LR: 0.0003 +[2026-03-04 06:29:51] (step=0053519) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.471336333398552, LR: 0.0003 +[2026-03-04 06:29:59] (step=0053520) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.471531989825866, LR: 0.0003 +[2026-03-04 06:30:07] (step=0053521) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 10.471727646253179, LR: 0.0003 +[2026-03-04 06:30:15] (step=0053522) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.471923302680493, LR: 0.0003 +[2026-03-04 06:30:23] (step=0053523) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.472118959107807, LR: 0.0003 +[2026-03-04 06:30:31] (step=0053524) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.47231461553512, LR: 0.0003 +[2026-03-04 06:30:38] (step=0053525) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.472510271962435, LR: 0.0003 +[2026-03-04 06:30:46] (step=0053526) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.472705928389747, LR: 0.0003 +[2026-03-04 06:30:54] (step=0053527) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.47290158481706, LR: 0.0003 +[2026-03-04 06:31:02] (step=0053528) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.473097241244375, LR: 0.0003 +[2026-03-04 06:31:10] (step=0053529) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.473292897671689, LR: 0.0003 +[2026-03-04 06:31:18] (step=0053530) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.473488554099003, LR: 0.0003 +[2026-03-04 06:31:26] (step=0053531) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.473684210526315, LR: 0.0003 +[2026-03-04 06:31:33] (step=0053532) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.473879866953629, LR: 0.0003 +[2026-03-04 06:31:41] (step=0053533) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.474075523380943, LR: 0.0003 +[2026-03-04 06:31:49] (step=0053534) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.474271179808257, LR: 0.0003 +[2026-03-04 06:31:57] (step=0053535) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.474466836235571, LR: 0.0003 +[2026-03-04 06:32:05] (step=0053536) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.474662492662883, LR: 0.0003 +[2026-03-04 06:32:13] (step=0053537) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.474858149090197, LR: 0.0003 +[2026-03-04 06:32:21] (step=0053538) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.475053805517511, LR: 0.0003 +[2026-03-04 06:32:28] (step=0053539) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.475249461944825, LR: 0.0003 +[2026-03-04 06:32:36] (step=0053540) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.47544511837214, LR: 0.0003 +[2026-03-04 06:32:44] (step=0053541) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.475640774799452, LR: 0.0003 +[2026-03-04 06:32:52] (step=0053542) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.475836431226766, LR: 0.0003 +[2026-03-04 06:33:00] (step=0053543) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.47603208765408, LR: 0.0003 +[2026-03-04 06:33:08] (step=0053544) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.476227744081394, LR: 0.0003 +[2026-03-04 06:33:16] (step=0053545) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.476423400508708, LR: 0.0003 +[2026-03-04 06:33:24] (step=0053546) Train Loss: 0.4678, Train Steps/Sec: 0.13, Epoch: 10.47661905693602, LR: 0.0003 +[2026-03-04 06:33:31] (step=0053547) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.476814713363334, LR: 0.0003 +[2026-03-04 06:33:39] (step=0053548) Train Loss: 0.4411, Train Steps/Sec: 0.12, Epoch: 10.477010369790648, LR: 0.0003 +[2026-03-04 06:33:47] (step=0053549) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.477206026217962, LR: 0.0003 +[2026-03-04 06:33:55] (step=0053550) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.477401682645274, LR: 0.0003 +[2026-03-04 06:34:03] (step=0053551) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.477597339072588, LR: 0.0003 +[2026-03-04 06:34:11] (step=0053552) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 10.477792995499902, LR: 0.0003 +[2026-03-04 06:34:19] (step=0053553) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.477988651927216, LR: 0.0003 +[2026-03-04 06:34:27] (step=0053554) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.47818430835453, LR: 0.0003 +[2026-03-04 06:34:34] (step=0053555) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.478379964781842, LR: 0.0003 +[2026-03-04 06:34:42] (step=0053556) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.478575621209156, LR: 0.0003 +[2026-03-04 06:34:50] (step=0053557) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.47877127763647, LR: 0.0003 +[2026-03-04 06:34:58] (step=0053558) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.478966934063784, LR: 0.0003 +[2026-03-04 06:35:06] (step=0053559) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.479162590491098, LR: 0.0003 +[2026-03-04 06:35:14] (step=0053560) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.47935824691841, LR: 0.0003 +[2026-03-04 06:35:22] (step=0053561) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.479553903345725, LR: 0.0003 +[2026-03-04 06:35:29] (step=0053562) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.479749559773039, LR: 0.0003 +[2026-03-04 06:35:37] (step=0053563) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.479945216200353, LR: 0.0003 +[2026-03-04 06:35:45] (step=0053564) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.480140872627667, LR: 0.0003 +[2026-03-04 06:35:53] (step=0053565) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.480336529054979, LR: 0.0003 +[2026-03-04 06:36:01] (step=0053566) Train Loss: 0.4545, Train Steps/Sec: 0.12, Epoch: 10.480532185482293, LR: 0.0003 +[2026-03-04 06:36:09] (step=0053567) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 10.480727841909607, LR: 0.0003 +[2026-03-04 06:36:17] (step=0053568) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.48092349833692, LR: 0.0003 +[2026-03-04 06:36:25] (step=0053569) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.481119154764235, LR: 0.0003 +[2026-03-04 06:36:32] (step=0053570) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.481314811191547, LR: 0.0003 +[2026-03-04 06:36:40] (step=0053571) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.481510467618861, LR: 0.0003 +[2026-03-04 06:36:48] (step=0053572) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.481706124046175, LR: 0.0003 +[2026-03-04 06:36:56] (step=0053573) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.481901780473489, LR: 0.0003 +[2026-03-04 06:37:04] (step=0053574) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 10.482097436900801, LR: 0.0003 +[2026-03-04 06:37:12] (step=0053575) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.482293093328115, LR: 0.0003 +[2026-03-04 06:37:20] (step=0053576) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.48248874975543, LR: 0.0003 +[2026-03-04 06:37:28] (step=0053577) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 10.482684406182743, LR: 0.0003 +[2026-03-04 06:37:35] (step=0053578) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.482880062610057, LR: 0.0003 +[2026-03-04 06:37:43] (step=0053579) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 10.48307571903737, LR: 0.0003 +[2026-03-04 06:37:51] (step=0053580) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 10.483271375464684, LR: 0.0003 +[2026-03-04 06:37:59] (step=0053581) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.483467031891998, LR: 0.0003 +[2026-03-04 06:38:07] (step=0053582) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.483662688319312, LR: 0.0003 +[2026-03-04 06:38:15] (step=0053583) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.483858344746626, LR: 0.0003 +[2026-03-04 06:38:23] (step=0053584) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.484054001173938, LR: 0.0003 +[2026-03-04 06:38:30] (step=0053585) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.484249657601252, LR: 0.0003 +[2026-03-04 06:38:38] (step=0053586) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.484445314028566, LR: 0.0003 +[2026-03-04 06:38:46] (step=0053587) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.48464097045588, LR: 0.0003 +[2026-03-04 06:38:54] (step=0053588) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.484836626883194, LR: 0.0003 +[2026-03-04 06:39:02] (step=0053589) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 10.485032283310506, LR: 0.0003 +[2026-03-04 06:39:10] (step=0053590) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.48522793973782, LR: 0.0003 +[2026-03-04 06:39:17] (step=0053591) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.485423596165134, LR: 0.0003 +[2026-03-04 06:39:25] (step=0053592) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.485619252592448, LR: 0.0003 +[2026-03-04 06:39:33] (step=0053593) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.485814909019762, LR: 0.0003 +[2026-03-04 06:39:41] (step=0053594) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.486010565447074, LR: 0.0003 +[2026-03-04 06:39:49] (step=0053595) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.486206221874388, LR: 0.0003 +[2026-03-04 06:39:57] (step=0053596) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.486401878301702, LR: 0.0003 +[2026-03-04 06:40:05] (step=0053597) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 10.486597534729016, LR: 0.0003 +[2026-03-04 06:40:13] (step=0053598) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.48679319115633, LR: 0.0003 +[2026-03-04 06:40:21] (step=0053599) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.486988847583643, LR: 0.0003 +[2026-03-04 06:40:28] (step=0053600) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.487184504010957, LR: 0.0003 +[2026-03-04 06:40:36] (step=0053601) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.48738016043827, LR: 0.0003 +[2026-03-04 06:40:44] (step=0053602) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.487575816865585, LR: 0.0003 +[2026-03-04 06:40:52] (step=0053603) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.487771473292897, LR: 0.0003 +[2026-03-04 06:41:00] (step=0053604) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.48796712972021, LR: 0.0003 +[2026-03-04 06:41:08] (step=0053605) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.488162786147525, LR: 0.0003 +[2026-03-04 06:41:16] (step=0053606) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.488358442574839, LR: 0.0003 +[2026-03-04 06:41:24] (step=0053607) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.488554099002153, LR: 0.0003 +[2026-03-04 06:41:31] (step=0053608) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.488749755429465, LR: 0.0003 +[2026-03-04 06:41:39] (step=0053609) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 10.488945411856779, LR: 0.0003 +[2026-03-04 06:41:47] (step=0053610) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 10.489141068284093, LR: 0.0003 +[2026-03-04 06:41:55] (step=0053611) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.489336724711407, LR: 0.0003 +[2026-03-04 06:42:03] (step=0053612) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.489532381138721, LR: 0.0003 +[2026-03-04 06:42:11] (step=0053613) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.489728037566033, LR: 0.0003 +[2026-03-04 06:42:19] (step=0053614) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 10.489923693993347, LR: 0.0003 +[2026-03-04 06:42:27] (step=0053615) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.490119350420661, LR: 0.0003 +[2026-03-04 06:42:35] (step=0053616) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.490315006847975, LR: 0.0003 +[2026-03-04 06:42:42] (step=0053617) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 10.49051066327529, LR: 0.0003 +[2026-03-04 06:42:50] (step=0053618) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 10.490706319702602, LR: 0.0003 +[2026-03-04 06:42:58] (step=0053619) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.490901976129916, LR: 0.0003 +[2026-03-04 06:43:06] (step=0053620) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.49109763255723, LR: 0.0003 +[2026-03-04 06:43:14] (step=0053621) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.491293288984544, LR: 0.0003 +[2026-03-04 06:43:22] (step=0053622) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.491488945411858, LR: 0.0003 +[2026-03-04 06:43:30] (step=0053623) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.49168460183917, LR: 0.0003 +[2026-03-04 06:43:37] (step=0053624) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.491880258266484, LR: 0.0003 +[2026-03-04 06:43:45] (step=0053625) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 10.492075914693798, LR: 0.0003 +[2026-03-04 06:43:53] (step=0053626) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.492271571121112, LR: 0.0003 +[2026-03-04 06:44:01] (step=0053627) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.492467227548424, LR: 0.0003 +[2026-03-04 06:44:09] (step=0053628) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.492662883975738, LR: 0.0003 +[2026-03-04 06:44:17] (step=0053629) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.492858540403052, LR: 0.0003 +[2026-03-04 06:44:25] (step=0053630) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.493054196830366, LR: 0.0003 +[2026-03-04 06:44:32] (step=0053631) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.49324985325768, LR: 0.0003 +[2026-03-04 06:44:40] (step=0053632) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.493445509684992, LR: 0.0003 +[2026-03-04 06:44:48] (step=0053633) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.493641166112306, LR: 0.0003 +[2026-03-04 06:44:56] (step=0053634) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.49383682253962, LR: 0.0003 +[2026-03-04 06:45:04] (step=0053635) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.494032478966934, LR: 0.0003 +[2026-03-04 06:45:12] (step=0053636) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.494228135394248, LR: 0.0003 +[2026-03-04 06:45:20] (step=0053637) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 10.49442379182156, LR: 0.0003 +[2026-03-04 06:45:28] (step=0053638) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.494619448248875, LR: 0.0003 +[2026-03-04 06:45:35] (step=0053639) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.494815104676189, LR: 0.0003 +[2026-03-04 06:45:43] (step=0053640) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 10.495010761103503, LR: 0.0003 +[2026-03-04 06:45:51] (step=0053641) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.495206417530817, LR: 0.0003 +[2026-03-04 06:45:59] (step=0053642) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.495402073958129, LR: 0.0003 +[2026-03-04 06:46:07] (step=0053643) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.495597730385443, LR: 0.0003 +[2026-03-04 06:46:15] (step=0053644) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 10.495793386812757, LR: 0.0003 +[2026-03-04 06:46:23] (step=0053645) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.49598904324007, LR: 0.0003 +[2026-03-04 06:46:30] (step=0053646) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.496184699667385, LR: 0.0003 +[2026-03-04 06:46:38] (step=0053647) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.496380356094697, LR: 0.0003 +[2026-03-04 06:46:46] (step=0053648) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.496576012522011, LR: 0.0003 +[2026-03-04 06:46:54] (step=0053649) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.496771668949325, LR: 0.0003 +[2026-03-04 06:47:02] (step=0053650) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.49696732537664, LR: 0.0003 +[2026-03-04 06:47:10] (step=0053651) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.497162981803953, LR: 0.0003 +[2026-03-04 06:47:18] (step=0053652) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.497358638231265, LR: 0.0003 +[2026-03-04 06:47:26] (step=0053653) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.49755429465858, LR: 0.0003 +[2026-03-04 06:47:33] (step=0053654) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.497749951085893, LR: 0.0003 +[2026-03-04 06:47:41] (step=0053655) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.497945607513207, LR: 0.0003 +[2026-03-04 06:47:49] (step=0053656) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.49814126394052, LR: 0.0003 +[2026-03-04 06:47:57] (step=0053657) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 10.498336920367834, LR: 0.0003 +[2026-03-04 06:48:05] (step=0053658) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.498532576795148, LR: 0.0003 +[2026-03-04 06:48:13] (step=0053659) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.498728233222462, LR: 0.0003 +[2026-03-04 06:48:21] (step=0053660) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.498923889649776, LR: 0.0003 +[2026-03-04 06:48:28] (step=0053661) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.499119546077088, LR: 0.0003 +[2026-03-04 06:48:36] (step=0053662) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.499315202504402, LR: 0.0003 +[2026-03-04 06:48:44] (step=0053663) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 10.499510858931716, LR: 0.0003 +[2026-03-04 06:48:52] (step=0053664) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 10.49970651535903, LR: 0.0003 +[2026-03-04 06:49:00] (step=0053665) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.499902171786344, LR: 0.0003 +[2026-03-04 06:49:08] (step=0053666) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.500097828213656, LR: 0.0003 +[2026-03-04 06:49:16] (step=0053667) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.50029348464097, LR: 0.0003 +[2026-03-04 06:49:23] (step=0053668) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.500489141068284, LR: 0.0003 +[2026-03-04 06:49:31] (step=0053669) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.500684797495598, LR: 0.0003 +[2026-03-04 06:49:39] (step=0053670) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 10.500880453922912, LR: 0.0003 +[2026-03-04 06:49:47] (step=0053671) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 10.501076110350224, LR: 0.0003 +[2026-03-04 06:49:55] (step=0053672) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.501271766777538, LR: 0.0003 +[2026-03-04 06:50:03] (step=0053673) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.501467423204852, LR: 0.0003 +[2026-03-04 06:50:11] (step=0053674) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 10.501663079632166, LR: 0.0003 +[2026-03-04 06:50:19] (step=0053675) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.50185873605948, LR: 0.0003 +[2026-03-04 06:50:26] (step=0053676) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.502054392486793, LR: 0.0003 +[2026-03-04 06:50:34] (step=0053677) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.502250048914107, LR: 0.0003 +[2026-03-04 06:50:42] (step=0053678) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.50244570534142, LR: 0.0003 +[2026-03-04 06:50:50] (step=0053679) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.502641361768735, LR: 0.0003 +[2026-03-04 06:50:58] (step=0053680) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.502837018196047, LR: 0.0003 +[2026-03-04 06:51:06] (step=0053681) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.50303267462336, LR: 0.0003 +[2026-03-04 06:51:14] (step=0053682) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.503228331050675, LR: 0.0003 +[2026-03-04 06:51:22] (step=0053683) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.503423987477989, LR: 0.0003 +[2026-03-04 06:51:29] (step=0053684) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.503619643905303, LR: 0.0003 +[2026-03-04 06:51:37] (step=0053685) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.503815300332615, LR: 0.0003 +[2026-03-04 06:51:45] (step=0053686) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 10.50401095675993, LR: 0.0003 +[2026-03-04 06:51:53] (step=0053687) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.504206613187243, LR: 0.0003 +[2026-03-04 06:52:01] (step=0053688) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.504402269614557, LR: 0.0003 +[2026-03-04 06:52:09] (step=0053689) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.504597926041871, LR: 0.0003 +[2026-03-04 06:52:17] (step=0053690) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.504793582469183, LR: 0.0003 +[2026-03-04 06:52:24] (step=0053691) Train Loss: 0.4201, Train Steps/Sec: 0.13, Epoch: 10.504989238896497, LR: 0.0003 +[2026-03-04 06:52:32] (step=0053692) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 10.505184895323811, LR: 0.0003 +[2026-03-04 06:52:40] (step=0053693) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.505380551751125, LR: 0.0003 +[2026-03-04 06:52:48] (step=0053694) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.50557620817844, LR: 0.0003 +[2026-03-04 06:52:56] (step=0053695) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.505771864605752, LR: 0.0003 +[2026-03-04 06:53:04] (step=0053696) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.505967521033066, LR: 0.0003 +[2026-03-04 06:53:12] (step=0053697) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.50616317746038, LR: 0.0003 +[2026-03-04 06:53:20] (step=0053698) Train Loss: 0.4455, Train Steps/Sec: 0.12, Epoch: 10.506358833887694, LR: 0.0003 +[2026-03-04 06:53:28] (step=0053699) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.506554490315008, LR: 0.0003 +[2026-03-04 06:53:35] (step=0053700) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.50675014674232, LR: 0.0003 +[2026-03-04 06:53:43] (step=0053701) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.506945803169634, LR: 0.0003 +[2026-03-04 06:53:51] (step=0053702) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.507141459596948, LR: 0.0003 +[2026-03-04 06:53:59] (step=0053703) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.507337116024262, LR: 0.0003 +[2026-03-04 06:54:07] (step=0053704) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.507532772451576, LR: 0.0003 +[2026-03-04 06:54:15] (step=0053705) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.507728428878888, LR: 0.0003 +[2026-03-04 06:54:23] (step=0053706) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.507924085306202, LR: 0.0003 +[2026-03-04 06:54:30] (step=0053707) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.508119741733516, LR: 0.0003 +[2026-03-04 06:54:38] (step=0053708) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.50831539816083, LR: 0.0003 +[2026-03-04 06:54:46] (step=0053709) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.508511054588142, LR: 0.0003 +[2026-03-04 06:54:54] (step=0053710) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.508706711015456, LR: 0.0003 +[2026-03-04 06:55:02] (step=0053711) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 10.50890236744277, LR: 0.0003 +[2026-03-04 06:55:10] (step=0053712) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.509098023870084, LR: 0.0003 +[2026-03-04 06:55:18] (step=0053713) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.509293680297398, LR: 0.0003 +[2026-03-04 06:55:25] (step=0053714) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.50948933672471, LR: 0.0003 +[2026-03-04 06:55:34] (step=0053715) Train Loss: 0.4343, Train Steps/Sec: 0.12, Epoch: 10.509684993152025, LR: 0.0003 +[2026-03-04 06:55:41] (step=0053716) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.509880649579339, LR: 0.0003 +[2026-03-04 06:55:49] (step=0053717) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.510076306006653, LR: 0.0003 +[2026-03-04 06:55:57] (step=0053718) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.510271962433967, LR: 0.0003 +[2026-03-04 06:56:05] (step=0053719) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.510467618861279, LR: 0.0003 +[2026-03-04 06:56:13] (step=0053720) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.510663275288593, LR: 0.0003 +[2026-03-04 06:56:21] (step=0053721) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 10.510858931715907, LR: 0.0003 +[2026-03-04 06:56:29] (step=0053722) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.511054588143221, LR: 0.0003 +[2026-03-04 06:56:36] (step=0053723) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.511250244570535, LR: 0.0003 +[2026-03-04 06:56:44] (step=0053724) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.511445900997847, LR: 0.0003 +[2026-03-04 06:56:52] (step=0053725) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 10.511641557425161, LR: 0.0003 +[2026-03-04 06:57:00] (step=0053726) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 10.511837213852475, LR: 0.0003 +[2026-03-04 06:57:08] (step=0053727) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.51203287027979, LR: 0.0003 +[2026-03-04 06:57:16] (step=0053728) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.512228526707103, LR: 0.0003 +[2026-03-04 06:57:24] (step=0053729) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.512424183134415, LR: 0.0003 +[2026-03-04 06:57:31] (step=0053730) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.51261983956173, LR: 0.0003 +[2026-03-04 06:57:39] (step=0053731) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.512815495989043, LR: 0.0003 +[2026-03-04 06:57:47] (step=0053732) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.513011152416357, LR: 0.0003 +[2026-03-04 06:57:55] (step=0053733) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.51320680884367, LR: 0.0003 +[2026-03-04 06:58:03] (step=0053734) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.513402465270984, LR: 0.0003 +[2026-03-04 06:58:11] (step=0053735) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 10.513598121698298, LR: 0.0003 +[2026-03-04 06:58:19] (step=0053736) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.513793778125612, LR: 0.0003 +[2026-03-04 06:58:26] (step=0053737) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.513989434552926, LR: 0.0003 +[2026-03-04 06:58:34] (step=0053738) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.514185090980238, LR: 0.0003 +[2026-03-04 06:58:42] (step=0053739) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.514380747407552, LR: 0.0003 +[2026-03-04 06:58:50] (step=0053740) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.514576403834866, LR: 0.0003 +[2026-03-04 06:58:58] (step=0053741) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.51477206026218, LR: 0.0003 +[2026-03-04 06:59:06] (step=0053742) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.514967716689494, LR: 0.0003 +[2026-03-04 06:59:14] (step=0053743) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.515163373116806, LR: 0.0003 +[2026-03-04 06:59:21] (step=0053744) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.51535902954412, LR: 0.0003 +[2026-03-04 06:59:30] (step=0053745) Train Loss: 0.4446, Train Steps/Sec: 0.12, Epoch: 10.515554685971434, LR: 0.0003 +[2026-03-04 06:59:37] (step=0053746) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.515750342398748, LR: 0.0003 +[2026-03-04 06:59:45] (step=0053747) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.515945998826062, LR: 0.0003 +[2026-03-04 06:59:53] (step=0053748) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.516141655253374, LR: 0.0003 +[2026-03-04 07:00:01] (step=0053749) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.516337311680688, LR: 0.0003 +[2026-03-04 07:00:09] (step=0053750) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 10.516532968108002, LR: 0.0003 +[2026-03-04 07:00:17] (step=0053751) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.516728624535316, LR: 0.0003 +[2026-03-04 07:00:25] (step=0053752) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.51692428096263, LR: 0.0003 +[2026-03-04 07:00:32] (step=0053753) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.517119937389943, LR: 0.0003 +[2026-03-04 07:00:40] (step=0053754) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.517315593817257, LR: 0.0003 +[2026-03-04 07:00:48] (step=0053755) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.51751125024457, LR: 0.0003 +[2026-03-04 07:00:56] (step=0053756) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.517706906671885, LR: 0.0003 +[2026-03-04 07:01:04] (step=0053757) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.517902563099199, LR: 0.0003 +[2026-03-04 07:01:12] (step=0053758) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.518098219526511, LR: 0.0003 +[2026-03-04 07:01:20] (step=0053759) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.518293875953825, LR: 0.0003 +[2026-03-04 07:01:28] (step=0053760) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.518489532381139, LR: 0.0003 +[2026-03-04 07:01:35] (step=0053761) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.518685188808453, LR: 0.0003 +[2026-03-04 07:01:43] (step=0053762) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.518880845235765, LR: 0.0003 +[2026-03-04 07:01:51] (step=0053763) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.51907650166308, LR: 0.0003 +[2026-03-04 07:01:59] (step=0053764) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.519272158090393, LR: 0.0003 +[2026-03-04 07:02:07] (step=0053765) Train Loss: 0.4507, Train Steps/Sec: 0.12, Epoch: 10.519467814517707, LR: 0.0003 +[2026-03-04 07:02:15] (step=0053766) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.519663470945021, LR: 0.0003 +[2026-03-04 07:02:23] (step=0053767) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.519859127372333, LR: 0.0003 +[2026-03-04 07:02:31] (step=0053768) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.520054783799647, LR: 0.0003 +[2026-03-04 07:02:38] (step=0053769) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.520250440226961, LR: 0.0003 +[2026-03-04 07:02:46] (step=0053770) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.520446096654275, LR: 0.0003 +[2026-03-04 07:02:54] (step=0053771) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.52064175308159, LR: 0.0003 +[2026-03-04 07:03:02] (step=0053772) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.520837409508902, LR: 0.0003 +[2026-03-04 07:03:10] (step=0053773) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 10.521033065936216, LR: 0.0003 +[2026-03-04 07:03:18] (step=0053774) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.52122872236353, LR: 0.0003 +[2026-03-04 07:03:26] (step=0053775) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.521424378790844, LR: 0.0003 +[2026-03-04 07:03:33] (step=0053776) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.521620035218158, LR: 0.0003 +[2026-03-04 07:03:41] (step=0053777) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 10.52181569164547, LR: 0.0003 +[2026-03-04 07:03:49] (step=0053778) Train Loss: 0.4231, Train Steps/Sec: 0.13, Epoch: 10.522011348072784, LR: 0.0003 +[2026-03-04 07:03:57] (step=0053779) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.522207004500098, LR: 0.0003 +[2026-03-04 07:04:05] (step=0053780) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.522402660927412, LR: 0.0003 +[2026-03-04 07:04:13] (step=0053781) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.522598317354726, LR: 0.0003 +[2026-03-04 07:04:21] (step=0053782) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.522793973782038, LR: 0.0003 +[2026-03-04 07:04:29] (step=0053783) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.522989630209352, LR: 0.0003 +[2026-03-04 07:04:36] (step=0053784) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.523185286636666, LR: 0.0003 +[2026-03-04 07:04:44] (step=0053785) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.52338094306398, LR: 0.0003 +[2026-03-04 07:04:52] (step=0053786) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.523576599491292, LR: 0.0003 +[2026-03-04 07:05:00] (step=0053787) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.523772255918606, LR: 0.0003 +[2026-03-04 07:05:08] (step=0053788) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.52396791234592, LR: 0.0003 +[2026-03-04 07:05:16] (step=0053789) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.524163568773234, LR: 0.0003 +[2026-03-04 07:05:24] (step=0053790) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.524359225200548, LR: 0.0003 +[2026-03-04 07:05:31] (step=0053791) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.52455488162786, LR: 0.0003 +[2026-03-04 07:05:39] (step=0053792) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.524750538055175, LR: 0.0003 +[2026-03-04 07:05:47] (step=0053793) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.524946194482489, LR: 0.0003 +[2026-03-04 07:05:55] (step=0053794) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.525141850909803, LR: 0.0003 +[2026-03-04 07:06:03] (step=0053795) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.525337507337117, LR: 0.0003 +[2026-03-04 07:06:11] (step=0053796) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.525533163764429, LR: 0.0003 +[2026-03-04 07:06:19] (step=0053797) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.525728820191743, LR: 0.0003 +[2026-03-04 07:06:27] (step=0053798) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.525924476619057, LR: 0.0003 +[2026-03-04 07:06:34] (step=0053799) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.526120133046371, LR: 0.0003 +[2026-03-04 07:06:42] (step=0053800) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.526315789473685, LR: 0.0003 +[2026-03-04 07:06:50] (step=0053801) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.526511445900997, LR: 0.0003 +[2026-03-04 07:06:58] (step=0053802) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.526707102328311, LR: 0.0003 +[2026-03-04 07:07:06] (step=0053803) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.526902758755625, LR: 0.0003 +[2026-03-04 07:07:14] (step=0053804) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.52709841518294, LR: 0.0003 +[2026-03-04 07:07:22] (step=0053805) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.527294071610253, LR: 0.0003 +[2026-03-04 07:07:29] (step=0053806) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.527489728037565, LR: 0.0003 +[2026-03-04 07:07:37] (step=0053807) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.52768538446488, LR: 0.0003 +[2026-03-04 07:07:45] (step=0053808) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 10.527881040892193, LR: 0.0003 +[2026-03-04 07:07:53] (step=0053809) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 10.528076697319507, LR: 0.0003 +[2026-03-04 07:08:01] (step=0053810) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.528272353746821, LR: 0.0003 +[2026-03-04 07:08:09] (step=0053811) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.528468010174134, LR: 0.0003 +[2026-03-04 07:08:17] (step=0053812) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.528663666601448, LR: 0.0003 +[2026-03-04 07:08:24] (step=0053813) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.528859323028762, LR: 0.0003 +[2026-03-04 07:08:32] (step=0053814) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.529054979456076, LR: 0.0003 +[2026-03-04 07:08:40] (step=0053815) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 10.529250635883388, LR: 0.0003 +[2026-03-04 07:08:48] (step=0053816) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.529446292310702, LR: 0.0003 +[2026-03-04 07:08:56] (step=0053817) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.529641948738016, LR: 0.0003 +[2026-03-04 07:09:04] (step=0053818) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.52983760516533, LR: 0.0003 +[2026-03-04 07:09:12] (step=0053819) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.530033261592644, LR: 0.0003 +[2026-03-04 07:09:20] (step=0053820) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.530228918019956, LR: 0.0003 +[2026-03-04 07:09:27] (step=0053821) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.53042457444727, LR: 0.0003 +[2026-03-04 07:09:35] (step=0053822) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.530620230874584, LR: 0.0003 +[2026-03-04 07:09:43] (step=0053823) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.530815887301898, LR: 0.0003 +[2026-03-04 07:09:51] (step=0053824) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 10.531011543729212, LR: 0.0003 +[2026-03-04 07:09:59] (step=0053825) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.531207200156524, LR: 0.0003 +[2026-03-04 07:10:07] (step=0053826) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.531402856583838, LR: 0.0003 +[2026-03-04 07:10:15] (step=0053827) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.531598513011152, LR: 0.0003 +[2026-03-04 07:10:23] (step=0053828) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.531794169438466, LR: 0.0003 +[2026-03-04 07:10:30] (step=0053829) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.53198982586578, LR: 0.0003 +[2026-03-04 07:10:38] (step=0053830) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.532185482293093, LR: 0.0003 +[2026-03-04 07:10:46] (step=0053831) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 10.532381138720407, LR: 0.0003 +[2026-03-04 07:10:54] (step=0053832) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.53257679514772, LR: 0.0003 +[2026-03-04 07:11:02] (step=0053833) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.532772451575035, LR: 0.0003 +[2026-03-04 07:11:10] (step=0053834) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.532968108002349, LR: 0.0003 +[2026-03-04 07:11:18] (step=0053835) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.533163764429661, LR: 0.0003 +[2026-03-04 07:11:25] (step=0053836) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.533359420856975, LR: 0.0003 +[2026-03-04 07:11:33] (step=0053837) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.533555077284289, LR: 0.0003 +[2026-03-04 07:11:41] (step=0053838) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.533750733711603, LR: 0.0003 +[2026-03-04 07:11:49] (step=0053839) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.533946390138915, LR: 0.0003 +[2026-03-04 07:11:57] (step=0053840) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.53414204656623, LR: 0.0003 +[2026-03-04 07:12:05] (step=0053841) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.534337702993543, LR: 0.0003 +[2026-03-04 07:12:13] (step=0053842) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.534533359420857, LR: 0.0003 +[2026-03-04 07:12:21] (step=0053843) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.534729015848171, LR: 0.0003 +[2026-03-04 07:12:29] (step=0053844) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.534924672275483, LR: 0.0003 +[2026-03-04 07:12:36] (step=0053845) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 10.535120328702797, LR: 0.0003 +[2026-03-04 07:12:44] (step=0053846) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.535315985130111, LR: 0.0003 +[2026-03-04 07:12:52] (step=0053847) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.535511641557425, LR: 0.0003 +[2026-03-04 07:13:00] (step=0053848) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.53570729798474, LR: 0.0003 +[2026-03-04 07:13:08] (step=0053849) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.535902954412052, LR: 0.0003 +[2026-03-04 07:13:16] (step=0053850) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.536098610839366, LR: 0.0003 +[2026-03-04 07:13:24] (step=0053851) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.53629426726668, LR: 0.0003 +[2026-03-04 07:13:31] (step=0053852) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.536489923693994, LR: 0.0003 +[2026-03-04 07:13:39] (step=0053853) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.536685580121308, LR: 0.0003 +[2026-03-04 07:13:47] (step=0053854) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.53688123654862, LR: 0.0003 +[2026-03-04 07:13:55] (step=0053855) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.537076892975934, LR: 0.0003 +[2026-03-04 07:14:03] (step=0053856) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.537272549403248, LR: 0.0003 +[2026-03-04 07:14:11] (step=0053857) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.537468205830562, LR: 0.0003 +[2026-03-04 07:14:18] (step=0053858) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.537663862257876, LR: 0.0003 +[2026-03-04 07:14:26] (step=0053859) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.537859518685188, LR: 0.0003 +[2026-03-04 07:14:34] (step=0053860) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.538055175112502, LR: 0.0003 +[2026-03-04 07:14:42] (step=0053861) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.538250831539816, LR: 0.0003 +[2026-03-04 07:14:50] (step=0053862) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 10.53844648796713, LR: 0.0003 +[2026-03-04 07:14:58] (step=0053863) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.538642144394444, LR: 0.0003 +[2026-03-04 07:15:06] (step=0053864) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.538837800821756, LR: 0.0003 +[2026-03-04 07:15:14] (step=0053865) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.53903345724907, LR: 0.0003 +[2026-03-04 07:15:21] (step=0053866) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 10.539229113676384, LR: 0.0003 +[2026-03-04 07:15:29] (step=0053867) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.539424770103698, LR: 0.0003 +[2026-03-04 07:15:37] (step=0053868) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.53962042653101, LR: 0.0003 +[2026-03-04 07:15:45] (step=0053869) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.539816082958325, LR: 0.0003 +[2026-03-04 07:15:53] (step=0053870) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.540011739385639, LR: 0.0003 +[2026-03-04 07:16:01] (step=0053871) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.540207395812953, LR: 0.0003 +[2026-03-04 07:16:09] (step=0053872) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.540403052240267, LR: 0.0003 +[2026-03-04 07:16:17] (step=0053873) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.540598708667579, LR: 0.0003 +[2026-03-04 07:16:24] (step=0053874) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.540794365094893, LR: 0.0003 +[2026-03-04 07:16:32] (step=0053875) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.540990021522207, LR: 0.0003 +[2026-03-04 07:16:40] (step=0053876) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.541185677949521, LR: 0.0003 +[2026-03-04 07:16:48] (step=0053877) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.541381334376835, LR: 0.0003 +[2026-03-04 07:16:56] (step=0053878) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.541576990804147, LR: 0.0003 +[2026-03-04 07:17:04] (step=0053879) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.541772647231461, LR: 0.0003 +[2026-03-04 07:17:12] (step=0053880) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.541968303658775, LR: 0.0003 +[2026-03-04 07:17:19] (step=0053881) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.54216396008609, LR: 0.0003 +[2026-03-04 07:17:27] (step=0053882) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.542359616513403, LR: 0.0003 +[2026-03-04 07:17:35] (step=0053883) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.542555272940715, LR: 0.0003 +[2026-03-04 07:17:43] (step=0053884) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.54275092936803, LR: 0.0003 +[2026-03-04 07:17:51] (step=0053885) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.542946585795343, LR: 0.0003 +[2026-03-04 07:17:59] (step=0053886) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.543142242222658, LR: 0.0003 +[2026-03-04 07:18:06] (step=0053887) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 10.543337898649972, LR: 0.0003 +[2026-03-04 07:18:14] (step=0053888) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 10.543533555077284, LR: 0.0003 +[2026-03-04 07:18:22] (step=0053889) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.543729211504598, LR: 0.0003 +[2026-03-04 07:18:30] (step=0053890) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 10.543924867931912, LR: 0.0003 +[2026-03-04 07:18:38] (step=0053891) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.544120524359226, LR: 0.0003 +[2026-03-04 07:18:46] (step=0053892) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 10.544316180786538, LR: 0.0003 +[2026-03-04 07:18:54] (step=0053893) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.544511837213852, LR: 0.0003 +[2026-03-04 07:19:02] (step=0053894) Train Loss: 0.4485, Train Steps/Sec: 0.12, Epoch: 10.544707493641166, LR: 0.0003 +[2026-03-04 07:19:09] (step=0053895) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.54490315006848, LR: 0.0003 +[2026-03-04 07:19:17] (step=0053896) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.545098806495794, LR: 0.0003 +[2026-03-04 07:19:25] (step=0053897) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.545294462923106, LR: 0.0003 +[2026-03-04 07:19:33] (step=0053898) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 10.54549011935042, LR: 0.0003 +[2026-03-04 07:19:41] (step=0053899) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 10.545685775777734, LR: 0.0003 +[2026-03-04 07:19:49] (step=0053900) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.545881432205048, LR: 0.0003 +[2026-03-04 07:19:57] (step=0053901) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.546077088632362, LR: 0.0003 +[2026-03-04 07:20:04] (step=0053902) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.546272745059674, LR: 0.0003 +[2026-03-04 07:20:12] (step=0053903) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.546468401486988, LR: 0.0003 +[2026-03-04 07:20:20] (step=0053904) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.546664057914303, LR: 0.0003 +[2026-03-04 07:20:28] (step=0053905) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 10.546859714341617, LR: 0.0003 +[2026-03-04 07:20:36] (step=0053906) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.54705537076893, LR: 0.0003 +[2026-03-04 07:20:44] (step=0053907) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.547251027196243, LR: 0.0003 +[2026-03-04 07:20:52] (step=0053908) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.547446683623557, LR: 0.0003 +[2026-03-04 07:20:59] (step=0053909) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.54764234005087, LR: 0.0003 +[2026-03-04 07:21:07] (step=0053910) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.547837996478185, LR: 0.0003 +[2026-03-04 07:21:15] (step=0053911) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.548033652905499, LR: 0.0003 +[2026-03-04 07:21:23] (step=0053912) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.548229309332811, LR: 0.0003 +[2026-03-04 07:21:31] (step=0053913) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.548424965760125, LR: 0.0003 +[2026-03-04 07:21:39] (step=0053914) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.548620622187439, LR: 0.0003 +[2026-03-04 07:21:46] (step=0053915) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.548816278614753, LR: 0.0003 +[2026-03-04 07:21:54] (step=0053916) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.549011935042065, LR: 0.0003 +[2026-03-04 07:22:02] (step=0053917) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.54920759146938, LR: 0.0003 +[2026-03-04 07:22:10] (step=0053918) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.549403247896693, LR: 0.0003 +[2026-03-04 07:22:18] (step=0053919) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 10.549598904324007, LR: 0.0003 +[2026-03-04 07:22:26] (step=0053920) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.549794560751321, LR: 0.0003 +[2026-03-04 07:22:34] (step=0053921) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.549990217178634, LR: 0.0003 +[2026-03-04 07:22:42] (step=0053922) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.550185873605948, LR: 0.0003 +[2026-03-04 07:22:49] (step=0053923) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.550381530033262, LR: 0.0003 +[2026-03-04 07:22:57] (step=0053924) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.550577186460576, LR: 0.0003 +[2026-03-04 07:23:05] (step=0053925) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.55077284288789, LR: 0.0003 +[2026-03-04 07:23:13] (step=0053926) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.550968499315202, LR: 0.0003 +[2026-03-04 07:23:21] (step=0053927) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.551164155742516, LR: 0.0003 +[2026-03-04 07:23:29] (step=0053928) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 10.55135981216983, LR: 0.0003 +[2026-03-04 07:23:37] (step=0053929) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.551555468597144, LR: 0.0003 +[2026-03-04 07:23:44] (step=0053930) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.551751125024458, LR: 0.0003 +[2026-03-04 07:23:52] (step=0053931) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.55194678145177, LR: 0.0003 +[2026-03-04 07:24:00] (step=0053932) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.552142437879084, LR: 0.0003 +[2026-03-04 07:24:08] (step=0053933) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.552338094306398, LR: 0.0003 +[2026-03-04 07:24:16] (step=0053934) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.552533750733712, LR: 0.0003 +[2026-03-04 07:24:24] (step=0053935) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.552729407161026, LR: 0.0003 +[2026-03-04 07:24:32] (step=0053936) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.552925063588338, LR: 0.0003 +[2026-03-04 07:24:39] (step=0053937) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.553120720015652, LR: 0.0003 +[2026-03-04 07:24:47] (step=0053938) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 10.553316376442966, LR: 0.0003 +[2026-03-04 07:24:55] (step=0053939) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.55351203287028, LR: 0.0003 +[2026-03-04 07:25:03] (step=0053940) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.553707689297594, LR: 0.0003 +[2026-03-04 07:25:11] (step=0053941) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.553903345724907, LR: 0.0003 +[2026-03-04 07:25:19] (step=0053942) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.55409900215222, LR: 0.0003 +[2026-03-04 07:25:27] (step=0053943) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.554294658579535, LR: 0.0003 +[2026-03-04 07:25:35] (step=0053944) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.554490315006849, LR: 0.0003 +[2026-03-04 07:25:42] (step=0053945) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.55468597143416, LR: 0.0003 +[2026-03-04 07:25:50] (step=0053946) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.554881627861475, LR: 0.0003 +[2026-03-04 07:25:58] (step=0053947) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.555077284288789, LR: 0.0003 +[2026-03-04 07:26:06] (step=0053948) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.555272940716103, LR: 0.0003 +[2026-03-04 07:26:14] (step=0053949) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.555468597143417, LR: 0.0003 +[2026-03-04 07:26:22] (step=0053950) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.555664253570729, LR: 0.0003 +[2026-03-04 07:26:30] (step=0053951) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.555859909998043, LR: 0.0003 +[2026-03-04 07:26:37] (step=0053952) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.556055566425357, LR: 0.0003 +[2026-03-04 07:26:45] (step=0053953) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.556251222852671, LR: 0.0003 +[2026-03-04 07:26:53] (step=0053954) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.556446879279985, LR: 0.0003 +[2026-03-04 07:27:01] (step=0053955) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.556642535707297, LR: 0.0003 +[2026-03-04 07:27:09] (step=0053956) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.556838192134611, LR: 0.0003 +[2026-03-04 07:27:17] (step=0053957) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.557033848561925, LR: 0.0003 +[2026-03-04 07:27:25] (step=0053958) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.55722950498924, LR: 0.0003 +[2026-03-04 07:27:32] (step=0053959) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.557425161416553, LR: 0.0003 +[2026-03-04 07:27:40] (step=0053960) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.557620817843866, LR: 0.0003 +[2026-03-04 07:27:48] (step=0053961) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.55781647427118, LR: 0.0003 +[2026-03-04 07:27:56] (step=0053962) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.558012130698494, LR: 0.0003 +[2026-03-04 07:28:04] (step=0053963) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 10.558207787125808, LR: 0.0003 +[2026-03-04 07:28:12] (step=0053964) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.558403443553122, LR: 0.0003 +[2026-03-04 07:28:20] (step=0053965) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 10.558599099980434, LR: 0.0003 +[2026-03-04 07:28:28] (step=0053966) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 10.558794756407748, LR: 0.0003 +[2026-03-04 07:28:36] (step=0053967) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.558990412835062, LR: 0.0003 +[2026-03-04 07:28:43] (step=0053968) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.559186069262376, LR: 0.0003 +[2026-03-04 07:28:51] (step=0053969) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.559381725689688, LR: 0.0003 +[2026-03-04 07:28:59] (step=0053970) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.559577382117002, LR: 0.0003 +[2026-03-04 07:29:07] (step=0053971) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.559773038544316, LR: 0.0003 +[2026-03-04 07:29:15] (step=0053972) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.55996869497163, LR: 0.0003 +[2026-03-04 07:29:23] (step=0053973) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.560164351398944, LR: 0.0003 +[2026-03-04 07:29:31] (step=0053974) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.560360007826256, LR: 0.0003 +[2026-03-04 07:29:38] (step=0053975) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 10.56055566425357, LR: 0.0003 +[2026-03-04 07:29:46] (step=0053976) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.560751320680884, LR: 0.0003 +[2026-03-04 07:29:54] (step=0053977) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.560946977108198, LR: 0.0003 +[2026-03-04 07:30:02] (step=0053978) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.561142633535512, LR: 0.0003 +[2026-03-04 07:30:10] (step=0053979) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.561338289962825, LR: 0.0003 +[2026-03-04 07:30:18] (step=0053980) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.561533946390139, LR: 0.0003 +[2026-03-04 07:30:26] (step=0053981) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.561729602817453, LR: 0.0003 +[2026-03-04 07:30:33] (step=0053982) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 10.561925259244767, LR: 0.0003 +[2026-03-04 07:30:41] (step=0053983) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 10.56212091567208, LR: 0.0003 +[2026-03-04 07:30:49] (step=0053984) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 10.562316572099393, LR: 0.0003 +[2026-03-04 07:30:57] (step=0053985) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.562512228526707, LR: 0.0003 +[2026-03-04 07:31:05] (step=0053986) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.56270788495402, LR: 0.0003 +[2026-03-04 07:31:13] (step=0053987) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.562903541381335, LR: 0.0003 +[2026-03-04 07:31:21] (step=0053988) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.563099197808649, LR: 0.0003 +[2026-03-04 07:31:29] (step=0053989) Train Loss: 0.4422, Train Steps/Sec: 0.12, Epoch: 10.563294854235961, LR: 0.0003 +[2026-03-04 07:31:37] (step=0053990) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.563490510663275, LR: 0.0003 +[2026-03-04 07:31:44] (step=0053991) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.563686167090589, LR: 0.0003 +[2026-03-04 07:31:52] (step=0053992) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 10.563881823517903, LR: 0.0003 +[2026-03-04 07:32:00] (step=0053993) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.564077479945217, LR: 0.0003 +[2026-03-04 07:32:08] (step=0053994) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.56427313637253, LR: 0.0003 +[2026-03-04 07:32:16] (step=0053995) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.564468792799843, LR: 0.0003 +[2026-03-04 07:32:24] (step=0053996) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.564664449227157, LR: 0.0003 +[2026-03-04 07:32:32] (step=0053997) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.564860105654471, LR: 0.0003 +[2026-03-04 07:32:39] (step=0053998) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.565055762081784, LR: 0.0003 +[2026-03-04 07:32:47] (step=0053999) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.565251418509098, LR: 0.0003 +[2026-03-04 07:32:55] (step=0054000) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 10.565447074936412, LR: 0.0003 +[2026-03-04 07:32:55] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0054000/ +[2026-03-04 07:33:03] (step=0054001) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.565642731363726, LR: 0.0003 +[2026-03-04 07:33:11] (step=0054002) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 10.56583838779104, LR: 0.0003 +[2026-03-04 07:33:19] (step=0054003) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.566034044218352, LR: 0.0003 +[2026-03-04 07:33:27] (step=0054004) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 10.566229700645666, LR: 0.0003 +[2026-03-04 07:33:34] (step=0054005) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.56642535707298, LR: 0.0003 +[2026-03-04 07:33:42] (step=0054006) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.566621013500294, LR: 0.0003 +[2026-03-04 07:33:50] (step=0054007) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.566816669927608, LR: 0.0003 +[2026-03-04 07:33:58] (step=0054008) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.56701232635492, LR: 0.0003 +[2026-03-04 07:34:06] (step=0054009) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 10.567207982782234, LR: 0.0003 +[2026-03-04 07:34:14] (step=0054010) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.567403639209548, LR: 0.0003 +[2026-03-04 07:34:22] (step=0054011) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.567599295636862, LR: 0.0003 +[2026-03-04 07:34:29] (step=0054012) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.567794952064176, LR: 0.0003 +[2026-03-04 07:34:37] (step=0054013) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.567990608491488, LR: 0.0003 +[2026-03-04 07:34:45] (step=0054014) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 10.568186264918802, LR: 0.0003 +[2026-03-04 07:34:53] (step=0054015) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.568381921346116, LR: 0.0003 +[2026-03-04 07:35:01] (step=0054016) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.56857757777343, LR: 0.0003 +[2026-03-04 07:35:09] (step=0054017) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.568773234200744, LR: 0.0003 +[2026-03-04 07:35:17] (step=0054018) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.568968890628057, LR: 0.0003 +[2026-03-04 07:35:24] (step=0054019) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.56916454705537, LR: 0.0003 +[2026-03-04 07:35:32] (step=0054020) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.569360203482685, LR: 0.0003 +[2026-03-04 07:35:40] (step=0054021) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.569555859909999, LR: 0.0003 +[2026-03-04 07:35:48] (step=0054022) Train Loss: 0.4196, Train Steps/Sec: 0.13, Epoch: 10.56975151633731, LR: 0.0003 +[2026-03-04 07:35:56] (step=0054023) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.569947172764625, LR: 0.0003 +[2026-03-04 07:36:04] (step=0054024) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.570142829191939, LR: 0.0003 +[2026-03-04 07:36:12] (step=0054025) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.570338485619253, LR: 0.0003 +[2026-03-04 07:36:20] (step=0054026) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.570534142046567, LR: 0.0003 +[2026-03-04 07:36:27] (step=0054027) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.570729798473879, LR: 0.0003 +[2026-03-04 07:36:35] (step=0054028) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.570925454901193, LR: 0.0003 +[2026-03-04 07:36:43] (step=0054029) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 10.571121111328507, LR: 0.0003 +[2026-03-04 07:36:51] (step=0054030) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.571316767755821, LR: 0.0003 +[2026-03-04 07:36:59] (step=0054031) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.571512424183135, LR: 0.0003 +[2026-03-04 07:37:07] (step=0054032) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.571708080610447, LR: 0.0003 +[2026-03-04 07:37:15] (step=0054033) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.571903737037761, LR: 0.0003 +[2026-03-04 07:37:22] (step=0054034) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.572099393465075, LR: 0.0003 +[2026-03-04 07:37:30] (step=0054035) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.57229504989239, LR: 0.0003 +[2026-03-04 07:37:38] (step=0054036) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.572490706319703, LR: 0.0003 +[2026-03-04 07:37:46] (step=0054037) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.572686362747016, LR: 0.0003 +[2026-03-04 07:37:54] (step=0054038) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.57288201917433, LR: 0.0003 +[2026-03-04 07:38:02] (step=0054039) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.573077675601644, LR: 0.0003 +[2026-03-04 07:38:10] (step=0054040) Train Loss: 0.4301, Train Steps/Sec: 0.12, Epoch: 10.573273332028958, LR: 0.0003 +[2026-03-04 07:38:18] (step=0054041) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.573468988456272, LR: 0.0003 +[2026-03-04 07:38:26] (step=0054042) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.573664644883584, LR: 0.0003 +[2026-03-04 07:38:33] (step=0054043) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.573860301310898, LR: 0.0003 +[2026-03-04 07:38:41] (step=0054044) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.574055957738212, LR: 0.0003 +[2026-03-04 07:38:49] (step=0054045) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.574251614165526, LR: 0.0003 +[2026-03-04 07:38:57] (step=0054046) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.57444727059284, LR: 0.0003 +[2026-03-04 07:39:05] (step=0054047) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 10.574642927020152, LR: 0.0003 +[2026-03-04 07:39:13] (step=0054048) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.574838583447466, LR: 0.0003 +[2026-03-04 07:39:21] (step=0054049) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.57503423987478, LR: 0.0003 +[2026-03-04 07:39:28] (step=0054050) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 10.575229896302094, LR: 0.0003 +[2026-03-04 07:39:36] (step=0054051) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.575425552729406, LR: 0.0003 +[2026-03-04 07:39:44] (step=0054052) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 10.57562120915672, LR: 0.0003 +[2026-03-04 07:39:52] (step=0054053) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.575816865584034, LR: 0.0003 +[2026-03-04 07:40:00] (step=0054054) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.576012522011348, LR: 0.0003 +[2026-03-04 07:40:08] (step=0054055) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.576208178438662, LR: 0.0003 +[2026-03-04 07:40:16] (step=0054056) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.576403834865975, LR: 0.0003 +[2026-03-04 07:40:23] (step=0054057) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.576599491293289, LR: 0.0003 +[2026-03-04 07:40:31] (step=0054058) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.576795147720603, LR: 0.0003 +[2026-03-04 07:40:39] (step=0054059) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.576990804147917, LR: 0.0003 +[2026-03-04 07:40:47] (step=0054060) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.57718646057523, LR: 0.0003 +[2026-03-04 07:40:55] (step=0054061) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.577382117002543, LR: 0.0003 +[2026-03-04 07:41:03] (step=0054062) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 10.577577773429857, LR: 0.0003 +[2026-03-04 07:41:11] (step=0054063) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.57777342985717, LR: 0.0003 +[2026-03-04 07:41:19] (step=0054064) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.577969086284485, LR: 0.0003 +[2026-03-04 07:41:26] (step=0054065) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.578164742711799, LR: 0.0003 +[2026-03-04 07:41:34] (step=0054066) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.578360399139111, LR: 0.0003 +[2026-03-04 07:41:42] (step=0054067) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.578556055566425, LR: 0.0003 +[2026-03-04 07:41:50] (step=0054068) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.578751711993739, LR: 0.0003 +[2026-03-04 07:41:58] (step=0054069) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.578947368421053, LR: 0.0003 +[2026-03-04 07:42:06] (step=0054070) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.579143024848367, LR: 0.0003 +[2026-03-04 07:42:14] (step=0054071) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.57933868127568, LR: 0.0003 +[2026-03-04 07:42:22] (step=0054072) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.579534337702993, LR: 0.0003 +[2026-03-04 07:42:29] (step=0054073) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.579729994130307, LR: 0.0003 +[2026-03-04 07:42:37] (step=0054074) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.579925650557621, LR: 0.0003 +[2026-03-04 07:42:45] (step=0054075) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.580121306984934, LR: 0.0003 +[2026-03-04 07:42:53] (step=0054076) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.580316963412248, LR: 0.0003 +[2026-03-04 07:43:01] (step=0054077) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.580512619839562, LR: 0.0003 +[2026-03-04 07:43:09] (step=0054078) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.580708276266876, LR: 0.0003 +[2026-03-04 07:43:17] (step=0054079) Train Loss: 0.4261, Train Steps/Sec: 0.13, Epoch: 10.58090393269419, LR: 0.0003 +[2026-03-04 07:43:24] (step=0054080) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 10.581099589121502, LR: 0.0003 +[2026-03-04 07:43:32] (step=0054081) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.581295245548816, LR: 0.0003 +[2026-03-04 07:43:40] (step=0054082) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.58149090197613, LR: 0.0003 +[2026-03-04 07:43:48] (step=0054083) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.581686558403444, LR: 0.0003 +[2026-03-04 07:43:56] (step=0054084) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.581882214830758, LR: 0.0003 +[2026-03-04 07:44:04] (step=0054085) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.58207787125807, LR: 0.0003 +[2026-03-04 07:44:12] (step=0054086) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 10.582273527685384, LR: 0.0003 +[2026-03-04 07:44:19] (step=0054087) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.582469184112698, LR: 0.0003 +[2026-03-04 07:44:27] (step=0054088) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.582664840540012, LR: 0.0003 +[2026-03-04 07:44:35] (step=0054089) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.582860496967326, LR: 0.0003 +[2026-03-04 07:44:43] (step=0054090) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.583056153394638, LR: 0.0003 +[2026-03-04 07:44:51] (step=0054091) Train Loss: 0.4532, Train Steps/Sec: 0.12, Epoch: 10.583251809821952, LR: 0.0003 +[2026-03-04 07:44:59] (step=0054092) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.583447466249266, LR: 0.0003 +[2026-03-04 07:45:07] (step=0054093) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.58364312267658, LR: 0.0003 +[2026-03-04 07:45:15] (step=0054094) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.583838779103894, LR: 0.0003 +[2026-03-04 07:45:23] (step=0054095) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.584034435531207, LR: 0.0003 +[2026-03-04 07:45:30] (step=0054096) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.58423009195852, LR: 0.0003 +[2026-03-04 07:45:38] (step=0054097) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.584425748385835, LR: 0.0003 +[2026-03-04 07:45:46] (step=0054098) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.584621404813149, LR: 0.0003 +[2026-03-04 07:45:54] (step=0054099) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.584817061240463, LR: 0.0003 +[2026-03-04 07:46:02] (step=0054100) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.585012717667775, LR: 0.0003 +[2026-03-04 07:46:10] (step=0054101) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.585208374095089, LR: 0.0003 +[2026-03-04 07:46:18] (step=0054102) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.585404030522403, LR: 0.0003 +[2026-03-04 07:46:25] (step=0054103) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.585599686949717, LR: 0.0003 +[2026-03-04 07:46:33] (step=0054104) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.585795343377029, LR: 0.0003 +[2026-03-04 07:46:41] (step=0054105) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.585990999804343, LR: 0.0003 +[2026-03-04 07:46:49] (step=0054106) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.586186656231657, LR: 0.0003 +[2026-03-04 07:46:57] (step=0054107) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 10.586382312658971, LR: 0.0003 +[2026-03-04 07:47:05] (step=0054108) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.586577969086285, LR: 0.0003 +[2026-03-04 07:47:13] (step=0054109) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.586773625513597, LR: 0.0003 +[2026-03-04 07:47:20] (step=0054110) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.586969281940911, LR: 0.0003 +[2026-03-04 07:47:28] (step=0054111) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 10.587164938368225, LR: 0.0003 +[2026-03-04 07:47:36] (step=0054112) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.58736059479554, LR: 0.0003 +[2026-03-04 07:47:44] (step=0054113) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.587556251222853, LR: 0.0003 +[2026-03-04 07:47:52] (step=0054114) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.587751907650166, LR: 0.0003 +[2026-03-04 07:48:00] (step=0054115) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.58794756407748, LR: 0.0003 +[2026-03-04 07:48:08] (step=0054116) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.588143220504794, LR: 0.0003 +[2026-03-04 07:48:16] (step=0054117) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.588338876932108, LR: 0.0003 +[2026-03-04 07:48:23] (step=0054118) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.588534533359422, LR: 0.0003 +[2026-03-04 07:48:31] (step=0054119) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.588730189786734, LR: 0.0003 +[2026-03-04 07:48:39] (step=0054120) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.588925846214048, LR: 0.0003 +[2026-03-04 07:48:47] (step=0054121) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.589121502641362, LR: 0.0003 +[2026-03-04 07:48:55] (step=0054122) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.589317159068676, LR: 0.0003 +[2026-03-04 07:49:03] (step=0054123) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 10.58951281549599, LR: 0.0003 +[2026-03-04 07:49:11] (step=0054124) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.589708471923302, LR: 0.0003 +[2026-03-04 07:49:19] (step=0054125) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.589904128350616, LR: 0.0003 +[2026-03-04 07:49:26] (step=0054126) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.59009978477793, LR: 0.0003 +[2026-03-04 07:49:34] (step=0054127) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.590295441205244, LR: 0.0003 +[2026-03-04 07:49:42] (step=0054128) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.590491097632556, LR: 0.0003 +[2026-03-04 07:49:50] (step=0054129) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.59068675405987, LR: 0.0003 +[2026-03-04 07:49:58] (step=0054130) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.590882410487184, LR: 0.0003 +[2026-03-04 07:50:06] (step=0054131) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.591078066914498, LR: 0.0003 +[2026-03-04 07:50:14] (step=0054132) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.591273723341812, LR: 0.0003 +[2026-03-04 07:50:21] (step=0054133) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.591469379769125, LR: 0.0003 +[2026-03-04 07:50:29] (step=0054134) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.591665036196439, LR: 0.0003 +[2026-03-04 07:50:37] (step=0054135) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.591860692623753, LR: 0.0003 +[2026-03-04 07:50:45] (step=0054136) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.592056349051067, LR: 0.0003 +[2026-03-04 07:50:53] (step=0054137) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 10.59225200547838, LR: 0.0003 +[2026-03-04 07:51:01] (step=0054138) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.592447661905693, LR: 0.0003 +[2026-03-04 07:51:09] (step=0054139) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.592643318333007, LR: 0.0003 +[2026-03-04 07:51:16] (step=0054140) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.59283897476032, LR: 0.0003 +[2026-03-04 07:51:24] (step=0054141) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.593034631187635, LR: 0.0003 +[2026-03-04 07:51:32] (step=0054142) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.593230287614949, LR: 0.0003 +[2026-03-04 07:51:40] (step=0054143) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.593425944042261, LR: 0.0003 +[2026-03-04 07:51:48] (step=0054144) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.593621600469575, LR: 0.0003 +[2026-03-04 07:51:56] (step=0054145) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.59381725689689, LR: 0.0003 +[2026-03-04 07:52:04] (step=0054146) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.594012913324203, LR: 0.0003 +[2026-03-04 07:52:11] (step=0054147) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.594208569751517, LR: 0.0003 +[2026-03-04 07:52:19] (step=0054148) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.59440422617883, LR: 0.0003 +[2026-03-04 07:52:27] (step=0054149) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.594599882606143, LR: 0.0003 +[2026-03-04 07:52:35] (step=0054150) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.594795539033457, LR: 0.0003 +[2026-03-04 07:52:43] (step=0054151) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.594991195460771, LR: 0.0003 +[2026-03-04 07:52:51] (step=0054152) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.595186851888085, LR: 0.0003 +[2026-03-04 07:52:59] (step=0054153) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.595382508315398, LR: 0.0003 +[2026-03-04 07:53:07] (step=0054154) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.595578164742712, LR: 0.0003 +[2026-03-04 07:53:14] (step=0054155) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.595773821170026, LR: 0.0003 +[2026-03-04 07:53:22] (step=0054156) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.59596947759734, LR: 0.0003 +[2026-03-04 07:53:30] (step=0054157) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.596165134024652, LR: 0.0003 +[2026-03-04 07:53:38] (step=0054158) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.596360790451966, LR: 0.0003 +[2026-03-04 07:53:46] (step=0054159) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.59655644687928, LR: 0.0003 +[2026-03-04 07:53:54] (step=0054160) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.596752103306594, LR: 0.0003 +[2026-03-04 07:54:02] (step=0054161) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.596947759733908, LR: 0.0003 +[2026-03-04 07:54:09] (step=0054162) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.59714341616122, LR: 0.0003 +[2026-03-04 07:54:17] (step=0054163) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.597339072588534, LR: 0.0003 +[2026-03-04 07:54:25] (step=0054164) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.597534729015848, LR: 0.0003 +[2026-03-04 07:54:33] (step=0054165) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.597730385443162, LR: 0.0003 +[2026-03-04 07:54:41] (step=0054166) Train Loss: 0.4440, Train Steps/Sec: 0.12, Epoch: 10.597926041870476, LR: 0.0003 +[2026-03-04 07:54:49] (step=0054167) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 10.598121698297788, LR: 0.0003 +[2026-03-04 07:54:57] (step=0054168) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 10.598317354725102, LR: 0.0003 +[2026-03-04 07:55:05] (step=0054169) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.598513011152416, LR: 0.0003 +[2026-03-04 07:55:12] (step=0054170) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.59870866757973, LR: 0.0003 +[2026-03-04 07:55:20] (step=0054171) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.598904324007044, LR: 0.0003 +[2026-03-04 07:55:28] (step=0054172) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.599099980434357, LR: 0.0003 +[2026-03-04 07:55:36] (step=0054173) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.59929563686167, LR: 0.0003 +[2026-03-04 07:55:44] (step=0054174) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.599491293288985, LR: 0.0003 +[2026-03-04 07:55:52] (step=0054175) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 10.599686949716299, LR: 0.0003 +[2026-03-04 07:56:00] (step=0054176) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.599882606143613, LR: 0.0003 +[2026-03-04 07:56:08] (step=0054177) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.600078262570925, LR: 0.0003 +[2026-03-04 07:56:15] (step=0054178) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.600273918998239, LR: 0.0003 +[2026-03-04 07:56:23] (step=0054179) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.600469575425553, LR: 0.0003 +[2026-03-04 07:56:31] (step=0054180) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.600665231852867, LR: 0.0003 +[2026-03-04 07:56:39] (step=0054181) Train Loss: 0.4261, Train Steps/Sec: 0.13, Epoch: 10.60086088828018, LR: 0.0003 +[2026-03-04 07:56:47] (step=0054182) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.601056544707493, LR: 0.0003 +[2026-03-04 07:56:55] (step=0054183) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.601252201134807, LR: 0.0003 +[2026-03-04 07:57:03] (step=0054184) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.601447857562121, LR: 0.0003 +[2026-03-04 07:57:10] (step=0054185) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.601643513989435, LR: 0.0003 +[2026-03-04 07:57:18] (step=0054186) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 10.601839170416747, LR: 0.0003 +[2026-03-04 07:57:26] (step=0054187) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.602034826844061, LR: 0.0003 +[2026-03-04 07:57:34] (step=0054188) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.602230483271375, LR: 0.0003 +[2026-03-04 07:57:42] (step=0054189) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.60242613969869, LR: 0.0003 +[2026-03-04 07:57:50] (step=0054190) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.602621796126003, LR: 0.0003 +[2026-03-04 07:57:58] (step=0054191) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.602817452553316, LR: 0.0003 +[2026-03-04 07:58:06] (step=0054192) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.60301310898063, LR: 0.0003 +[2026-03-04 07:58:13] (step=0054193) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.603208765407944, LR: 0.0003 +[2026-03-04 07:58:21] (step=0054194) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.603404421835258, LR: 0.0003 +[2026-03-04 07:58:29] (step=0054195) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.603600078262572, LR: 0.0003 +[2026-03-04 07:58:37] (step=0054196) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 10.603795734689884, LR: 0.0003 +[2026-03-04 07:58:45] (step=0054197) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.603991391117198, LR: 0.0003 +[2026-03-04 07:58:53] (step=0054198) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.604187047544512, LR: 0.0003 +[2026-03-04 07:59:00] (step=0054199) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.604382703971826, LR: 0.0003 +[2026-03-04 07:59:08] (step=0054200) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.60457836039914, LR: 0.0003 +[2026-03-04 07:59:16] (step=0054201) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.604774016826452, LR: 0.0003 +[2026-03-04 07:59:24] (step=0054202) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 10.604969673253766, LR: 0.0003 +[2026-03-04 07:59:32] (step=0054203) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.60516532968108, LR: 0.0003 +[2026-03-04 07:59:40] (step=0054204) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.605360986108394, LR: 0.0003 +[2026-03-04 07:59:48] (step=0054205) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.605556642535708, LR: 0.0003 +[2026-03-04 07:59:55] (step=0054206) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.60575229896302, LR: 0.0003 +[2026-03-04 08:00:03] (step=0054207) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.605947955390334, LR: 0.0003 +[2026-03-04 08:00:11] (step=0054208) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.606143611817648, LR: 0.0003 +[2026-03-04 08:00:19] (step=0054209) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.606339268244962, LR: 0.0003 +[2026-03-04 08:00:27] (step=0054210) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.606534924672275, LR: 0.0003 +[2026-03-04 08:00:35] (step=0054211) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.606730581099589, LR: 0.0003 +[2026-03-04 08:00:43] (step=0054212) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.606926237526903, LR: 0.0003 +[2026-03-04 08:00:51] (step=0054213) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.607121893954217, LR: 0.0003 +[2026-03-04 08:00:59] (step=0054214) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.60731755038153, LR: 0.0003 +[2026-03-04 08:01:06] (step=0054215) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.607513206808843, LR: 0.0003 +[2026-03-04 08:01:14] (step=0054216) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.607708863236157, LR: 0.0003 +[2026-03-04 08:01:22] (step=0054217) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.607904519663471, LR: 0.0003 +[2026-03-04 08:01:30] (step=0054218) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.608100176090785, LR: 0.0003 +[2026-03-04 08:01:38] (step=0054219) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.608295832518099, LR: 0.0003 +[2026-03-04 08:01:46] (step=0054220) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.608491488945411, LR: 0.0003 +[2026-03-04 08:01:54] (step=0054221) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.608687145372725, LR: 0.0003 +[2026-03-04 08:02:01] (step=0054222) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.60888280180004, LR: 0.0003 +[2026-03-04 08:02:09] (step=0054223) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.609078458227353, LR: 0.0003 +[2026-03-04 08:02:17] (step=0054224) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.609274114654667, LR: 0.0003 +[2026-03-04 08:02:25] (step=0054225) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.60946977108198, LR: 0.0003 +[2026-03-04 08:02:33] (step=0054226) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.609665427509293, LR: 0.0003 +[2026-03-04 08:02:41] (step=0054227) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.609861083936607, LR: 0.0003 +[2026-03-04 08:02:49] (step=0054228) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.610056740363921, LR: 0.0003 +[2026-03-04 08:02:56] (step=0054229) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.610252396791235, LR: 0.0003 +[2026-03-04 08:03:04] (step=0054230) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.610448053218548, LR: 0.0003 +[2026-03-04 08:03:12] (step=0054231) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.610643709645862, LR: 0.0003 +[2026-03-04 08:03:20] (step=0054232) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.610839366073176, LR: 0.0003 +[2026-03-04 08:03:28] (step=0054233) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.61103502250049, LR: 0.0003 +[2026-03-04 08:03:36] (step=0054234) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.611230678927802, LR: 0.0003 +[2026-03-04 08:03:44] (step=0054235) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.611426335355116, LR: 0.0003 +[2026-03-04 08:03:51] (step=0054236) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.61162199178243, LR: 0.0003 +[2026-03-04 08:03:59] (step=0054237) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.611817648209744, LR: 0.0003 +[2026-03-04 08:04:07] (step=0054238) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.612013304637058, LR: 0.0003 +[2026-03-04 08:04:15] (step=0054239) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.61220896106437, LR: 0.0003 +[2026-03-04 08:04:23] (step=0054240) Train Loss: 0.4451, Train Steps/Sec: 0.12, Epoch: 10.612404617491684, LR: 0.0003 +[2026-03-04 08:04:31] (step=0054241) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.612600273918998, LR: 0.0003 +[2026-03-04 08:04:39] (step=0054242) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.612795930346312, LR: 0.0003 +[2026-03-04 08:04:47] (step=0054243) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 10.612991586773626, LR: 0.0003 +[2026-03-04 08:04:54] (step=0054244) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.613187243200938, LR: 0.0003 +[2026-03-04 08:05:02] (step=0054245) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.613382899628252, LR: 0.0003 +[2026-03-04 08:05:10] (step=0054246) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.613578556055566, LR: 0.0003 +[2026-03-04 08:05:18] (step=0054247) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.61377421248288, LR: 0.0003 +[2026-03-04 08:05:26] (step=0054248) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.613969868910194, LR: 0.0003 +[2026-03-04 08:05:34] (step=0054249) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.614165525337507, LR: 0.0003 +[2026-03-04 08:05:42] (step=0054250) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.61436118176482, LR: 0.0003 +[2026-03-04 08:05:49] (step=0054251) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.614556838192135, LR: 0.0003 +[2026-03-04 08:05:57] (step=0054252) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.614752494619449, LR: 0.0003 +[2026-03-04 08:06:05] (step=0054253) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.614948151046763, LR: 0.0003 +[2026-03-04 08:06:13] (step=0054254) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.615143807474075, LR: 0.0003 +[2026-03-04 08:06:21] (step=0054255) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.615339463901389, LR: 0.0003 +[2026-03-04 08:06:29] (step=0054256) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.615535120328703, LR: 0.0003 +[2026-03-04 08:06:37] (step=0054257) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.615730776756017, LR: 0.0003 +[2026-03-04 08:06:45] (step=0054258) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.615926433183331, LR: 0.0003 +[2026-03-04 08:06:52] (step=0054259) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.616122089610643, LR: 0.0003 +[2026-03-04 08:07:00] (step=0054260) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.616317746037957, LR: 0.0003 +[2026-03-04 08:07:08] (step=0054261) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.616513402465271, LR: 0.0003 +[2026-03-04 08:07:16] (step=0054262) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.616709058892585, LR: 0.0003 +[2026-03-04 08:07:24] (step=0054263) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 10.616904715319897, LR: 0.0003 +[2026-03-04 08:07:32] (step=0054264) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.617100371747211, LR: 0.0003 +[2026-03-04 08:07:40] (step=0054265) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.617296028174525, LR: 0.0003 +[2026-03-04 08:07:47] (step=0054266) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.61749168460184, LR: 0.0003 +[2026-03-04 08:07:55] (step=0054267) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.617687341029153, LR: 0.0003 +[2026-03-04 08:08:03] (step=0054268) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.617882997456466, LR: 0.0003 +[2026-03-04 08:08:11] (step=0054269) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 10.61807865388378, LR: 0.0003 +[2026-03-04 08:08:19] (step=0054270) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.618274310311094, LR: 0.0003 +[2026-03-04 08:08:27] (step=0054271) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 10.618469966738408, LR: 0.0003 +[2026-03-04 08:08:35] (step=0054272) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.618665623165722, LR: 0.0003 +[2026-03-04 08:08:42] (step=0054273) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.618861279593034, LR: 0.0003 +[2026-03-04 08:08:50] (step=0054274) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 10.619056936020348, LR: 0.0003 +[2026-03-04 08:08:58] (step=0054275) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.619252592447662, LR: 0.0003 +[2026-03-04 08:09:06] (step=0054276) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.619448248874976, LR: 0.0003 +[2026-03-04 08:09:14] (step=0054277) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 10.61964390530229, LR: 0.0003 +[2026-03-04 08:09:22] (step=0054278) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.619839561729602, LR: 0.0003 +[2026-03-04 08:09:30] (step=0054279) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.620035218156916, LR: 0.0003 +[2026-03-04 08:09:37] (step=0054280) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.62023087458423, LR: 0.0003 +[2026-03-04 08:09:45] (step=0054281) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.620426531011544, LR: 0.0003 +[2026-03-04 08:09:53] (step=0054282) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.620622187438858, LR: 0.0003 +[2026-03-04 08:10:01] (step=0054283) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.62081784386617, LR: 0.0003 +[2026-03-04 08:10:09] (step=0054284) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.621013500293484, LR: 0.0003 +[2026-03-04 08:10:17] (step=0054285) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.621209156720798, LR: 0.0003 +[2026-03-04 08:10:25] (step=0054286) Train Loss: 0.4494, Train Steps/Sec: 0.12, Epoch: 10.621404813148112, LR: 0.0003 +[2026-03-04 08:10:33] (step=0054287) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.621600469575425, LR: 0.0003 +[2026-03-04 08:10:40] (step=0054288) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 10.621796126002739, LR: 0.0003 +[2026-03-04 08:10:48] (step=0054289) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.621991782430053, LR: 0.0003 +[2026-03-04 08:10:56] (step=0054290) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.622187438857367, LR: 0.0003 +[2026-03-04 08:11:04] (step=0054291) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.62238309528468, LR: 0.0003 +[2026-03-04 08:11:12] (step=0054292) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.622578751711993, LR: 0.0003 +[2026-03-04 08:11:20] (step=0054293) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.622774408139307, LR: 0.0003 +[2026-03-04 08:11:28] (step=0054294) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.622970064566621, LR: 0.0003 +[2026-03-04 08:11:36] (step=0054295) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.623165720993935, LR: 0.0003 +[2026-03-04 08:11:43] (step=0054296) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.623361377421249, LR: 0.0003 +[2026-03-04 08:11:51] (step=0054297) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.623557033848561, LR: 0.0003 +[2026-03-04 08:11:59] (step=0054298) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 10.623752690275875, LR: 0.0003 +[2026-03-04 08:12:07] (step=0054299) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.62394834670319, LR: 0.0003 +[2026-03-04 08:12:15] (step=0054300) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 10.624144003130503, LR: 0.0003 +[2026-03-04 08:12:23] (step=0054301) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.624339659557817, LR: 0.0003 +[2026-03-04 08:12:31] (step=0054302) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.62453531598513, LR: 0.0003 +[2026-03-04 08:12:38] (step=0054303) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 10.624730972412443, LR: 0.0003 +[2026-03-04 08:12:46] (step=0054304) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.624926628839757, LR: 0.0003 +[2026-03-04 08:12:54] (step=0054305) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.625122285267071, LR: 0.0003 +[2026-03-04 08:13:02] (step=0054306) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.625317941694385, LR: 0.0003 +[2026-03-04 08:13:10] (step=0054307) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 10.625513598121698, LR: 0.0003 +[2026-03-04 08:13:18] (step=0054308) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.625709254549012, LR: 0.0003 +[2026-03-04 08:13:26] (step=0054309) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 10.625904910976326, LR: 0.0003 +[2026-03-04 08:13:34] (step=0054310) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.62610056740364, LR: 0.0003 +[2026-03-04 08:13:41] (step=0054311) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.626296223830952, LR: 0.0003 +[2026-03-04 08:13:49] (step=0054312) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.626491880258266, LR: 0.0003 +[2026-03-04 08:13:57] (step=0054313) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.62668753668558, LR: 0.0003 +[2026-03-04 08:14:05] (step=0054314) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.626883193112894, LR: 0.0003 +[2026-03-04 08:14:13] (step=0054315) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.627078849540208, LR: 0.0003 +[2026-03-04 08:14:21] (step=0054316) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.62727450596752, LR: 0.0003 +[2026-03-04 08:14:29] (step=0054317) Train Loss: 0.4230, Train Steps/Sec: 0.13, Epoch: 10.627470162394834, LR: 0.0003 +[2026-03-04 08:14:36] (step=0054318) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.627665818822148, LR: 0.0003 +[2026-03-04 08:14:44] (step=0054319) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.627861475249462, LR: 0.0003 +[2026-03-04 08:14:52] (step=0054320) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 10.628057131676776, LR: 0.0003 +[2026-03-04 08:15:00] (step=0054321) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.628252788104088, LR: 0.0003 +[2026-03-04 08:15:08] (step=0054322) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.628448444531402, LR: 0.0003 +[2026-03-04 08:15:16] (step=0054323) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.628644100958716, LR: 0.0003 +[2026-03-04 08:15:24] (step=0054324) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.62883975738603, LR: 0.0003 +[2026-03-04 08:15:31] (step=0054325) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.629035413813344, LR: 0.0003 +[2026-03-04 08:15:39] (step=0054326) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.629231070240657, LR: 0.0003 +[2026-03-04 08:15:47] (step=0054327) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.62942672666797, LR: 0.0003 +[2026-03-04 08:15:55] (step=0054328) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.629622383095285, LR: 0.0003 +[2026-03-04 08:16:03] (step=0054329) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.629818039522599, LR: 0.0003 +[2026-03-04 08:16:11] (step=0054330) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.630013695949913, LR: 0.0003 +[2026-03-04 08:16:19] (step=0054331) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.630209352377225, LR: 0.0003 +[2026-03-04 08:16:26] (step=0054332) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.630405008804539, LR: 0.0003 +[2026-03-04 08:16:34] (step=0054333) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.630600665231853, LR: 0.0003 +[2026-03-04 08:16:42] (step=0054334) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.630796321659167, LR: 0.0003 +[2026-03-04 08:16:50] (step=0054335) Train Loss: 0.4602, Train Steps/Sec: 0.12, Epoch: 10.630991978086481, LR: 0.0003 +[2026-03-04 08:16:58] (step=0054336) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.631187634513793, LR: 0.0003 +[2026-03-04 08:17:06] (step=0054337) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.631383290941107, LR: 0.0003 +[2026-03-04 08:17:14] (step=0054338) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.631578947368421, LR: 0.0003 +[2026-03-04 08:17:22] (step=0054339) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.631774603795735, LR: 0.0003 +[2026-03-04 08:17:30] (step=0054340) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.631970260223047, LR: 0.0003 +[2026-03-04 08:17:37] (step=0054341) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.632165916650361, LR: 0.0003 +[2026-03-04 08:17:45] (step=0054342) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.632361573077675, LR: 0.0003 +[2026-03-04 08:17:53] (step=0054343) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.63255722950499, LR: 0.0003 +[2026-03-04 08:18:01] (step=0054344) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.632752885932303, LR: 0.0003 +[2026-03-04 08:18:09] (step=0054345) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.632948542359616, LR: 0.0003 +[2026-03-04 08:18:17] (step=0054346) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.63314419878693, LR: 0.0003 +[2026-03-04 08:18:25] (step=0054347) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.633339855214244, LR: 0.0003 +[2026-03-04 08:18:32] (step=0054348) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.633535511641558, LR: 0.0003 +[2026-03-04 08:18:40] (step=0054349) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.633731168068872, LR: 0.0003 +[2026-03-04 08:18:48] (step=0054350) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.633926824496184, LR: 0.0003 +[2026-03-04 08:18:56] (step=0054351) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.634122480923498, LR: 0.0003 +[2026-03-04 08:19:04] (step=0054352) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.634318137350812, LR: 0.0003 +[2026-03-04 08:19:12] (step=0054353) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.634513793778126, LR: 0.0003 +[2026-03-04 08:19:20] (step=0054354) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.63470945020544, LR: 0.0003 +[2026-03-04 08:19:27] (step=0054355) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.634905106632752, LR: 0.0003 +[2026-03-04 08:19:35] (step=0054356) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.635100763060066, LR: 0.0003 +[2026-03-04 08:19:43] (step=0054357) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 10.63529641948738, LR: 0.0003 +[2026-03-04 08:19:51] (step=0054358) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.635492075914694, LR: 0.0003 +[2026-03-04 08:19:59] (step=0054359) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.635687732342008, LR: 0.0003 +[2026-03-04 08:20:07] (step=0054360) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.63588338876932, LR: 0.0003 +[2026-03-04 08:20:15] (step=0054361) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.636079045196634, LR: 0.0003 +[2026-03-04 08:20:23] (step=0054362) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.636274701623949, LR: 0.0003 +[2026-03-04 08:20:30] (step=0054363) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.636470358051263, LR: 0.0003 +[2026-03-04 08:20:38] (step=0054364) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.636666014478575, LR: 0.0003 +[2026-03-04 08:20:46] (step=0054365) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 10.636861670905889, LR: 0.0003 +[2026-03-04 08:20:54] (step=0054366) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.637057327333203, LR: 0.0003 +[2026-03-04 08:21:02] (step=0054367) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.637252983760517, LR: 0.0003 +[2026-03-04 08:21:10] (step=0054368) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.63744864018783, LR: 0.0003 +[2026-03-04 08:21:18] (step=0054369) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.637644296615143, LR: 0.0003 +[2026-03-04 08:21:25] (step=0054370) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.637839953042457, LR: 0.0003 +[2026-03-04 08:21:33] (step=0054371) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.638035609469771, LR: 0.0003 +[2026-03-04 08:21:41] (step=0054372) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.638231265897085, LR: 0.0003 +[2026-03-04 08:21:49] (step=0054373) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.638426922324399, LR: 0.0003 +[2026-03-04 08:21:57] (step=0054374) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.638622578751711, LR: 0.0003 +[2026-03-04 08:22:05] (step=0054375) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.638818235179025, LR: 0.0003 +[2026-03-04 08:22:12] (step=0054376) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.63901389160634, LR: 0.0003 +[2026-03-04 08:22:20] (step=0054377) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.639209548033653, LR: 0.0003 +[2026-03-04 08:22:28] (step=0054378) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.639405204460967, LR: 0.0003 +[2026-03-04 08:22:36] (step=0054379) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.63960086088828, LR: 0.0003 +[2026-03-04 08:22:44] (step=0054380) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.639796517315594, LR: 0.0003 +[2026-03-04 08:22:52] (step=0054381) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.639992173742908, LR: 0.0003 +[2026-03-04 08:23:00] (step=0054382) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.640187830170222, LR: 0.0003 +[2026-03-04 08:23:07] (step=0054383) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.640383486597536, LR: 0.0003 +[2026-03-04 08:23:15] (step=0054384) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.640579143024848, LR: 0.0003 +[2026-03-04 08:23:23] (step=0054385) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.640774799452162, LR: 0.0003 +[2026-03-04 08:23:31] (step=0054386) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.640970455879476, LR: 0.0003 +[2026-03-04 08:23:39] (step=0054387) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.64116611230679, LR: 0.0003 +[2026-03-04 08:23:47] (step=0054388) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.641361768734104, LR: 0.0003 +[2026-03-04 08:23:55] (step=0054389) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.641557425161416, LR: 0.0003 +[2026-03-04 08:24:03] (step=0054390) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.64175308158873, LR: 0.0003 +[2026-03-04 08:24:10] (step=0054391) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.641948738016044, LR: 0.0003 +[2026-03-04 08:24:18] (step=0054392) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.642144394443358, LR: 0.0003 +[2026-03-04 08:24:26] (step=0054393) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.64234005087067, LR: 0.0003 +[2026-03-04 08:24:34] (step=0054394) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.642535707297984, LR: 0.0003 +[2026-03-04 08:24:42] (step=0054395) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.642731363725298, LR: 0.0003 +[2026-03-04 08:24:50] (step=0054396) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.642927020152612, LR: 0.0003 +[2026-03-04 08:24:57] (step=0054397) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.643122676579926, LR: 0.0003 +[2026-03-04 08:25:05] (step=0054398) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.643318333007239, LR: 0.0003 +[2026-03-04 08:25:13] (step=0054399) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.643513989434553, LR: 0.0003 +[2026-03-04 08:25:21] (step=0054400) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 10.643709645861867, LR: 0.0003 +[2026-03-04 08:25:29] (step=0054401) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.64390530228918, LR: 0.0003 +[2026-03-04 08:25:37] (step=0054402) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.644100958716495, LR: 0.0003 +[2026-03-04 08:25:45] (step=0054403) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.644296615143807, LR: 0.0003 +[2026-03-04 08:25:52] (step=0054404) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.64449227157112, LR: 0.0003 +[2026-03-04 08:26:00] (step=0054405) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.644687927998435, LR: 0.0003 +[2026-03-04 08:26:08] (step=0054406) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.644883584425749, LR: 0.0003 +[2026-03-04 08:26:16] (step=0054407) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.645079240853063, LR: 0.0003 +[2026-03-04 08:26:24] (step=0054408) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.645274897280375, LR: 0.0003 +[2026-03-04 08:26:32] (step=0054409) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.645470553707689, LR: 0.0003 +[2026-03-04 08:26:40] (step=0054410) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.645666210135003, LR: 0.0003 +[2026-03-04 08:26:48] (step=0054411) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.645861866562317, LR: 0.0003 +[2026-03-04 08:26:55] (step=0054412) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.646057522989631, LR: 0.0003 +[2026-03-04 08:27:03] (step=0054413) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.646253179416943, LR: 0.0003 +[2026-03-04 08:27:11] (step=0054414) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.646448835844257, LR: 0.0003 +[2026-03-04 08:27:19] (step=0054415) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.646644492271571, LR: 0.0003 +[2026-03-04 08:27:27] (step=0054416) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 10.646840148698885, LR: 0.0003 +[2026-03-04 08:27:35] (step=0054417) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.647035805126198, LR: 0.0003 +[2026-03-04 08:27:43] (step=0054418) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.647231461553512, LR: 0.0003 +[2026-03-04 08:27:50] (step=0054419) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.647427117980826, LR: 0.0003 +[2026-03-04 08:27:58] (step=0054420) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.64762277440814, LR: 0.0003 +[2026-03-04 08:28:06] (step=0054421) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.647818430835454, LR: 0.0003 +[2026-03-04 08:28:14] (step=0054422) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.648014087262766, LR: 0.0003 +[2026-03-04 08:28:22] (step=0054423) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.64820974369008, LR: 0.0003 +[2026-03-04 08:28:30] (step=0054424) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.648405400117394, LR: 0.0003 +[2026-03-04 08:28:38] (step=0054425) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.648601056544708, LR: 0.0003 +[2026-03-04 08:28:46] (step=0054426) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.648796712972022, LR: 0.0003 +[2026-03-04 08:28:53] (step=0054427) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.648992369399334, LR: 0.0003 +[2026-03-04 08:29:01] (step=0054428) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.649188025826648, LR: 0.0003 +[2026-03-04 08:29:09] (step=0054429) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.649383682253962, LR: 0.0003 +[2026-03-04 08:29:17] (step=0054430) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.649579338681276, LR: 0.0003 +[2026-03-04 08:29:25] (step=0054431) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.64977499510859, LR: 0.0003 +[2026-03-04 08:29:33] (step=0054432) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.649970651535902, LR: 0.0003 +[2026-03-04 08:29:41] (step=0054433) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.650166307963216, LR: 0.0003 +[2026-03-04 08:29:49] (step=0054434) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.65036196439053, LR: 0.0003 +[2026-03-04 08:29:56] (step=0054435) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.650557620817844, LR: 0.0003 +[2026-03-04 08:30:04] (step=0054436) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 10.650753277245158, LR: 0.0003 +[2026-03-04 08:30:12] (step=0054437) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.65094893367247, LR: 0.0003 +[2026-03-04 08:30:20] (step=0054438) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 10.651144590099785, LR: 0.0003 +[2026-03-04 08:30:28] (step=0054439) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.651340246527099, LR: 0.0003 +[2026-03-04 08:30:36] (step=0054440) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.651535902954413, LR: 0.0003 +[2026-03-04 08:30:44] (step=0054441) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.651731559381727, LR: 0.0003 +[2026-03-04 08:30:51] (step=0054442) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.651927215809039, LR: 0.0003 +[2026-03-04 08:30:59] (step=0054443) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.652122872236353, LR: 0.0003 +[2026-03-04 08:31:07] (step=0054444) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.652318528663667, LR: 0.0003 +[2026-03-04 08:31:15] (step=0054445) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.65251418509098, LR: 0.0003 +[2026-03-04 08:31:23] (step=0054446) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.652709841518293, LR: 0.0003 +[2026-03-04 08:31:31] (step=0054447) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.652905497945607, LR: 0.0003 +[2026-03-04 08:31:38] (step=0054448) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 10.653101154372921, LR: 0.0003 +[2026-03-04 08:31:46] (step=0054449) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.653296810800235, LR: 0.0003 +[2026-03-04 08:31:54] (step=0054450) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 10.653492467227549, LR: 0.0003 +[2026-03-04 08:32:02] (step=0054451) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.653688123654861, LR: 0.0003 +[2026-03-04 08:32:10] (step=0054452) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.653883780082175, LR: 0.0003 +[2026-03-04 08:32:18] (step=0054453) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.65407943650949, LR: 0.0003 +[2026-03-04 08:32:26] (step=0054454) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.654275092936803, LR: 0.0003 +[2026-03-04 08:32:34] (step=0054455) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.654470749364117, LR: 0.0003 +[2026-03-04 08:32:41] (step=0054456) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.65466640579143, LR: 0.0003 +[2026-03-04 08:32:49] (step=0054457) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.654862062218744, LR: 0.0003 +[2026-03-04 08:32:57] (step=0054458) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.655057718646058, LR: 0.0003 +[2026-03-04 08:33:05] (step=0054459) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.655253375073372, LR: 0.0003 +[2026-03-04 08:33:13] (step=0054460) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.655449031500686, LR: 0.0003 +[2026-03-04 08:33:21] (step=0054461) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.655644687927998, LR: 0.0003 +[2026-03-04 08:33:29] (step=0054462) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.655840344355312, LR: 0.0003 +[2026-03-04 08:33:36] (step=0054463) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.656036000782626, LR: 0.0003 +[2026-03-04 08:33:44] (step=0054464) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 10.65623165720994, LR: 0.0003 +[2026-03-04 08:33:52] (step=0054465) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 10.656427313637254, LR: 0.0003 +[2026-03-04 08:34:00] (step=0054466) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.656622970064566, LR: 0.0003 +[2026-03-04 08:34:08] (step=0054467) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.65681862649188, LR: 0.0003 +[2026-03-04 08:34:16] (step=0054468) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.657014282919194, LR: 0.0003 +[2026-03-04 08:34:24] (step=0054469) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.657209939346508, LR: 0.0003 +[2026-03-04 08:34:31] (step=0054470) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.65740559577382, LR: 0.0003 +[2026-03-04 08:34:39] (step=0054471) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.657601252201134, LR: 0.0003 +[2026-03-04 08:34:47] (step=0054472) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.657796908628448, LR: 0.0003 +[2026-03-04 08:34:55] (step=0054473) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.657992565055762, LR: 0.0003 +[2026-03-04 08:35:03] (step=0054474) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.658188221483076, LR: 0.0003 +[2026-03-04 08:35:11] (step=0054475) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.658383877910389, LR: 0.0003 +[2026-03-04 08:35:19] (step=0054476) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.658579534337703, LR: 0.0003 +[2026-03-04 08:35:26] (step=0054477) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.658775190765017, LR: 0.0003 +[2026-03-04 08:35:34] (step=0054478) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.65897084719233, LR: 0.0003 +[2026-03-04 08:35:42] (step=0054479) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.659166503619645, LR: 0.0003 +[2026-03-04 08:35:50] (step=0054480) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.659362160046957, LR: 0.0003 +[2026-03-04 08:35:58] (step=0054481) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.65955781647427, LR: 0.0003 +[2026-03-04 08:36:06] (step=0054482) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.659753472901585, LR: 0.0003 +[2026-03-04 08:36:14] (step=0054483) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.659949129328899, LR: 0.0003 +[2026-03-04 08:36:21] (step=0054484) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.660144785756213, LR: 0.0003 +[2026-03-04 08:36:29] (step=0054485) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 10.660340442183525, LR: 0.0003 +[2026-03-04 08:36:37] (step=0054486) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.660536098610839, LR: 0.0003 +[2026-03-04 08:36:45] (step=0054487) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.660731755038153, LR: 0.0003 +[2026-03-04 08:36:53] (step=0054488) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.660927411465467, LR: 0.0003 +[2026-03-04 08:37:01] (step=0054489) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.661123067892781, LR: 0.0003 +[2026-03-04 08:37:09] (step=0054490) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.661318724320093, LR: 0.0003 +[2026-03-04 08:37:16] (step=0054491) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.661514380747407, LR: 0.0003 +[2026-03-04 08:37:24] (step=0054492) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.661710037174721, LR: 0.0003 +[2026-03-04 08:37:32] (step=0054493) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.661905693602035, LR: 0.0003 +[2026-03-04 08:37:40] (step=0054494) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.66210135002935, LR: 0.0003 +[2026-03-04 08:37:48] (step=0054495) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 10.662297006456662, LR: 0.0003 +[2026-03-04 08:37:56] (step=0054496) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.662492662883976, LR: 0.0003 +[2026-03-04 08:38:04] (step=0054497) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.66268831931129, LR: 0.0003 +[2026-03-04 08:38:11] (step=0054498) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.662883975738604, LR: 0.0003 +[2026-03-04 08:38:19] (step=0054499) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.663079632165916, LR: 0.0003 +[2026-03-04 08:38:27] (step=0054500) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.66327528859323, LR: 0.0003 +[2026-03-04 08:38:27] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0054500/ +[2026-03-04 08:38:35] (step=0054501) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.663470945020544, LR: 0.0003 +[2026-03-04 08:38:43] (step=0054502) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.663666601447858, LR: 0.0003 +[2026-03-04 08:38:51] (step=0054503) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.663862257875172, LR: 0.0003 +[2026-03-04 08:38:59] (step=0054504) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.664057914302484, LR: 0.0003 +[2026-03-04 08:39:07] (step=0054505) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.664253570729798, LR: 0.0003 +[2026-03-04 08:39:14] (step=0054506) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.664449227157112, LR: 0.0003 +[2026-03-04 08:39:22] (step=0054507) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.664644883584426, LR: 0.0003 +[2026-03-04 08:39:30] (step=0054508) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.66484054001174, LR: 0.0003 +[2026-03-04 08:39:38] (step=0054509) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.665036196439052, LR: 0.0003 +[2026-03-04 08:39:46] (step=0054510) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.665231852866366, LR: 0.0003 +[2026-03-04 08:39:54] (step=0054511) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.66542750929368, LR: 0.0003 +[2026-03-04 08:40:02] (step=0054512) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.665623165720994, LR: 0.0003 +[2026-03-04 08:40:09] (step=0054513) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.665818822148308, LR: 0.0003 +[2026-03-04 08:40:17] (step=0054514) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.66601447857562, LR: 0.0003 +[2026-03-04 08:40:25] (step=0054515) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.666210135002935, LR: 0.0003 +[2026-03-04 08:40:33] (step=0054516) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.666405791430249, LR: 0.0003 +[2026-03-04 08:40:41] (step=0054517) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.666601447857563, LR: 0.0003 +[2026-03-04 08:40:49] (step=0054518) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.666797104284877, LR: 0.0003 +[2026-03-04 08:40:57] (step=0054519) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.666992760712189, LR: 0.0003 +[2026-03-04 08:41:05] (step=0054520) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.667188417139503, LR: 0.0003 +[2026-03-04 08:41:12] (step=0054521) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.667384073566817, LR: 0.0003 +[2026-03-04 08:41:20] (step=0054522) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 10.66757972999413, LR: 0.0003 +[2026-03-04 08:41:28] (step=0054523) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.667775386421443, LR: 0.0003 +[2026-03-04 08:41:36] (step=0054524) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 10.667971042848757, LR: 0.0003 +[2026-03-04 08:41:44] (step=0054525) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.668166699276071, LR: 0.0003 +[2026-03-04 08:41:52] (step=0054526) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 10.668362355703385, LR: 0.0003 +[2026-03-04 08:41:59] (step=0054527) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 10.668558012130699, LR: 0.0003 +[2026-03-04 08:42:07] (step=0054528) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.668753668558011, LR: 0.0003 +[2026-03-04 08:42:15] (step=0054529) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.668949324985325, LR: 0.0003 +[2026-03-04 08:42:23] (step=0054530) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.66914498141264, LR: 0.0003 +[2026-03-04 08:42:31] (step=0054531) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.669340637839953, LR: 0.0003 +[2026-03-04 08:42:39] (step=0054532) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.669536294267267, LR: 0.0003 +[2026-03-04 08:42:47] (step=0054533) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.66973195069458, LR: 0.0003 +[2026-03-04 08:42:55] (step=0054534) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.669927607121894, LR: 0.0003 +[2026-03-04 08:43:02] (step=0054535) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.670123263549208, LR: 0.0003 +[2026-03-04 08:43:10] (step=0054536) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.670318919976522, LR: 0.0003 +[2026-03-04 08:43:18] (step=0054537) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.670514576403836, LR: 0.0003 +[2026-03-04 08:43:26] (step=0054538) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.670710232831148, LR: 0.0003 +[2026-03-04 08:43:34] (step=0054539) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.670905889258462, LR: 0.0003 +[2026-03-04 08:43:42] (step=0054540) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 10.671101545685776, LR: 0.0003 +[2026-03-04 08:43:50] (step=0054541) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 10.67129720211309, LR: 0.0003 +[2026-03-04 08:43:58] (step=0054542) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 10.671492858540404, LR: 0.0003 +[2026-03-04 08:44:05] (step=0054543) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.671688514967716, LR: 0.0003 +[2026-03-04 08:44:13] (step=0054544) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.67188417139503, LR: 0.0003 +[2026-03-04 08:44:21] (step=0054545) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.672079827822344, LR: 0.0003 +[2026-03-04 08:44:29] (step=0054546) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.672275484249658, LR: 0.0003 +[2026-03-04 08:44:37] (step=0054547) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 10.672471140676972, LR: 0.0003 +[2026-03-04 08:44:45] (step=0054548) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.672666797104284, LR: 0.0003 +[2026-03-04 08:44:53] (step=0054549) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 10.672862453531598, LR: 0.0003 +[2026-03-04 08:45:00] (step=0054550) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.673058109958912, LR: 0.0003 +[2026-03-04 08:45:08] (step=0054551) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.673253766386226, LR: 0.0003 +[2026-03-04 08:45:16] (step=0054552) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.673449422813539, LR: 0.0003 +[2026-03-04 08:45:24] (step=0054553) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 10.673645079240853, LR: 0.0003 +[2026-03-04 08:45:32] (step=0054554) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.673840735668167, LR: 0.0003 +[2026-03-04 08:45:40] (step=0054555) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.67403639209548, LR: 0.0003 +[2026-03-04 08:45:48] (step=0054556) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 10.674232048522795, LR: 0.0003 +[2026-03-04 08:45:55] (step=0054557) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.674427704950107, LR: 0.0003 +[2026-03-04 08:46:03] (step=0054558) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.67462336137742, LR: 0.0003 +[2026-03-04 08:46:11] (step=0054559) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.674819017804735, LR: 0.0003 +[2026-03-04 08:46:19] (step=0054560) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.675014674232049, LR: 0.0003 +[2026-03-04 08:46:27] (step=0054561) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.675210330659363, LR: 0.0003 +[2026-03-04 08:46:35] (step=0054562) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.675405987086675, LR: 0.0003 +[2026-03-04 08:46:43] (step=0054563) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.675601643513989, LR: 0.0003 +[2026-03-04 08:46:50] (step=0054564) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.675797299941303, LR: 0.0003 +[2026-03-04 08:46:58] (step=0054565) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 10.675992956368617, LR: 0.0003 +[2026-03-04 08:47:06] (step=0054566) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.676188612795931, LR: 0.0003 +[2026-03-04 08:47:14] (step=0054567) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.676384269223243, LR: 0.0003 +[2026-03-04 08:47:22] (step=0054568) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 10.676579925650557, LR: 0.0003 +[2026-03-04 08:47:30] (step=0054569) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.676775582077871, LR: 0.0003 +[2026-03-04 08:47:38] (step=0054570) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 10.676971238505185, LR: 0.0003 +[2026-03-04 08:47:45] (step=0054571) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.6771668949325, LR: 0.0003 +[2026-03-04 08:47:53] (step=0054572) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.677362551359812, LR: 0.0003 +[2026-03-04 08:48:01] (step=0054573) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.677558207787126, LR: 0.0003 +[2026-03-04 08:48:09] (step=0054574) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.67775386421444, LR: 0.0003 +[2026-03-04 08:48:17] (step=0054575) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.677949520641754, LR: 0.0003 +[2026-03-04 08:48:25] (step=0054576) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.678145177069066, LR: 0.0003 +[2026-03-04 08:48:33] (step=0054577) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.67834083349638, LR: 0.0003 +[2026-03-04 08:48:40] (step=0054578) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.678536489923694, LR: 0.0003 +[2026-03-04 08:48:48] (step=0054579) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.678732146351008, LR: 0.0003 +[2026-03-04 08:48:56] (step=0054580) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.678927802778322, LR: 0.0003 +[2026-03-04 08:49:04] (step=0054581) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 10.679123459205634, LR: 0.0003 +[2026-03-04 08:49:12] (step=0054582) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.679319115632948, LR: 0.0003 +[2026-03-04 08:49:20] (step=0054583) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.679514772060262, LR: 0.0003 +[2026-03-04 08:49:28] (step=0054584) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.679710428487576, LR: 0.0003 +[2026-03-04 08:49:36] (step=0054585) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.67990608491489, LR: 0.0003 +[2026-03-04 08:49:43] (step=0054586) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 10.680101741342202, LR: 0.0003 +[2026-03-04 08:49:51] (step=0054587) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.680297397769516, LR: 0.0003 +[2026-03-04 08:49:59] (step=0054588) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.68049305419683, LR: 0.0003 +[2026-03-04 08:50:07] (step=0054589) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.680688710624144, LR: 0.0003 +[2026-03-04 08:50:15] (step=0054590) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.680884367051458, LR: 0.0003 +[2026-03-04 08:50:23] (step=0054591) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.68108002347877, LR: 0.0003 +[2026-03-04 08:50:30] (step=0054592) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.681275679906085, LR: 0.0003 +[2026-03-04 08:50:38] (step=0054593) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.681471336333399, LR: 0.0003 +[2026-03-04 08:50:46] (step=0054594) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.681666992760713, LR: 0.0003 +[2026-03-04 08:50:54] (step=0054595) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.681862649188027, LR: 0.0003 +[2026-03-04 08:51:02] (step=0054596) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.682058305615339, LR: 0.0003 +[2026-03-04 08:51:10] (step=0054597) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.682253962042653, LR: 0.0003 +[2026-03-04 08:51:18] (step=0054598) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.682449618469967, LR: 0.0003 +[2026-03-04 08:51:26] (step=0054599) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.68264527489728, LR: 0.0003 +[2026-03-04 08:51:33] (step=0054600) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.682840931324595, LR: 0.0003 +[2026-03-04 08:51:41] (step=0054601) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.683036587751907, LR: 0.0003 +[2026-03-04 08:51:49] (step=0054602) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.683232244179221, LR: 0.0003 +[2026-03-04 08:51:57] (step=0054603) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.683427900606535, LR: 0.0003 +[2026-03-04 08:52:05] (step=0054604) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.68362355703385, LR: 0.0003 +[2026-03-04 08:52:13] (step=0054605) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.683819213461161, LR: 0.0003 +[2026-03-04 08:52:21] (step=0054606) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 10.684014869888475, LR: 0.0003 +[2026-03-04 08:52:28] (step=0054607) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.68421052631579, LR: 0.0003 +[2026-03-04 08:52:36] (step=0054608) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 10.684406182743103, LR: 0.0003 +[2026-03-04 08:52:44] (step=0054609) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 10.684601839170417, LR: 0.0003 +[2026-03-04 08:52:52] (step=0054610) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.68479749559773, LR: 0.0003 +[2026-03-04 08:53:00] (step=0054611) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.684993152025044, LR: 0.0003 +[2026-03-04 08:53:08] (step=0054612) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.685188808452358, LR: 0.0003 +[2026-03-04 08:53:16] (step=0054613) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.685384464879672, LR: 0.0003 +[2026-03-04 08:53:23] (step=0054614) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 10.685580121306986, LR: 0.0003 +[2026-03-04 08:53:31] (step=0054615) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.685775777734298, LR: 0.0003 +[2026-03-04 08:53:39] (step=0054616) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 10.685971434161612, LR: 0.0003 +[2026-03-04 08:53:47] (step=0054617) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.686167090588926, LR: 0.0003 +[2026-03-04 08:53:55] (step=0054618) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 10.68636274701624, LR: 0.0003 +[2026-03-04 08:54:03] (step=0054619) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.686558403443554, LR: 0.0003 +[2026-03-04 08:54:11] (step=0054620) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.686754059870866, LR: 0.0003 +[2026-03-04 08:54:18] (step=0054621) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.68694971629818, LR: 0.0003 +[2026-03-04 08:54:26] (step=0054622) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.687145372725494, LR: 0.0003 +[2026-03-04 08:54:34] (step=0054623) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 10.687341029152808, LR: 0.0003 +[2026-03-04 08:54:42] (step=0054624) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 10.687536685580122, LR: 0.0003 +[2026-03-04 08:54:50] (step=0054625) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.687732342007434, LR: 0.0003 +[2026-03-04 08:54:58] (step=0054626) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.687927998434748, LR: 0.0003 +[2026-03-04 08:55:06] (step=0054627) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.688123654862062, LR: 0.0003 +[2026-03-04 08:55:13] (step=0054628) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.688319311289376, LR: 0.0003 +[2026-03-04 08:55:21] (step=0054629) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.688514967716689, LR: 0.0003 +[2026-03-04 08:55:29] (step=0054630) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 10.688710624144003, LR: 0.0003 +[2026-03-04 08:55:37] (step=0054631) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.688906280571317, LR: 0.0003 +[2026-03-04 08:55:45] (step=0054632) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 10.68910193699863, LR: 0.0003 +[2026-03-04 08:55:53] (step=0054633) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.689297593425945, LR: 0.0003 +[2026-03-04 08:56:01] (step=0054634) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.689493249853257, LR: 0.0003 +[2026-03-04 08:56:08] (step=0054635) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.689688906280571, LR: 0.0003 +[2026-03-04 08:56:16] (step=0054636) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.689884562707885, LR: 0.0003 +[2026-03-04 08:56:24] (step=0054637) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 10.690080219135199, LR: 0.0003 +[2026-03-04 08:56:32] (step=0054638) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.690275875562513, LR: 0.0003 +[2026-03-04 08:56:40] (step=0054639) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.690471531989825, LR: 0.0003 +[2026-03-04 08:56:48] (step=0054640) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.69066718841714, LR: 0.0003 +[2026-03-04 08:56:56] (step=0054641) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.690862844844453, LR: 0.0003 +[2026-03-04 08:57:03] (step=0054642) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.691058501271767, LR: 0.0003 +[2026-03-04 08:57:11] (step=0054643) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 10.691254157699081, LR: 0.0003 +[2026-03-04 08:57:19] (step=0054644) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.691449814126393, LR: 0.0003 +[2026-03-04 08:57:27] (step=0054645) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 10.691645470553707, LR: 0.0003 +[2026-03-04 08:57:35] (step=0054646) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 10.691841126981021, LR: 0.0003 +[2026-03-04 08:57:43] (step=0054647) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.692036783408335, LR: 0.0003 +[2026-03-04 08:57:51] (step=0054648) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.69223243983565, LR: 0.0003 +[2026-03-04 08:57:59] (step=0054649) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.692428096262962, LR: 0.0003 +[2026-03-04 08:58:06] (step=0054650) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.692623752690276, LR: 0.0003 +[2026-03-04 08:58:14] (step=0054651) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.69281940911759, LR: 0.0003 +[2026-03-04 08:58:22] (step=0054652) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.693015065544904, LR: 0.0003 +[2026-03-04 08:58:30] (step=0054653) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.693210721972218, LR: 0.0003 +[2026-03-04 08:58:38] (step=0054654) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.69340637839953, LR: 0.0003 +[2026-03-04 08:58:46] (step=0054655) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.693602034826844, LR: 0.0003 +[2026-03-04 08:58:54] (step=0054656) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.693797691254158, LR: 0.0003 +[2026-03-04 08:59:01] (step=0054657) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.693993347681472, LR: 0.0003 +[2026-03-04 08:59:09] (step=0054658) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.694189004108784, LR: 0.0003 +[2026-03-04 08:59:17] (step=0054659) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.694384660536098, LR: 0.0003 +[2026-03-04 08:59:25] (step=0054660) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.694580316963412, LR: 0.0003 +[2026-03-04 08:59:33] (step=0054661) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.694775973390726, LR: 0.0003 +[2026-03-04 08:59:41] (step=0054662) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.69497162981804, LR: 0.0003 +[2026-03-04 08:59:49] (step=0054663) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.695167286245352, LR: 0.0003 +[2026-03-04 08:59:56] (step=0054664) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.695362942672666, LR: 0.0003 +[2026-03-04 09:00:04] (step=0054665) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.69555859909998, LR: 0.0003 +[2026-03-04 09:00:12] (step=0054666) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.695754255527294, LR: 0.0003 +[2026-03-04 09:00:20] (step=0054667) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.695949911954608, LR: 0.0003 +[2026-03-04 09:00:28] (step=0054668) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.69614556838192, LR: 0.0003 +[2026-03-04 09:00:36] (step=0054669) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.696341224809235, LR: 0.0003 +[2026-03-04 09:00:44] (step=0054670) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.696536881236549, LR: 0.0003 +[2026-03-04 09:00:51] (step=0054671) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.696732537663863, LR: 0.0003 +[2026-03-04 09:00:59] (step=0054672) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.696928194091177, LR: 0.0003 +[2026-03-04 09:01:07] (step=0054673) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.697123850518489, LR: 0.0003 +[2026-03-04 09:01:15] (step=0054674) Train Loss: 0.4520, Train Steps/Sec: 0.12, Epoch: 10.697319506945803, LR: 0.0003 +[2026-03-04 09:01:23] (step=0054675) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.697515163373117, LR: 0.0003 +[2026-03-04 09:01:31] (step=0054676) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.697710819800431, LR: 0.0003 +[2026-03-04 09:01:39] (step=0054677) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.697906476227745, LR: 0.0003 +[2026-03-04 09:01:47] (step=0054678) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 10.698102132655057, LR: 0.0003 +[2026-03-04 09:01:54] (step=0054679) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.698297789082371, LR: 0.0003 +[2026-03-04 09:02:02] (step=0054680) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.698493445509685, LR: 0.0003 +[2026-03-04 09:02:10] (step=0054681) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.698689101937, LR: 0.0003 +[2026-03-04 09:02:18] (step=0054682) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.698884758364311, LR: 0.0003 +[2026-03-04 09:02:26] (step=0054683) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.699080414791625, LR: 0.0003 +[2026-03-04 09:02:34] (step=0054684) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 10.69927607121894, LR: 0.0003 +[2026-03-04 09:02:42] (step=0054685) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.699471727646253, LR: 0.0003 +[2026-03-04 09:02:49] (step=0054686) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.699667384073567, LR: 0.0003 +[2026-03-04 09:02:57] (step=0054687) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.69986304050088, LR: 0.0003 +[2026-03-04 09:03:05] (step=0054688) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.700058696928194, LR: 0.0003 +[2026-03-04 09:03:13] (step=0054689) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.700254353355508, LR: 0.0003 +[2026-03-04 09:03:21] (step=0054690) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.700450009782822, LR: 0.0003 +[2026-03-04 09:03:29] (step=0054691) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.700645666210136, LR: 0.0003 +[2026-03-04 09:03:37] (step=0054692) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.700841322637448, LR: 0.0003 +[2026-03-04 09:03:44] (step=0054693) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.701036979064762, LR: 0.0003 +[2026-03-04 09:03:52] (step=0054694) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.701232635492076, LR: 0.0003 +[2026-03-04 09:04:00] (step=0054695) Train Loss: 0.4444, Train Steps/Sec: 0.12, Epoch: 10.70142829191939, LR: 0.0003 +[2026-03-04 09:04:08] (step=0054696) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.701623948346704, LR: 0.0003 +[2026-03-04 09:04:16] (step=0054697) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.701819604774016, LR: 0.0003 +[2026-03-04 09:04:24] (step=0054698) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.70201526120133, LR: 0.0003 +[2026-03-04 09:04:32] (step=0054699) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 10.702210917628644, LR: 0.0003 +[2026-03-04 09:04:40] (step=0054700) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.702406574055958, LR: 0.0003 +[2026-03-04 09:04:47] (step=0054701) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.702602230483272, LR: 0.0003 +[2026-03-04 09:04:55] (step=0054702) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.702797886910584, LR: 0.0003 +[2026-03-04 09:05:03] (step=0054703) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.702993543337898, LR: 0.0003 +[2026-03-04 09:05:11] (step=0054704) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.703189199765212, LR: 0.0003 +[2026-03-04 09:05:19] (step=0054705) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.703384856192526, LR: 0.0003 +[2026-03-04 09:05:27] (step=0054706) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.70358051261984, LR: 0.0003 +[2026-03-04 09:05:35] (step=0054707) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.703776169047153, LR: 0.0003 +[2026-03-04 09:05:42] (step=0054708) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.703971825474467, LR: 0.0003 +[2026-03-04 09:05:50] (step=0054709) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 10.70416748190178, LR: 0.0003 +[2026-03-04 09:05:58] (step=0054710) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.704363138329095, LR: 0.0003 +[2026-03-04 09:06:06] (step=0054711) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.704558794756407, LR: 0.0003 +[2026-03-04 09:06:14] (step=0054712) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.704754451183721, LR: 0.0003 +[2026-03-04 09:06:22] (step=0054713) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.704950107611035, LR: 0.0003 +[2026-03-04 09:06:30] (step=0054714) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.705145764038349, LR: 0.0003 +[2026-03-04 09:06:37] (step=0054715) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.705341420465663, LR: 0.0003 +[2026-03-04 09:06:45] (step=0054716) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.705537076892975, LR: 0.0003 +[2026-03-04 09:06:53] (step=0054717) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.70573273332029, LR: 0.0003 +[2026-03-04 09:07:01] (step=0054718) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.705928389747603, LR: 0.0003 +[2026-03-04 09:07:09] (step=0054719) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.706124046174917, LR: 0.0003 +[2026-03-04 09:07:17] (step=0054720) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.706319702602231, LR: 0.0003 +[2026-03-04 09:07:25] (step=0054721) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 10.706515359029543, LR: 0.0003 +[2026-03-04 09:07:32] (step=0054722) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.706711015456857, LR: 0.0003 +[2026-03-04 09:07:40] (step=0054723) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 10.706906671884171, LR: 0.0003 +[2026-03-04 09:07:48] (step=0054724) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.707102328311485, LR: 0.0003 +[2026-03-04 09:07:56] (step=0054725) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.7072979847388, LR: 0.0003 +[2026-03-04 09:08:04] (step=0054726) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.707493641166112, LR: 0.0003 +[2026-03-04 09:08:12] (step=0054727) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.707689297593426, LR: 0.0003 +[2026-03-04 09:08:20] (step=0054728) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.70788495402074, LR: 0.0003 +[2026-03-04 09:08:28] (step=0054729) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.708080610448054, LR: 0.0003 +[2026-03-04 09:08:35] (step=0054730) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.708276266875368, LR: 0.0003 +[2026-03-04 09:08:43] (step=0054731) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.70847192330268, LR: 0.0003 +[2026-03-04 09:08:51] (step=0054732) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.708667579729994, LR: 0.0003 +[2026-03-04 09:08:59] (step=0054733) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.708863236157308, LR: 0.0003 +[2026-03-04 09:09:07] (step=0054734) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.709058892584622, LR: 0.0003 +[2026-03-04 09:09:15] (step=0054735) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.709254549011934, LR: 0.0003 +[2026-03-04 09:09:22] (step=0054736) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 10.709450205439248, LR: 0.0003 +[2026-03-04 09:09:30] (step=0054737) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.709645861866562, LR: 0.0003 +[2026-03-04 09:09:38] (step=0054738) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.709841518293876, LR: 0.0003 +[2026-03-04 09:09:46] (step=0054739) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.71003717472119, LR: 0.0003 +[2026-03-04 09:09:54] (step=0054740) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.710232831148502, LR: 0.0003 +[2026-03-04 09:10:02] (step=0054741) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.710428487575816, LR: 0.0003 +[2026-03-04 09:10:10] (step=0054742) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.71062414400313, LR: 0.0003 +[2026-03-04 09:10:18] (step=0054743) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.710819800430444, LR: 0.0003 +[2026-03-04 09:10:25] (step=0054744) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.711015456857758, LR: 0.0003 +[2026-03-04 09:10:33] (step=0054745) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.71121111328507, LR: 0.0003 +[2026-03-04 09:10:41] (step=0054746) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.711406769712385, LR: 0.0003 +[2026-03-04 09:10:49] (step=0054747) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 10.711602426139699, LR: 0.0003 +[2026-03-04 09:10:57] (step=0054748) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.711798082567013, LR: 0.0003 +[2026-03-04 09:11:05] (step=0054749) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 10.711993738994327, LR: 0.0003 +[2026-03-04 09:11:13] (step=0054750) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.712189395421639, LR: 0.0003 +[2026-03-04 09:11:20] (step=0054751) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.712385051848953, LR: 0.0003 +[2026-03-04 09:11:28] (step=0054752) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.712580708276267, LR: 0.0003 +[2026-03-04 09:11:36] (step=0054753) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.712776364703581, LR: 0.0003 +[2026-03-04 09:11:44] (step=0054754) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.712972021130895, LR: 0.0003 +[2026-03-04 09:11:52] (step=0054755) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 10.713167677558207, LR: 0.0003 +[2026-03-04 09:12:00] (step=0054756) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.713363333985521, LR: 0.0003 +[2026-03-04 09:12:08] (step=0054757) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.713558990412835, LR: 0.0003 +[2026-03-04 09:12:15] (step=0054758) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 10.71375464684015, LR: 0.0003 +[2026-03-04 09:12:23] (step=0054759) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.713950303267461, LR: 0.0003 +[2026-03-04 09:12:31] (step=0054760) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 10.714145959694775, LR: 0.0003 +[2026-03-04 09:12:39] (step=0054761) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.71434161612209, LR: 0.0003 +[2026-03-04 09:12:47] (step=0054762) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.714537272549403, LR: 0.0003 +[2026-03-04 09:12:55] (step=0054763) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 10.714732928976717, LR: 0.0003 +[2026-03-04 09:13:03] (step=0054764) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.71492858540403, LR: 0.0003 +[2026-03-04 09:13:10] (step=0054765) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.715124241831344, LR: 0.0003 +[2026-03-04 09:13:18] (step=0054766) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.715319898258658, LR: 0.0003 +[2026-03-04 09:13:26] (step=0054767) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.715515554685972, LR: 0.0003 +[2026-03-04 09:13:34] (step=0054768) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.715711211113286, LR: 0.0003 +[2026-03-04 09:13:42] (step=0054769) Train Loss: 0.4247, Train Steps/Sec: 0.13, Epoch: 10.715906867540598, LR: 0.0003 +[2026-03-04 09:13:50] (step=0054770) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.716102523967912, LR: 0.0003 +[2026-03-04 09:13:58] (step=0054771) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.716298180395226, LR: 0.0003 +[2026-03-04 09:14:05] (step=0054772) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.71649383682254, LR: 0.0003 +[2026-03-04 09:14:13] (step=0054773) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.716689493249854, LR: 0.0003 +[2026-03-04 09:14:21] (step=0054774) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.716885149677166, LR: 0.0003 +[2026-03-04 09:14:29] (step=0054775) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.71708080610448, LR: 0.0003 +[2026-03-04 09:14:37] (step=0054776) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.717276462531794, LR: 0.0003 +[2026-03-04 09:14:45] (step=0054777) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.717472118959108, LR: 0.0003 +[2026-03-04 09:14:53] (step=0054778) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.717667775386422, LR: 0.0003 +[2026-03-04 09:15:00] (step=0054779) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.717863431813734, LR: 0.0003 +[2026-03-04 09:15:08] (step=0054780) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 10.718059088241048, LR: 0.0003 +[2026-03-04 09:15:16] (step=0054781) Train Loss: 0.4477, Train Steps/Sec: 0.12, Epoch: 10.718254744668362, LR: 0.0003 +[2026-03-04 09:15:24] (step=0054782) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.718450401095676, LR: 0.0003 +[2026-03-04 09:15:32] (step=0054783) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.71864605752299, LR: 0.0003 +[2026-03-04 09:15:40] (step=0054784) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.718841713950303, LR: 0.0003 +[2026-03-04 09:15:48] (step=0054785) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.719037370377617, LR: 0.0003 +[2026-03-04 09:15:56] (step=0054786) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.71923302680493, LR: 0.0003 +[2026-03-04 09:16:03] (step=0054787) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.719428683232245, LR: 0.0003 +[2026-03-04 09:16:11] (step=0054788) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.719624339659557, LR: 0.0003 +[2026-03-04 09:16:19] (step=0054789) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.719819996086871, LR: 0.0003 +[2026-03-04 09:16:27] (step=0054790) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.720015652514185, LR: 0.0003 +[2026-03-04 09:16:35] (step=0054791) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.720211308941499, LR: 0.0003 +[2026-03-04 09:16:43] (step=0054792) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.720406965368813, LR: 0.0003 +[2026-03-04 09:16:51] (step=0054793) Train Loss: 0.4397, Train Steps/Sec: 0.12, Epoch: 10.720602621796125, LR: 0.0003 +[2026-03-04 09:16:59] (step=0054794) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.72079827822344, LR: 0.0003 +[2026-03-04 09:17:06] (step=0054795) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 10.720993934650753, LR: 0.0003 +[2026-03-04 09:17:14] (step=0054796) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.721189591078067, LR: 0.0003 +[2026-03-04 09:17:22] (step=0054797) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.721385247505381, LR: 0.0003 +[2026-03-04 09:17:30] (step=0054798) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.721580903932693, LR: 0.0003 +[2026-03-04 09:17:38] (step=0054799) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.721776560360007, LR: 0.0003 +[2026-03-04 09:17:46] (step=0054800) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.721972216787321, LR: 0.0003 +[2026-03-04 09:17:54] (step=0054801) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.722167873214635, LR: 0.0003 +[2026-03-04 09:18:01] (step=0054802) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.72236352964195, LR: 0.0003 +[2026-03-04 09:18:09] (step=0054803) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.722559186069262, LR: 0.0003 +[2026-03-04 09:18:17] (step=0054804) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.722754842496576, LR: 0.0003 +[2026-03-04 09:18:25] (step=0054805) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.72295049892389, LR: 0.0003 +[2026-03-04 09:18:33] (step=0054806) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.723146155351204, LR: 0.0003 +[2026-03-04 09:18:41] (step=0054807) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 10.723341811778518, LR: 0.0003 +[2026-03-04 09:18:49] (step=0054808) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 10.72353746820583, LR: 0.0003 +[2026-03-04 09:18:56] (step=0054809) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.723733124633144, LR: 0.0003 +[2026-03-04 09:19:04] (step=0054810) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.723928781060458, LR: 0.0003 +[2026-03-04 09:19:12] (step=0054811) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.724124437487772, LR: 0.0003 +[2026-03-04 09:19:20] (step=0054812) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.724320093915084, LR: 0.0003 +[2026-03-04 09:19:28] (step=0054813) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 10.724515750342398, LR: 0.0003 +[2026-03-04 09:19:36] (step=0054814) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.724711406769712, LR: 0.0003 +[2026-03-04 09:19:43] (step=0054815) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.724907063197026, LR: 0.0003 +[2026-03-04 09:19:51] (step=0054816) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.72510271962434, LR: 0.0003 +[2026-03-04 09:19:59] (step=0054817) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.725298376051652, LR: 0.0003 +[2026-03-04 09:20:07] (step=0054818) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.725494032478966, LR: 0.0003 +[2026-03-04 09:20:15] (step=0054819) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.72568968890628, LR: 0.0003 +[2026-03-04 09:20:23] (step=0054820) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.725885345333595, LR: 0.0003 +[2026-03-04 09:20:31] (step=0054821) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.726081001760909, LR: 0.0003 +[2026-03-04 09:20:38] (step=0054822) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.72627665818822, LR: 0.0003 +[2026-03-04 09:20:46] (step=0054823) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.726472314615535, LR: 0.0003 +[2026-03-04 09:20:54] (step=0054824) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.726667971042849, LR: 0.0003 +[2026-03-04 09:21:02] (step=0054825) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.726863627470163, LR: 0.0003 +[2026-03-04 09:21:10] (step=0054826) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.727059283897477, LR: 0.0003 +[2026-03-04 09:21:18] (step=0054827) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.727254940324789, LR: 0.0003 +[2026-03-04 09:21:26] (step=0054828) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.727450596752103, LR: 0.0003 +[2026-03-04 09:21:33] (step=0054829) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.727646253179417, LR: 0.0003 +[2026-03-04 09:21:41] (step=0054830) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.727841909606731, LR: 0.0003 +[2026-03-04 09:21:49] (step=0054831) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.728037566034045, LR: 0.0003 +[2026-03-04 09:21:57] (step=0054832) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.728233222461357, LR: 0.0003 +[2026-03-04 09:22:05] (step=0054833) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.728428878888671, LR: 0.0003 +[2026-03-04 09:22:13] (step=0054834) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.728624535315985, LR: 0.0003 +[2026-03-04 09:22:21] (step=0054835) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.7288201917433, LR: 0.0003 +[2026-03-04 09:22:28] (step=0054836) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.729015848170613, LR: 0.0003 +[2026-03-04 09:22:36] (step=0054837) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.729211504597925, LR: 0.0003 +[2026-03-04 09:22:44] (step=0054838) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.72940716102524, LR: 0.0003 +[2026-03-04 09:22:52] (step=0054839) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 10.729602817452554, LR: 0.0003 +[2026-03-04 09:23:00] (step=0054840) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.729798473879868, LR: 0.0003 +[2026-03-04 09:23:08] (step=0054841) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.72999413030718, LR: 0.0003 +[2026-03-04 09:23:16] (step=0054842) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 10.730189786734494, LR: 0.0003 +[2026-03-04 09:23:23] (step=0054843) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.730385443161808, LR: 0.0003 +[2026-03-04 09:23:31] (step=0054844) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.730581099589122, LR: 0.0003 +[2026-03-04 09:23:39] (step=0054845) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.730776756016436, LR: 0.0003 +[2026-03-04 09:23:47] (step=0054846) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.730972412443748, LR: 0.0003 +[2026-03-04 09:23:55] (step=0054847) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.731168068871062, LR: 0.0003 +[2026-03-04 09:24:03] (step=0054848) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.731363725298376, LR: 0.0003 +[2026-03-04 09:24:11] (step=0054849) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.73155938172569, LR: 0.0003 +[2026-03-04 09:24:18] (step=0054850) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.731755038153004, LR: 0.0003 +[2026-03-04 09:24:26] (step=0054851) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 10.731950694580316, LR: 0.0003 +[2026-03-04 09:24:34] (step=0054852) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.73214635100763, LR: 0.0003 +[2026-03-04 09:24:42] (step=0054853) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.732342007434944, LR: 0.0003 +[2026-03-04 09:24:50] (step=0054854) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 10.732537663862258, LR: 0.0003 +[2026-03-04 09:24:58] (step=0054855) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.732733320289572, LR: 0.0003 +[2026-03-04 09:25:06] (step=0054856) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.732928976716885, LR: 0.0003 +[2026-03-04 09:25:13] (step=0054857) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.733124633144199, LR: 0.0003 +[2026-03-04 09:25:21] (step=0054858) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.733320289571513, LR: 0.0003 +[2026-03-04 09:25:29] (step=0054859) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.733515945998827, LR: 0.0003 +[2026-03-04 09:25:37] (step=0054860) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.73371160242614, LR: 0.0003 +[2026-03-04 09:25:45] (step=0054861) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.733907258853453, LR: 0.0003 +[2026-03-04 09:25:53] (step=0054862) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.734102915280767, LR: 0.0003 +[2026-03-04 09:26:01] (step=0054863) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.73429857170808, LR: 0.0003 +[2026-03-04 09:26:08] (step=0054864) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 10.734494228135395, LR: 0.0003 +[2026-03-04 09:26:16] (step=0054865) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.734689884562707, LR: 0.0003 +[2026-03-04 09:26:24] (step=0054866) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.734885540990021, LR: 0.0003 +[2026-03-04 09:26:32] (step=0054867) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.735081197417335, LR: 0.0003 +[2026-03-04 09:26:40] (step=0054868) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.735276853844649, LR: 0.0003 +[2026-03-04 09:26:48] (step=0054869) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.735472510271963, LR: 0.0003 +[2026-03-04 09:26:56] (step=0054870) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.735668166699275, LR: 0.0003 +[2026-03-04 09:27:03] (step=0054871) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.73586382312659, LR: 0.0003 +[2026-03-04 09:27:11] (step=0054872) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.736059479553903, LR: 0.0003 +[2026-03-04 09:27:19] (step=0054873) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.736255135981217, LR: 0.0003 +[2026-03-04 09:27:27] (step=0054874) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.736450792408531, LR: 0.0003 +[2026-03-04 09:27:35] (step=0054875) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.736646448835844, LR: 0.0003 +[2026-03-04 09:27:43] (step=0054876) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.736842105263158, LR: 0.0003 +[2026-03-04 09:27:51] (step=0054877) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.737037761690472, LR: 0.0003 +[2026-03-04 09:27:58] (step=0054878) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.737233418117786, LR: 0.0003 +[2026-03-04 09:28:06] (step=0054879) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.7374290745451, LR: 0.0003 +[2026-03-04 09:28:14] (step=0054880) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.737624730972412, LR: 0.0003 +[2026-03-04 09:28:22] (step=0054881) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.737820387399726, LR: 0.0003 +[2026-03-04 09:28:30] (step=0054882) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.73801604382704, LR: 0.0003 +[2026-03-04 09:28:38] (step=0054883) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.738211700254354, LR: 0.0003 +[2026-03-04 09:28:46] (step=0054884) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.738407356681668, LR: 0.0003 +[2026-03-04 09:28:54] (step=0054885) Train Loss: 0.4515, Train Steps/Sec: 0.12, Epoch: 10.73860301310898, LR: 0.0003 +[2026-03-04 09:29:01] (step=0054886) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.738798669536294, LR: 0.0003 +[2026-03-04 09:29:09] (step=0054887) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.738994325963608, LR: 0.0003 +[2026-03-04 09:29:17] (step=0054888) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.739189982390922, LR: 0.0003 +[2026-03-04 09:29:25] (step=0054889) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.739385638818236, LR: 0.0003 +[2026-03-04 09:29:33] (step=0054890) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.739581295245548, LR: 0.0003 +[2026-03-04 09:29:41] (step=0054891) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.739776951672862, LR: 0.0003 +[2026-03-04 09:29:49] (step=0054892) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.739972608100176, LR: 0.0003 +[2026-03-04 09:29:57] (step=0054893) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.74016826452749, LR: 0.0003 +[2026-03-04 09:30:04] (step=0054894) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 10.740363920954803, LR: 0.0003 +[2026-03-04 09:30:12] (step=0054895) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.740559577382117, LR: 0.0003 +[2026-03-04 09:30:20] (step=0054896) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.74075523380943, LR: 0.0003 +[2026-03-04 09:30:28] (step=0054897) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.740950890236745, LR: 0.0003 +[2026-03-04 09:30:36] (step=0054898) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.741146546664059, LR: 0.0003 +[2026-03-04 09:30:44] (step=0054899) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.74134220309137, LR: 0.0003 +[2026-03-04 09:30:52] (step=0054900) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.741537859518685, LR: 0.0003 +[2026-03-04 09:30:59] (step=0054901) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.741733515945999, LR: 0.0003 +[2026-03-04 09:31:07] (step=0054902) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.741929172373313, LR: 0.0003 +[2026-03-04 09:31:15] (step=0054903) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.742124828800627, LR: 0.0003 +[2026-03-04 09:31:23] (step=0054904) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.742320485227939, LR: 0.0003 +[2026-03-04 09:31:31] (step=0054905) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.742516141655253, LR: 0.0003 +[2026-03-04 09:31:39] (step=0054906) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.742711798082567, LR: 0.0003 +[2026-03-04 09:31:47] (step=0054907) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.742907454509881, LR: 0.0003 +[2026-03-04 09:31:54] (step=0054908) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.743103110937195, LR: 0.0003 +[2026-03-04 09:32:02] (step=0054909) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.743298767364507, LR: 0.0003 +[2026-03-04 09:32:10] (step=0054910) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.743494423791821, LR: 0.0003 +[2026-03-04 09:32:18] (step=0054911) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.743690080219135, LR: 0.0003 +[2026-03-04 09:32:26] (step=0054912) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.74388573664645, LR: 0.0003 +[2026-03-04 09:32:34] (step=0054913) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.744081393073763, LR: 0.0003 +[2026-03-04 09:32:42] (step=0054914) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.744277049501076, LR: 0.0003 +[2026-03-04 09:32:50] (step=0054915) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.74447270592839, LR: 0.0003 +[2026-03-04 09:32:57] (step=0054916) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 10.744668362355704, LR: 0.0003 +[2026-03-04 09:33:05] (step=0054917) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.744864018783018, LR: 0.0003 +[2026-03-04 09:33:13] (step=0054918) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.74505967521033, LR: 0.0003 +[2026-03-04 09:33:21] (step=0054919) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.745255331637644, LR: 0.0003 +[2026-03-04 09:33:29] (step=0054920) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.745450988064958, LR: 0.0003 +[2026-03-04 09:33:37] (step=0054921) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.745646644492272, LR: 0.0003 +[2026-03-04 09:33:45] (step=0054922) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.745842300919586, LR: 0.0003 +[2026-03-04 09:33:52] (step=0054923) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.746037957346898, LR: 0.0003 +[2026-03-04 09:34:00] (step=0054924) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.746233613774212, LR: 0.0003 +[2026-03-04 09:34:08] (step=0054925) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 10.746429270201526, LR: 0.0003 +[2026-03-04 09:34:16] (step=0054926) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.74662492662884, LR: 0.0003 +[2026-03-04 09:34:24] (step=0054927) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.746820583056154, LR: 0.0003 +[2026-03-04 09:34:32] (step=0054928) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 10.747016239483466, LR: 0.0003 +[2026-03-04 09:34:40] (step=0054929) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.74721189591078, LR: 0.0003 +[2026-03-04 09:34:47] (step=0054930) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.747407552338094, LR: 0.0003 +[2026-03-04 09:34:55] (step=0054931) Train Loss: 0.4434, Train Steps/Sec: 0.12, Epoch: 10.747603208765408, LR: 0.0003 +[2026-03-04 09:35:03] (step=0054932) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.747798865192722, LR: 0.0003 +[2026-03-04 09:35:11] (step=0054933) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.747994521620035, LR: 0.0003 +[2026-03-04 09:35:19] (step=0054934) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.748190178047349, LR: 0.0003 +[2026-03-04 09:35:27] (step=0054935) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.748385834474663, LR: 0.0003 +[2026-03-04 09:35:35] (step=0054936) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.748581490901977, LR: 0.0003 +[2026-03-04 09:35:43] (step=0054937) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.74877714732929, LR: 0.0003 +[2026-03-04 09:35:50] (step=0054938) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.748972803756603, LR: 0.0003 +[2026-03-04 09:35:58] (step=0054939) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.749168460183917, LR: 0.0003 +[2026-03-04 09:36:06] (step=0054940) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.74936411661123, LR: 0.0003 +[2026-03-04 09:36:14] (step=0054941) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.749559773038545, LR: 0.0003 +[2026-03-04 09:36:22] (step=0054942) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 10.749755429465859, LR: 0.0003 +[2026-03-04 09:36:30] (step=0054943) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.749951085893171, LR: 0.0003 +[2026-03-04 09:36:38] (step=0054944) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.750146742320485, LR: 0.0003 +[2026-03-04 09:36:45] (step=0054945) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.750342398747799, LR: 0.0003 +[2026-03-04 09:36:53] (step=0054946) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.750538055175113, LR: 0.0003 +[2026-03-04 09:37:01] (step=0054947) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 10.750733711602425, LR: 0.0003 +[2026-03-04 09:37:09] (step=0054948) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.75092936802974, LR: 0.0003 +[2026-03-04 09:37:17] (step=0054949) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.751125024457053, LR: 0.0003 +[2026-03-04 09:37:25] (step=0054950) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.751320680884367, LR: 0.0003 +[2026-03-04 09:37:33] (step=0054951) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 10.751516337311681, LR: 0.0003 +[2026-03-04 09:37:40] (step=0054952) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 10.751711993738994, LR: 0.0003 +[2026-03-04 09:37:48] (step=0054953) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.751907650166308, LR: 0.0003 +[2026-03-04 09:37:56] (step=0054954) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 10.752103306593622, LR: 0.0003 +[2026-03-04 09:38:04] (step=0054955) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.752298963020936, LR: 0.0003 +[2026-03-04 09:38:12] (step=0054956) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.75249461944825, LR: 0.0003 +[2026-03-04 09:38:20] (step=0054957) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.752690275875562, LR: 0.0003 +[2026-03-04 09:38:28] (step=0054958) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.752885932302876, LR: 0.0003 +[2026-03-04 09:38:35] (step=0054959) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.75308158873019, LR: 0.0003 +[2026-03-04 09:38:43] (step=0054960) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.753277245157504, LR: 0.0003 +[2026-03-04 09:38:51] (step=0054961) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.753472901584818, LR: 0.0003 +[2026-03-04 09:38:59] (step=0054962) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.75366855801213, LR: 0.0003 +[2026-03-04 09:39:07] (step=0054963) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.753864214439444, LR: 0.0003 +[2026-03-04 09:39:15] (step=0054964) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.754059870866758, LR: 0.0003 +[2026-03-04 09:39:23] (step=0054965) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.754255527294072, LR: 0.0003 +[2026-03-04 09:39:30] (step=0054966) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.754451183721386, LR: 0.0003 +[2026-03-04 09:39:38] (step=0054967) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.754646840148698, LR: 0.0003 +[2026-03-04 09:39:46] (step=0054968) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.754842496576012, LR: 0.0003 +[2026-03-04 09:39:54] (step=0054969) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.755038153003326, LR: 0.0003 +[2026-03-04 09:40:02] (step=0054970) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.75523380943064, LR: 0.0003 +[2026-03-04 09:40:10] (step=0054971) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.755429465857953, LR: 0.0003 +[2026-03-04 09:40:18] (step=0054972) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.755625122285267, LR: 0.0003 +[2026-03-04 09:40:25] (step=0054973) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.75582077871258, LR: 0.0003 +[2026-03-04 09:40:33] (step=0054974) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.756016435139895, LR: 0.0003 +[2026-03-04 09:40:41] (step=0054975) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 10.756212091567209, LR: 0.0003 +[2026-03-04 09:40:49] (step=0054976) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.75640774799452, LR: 0.0003 +[2026-03-04 09:40:57] (step=0054977) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.756603404421835, LR: 0.0003 +[2026-03-04 09:41:05] (step=0054978) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 10.756799060849149, LR: 0.0003 +[2026-03-04 09:41:13] (step=0054979) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 10.756994717276463, LR: 0.0003 +[2026-03-04 09:41:20] (step=0054980) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.757190373703777, LR: 0.0003 +[2026-03-04 09:41:28] (step=0054981) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.757386030131089, LR: 0.0003 +[2026-03-04 09:41:36] (step=0054982) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.757581686558403, LR: 0.0003 +[2026-03-04 09:41:44] (step=0054983) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.757777342985717, LR: 0.0003 +[2026-03-04 09:41:52] (step=0054984) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.757972999413031, LR: 0.0003 +[2026-03-04 09:42:00] (step=0054985) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.758168655840345, LR: 0.0003 +[2026-03-04 09:42:08] (step=0054986) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.758364312267657, LR: 0.0003 +[2026-03-04 09:42:16] (step=0054987) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.758559968694971, LR: 0.0003 +[2026-03-04 09:42:23] (step=0054988) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.758755625122285, LR: 0.0003 +[2026-03-04 09:42:31] (step=0054989) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.7589512815496, LR: 0.0003 +[2026-03-04 09:42:39] (step=0054990) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.759146937976913, LR: 0.0003 +[2026-03-04 09:42:47] (step=0054991) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.759342594404226, LR: 0.0003 +[2026-03-04 09:42:55] (step=0054992) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.75953825083154, LR: 0.0003 +[2026-03-04 09:43:03] (step=0054993) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.759733907258854, LR: 0.0003 +[2026-03-04 09:43:11] (step=0054994) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.759929563686168, LR: 0.0003 +[2026-03-04 09:43:19] (step=0054995) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.760125220113482, LR: 0.0003 +[2026-03-04 09:43:26] (step=0054996) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.760320876540794, LR: 0.0003 +[2026-03-04 09:43:34] (step=0054997) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.760516532968108, LR: 0.0003 +[2026-03-04 09:43:42] (step=0054998) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.760712189395422, LR: 0.0003 +[2026-03-04 09:43:50] (step=0054999) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.760907845822736, LR: 0.0003 +[2026-03-04 09:43:58] (step=0055000) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.761103502250048, LR: 0.0003 +[2026-03-04 09:43:58] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0055000/ +[2026-03-04 09:44:06] (step=0055001) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.761299158677362, LR: 0.0003 +[2026-03-04 09:44:14] (step=0055002) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.761494815104676, LR: 0.0003 +[2026-03-04 09:44:22] (step=0055003) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.76169047153199, LR: 0.0003 +[2026-03-04 09:44:29] (step=0055004) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.761886127959304, LR: 0.0003 +[2026-03-04 09:44:37] (step=0055005) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.762081784386616, LR: 0.0003 +[2026-03-04 09:44:45] (step=0055006) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.76227744081393, LR: 0.0003 +[2026-03-04 09:44:53] (step=0055007) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.762473097241244, LR: 0.0003 +[2026-03-04 09:45:01] (step=0055008) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 10.762668753668558, LR: 0.0003 +[2026-03-04 09:45:09] (step=0055009) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.762864410095872, LR: 0.0003 +[2026-03-04 09:45:17] (step=0055010) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.763060066523185, LR: 0.0003 +[2026-03-04 09:45:24] (step=0055011) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.763255722950499, LR: 0.0003 +[2026-03-04 09:45:32] (step=0055012) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.763451379377813, LR: 0.0003 +[2026-03-04 09:45:40] (step=0055013) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.763647035805127, LR: 0.0003 +[2026-03-04 09:45:48] (step=0055014) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.76384269223244, LR: 0.0003 +[2026-03-04 09:45:56] (step=0055015) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.764038348659753, LR: 0.0003 +[2026-03-04 09:46:04] (step=0055016) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.764234005087067, LR: 0.0003 +[2026-03-04 09:46:12] (step=0055017) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.76442966151438, LR: 0.0003 +[2026-03-04 09:46:20] (step=0055018) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 10.764625317941695, LR: 0.0003 +[2026-03-04 09:46:27] (step=0055019) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.764820974369009, LR: 0.0003 +[2026-03-04 09:46:35] (step=0055020) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.765016630796321, LR: 0.0003 +[2026-03-04 09:46:43] (step=0055021) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 10.765212287223635, LR: 0.0003 +[2026-03-04 09:46:51] (step=0055022) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.765407943650949, LR: 0.0003 +[2026-03-04 09:46:59] (step=0055023) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.765603600078263, LR: 0.0003 +[2026-03-04 09:47:07] (step=0055024) Train Loss: 0.4341, Train Steps/Sec: 0.12, Epoch: 10.765799256505575, LR: 0.0003 +[2026-03-04 09:47:15] (step=0055025) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.76599491293289, LR: 0.0003 +[2026-03-04 09:47:23] (step=0055026) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.766190569360203, LR: 0.0003 +[2026-03-04 09:47:30] (step=0055027) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.766386225787517, LR: 0.0003 +[2026-03-04 09:47:38] (step=0055028) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.766581882214831, LR: 0.0003 +[2026-03-04 09:47:46] (step=0055029) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.766777538642144, LR: 0.0003 +[2026-03-04 09:47:54] (step=0055030) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.766973195069458, LR: 0.0003 +[2026-03-04 09:48:02] (step=0055031) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.767168851496772, LR: 0.0003 +[2026-03-04 09:48:10] (step=0055032) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.767364507924086, LR: 0.0003 +[2026-03-04 09:48:18] (step=0055033) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.7675601643514, LR: 0.0003 +[2026-03-04 09:48:26] (step=0055034) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.767755820778712, LR: 0.0003 +[2026-03-04 09:48:33] (step=0055035) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.767951477206026, LR: 0.0003 +[2026-03-04 09:48:41] (step=0055036) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.76814713363334, LR: 0.0003 +[2026-03-04 09:48:49] (step=0055037) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.768342790060654, LR: 0.0003 +[2026-03-04 09:48:57] (step=0055038) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.768538446487968, LR: 0.0003 +[2026-03-04 09:49:05] (step=0055039) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.76873410291528, LR: 0.0003 +[2026-03-04 09:49:13] (step=0055040) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.768929759342594, LR: 0.0003 +[2026-03-04 09:49:21] (step=0055041) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.769125415769908, LR: 0.0003 +[2026-03-04 09:49:29] (step=0055042) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.769321072197222, LR: 0.0003 +[2026-03-04 09:49:36] (step=0055043) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.769516728624536, LR: 0.0003 +[2026-03-04 09:49:44] (step=0055044) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.769712385051848, LR: 0.0003 +[2026-03-04 09:49:52] (step=0055045) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.769908041479162, LR: 0.0003 +[2026-03-04 09:50:00] (step=0055046) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.770103697906476, LR: 0.0003 +[2026-03-04 09:50:08] (step=0055047) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.77029935433379, LR: 0.0003 +[2026-03-04 09:50:16] (step=0055048) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.770495010761104, LR: 0.0003 +[2026-03-04 09:50:24] (step=0055049) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.770690667188417, LR: 0.0003 +[2026-03-04 09:50:31] (step=0055050) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 10.77088632361573, LR: 0.0003 +[2026-03-04 09:50:39] (step=0055051) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.771081980043045, LR: 0.0003 +[2026-03-04 09:50:47] (step=0055052) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.771277636470359, LR: 0.0003 +[2026-03-04 09:50:55] (step=0055053) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 10.77147329289767, LR: 0.0003 +[2026-03-04 09:51:03] (step=0055054) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.771668949324985, LR: 0.0003 +[2026-03-04 09:51:11] (step=0055055) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.771864605752299, LR: 0.0003 +[2026-03-04 09:51:19] (step=0055056) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.772060262179613, LR: 0.0003 +[2026-03-04 09:51:27] (step=0055057) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.772255918606927, LR: 0.0003 +[2026-03-04 09:51:34] (step=0055058) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.77245157503424, LR: 0.0003 +[2026-03-04 09:51:42] (step=0055059) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.772647231461553, LR: 0.0003 +[2026-03-04 09:51:50] (step=0055060) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.772842887888867, LR: 0.0003 +[2026-03-04 09:51:58] (step=0055061) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 10.773038544316181, LR: 0.0003 +[2026-03-04 09:52:06] (step=0055062) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.773234200743495, LR: 0.0003 +[2026-03-04 09:52:14] (step=0055063) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.773429857170807, LR: 0.0003 +[2026-03-04 09:52:22] (step=0055064) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 10.773625513598121, LR: 0.0003 +[2026-03-04 09:52:29] (step=0055065) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.773821170025435, LR: 0.0003 +[2026-03-04 09:52:37] (step=0055066) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 10.77401682645275, LR: 0.0003 +[2026-03-04 09:52:45] (step=0055067) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 10.774212482880063, LR: 0.0003 +[2026-03-04 09:52:53] (step=0055068) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.774408139307376, LR: 0.0003 +[2026-03-04 09:53:01] (step=0055069) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 10.77460379573469, LR: 0.0003 +[2026-03-04 09:53:09] (step=0055070) Train Loss: 0.4242, Train Steps/Sec: 0.13, Epoch: 10.774799452162004, LR: 0.0003 +[2026-03-04 09:53:17] (step=0055071) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.774995108589318, LR: 0.0003 +[2026-03-04 09:53:24] (step=0055072) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.775190765016632, LR: 0.0003 +[2026-03-04 09:53:32] (step=0055073) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.775386421443944, LR: 0.0003 +[2026-03-04 09:53:40] (step=0055074) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.775582077871258, LR: 0.0003 +[2026-03-04 09:53:48] (step=0055075) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.775777734298572, LR: 0.0003 +[2026-03-04 09:53:56] (step=0055076) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.775973390725886, LR: 0.0003 +[2026-03-04 09:54:04] (step=0055077) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.776169047153198, LR: 0.0003 +[2026-03-04 09:54:12] (step=0055078) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.776364703580512, LR: 0.0003 +[2026-03-04 09:54:20] (step=0055079) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.776560360007826, LR: 0.0003 +[2026-03-04 09:54:27] (step=0055080) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.77675601643514, LR: 0.0003 +[2026-03-04 09:54:35] (step=0055081) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.776951672862454, LR: 0.0003 +[2026-03-04 09:54:43] (step=0055082) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.777147329289766, LR: 0.0003 +[2026-03-04 09:54:51] (step=0055083) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.77734298571708, LR: 0.0003 +[2026-03-04 09:54:59] (step=0055084) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.777538642144394, LR: 0.0003 +[2026-03-04 09:55:07] (step=0055085) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.777734298571708, LR: 0.0003 +[2026-03-04 09:55:15] (step=0055086) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.777929954999022, LR: 0.0003 +[2026-03-04 09:55:22] (step=0055087) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.778125611426335, LR: 0.0003 +[2026-03-04 09:55:30] (step=0055088) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.778321267853649, LR: 0.0003 +[2026-03-04 09:55:38] (step=0055089) Train Loss: 0.4224, Train Steps/Sec: 0.13, Epoch: 10.778516924280963, LR: 0.0003 +[2026-03-04 09:55:46] (step=0055090) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.778712580708277, LR: 0.0003 +[2026-03-04 09:55:54] (step=0055091) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.77890823713559, LR: 0.0003 +[2026-03-04 09:56:02] (step=0055092) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.779103893562903, LR: 0.0003 +[2026-03-04 09:56:10] (step=0055093) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.779299549990217, LR: 0.0003 +[2026-03-04 09:56:18] (step=0055094) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 10.779495206417531, LR: 0.0003 +[2026-03-04 09:56:26] (step=0055095) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.779690862844845, LR: 0.0003 +[2026-03-04 09:56:33] (step=0055096) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 10.779886519272159, LR: 0.0003 +[2026-03-04 09:56:41] (step=0055097) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.780082175699471, LR: 0.0003 +[2026-03-04 09:56:49] (step=0055098) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 10.780277832126785, LR: 0.0003 +[2026-03-04 09:56:57] (step=0055099) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.7804734885541, LR: 0.0003 +[2026-03-04 09:57:05] (step=0055100) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.780669144981413, LR: 0.0003 +[2026-03-04 09:57:13] (step=0055101) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.780864801408727, LR: 0.0003 +[2026-03-04 09:57:21] (step=0055102) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.78106045783604, LR: 0.0003 +[2026-03-04 09:57:28] (step=0055103) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.781256114263353, LR: 0.0003 +[2026-03-04 09:57:36] (step=0055104) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.781451770690667, LR: 0.0003 +[2026-03-04 09:57:44] (step=0055105) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 10.781647427117981, LR: 0.0003 +[2026-03-04 09:57:52] (step=0055106) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.781843083545294, LR: 0.0003 +[2026-03-04 09:58:00] (step=0055107) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.782038739972608, LR: 0.0003 +[2026-03-04 09:58:08] (step=0055108) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.782234396399922, LR: 0.0003 +[2026-03-04 09:58:16] (step=0055109) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.782430052827236, LR: 0.0003 +[2026-03-04 09:58:24] (step=0055110) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 10.78262570925455, LR: 0.0003 +[2026-03-04 09:58:31] (step=0055111) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.782821365681862, LR: 0.0003 +[2026-03-04 09:58:39] (step=0055112) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 10.783017022109176, LR: 0.0003 +[2026-03-04 09:58:47] (step=0055113) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.78321267853649, LR: 0.0003 +[2026-03-04 09:58:55] (step=0055114) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.783408334963804, LR: 0.0003 +[2026-03-04 09:59:03] (step=0055115) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.783603991391118, LR: 0.0003 +[2026-03-04 09:59:11] (step=0055116) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.78379964781843, LR: 0.0003 +[2026-03-04 09:59:19] (step=0055117) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 10.783995304245744, LR: 0.0003 +[2026-03-04 09:59:26] (step=0055118) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.784190960673058, LR: 0.0003 +[2026-03-04 09:59:34] (step=0055119) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.784386617100372, LR: 0.0003 +[2026-03-04 09:59:42] (step=0055120) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 10.784582273527686, LR: 0.0003 +[2026-03-04 09:59:50] (step=0055121) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.784777929954998, LR: 0.0003 +[2026-03-04 09:59:58] (step=0055122) Train Loss: 0.4374, Train Steps/Sec: 0.12, Epoch: 10.784973586382312, LR: 0.0003 +[2026-03-04 10:00:06] (step=0055123) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.785169242809626, LR: 0.0003 +[2026-03-04 10:00:14] (step=0055124) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 10.78536489923694, LR: 0.0003 +[2026-03-04 10:00:22] (step=0055125) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.785560555664254, LR: 0.0003 +[2026-03-04 10:00:30] (step=0055126) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.785756212091567, LR: 0.0003 +[2026-03-04 10:00:37] (step=0055127) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.78595186851888, LR: 0.0003 +[2026-03-04 10:00:45] (step=0055128) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.786147524946195, LR: 0.0003 +[2026-03-04 10:00:53] (step=0055129) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.786343181373509, LR: 0.0003 +[2026-03-04 10:01:01] (step=0055130) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.786538837800821, LR: 0.0003 +[2026-03-04 10:01:09] (step=0055131) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.786734494228135, LR: 0.0003 +[2026-03-04 10:01:17] (step=0055132) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.786930150655449, LR: 0.0003 +[2026-03-04 10:01:25] (step=0055133) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.787125807082763, LR: 0.0003 +[2026-03-04 10:01:32] (step=0055134) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.787321463510077, LR: 0.0003 +[2026-03-04 10:01:40] (step=0055135) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 10.78751711993739, LR: 0.0003 +[2026-03-04 10:01:48] (step=0055136) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.787712776364703, LR: 0.0003 +[2026-03-04 10:01:56] (step=0055137) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.787908432792017, LR: 0.0003 +[2026-03-04 10:02:04] (step=0055138) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.788104089219331, LR: 0.0003 +[2026-03-04 10:02:12] (step=0055139) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 10.788299745646645, LR: 0.0003 +[2026-03-04 10:02:20] (step=0055140) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.788495402073957, LR: 0.0003 +[2026-03-04 10:02:28] (step=0055141) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.788691058501271, LR: 0.0003 +[2026-03-04 10:02:36] (step=0055142) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.788886714928585, LR: 0.0003 +[2026-03-04 10:02:43] (step=0055143) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.7890823713559, LR: 0.0003 +[2026-03-04 10:02:51] (step=0055144) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.789278027783213, LR: 0.0003 +[2026-03-04 10:02:59] (step=0055145) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.789473684210526, LR: 0.0003 +[2026-03-04 10:03:07] (step=0055146) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.78966934063784, LR: 0.0003 +[2026-03-04 10:03:15] (step=0055147) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.789864997065154, LR: 0.0003 +[2026-03-04 10:03:23] (step=0055148) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.790060653492468, LR: 0.0003 +[2026-03-04 10:03:31] (step=0055149) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.790256309919782, LR: 0.0003 +[2026-03-04 10:03:38] (step=0055150) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.790451966347094, LR: 0.0003 +[2026-03-04 10:03:46] (step=0055151) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.790647622774408, LR: 0.0003 +[2026-03-04 10:03:54] (step=0055152) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.790843279201722, LR: 0.0003 +[2026-03-04 10:04:02] (step=0055153) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.791038935629036, LR: 0.0003 +[2026-03-04 10:04:10] (step=0055154) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.79123459205635, LR: 0.0003 +[2026-03-04 10:04:18] (step=0055155) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.791430248483662, LR: 0.0003 +[2026-03-04 10:04:26] (step=0055156) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.791625904910976, LR: 0.0003 +[2026-03-04 10:04:34] (step=0055157) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.79182156133829, LR: 0.0003 +[2026-03-04 10:04:41] (step=0055158) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.792017217765604, LR: 0.0003 +[2026-03-04 10:04:49] (step=0055159) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.792212874192916, LR: 0.0003 +[2026-03-04 10:04:57] (step=0055160) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.79240853062023, LR: 0.0003 +[2026-03-04 10:05:05] (step=0055161) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.792604187047544, LR: 0.0003 +[2026-03-04 10:05:13] (step=0055162) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.792799843474858, LR: 0.0003 +[2026-03-04 10:05:21] (step=0055163) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.792995499902172, LR: 0.0003 +[2026-03-04 10:05:29] (step=0055164) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.793191156329485, LR: 0.0003 +[2026-03-04 10:05:36] (step=0055165) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 10.793386812756799, LR: 0.0003 +[2026-03-04 10:05:44] (step=0055166) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.793582469184113, LR: 0.0003 +[2026-03-04 10:05:52] (step=0055167) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.793778125611427, LR: 0.0003 +[2026-03-04 10:06:00] (step=0055168) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.79397378203874, LR: 0.0003 +[2026-03-04 10:06:08] (step=0055169) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.794169438466053, LR: 0.0003 +[2026-03-04 10:06:16] (step=0055170) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.794365094893367, LR: 0.0003 +[2026-03-04 10:06:24] (step=0055171) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 10.794560751320681, LR: 0.0003 +[2026-03-04 10:06:32] (step=0055172) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.794756407747995, LR: 0.0003 +[2026-03-04 10:06:39] (step=0055173) Train Loss: 0.4647, Train Steps/Sec: 0.13, Epoch: 10.794952064175309, LR: 0.0003 +[2026-03-04 10:06:47] (step=0055174) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.795147720602621, LR: 0.0003 +[2026-03-04 10:06:55] (step=0055175) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.795343377029935, LR: 0.0003 +[2026-03-04 10:07:03] (step=0055176) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.79553903345725, LR: 0.0003 +[2026-03-04 10:07:11] (step=0055177) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 10.795734689884563, LR: 0.0003 +[2026-03-04 10:07:19] (step=0055178) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.795930346311877, LR: 0.0003 +[2026-03-04 10:07:27] (step=0055179) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.79612600273919, LR: 0.0003 +[2026-03-04 10:07:34] (step=0055180) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 10.796321659166503, LR: 0.0003 +[2026-03-04 10:07:42] (step=0055181) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 10.796517315593817, LR: 0.0003 +[2026-03-04 10:07:50] (step=0055182) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 10.796712972021131, LR: 0.0003 +[2026-03-04 10:07:58] (step=0055183) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 10.796908628448444, LR: 0.0003 +[2026-03-04 10:08:06] (step=0055184) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.797104284875758, LR: 0.0003 +[2026-03-04 10:08:14] (step=0055185) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.797299941303072, LR: 0.0003 +[2026-03-04 10:08:22] (step=0055186) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.797495597730386, LR: 0.0003 +[2026-03-04 10:08:29] (step=0055187) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 10.7976912541577, LR: 0.0003 +[2026-03-04 10:08:37] (step=0055188) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.797886910585012, LR: 0.0003 +[2026-03-04 10:08:45] (step=0055189) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 10.798082567012326, LR: 0.0003 +[2026-03-04 10:08:53] (step=0055190) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.79827822343964, LR: 0.0003 +[2026-03-04 10:09:01] (step=0055191) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.798473879866954, LR: 0.0003 +[2026-03-04 10:09:09] (step=0055192) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.798669536294268, LR: 0.0003 +[2026-03-04 10:09:17] (step=0055193) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.79886519272158, LR: 0.0003 +[2026-03-04 10:09:25] (step=0055194) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.799060849148894, LR: 0.0003 +[2026-03-04 10:09:32] (step=0055195) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 10.799256505576208, LR: 0.0003 +[2026-03-04 10:09:40] (step=0055196) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.799452162003522, LR: 0.0003 +[2026-03-04 10:09:48] (step=0055197) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.799647818430836, LR: 0.0003 +[2026-03-04 10:09:56] (step=0055198) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.799843474858148, LR: 0.0003 +[2026-03-04 10:10:04] (step=0055199) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 10.800039131285462, LR: 0.0003 +[2026-03-04 10:10:12] (step=0055200) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.800234787712776, LR: 0.0003 +[2026-03-04 10:10:20] (step=0055201) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.80043044414009, LR: 0.0003 +[2026-03-04 10:10:28] (step=0055202) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.800626100567404, LR: 0.0003 +[2026-03-04 10:10:35] (step=0055203) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.800821756994717, LR: 0.0003 +[2026-03-04 10:10:43] (step=0055204) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 10.80101741342203, LR: 0.0003 +[2026-03-04 10:10:51] (step=0055205) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.801213069849345, LR: 0.0003 +[2026-03-04 10:10:59] (step=0055206) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.801408726276659, LR: 0.0003 +[2026-03-04 10:11:07] (step=0055207) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.801604382703971, LR: 0.0003 +[2026-03-04 10:11:15] (step=0055208) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.801800039131285, LR: 0.0003 +[2026-03-04 10:11:23] (step=0055209) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.801995695558599, LR: 0.0003 +[2026-03-04 10:11:30] (step=0055210) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.802191351985913, LR: 0.0003 +[2026-03-04 10:11:38] (step=0055211) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.802387008413227, LR: 0.0003 +[2026-03-04 10:11:46] (step=0055212) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.80258266484054, LR: 0.0003 +[2026-03-04 10:11:54] (step=0055213) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.802778321267853, LR: 0.0003 +[2026-03-04 10:12:02] (step=0055214) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.802973977695167, LR: 0.0003 +[2026-03-04 10:12:10] (step=0055215) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.803169634122481, LR: 0.0003 +[2026-03-04 10:12:18] (step=0055216) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.803365290549795, LR: 0.0003 +[2026-03-04 10:12:26] (step=0055217) Train Loss: 0.4518, Train Steps/Sec: 0.12, Epoch: 10.803560946977107, LR: 0.0003 +[2026-03-04 10:12:34] (step=0055218) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.803756603404421, LR: 0.0003 +[2026-03-04 10:12:41] (step=0055219) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.803952259831735, LR: 0.0003 +[2026-03-04 10:12:49] (step=0055220) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.80414791625905, LR: 0.0003 +[2026-03-04 10:12:57] (step=0055221) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.804343572686363, LR: 0.0003 +[2026-03-04 10:13:05] (step=0055222) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.804539229113676, LR: 0.0003 +[2026-03-04 10:13:13] (step=0055223) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.80473488554099, LR: 0.0003 +[2026-03-04 10:13:21] (step=0055224) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.804930541968304, LR: 0.0003 +[2026-03-04 10:13:29] (step=0055225) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.805126198395618, LR: 0.0003 +[2026-03-04 10:13:36] (step=0055226) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.805321854822932, LR: 0.0003 +[2026-03-04 10:13:44] (step=0055227) Train Loss: 0.4174, Train Steps/Sec: 0.13, Epoch: 10.805517511250244, LR: 0.0003 +[2026-03-04 10:13:52] (step=0055228) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.805713167677558, LR: 0.0003 +[2026-03-04 10:14:00] (step=0055229) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 10.805908824104872, LR: 0.0003 +[2026-03-04 10:14:08] (step=0055230) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.806104480532186, LR: 0.0003 +[2026-03-04 10:14:16] (step=0055231) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.8063001369595, LR: 0.0003 +[2026-03-04 10:14:24] (step=0055232) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.806495793386812, LR: 0.0003 +[2026-03-04 10:14:32] (step=0055233) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.806691449814126, LR: 0.0003 +[2026-03-04 10:14:39] (step=0055234) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.80688710624144, LR: 0.0003 +[2026-03-04 10:14:47] (step=0055235) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.807082762668754, LR: 0.0003 +[2026-03-04 10:14:55] (step=0055236) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.807278419096066, LR: 0.0003 +[2026-03-04 10:15:03] (step=0055237) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.80747407552338, LR: 0.0003 +[2026-03-04 10:15:11] (step=0055238) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 10.807669731950694, LR: 0.0003 +[2026-03-04 10:15:19] (step=0055239) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.807865388378008, LR: 0.0003 +[2026-03-04 10:15:27] (step=0055240) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.808061044805322, LR: 0.0003 +[2026-03-04 10:15:35] (step=0055241) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.808256701232635, LR: 0.0003 +[2026-03-04 10:15:42] (step=0055242) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.808452357659949, LR: 0.0003 +[2026-03-04 10:15:50] (step=0055243) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.808648014087263, LR: 0.0003 +[2026-03-04 10:15:58] (step=0055244) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 10.808843670514577, LR: 0.0003 +[2026-03-04 10:16:06] (step=0055245) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.80903932694189, LR: 0.0003 +[2026-03-04 10:16:14] (step=0055246) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.809234983369203, LR: 0.0003 +[2026-03-04 10:16:22] (step=0055247) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.809430639796517, LR: 0.0003 +[2026-03-04 10:16:30] (step=0055248) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.809626296223831, LR: 0.0003 +[2026-03-04 10:16:38] (step=0055249) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.809821952651145, LR: 0.0003 +[2026-03-04 10:16:45] (step=0055250) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.810017609078459, LR: 0.0003 +[2026-03-04 10:16:53] (step=0055251) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 10.810213265505771, LR: 0.0003 +[2026-03-04 10:17:01] (step=0055252) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.810408921933085, LR: 0.0003 +[2026-03-04 10:17:09] (step=0055253) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.8106045783604, LR: 0.0003 +[2026-03-04 10:17:17] (step=0055254) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 10.810800234787713, LR: 0.0003 +[2026-03-04 10:17:25] (step=0055255) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.810995891215027, LR: 0.0003 +[2026-03-04 10:17:33] (step=0055256) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.81119154764234, LR: 0.0003 +[2026-03-04 10:17:40] (step=0055257) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.811387204069653, LR: 0.0003 +[2026-03-04 10:17:48] (step=0055258) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.811582860496967, LR: 0.0003 +[2026-03-04 10:17:56] (step=0055259) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 10.811778516924281, LR: 0.0003 +[2026-03-04 10:18:04] (step=0055260) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.811974173351594, LR: 0.0003 +[2026-03-04 10:18:12] (step=0055261) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.812169829778908, LR: 0.0003 +[2026-03-04 10:18:20] (step=0055262) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 10.812365486206222, LR: 0.0003 +[2026-03-04 10:18:28] (step=0055263) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 10.812561142633536, LR: 0.0003 +[2026-03-04 10:18:35] (step=0055264) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.81275679906085, LR: 0.0003 +[2026-03-04 10:18:43] (step=0055265) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.812952455488162, LR: 0.0003 +[2026-03-04 10:18:51] (step=0055266) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.813148111915476, LR: 0.0003 +[2026-03-04 10:18:59] (step=0055267) Train Loss: 0.4316, Train Steps/Sec: 0.12, Epoch: 10.81334376834279, LR: 0.0003 +[2026-03-04 10:19:07] (step=0055268) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.813539424770104, LR: 0.0003 +[2026-03-04 10:19:15] (step=0055269) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 10.813735081197418, LR: 0.0003 +[2026-03-04 10:19:23] (step=0055270) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 10.81393073762473, LR: 0.0003 +[2026-03-04 10:19:31] (step=0055271) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.814126394052044, LR: 0.0003 +[2026-03-04 10:19:39] (step=0055272) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.814322050479358, LR: 0.0003 +[2026-03-04 10:19:46] (step=0055273) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 10.814517706906672, LR: 0.0003 +[2026-03-04 10:19:54] (step=0055274) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 10.814713363333986, LR: 0.0003 +[2026-03-04 10:20:02] (step=0055275) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.814909019761298, LR: 0.0003 +[2026-03-04 10:20:10] (step=0055276) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 10.815104676188612, LR: 0.0003 +[2026-03-04 10:20:18] (step=0055277) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.815300332615926, LR: 0.0003 +[2026-03-04 10:20:26] (step=0055278) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.81549598904324, LR: 0.0003 +[2026-03-04 10:20:34] (step=0055279) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.815691645470555, LR: 0.0003 +[2026-03-04 10:20:41] (step=0055280) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.815887301897867, LR: 0.0003 +[2026-03-04 10:20:49] (step=0055281) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 10.81608295832518, LR: 0.0003 +[2026-03-04 10:20:57] (step=0055282) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.816278614752495, LR: 0.0003 +[2026-03-04 10:21:05] (step=0055283) Train Loss: 0.4410, Train Steps/Sec: 0.12, Epoch: 10.816474271179809, LR: 0.0003 +[2026-03-04 10:21:13] (step=0055284) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.816669927607123, LR: 0.0003 +[2026-03-04 10:21:21] (step=0055285) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.816865584034435, LR: 0.0003 +[2026-03-04 10:21:29] (step=0055286) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.817061240461749, LR: 0.0003 +[2026-03-04 10:21:37] (step=0055287) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.817256896889063, LR: 0.0003 +[2026-03-04 10:21:44] (step=0055288) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.817452553316377, LR: 0.0003 +[2026-03-04 10:21:52] (step=0055289) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 10.81764820974369, LR: 0.0003 +[2026-03-04 10:22:00] (step=0055290) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.817843866171003, LR: 0.0003 +[2026-03-04 10:22:08] (step=0055291) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.818039522598317, LR: 0.0003 +[2026-03-04 10:22:16] (step=0055292) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 10.818235179025631, LR: 0.0003 +[2026-03-04 10:22:24] (step=0055293) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 10.818430835452945, LR: 0.0003 +[2026-03-04 10:22:32] (step=0055294) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.818626491880257, LR: 0.0003 +[2026-03-04 10:22:40] (step=0055295) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.818822148307571, LR: 0.0003 +[2026-03-04 10:22:47] (step=0055296) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.819017804734886, LR: 0.0003 +[2026-03-04 10:22:55] (step=0055297) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.8192134611622, LR: 0.0003 +[2026-03-04 10:23:03] (step=0055298) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.819409117589514, LR: 0.0003 +[2026-03-04 10:23:11] (step=0055299) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.819604774016826, LR: 0.0003 +[2026-03-04 10:23:19] (step=0055300) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.81980043044414, LR: 0.0003 +[2026-03-04 10:23:27] (step=0055301) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.819996086871454, LR: 0.0003 +[2026-03-04 10:23:35] (step=0055302) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.820191743298768, LR: 0.0003 +[2026-03-04 10:23:42] (step=0055303) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.820387399726082, LR: 0.0003 +[2026-03-04 10:23:50] (step=0055304) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.820583056153394, LR: 0.0003 +[2026-03-04 10:23:58] (step=0055305) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 10.820778712580708, LR: 0.0003 +[2026-03-04 10:24:06] (step=0055306) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.820974369008022, LR: 0.0003 +[2026-03-04 10:24:14] (step=0055307) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.821170025435336, LR: 0.0003 +[2026-03-04 10:24:22] (step=0055308) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.82136568186265, LR: 0.0003 +[2026-03-04 10:24:30] (step=0055309) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.821561338289962, LR: 0.0003 +[2026-03-04 10:24:37] (step=0055310) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.821756994717276, LR: 0.0003 +[2026-03-04 10:24:45] (step=0055311) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.82195265114459, LR: 0.0003 +[2026-03-04 10:24:53] (step=0055312) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.822148307571904, LR: 0.0003 +[2026-03-04 10:25:01] (step=0055313) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.822343963999216, LR: 0.0003 +[2026-03-04 10:25:09] (step=0055314) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.82253962042653, LR: 0.0003 +[2026-03-04 10:25:17] (step=0055315) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.822735276853845, LR: 0.0003 +[2026-03-04 10:25:25] (step=0055316) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.822930933281159, LR: 0.0003 +[2026-03-04 10:25:32] (step=0055317) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.823126589708473, LR: 0.0003 +[2026-03-04 10:25:40] (step=0055318) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.823322246135785, LR: 0.0003 +[2026-03-04 10:25:48] (step=0055319) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.823517902563099, LR: 0.0003 +[2026-03-04 10:25:56] (step=0055320) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.823713558990413, LR: 0.0003 +[2026-03-04 10:26:04] (step=0055321) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.823909215417727, LR: 0.0003 +[2026-03-04 10:26:12] (step=0055322) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.82410487184504, LR: 0.0003 +[2026-03-04 10:26:20] (step=0055323) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.824300528272353, LR: 0.0003 +[2026-03-04 10:26:28] (step=0055324) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.824496184699667, LR: 0.0003 +[2026-03-04 10:26:36] (step=0055325) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.824691841126981, LR: 0.0003 +[2026-03-04 10:26:43] (step=0055326) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 10.824887497554295, LR: 0.0003 +[2026-03-04 10:26:51] (step=0055327) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.825083153981609, LR: 0.0003 +[2026-03-04 10:26:59] (step=0055328) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.825278810408921, LR: 0.0003 +[2026-03-04 10:27:07] (step=0055329) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.825474466836235, LR: 0.0003 +[2026-03-04 10:27:15] (step=0055330) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.82567012326355, LR: 0.0003 +[2026-03-04 10:27:23] (step=0055331) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.825865779690863, LR: 0.0003 +[2026-03-04 10:27:31] (step=0055332) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.826061436118177, LR: 0.0003 +[2026-03-04 10:27:38] (step=0055333) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.82625709254549, LR: 0.0003 +[2026-03-04 10:27:46] (step=0055334) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.826452748972804, LR: 0.0003 +[2026-03-04 10:27:54] (step=0055335) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 10.826648405400118, LR: 0.0003 +[2026-03-04 10:28:02] (step=0055336) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.826844061827432, LR: 0.0003 +[2026-03-04 10:28:10] (step=0055337) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.827039718254746, LR: 0.0003 +[2026-03-04 10:28:18] (step=0055338) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 10.827235374682058, LR: 0.0003 +[2026-03-04 10:28:26] (step=0055339) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.827431031109372, LR: 0.0003 +[2026-03-04 10:28:34] (step=0055340) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.827626687536686, LR: 0.0003 +[2026-03-04 10:28:42] (step=0055341) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.827822343964, LR: 0.0003 +[2026-03-04 10:28:49] (step=0055342) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.828018000391312, LR: 0.0003 +[2026-03-04 10:28:57] (step=0055343) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.828213656818626, LR: 0.0003 +[2026-03-04 10:29:05] (step=0055344) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 10.82840931324594, LR: 0.0003 +[2026-03-04 10:29:13] (step=0055345) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 10.828604969673254, LR: 0.0003 +[2026-03-04 10:29:21] (step=0055346) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.828800626100568, LR: 0.0003 +[2026-03-04 10:29:29] (step=0055347) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.82899628252788, LR: 0.0003 +[2026-03-04 10:29:37] (step=0055348) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.829191938955194, LR: 0.0003 +[2026-03-04 10:29:45] (step=0055349) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.829387595382508, LR: 0.0003 +[2026-03-04 10:29:52] (step=0055350) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.829583251809822, LR: 0.0003 +[2026-03-04 10:30:00] (step=0055351) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.829778908237136, LR: 0.0003 +[2026-03-04 10:30:08] (step=0055352) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.829974564664449, LR: 0.0003 +[2026-03-04 10:30:16] (step=0055353) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.830170221091763, LR: 0.0003 +[2026-03-04 10:30:24] (step=0055354) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.830365877519077, LR: 0.0003 +[2026-03-04 10:30:32] (step=0055355) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.83056153394639, LR: 0.0003 +[2026-03-04 10:30:40] (step=0055356) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.830757190373705, LR: 0.0003 +[2026-03-04 10:30:47] (step=0055357) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.830952846801017, LR: 0.0003 +[2026-03-04 10:30:55] (step=0055358) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.83114850322833, LR: 0.0003 +[2026-03-04 10:31:03] (step=0055359) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 10.831344159655645, LR: 0.0003 +[2026-03-04 10:31:11] (step=0055360) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.831539816082959, LR: 0.0003 +[2026-03-04 10:31:19] (step=0055361) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.831735472510273, LR: 0.0003 +[2026-03-04 10:31:27] (step=0055362) Train Loss: 0.4677, Train Steps/Sec: 0.13, Epoch: 10.831931128937585, LR: 0.0003 +[2026-03-04 10:31:35] (step=0055363) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.832126785364899, LR: 0.0003 +[2026-03-04 10:31:43] (step=0055364) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.832322441792213, LR: 0.0003 +[2026-03-04 10:31:50] (step=0055365) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.832518098219527, LR: 0.0003 +[2026-03-04 10:31:58] (step=0055366) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.83271375464684, LR: 0.0003 +[2026-03-04 10:32:06] (step=0055367) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.832909411074153, LR: 0.0003 +[2026-03-04 10:32:14] (step=0055368) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.833105067501467, LR: 0.0003 +[2026-03-04 10:32:22] (step=0055369) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.833300723928781, LR: 0.0003 +[2026-03-04 10:32:30] (step=0055370) Train Loss: 0.4332, Train Steps/Sec: 0.12, Epoch: 10.833496380356095, LR: 0.0003 +[2026-03-04 10:32:38] (step=0055371) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.833692036783408, LR: 0.0003 +[2026-03-04 10:32:46] (step=0055372) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.833887693210722, LR: 0.0003 +[2026-03-04 10:32:53] (step=0055373) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.834083349638036, LR: 0.0003 +[2026-03-04 10:33:01] (step=0055374) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.83427900606535, LR: 0.0003 +[2026-03-04 10:33:09] (step=0055375) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.834474662492664, LR: 0.0003 +[2026-03-04 10:33:17] (step=0055376) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.834670318919976, LR: 0.0003 +[2026-03-04 10:33:25] (step=0055377) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 10.83486597534729, LR: 0.0003 +[2026-03-04 10:33:33] (step=0055378) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.835061631774604, LR: 0.0003 +[2026-03-04 10:33:41] (step=0055379) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.835257288201918, LR: 0.0003 +[2026-03-04 10:33:49] (step=0055380) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.835452944629232, LR: 0.0003 +[2026-03-04 10:33:56] (step=0055381) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.835648601056544, LR: 0.0003 +[2026-03-04 10:34:04] (step=0055382) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.835844257483858, LR: 0.0003 +[2026-03-04 10:34:12] (step=0055383) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.836039913911172, LR: 0.0003 +[2026-03-04 10:34:20] (step=0055384) Train Loss: 0.4439, Train Steps/Sec: 0.12, Epoch: 10.836235570338486, LR: 0.0003 +[2026-03-04 10:34:28] (step=0055385) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 10.8364312267658, LR: 0.0003 +[2026-03-04 10:34:36] (step=0055386) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.836626883193112, LR: 0.0003 +[2026-03-04 10:34:44] (step=0055387) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.836822539620426, LR: 0.0003 +[2026-03-04 10:34:52] (step=0055388) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.83701819604774, LR: 0.0003 +[2026-03-04 10:34:59] (step=0055389) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 10.837213852475054, LR: 0.0003 +[2026-03-04 10:35:07] (step=0055390) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.837409508902368, LR: 0.0003 +[2026-03-04 10:35:15] (step=0055391) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 10.83760516532968, LR: 0.0003 +[2026-03-04 10:35:23] (step=0055392) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.837800821756995, LR: 0.0003 +[2026-03-04 10:35:31] (step=0055393) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.837996478184309, LR: 0.0003 +[2026-03-04 10:35:39] (step=0055394) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 10.838192134611623, LR: 0.0003 +[2026-03-04 10:35:47] (step=0055395) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.838387791038935, LR: 0.0003 +[2026-03-04 10:35:55] (step=0055396) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.838583447466249, LR: 0.0003 +[2026-03-04 10:36:02] (step=0055397) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.838779103893563, LR: 0.0003 +[2026-03-04 10:36:10] (step=0055398) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.838974760320877, LR: 0.0003 +[2026-03-04 10:36:18] (step=0055399) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.83917041674819, LR: 0.0003 +[2026-03-04 10:36:26] (step=0055400) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.839366073175503, LR: 0.0003 +[2026-03-04 10:36:34] (step=0055401) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.839561729602817, LR: 0.0003 +[2026-03-04 10:36:42] (step=0055402) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 10.839757386030131, LR: 0.0003 +[2026-03-04 10:36:50] (step=0055403) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.839953042457445, LR: 0.0003 +[2026-03-04 10:36:57] (step=0055404) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.840148698884759, LR: 0.0003 +[2026-03-04 10:37:05] (step=0055405) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.840344355312071, LR: 0.0003 +[2026-03-04 10:37:13] (step=0055406) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.840540011739385, LR: 0.0003 +[2026-03-04 10:37:21] (step=0055407) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.8407356681667, LR: 0.0003 +[2026-03-04 10:37:29] (step=0055408) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.840931324594013, LR: 0.0003 +[2026-03-04 10:37:37] (step=0055409) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.841126981021327, LR: 0.0003 +[2026-03-04 10:37:45] (step=0055410) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 10.84132263744864, LR: 0.0003 +[2026-03-04 10:37:52] (step=0055411) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.841518293875954, LR: 0.0003 +[2026-03-04 10:38:00] (step=0055412) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.841713950303268, LR: 0.0003 +[2026-03-04 10:38:08] (step=0055413) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.841909606730582, LR: 0.0003 +[2026-03-04 10:38:16] (step=0055414) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.842105263157896, LR: 0.0003 +[2026-03-04 10:38:24] (step=0055415) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 10.842300919585208, LR: 0.0003 +[2026-03-04 10:38:32] (step=0055416) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.842496576012522, LR: 0.0003 +[2026-03-04 10:38:40] (step=0055417) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.842692232439836, LR: 0.0003 +[2026-03-04 10:38:48] (step=0055418) Train Loss: 0.4346, Train Steps/Sec: 0.12, Epoch: 10.84288788886715, LR: 0.0003 +[2026-03-04 10:38:55] (step=0055419) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.843083545294462, LR: 0.0003 +[2026-03-04 10:39:03] (step=0055420) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.843279201721776, LR: 0.0003 +[2026-03-04 10:39:11] (step=0055421) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.84347485814909, LR: 0.0003 +[2026-03-04 10:39:19] (step=0055422) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.843670514576404, LR: 0.0003 +[2026-03-04 10:39:27] (step=0055423) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.843866171003718, LR: 0.0003 +[2026-03-04 10:39:35] (step=0055424) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 10.84406182743103, LR: 0.0003 +[2026-03-04 10:39:43] (step=0055425) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.844257483858344, LR: 0.0003 +[2026-03-04 10:39:50] (step=0055426) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.844453140285658, LR: 0.0003 +[2026-03-04 10:39:58] (step=0055427) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.844648796712972, LR: 0.0003 +[2026-03-04 10:40:06] (step=0055428) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.844844453140286, LR: 0.0003 +[2026-03-04 10:40:14] (step=0055429) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.845040109567599, LR: 0.0003 +[2026-03-04 10:40:22] (step=0055430) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.845235765994913, LR: 0.0003 +[2026-03-04 10:40:30] (step=0055431) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.845431422422227, LR: 0.0003 +[2026-03-04 10:40:38] (step=0055432) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.84562707884954, LR: 0.0003 +[2026-03-04 10:40:46] (step=0055433) Train Loss: 0.4510, Train Steps/Sec: 0.12, Epoch: 10.845822735276855, LR: 0.0003 +[2026-03-04 10:40:54] (step=0055434) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 10.846018391704167, LR: 0.0003 +[2026-03-04 10:41:01] (step=0055435) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.84621404813148, LR: 0.0003 +[2026-03-04 10:41:09] (step=0055436) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.846409704558795, LR: 0.0003 +[2026-03-04 10:41:17] (step=0055437) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.846605360986109, LR: 0.0003 +[2026-03-04 10:41:25] (step=0055438) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.846801017413423, LR: 0.0003 +[2026-03-04 10:41:33] (step=0055439) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.846996673840735, LR: 0.0003 +[2026-03-04 10:41:41] (step=0055440) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.847192330268049, LR: 0.0003 +[2026-03-04 10:41:49] (step=0055441) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.847387986695363, LR: 0.0003 +[2026-03-04 10:41:56] (step=0055442) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.847583643122677, LR: 0.0003 +[2026-03-04 10:42:04] (step=0055443) Train Loss: 0.4627, Train Steps/Sec: 0.13, Epoch: 10.847779299549991, LR: 0.0003 +[2026-03-04 10:42:12] (step=0055444) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.847974955977303, LR: 0.0003 +[2026-03-04 10:42:20] (step=0055445) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.848170612404617, LR: 0.0003 +[2026-03-04 10:42:28] (step=0055446) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.848366268831931, LR: 0.0003 +[2026-03-04 10:42:36] (step=0055447) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.848561925259245, LR: 0.0003 +[2026-03-04 10:42:44] (step=0055448) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 10.848757581686558, LR: 0.0003 +[2026-03-04 10:42:51] (step=0055449) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 10.848953238113872, LR: 0.0003 +[2026-03-04 10:42:59] (step=0055450) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 10.849148894541186, LR: 0.0003 +[2026-03-04 10:43:07] (step=0055451) Train Loss: 0.4260, Train Steps/Sec: 0.13, Epoch: 10.8493445509685, LR: 0.0003 +[2026-03-04 10:43:15] (step=0055452) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.849540207395814, LR: 0.0003 +[2026-03-04 10:43:23] (step=0055453) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.849735863823126, LR: 0.0003 +[2026-03-04 10:43:31] (step=0055454) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 10.84993152025044, LR: 0.0003 +[2026-03-04 10:43:39] (step=0055455) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.850127176677754, LR: 0.0003 +[2026-03-04 10:43:46] (step=0055456) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.850322833105068, LR: 0.0003 +[2026-03-04 10:43:54] (step=0055457) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.850518489532382, LR: 0.0003 +[2026-03-04 10:44:02] (step=0055458) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.850714145959694, LR: 0.0003 +[2026-03-04 10:44:10] (step=0055459) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.850909802387008, LR: 0.0003 +[2026-03-04 10:44:18] (step=0055460) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 10.851105458814322, LR: 0.0003 +[2026-03-04 10:44:26] (step=0055461) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.851301115241636, LR: 0.0003 +[2026-03-04 10:44:34] (step=0055462) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.85149677166895, LR: 0.0003 +[2026-03-04 10:44:42] (step=0055463) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.851692428096262, LR: 0.0003 +[2026-03-04 10:44:49] (step=0055464) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.851888084523576, LR: 0.0003 +[2026-03-04 10:44:57] (step=0055465) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 10.85208374095089, LR: 0.0003 +[2026-03-04 10:45:05] (step=0055466) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 10.852279397378204, LR: 0.0003 +[2026-03-04 10:45:13] (step=0055467) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.852475053805518, LR: 0.0003 +[2026-03-04 10:45:21] (step=0055468) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.85267071023283, LR: 0.0003 +[2026-03-04 10:45:29] (step=0055469) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.852866366660145, LR: 0.0003 +[2026-03-04 10:45:37] (step=0055470) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.853062023087459, LR: 0.0003 +[2026-03-04 10:45:45] (step=0055471) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 10.853257679514773, LR: 0.0003 +[2026-03-04 10:45:52] (step=0055472) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.853453335942085, LR: 0.0003 +[2026-03-04 10:46:00] (step=0055473) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.853648992369399, LR: 0.0003 +[2026-03-04 10:46:08] (step=0055474) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.853844648796713, LR: 0.0003 +[2026-03-04 10:46:16] (step=0055475) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.854040305224027, LR: 0.0003 +[2026-03-04 10:46:24] (step=0055476) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.85423596165134, LR: 0.0003 +[2026-03-04 10:46:32] (step=0055477) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 10.854431618078653, LR: 0.0003 +[2026-03-04 10:46:40] (step=0055478) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.854627274505967, LR: 0.0003 +[2026-03-04 10:46:48] (step=0055479) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.854822930933281, LR: 0.0003 +[2026-03-04 10:46:55] (step=0055480) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.855018587360595, LR: 0.0003 +[2026-03-04 10:47:03] (step=0055481) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.85521424378791, LR: 0.0003 +[2026-03-04 10:47:11] (step=0055482) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 10.855409900215221, LR: 0.0003 +[2026-03-04 10:47:19] (step=0055483) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.855605556642535, LR: 0.0003 +[2026-03-04 10:47:27] (step=0055484) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 10.85580121306985, LR: 0.0003 +[2026-03-04 10:47:35] (step=0055485) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.855996869497163, LR: 0.0003 +[2026-03-04 10:47:43] (step=0055486) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.856192525924477, LR: 0.0003 +[2026-03-04 10:47:51] (step=0055487) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 10.85638818235179, LR: 0.0003 +[2026-03-04 10:47:58] (step=0055488) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.856583838779104, LR: 0.0003 +[2026-03-04 10:48:06] (step=0055489) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 10.856779495206418, LR: 0.0003 +[2026-03-04 10:48:14] (step=0055490) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.856975151633732, LR: 0.0003 +[2026-03-04 10:48:22] (step=0055491) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.857170808061046, LR: 0.0003 +[2026-03-04 10:48:30] (step=0055492) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 10.857366464488358, LR: 0.0003 +[2026-03-04 10:48:38] (step=0055493) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.857562120915672, LR: 0.0003 +[2026-03-04 10:48:46] (step=0055494) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.857757777342986, LR: 0.0003 +[2026-03-04 10:48:54] (step=0055495) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.8579534337703, LR: 0.0003 +[2026-03-04 10:49:01] (step=0055496) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.858149090197614, LR: 0.0003 +[2026-03-04 10:49:09] (step=0055497) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.858344746624926, LR: 0.0003 +[2026-03-04 10:49:17] (step=0055498) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.85854040305224, LR: 0.0003 +[2026-03-04 10:49:25] (step=0055499) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.858736059479554, LR: 0.0003 +[2026-03-04 10:49:33] (step=0055500) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.858931715906868, LR: 0.0003 +[2026-03-04 10:49:33] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0055500/ +[2026-03-04 10:49:41] (step=0055501) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.85912737233418, LR: 0.0003 +[2026-03-04 10:49:49] (step=0055502) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.859323028761494, LR: 0.0003 +[2026-03-04 10:49:56] (step=0055503) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.859518685188808, LR: 0.0003 +[2026-03-04 10:50:04] (step=0055504) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 10.859714341616122, LR: 0.0003 +[2026-03-04 10:50:12] (step=0055505) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.859909998043436, LR: 0.0003 +[2026-03-04 10:50:20] (step=0055506) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 10.860105654470749, LR: 0.0003 +[2026-03-04 10:50:28] (step=0055507) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.860301310898063, LR: 0.0003 +[2026-03-04 10:50:36] (step=0055508) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 10.860496967325377, LR: 0.0003 +[2026-03-04 10:50:44] (step=0055509) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.86069262375269, LR: 0.0003 +[2026-03-04 10:50:51] (step=0055510) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.860888280180005, LR: 0.0003 +[2026-03-04 10:50:59] (step=0055511) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 10.861083936607317, LR: 0.0003 +[2026-03-04 10:51:07] (step=0055512) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.86127959303463, LR: 0.0003 +[2026-03-04 10:51:15] (step=0055513) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 10.861475249461945, LR: 0.0003 +[2026-03-04 10:51:23] (step=0055514) Train Loss: 0.4256, Train Steps/Sec: 0.12, Epoch: 10.861670905889259, LR: 0.0003 +[2026-03-04 10:51:31] (step=0055515) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.861866562316573, LR: 0.0003 +[2026-03-04 10:51:39] (step=0055516) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.862062218743885, LR: 0.0003 +[2026-03-04 10:51:47] (step=0055517) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.8622578751712, LR: 0.0003 +[2026-03-04 10:51:54] (step=0055518) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.862453531598513, LR: 0.0003 +[2026-03-04 10:52:02] (step=0055519) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.862649188025827, LR: 0.0003 +[2026-03-04 10:52:10] (step=0055520) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.862844844453141, LR: 0.0003 +[2026-03-04 10:52:18] (step=0055521) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 10.863040500880453, LR: 0.0003 +[2026-03-04 10:52:26] (step=0055522) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.863236157307767, LR: 0.0003 +[2026-03-04 10:52:34] (step=0055523) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.863431813735081, LR: 0.0003 +[2026-03-04 10:52:42] (step=0055524) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.863627470162395, LR: 0.0003 +[2026-03-04 10:52:49] (step=0055525) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.863823126589708, LR: 0.0003 +[2026-03-04 10:52:57] (step=0055526) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.864018783017022, LR: 0.0003 +[2026-03-04 10:53:05] (step=0055527) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.864214439444336, LR: 0.0003 +[2026-03-04 10:53:13] (step=0055528) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.86441009587165, LR: 0.0003 +[2026-03-04 10:53:21] (step=0055529) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.864605752298964, LR: 0.0003 +[2026-03-04 10:53:29] (step=0055530) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.864801408726276, LR: 0.0003 +[2026-03-04 10:53:37] (step=0055531) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.86499706515359, LR: 0.0003 +[2026-03-04 10:53:44] (step=0055532) Train Loss: 0.4260, Train Steps/Sec: 0.13, Epoch: 10.865192721580904, LR: 0.0003 +[2026-03-04 10:53:52] (step=0055533) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.865388378008218, LR: 0.0003 +[2026-03-04 10:54:00] (step=0055534) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.865584034435532, LR: 0.0003 +[2026-03-04 10:54:08] (step=0055535) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.865779690862844, LR: 0.0003 +[2026-03-04 10:54:16] (step=0055536) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.865975347290158, LR: 0.0003 +[2026-03-04 10:54:24] (step=0055537) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 10.866171003717472, LR: 0.0003 +[2026-03-04 10:54:32] (step=0055538) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 10.866366660144786, LR: 0.0003 +[2026-03-04 10:54:40] (step=0055539) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.8665623165721, LR: 0.0003 +[2026-03-04 10:54:48] (step=0055540) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.866757972999412, LR: 0.0003 +[2026-03-04 10:54:55] (step=0055541) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.866953629426726, LR: 0.0003 +[2026-03-04 10:55:03] (step=0055542) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.86714928585404, LR: 0.0003 +[2026-03-04 10:55:11] (step=0055543) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 10.867344942281354, LR: 0.0003 +[2026-03-04 10:55:19] (step=0055544) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.867540598708668, LR: 0.0003 +[2026-03-04 10:55:27] (step=0055545) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.86773625513598, LR: 0.0003 +[2026-03-04 10:55:35] (step=0055546) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 10.867931911563295, LR: 0.0003 +[2026-03-04 10:55:43] (step=0055547) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 10.868127567990609, LR: 0.0003 +[2026-03-04 10:55:51] (step=0055548) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 10.868323224417923, LR: 0.0003 +[2026-03-04 10:55:58] (step=0055549) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.868518880845237, LR: 0.0003 +[2026-03-04 10:56:06] (step=0055550) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.868714537272549, LR: 0.0003 +[2026-03-04 10:56:14] (step=0055551) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 10.868910193699863, LR: 0.0003 +[2026-03-04 10:56:22] (step=0055552) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.869105850127177, LR: 0.0003 +[2026-03-04 10:56:30] (step=0055553) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.869301506554491, LR: 0.0003 +[2026-03-04 10:56:38] (step=0055554) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 10.869497162981803, LR: 0.0003 +[2026-03-04 10:56:46] (step=0055555) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.869692819409117, LR: 0.0003 +[2026-03-04 10:56:53] (step=0055556) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 10.869888475836431, LR: 0.0003 +[2026-03-04 10:57:01] (step=0055557) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.870084132263745, LR: 0.0003 +[2026-03-04 10:57:09] (step=0055558) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.87027978869106, LR: 0.0003 +[2026-03-04 10:57:17] (step=0055559) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 10.870475445118371, LR: 0.0003 +[2026-03-04 10:57:25] (step=0055560) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.870671101545685, LR: 0.0003 +[2026-03-04 10:57:33] (step=0055561) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 10.870866757973, LR: 0.0003 +[2026-03-04 10:57:41] (step=0055562) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 10.871062414400313, LR: 0.0003 +[2026-03-04 10:57:49] (step=0055563) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.871258070827627, LR: 0.0003 +[2026-03-04 10:57:57] (step=0055564) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 10.87145372725494, LR: 0.0003 +[2026-03-04 10:58:04] (step=0055565) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.871649383682254, LR: 0.0003 +[2026-03-04 10:58:12] (step=0055566) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.871845040109568, LR: 0.0003 +[2026-03-04 10:58:20] (step=0055567) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.872040696536882, LR: 0.0003 +[2026-03-04 10:58:28] (step=0055568) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.872236352964196, LR: 0.0003 +[2026-03-04 10:58:36] (step=0055569) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.872432009391508, LR: 0.0003 +[2026-03-04 10:58:44] (step=0055570) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 10.872627665818822, LR: 0.0003 +[2026-03-04 10:58:52] (step=0055571) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.872823322246136, LR: 0.0003 +[2026-03-04 10:59:00] (step=0055572) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.87301897867345, LR: 0.0003 +[2026-03-04 10:59:07] (step=0055573) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 10.873214635100764, LR: 0.0003 +[2026-03-04 10:59:15] (step=0055574) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.873410291528076, LR: 0.0003 +[2026-03-04 10:59:23] (step=0055575) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.87360594795539, LR: 0.0003 +[2026-03-04 10:59:31] (step=0055576) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.873801604382704, LR: 0.0003 +[2026-03-04 10:59:39] (step=0055577) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.873997260810018, LR: 0.0003 +[2026-03-04 10:59:47] (step=0055578) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.87419291723733, LR: 0.0003 +[2026-03-04 10:59:55] (step=0055579) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.874388573664644, LR: 0.0003 +[2026-03-04 11:00:02] (step=0055580) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.874584230091958, LR: 0.0003 +[2026-03-04 11:00:10] (step=0055581) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 10.874779886519272, LR: 0.0003 +[2026-03-04 11:00:18] (step=0055582) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.874975542946586, LR: 0.0003 +[2026-03-04 11:00:26] (step=0055583) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 10.875171199373899, LR: 0.0003 +[2026-03-04 11:00:34] (step=0055584) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 10.875366855801213, LR: 0.0003 +[2026-03-04 11:00:42] (step=0055585) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.875562512228527, LR: 0.0003 +[2026-03-04 11:00:50] (step=0055586) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.87575816865584, LR: 0.0003 +[2026-03-04 11:00:58] (step=0055587) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 10.875953825083155, LR: 0.0003 +[2026-03-04 11:01:06] (step=0055588) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.876149481510467, LR: 0.0003 +[2026-03-04 11:01:13] (step=0055589) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 10.876345137937781, LR: 0.0003 +[2026-03-04 11:01:21] (step=0055590) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.876540794365095, LR: 0.0003 +[2026-03-04 11:01:29] (step=0055591) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.876736450792409, LR: 0.0003 +[2026-03-04 11:01:37] (step=0055592) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.876932107219723, LR: 0.0003 +[2026-03-04 11:01:45] (step=0055593) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.877127763647035, LR: 0.0003 +[2026-03-04 11:01:53] (step=0055594) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.87732342007435, LR: 0.0003 +[2026-03-04 11:02:01] (step=0055595) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.877519076501663, LR: 0.0003 +[2026-03-04 11:02:08] (step=0055596) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.877714732928977, LR: 0.0003 +[2026-03-04 11:02:16] (step=0055597) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.877910389356291, LR: 0.0003 +[2026-03-04 11:02:24] (step=0055598) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 10.878106045783603, LR: 0.0003 +[2026-03-04 11:02:32] (step=0055599) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 10.878301702210917, LR: 0.0003 +[2026-03-04 11:02:40] (step=0055600) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 10.878497358638231, LR: 0.0003 +[2026-03-04 11:02:48] (step=0055601) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.878693015065545, LR: 0.0003 +[2026-03-04 11:02:56] (step=0055602) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.878888671492858, LR: 0.0003 +[2026-03-04 11:03:03] (step=0055603) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 10.879084327920172, LR: 0.0003 +[2026-03-04 11:03:11] (step=0055604) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 10.879279984347486, LR: 0.0003 +[2026-03-04 11:03:19] (step=0055605) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.8794756407748, LR: 0.0003 +[2026-03-04 11:03:27] (step=0055606) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 10.879671297202114, LR: 0.0003 +[2026-03-04 11:03:35] (step=0055607) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 10.879866953629426, LR: 0.0003 +[2026-03-04 11:03:43] (step=0055608) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.88006261005674, LR: 0.0003 +[2026-03-04 11:03:51] (step=0055609) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 10.880258266484054, LR: 0.0003 +[2026-03-04 11:03:58] (step=0055610) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.880453922911368, LR: 0.0003 +[2026-03-04 11:04:06] (step=0055611) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.880649579338682, LR: 0.0003 +[2026-03-04 11:04:14] (step=0055612) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.880845235765994, LR: 0.0003 +[2026-03-04 11:04:22] (step=0055613) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.881040892193308, LR: 0.0003 +[2026-03-04 11:04:30] (step=0055614) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 10.881236548620622, LR: 0.0003 +[2026-03-04 11:04:38] (step=0055615) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.881432205047936, LR: 0.0003 +[2026-03-04 11:04:46] (step=0055616) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.88162786147525, LR: 0.0003 +[2026-03-04 11:04:54] (step=0055617) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.881823517902562, LR: 0.0003 +[2026-03-04 11:05:02] (step=0055618) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.882019174329876, LR: 0.0003 +[2026-03-04 11:05:09] (step=0055619) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.88221483075719, LR: 0.0003 +[2026-03-04 11:05:17] (step=0055620) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.882410487184504, LR: 0.0003 +[2026-03-04 11:05:25] (step=0055621) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.882606143611818, LR: 0.0003 +[2026-03-04 11:05:33] (step=0055622) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.88280180003913, LR: 0.0003 +[2026-03-04 11:05:41] (step=0055623) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 10.882997456466445, LR: 0.0003 +[2026-03-04 11:05:49] (step=0055624) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.883193112893759, LR: 0.0003 +[2026-03-04 11:05:57] (step=0055625) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.883388769321073, LR: 0.0003 +[2026-03-04 11:06:04] (step=0055626) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 10.883584425748387, LR: 0.0003 +[2026-03-04 11:06:12] (step=0055627) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 10.883780082175699, LR: 0.0003 +[2026-03-04 11:06:20] (step=0055628) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.883975738603013, LR: 0.0003 +[2026-03-04 11:06:28] (step=0055629) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 10.884171395030327, LR: 0.0003 +[2026-03-04 11:06:36] (step=0055630) Train Loss: 0.4471, Train Steps/Sec: 0.12, Epoch: 10.884367051457641, LR: 0.0003 +[2026-03-04 11:06:44] (step=0055631) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.884562707884953, LR: 0.0003 +[2026-03-04 11:06:52] (step=0055632) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 10.884758364312267, LR: 0.0003 +[2026-03-04 11:07:00] (step=0055633) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.884954020739581, LR: 0.0003 +[2026-03-04 11:07:08] (step=0055634) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 10.885149677166895, LR: 0.0003 +[2026-03-04 11:07:15] (step=0055635) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.88534533359421, LR: 0.0003 +[2026-03-04 11:07:23] (step=0055636) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.885540990021521, LR: 0.0003 +[2026-03-04 11:07:31] (step=0055637) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.885736646448835, LR: 0.0003 +[2026-03-04 11:07:39] (step=0055638) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.88593230287615, LR: 0.0003 +[2026-03-04 11:07:47] (step=0055639) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.886127959303463, LR: 0.0003 +[2026-03-04 11:07:55] (step=0055640) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.886323615730777, LR: 0.0003 +[2026-03-04 11:08:03] (step=0055641) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.88651927215809, LR: 0.0003 +[2026-03-04 11:08:10] (step=0055642) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.886714928585404, LR: 0.0003 +[2026-03-04 11:08:18] (step=0055643) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.886910585012718, LR: 0.0003 +[2026-03-04 11:08:26] (step=0055644) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.887106241440032, LR: 0.0003 +[2026-03-04 11:08:34] (step=0055645) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.887301897867346, LR: 0.0003 +[2026-03-04 11:08:42] (step=0055646) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.887497554294658, LR: 0.0003 +[2026-03-04 11:08:50] (step=0055647) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.887693210721972, LR: 0.0003 +[2026-03-04 11:08:58] (step=0055648) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 10.887888867149286, LR: 0.0003 +[2026-03-04 11:09:06] (step=0055649) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.8880845235766, LR: 0.0003 +[2026-03-04 11:09:13] (step=0055650) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.888280180003914, LR: 0.0003 +[2026-03-04 11:09:21] (step=0055651) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.888475836431226, LR: 0.0003 +[2026-03-04 11:09:29] (step=0055652) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.88867149285854, LR: 0.0003 +[2026-03-04 11:09:37] (step=0055653) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.888867149285854, LR: 0.0003 +[2026-03-04 11:09:45] (step=0055654) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.889062805713168, LR: 0.0003 +[2026-03-04 11:09:53] (step=0055655) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.88925846214048, LR: 0.0003 +[2026-03-04 11:10:01] (step=0055656) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.889454118567794, LR: 0.0003 +[2026-03-04 11:10:08] (step=0055657) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.889649774995108, LR: 0.0003 +[2026-03-04 11:10:16] (step=0055658) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 10.889845431422422, LR: 0.0003 +[2026-03-04 11:10:24] (step=0055659) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 10.890041087849736, LR: 0.0003 +[2026-03-04 11:10:32] (step=0055660) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.890236744277049, LR: 0.0003 +[2026-03-04 11:10:40] (step=0055661) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.890432400704363, LR: 0.0003 +[2026-03-04 11:10:48] (step=0055662) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.890628057131677, LR: 0.0003 +[2026-03-04 11:10:56] (step=0055663) Train Loss: 0.4454, Train Steps/Sec: 0.12, Epoch: 10.89082371355899, LR: 0.0003 +[2026-03-04 11:11:04] (step=0055664) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.891019369986305, LR: 0.0003 +[2026-03-04 11:11:12] (step=0055665) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 10.891215026413617, LR: 0.0003 +[2026-03-04 11:11:19] (step=0055666) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.891410682840931, LR: 0.0003 +[2026-03-04 11:11:27] (step=0055667) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.891606339268245, LR: 0.0003 +[2026-03-04 11:11:35] (step=0055668) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.891801995695559, LR: 0.0003 +[2026-03-04 11:11:43] (step=0055669) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 10.891997652122873, LR: 0.0003 +[2026-03-04 11:11:51] (step=0055670) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.892193308550185, LR: 0.0003 +[2026-03-04 11:11:59] (step=0055671) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 10.8923889649775, LR: 0.0003 +[2026-03-04 11:12:07] (step=0055672) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.892584621404813, LR: 0.0003 +[2026-03-04 11:12:14] (step=0055673) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.892780277832127, LR: 0.0003 +[2026-03-04 11:12:22] (step=0055674) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.892975934259441, LR: 0.0003 +[2026-03-04 11:12:30] (step=0055675) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.893171590686753, LR: 0.0003 +[2026-03-04 11:12:38] (step=0055676) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 10.893367247114067, LR: 0.0003 +[2026-03-04 11:12:46] (step=0055677) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.893562903541381, LR: 0.0003 +[2026-03-04 11:12:54] (step=0055678) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.893758559968695, LR: 0.0003 +[2026-03-04 11:13:02] (step=0055679) Train Loss: 0.4434, Train Steps/Sec: 0.12, Epoch: 10.89395421639601, LR: 0.0003 +[2026-03-04 11:13:10] (step=0055680) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.894149872823322, LR: 0.0003 +[2026-03-04 11:13:18] (step=0055681) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.894345529250636, LR: 0.0003 +[2026-03-04 11:13:25] (step=0055682) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.89454118567795, LR: 0.0003 +[2026-03-04 11:13:33] (step=0055683) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.894736842105264, LR: 0.0003 +[2026-03-04 11:13:41] (step=0055684) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.894932498532576, LR: 0.0003 +[2026-03-04 11:13:49] (step=0055685) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 10.89512815495989, LR: 0.0003 +[2026-03-04 11:13:57] (step=0055686) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.895323811387204, LR: 0.0003 +[2026-03-04 11:14:05] (step=0055687) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.895519467814518, LR: 0.0003 +[2026-03-04 11:14:13] (step=0055688) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 10.895715124241832, LR: 0.0003 +[2026-03-04 11:14:20] (step=0055689) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.895910780669144, LR: 0.0003 +[2026-03-04 11:14:28] (step=0055690) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.896106437096458, LR: 0.0003 +[2026-03-04 11:14:36] (step=0055691) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.896302093523772, LR: 0.0003 +[2026-03-04 11:14:44] (step=0055692) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 10.896497749951086, LR: 0.0003 +[2026-03-04 11:14:52] (step=0055693) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 10.8966934063784, LR: 0.0003 +[2026-03-04 11:15:00] (step=0055694) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 10.896889062805712, LR: 0.0003 +[2026-03-04 11:15:08] (step=0055695) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.897084719233026, LR: 0.0003 +[2026-03-04 11:15:16] (step=0055696) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.89728037566034, LR: 0.0003 +[2026-03-04 11:15:23] (step=0055697) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.897476032087654, LR: 0.0003 +[2026-03-04 11:15:31] (step=0055698) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.897671688514968, LR: 0.0003 +[2026-03-04 11:15:39] (step=0055699) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.89786734494228, LR: 0.0003 +[2026-03-04 11:15:47] (step=0055700) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 10.898063001369595, LR: 0.0003 +[2026-03-04 11:15:55] (step=0055701) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.898258657796909, LR: 0.0003 +[2026-03-04 11:16:03] (step=0055702) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.898454314224223, LR: 0.0003 +[2026-03-04 11:16:11] (step=0055703) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 10.898649970651537, LR: 0.0003 +[2026-03-04 11:16:18] (step=0055704) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 10.898845627078849, LR: 0.0003 +[2026-03-04 11:16:26] (step=0055705) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 10.899041283506163, LR: 0.0003 +[2026-03-04 11:16:34] (step=0055706) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.899236939933477, LR: 0.0003 +[2026-03-04 11:16:42] (step=0055707) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 10.899432596360791, LR: 0.0003 +[2026-03-04 11:16:50] (step=0055708) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.899628252788103, LR: 0.0003 +[2026-03-04 11:16:58] (step=0055709) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.899823909215417, LR: 0.0003 +[2026-03-04 11:17:06] (step=0055710) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.900019565642731, LR: 0.0003 +[2026-03-04 11:17:13] (step=0055711) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.900215222070045, LR: 0.0003 +[2026-03-04 11:17:21] (step=0055712) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.90041087849736, LR: 0.0003 +[2026-03-04 11:17:29] (step=0055713) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.900606534924671, LR: 0.0003 +[2026-03-04 11:17:37] (step=0055714) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 10.900802191351985, LR: 0.0003 +[2026-03-04 11:17:45] (step=0055715) Train Loss: 0.4387, Train Steps/Sec: 0.12, Epoch: 10.9009978477793, LR: 0.0003 +[2026-03-04 11:17:53] (step=0055716) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.901193504206613, LR: 0.0003 +[2026-03-04 11:18:01] (step=0055717) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.901389160633927, LR: 0.0003 +[2026-03-04 11:18:09] (step=0055718) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.90158481706124, LR: 0.0003 +[2026-03-04 11:18:17] (step=0055719) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.901780473488554, LR: 0.0003 +[2026-03-04 11:18:24] (step=0055720) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 10.901976129915868, LR: 0.0003 +[2026-03-04 11:18:32] (step=0055721) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.902171786343182, LR: 0.0003 +[2026-03-04 11:18:40] (step=0055722) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.902367442770496, LR: 0.0003 +[2026-03-04 11:18:48] (step=0055723) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.902563099197808, LR: 0.0003 +[2026-03-04 11:18:56] (step=0055724) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.902758755625122, LR: 0.0003 +[2026-03-04 11:19:04] (step=0055725) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.902954412052436, LR: 0.0003 +[2026-03-04 11:19:12] (step=0055726) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 10.90315006847975, LR: 0.0003 +[2026-03-04 11:19:20] (step=0055727) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.903345724907064, LR: 0.0003 +[2026-03-04 11:19:27] (step=0055728) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.903541381334376, LR: 0.0003 +[2026-03-04 11:19:35] (step=0055729) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 10.90373703776169, LR: 0.0003 +[2026-03-04 11:19:43] (step=0055730) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.903932694189004, LR: 0.0003 +[2026-03-04 11:19:51] (step=0055731) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 10.904128350616318, LR: 0.0003 +[2026-03-04 11:19:59] (step=0055732) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.904324007043632, LR: 0.0003 +[2026-03-04 11:20:07] (step=0055733) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 10.904519663470944, LR: 0.0003 +[2026-03-04 11:20:15] (step=0055734) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.904715319898258, LR: 0.0003 +[2026-03-04 11:20:23] (step=0055735) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 10.904910976325572, LR: 0.0003 +[2026-03-04 11:20:30] (step=0055736) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.905106632752886, LR: 0.0003 +[2026-03-04 11:20:38] (step=0055737) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.905302289180199, LR: 0.0003 +[2026-03-04 11:20:46] (step=0055738) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.905497945607513, LR: 0.0003 +[2026-03-04 11:20:54] (step=0055739) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 10.905693602034827, LR: 0.0003 +[2026-03-04 11:21:02] (step=0055740) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 10.90588925846214, LR: 0.0003 +[2026-03-04 11:21:10] (step=0055741) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 10.906084914889455, LR: 0.0003 +[2026-03-04 11:21:18] (step=0055742) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 10.906280571316767, LR: 0.0003 +[2026-03-04 11:21:25] (step=0055743) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 10.906476227744081, LR: 0.0003 +[2026-03-04 11:21:33] (step=0055744) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.906671884171395, LR: 0.0003 +[2026-03-04 11:21:41] (step=0055745) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.906867540598709, LR: 0.0003 +[2026-03-04 11:21:49] (step=0055746) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.907063197026023, LR: 0.0003 +[2026-03-04 11:21:57] (step=0055747) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.907258853453335, LR: 0.0003 +[2026-03-04 11:22:05] (step=0055748) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.90745450988065, LR: 0.0003 +[2026-03-04 11:22:13] (step=0055749) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 10.907650166307963, LR: 0.0003 +[2026-03-04 11:22:20] (step=0055750) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.907845822735277, LR: 0.0003 +[2026-03-04 11:22:28] (step=0055751) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 10.908041479162591, LR: 0.0003 +[2026-03-04 11:22:36] (step=0055752) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 10.908237135589903, LR: 0.0003 +[2026-03-04 11:22:44] (step=0055753) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.908432792017217, LR: 0.0003 +[2026-03-04 11:22:52] (step=0055754) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.908628448444532, LR: 0.0003 +[2026-03-04 11:23:00] (step=0055755) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.908824104871846, LR: 0.0003 +[2026-03-04 11:23:08] (step=0055756) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.90901976129916, LR: 0.0003 +[2026-03-04 11:23:15] (step=0055757) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.909215417726472, LR: 0.0003 +[2026-03-04 11:23:23] (step=0055758) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.909411074153786, LR: 0.0003 +[2026-03-04 11:23:31] (step=0055759) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.9096067305811, LR: 0.0003 +[2026-03-04 11:23:39] (step=0055760) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.909802387008414, LR: 0.0003 +[2026-03-04 11:23:47] (step=0055761) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 10.909998043435726, LR: 0.0003 +[2026-03-04 11:23:55] (step=0055762) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.91019369986304, LR: 0.0003 +[2026-03-04 11:24:03] (step=0055763) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 10.910389356290354, LR: 0.0003 +[2026-03-04 11:24:10] (step=0055764) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.910585012717668, LR: 0.0003 +[2026-03-04 11:24:18] (step=0055765) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 10.910780669144982, LR: 0.0003 +[2026-03-04 11:24:26] (step=0055766) Train Loss: 0.4435, Train Steps/Sec: 0.12, Epoch: 10.910976325572294, LR: 0.0003 +[2026-03-04 11:24:34] (step=0055767) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.911171981999608, LR: 0.0003 +[2026-03-04 11:24:42] (step=0055768) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 10.911367638426922, LR: 0.0003 +[2026-03-04 11:24:50] (step=0055769) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.911563294854236, LR: 0.0003 +[2026-03-04 11:24:58] (step=0055770) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.91175895128155, LR: 0.0003 +[2026-03-04 11:25:06] (step=0055771) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 10.911954607708862, LR: 0.0003 +[2026-03-04 11:25:14] (step=0055772) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.912150264136177, LR: 0.0003 +[2026-03-04 11:25:21] (step=0055773) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 10.91234592056349, LR: 0.0003 +[2026-03-04 11:25:29] (step=0055774) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.912541576990805, LR: 0.0003 +[2026-03-04 11:25:37] (step=0055775) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.912737233418119, LR: 0.0003 +[2026-03-04 11:25:45] (step=0055776) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.91293288984543, LR: 0.0003 +[2026-03-04 11:25:53] (step=0055777) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.913128546272745, LR: 0.0003 +[2026-03-04 11:26:01] (step=0055778) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.913324202700059, LR: 0.0003 +[2026-03-04 11:26:09] (step=0055779) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.913519859127373, LR: 0.0003 +[2026-03-04 11:26:16] (step=0055780) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 10.913715515554687, LR: 0.0003 +[2026-03-04 11:26:24] (step=0055781) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.913911171981999, LR: 0.0003 +[2026-03-04 11:26:32] (step=0055782) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.914106828409313, LR: 0.0003 +[2026-03-04 11:26:40] (step=0055783) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.914302484836627, LR: 0.0003 +[2026-03-04 11:26:48] (step=0055784) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.914498141263941, LR: 0.0003 +[2026-03-04 11:26:56] (step=0055785) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.914693797691255, LR: 0.0003 +[2026-03-04 11:27:04] (step=0055786) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.914889454118567, LR: 0.0003 +[2026-03-04 11:27:12] (step=0055787) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.915085110545881, LR: 0.0003 +[2026-03-04 11:27:19] (step=0055788) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.915280766973195, LR: 0.0003 +[2026-03-04 11:27:27] (step=0055789) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.91547642340051, LR: 0.0003 +[2026-03-04 11:27:35] (step=0055790) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.915672079827822, LR: 0.0003 +[2026-03-04 11:27:43] (step=0055791) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.915867736255136, LR: 0.0003 +[2026-03-04 11:27:51] (step=0055792) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.91606339268245, LR: 0.0003 +[2026-03-04 11:27:59] (step=0055793) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.916259049109764, LR: 0.0003 +[2026-03-04 11:28:07] (step=0055794) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 10.916454705537078, LR: 0.0003 +[2026-03-04 11:28:15] (step=0055795) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.91665036196439, LR: 0.0003 +[2026-03-04 11:28:22] (step=0055796) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.916846018391704, LR: 0.0003 +[2026-03-04 11:28:30] (step=0055797) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.917041674819018, LR: 0.0003 +[2026-03-04 11:28:38] (step=0055798) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.917237331246332, LR: 0.0003 +[2026-03-04 11:28:46] (step=0055799) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 10.917432987673646, LR: 0.0003 +[2026-03-04 11:28:54] (step=0055800) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.917628644100958, LR: 0.0003 +[2026-03-04 11:29:02] (step=0055801) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.917824300528272, LR: 0.0003 +[2026-03-04 11:29:10] (step=0055802) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.918019956955586, LR: 0.0003 +[2026-03-04 11:29:17] (step=0055803) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.9182156133829, LR: 0.0003 +[2026-03-04 11:29:25] (step=0055804) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 10.918411269810214, LR: 0.0003 +[2026-03-04 11:29:33] (step=0055805) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.918606926237526, LR: 0.0003 +[2026-03-04 11:29:41] (step=0055806) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.91880258266484, LR: 0.0003 +[2026-03-04 11:29:49] (step=0055807) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 10.918998239092154, LR: 0.0003 +[2026-03-04 11:29:57] (step=0055808) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.919193895519468, LR: 0.0003 +[2026-03-04 11:30:05] (step=0055809) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.919389551946782, LR: 0.0003 +[2026-03-04 11:30:13] (step=0055810) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.919585208374095, LR: 0.0003 +[2026-03-04 11:30:20] (step=0055811) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 10.919780864801409, LR: 0.0003 +[2026-03-04 11:30:28] (step=0055812) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.919976521228723, LR: 0.0003 +[2026-03-04 11:30:36] (step=0055813) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.920172177656037, LR: 0.0003 +[2026-03-04 11:30:44] (step=0055814) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 10.920367834083349, LR: 0.0003 +[2026-03-04 11:30:52] (step=0055815) Train Loss: 0.4367, Train Steps/Sec: 0.12, Epoch: 10.920563490510663, LR: 0.0003 +[2026-03-04 11:31:00] (step=0055816) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.920759146937977, LR: 0.0003 +[2026-03-04 11:31:08] (step=0055817) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.92095480336529, LR: 0.0003 +[2026-03-04 11:31:16] (step=0055818) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 10.921150459792605, LR: 0.0003 +[2026-03-04 11:31:24] (step=0055819) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 10.921346116219917, LR: 0.0003 +[2026-03-04 11:31:31] (step=0055820) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.921541772647231, LR: 0.0003 +[2026-03-04 11:31:39] (step=0055821) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 10.921737429074545, LR: 0.0003 +[2026-03-04 11:31:47] (step=0055822) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.921933085501859, LR: 0.0003 +[2026-03-04 11:31:55] (step=0055823) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.922128741929173, LR: 0.0003 +[2026-03-04 11:32:03] (step=0055824) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 10.922324398356485, LR: 0.0003 +[2026-03-04 11:32:11] (step=0055825) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.9225200547838, LR: 0.0003 +[2026-03-04 11:32:19] (step=0055826) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.922715711211113, LR: 0.0003 +[2026-03-04 11:32:27] (step=0055827) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.922911367638427, LR: 0.0003 +[2026-03-04 11:32:34] (step=0055828) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.923107024065741, LR: 0.0003 +[2026-03-04 11:32:42] (step=0055829) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 10.923302680493054, LR: 0.0003 +[2026-03-04 11:32:50] (step=0055830) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.923498336920368, LR: 0.0003 +[2026-03-04 11:32:58] (step=0055831) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.923693993347682, LR: 0.0003 +[2026-03-04 11:33:06] (step=0055832) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.923889649774996, LR: 0.0003 +[2026-03-04 11:33:14] (step=0055833) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 10.92408530620231, LR: 0.0003 +[2026-03-04 11:33:22] (step=0055834) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 10.924280962629622, LR: 0.0003 +[2026-03-04 11:33:30] (step=0055835) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.924476619056936, LR: 0.0003 +[2026-03-04 11:33:37] (step=0055836) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.92467227548425, LR: 0.0003 +[2026-03-04 11:33:45] (step=0055837) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.924867931911564, LR: 0.0003 +[2026-03-04 11:33:53] (step=0055838) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 10.925063588338878, LR: 0.0003 +[2026-03-04 11:34:01] (step=0055839) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.92525924476619, LR: 0.0003 +[2026-03-04 11:34:09] (step=0055840) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 10.925454901193504, LR: 0.0003 +[2026-03-04 11:34:17] (step=0055841) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.925650557620818, LR: 0.0003 +[2026-03-04 11:34:25] (step=0055842) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 10.925846214048132, LR: 0.0003 +[2026-03-04 11:34:32] (step=0055843) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 10.926041870475444, LR: 0.0003 +[2026-03-04 11:34:40] (step=0055844) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.926237526902758, LR: 0.0003 +[2026-03-04 11:34:48] (step=0055845) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 10.926433183330072, LR: 0.0003 +[2026-03-04 11:34:56] (step=0055846) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.926628839757386, LR: 0.0003 +[2026-03-04 11:35:04] (step=0055847) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.9268244961847, LR: 0.0003 +[2026-03-04 11:35:12] (step=0055848) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.927020152612013, LR: 0.0003 +[2026-03-04 11:35:20] (step=0055849) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 10.927215809039327, LR: 0.0003 +[2026-03-04 11:35:27] (step=0055850) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.92741146546664, LR: 0.0003 +[2026-03-04 11:35:35] (step=0055851) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.927607121893955, LR: 0.0003 +[2026-03-04 11:35:43] (step=0055852) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.927802778321269, LR: 0.0003 +[2026-03-04 11:35:51] (step=0055853) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.92799843474858, LR: 0.0003 +[2026-03-04 11:35:59] (step=0055854) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.928194091175895, LR: 0.0003 +[2026-03-04 11:36:07] (step=0055855) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.928389747603209, LR: 0.0003 +[2026-03-04 11:36:15] (step=0055856) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.928585404030523, LR: 0.0003 +[2026-03-04 11:36:23] (step=0055857) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.928781060457837, LR: 0.0003 +[2026-03-04 11:36:30] (step=0055858) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 10.928976716885149, LR: 0.0003 +[2026-03-04 11:36:38] (step=0055859) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.929172373312463, LR: 0.0003 +[2026-03-04 11:36:46] (step=0055860) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 10.929368029739777, LR: 0.0003 +[2026-03-04 11:36:54] (step=0055861) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.929563686167091, LR: 0.0003 +[2026-03-04 11:37:02] (step=0055862) Train Loss: 0.4414, Train Steps/Sec: 0.12, Epoch: 10.929759342594405, LR: 0.0003 +[2026-03-04 11:37:10] (step=0055863) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.929954999021717, LR: 0.0003 +[2026-03-04 11:37:18] (step=0055864) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.930150655449031, LR: 0.0003 +[2026-03-04 11:37:26] (step=0055865) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 10.930346311876345, LR: 0.0003 +[2026-03-04 11:37:33] (step=0055866) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 10.93054196830366, LR: 0.0003 +[2026-03-04 11:37:41] (step=0055867) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 10.930737624730972, LR: 0.0003 +[2026-03-04 11:37:49] (step=0055868) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 10.930933281158286, LR: 0.0003 +[2026-03-04 11:37:57] (step=0055869) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.9311289375856, LR: 0.0003 +[2026-03-04 11:38:05] (step=0055870) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.931324594012914, LR: 0.0003 +[2026-03-04 11:38:13] (step=0055871) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 10.931520250440228, LR: 0.0003 +[2026-03-04 11:38:21] (step=0055872) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.93171590686754, LR: 0.0003 +[2026-03-04 11:38:28] (step=0055873) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.931911563294854, LR: 0.0003 +[2026-03-04 11:38:36] (step=0055874) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.932107219722168, LR: 0.0003 +[2026-03-04 11:38:44] (step=0055875) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.932302876149482, LR: 0.0003 +[2026-03-04 11:38:52] (step=0055876) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 10.932498532576796, LR: 0.0003 +[2026-03-04 11:39:00] (step=0055877) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.932694189004108, LR: 0.0003 +[2026-03-04 11:39:08] (step=0055878) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.932889845431422, LR: 0.0003 +[2026-03-04 11:39:16] (step=0055879) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.933085501858736, LR: 0.0003 +[2026-03-04 11:39:23] (step=0055880) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 10.93328115828605, LR: 0.0003 +[2026-03-04 11:39:31] (step=0055881) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.933476814713364, LR: 0.0003 +[2026-03-04 11:39:39] (step=0055882) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.933672471140676, LR: 0.0003 +[2026-03-04 11:39:47] (step=0055883) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.93386812756799, LR: 0.0003 +[2026-03-04 11:39:55] (step=0055884) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 10.934063783995304, LR: 0.0003 +[2026-03-04 11:40:03] (step=0055885) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.934259440422618, LR: 0.0003 +[2026-03-04 11:40:11] (step=0055886) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.934455096849932, LR: 0.0003 +[2026-03-04 11:40:19] (step=0055887) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.934650753277245, LR: 0.0003 +[2026-03-04 11:40:26] (step=0055888) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.934846409704559, LR: 0.0003 +[2026-03-04 11:40:34] (step=0055889) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.935042066131873, LR: 0.0003 +[2026-03-04 11:40:42] (step=0055890) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.935237722559187, LR: 0.0003 +[2026-03-04 11:40:50] (step=0055891) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.9354333789865, LR: 0.0003 +[2026-03-04 11:40:58] (step=0055892) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.935629035413813, LR: 0.0003 +[2026-03-04 11:41:06] (step=0055893) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.935824691841127, LR: 0.0003 +[2026-03-04 11:41:14] (step=0055894) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.93602034826844, LR: 0.0003 +[2026-03-04 11:41:21] (step=0055895) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.936216004695755, LR: 0.0003 +[2026-03-04 11:41:29] (step=0055896) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.936411661123067, LR: 0.0003 +[2026-03-04 11:41:37] (step=0055897) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.936607317550381, LR: 0.0003 +[2026-03-04 11:41:45] (step=0055898) Train Loss: 0.4266, Train Steps/Sec: 0.13, Epoch: 10.936802973977695, LR: 0.0003 +[2026-03-04 11:41:53] (step=0055899) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.936998630405009, LR: 0.0003 +[2026-03-04 11:42:01] (step=0055900) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.937194286832323, LR: 0.0003 +[2026-03-04 11:42:09] (step=0055901) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.937389943259635, LR: 0.0003 +[2026-03-04 11:42:16] (step=0055902) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.93758559968695, LR: 0.0003 +[2026-03-04 11:42:24] (step=0055903) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 10.937781256114263, LR: 0.0003 +[2026-03-04 11:42:32] (step=0055904) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 10.937976912541577, LR: 0.0003 +[2026-03-04 11:42:40] (step=0055905) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.938172568968891, LR: 0.0003 +[2026-03-04 11:42:48] (step=0055906) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 10.938368225396204, LR: 0.0003 +[2026-03-04 11:42:56] (step=0055907) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 10.938563881823518, LR: 0.0003 +[2026-03-04 11:43:04] (step=0055908) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.938759538250832, LR: 0.0003 +[2026-03-04 11:43:12] (step=0055909) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.938955194678146, LR: 0.0003 +[2026-03-04 11:43:19] (step=0055910) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.93915085110546, LR: 0.0003 +[2026-03-04 11:43:27] (step=0055911) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 10.939346507532772, LR: 0.0003 +[2026-03-04 11:43:35] (step=0055912) Train Loss: 0.4370, Train Steps/Sec: 0.12, Epoch: 10.939542163960086, LR: 0.0003 +[2026-03-04 11:43:43] (step=0055913) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.9397378203874, LR: 0.0003 +[2026-03-04 11:43:51] (step=0055914) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 10.939933476814714, LR: 0.0003 +[2026-03-04 11:43:59] (step=0055915) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.940129133242028, LR: 0.0003 +[2026-03-04 11:44:07] (step=0055916) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.94032478966934, LR: 0.0003 +[2026-03-04 11:44:15] (step=0055917) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.940520446096654, LR: 0.0003 +[2026-03-04 11:44:23] (step=0055918) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 10.940716102523968, LR: 0.0003 +[2026-03-04 11:44:30] (step=0055919) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 10.940911758951282, LR: 0.0003 +[2026-03-04 11:44:38] (step=0055920) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.941107415378594, LR: 0.0003 +[2026-03-04 11:44:46] (step=0055921) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.941303071805908, LR: 0.0003 +[2026-03-04 11:44:54] (step=0055922) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 10.941498728233222, LR: 0.0003 +[2026-03-04 11:45:02] (step=0055923) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.941694384660536, LR: 0.0003 +[2026-03-04 11:45:10] (step=0055924) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 10.94189004108785, LR: 0.0003 +[2026-03-04 11:45:18] (step=0055925) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.942085697515163, LR: 0.0003 +[2026-03-04 11:45:25] (step=0055926) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.942281353942477, LR: 0.0003 +[2026-03-04 11:45:33] (step=0055927) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 10.94247701036979, LR: 0.0003 +[2026-03-04 11:45:41] (step=0055928) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 10.942672666797105, LR: 0.0003 +[2026-03-04 11:45:49] (step=0055929) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.942868323224419, LR: 0.0003 +[2026-03-04 11:45:57] (step=0055930) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.94306397965173, LR: 0.0003 +[2026-03-04 11:46:05] (step=0055931) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.943259636079045, LR: 0.0003 +[2026-03-04 11:46:13] (step=0055932) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.943455292506359, LR: 0.0003 +[2026-03-04 11:46:21] (step=0055933) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.943650948933673, LR: 0.0003 +[2026-03-04 11:46:28] (step=0055934) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 10.943846605360987, LR: 0.0003 +[2026-03-04 11:46:36] (step=0055935) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.944042261788299, LR: 0.0003 +[2026-03-04 11:46:44] (step=0055936) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 10.944237918215613, LR: 0.0003 +[2026-03-04 11:46:52] (step=0055937) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 10.944433574642927, LR: 0.0003 +[2026-03-04 11:47:00] (step=0055938) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 10.944629231070241, LR: 0.0003 +[2026-03-04 11:47:08] (step=0055939) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 10.944824887497555, LR: 0.0003 +[2026-03-04 11:47:16] (step=0055940) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.945020543924867, LR: 0.0003 +[2026-03-04 11:47:24] (step=0055941) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 10.945216200352181, LR: 0.0003 +[2026-03-04 11:47:31] (step=0055942) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.945411856779495, LR: 0.0003 +[2026-03-04 11:47:39] (step=0055943) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.94560751320681, LR: 0.0003 +[2026-03-04 11:47:47] (step=0055944) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.945803169634123, LR: 0.0003 +[2026-03-04 11:47:55] (step=0055945) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 10.945998826061436, LR: 0.0003 +[2026-03-04 11:48:03] (step=0055946) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 10.94619448248875, LR: 0.0003 +[2026-03-04 11:48:11] (step=0055947) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 10.946390138916064, LR: 0.0003 +[2026-03-04 11:48:19] (step=0055948) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.946585795343378, LR: 0.0003 +[2026-03-04 11:48:26] (step=0055949) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.94678145177069, LR: 0.0003 +[2026-03-04 11:48:34] (step=0055950) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.946977108198004, LR: 0.0003 +[2026-03-04 11:48:42] (step=0055951) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.947172764625318, LR: 0.0003 +[2026-03-04 11:48:50] (step=0055952) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 10.947368421052632, LR: 0.0003 +[2026-03-04 11:48:58] (step=0055953) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.947564077479946, LR: 0.0003 +[2026-03-04 11:49:06] (step=0055954) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 10.947759733907258, LR: 0.0003 +[2026-03-04 11:49:14] (step=0055955) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.947955390334572, LR: 0.0003 +[2026-03-04 11:49:21] (step=0055956) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.948151046761886, LR: 0.0003 +[2026-03-04 11:49:29] (step=0055957) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 10.9483467031892, LR: 0.0003 +[2026-03-04 11:49:37] (step=0055958) Train Loss: 0.4487, Train Steps/Sec: 0.12, Epoch: 10.948542359616514, LR: 0.0003 +[2026-03-04 11:49:45] (step=0055959) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.948738016043826, LR: 0.0003 +[2026-03-04 11:49:53] (step=0055960) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.94893367247114, LR: 0.0003 +[2026-03-04 11:50:01] (step=0055961) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.949129328898454, LR: 0.0003 +[2026-03-04 11:50:09] (step=0055962) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 10.949324985325768, LR: 0.0003 +[2026-03-04 11:50:17] (step=0055963) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 10.949520641753082, LR: 0.0003 +[2026-03-04 11:50:24] (step=0055964) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.949716298180395, LR: 0.0003 +[2026-03-04 11:50:32] (step=0055965) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.949911954607709, LR: 0.0003 +[2026-03-04 11:50:40] (step=0055966) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 10.950107611035023, LR: 0.0003 +[2026-03-04 11:50:48] (step=0055967) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.950303267462337, LR: 0.0003 +[2026-03-04 11:50:56] (step=0055968) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 10.95049892388965, LR: 0.0003 +[2026-03-04 11:51:04] (step=0055969) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 10.950694580316963, LR: 0.0003 +[2026-03-04 11:51:12] (step=0055970) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 10.950890236744277, LR: 0.0003 +[2026-03-04 11:51:20] (step=0055971) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 10.95108589317159, LR: 0.0003 +[2026-03-04 11:51:27] (step=0055972) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.951281549598905, LR: 0.0003 +[2026-03-04 11:51:35] (step=0055973) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.951477206026217, LR: 0.0003 +[2026-03-04 11:51:43] (step=0055974) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.951672862453531, LR: 0.0003 +[2026-03-04 11:51:51] (step=0055975) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 10.951868518880845, LR: 0.0003 +[2026-03-04 11:51:59] (step=0055976) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 10.95206417530816, LR: 0.0003 +[2026-03-04 11:52:07] (step=0055977) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.952259831735473, LR: 0.0003 +[2026-03-04 11:52:15] (step=0055978) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 10.952455488162785, LR: 0.0003 +[2026-03-04 11:52:22] (step=0055979) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 10.9526511445901, LR: 0.0003 +[2026-03-04 11:52:30] (step=0055980) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.952846801017413, LR: 0.0003 +[2026-03-04 11:52:38] (step=0055981) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 10.953042457444727, LR: 0.0003 +[2026-03-04 11:52:46] (step=0055982) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.953238113872041, LR: 0.0003 +[2026-03-04 11:52:54] (step=0055983) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 10.953433770299354, LR: 0.0003 +[2026-03-04 11:53:02] (step=0055984) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.953629426726668, LR: 0.0003 +[2026-03-04 11:53:10] (step=0055985) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 10.953825083153982, LR: 0.0003 +[2026-03-04 11:53:17] (step=0055986) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 10.954020739581296, LR: 0.0003 +[2026-03-04 11:53:25] (step=0055987) Train Loss: 0.4430, Train Steps/Sec: 0.12, Epoch: 10.95421639600861, LR: 0.0003 +[2026-03-04 11:53:33] (step=0055988) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 10.954412052435922, LR: 0.0003 +[2026-03-04 11:53:41] (step=0055989) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.954607708863236, LR: 0.0003 +[2026-03-04 11:53:49] (step=0055990) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.95480336529055, LR: 0.0003 +[2026-03-04 11:53:57] (step=0055991) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 10.954999021717864, LR: 0.0003 +[2026-03-04 11:54:05] (step=0055992) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.955194678145178, LR: 0.0003 +[2026-03-04 11:54:13] (step=0055993) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.95539033457249, LR: 0.0003 +[2026-03-04 11:54:21] (step=0055994) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 10.955585990999804, LR: 0.0003 +[2026-03-04 11:54:28] (step=0055995) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.955781647427118, LR: 0.0003 +[2026-03-04 11:54:36] (step=0055996) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.955977303854432, LR: 0.0003 +[2026-03-04 11:54:44] (step=0055997) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.956172960281746, LR: 0.0003 +[2026-03-04 11:54:52] (step=0055998) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 10.956368616709058, LR: 0.0003 +[2026-03-04 11:55:00] (step=0055999) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.956564273136372, LR: 0.0003 +[2026-03-04 11:55:08] (step=0056000) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.956759929563686, LR: 0.0003 +[2026-03-04 11:55:08] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0056000/ +[2026-03-04 11:55:16] (step=0056001) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 10.956955585991, LR: 0.0003 +[2026-03-04 11:55:23] (step=0056002) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 10.957151242418313, LR: 0.0003 +[2026-03-04 11:55:31] (step=0056003) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.957346898845627, LR: 0.0003 +[2026-03-04 11:55:39] (step=0056004) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.95754255527294, LR: 0.0003 +[2026-03-04 11:55:47] (step=0056005) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.957738211700255, LR: 0.0003 +[2026-03-04 11:55:55] (step=0056006) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.957933868127569, LR: 0.0003 +[2026-03-04 11:56:03] (step=0056007) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 10.95812952455488, LR: 0.0003 +[2026-03-04 11:56:11] (step=0056008) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.958325180982195, LR: 0.0003 +[2026-03-04 11:56:19] (step=0056009) Train Loss: 0.4393, Train Steps/Sec: 0.12, Epoch: 10.958520837409509, LR: 0.0003 +[2026-03-04 11:56:27] (step=0056010) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.958716493836823, LR: 0.0003 +[2026-03-04 11:56:34] (step=0056011) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.958912150264137, LR: 0.0003 +[2026-03-04 11:56:42] (step=0056012) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.95910780669145, LR: 0.0003 +[2026-03-04 11:56:50] (step=0056013) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 10.959303463118763, LR: 0.0003 +[2026-03-04 11:56:58] (step=0056014) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.959499119546077, LR: 0.0003 +[2026-03-04 11:57:06] (step=0056015) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.959694775973391, LR: 0.0003 +[2026-03-04 11:57:14] (step=0056016) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 10.959890432400705, LR: 0.0003 +[2026-03-04 11:57:22] (step=0056017) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.960086088828017, LR: 0.0003 +[2026-03-04 11:57:30] (step=0056018) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 10.960281745255331, LR: 0.0003 +[2026-03-04 11:57:37] (step=0056019) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 10.960477401682645, LR: 0.0003 +[2026-03-04 11:57:45] (step=0056020) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.96067305810996, LR: 0.0003 +[2026-03-04 11:57:53] (step=0056021) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.960868714537273, LR: 0.0003 +[2026-03-04 11:58:01] (step=0056022) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 10.961064370964586, LR: 0.0003 +[2026-03-04 11:58:09] (step=0056023) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 10.9612600273919, LR: 0.0003 +[2026-03-04 11:58:17] (step=0056024) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 10.961455683819214, LR: 0.0003 +[2026-03-04 11:58:24] (step=0056025) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 10.961651340246528, LR: 0.0003 +[2026-03-04 11:58:32] (step=0056026) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 10.96184699667384, LR: 0.0003 +[2026-03-04 11:58:40] (step=0056027) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.962042653101154, LR: 0.0003 +[2026-03-04 11:58:48] (step=0056028) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 10.962238309528468, LR: 0.0003 +[2026-03-04 11:58:56] (step=0056029) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 10.962433965955782, LR: 0.0003 +[2026-03-04 11:59:04] (step=0056030) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 10.962629622383096, LR: 0.0003 +[2026-03-04 11:59:12] (step=0056031) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 10.962825278810408, LR: 0.0003 +[2026-03-04 11:59:20] (step=0056032) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.963020935237722, LR: 0.0003 +[2026-03-04 11:59:27] (step=0056033) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 10.963216591665036, LR: 0.0003 +[2026-03-04 11:59:35] (step=0056034) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.96341224809235, LR: 0.0003 +[2026-03-04 11:59:43] (step=0056035) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.963607904519664, LR: 0.0003 +[2026-03-04 11:59:51] (step=0056036) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.963803560946976, LR: 0.0003 +[2026-03-04 11:59:59] (step=0056037) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 10.96399921737429, LR: 0.0003 +[2026-03-04 12:00:07] (step=0056038) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 10.964194873801604, LR: 0.0003 +[2026-03-04 12:00:15] (step=0056039) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 10.964390530228918, LR: 0.0003 +[2026-03-04 12:00:23] (step=0056040) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 10.964586186656232, LR: 0.0003 +[2026-03-04 12:00:30] (step=0056041) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 10.964781843083545, LR: 0.0003 +[2026-03-04 12:00:38] (step=0056042) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 10.964977499510859, LR: 0.0003 +[2026-03-04 12:00:46] (step=0056043) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.965173155938173, LR: 0.0003 +[2026-03-04 12:00:54] (step=0056044) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 10.965368812365487, LR: 0.0003 +[2026-03-04 12:01:02] (step=0056045) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 10.9655644687928, LR: 0.0003 +[2026-03-04 12:01:10] (step=0056046) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 10.965760125220113, LR: 0.0003 +[2026-03-04 12:01:18] (step=0056047) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.965955781647427, LR: 0.0003 +[2026-03-04 12:01:25] (step=0056048) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 10.966151438074741, LR: 0.0003 +[2026-03-04 12:01:33] (step=0056049) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.966347094502055, LR: 0.0003 +[2026-03-04 12:01:41] (step=0056050) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 10.966542750929367, LR: 0.0003 +[2026-03-04 12:01:49] (step=0056051) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.966738407356681, LR: 0.0003 +[2026-03-04 12:01:57] (step=0056052) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.966934063783995, LR: 0.0003 +[2026-03-04 12:02:05] (step=0056053) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.96712972021131, LR: 0.0003 +[2026-03-04 12:02:13] (step=0056054) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 10.967325376638623, LR: 0.0003 +[2026-03-04 12:02:21] (step=0056055) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.967521033065935, LR: 0.0003 +[2026-03-04 12:02:28] (step=0056056) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.96771668949325, LR: 0.0003 +[2026-03-04 12:02:36] (step=0056057) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.967912345920563, LR: 0.0003 +[2026-03-04 12:02:44] (step=0056058) Train Loss: 0.4218, Train Steps/Sec: 0.13, Epoch: 10.968108002347877, LR: 0.0003 +[2026-03-04 12:02:52] (step=0056059) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 10.968303658775191, LR: 0.0003 +[2026-03-04 12:03:00] (step=0056060) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 10.968499315202504, LR: 0.0003 +[2026-03-04 12:03:08] (step=0056061) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.968694971629818, LR: 0.0003 +[2026-03-04 12:03:16] (step=0056062) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 10.968890628057132, LR: 0.0003 +[2026-03-04 12:03:24] (step=0056063) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.969086284484446, LR: 0.0003 +[2026-03-04 12:03:31] (step=0056064) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.96928194091176, LR: 0.0003 +[2026-03-04 12:03:39] (step=0056065) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.969477597339072, LR: 0.0003 +[2026-03-04 12:03:47] (step=0056066) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.969673253766386, LR: 0.0003 +[2026-03-04 12:03:55] (step=0056067) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.9698689101937, LR: 0.0003 +[2026-03-04 12:04:03] (step=0056068) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.970064566621014, LR: 0.0003 +[2026-03-04 12:04:11] (step=0056069) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 10.970260223048328, LR: 0.0003 +[2026-03-04 12:04:19] (step=0056070) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 10.97045587947564, LR: 0.0003 +[2026-03-04 12:04:26] (step=0056071) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 10.970651535902954, LR: 0.0003 +[2026-03-04 12:04:34] (step=0056072) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 10.970847192330268, LR: 0.0003 +[2026-03-04 12:04:42] (step=0056073) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.971042848757582, LR: 0.0003 +[2026-03-04 12:04:50] (step=0056074) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.971238505184896, LR: 0.0003 +[2026-03-04 12:04:58] (step=0056075) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.971434161612208, LR: 0.0003 +[2026-03-04 12:05:06] (step=0056076) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 10.971629818039522, LR: 0.0003 +[2026-03-04 12:05:14] (step=0056077) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 10.971825474466836, LR: 0.0003 +[2026-03-04 12:05:22] (step=0056078) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.97202113089415, LR: 0.0003 +[2026-03-04 12:05:29] (step=0056079) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 10.972216787321463, LR: 0.0003 +[2026-03-04 12:05:37] (step=0056080) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 10.972412443748777, LR: 0.0003 +[2026-03-04 12:05:45] (step=0056081) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.97260810017609, LR: 0.0003 +[2026-03-04 12:05:53] (step=0056082) Train Loss: 0.4461, Train Steps/Sec: 0.12, Epoch: 10.972803756603405, LR: 0.0003 +[2026-03-04 12:06:01] (step=0056083) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 10.972999413030719, LR: 0.0003 +[2026-03-04 12:06:09] (step=0056084) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 10.973195069458031, LR: 0.0003 +[2026-03-04 12:06:17] (step=0056085) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.973390725885345, LR: 0.0003 +[2026-03-04 12:06:25] (step=0056086) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 10.973586382312659, LR: 0.0003 +[2026-03-04 12:06:33] (step=0056087) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.973782038739973, LR: 0.0003 +[2026-03-04 12:06:40] (step=0056088) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 10.973977695167287, LR: 0.0003 +[2026-03-04 12:06:48] (step=0056089) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 10.9741733515946, LR: 0.0003 +[2026-03-04 12:06:56] (step=0056090) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.974369008021913, LR: 0.0003 +[2026-03-04 12:07:04] (step=0056091) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.974564664449227, LR: 0.0003 +[2026-03-04 12:07:12] (step=0056092) Train Loss: 0.4250, Train Steps/Sec: 0.13, Epoch: 10.974760320876541, LR: 0.0003 +[2026-03-04 12:07:20] (step=0056093) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.974955977303855, LR: 0.0003 +[2026-03-04 12:07:28] (step=0056094) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 10.975151633731167, LR: 0.0003 +[2026-03-04 12:07:35] (step=0056095) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.975347290158481, LR: 0.0003 +[2026-03-04 12:07:43] (step=0056096) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 10.975542946585795, LR: 0.0003 +[2026-03-04 12:07:51] (step=0056097) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.97573860301311, LR: 0.0003 +[2026-03-04 12:07:59] (step=0056098) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.975934259440423, LR: 0.0003 +[2026-03-04 12:08:07] (step=0056099) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 10.976129915867736, LR: 0.0003 +[2026-03-04 12:08:15] (step=0056100) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 10.97632557229505, LR: 0.0003 +[2026-03-04 12:08:23] (step=0056101) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 10.976521228722364, LR: 0.0003 +[2026-03-04 12:08:30] (step=0056102) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.976716885149678, LR: 0.0003 +[2026-03-04 12:08:38] (step=0056103) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 10.97691254157699, LR: 0.0003 +[2026-03-04 12:08:46] (step=0056104) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 10.977108198004304, LR: 0.0003 +[2026-03-04 12:08:54] (step=0056105) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 10.977303854431618, LR: 0.0003 +[2026-03-04 12:09:02] (step=0056106) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.977499510858932, LR: 0.0003 +[2026-03-04 12:09:10] (step=0056107) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.977695167286246, LR: 0.0003 +[2026-03-04 12:09:18] (step=0056108) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 10.977890823713558, LR: 0.0003 +[2026-03-04 12:09:26] (step=0056109) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 10.978086480140872, LR: 0.0003 +[2026-03-04 12:09:33] (step=0056110) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 10.978282136568186, LR: 0.0003 +[2026-03-04 12:09:41] (step=0056111) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 10.9784777929955, LR: 0.0003 +[2026-03-04 12:09:49] (step=0056112) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 10.978673449422814, LR: 0.0003 +[2026-03-04 12:09:57] (step=0056113) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 10.978869105850126, LR: 0.0003 +[2026-03-04 12:10:05] (step=0056114) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 10.97906476227744, LR: 0.0003 +[2026-03-04 12:10:13] (step=0056115) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.979260418704754, LR: 0.0003 +[2026-03-04 12:10:21] (step=0056116) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 10.979456075132068, LR: 0.0003 +[2026-03-04 12:10:28] (step=0056117) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 10.979651731559382, LR: 0.0003 +[2026-03-04 12:10:36] (step=0056118) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 10.979847387986695, LR: 0.0003 +[2026-03-04 12:10:44] (step=0056119) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 10.980043044414009, LR: 0.0003 +[2026-03-04 12:10:52] (step=0056120) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 10.980238700841323, LR: 0.0003 +[2026-03-04 12:11:00] (step=0056121) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 10.980434357268637, LR: 0.0003 +[2026-03-04 12:11:08] (step=0056122) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.98063001369595, LR: 0.0003 +[2026-03-04 12:11:16] (step=0056123) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 10.980825670123263, LR: 0.0003 +[2026-03-04 12:11:23] (step=0056124) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 10.981021326550577, LR: 0.0003 +[2026-03-04 12:11:31] (step=0056125) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.981216982977891, LR: 0.0003 +[2026-03-04 12:11:39] (step=0056126) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.981412639405205, LR: 0.0003 +[2026-03-04 12:11:47] (step=0056127) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 10.981608295832519, LR: 0.0003 +[2026-03-04 12:11:55] (step=0056128) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.981803952259831, LR: 0.0003 +[2026-03-04 12:12:03] (step=0056129) Train Loss: 0.4520, Train Steps/Sec: 0.12, Epoch: 10.981999608687145, LR: 0.0003 +[2026-03-04 12:12:11] (step=0056130) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.98219526511446, LR: 0.0003 +[2026-03-04 12:12:19] (step=0056131) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 10.982390921541773, LR: 0.0003 +[2026-03-04 12:12:27] (step=0056132) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.982586577969085, LR: 0.0003 +[2026-03-04 12:12:34] (step=0056133) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.9827822343964, LR: 0.0003 +[2026-03-04 12:12:42] (step=0056134) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 10.982977890823713, LR: 0.0003 +[2026-03-04 12:12:50] (step=0056135) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.983173547251027, LR: 0.0003 +[2026-03-04 12:12:58] (step=0056136) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.983369203678341, LR: 0.0003 +[2026-03-04 12:13:06] (step=0056137) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 10.983564860105654, LR: 0.0003 +[2026-03-04 12:13:14] (step=0056138) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 10.983760516532968, LR: 0.0003 +[2026-03-04 12:13:22] (step=0056139) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 10.983956172960282, LR: 0.0003 +[2026-03-04 12:13:29] (step=0056140) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 10.984151829387596, LR: 0.0003 +[2026-03-04 12:13:37] (step=0056141) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 10.98434748581491, LR: 0.0003 +[2026-03-04 12:13:45] (step=0056142) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 10.984543142242222, LR: 0.0003 +[2026-03-04 12:13:53] (step=0056143) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 10.984738798669536, LR: 0.0003 +[2026-03-04 12:14:01] (step=0056144) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 10.98493445509685, LR: 0.0003 +[2026-03-04 12:14:09] (step=0056145) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 10.985130111524164, LR: 0.0003 +[2026-03-04 12:14:17] (step=0056146) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.985325767951478, LR: 0.0003 +[2026-03-04 12:14:24] (step=0056147) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 10.98552142437879, LR: 0.0003 +[2026-03-04 12:14:32] (step=0056148) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.985717080806104, LR: 0.0003 +[2026-03-04 12:14:40] (step=0056149) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 10.985912737233418, LR: 0.0003 +[2026-03-04 12:14:48] (step=0056150) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 10.986108393660732, LR: 0.0003 +[2026-03-04 12:14:56] (step=0056151) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 10.986304050088046, LR: 0.0003 +[2026-03-04 12:15:04] (step=0056152) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 10.986499706515358, LR: 0.0003 +[2026-03-04 12:15:12] (step=0056153) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 10.986695362942672, LR: 0.0003 +[2026-03-04 12:15:19] (step=0056154) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 10.986891019369986, LR: 0.0003 +[2026-03-04 12:15:27] (step=0056155) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 10.9870866757973, LR: 0.0003 +[2026-03-04 12:15:35] (step=0056156) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 10.987282332224613, LR: 0.0003 +[2026-03-04 12:15:43] (step=0056157) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 10.987477988651927, LR: 0.0003 +[2026-03-04 12:15:51] (step=0056158) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.98767364507924, LR: 0.0003 +[2026-03-04 12:15:59] (step=0056159) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 10.987869301506555, LR: 0.0003 +[2026-03-04 12:16:06] (step=0056160) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 10.988064957933869, LR: 0.0003 +[2026-03-04 12:16:14] (step=0056161) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 10.988260614361181, LR: 0.0003 +[2026-03-04 12:16:22] (step=0056162) Train Loss: 0.4524, Train Steps/Sec: 0.12, Epoch: 10.988456270788495, LR: 0.0003 +[2026-03-04 12:16:30] (step=0056163) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 10.988651927215809, LR: 0.0003 +[2026-03-04 12:16:38] (step=0056164) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 10.988847583643123, LR: 0.0003 +[2026-03-04 12:16:46] (step=0056165) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 10.989043240070437, LR: 0.0003 +[2026-03-04 12:16:54] (step=0056166) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 10.98923889649775, LR: 0.0003 +[2026-03-04 12:17:02] (step=0056167) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 10.989434552925063, LR: 0.0003 +[2026-03-04 12:17:10] (step=0056168) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 10.989630209352377, LR: 0.0003 +[2026-03-04 12:17:17] (step=0056169) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 10.989825865779691, LR: 0.0003 +[2026-03-04 12:17:25] (step=0056170) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 10.990021522207005, LR: 0.0003 +[2026-03-04 12:17:33] (step=0056171) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.990217178634317, LR: 0.0003 +[2026-03-04 12:17:41] (step=0056172) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.990412835061631, LR: 0.0003 +[2026-03-04 12:17:49] (step=0056173) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 10.990608491488945, LR: 0.0003 +[2026-03-04 12:17:57] (step=0056174) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 10.99080414791626, LR: 0.0003 +[2026-03-04 12:18:05] (step=0056175) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 10.990999804343573, LR: 0.0003 +[2026-03-04 12:18:12] (step=0056176) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 10.991195460770886, LR: 0.0003 +[2026-03-04 12:18:20] (step=0056177) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.9913911171982, LR: 0.0003 +[2026-03-04 12:18:28] (step=0056178) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 10.991586773625514, LR: 0.0003 +[2026-03-04 12:18:36] (step=0056179) Train Loss: 0.4459, Train Steps/Sec: 0.12, Epoch: 10.991782430052828, LR: 0.0003 +[2026-03-04 12:18:44] (step=0056180) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 10.991978086480142, LR: 0.0003 +[2026-03-04 12:18:52] (step=0056181) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.992173742907454, LR: 0.0003 +[2026-03-04 12:19:00] (step=0056182) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.992369399334768, LR: 0.0003 +[2026-03-04 12:19:08] (step=0056183) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 10.992565055762082, LR: 0.0003 +[2026-03-04 12:19:16] (step=0056184) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 10.992760712189396, LR: 0.0003 +[2026-03-04 12:19:23] (step=0056185) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 10.992956368616708, LR: 0.0003 +[2026-03-04 12:19:31] (step=0056186) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 10.993152025044022, LR: 0.0003 +[2026-03-04 12:19:39] (step=0056187) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 10.993347681471336, LR: 0.0003 +[2026-03-04 12:19:47] (step=0056188) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 10.99354333789865, LR: 0.0003 +[2026-03-04 12:19:55] (step=0056189) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 10.993738994325964, LR: 0.0003 +[2026-03-04 12:20:03] (step=0056190) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 10.993934650753276, LR: 0.0003 +[2026-03-04 12:20:11] (step=0056191) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 10.99413030718059, LR: 0.0003 +[2026-03-04 12:20:18] (step=0056192) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 10.994325963607904, LR: 0.0003 +[2026-03-04 12:20:26] (step=0056193) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 10.994521620035218, LR: 0.0003 +[2026-03-04 12:20:34] (step=0056194) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 10.994717276462532, LR: 0.0003 +[2026-03-04 12:20:42] (step=0056195) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 10.994912932889845, LR: 0.0003 +[2026-03-04 12:20:50] (step=0056196) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 10.995108589317159, LR: 0.0003 +[2026-03-04 12:20:58] (step=0056197) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 10.995304245744473, LR: 0.0003 +[2026-03-04 12:21:06] (step=0056198) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 10.995499902171787, LR: 0.0003 +[2026-03-04 12:21:13] (step=0056199) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 10.9956955585991, LR: 0.0003 +[2026-03-04 12:21:21] (step=0056200) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 10.995891215026413, LR: 0.0003 +[2026-03-04 12:21:29] (step=0056201) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 10.996086871453727, LR: 0.0003 +[2026-03-04 12:21:37] (step=0056202) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 10.996282527881041, LR: 0.0003 +[2026-03-04 12:21:45] (step=0056203) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 10.996478184308355, LR: 0.0003 +[2026-03-04 12:21:53] (step=0056204) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 10.996673840735669, LR: 0.0003 +[2026-03-04 12:22:01] (step=0056205) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 10.996869497162981, LR: 0.0003 +[2026-03-04 12:22:09] (step=0056206) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 10.997065153590295, LR: 0.0003 +[2026-03-04 12:22:16] (step=0056207) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 10.99726081001761, LR: 0.0003 +[2026-03-04 12:22:24] (step=0056208) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 10.997456466444923, LR: 0.0003 +[2026-03-04 12:22:32] (step=0056209) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 10.997652122872235, LR: 0.0003 +[2026-03-04 12:22:40] (step=0056210) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 10.99784777929955, LR: 0.0003 +[2026-03-04 12:22:48] (step=0056211) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 10.998043435726863, LR: 0.0003 +[2026-03-04 12:22:56] (step=0056212) Train Loss: 0.4387, Train Steps/Sec: 0.12, Epoch: 10.998239092154177, LR: 0.0003 +[2026-03-04 12:23:04] (step=0056213) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 10.998434748581492, LR: 0.0003 +[2026-03-04 12:23:12] (step=0056214) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 10.998630405008804, LR: 0.0003 +[2026-03-04 12:23:20] (step=0056215) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 10.998826061436118, LR: 0.0003 +[2026-03-04 12:23:27] (step=0056216) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 10.999021717863432, LR: 0.0003 +[2026-03-04 12:23:35] (step=0056217) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 10.999217374290746, LR: 0.0003 +[2026-03-04 12:23:43] (step=0056218) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 10.99941303071806, LR: 0.0003 +[2026-03-04 12:23:51] (step=0056219) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 10.999608687145372, LR: 0.0003 +[2026-03-04 12:23:59] (step=0056220) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 10.999804343572686, LR: 0.0003 +[2026-03-04 12:24:07] (step=0056221) Train Loss: 0.4337, Train Steps/Sec: 0.12, Epoch: 11.0, LR: 0.0003 +[2026-03-04 12:24:07] Beginning epoch 11... +[2026-03-04 12:24:17] (step=0056222) Train Loss: 0.4342, Train Steps/Sec: 0.10, Epoch: 11.000195656427314, LR: 0.0003 +[2026-03-04 12:24:24] (step=0056223) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 11.000391312854628, LR: 0.0003 +[2026-03-04 12:24:32] (step=0056224) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.00058696928194, LR: 0.0003 +[2026-03-04 12:24:40] (step=0056225) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.000782625709254, LR: 0.0003 +[2026-03-04 12:24:48] (step=0056226) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 11.000978282136568, LR: 0.0003 +[2026-03-04 12:24:56] (step=0056227) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.001173938563882, LR: 0.0003 +[2026-03-04 12:25:04] (step=0056228) Train Loss: 0.4393, Train Steps/Sec: 0.12, Epoch: 11.001369594991196, LR: 0.0003 +[2026-03-04 12:25:12] (step=0056229) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.001565251418508, LR: 0.0003 +[2026-03-04 12:25:20] (step=0056230) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.001760907845823, LR: 0.0003 +[2026-03-04 12:25:28] (step=0056231) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.001956564273137, LR: 0.0003 +[2026-03-04 12:25:35] (step=0056232) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.00215222070045, LR: 0.0003 +[2026-03-04 12:25:43] (step=0056233) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.002347877127765, LR: 0.0003 +[2026-03-04 12:25:51] (step=0056234) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.002543533555077, LR: 0.0003 +[2026-03-04 12:25:59] (step=0056235) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.00273918998239, LR: 0.0003 +[2026-03-04 12:26:07] (step=0056236) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.002934846409705, LR: 0.0003 +[2026-03-04 12:26:15] (step=0056237) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.003130502837019, LR: 0.0003 +[2026-03-04 12:26:23] (step=0056238) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.003326159264331, LR: 0.0003 +[2026-03-04 12:26:30] (step=0056239) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.003521815691645, LR: 0.0003 +[2026-03-04 12:26:38] (step=0056240) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.003717472118959, LR: 0.0003 +[2026-03-04 12:26:46] (step=0056241) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 11.003913128546273, LR: 0.0003 +[2026-03-04 12:26:54] (step=0056242) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.004108784973587, LR: 0.0003 +[2026-03-04 12:27:02] (step=0056243) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.0043044414009, LR: 0.0003 +[2026-03-04 12:27:10] (step=0056244) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.004500097828213, LR: 0.0003 +[2026-03-04 12:27:18] (step=0056245) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.004695754255527, LR: 0.0003 +[2026-03-04 12:27:25] (step=0056246) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.004891410682841, LR: 0.0003 +[2026-03-04 12:27:33] (step=0056247) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.005087067110155, LR: 0.0003 +[2026-03-04 12:27:41] (step=0056248) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.005282723537468, LR: 0.0003 +[2026-03-04 12:27:49] (step=0056249) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.005478379964782, LR: 0.0003 +[2026-03-04 12:27:57] (step=0056250) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.005674036392096, LR: 0.0003 +[2026-03-04 12:28:05] (step=0056251) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.00586969281941, LR: 0.0003 +[2026-03-04 12:28:13] (step=0056252) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.006065349246724, LR: 0.0003 +[2026-03-04 12:28:20] (step=0056253) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.006261005674036, LR: 0.0003 +[2026-03-04 12:28:28] (step=0056254) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.00645666210135, LR: 0.0003 +[2026-03-04 12:28:36] (step=0056255) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.006652318528664, LR: 0.0003 +[2026-03-04 12:28:44] (step=0056256) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.006847974955978, LR: 0.0003 +[2026-03-04 12:28:52] (step=0056257) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.007043631383292, LR: 0.0003 +[2026-03-04 12:29:00] (step=0056258) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 11.007239287810604, LR: 0.0003 +[2026-03-04 12:29:08] (step=0056259) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.007434944237918, LR: 0.0003 +[2026-03-04 12:29:15] (step=0056260) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.007630600665232, LR: 0.0003 +[2026-03-04 12:29:23] (step=0056261) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.007826257092546, LR: 0.0003 +[2026-03-04 12:29:31] (step=0056262) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.008021913519858, LR: 0.0003 +[2026-03-04 12:29:39] (step=0056263) Train Loss: 0.4457, Train Steps/Sec: 0.12, Epoch: 11.008217569947172, LR: 0.0003 +[2026-03-04 12:29:47] (step=0056264) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.008413226374486, LR: 0.0003 +[2026-03-04 12:29:55] (step=0056265) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.0086088828018, LR: 0.0003 +[2026-03-04 12:30:03] (step=0056266) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 11.008804539229114, LR: 0.0003 +[2026-03-04 12:30:11] (step=0056267) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.009000195656427, LR: 0.0003 +[2026-03-04 12:30:19] (step=0056268) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.00919585208374, LR: 0.0003 +[2026-03-04 12:30:27] (step=0056269) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 11.009391508511055, LR: 0.0003 +[2026-03-04 12:30:34] (step=0056270) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.009587164938369, LR: 0.0003 +[2026-03-04 12:30:42] (step=0056271) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.009782821365683, LR: 0.0003 +[2026-03-04 12:30:50] (step=0056272) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.009978477792995, LR: 0.0003 +[2026-03-04 12:30:58] (step=0056273) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.010174134220309, LR: 0.0003 +[2026-03-04 12:31:06] (step=0056274) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.010369790647623, LR: 0.0003 +[2026-03-04 12:31:14] (step=0056275) Train Loss: 0.4482, Train Steps/Sec: 0.12, Epoch: 11.010565447074937, LR: 0.0003 +[2026-03-04 12:31:22] (step=0056276) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.01076110350225, LR: 0.0003 +[2026-03-04 12:31:30] (step=0056277) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.010956759929563, LR: 0.0003 +[2026-03-04 12:31:37] (step=0056278) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.011152416356877, LR: 0.0003 +[2026-03-04 12:31:45] (step=0056279) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.011348072784191, LR: 0.0003 +[2026-03-04 12:31:53] (step=0056280) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.011543729211505, LR: 0.0003 +[2026-03-04 12:32:01] (step=0056281) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.011739385638819, LR: 0.0003 +[2026-03-04 12:32:09] (step=0056282) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.011935042066131, LR: 0.0003 +[2026-03-04 12:32:17] (step=0056283) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.012130698493445, LR: 0.0003 +[2026-03-04 12:32:25] (step=0056284) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.01232635492076, LR: 0.0003 +[2026-03-04 12:32:32] (step=0056285) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.012522011348073, LR: 0.0003 +[2026-03-04 12:32:40] (step=0056286) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.012717667775387, LR: 0.0003 +[2026-03-04 12:32:48] (step=0056287) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.0129133242027, LR: 0.0003 +[2026-03-04 12:32:56] (step=0056288) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.013108980630014, LR: 0.0003 +[2026-03-04 12:33:04] (step=0056289) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.013304637057328, LR: 0.0003 +[2026-03-04 12:33:12] (step=0056290) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 11.013500293484642, LR: 0.0003 +[2026-03-04 12:33:20] (step=0056291) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.013695949911954, LR: 0.0003 +[2026-03-04 12:33:28] (step=0056292) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.013891606339268, LR: 0.0003 +[2026-03-04 12:33:35] (step=0056293) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.014087262766582, LR: 0.0003 +[2026-03-04 12:33:43] (step=0056294) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.014282919193896, LR: 0.0003 +[2026-03-04 12:33:51] (step=0056295) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.01447857562121, LR: 0.0003 +[2026-03-04 12:33:59] (step=0056296) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.014674232048522, LR: 0.0003 +[2026-03-04 12:34:07] (step=0056297) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.014869888475836, LR: 0.0003 +[2026-03-04 12:34:15] (step=0056298) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.01506554490315, LR: 0.0003 +[2026-03-04 12:34:23] (step=0056299) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.015261201330464, LR: 0.0003 +[2026-03-04 12:34:30] (step=0056300) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.015456857757778, LR: 0.0003 +[2026-03-04 12:34:38] (step=0056301) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 11.01565251418509, LR: 0.0003 +[2026-03-04 12:34:46] (step=0056302) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.015848170612404, LR: 0.0003 +[2026-03-04 12:34:54] (step=0056303) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.016043827039718, LR: 0.0003 +[2026-03-04 12:35:02] (step=0056304) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.016239483467032, LR: 0.0003 +[2026-03-04 12:35:10] (step=0056305) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.016435139894346, LR: 0.0003 +[2026-03-04 12:35:18] (step=0056306) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.016630796321659, LR: 0.0003 +[2026-03-04 12:35:25] (step=0056307) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.016826452748973, LR: 0.0003 +[2026-03-04 12:35:33] (step=0056308) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.017022109176287, LR: 0.0003 +[2026-03-04 12:35:41] (step=0056309) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.0172177656036, LR: 0.0003 +[2026-03-04 12:35:49] (step=0056310) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.017413422030915, LR: 0.0003 +[2026-03-04 12:35:57] (step=0056311) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 11.017609078458227, LR: 0.0003 +[2026-03-04 12:36:05] (step=0056312) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.01780473488554, LR: 0.0003 +[2026-03-04 12:36:13] (step=0056313) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.018000391312855, LR: 0.0003 +[2026-03-04 12:36:21] (step=0056314) Train Loss: 0.4300, Train Steps/Sec: 0.12, Epoch: 11.018196047740169, LR: 0.0003 +[2026-03-04 12:36:29] (step=0056315) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.018391704167481, LR: 0.0003 +[2026-03-04 12:36:36] (step=0056316) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.018587360594795, LR: 0.0003 +[2026-03-04 12:36:44] (step=0056317) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 11.018783017022109, LR: 0.0003 +[2026-03-04 12:36:52] (step=0056318) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.018978673449423, LR: 0.0003 +[2026-03-04 12:37:00] (step=0056319) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.019174329876737, LR: 0.0003 +[2026-03-04 12:37:08] (step=0056320) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.01936998630405, LR: 0.0003 +[2026-03-04 12:37:16] (step=0056321) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.019565642731363, LR: 0.0003 +[2026-03-04 12:37:24] (step=0056322) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.019761299158677, LR: 0.0003 +[2026-03-04 12:37:31] (step=0056323) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.019956955585991, LR: 0.0003 +[2026-03-04 12:37:39] (step=0056324) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.020152612013305, LR: 0.0003 +[2026-03-04 12:37:47] (step=0056325) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.020348268440618, LR: 0.0003 +[2026-03-04 12:37:55] (step=0056326) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.020543924867932, LR: 0.0003 +[2026-03-04 12:38:03] (step=0056327) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 11.020739581295246, LR: 0.0003 +[2026-03-04 12:38:11] (step=0056328) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.02093523772256, LR: 0.0003 +[2026-03-04 12:38:19] (step=0056329) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.021130894149874, LR: 0.0003 +[2026-03-04 12:38:27] (step=0056330) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.021326550577186, LR: 0.0003 +[2026-03-04 12:38:34] (step=0056331) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.0215222070045, LR: 0.0003 +[2026-03-04 12:38:42] (step=0056332) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.021717863431814, LR: 0.0003 +[2026-03-04 12:38:50] (step=0056333) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.021913519859128, LR: 0.0003 +[2026-03-04 12:38:58] (step=0056334) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.022109176286442, LR: 0.0003 +[2026-03-04 12:39:06] (step=0056335) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.022304832713754, LR: 0.0003 +[2026-03-04 12:39:14] (step=0056336) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.022500489141068, LR: 0.0003 +[2026-03-04 12:39:22] (step=0056337) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.022696145568382, LR: 0.0003 +[2026-03-04 12:39:29] (step=0056338) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.022891801995696, LR: 0.0003 +[2026-03-04 12:39:37] (step=0056339) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.02308745842301, LR: 0.0003 +[2026-03-04 12:39:45] (step=0056340) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.023283114850322, LR: 0.0003 +[2026-03-04 12:39:53] (step=0056341) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.023478771277636, LR: 0.0003 +[2026-03-04 12:40:01] (step=0056342) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.02367442770495, LR: 0.0003 +[2026-03-04 12:40:09] (step=0056343) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.023870084132264, LR: 0.0003 +[2026-03-04 12:40:17] (step=0056344) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.024065740559577, LR: 0.0003 +[2026-03-04 12:40:24] (step=0056345) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.02426139698689, LR: 0.0003 +[2026-03-04 12:40:32] (step=0056346) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.024457053414205, LR: 0.0003 +[2026-03-04 12:40:40] (step=0056347) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.024652709841519, LR: 0.0003 +[2026-03-04 12:40:48] (step=0056348) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.024848366268833, LR: 0.0003 +[2026-03-04 12:40:56] (step=0056349) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.025044022696145, LR: 0.0003 +[2026-03-04 12:41:04] (step=0056350) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.025239679123459, LR: 0.0003 +[2026-03-04 12:41:12] (step=0056351) Train Loss: 0.4206, Train Steps/Sec: 0.13, Epoch: 11.025435335550773, LR: 0.0003 +[2026-03-04 12:41:19] (step=0056352) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 11.025630991978087, LR: 0.0003 +[2026-03-04 12:41:27] (step=0056353) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.0258266484054, LR: 0.0003 +[2026-03-04 12:41:35] (step=0056354) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 11.026022304832713, LR: 0.0003 +[2026-03-04 12:41:43] (step=0056355) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.026217961260027, LR: 0.0003 +[2026-03-04 12:41:51] (step=0056356) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.026413617687341, LR: 0.0003 +[2026-03-04 12:41:59] (step=0056357) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.026609274114655, LR: 0.0003 +[2026-03-04 12:42:07] (step=0056358) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.026804930541969, LR: 0.0003 +[2026-03-04 12:42:15] (step=0056359) Train Loss: 0.4389, Train Steps/Sec: 0.12, Epoch: 11.027000586969281, LR: 0.0003 +[2026-03-04 12:42:23] (step=0056360) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.027196243396595, LR: 0.0003 +[2026-03-04 12:42:30] (step=0056361) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 11.02739189982391, LR: 0.0003 +[2026-03-04 12:42:38] (step=0056362) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.027587556251223, LR: 0.0003 +[2026-03-04 12:42:46] (step=0056363) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 11.027783212678537, LR: 0.0003 +[2026-03-04 12:42:54] (step=0056364) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.02797886910585, LR: 0.0003 +[2026-03-04 12:43:02] (step=0056365) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.028174525533164, LR: 0.0003 +[2026-03-04 12:43:10] (step=0056366) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.028370181960478, LR: 0.0003 +[2026-03-04 12:43:18] (step=0056367) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 11.028565838387792, LR: 0.0003 +[2026-03-04 12:43:25] (step=0056368) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.028761494815104, LR: 0.0003 +[2026-03-04 12:43:33] (step=0056369) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.028957151242418, LR: 0.0003 +[2026-03-04 12:43:41] (step=0056370) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.029152807669732, LR: 0.0003 +[2026-03-04 12:43:49] (step=0056371) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.029348464097046, LR: 0.0003 +[2026-03-04 12:43:57] (step=0056372) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.02954412052436, LR: 0.0003 +[2026-03-04 12:44:05] (step=0056373) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.029739776951672, LR: 0.0003 +[2026-03-04 12:44:13] (step=0056374) Train Loss: 0.4298, Train Steps/Sec: 0.12, Epoch: 11.029935433378986, LR: 0.0003 +[2026-03-04 12:44:21] (step=0056375) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.0301310898063, LR: 0.0003 +[2026-03-04 12:44:29] (step=0056376) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.030326746233614, LR: 0.0003 +[2026-03-04 12:44:36] (step=0056377) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.030522402660928, LR: 0.0003 +[2026-03-04 12:44:44] (step=0056378) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.03071805908824, LR: 0.0003 +[2026-03-04 12:44:52] (step=0056379) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.030913715515554, LR: 0.0003 +[2026-03-04 12:45:00] (step=0056380) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.031109371942868, LR: 0.0003 +[2026-03-04 12:45:08] (step=0056381) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 11.031305028370182, LR: 0.0003 +[2026-03-04 12:45:16] (step=0056382) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.031500684797496, LR: 0.0003 +[2026-03-04 12:45:24] (step=0056383) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.031696341224809, LR: 0.0003 +[2026-03-04 12:45:31] (step=0056384) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.031891997652123, LR: 0.0003 +[2026-03-04 12:45:39] (step=0056385) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.032087654079437, LR: 0.0003 +[2026-03-04 12:45:47] (step=0056386) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.03228331050675, LR: 0.0003 +[2026-03-04 12:45:55] (step=0056387) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.032478966934065, LR: 0.0003 +[2026-03-04 12:46:03] (step=0056388) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.032674623361377, LR: 0.0003 +[2026-03-04 12:46:11] (step=0056389) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.03287027978869, LR: 0.0003 +[2026-03-04 12:46:19] (step=0056390) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.033065936216005, LR: 0.0003 +[2026-03-04 12:46:27] (step=0056391) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.033261592643319, LR: 0.0003 +[2026-03-04 12:46:34] (step=0056392) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.033457249070633, LR: 0.0003 +[2026-03-04 12:46:42] (step=0056393) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.033652905497945, LR: 0.0003 +[2026-03-04 12:46:50] (step=0056394) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.033848561925259, LR: 0.0003 +[2026-03-04 12:46:58] (step=0056395) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.034044218352573, LR: 0.0003 +[2026-03-04 12:47:06] (step=0056396) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.034239874779887, LR: 0.0003 +[2026-03-04 12:47:14] (step=0056397) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.0344355312072, LR: 0.0003 +[2026-03-04 12:47:22] (step=0056398) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.034631187634513, LR: 0.0003 +[2026-03-04 12:47:29] (step=0056399) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.034826844061827, LR: 0.0003 +[2026-03-04 12:47:37] (step=0056400) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 11.035022500489141, LR: 0.0003 +[2026-03-04 12:47:45] (step=0056401) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.035218156916455, LR: 0.0003 +[2026-03-04 12:47:53] (step=0056402) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.035413813343768, LR: 0.0003 +[2026-03-04 12:48:01] (step=0056403) Train Loss: 0.4488, Train Steps/Sec: 0.12, Epoch: 11.035609469771082, LR: 0.0003 +[2026-03-04 12:48:09] (step=0056404) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.035805126198396, LR: 0.0003 +[2026-03-04 12:48:17] (step=0056405) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.03600078262571, LR: 0.0003 +[2026-03-04 12:48:25] (step=0056406) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.036196439053024, LR: 0.0003 +[2026-03-04 12:48:32] (step=0056407) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.036392095480336, LR: 0.0003 +[2026-03-04 12:48:40] (step=0056408) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 11.03658775190765, LR: 0.0003 +[2026-03-04 12:48:48] (step=0056409) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.036783408334964, LR: 0.0003 +[2026-03-04 12:48:56] (step=0056410) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.036979064762278, LR: 0.0003 +[2026-03-04 12:49:04] (step=0056411) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.037174721189592, LR: 0.0003 +[2026-03-04 12:49:12] (step=0056412) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.037370377616904, LR: 0.0003 +[2026-03-04 12:49:20] (step=0056413) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.037566034044218, LR: 0.0003 +[2026-03-04 12:49:28] (step=0056414) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.037761690471532, LR: 0.0003 +[2026-03-04 12:49:35] (step=0056415) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.037957346898846, LR: 0.0003 +[2026-03-04 12:49:43] (step=0056416) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.03815300332616, LR: 0.0003 +[2026-03-04 12:49:51] (step=0056417) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.038348659753472, LR: 0.0003 +[2026-03-04 12:49:59] (step=0056418) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 11.038544316180786, LR: 0.0003 +[2026-03-04 12:50:07] (step=0056419) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.0387399726081, LR: 0.0003 +[2026-03-04 12:50:15] (step=0056420) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.038935629035414, LR: 0.0003 +[2026-03-04 12:50:23] (step=0056421) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.039131285462727, LR: 0.0003 +[2026-03-04 12:50:30] (step=0056422) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 11.03932694189004, LR: 0.0003 +[2026-03-04 12:50:38] (step=0056423) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.039522598317355, LR: 0.0003 +[2026-03-04 12:50:46] (step=0056424) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.039718254744669, LR: 0.0003 +[2026-03-04 12:50:54] (step=0056425) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.039913911171983, LR: 0.0003 +[2026-03-04 12:51:02] (step=0056426) Train Loss: 0.4384, Train Steps/Sec: 0.12, Epoch: 11.040109567599295, LR: 0.0003 +[2026-03-04 12:51:10] (step=0056427) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.040305224026609, LR: 0.0003 +[2026-03-04 12:51:18] (step=0056428) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.040500880453923, LR: 0.0003 +[2026-03-04 12:51:26] (step=0056429) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.040696536881237, LR: 0.0003 +[2026-03-04 12:51:33] (step=0056430) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.04089219330855, LR: 0.0003 +[2026-03-04 12:51:41] (step=0056431) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 11.041087849735863, LR: 0.0003 +[2026-03-04 12:51:49] (step=0056432) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.041283506163177, LR: 0.0003 +[2026-03-04 12:51:57] (step=0056433) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.041479162590491, LR: 0.0003 +[2026-03-04 12:52:05] (step=0056434) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.041674819017805, LR: 0.0003 +[2026-03-04 12:52:13] (step=0056435) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.04187047544512, LR: 0.0003 +[2026-03-04 12:52:21] (step=0056436) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.042066131872431, LR: 0.0003 +[2026-03-04 12:52:28] (step=0056437) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.042261788299745, LR: 0.0003 +[2026-03-04 12:52:36] (step=0056438) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.04245744472706, LR: 0.0003 +[2026-03-04 12:52:44] (step=0056439) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.042653101154373, LR: 0.0003 +[2026-03-04 12:52:52] (step=0056440) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.042848757581687, LR: 0.0003 +[2026-03-04 12:53:00] (step=0056441) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.043044414009, LR: 0.0003 +[2026-03-04 12:53:08] (step=0056442) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.043240070436314, LR: 0.0003 +[2026-03-04 12:53:16] (step=0056443) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 11.043435726863628, LR: 0.0003 +[2026-03-04 12:53:23] (step=0056444) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 11.043631383290942, LR: 0.0003 +[2026-03-04 12:53:31] (step=0056445) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.043827039718254, LR: 0.0003 +[2026-03-04 12:53:39] (step=0056446) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.044022696145568, LR: 0.0003 +[2026-03-04 12:53:47] (step=0056447) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.044218352572882, LR: 0.0003 +[2026-03-04 12:53:55] (step=0056448) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.044414009000196, LR: 0.0003 +[2026-03-04 12:54:03] (step=0056449) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.04460966542751, LR: 0.0003 +[2026-03-04 12:54:11] (step=0056450) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.044805321854822, LR: 0.0003 +[2026-03-04 12:54:18] (step=0056451) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.045000978282136, LR: 0.0003 +[2026-03-04 12:54:26] (step=0056452) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.04519663470945, LR: 0.0003 +[2026-03-04 12:54:34] (step=0056453) Train Loss: 0.4444, Train Steps/Sec: 0.12, Epoch: 11.045392291136764, LR: 0.0003 +[2026-03-04 12:54:42] (step=0056454) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.045587947564078, LR: 0.0003 +[2026-03-04 12:54:50] (step=0056455) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 11.04578360399139, LR: 0.0003 +[2026-03-04 12:54:58] (step=0056456) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 11.045979260418704, LR: 0.0003 +[2026-03-04 12:55:06] (step=0056457) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.046174916846018, LR: 0.0003 +[2026-03-04 12:55:14] (step=0056458) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.046370573273332, LR: 0.0003 +[2026-03-04 12:55:22] (step=0056459) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.046566229700646, LR: 0.0003 +[2026-03-04 12:55:29] (step=0056460) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.046761886127959, LR: 0.0003 +[2026-03-04 12:55:37] (step=0056461) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.046957542555273, LR: 0.0003 +[2026-03-04 12:55:45] (step=0056462) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 11.047153198982587, LR: 0.0003 +[2026-03-04 12:55:53] (step=0056463) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.0473488554099, LR: 0.0003 +[2026-03-04 12:56:01] (step=0056464) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.047544511837215, LR: 0.0003 +[2026-03-04 12:56:09] (step=0056465) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 11.047740168264527, LR: 0.0003 +[2026-03-04 12:56:17] (step=0056466) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 11.04793582469184, LR: 0.0003 +[2026-03-04 12:56:24] (step=0056467) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.048131481119155, LR: 0.0003 +[2026-03-04 12:56:32] (step=0056468) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.048327137546469, LR: 0.0003 +[2026-03-04 12:56:40] (step=0056469) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.048522793973783, LR: 0.0003 +[2026-03-04 12:56:48] (step=0056470) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.048718450401095, LR: 0.0003 +[2026-03-04 12:56:56] (step=0056471) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.04891410682841, LR: 0.0003 +[2026-03-04 12:57:04] (step=0056472) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.049109763255723, LR: 0.0003 +[2026-03-04 12:57:12] (step=0056473) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 11.049305419683037, LR: 0.0003 +[2026-03-04 12:57:19] (step=0056474) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 11.04950107611035, LR: 0.0003 +[2026-03-04 12:57:27] (step=0056475) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.049696732537663, LR: 0.0003 +[2026-03-04 12:57:35] (step=0056476) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.049892388964977, LR: 0.0003 +[2026-03-04 12:57:43] (step=0056477) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.050088045392291, LR: 0.0003 +[2026-03-04 12:57:51] (step=0056478) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.050283701819605, LR: 0.0003 +[2026-03-04 12:57:59] (step=0056479) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 11.050479358246918, LR: 0.0003 +[2026-03-04 12:58:07] (step=0056480) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.050675014674232, LR: 0.0003 +[2026-03-04 12:58:15] (step=0056481) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 11.050870671101546, LR: 0.0003 +[2026-03-04 12:58:23] (step=0056482) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.05106632752886, LR: 0.0003 +[2026-03-04 12:58:30] (step=0056483) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.051261983956174, LR: 0.0003 +[2026-03-04 12:58:38] (step=0056484) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.051457640383486, LR: 0.0003 +[2026-03-04 12:58:46] (step=0056485) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.0516532968108, LR: 0.0003 +[2026-03-04 12:58:54] (step=0056486) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.051848953238114, LR: 0.0003 +[2026-03-04 12:59:02] (step=0056487) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.052044609665428, LR: 0.0003 +[2026-03-04 12:59:10] (step=0056488) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.052240266092742, LR: 0.0003 +[2026-03-04 12:59:18] (step=0056489) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.052435922520054, LR: 0.0003 +[2026-03-04 12:59:26] (step=0056490) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.052631578947368, LR: 0.0003 +[2026-03-04 12:59:33] (step=0056491) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.052827235374682, LR: 0.0003 +[2026-03-04 12:59:41] (step=0056492) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.053022891801996, LR: 0.0003 +[2026-03-04 12:59:49] (step=0056493) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.05321854822931, LR: 0.0003 +[2026-03-04 12:59:57] (step=0056494) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.053414204656622, LR: 0.0003 +[2026-03-04 13:00:05] (step=0056495) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 11.053609861083936, LR: 0.0003 +[2026-03-04 13:00:13] (step=0056496) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.05380551751125, LR: 0.0003 +[2026-03-04 13:00:21] (step=0056497) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.054001173938564, LR: 0.0003 +[2026-03-04 13:00:28] (step=0056498) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.054196830365877, LR: 0.0003 +[2026-03-04 13:00:36] (step=0056499) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.05439248679319, LR: 0.0003 +[2026-03-04 13:00:44] (step=0056500) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.054588143220505, LR: 0.0003 +[2026-03-04 13:00:44] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0056500/ +[2026-03-04 13:00:52] (step=0056501) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 11.054783799647819, LR: 0.0003 +[2026-03-04 13:01:00] (step=0056502) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 11.054979456075133, LR: 0.0003 +[2026-03-04 13:01:08] (step=0056503) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 11.055175112502445, LR: 0.0003 +[2026-03-04 13:01:16] (step=0056504) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.055370768929759, LR: 0.0003 +[2026-03-04 13:01:23] (step=0056505) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 11.055566425357073, LR: 0.0003 +[2026-03-04 13:01:31] (step=0056506) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.055762081784387, LR: 0.0003 +[2026-03-04 13:01:39] (step=0056507) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.055957738211701, LR: 0.0003 +[2026-03-04 13:01:47] (step=0056508) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.056153394639013, LR: 0.0003 +[2026-03-04 13:01:55] (step=0056509) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.056349051066327, LR: 0.0003 +[2026-03-04 13:02:03] (step=0056510) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.056544707493641, LR: 0.0003 +[2026-03-04 13:02:11] (step=0056511) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.056740363920955, LR: 0.0003 +[2026-03-04 13:02:19] (step=0056512) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 11.05693602034827, LR: 0.0003 +[2026-03-04 13:02:26] (step=0056513) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.057131676775581, LR: 0.0003 +[2026-03-04 13:02:34] (step=0056514) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.057327333202895, LR: 0.0003 +[2026-03-04 13:02:42] (step=0056515) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.05752298963021, LR: 0.0003 +[2026-03-04 13:02:50] (step=0056516) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.057718646057523, LR: 0.0003 +[2026-03-04 13:02:58] (step=0056517) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.057914302484837, LR: 0.0003 +[2026-03-04 13:03:06] (step=0056518) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 11.05810995891215, LR: 0.0003 +[2026-03-04 13:03:14] (step=0056519) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.058305615339464, LR: 0.0003 +[2026-03-04 13:03:21] (step=0056520) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.058501271766778, LR: 0.0003 +[2026-03-04 13:03:29] (step=0056521) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.058696928194092, LR: 0.0003 +[2026-03-04 13:03:37] (step=0056522) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.058892584621406, LR: 0.0003 +[2026-03-04 13:03:45] (step=0056523) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.059088241048718, LR: 0.0003 +[2026-03-04 13:03:53] (step=0056524) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 11.059283897476032, LR: 0.0003 +[2026-03-04 13:04:01] (step=0056525) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.059479553903346, LR: 0.0003 +[2026-03-04 13:04:09] (step=0056526) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.05967521033066, LR: 0.0003 +[2026-03-04 13:04:17] (step=0056527) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.059870866757972, LR: 0.0003 +[2026-03-04 13:04:24] (step=0056528) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.060066523185286, LR: 0.0003 +[2026-03-04 13:04:32] (step=0056529) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.0602621796126, LR: 0.0003 +[2026-03-04 13:04:40] (step=0056530) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 11.060457836039914, LR: 0.0003 +[2026-03-04 13:04:48] (step=0056531) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.060653492467228, LR: 0.0003 +[2026-03-04 13:04:56] (step=0056532) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.06084914889454, LR: 0.0003 +[2026-03-04 13:05:04] (step=0056533) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.061044805321854, LR: 0.0003 +[2026-03-04 13:05:12] (step=0056534) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.061240461749168, LR: 0.0003 +[2026-03-04 13:05:20] (step=0056535) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.061436118176482, LR: 0.0003 +[2026-03-04 13:05:27] (step=0056536) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.061631774603796, LR: 0.0003 +[2026-03-04 13:05:35] (step=0056537) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.061827431031109, LR: 0.0003 +[2026-03-04 13:05:43] (step=0056538) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.062023087458423, LR: 0.0003 +[2026-03-04 13:05:51] (step=0056539) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 11.062218743885737, LR: 0.0003 +[2026-03-04 13:05:59] (step=0056540) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 11.06241440031305, LR: 0.0003 +[2026-03-04 13:06:07] (step=0056541) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.062610056740365, LR: 0.0003 +[2026-03-04 13:06:15] (step=0056542) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.062805713167677, LR: 0.0003 +[2026-03-04 13:06:22] (step=0056543) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.063001369594991, LR: 0.0003 +[2026-03-04 13:06:30] (step=0056544) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.063197026022305, LR: 0.0003 +[2026-03-04 13:06:38] (step=0056545) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.063392682449619, LR: 0.0003 +[2026-03-04 13:06:46] (step=0056546) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.063588338876933, LR: 0.0003 +[2026-03-04 13:06:54] (step=0056547) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.063783995304245, LR: 0.0003 +[2026-03-04 13:07:02] (step=0056548) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.06397965173156, LR: 0.0003 +[2026-03-04 13:07:10] (step=0056549) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.064175308158873, LR: 0.0003 +[2026-03-04 13:07:17] (step=0056550) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.064370964586187, LR: 0.0003 +[2026-03-04 13:07:25] (step=0056551) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.0645666210135, LR: 0.0003 +[2026-03-04 13:07:33] (step=0056552) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.064762277440813, LR: 0.0003 +[2026-03-04 13:07:41] (step=0056553) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.064957933868127, LR: 0.0003 +[2026-03-04 13:07:49] (step=0056554) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.065153590295441, LR: 0.0003 +[2026-03-04 13:07:57] (step=0056555) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.065349246722755, LR: 0.0003 +[2026-03-04 13:08:05] (step=0056556) Train Loss: 0.4408, Train Steps/Sec: 0.12, Epoch: 11.065544903150068, LR: 0.0003 +[2026-03-04 13:08:13] (step=0056557) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.065740559577382, LR: 0.0003 +[2026-03-04 13:08:21] (step=0056558) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.065936216004696, LR: 0.0003 +[2026-03-04 13:08:28] (step=0056559) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.06613187243201, LR: 0.0003 +[2026-03-04 13:08:36] (step=0056560) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.066327528859324, LR: 0.0003 +[2026-03-04 13:08:44] (step=0056561) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 11.066523185286636, LR: 0.0003 +[2026-03-04 13:08:52] (step=0056562) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.06671884171395, LR: 0.0003 +[2026-03-04 13:09:00] (step=0056563) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 11.066914498141264, LR: 0.0003 +[2026-03-04 13:09:08] (step=0056564) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.067110154568578, LR: 0.0003 +[2026-03-04 13:09:16] (step=0056565) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.067305810995892, LR: 0.0003 +[2026-03-04 13:09:24] (step=0056566) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.067501467423204, LR: 0.0003 +[2026-03-04 13:09:31] (step=0056567) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.067697123850518, LR: 0.0003 +[2026-03-04 13:09:39] (step=0056568) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 11.067892780277832, LR: 0.0003 +[2026-03-04 13:09:47] (step=0056569) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.068088436705146, LR: 0.0003 +[2026-03-04 13:09:55] (step=0056570) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.06828409313246, LR: 0.0003 +[2026-03-04 13:10:03] (step=0056571) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 11.068479749559772, LR: 0.0003 +[2026-03-04 13:10:11] (step=0056572) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.068675405987086, LR: 0.0003 +[2026-03-04 13:10:19] (step=0056573) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.0688710624144, LR: 0.0003 +[2026-03-04 13:10:26] (step=0056574) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 11.069066718841714, LR: 0.0003 +[2026-03-04 13:10:34] (step=0056575) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.069262375269028, LR: 0.0003 +[2026-03-04 13:10:42] (step=0056576) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 11.06945803169634, LR: 0.0003 +[2026-03-04 13:10:50] (step=0056577) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.069653688123655, LR: 0.0003 +[2026-03-04 13:10:58] (step=0056578) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.069849344550969, LR: 0.0003 +[2026-03-04 13:11:06] (step=0056579) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.070045000978283, LR: 0.0003 +[2026-03-04 13:11:14] (step=0056580) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 11.070240657405595, LR: 0.0003 +[2026-03-04 13:11:22] (step=0056581) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.070436313832909, LR: 0.0003 +[2026-03-04 13:11:29] (step=0056582) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.070631970260223, LR: 0.0003 +[2026-03-04 13:11:37] (step=0056583) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 11.070827626687537, LR: 0.0003 +[2026-03-04 13:11:45] (step=0056584) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.071023283114851, LR: 0.0003 +[2026-03-04 13:11:53] (step=0056585) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.071218939542163, LR: 0.0003 +[2026-03-04 13:12:01] (step=0056586) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 11.071414595969477, LR: 0.0003 +[2026-03-04 13:12:09] (step=0056587) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.071610252396791, LR: 0.0003 +[2026-03-04 13:12:17] (step=0056588) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.071805908824105, LR: 0.0003 +[2026-03-04 13:12:24] (step=0056589) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 11.07200156525142, LR: 0.0003 +[2026-03-04 13:12:32] (step=0056590) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.072197221678731, LR: 0.0003 +[2026-03-04 13:12:40] (step=0056591) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 11.072392878106045, LR: 0.0003 +[2026-03-04 13:12:48] (step=0056592) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.07258853453336, LR: 0.0003 +[2026-03-04 13:12:56] (step=0056593) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.072784190960673, LR: 0.0003 +[2026-03-04 13:13:04] (step=0056594) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.072979847387987, LR: 0.0003 +[2026-03-04 13:13:12] (step=0056595) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.0731755038153, LR: 0.0003 +[2026-03-04 13:13:19] (step=0056596) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.073371160242614, LR: 0.0003 +[2026-03-04 13:13:27] (step=0056597) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.073566816669928, LR: 0.0003 +[2026-03-04 13:13:35] (step=0056598) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.073762473097242, LR: 0.0003 +[2026-03-04 13:13:43] (step=0056599) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.073958129524556, LR: 0.0003 +[2026-03-04 13:13:51] (step=0056600) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.074153785951868, LR: 0.0003 +[2026-03-04 13:13:59] (step=0056601) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.074349442379182, LR: 0.0003 +[2026-03-04 13:14:07] (step=0056602) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.074545098806496, LR: 0.0003 +[2026-03-04 13:14:15] (step=0056603) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.07474075523381, LR: 0.0003 +[2026-03-04 13:14:22] (step=0056604) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.074936411661122, LR: 0.0003 +[2026-03-04 13:14:30] (step=0056605) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.075132068088436, LR: 0.0003 +[2026-03-04 13:14:38] (step=0056606) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.07532772451575, LR: 0.0003 +[2026-03-04 13:14:46] (step=0056607) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.075523380943064, LR: 0.0003 +[2026-03-04 13:14:54] (step=0056608) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.075719037370378, LR: 0.0003 +[2026-03-04 13:15:02] (step=0056609) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.07591469379769, LR: 0.0003 +[2026-03-04 13:15:10] (step=0056610) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.076110350225004, LR: 0.0003 +[2026-03-04 13:15:18] (step=0056611) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.076306006652318, LR: 0.0003 +[2026-03-04 13:15:25] (step=0056612) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.076501663079632, LR: 0.0003 +[2026-03-04 13:15:33] (step=0056613) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.076697319506946, LR: 0.0003 +[2026-03-04 13:15:41] (step=0056614) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.076892975934259, LR: 0.0003 +[2026-03-04 13:15:49] (step=0056615) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.077088632361573, LR: 0.0003 +[2026-03-04 13:15:57] (step=0056616) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.077284288788887, LR: 0.0003 +[2026-03-04 13:16:05] (step=0056617) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.0774799452162, LR: 0.0003 +[2026-03-04 13:16:12] (step=0056618) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.077675601643515, LR: 0.0003 +[2026-03-04 13:16:20] (step=0056619) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.077871258070827, LR: 0.0003 +[2026-03-04 13:16:28] (step=0056620) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 11.078066914498141, LR: 0.0003 +[2026-03-04 13:16:36] (step=0056621) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.078262570925455, LR: 0.0003 +[2026-03-04 13:16:44] (step=0056622) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.078458227352769, LR: 0.0003 +[2026-03-04 13:16:52] (step=0056623) Train Loss: 0.4386, Train Steps/Sec: 0.12, Epoch: 11.078653883780083, LR: 0.0003 +[2026-03-04 13:17:00] (step=0056624) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.078849540207395, LR: 0.0003 +[2026-03-04 13:17:08] (step=0056625) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.07904519663471, LR: 0.0003 +[2026-03-04 13:17:16] (step=0056626) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.079240853062023, LR: 0.0003 +[2026-03-04 13:17:23] (step=0056627) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.079436509489337, LR: 0.0003 +[2026-03-04 13:17:31] (step=0056628) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 11.079632165916651, LR: 0.0003 +[2026-03-04 13:17:39] (step=0056629) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.079827822343963, LR: 0.0003 +[2026-03-04 13:17:47] (step=0056630) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.080023478771277, LR: 0.0003 +[2026-03-04 13:17:55] (step=0056631) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.080219135198591, LR: 0.0003 +[2026-03-04 13:18:03] (step=0056632) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.080414791625905, LR: 0.0003 +[2026-03-04 13:18:11] (step=0056633) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.080610448053218, LR: 0.0003 +[2026-03-04 13:18:18] (step=0056634) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.080806104480532, LR: 0.0003 +[2026-03-04 13:18:26] (step=0056635) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.081001760907846, LR: 0.0003 +[2026-03-04 13:18:34] (step=0056636) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.08119741733516, LR: 0.0003 +[2026-03-04 13:18:42] (step=0056637) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.081393073762474, LR: 0.0003 +[2026-03-04 13:18:50] (step=0056638) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.081588730189786, LR: 0.0003 +[2026-03-04 13:18:58] (step=0056639) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.0817843866171, LR: 0.0003 +[2026-03-04 13:19:06] (step=0056640) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.081980043044414, LR: 0.0003 +[2026-03-04 13:19:13] (step=0056641) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.082175699471728, LR: 0.0003 +[2026-03-04 13:19:21] (step=0056642) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.082371355899042, LR: 0.0003 +[2026-03-04 13:19:29] (step=0056643) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 11.082567012326354, LR: 0.0003 +[2026-03-04 13:19:37] (step=0056644) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.082762668753668, LR: 0.0003 +[2026-03-04 13:19:45] (step=0056645) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 11.082958325180982, LR: 0.0003 +[2026-03-04 13:19:53] (step=0056646) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.083153981608296, LR: 0.0003 +[2026-03-04 13:20:01] (step=0056647) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.08334963803561, LR: 0.0003 +[2026-03-04 13:20:08] (step=0056648) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.083545294462922, LR: 0.0003 +[2026-03-04 13:20:16] (step=0056649) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.083740950890236, LR: 0.0003 +[2026-03-04 13:20:24] (step=0056650) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.08393660731755, LR: 0.0003 +[2026-03-04 13:20:32] (step=0056651) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 11.084132263744864, LR: 0.0003 +[2026-03-04 13:20:40] (step=0056652) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.084327920172178, LR: 0.0003 +[2026-03-04 13:20:48] (step=0056653) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.08452357659949, LR: 0.0003 +[2026-03-04 13:20:56] (step=0056654) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.084719233026805, LR: 0.0003 +[2026-03-04 13:21:04] (step=0056655) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.084914889454119, LR: 0.0003 +[2026-03-04 13:21:11] (step=0056656) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.085110545881433, LR: 0.0003 +[2026-03-04 13:21:19] (step=0056657) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.085306202308745, LR: 0.0003 +[2026-03-04 13:21:27] (step=0056658) Train Loss: 0.4421, Train Steps/Sec: 0.12, Epoch: 11.085501858736059, LR: 0.0003 +[2026-03-04 13:21:35] (step=0056659) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.085697515163373, LR: 0.0003 +[2026-03-04 13:21:43] (step=0056660) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.085893171590687, LR: 0.0003 +[2026-03-04 13:21:51] (step=0056661) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.086088828018001, LR: 0.0003 +[2026-03-04 13:21:59] (step=0056662) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.086284484445313, LR: 0.0003 +[2026-03-04 13:22:07] (step=0056663) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.086480140872627, LR: 0.0003 +[2026-03-04 13:22:14] (step=0056664) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.086675797299941, LR: 0.0003 +[2026-03-04 13:22:22] (step=0056665) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 11.086871453727255, LR: 0.0003 +[2026-03-04 13:22:30] (step=0056666) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.08706711015457, LR: 0.0003 +[2026-03-04 13:22:38] (step=0056667) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.087262766581881, LR: 0.0003 +[2026-03-04 13:22:46] (step=0056668) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 11.087458423009195, LR: 0.0003 +[2026-03-04 13:22:54] (step=0056669) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.08765407943651, LR: 0.0003 +[2026-03-04 13:23:02] (step=0056670) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.087849735863823, LR: 0.0003 +[2026-03-04 13:23:10] (step=0056671) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.088045392291138, LR: 0.0003 +[2026-03-04 13:23:17] (step=0056672) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.08824104871845, LR: 0.0003 +[2026-03-04 13:23:25] (step=0056673) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.088436705145764, LR: 0.0003 +[2026-03-04 13:23:33] (step=0056674) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.088632361573078, LR: 0.0003 +[2026-03-04 13:23:41] (step=0056675) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.088828018000392, LR: 0.0003 +[2026-03-04 13:23:49] (step=0056676) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.089023674427706, LR: 0.0003 +[2026-03-04 13:23:57] (step=0056677) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.089219330855018, LR: 0.0003 +[2026-03-04 13:24:05] (step=0056678) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.089414987282332, LR: 0.0003 +[2026-03-04 13:24:13] (step=0056679) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 11.089610643709646, LR: 0.0003 +[2026-03-04 13:24:20] (step=0056680) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.08980630013696, LR: 0.0003 +[2026-03-04 13:24:28] (step=0056681) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.090001956564274, LR: 0.0003 +[2026-03-04 13:24:36] (step=0056682) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.090197612991586, LR: 0.0003 +[2026-03-04 13:24:44] (step=0056683) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.0903932694189, LR: 0.0003 +[2026-03-04 13:24:52] (step=0056684) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.090588925846214, LR: 0.0003 +[2026-03-04 13:25:00] (step=0056685) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.090784582273528, LR: 0.0003 +[2026-03-04 13:25:08] (step=0056686) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 11.09098023870084, LR: 0.0003 +[2026-03-04 13:25:15] (step=0056687) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.091175895128154, LR: 0.0003 +[2026-03-04 13:25:23] (step=0056688) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.091371551555468, LR: 0.0003 +[2026-03-04 13:25:31] (step=0056689) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 11.091567207982783, LR: 0.0003 +[2026-03-04 13:25:39] (step=0056690) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.091762864410097, LR: 0.0003 +[2026-03-04 13:25:47] (step=0056691) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.091958520837409, LR: 0.0003 +[2026-03-04 13:25:55] (step=0056692) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.092154177264723, LR: 0.0003 +[2026-03-04 13:26:03] (step=0056693) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.092349833692037, LR: 0.0003 +[2026-03-04 13:26:10] (step=0056694) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.09254549011935, LR: 0.0003 +[2026-03-04 13:26:18] (step=0056695) Train Loss: 0.4228, Train Steps/Sec: 0.13, Epoch: 11.092741146546665, LR: 0.0003 +[2026-03-04 13:26:26] (step=0056696) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.092936802973977, LR: 0.0003 +[2026-03-04 13:26:34] (step=0056697) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.093132459401291, LR: 0.0003 +[2026-03-04 13:26:42] (step=0056698) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 11.093328115828605, LR: 0.0003 +[2026-03-04 13:26:50] (step=0056699) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.093523772255919, LR: 0.0003 +[2026-03-04 13:26:58] (step=0056700) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.093719428683233, LR: 0.0003 +[2026-03-04 13:27:06] (step=0056701) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.093915085110545, LR: 0.0003 +[2026-03-04 13:27:13] (step=0056702) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.09411074153786, LR: 0.0003 +[2026-03-04 13:27:21] (step=0056703) Train Loss: 0.4502, Train Steps/Sec: 0.12, Epoch: 11.094306397965173, LR: 0.0003 +[2026-03-04 13:27:29] (step=0056704) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.094502054392487, LR: 0.0003 +[2026-03-04 13:27:37] (step=0056705) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.094697710819801, LR: 0.0003 +[2026-03-04 13:27:45] (step=0056706) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.094893367247114, LR: 0.0003 +[2026-03-04 13:27:53] (step=0056707) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.095089023674428, LR: 0.0003 +[2026-03-04 13:28:01] (step=0056708) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.095284680101742, LR: 0.0003 +[2026-03-04 13:28:09] (step=0056709) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.095480336529056, LR: 0.0003 +[2026-03-04 13:28:17] (step=0056710) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.095675992956368, LR: 0.0003 +[2026-03-04 13:28:24] (step=0056711) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 11.095871649383682, LR: 0.0003 +[2026-03-04 13:28:32] (step=0056712) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.096067305810996, LR: 0.0003 +[2026-03-04 13:28:40] (step=0056713) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.09626296223831, LR: 0.0003 +[2026-03-04 13:28:48] (step=0056714) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.096458618665624, LR: 0.0003 +[2026-03-04 13:28:56] (step=0056715) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.096654275092936, LR: 0.0003 +[2026-03-04 13:29:04] (step=0056716) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.09684993152025, LR: 0.0003 +[2026-03-04 13:29:12] (step=0056717) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 11.097045587947564, LR: 0.0003 +[2026-03-04 13:29:19] (step=0056718) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.097241244374878, LR: 0.0003 +[2026-03-04 13:29:27] (step=0056719) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.097436900802192, LR: 0.0003 +[2026-03-04 13:29:35] (step=0056720) Train Loss: 0.4524, Train Steps/Sec: 0.12, Epoch: 11.097632557229504, LR: 0.0003 +[2026-03-04 13:29:43] (step=0056721) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.097828213656818, LR: 0.0003 +[2026-03-04 13:29:51] (step=0056722) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.098023870084132, LR: 0.0003 +[2026-03-04 13:29:59] (step=0056723) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.098219526511446, LR: 0.0003 +[2026-03-04 13:30:07] (step=0056724) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.09841518293876, LR: 0.0003 +[2026-03-04 13:30:15] (step=0056725) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.098610839366073, LR: 0.0003 +[2026-03-04 13:30:23] (step=0056726) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 11.098806495793387, LR: 0.0003 +[2026-03-04 13:30:30] (step=0056727) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.0990021522207, LR: 0.0003 +[2026-03-04 13:30:38] (step=0056728) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.099197808648015, LR: 0.0003 +[2026-03-04 13:30:46] (step=0056729) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.099393465075329, LR: 0.0003 +[2026-03-04 13:30:54] (step=0056730) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.09958912150264, LR: 0.0003 +[2026-03-04 13:31:02] (step=0056731) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.099784777929955, LR: 0.0003 +[2026-03-04 13:31:10] (step=0056732) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.099980434357269, LR: 0.0003 +[2026-03-04 13:31:18] (step=0056733) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.100176090784583, LR: 0.0003 +[2026-03-04 13:31:25] (step=0056734) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.100371747211897, LR: 0.0003 +[2026-03-04 13:31:33] (step=0056735) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.100567403639209, LR: 0.0003 +[2026-03-04 13:31:41] (step=0056736) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.100763060066523, LR: 0.0003 +[2026-03-04 13:31:49] (step=0056737) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.100958716493837, LR: 0.0003 +[2026-03-04 13:31:57] (step=0056738) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.101154372921151, LR: 0.0003 +[2026-03-04 13:32:05] (step=0056739) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.101350029348463, LR: 0.0003 +[2026-03-04 13:32:13] (step=0056740) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.101545685775777, LR: 0.0003 +[2026-03-04 13:32:21] (step=0056741) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.101741342203091, LR: 0.0003 +[2026-03-04 13:32:28] (step=0056742) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.101936998630405, LR: 0.0003 +[2026-03-04 13:32:36] (step=0056743) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.10213265505772, LR: 0.0003 +[2026-03-04 13:32:44] (step=0056744) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.102328311485032, LR: 0.0003 +[2026-03-04 13:32:52] (step=0056745) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.102523967912346, LR: 0.0003 +[2026-03-04 13:33:00] (step=0056746) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.10271962433966, LR: 0.0003 +[2026-03-04 13:33:08] (step=0056747) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.102915280766974, LR: 0.0003 +[2026-03-04 13:33:16] (step=0056748) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.103110937194288, LR: 0.0003 +[2026-03-04 13:33:23] (step=0056749) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.1033065936216, LR: 0.0003 +[2026-03-04 13:33:31] (step=0056750) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 11.103502250048914, LR: 0.0003 +[2026-03-04 13:33:39] (step=0056751) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 11.103697906476228, LR: 0.0003 +[2026-03-04 13:33:47] (step=0056752) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.103893562903542, LR: 0.0003 +[2026-03-04 13:33:55] (step=0056753) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.104089219330856, LR: 0.0003 +[2026-03-04 13:34:03] (step=0056754) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.104284875758168, LR: 0.0003 +[2026-03-04 13:34:11] (step=0056755) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.104480532185482, LR: 0.0003 +[2026-03-04 13:34:19] (step=0056756) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.104676188612796, LR: 0.0003 +[2026-03-04 13:34:27] (step=0056757) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.10487184504011, LR: 0.0003 +[2026-03-04 13:34:34] (step=0056758) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.105067501467424, LR: 0.0003 +[2026-03-04 13:34:42] (step=0056759) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.105263157894736, LR: 0.0003 +[2026-03-04 13:34:50] (step=0056760) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.10545881432205, LR: 0.0003 +[2026-03-04 13:34:58] (step=0056761) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.105654470749364, LR: 0.0003 +[2026-03-04 13:35:06] (step=0056762) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.105850127176678, LR: 0.0003 +[2026-03-04 13:35:14] (step=0056763) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.10604578360399, LR: 0.0003 +[2026-03-04 13:35:22] (step=0056764) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.106241440031305, LR: 0.0003 +[2026-03-04 13:35:29] (step=0056765) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 11.106437096458619, LR: 0.0003 +[2026-03-04 13:35:37] (step=0056766) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.106632752885933, LR: 0.0003 +[2026-03-04 13:35:45] (step=0056767) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.106828409313247, LR: 0.0003 +[2026-03-04 13:35:53] (step=0056768) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.107024065740559, LR: 0.0003 +[2026-03-04 13:36:01] (step=0056769) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.107219722167873, LR: 0.0003 +[2026-03-04 13:36:09] (step=0056770) Train Loss: 0.4625, Train Steps/Sec: 0.13, Epoch: 11.107415378595187, LR: 0.0003 +[2026-03-04 13:36:17] (step=0056771) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.1076110350225, LR: 0.0003 +[2026-03-04 13:36:25] (step=0056772) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.107806691449815, LR: 0.0003 +[2026-03-04 13:36:33] (step=0056773) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.108002347877127, LR: 0.0003 +[2026-03-04 13:36:40] (step=0056774) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.108198004304441, LR: 0.0003 +[2026-03-04 13:36:48] (step=0056775) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.108393660731755, LR: 0.0003 +[2026-03-04 13:36:56] (step=0056776) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.108589317159069, LR: 0.0003 +[2026-03-04 13:37:04] (step=0056777) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.108784973586383, LR: 0.0003 +[2026-03-04 13:37:12] (step=0056778) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 11.108980630013695, LR: 0.0003 +[2026-03-04 13:37:20] (step=0056779) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.10917628644101, LR: 0.0003 +[2026-03-04 13:37:28] (step=0056780) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.109371942868323, LR: 0.0003 +[2026-03-04 13:37:35] (step=0056781) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.109567599295637, LR: 0.0003 +[2026-03-04 13:37:43] (step=0056782) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.109763255722951, LR: 0.0003 +[2026-03-04 13:37:51] (step=0056783) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.109958912150264, LR: 0.0003 +[2026-03-04 13:37:59] (step=0056784) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 11.110154568577578, LR: 0.0003 +[2026-03-04 13:38:07] (step=0056785) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.110350225004892, LR: 0.0003 +[2026-03-04 13:38:15] (step=0056786) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.110545881432206, LR: 0.0003 +[2026-03-04 13:38:23] (step=0056787) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 11.11074153785952, LR: 0.0003 +[2026-03-04 13:38:30] (step=0056788) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.110937194286832, LR: 0.0003 +[2026-03-04 13:38:38] (step=0056789) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.111132850714146, LR: 0.0003 +[2026-03-04 13:38:46] (step=0056790) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.11132850714146, LR: 0.0003 +[2026-03-04 13:38:54] (step=0056791) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.111524163568774, LR: 0.0003 +[2026-03-04 13:39:02] (step=0056792) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.111719819996086, LR: 0.0003 +[2026-03-04 13:39:10] (step=0056793) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.1119154764234, LR: 0.0003 +[2026-03-04 13:39:18] (step=0056794) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.112111132850714, LR: 0.0003 +[2026-03-04 13:39:25] (step=0056795) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.112306789278028, LR: 0.0003 +[2026-03-04 13:39:33] (step=0056796) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.112502445705342, LR: 0.0003 +[2026-03-04 13:39:41] (step=0056797) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.112698102132654, LR: 0.0003 +[2026-03-04 13:39:49] (step=0056798) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.112893758559968, LR: 0.0003 +[2026-03-04 13:39:57] (step=0056799) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.113089414987282, LR: 0.0003 +[2026-03-04 13:40:05] (step=0056800) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.113285071414596, LR: 0.0003 +[2026-03-04 13:40:13] (step=0056801) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.11348072784191, LR: 0.0003 +[2026-03-04 13:40:20] (step=0056802) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.113676384269223, LR: 0.0003 +[2026-03-04 13:40:28] (step=0056803) Train Loss: 0.4500, Train Steps/Sec: 0.12, Epoch: 11.113872040696537, LR: 0.0003 +[2026-03-04 13:40:36] (step=0056804) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.11406769712385, LR: 0.0003 +[2026-03-04 13:40:44] (step=0056805) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.114263353551165, LR: 0.0003 +[2026-03-04 13:40:52] (step=0056806) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.114459009978479, LR: 0.0003 +[2026-03-04 13:41:00] (step=0056807) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 11.11465466640579, LR: 0.0003 +[2026-03-04 13:41:08] (step=0056808) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.114850322833105, LR: 0.0003 +[2026-03-04 13:41:16] (step=0056809) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 11.115045979260419, LR: 0.0003 +[2026-03-04 13:41:23] (step=0056810) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.115241635687733, LR: 0.0003 +[2026-03-04 13:41:31] (step=0056811) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 11.115437292115047, LR: 0.0003 +[2026-03-04 13:41:39] (step=0056812) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.115632948542359, LR: 0.0003 +[2026-03-04 13:41:47] (step=0056813) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.115828604969673, LR: 0.0003 +[2026-03-04 13:41:55] (step=0056814) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 11.116024261396987, LR: 0.0003 +[2026-03-04 13:42:03] (step=0056815) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 11.116219917824301, LR: 0.0003 +[2026-03-04 13:42:11] (step=0056816) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.116415574251613, LR: 0.0003 +[2026-03-04 13:42:18] (step=0056817) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.116611230678927, LR: 0.0003 +[2026-03-04 13:42:26] (step=0056818) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.116806887106241, LR: 0.0003 +[2026-03-04 13:42:34] (step=0056819) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 11.117002543533555, LR: 0.0003 +[2026-03-04 13:42:42] (step=0056820) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.11719819996087, LR: 0.0003 +[2026-03-04 13:42:50] (step=0056821) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.117393856388182, LR: 0.0003 +[2026-03-04 13:42:58] (step=0056822) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.117589512815496, LR: 0.0003 +[2026-03-04 13:43:06] (step=0056823) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 11.11778516924281, LR: 0.0003 +[2026-03-04 13:43:13] (step=0056824) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.117980825670124, LR: 0.0003 +[2026-03-04 13:43:21] (step=0056825) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.118176482097438, LR: 0.0003 +[2026-03-04 13:43:29] (step=0056826) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.11837213852475, LR: 0.0003 +[2026-03-04 13:43:37] (step=0056827) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.118567794952064, LR: 0.0003 +[2026-03-04 13:43:45] (step=0056828) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.118763451379378, LR: 0.0003 +[2026-03-04 13:43:53] (step=0056829) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.118959107806692, LR: 0.0003 +[2026-03-04 13:44:01] (step=0056830) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.119154764234006, LR: 0.0003 +[2026-03-04 13:44:08] (step=0056831) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.119350420661318, LR: 0.0003 +[2026-03-04 13:44:16] (step=0056832) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.119546077088632, LR: 0.0003 +[2026-03-04 13:44:24] (step=0056833) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.119741733515946, LR: 0.0003 +[2026-03-04 13:44:32] (step=0056834) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.11993738994326, LR: 0.0003 +[2026-03-04 13:44:40] (step=0056835) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.120133046370574, LR: 0.0003 +[2026-03-04 13:44:48] (step=0056836) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.120328702797886, LR: 0.0003 +[2026-03-04 13:44:56] (step=0056837) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.1205243592252, LR: 0.0003 +[2026-03-04 13:45:03] (step=0056838) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.120720015652514, LR: 0.0003 +[2026-03-04 13:45:11] (step=0056839) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 11.120915672079828, LR: 0.0003 +[2026-03-04 13:45:19] (step=0056840) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.121111328507142, LR: 0.0003 +[2026-03-04 13:45:27] (step=0056841) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.121306984934455, LR: 0.0003 +[2026-03-04 13:45:35] (step=0056842) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.121502641361769, LR: 0.0003 +[2026-03-04 13:45:43] (step=0056843) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.121698297789083, LR: 0.0003 +[2026-03-04 13:45:50] (step=0056844) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.121893954216397, LR: 0.0003 +[2026-03-04 13:45:58] (step=0056845) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.122089610643709, LR: 0.0003 +[2026-03-04 13:46:06] (step=0056846) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.122285267071023, LR: 0.0003 +[2026-03-04 13:46:14] (step=0056847) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.122480923498337, LR: 0.0003 +[2026-03-04 13:46:22] (step=0056848) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.12267657992565, LR: 0.0003 +[2026-03-04 13:46:30] (step=0056849) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.122872236352965, LR: 0.0003 +[2026-03-04 13:46:38] (step=0056850) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.123067892780277, LR: 0.0003 +[2026-03-04 13:46:45] (step=0056851) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.123263549207591, LR: 0.0003 +[2026-03-04 13:46:53] (step=0056852) Train Loss: 0.4590, Train Steps/Sec: 0.12, Epoch: 11.123459205634905, LR: 0.0003 +[2026-03-04 13:47:01] (step=0056853) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.123654862062219, LR: 0.0003 +[2026-03-04 13:47:09] (step=0056854) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.123850518489533, LR: 0.0003 +[2026-03-04 13:47:17] (step=0056855) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.124046174916845, LR: 0.0003 +[2026-03-04 13:47:25] (step=0056856) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 11.12424183134416, LR: 0.0003 +[2026-03-04 13:47:33] (step=0056857) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.124437487771473, LR: 0.0003 +[2026-03-04 13:47:41] (step=0056858) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.124633144198787, LR: 0.0003 +[2026-03-04 13:47:48] (step=0056859) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.124828800626101, LR: 0.0003 +[2026-03-04 13:47:56] (step=0056860) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.125024457053414, LR: 0.0003 +[2026-03-04 13:48:04] (step=0056861) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.125220113480728, LR: 0.0003 +[2026-03-04 13:48:12] (step=0056862) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.125415769908042, LR: 0.0003 +[2026-03-04 13:48:20] (step=0056863) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.125611426335356, LR: 0.0003 +[2026-03-04 13:48:28] (step=0056864) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.12580708276267, LR: 0.0003 +[2026-03-04 13:48:36] (step=0056865) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.126002739189982, LR: 0.0003 +[2026-03-04 13:48:43] (step=0056866) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.126198395617296, LR: 0.0003 +[2026-03-04 13:48:51] (step=0056867) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.12639405204461, LR: 0.0003 +[2026-03-04 13:48:59] (step=0056868) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.126589708471924, LR: 0.0003 +[2026-03-04 13:49:07] (step=0056869) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.126785364899236, LR: 0.0003 +[2026-03-04 13:49:15] (step=0056870) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.12698102132655, LR: 0.0003 +[2026-03-04 13:49:23] (step=0056871) Train Loss: 0.4469, Train Steps/Sec: 0.12, Epoch: 11.127176677753864, LR: 0.0003 +[2026-03-04 13:49:31] (step=0056872) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.127372334181178, LR: 0.0003 +[2026-03-04 13:49:39] (step=0056873) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.127567990608492, LR: 0.0003 +[2026-03-04 13:49:46] (step=0056874) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.127763647035804, LR: 0.0003 +[2026-03-04 13:49:54] (step=0056875) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.127959303463118, LR: 0.0003 +[2026-03-04 13:50:02] (step=0056876) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.128154959890432, LR: 0.0003 +[2026-03-04 13:50:10] (step=0056877) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 11.128350616317746, LR: 0.0003 +[2026-03-04 13:50:18] (step=0056878) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.12854627274506, LR: 0.0003 +[2026-03-04 13:50:26] (step=0056879) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.128741929172373, LR: 0.0003 +[2026-03-04 13:50:34] (step=0056880) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.128937585599687, LR: 0.0003 +[2026-03-04 13:50:41] (step=0056881) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 11.129133242027, LR: 0.0003 +[2026-03-04 13:50:49] (step=0056882) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.129328898454315, LR: 0.0003 +[2026-03-04 13:50:57] (step=0056883) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.129524554881629, LR: 0.0003 +[2026-03-04 13:51:05] (step=0056884) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.12972021130894, LR: 0.0003 +[2026-03-04 13:51:13] (step=0056885) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 11.129915867736255, LR: 0.0003 +[2026-03-04 13:51:21] (step=0056886) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.130111524163569, LR: 0.0003 +[2026-03-04 13:51:29] (step=0056887) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.130307180590883, LR: 0.0003 +[2026-03-04 13:51:37] (step=0056888) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.130502837018197, LR: 0.0003 +[2026-03-04 13:51:44] (step=0056889) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.130698493445509, LR: 0.0003 +[2026-03-04 13:51:52] (step=0056890) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.130894149872823, LR: 0.0003 +[2026-03-04 13:52:00] (step=0056891) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.131089806300137, LR: 0.0003 +[2026-03-04 13:52:08] (step=0056892) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.131285462727451, LR: 0.0003 +[2026-03-04 13:52:16] (step=0056893) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.131481119154763, LR: 0.0003 +[2026-03-04 13:52:24] (step=0056894) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.131676775582077, LR: 0.0003 +[2026-03-04 13:52:32] (step=0056895) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.131872432009391, LR: 0.0003 +[2026-03-04 13:52:39] (step=0056896) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.132068088436705, LR: 0.0003 +[2026-03-04 13:52:47] (step=0056897) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.13226374486402, LR: 0.0003 +[2026-03-04 13:52:55] (step=0056898) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.132459401291332, LR: 0.0003 +[2026-03-04 13:53:03] (step=0056899) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.132655057718646, LR: 0.0003 +[2026-03-04 13:53:11] (step=0056900) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.13285071414596, LR: 0.0003 +[2026-03-04 13:53:19] (step=0056901) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 11.133046370573274, LR: 0.0003 +[2026-03-04 13:53:27] (step=0056902) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.133242027000588, LR: 0.0003 +[2026-03-04 13:53:34] (step=0056903) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.1334376834279, LR: 0.0003 +[2026-03-04 13:53:42] (step=0056904) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.133633339855214, LR: 0.0003 +[2026-03-04 13:53:50] (step=0056905) Train Loss: 0.4227, Train Steps/Sec: 0.13, Epoch: 11.133828996282528, LR: 0.0003 +[2026-03-04 13:53:58] (step=0056906) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.134024652709842, LR: 0.0003 +[2026-03-04 13:54:06] (step=0056907) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.134220309137156, LR: 0.0003 +[2026-03-04 13:54:14] (step=0056908) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.134415965564468, LR: 0.0003 +[2026-03-04 13:54:22] (step=0056909) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.134611621991782, LR: 0.0003 +[2026-03-04 13:54:30] (step=0056910) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.134807278419096, LR: 0.0003 +[2026-03-04 13:54:37] (step=0056911) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.13500293484641, LR: 0.0003 +[2026-03-04 13:54:45] (step=0056912) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.135198591273724, LR: 0.0003 +[2026-03-04 13:54:53] (step=0056913) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 11.135394247701036, LR: 0.0003 +[2026-03-04 13:55:01] (step=0056914) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.13558990412835, LR: 0.0003 +[2026-03-04 13:55:09] (step=0056915) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.135785560555664, LR: 0.0003 +[2026-03-04 13:55:17] (step=0056916) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 11.135981216982978, LR: 0.0003 +[2026-03-04 13:55:25] (step=0056917) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.136176873410292, LR: 0.0003 +[2026-03-04 13:55:33] (step=0056918) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.136372529837605, LR: 0.0003 +[2026-03-04 13:55:41] (step=0056919) Train Loss: 0.4435, Train Steps/Sec: 0.12, Epoch: 11.136568186264919, LR: 0.0003 +[2026-03-04 13:55:48] (step=0056920) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.136763842692233, LR: 0.0003 +[2026-03-04 13:55:56] (step=0056921) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.136959499119547, LR: 0.0003 +[2026-03-04 13:56:04] (step=0056922) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.137155155546859, LR: 0.0003 +[2026-03-04 13:56:12] (step=0056923) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 11.137350811974173, LR: 0.0003 +[2026-03-04 13:56:20] (step=0056924) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.137546468401487, LR: 0.0003 +[2026-03-04 13:56:28] (step=0056925) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.1377421248288, LR: 0.0003 +[2026-03-04 13:56:36] (step=0056926) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 11.137937781256115, LR: 0.0003 +[2026-03-04 13:56:43] (step=0056927) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.138133437683427, LR: 0.0003 +[2026-03-04 13:56:51] (step=0056928) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.138329094110741, LR: 0.0003 +[2026-03-04 13:56:59] (step=0056929) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.138524750538055, LR: 0.0003 +[2026-03-04 13:57:07] (step=0056930) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 11.13872040696537, LR: 0.0003 +[2026-03-04 13:57:15] (step=0056931) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.138916063392683, LR: 0.0003 +[2026-03-04 13:57:23] (step=0056932) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.139111719819995, LR: 0.0003 +[2026-03-04 13:57:31] (step=0056933) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.13930737624731, LR: 0.0003 +[2026-03-04 13:57:38] (step=0056934) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.139503032674623, LR: 0.0003 +[2026-03-04 13:57:46] (step=0056935) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 11.139698689101937, LR: 0.0003 +[2026-03-04 13:57:54] (step=0056936) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.139894345529251, LR: 0.0003 +[2026-03-04 13:58:02] (step=0056937) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.140090001956564, LR: 0.0003 +[2026-03-04 13:58:10] (step=0056938) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.140285658383878, LR: 0.0003 +[2026-03-04 13:58:18] (step=0056939) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.140481314811192, LR: 0.0003 +[2026-03-04 13:58:26] (step=0056940) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.140676971238506, LR: 0.0003 +[2026-03-04 13:58:33] (step=0056941) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.14087262766582, LR: 0.0003 +[2026-03-04 13:58:41] (step=0056942) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.141068284093132, LR: 0.0003 +[2026-03-04 13:58:49] (step=0056943) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.141263940520446, LR: 0.0003 +[2026-03-04 13:58:57] (step=0056944) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 11.14145959694776, LR: 0.0003 +[2026-03-04 13:59:05] (step=0056945) Train Loss: 0.4496, Train Steps/Sec: 0.12, Epoch: 11.141655253375074, LR: 0.0003 +[2026-03-04 13:59:13] (step=0056946) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.141850909802386, LR: 0.0003 +[2026-03-04 13:59:21] (step=0056947) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 11.1420465662297, LR: 0.0003 +[2026-03-04 13:59:29] (step=0056948) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.142242222657014, LR: 0.0003 +[2026-03-04 13:59:37] (step=0056949) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 11.142437879084328, LR: 0.0003 +[2026-03-04 13:59:44] (step=0056950) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.142633535511642, LR: 0.0003 +[2026-03-04 13:59:52] (step=0056951) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.142829191938954, LR: 0.0003 +[2026-03-04 14:00:00] (step=0056952) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.143024848366268, LR: 0.0003 +[2026-03-04 14:00:08] (step=0056953) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.143220504793582, LR: 0.0003 +[2026-03-04 14:00:16] (step=0056954) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 11.143416161220896, LR: 0.0003 +[2026-03-04 14:00:24] (step=0056955) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.14361181764821, LR: 0.0003 +[2026-03-04 14:00:32] (step=0056956) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.143807474075523, LR: 0.0003 +[2026-03-04 14:00:39] (step=0056957) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 11.144003130502837, LR: 0.0003 +[2026-03-04 14:00:47] (step=0056958) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.14419878693015, LR: 0.0003 +[2026-03-04 14:00:55] (step=0056959) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.144394443357465, LR: 0.0003 +[2026-03-04 14:01:03] (step=0056960) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.144590099784779, LR: 0.0003 +[2026-03-04 14:01:11] (step=0056961) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.14478575621209, LR: 0.0003 +[2026-03-04 14:01:19] (step=0056962) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.144981412639405, LR: 0.0003 +[2026-03-04 14:01:27] (step=0056963) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.145177069066719, LR: 0.0003 +[2026-03-04 14:01:34] (step=0056964) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.145372725494033, LR: 0.0003 +[2026-03-04 14:01:42] (step=0056965) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.145568381921347, LR: 0.0003 +[2026-03-04 14:01:50] (step=0056966) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.14576403834866, LR: 0.0003 +[2026-03-04 14:01:58] (step=0056967) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.145959694775973, LR: 0.0003 +[2026-03-04 14:02:06] (step=0056968) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.146155351203287, LR: 0.0003 +[2026-03-04 14:02:14] (step=0056969) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.146351007630601, LR: 0.0003 +[2026-03-04 14:02:22] (step=0056970) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 11.146546664057915, LR: 0.0003 +[2026-03-04 14:02:30] (step=0056971) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 11.146742320485227, LR: 0.0003 +[2026-03-04 14:02:37] (step=0056972) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.146937976912541, LR: 0.0003 +[2026-03-04 14:02:45] (step=0056973) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.147133633339855, LR: 0.0003 +[2026-03-04 14:02:53] (step=0056974) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.14732928976717, LR: 0.0003 +[2026-03-04 14:03:01] (step=0056975) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.147524946194482, LR: 0.0003 +[2026-03-04 14:03:09] (step=0056976) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.147720602621796, LR: 0.0003 +[2026-03-04 14:03:17] (step=0056977) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.14791625904911, LR: 0.0003 +[2026-03-04 14:03:25] (step=0056978) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.148111915476424, LR: 0.0003 +[2026-03-04 14:03:33] (step=0056979) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.148307571903738, LR: 0.0003 +[2026-03-04 14:03:40] (step=0056980) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.14850322833105, LR: 0.0003 +[2026-03-04 14:03:48] (step=0056981) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.148698884758364, LR: 0.0003 +[2026-03-04 14:03:56] (step=0056982) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 11.148894541185678, LR: 0.0003 +[2026-03-04 14:04:04] (step=0056983) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.149090197612992, LR: 0.0003 +[2026-03-04 14:04:12] (step=0056984) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.149285854040306, LR: 0.0003 +[2026-03-04 14:04:20] (step=0056985) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.149481510467618, LR: 0.0003 +[2026-03-04 14:04:28] (step=0056986) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.149677166894932, LR: 0.0003 +[2026-03-04 14:04:35] (step=0056987) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.149872823322246, LR: 0.0003 +[2026-03-04 14:04:43] (step=0056988) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 11.15006847974956, LR: 0.0003 +[2026-03-04 14:04:51] (step=0056989) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 11.150264136176874, LR: 0.0003 +[2026-03-04 14:04:59] (step=0056990) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 11.150459792604186, LR: 0.0003 +[2026-03-04 14:05:07] (step=0056991) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.1506554490315, LR: 0.0003 +[2026-03-04 14:05:15] (step=0056992) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.150851105458814, LR: 0.0003 +[2026-03-04 14:05:23] (step=0056993) Train Loss: 0.4396, Train Steps/Sec: 0.12, Epoch: 11.151046761886128, LR: 0.0003 +[2026-03-04 14:05:31] (step=0056994) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.151242418313442, LR: 0.0003 +[2026-03-04 14:05:39] (step=0056995) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.151438074740755, LR: 0.0003 +[2026-03-04 14:05:46] (step=0056996) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.151633731168069, LR: 0.0003 +[2026-03-04 14:05:54] (step=0056997) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.151829387595383, LR: 0.0003 +[2026-03-04 14:06:02] (step=0056998) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 11.152025044022697, LR: 0.0003 +[2026-03-04 14:06:10] (step=0056999) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.152220700450009, LR: 0.0003 +[2026-03-04 14:06:18] (step=0057000) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.152416356877323, LR: 0.0003 +[2026-03-04 14:06:18] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0057000/ +[2026-03-04 14:06:26] (step=0057001) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.152612013304637, LR: 0.0003 +[2026-03-04 14:06:34] (step=0057002) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.152807669731951, LR: 0.0003 +[2026-03-04 14:06:41] (step=0057003) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.153003326159265, LR: 0.0003 +[2026-03-04 14:06:49] (step=0057004) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.153198982586577, LR: 0.0003 +[2026-03-04 14:06:57] (step=0057005) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.153394639013891, LR: 0.0003 +[2026-03-04 14:07:05] (step=0057006) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.153590295441205, LR: 0.0003 +[2026-03-04 14:07:13] (step=0057007) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.15378595186852, LR: 0.0003 +[2026-03-04 14:07:21] (step=0057008) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.153981608295833, LR: 0.0003 +[2026-03-04 14:07:29] (step=0057009) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.154177264723145, LR: 0.0003 +[2026-03-04 14:07:37] (step=0057010) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.15437292115046, LR: 0.0003 +[2026-03-04 14:07:44] (step=0057011) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.154568577577773, LR: 0.0003 +[2026-03-04 14:07:52] (step=0057012) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.154764234005087, LR: 0.0003 +[2026-03-04 14:08:00] (step=0057013) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.154959890432401, LR: 0.0003 +[2026-03-04 14:08:08] (step=0057014) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.155155546859714, LR: 0.0003 +[2026-03-04 14:08:16] (step=0057015) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.155351203287028, LR: 0.0003 +[2026-03-04 14:08:24] (step=0057016) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 11.155546859714342, LR: 0.0003 +[2026-03-04 14:08:32] (step=0057017) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.155742516141656, LR: 0.0003 +[2026-03-04 14:08:39] (step=0057018) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.15593817256897, LR: 0.0003 +[2026-03-04 14:08:47] (step=0057019) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 11.156133828996282, LR: 0.0003 +[2026-03-04 14:08:55] (step=0057020) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.156329485423596, LR: 0.0003 +[2026-03-04 14:09:03] (step=0057021) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.15652514185091, LR: 0.0003 +[2026-03-04 14:09:11] (step=0057022) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.156720798278224, LR: 0.0003 +[2026-03-04 14:09:19] (step=0057023) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.156916454705538, LR: 0.0003 +[2026-03-04 14:09:27] (step=0057024) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.15711211113285, LR: 0.0003 +[2026-03-04 14:09:35] (step=0057025) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.157307767560164, LR: 0.0003 +[2026-03-04 14:09:42] (step=0057026) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.157503423987478, LR: 0.0003 +[2026-03-04 14:09:50] (step=0057027) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.157699080414792, LR: 0.0003 +[2026-03-04 14:09:58] (step=0057028) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.157894736842104, LR: 0.0003 +[2026-03-04 14:10:06] (step=0057029) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.158090393269418, LR: 0.0003 +[2026-03-04 14:10:14] (step=0057030) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.158286049696732, LR: 0.0003 +[2026-03-04 14:10:22] (step=0057031) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.158481706124046, LR: 0.0003 +[2026-03-04 14:10:30] (step=0057032) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.15867736255136, LR: 0.0003 +[2026-03-04 14:10:37] (step=0057033) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.158873018978673, LR: 0.0003 +[2026-03-04 14:10:45] (step=0057034) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.159068675405987, LR: 0.0003 +[2026-03-04 14:10:53] (step=0057035) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.1592643318333, LR: 0.0003 +[2026-03-04 14:11:01] (step=0057036) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.159459988260615, LR: 0.0003 +[2026-03-04 14:11:09] (step=0057037) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.159655644687929, LR: 0.0003 +[2026-03-04 14:11:17] (step=0057038) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.159851301115241, LR: 0.0003 +[2026-03-04 14:11:25] (step=0057039) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 11.160046957542555, LR: 0.0003 +[2026-03-04 14:11:33] (step=0057040) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 11.160242613969869, LR: 0.0003 +[2026-03-04 14:11:41] (step=0057041) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 11.160438270397183, LR: 0.0003 +[2026-03-04 14:11:48] (step=0057042) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.160633926824497, LR: 0.0003 +[2026-03-04 14:11:56] (step=0057043) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 11.16082958325181, LR: 0.0003 +[2026-03-04 14:12:04] (step=0057044) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.161025239679123, LR: 0.0003 +[2026-03-04 14:12:12] (step=0057045) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.161220896106437, LR: 0.0003 +[2026-03-04 14:12:20] (step=0057046) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.161416552533751, LR: 0.0003 +[2026-03-04 14:12:28] (step=0057047) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.161612208961065, LR: 0.0003 +[2026-03-04 14:12:36] (step=0057048) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.161807865388377, LR: 0.0003 +[2026-03-04 14:12:43] (step=0057049) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.162003521815691, LR: 0.0003 +[2026-03-04 14:12:51] (step=0057050) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.162199178243005, LR: 0.0003 +[2026-03-04 14:12:59] (step=0057051) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.16239483467032, LR: 0.0003 +[2026-03-04 14:13:07] (step=0057052) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.162590491097632, LR: 0.0003 +[2026-03-04 14:13:15] (step=0057053) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.162786147524946, LR: 0.0003 +[2026-03-04 14:13:23] (step=0057054) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.16298180395226, LR: 0.0003 +[2026-03-04 14:13:31] (step=0057055) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.163177460379574, LR: 0.0003 +[2026-03-04 14:13:38] (step=0057056) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.163373116806888, LR: 0.0003 +[2026-03-04 14:13:46] (step=0057057) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.1635687732342, LR: 0.0003 +[2026-03-04 14:13:54] (step=0057058) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.163764429661514, LR: 0.0003 +[2026-03-04 14:14:02] (step=0057059) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.163960086088828, LR: 0.0003 +[2026-03-04 14:14:10] (step=0057060) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 11.164155742516142, LR: 0.0003 +[2026-03-04 14:14:18] (step=0057061) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.164351398943456, LR: 0.0003 +[2026-03-04 14:14:26] (step=0057062) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.164547055370768, LR: 0.0003 +[2026-03-04 14:14:33] (step=0057063) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.164742711798082, LR: 0.0003 +[2026-03-04 14:14:41] (step=0057064) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.164938368225396, LR: 0.0003 +[2026-03-04 14:14:49] (step=0057065) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.16513402465271, LR: 0.0003 +[2026-03-04 14:14:57] (step=0057066) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.165329681080024, LR: 0.0003 +[2026-03-04 14:15:05] (step=0057067) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.165525337507336, LR: 0.0003 +[2026-03-04 14:15:13] (step=0057068) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.16572099393465, LR: 0.0003 +[2026-03-04 14:15:21] (step=0057069) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.165916650361964, LR: 0.0003 +[2026-03-04 14:15:29] (step=0057070) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.166112306789278, LR: 0.0003 +[2026-03-04 14:15:36] (step=0057071) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 11.166307963216592, LR: 0.0003 +[2026-03-04 14:15:44] (step=0057072) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.166503619643905, LR: 0.0003 +[2026-03-04 14:15:52] (step=0057073) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 11.166699276071219, LR: 0.0003 +[2026-03-04 14:16:00] (step=0057074) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.166894932498533, LR: 0.0003 +[2026-03-04 14:16:08] (step=0057075) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.167090588925847, LR: 0.0003 +[2026-03-04 14:16:16] (step=0057076) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.16728624535316, LR: 0.0003 +[2026-03-04 14:16:24] (step=0057077) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.167481901780473, LR: 0.0003 +[2026-03-04 14:16:31] (step=0057078) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.167677558207787, LR: 0.0003 +[2026-03-04 14:16:39] (step=0057079) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.167873214635101, LR: 0.0003 +[2026-03-04 14:16:47] (step=0057080) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.168068871062415, LR: 0.0003 +[2026-03-04 14:16:55] (step=0057081) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.168264527489727, LR: 0.0003 +[2026-03-04 14:17:03] (step=0057082) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.168460183917041, LR: 0.0003 +[2026-03-04 14:17:11] (step=0057083) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.168655840344355, LR: 0.0003 +[2026-03-04 14:17:19] (step=0057084) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.16885149677167, LR: 0.0003 +[2026-03-04 14:17:26] (step=0057085) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.169047153198983, LR: 0.0003 +[2026-03-04 14:17:34] (step=0057086) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.169242809626295, LR: 0.0003 +[2026-03-04 14:17:42] (step=0057087) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.16943846605361, LR: 0.0003 +[2026-03-04 14:17:50] (step=0057088) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.169634122480923, LR: 0.0003 +[2026-03-04 14:17:58] (step=0057089) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.169829778908237, LR: 0.0003 +[2026-03-04 14:18:06] (step=0057090) Train Loss: 0.4345, Train Steps/Sec: 0.12, Epoch: 11.170025435335551, LR: 0.0003 +[2026-03-04 14:18:14] (step=0057091) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.170221091762864, LR: 0.0003 +[2026-03-04 14:18:22] (step=0057092) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.170416748190178, LR: 0.0003 +[2026-03-04 14:18:30] (step=0057093) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.170612404617492, LR: 0.0003 +[2026-03-04 14:18:37] (step=0057094) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.170808061044806, LR: 0.0003 +[2026-03-04 14:18:45] (step=0057095) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.17100371747212, LR: 0.0003 +[2026-03-04 14:18:53] (step=0057096) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 11.171199373899432, LR: 0.0003 +[2026-03-04 14:19:01] (step=0057097) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.171395030326746, LR: 0.0003 +[2026-03-04 14:19:09] (step=0057098) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.17159068675406, LR: 0.0003 +[2026-03-04 14:19:17] (step=0057099) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.171786343181374, LR: 0.0003 +[2026-03-04 14:19:25] (step=0057100) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.171981999608688, LR: 0.0003 +[2026-03-04 14:19:32] (step=0057101) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.172177656036, LR: 0.0003 +[2026-03-04 14:19:40] (step=0057102) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 11.172373312463314, LR: 0.0003 +[2026-03-04 14:19:48] (step=0057103) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.172568968890628, LR: 0.0003 +[2026-03-04 14:19:56] (step=0057104) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.172764625317942, LR: 0.0003 +[2026-03-04 14:20:04] (step=0057105) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.172960281745254, LR: 0.0003 +[2026-03-04 14:20:12] (step=0057106) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.173155938172568, LR: 0.0003 +[2026-03-04 14:20:20] (step=0057107) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.173351594599882, LR: 0.0003 +[2026-03-04 14:20:27] (step=0057108) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.173547251027196, LR: 0.0003 +[2026-03-04 14:20:35] (step=0057109) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.17374290745451, LR: 0.0003 +[2026-03-04 14:20:43] (step=0057110) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.173938563881823, LR: 0.0003 +[2026-03-04 14:20:51] (step=0057111) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.174134220309137, LR: 0.0003 +[2026-03-04 14:20:59] (step=0057112) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 11.17432987673645, LR: 0.0003 +[2026-03-04 14:21:07] (step=0057113) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.174525533163765, LR: 0.0003 +[2026-03-04 14:21:15] (step=0057114) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.174721189591079, LR: 0.0003 +[2026-03-04 14:21:22] (step=0057115) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.174916846018391, LR: 0.0003 +[2026-03-04 14:21:30] (step=0057116) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.175112502445705, LR: 0.0003 +[2026-03-04 14:21:38] (step=0057117) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.175308158873019, LR: 0.0003 +[2026-03-04 14:21:46] (step=0057118) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.175503815300333, LR: 0.0003 +[2026-03-04 14:21:54] (step=0057119) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.175699471727647, LR: 0.0003 +[2026-03-04 14:22:02] (step=0057120) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.17589512815496, LR: 0.0003 +[2026-03-04 14:22:10] (step=0057121) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.176090784582273, LR: 0.0003 +[2026-03-04 14:22:18] (step=0057122) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.176286441009587, LR: 0.0003 +[2026-03-04 14:22:25] (step=0057123) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 11.176482097436901, LR: 0.0003 +[2026-03-04 14:22:33] (step=0057124) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.176677753864215, LR: 0.0003 +[2026-03-04 14:22:41] (step=0057125) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.176873410291527, LR: 0.0003 +[2026-03-04 14:22:49] (step=0057126) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.177069066718841, LR: 0.0003 +[2026-03-04 14:22:57] (step=0057127) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 11.177264723146155, LR: 0.0003 +[2026-03-04 14:23:05] (step=0057128) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.17746037957347, LR: 0.0003 +[2026-03-04 14:23:13] (step=0057129) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.177656036000784, LR: 0.0003 +[2026-03-04 14:23:21] (step=0057130) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 11.177851692428096, LR: 0.0003 +[2026-03-04 14:23:28] (step=0057131) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.17804734885541, LR: 0.0003 +[2026-03-04 14:23:36] (step=0057132) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 11.178243005282724, LR: 0.0003 +[2026-03-04 14:23:44] (step=0057133) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.178438661710038, LR: 0.0003 +[2026-03-04 14:23:52] (step=0057134) Train Loss: 0.4679, Train Steps/Sec: 0.13, Epoch: 11.17863431813735, LR: 0.0003 +[2026-03-04 14:24:00] (step=0057135) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.178829974564664, LR: 0.0003 +[2026-03-04 14:24:08] (step=0057136) Train Loss: 0.4692, Train Steps/Sec: 0.13, Epoch: 11.179025630991978, LR: 0.0003 +[2026-03-04 14:24:16] (step=0057137) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.179221287419292, LR: 0.0003 +[2026-03-04 14:24:24] (step=0057138) Train Loss: 0.4504, Train Steps/Sec: 0.12, Epoch: 11.179416943846606, LR: 0.0003 +[2026-03-04 14:24:31] (step=0057139) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.179612600273918, LR: 0.0003 +[2026-03-04 14:24:39] (step=0057140) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.179808256701232, LR: 0.0003 +[2026-03-04 14:24:47] (step=0057141) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.180003913128546, LR: 0.0003 +[2026-03-04 14:24:55] (step=0057142) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.18019956955586, LR: 0.0003 +[2026-03-04 14:25:03] (step=0057143) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 11.180395225983174, LR: 0.0003 +[2026-03-04 14:25:11] (step=0057144) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.180590882410486, LR: 0.0003 +[2026-03-04 14:25:19] (step=0057145) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 11.1807865388378, LR: 0.0003 +[2026-03-04 14:25:26] (step=0057146) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.180982195265114, LR: 0.0003 +[2026-03-04 14:25:34] (step=0057147) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.181177851692429, LR: 0.0003 +[2026-03-04 14:25:42] (step=0057148) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.181373508119743, LR: 0.0003 +[2026-03-04 14:25:50] (step=0057149) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 11.181569164547055, LR: 0.0003 +[2026-03-04 14:25:58] (step=0057150) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 11.181764820974369, LR: 0.0003 +[2026-03-04 14:26:06] (step=0057151) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.181960477401683, LR: 0.0003 +[2026-03-04 14:26:14] (step=0057152) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.182156133828997, LR: 0.0003 +[2026-03-04 14:26:22] (step=0057153) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.18235179025631, LR: 0.0003 +[2026-03-04 14:26:29] (step=0057154) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.182547446683623, LR: 0.0003 +[2026-03-04 14:26:37] (step=0057155) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.182743103110937, LR: 0.0003 +[2026-03-04 14:26:45] (step=0057156) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.182938759538251, LR: 0.0003 +[2026-03-04 14:26:53] (step=0057157) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.183134415965565, LR: 0.0003 +[2026-03-04 14:27:01] (step=0057158) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.183330072392877, LR: 0.0003 +[2026-03-04 14:27:09] (step=0057159) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.183525728820191, LR: 0.0003 +[2026-03-04 14:27:17] (step=0057160) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 11.183721385247505, LR: 0.0003 +[2026-03-04 14:27:24] (step=0057161) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.18391704167482, LR: 0.0003 +[2026-03-04 14:27:32] (step=0057162) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 11.184112698102133, LR: 0.0003 +[2026-03-04 14:27:40] (step=0057163) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.184308354529445, LR: 0.0003 +[2026-03-04 14:27:48] (step=0057164) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.18450401095676, LR: 0.0003 +[2026-03-04 14:27:56] (step=0057165) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.184699667384074, LR: 0.0003 +[2026-03-04 14:28:04] (step=0057166) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.184895323811388, LR: 0.0003 +[2026-03-04 14:28:12] (step=0057167) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.185090980238702, LR: 0.0003 +[2026-03-04 14:28:20] (step=0057168) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.185286636666014, LR: 0.0003 +[2026-03-04 14:28:27] (step=0057169) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.185482293093328, LR: 0.0003 +[2026-03-04 14:28:35] (step=0057170) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.185677949520642, LR: 0.0003 +[2026-03-04 14:28:43] (step=0057171) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 11.185873605947956, LR: 0.0003 +[2026-03-04 14:28:51] (step=0057172) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.18606926237527, LR: 0.0003 +[2026-03-04 14:28:59] (step=0057173) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.186264918802582, LR: 0.0003 +[2026-03-04 14:29:07] (step=0057174) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.186460575229896, LR: 0.0003 +[2026-03-04 14:29:15] (step=0057175) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 11.18665623165721, LR: 0.0003 +[2026-03-04 14:29:23] (step=0057176) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.186851888084524, LR: 0.0003 +[2026-03-04 14:29:30] (step=0057177) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.187047544511838, LR: 0.0003 +[2026-03-04 14:29:38] (step=0057178) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.18724320093915, LR: 0.0003 +[2026-03-04 14:29:46] (step=0057179) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.187438857366464, LR: 0.0003 +[2026-03-04 14:29:54] (step=0057180) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.187634513793778, LR: 0.0003 +[2026-03-04 14:30:02] (step=0057181) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.187830170221092, LR: 0.0003 +[2026-03-04 14:30:10] (step=0057182) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.188025826648406, LR: 0.0003 +[2026-03-04 14:30:18] (step=0057183) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.188221483075719, LR: 0.0003 +[2026-03-04 14:30:25] (step=0057184) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.188417139503033, LR: 0.0003 +[2026-03-04 14:30:33] (step=0057185) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.188612795930347, LR: 0.0003 +[2026-03-04 14:30:41] (step=0057186) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.18880845235766, LR: 0.0003 +[2026-03-04 14:30:49] (step=0057187) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.189004108784973, LR: 0.0003 +[2026-03-04 14:30:57] (step=0057188) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.189199765212287, LR: 0.0003 +[2026-03-04 14:31:05] (step=0057189) Train Loss: 0.4501, Train Steps/Sec: 0.12, Epoch: 11.1893954216396, LR: 0.0003 +[2026-03-04 14:31:13] (step=0057190) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.189591078066915, LR: 0.0003 +[2026-03-04 14:31:21] (step=0057191) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.189786734494229, LR: 0.0003 +[2026-03-04 14:31:28] (step=0057192) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.189982390921541, LR: 0.0003 +[2026-03-04 14:31:36] (step=0057193) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.190178047348855, LR: 0.0003 +[2026-03-04 14:31:44] (step=0057194) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.190373703776169, LR: 0.0003 +[2026-03-04 14:31:52] (step=0057195) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.190569360203483, LR: 0.0003 +[2026-03-04 14:32:00] (step=0057196) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.190765016630797, LR: 0.0003 +[2026-03-04 14:32:08] (step=0057197) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.19096067305811, LR: 0.0003 +[2026-03-04 14:32:16] (step=0057198) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.191156329485423, LR: 0.0003 +[2026-03-04 14:32:23] (step=0057199) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.191351985912737, LR: 0.0003 +[2026-03-04 14:32:31] (step=0057200) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.191547642340051, LR: 0.0003 +[2026-03-04 14:32:39] (step=0057201) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.191743298767365, LR: 0.0003 +[2026-03-04 14:32:47] (step=0057202) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.191938955194678, LR: 0.0003 +[2026-03-04 14:32:55] (step=0057203) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.192134611621992, LR: 0.0003 +[2026-03-04 14:33:03] (step=0057204) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.192330268049306, LR: 0.0003 +[2026-03-04 14:33:11] (step=0057205) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.19252592447662, LR: 0.0003 +[2026-03-04 14:33:18] (step=0057206) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.192721580903934, LR: 0.0003 +[2026-03-04 14:33:26] (step=0057207) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.192917237331246, LR: 0.0003 +[2026-03-04 14:33:34] (step=0057208) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.19311289375856, LR: 0.0003 +[2026-03-04 14:33:42] (step=0057209) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.193308550185874, LR: 0.0003 +[2026-03-04 14:33:50] (step=0057210) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.193504206613188, LR: 0.0003 +[2026-03-04 14:33:58] (step=0057211) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.1936998630405, LR: 0.0003 +[2026-03-04 14:34:06] (step=0057212) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.193895519467814, LR: 0.0003 +[2026-03-04 14:34:14] (step=0057213) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.194091175895128, LR: 0.0003 +[2026-03-04 14:34:21] (step=0057214) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.194286832322442, LR: 0.0003 +[2026-03-04 14:34:29] (step=0057215) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.194482488749756, LR: 0.0003 +[2026-03-04 14:34:37] (step=0057216) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 11.194678145177068, LR: 0.0003 +[2026-03-04 14:34:45] (step=0057217) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 11.194873801604382, LR: 0.0003 +[2026-03-04 14:34:53] (step=0057218) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.195069458031696, LR: 0.0003 +[2026-03-04 14:35:01] (step=0057219) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.19526511445901, LR: 0.0003 +[2026-03-04 14:35:09] (step=0057220) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.195460770886324, LR: 0.0003 +[2026-03-04 14:35:17] (step=0057221) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.195656427313637, LR: 0.0003 +[2026-03-04 14:35:24] (step=0057222) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.19585208374095, LR: 0.0003 +[2026-03-04 14:35:32] (step=0057223) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.196047740168265, LR: 0.0003 +[2026-03-04 14:35:40] (step=0057224) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.196243396595579, LR: 0.0003 +[2026-03-04 14:35:48] (step=0057225) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.196439053022893, LR: 0.0003 +[2026-03-04 14:35:56] (step=0057226) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.196634709450205, LR: 0.0003 +[2026-03-04 14:36:04] (step=0057227) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.196830365877519, LR: 0.0003 +[2026-03-04 14:36:12] (step=0057228) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.197026022304833, LR: 0.0003 +[2026-03-04 14:36:20] (step=0057229) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.197221678732147, LR: 0.0003 +[2026-03-04 14:36:27] (step=0057230) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 11.19741733515946, LR: 0.0003 +[2026-03-04 14:36:35] (step=0057231) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 11.197612991586773, LR: 0.0003 +[2026-03-04 14:36:43] (step=0057232) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.197808648014087, LR: 0.0003 +[2026-03-04 14:36:51] (step=0057233) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.198004304441401, LR: 0.0003 +[2026-03-04 14:36:59] (step=0057234) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.198199960868715, LR: 0.0003 +[2026-03-04 14:37:07] (step=0057235) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.198395617296029, LR: 0.0003 +[2026-03-04 14:37:15] (step=0057236) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.198591273723341, LR: 0.0003 +[2026-03-04 14:37:22] (step=0057237) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.198786930150655, LR: 0.0003 +[2026-03-04 14:37:30] (step=0057238) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.19898258657797, LR: 0.0003 +[2026-03-04 14:37:38] (step=0057239) Train Loss: 0.4512, Train Steps/Sec: 0.12, Epoch: 11.199178243005283, LR: 0.0003 +[2026-03-04 14:37:46] (step=0057240) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.199373899432596, LR: 0.0003 +[2026-03-04 14:37:54] (step=0057241) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.19956955585991, LR: 0.0003 +[2026-03-04 14:38:02] (step=0057242) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.199765212287224, LR: 0.0003 +[2026-03-04 14:38:10] (step=0057243) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.199960868714538, LR: 0.0003 +[2026-03-04 14:38:18] (step=0057244) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.200156525141852, LR: 0.0003 +[2026-03-04 14:38:25] (step=0057245) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.200352181569164, LR: 0.0003 +[2026-03-04 14:38:33] (step=0057246) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.200547837996478, LR: 0.0003 +[2026-03-04 14:38:41] (step=0057247) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.200743494423792, LR: 0.0003 +[2026-03-04 14:38:49] (step=0057248) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 11.200939150851106, LR: 0.0003 +[2026-03-04 14:38:57] (step=0057249) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.20113480727842, LR: 0.0003 +[2026-03-04 14:39:05] (step=0057250) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.201330463705732, LR: 0.0003 +[2026-03-04 14:39:13] (step=0057251) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.201526120133046, LR: 0.0003 +[2026-03-04 14:39:20] (step=0057252) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.20172177656036, LR: 0.0003 +[2026-03-04 14:39:28] (step=0057253) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.201917432987674, LR: 0.0003 +[2026-03-04 14:39:36] (step=0057254) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.202113089414988, LR: 0.0003 +[2026-03-04 14:39:44] (step=0057255) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.2023087458423, LR: 0.0003 +[2026-03-04 14:39:52] (step=0057256) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 11.202504402269614, LR: 0.0003 +[2026-03-04 14:40:00] (step=0057257) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.202700058696928, LR: 0.0003 +[2026-03-04 14:40:08] (step=0057258) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.202895715124242, LR: 0.0003 +[2026-03-04 14:40:16] (step=0057259) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.203091371551556, LR: 0.0003 +[2026-03-04 14:40:23] (step=0057260) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.203287027978869, LR: 0.0003 +[2026-03-04 14:40:31] (step=0057261) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.203482684406183, LR: 0.0003 +[2026-03-04 14:40:39] (step=0057262) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 11.203678340833497, LR: 0.0003 +[2026-03-04 14:40:47] (step=0057263) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.20387399726081, LR: 0.0003 +[2026-03-04 14:40:55] (step=0057264) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.204069653688123, LR: 0.0003 +[2026-03-04 14:41:03] (step=0057265) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.204265310115437, LR: 0.0003 +[2026-03-04 14:41:11] (step=0057266) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.20446096654275, LR: 0.0003 +[2026-03-04 14:41:18] (step=0057267) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.204656622970065, LR: 0.0003 +[2026-03-04 14:41:26] (step=0057268) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.204852279397379, LR: 0.0003 +[2026-03-04 14:41:34] (step=0057269) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.205047935824691, LR: 0.0003 +[2026-03-04 14:41:42] (step=0057270) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.205243592252005, LR: 0.0003 +[2026-03-04 14:41:50] (step=0057271) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.205439248679319, LR: 0.0003 +[2026-03-04 14:41:58] (step=0057272) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.205634905106633, LR: 0.0003 +[2026-03-04 14:42:06] (step=0057273) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.205830561533947, LR: 0.0003 +[2026-03-04 14:42:14] (step=0057274) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 11.20602621796126, LR: 0.0003 +[2026-03-04 14:42:21] (step=0057275) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.206221874388573, LR: 0.0003 +[2026-03-04 14:42:29] (step=0057276) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.206417530815887, LR: 0.0003 +[2026-03-04 14:42:37] (step=0057277) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.206613187243201, LR: 0.0003 +[2026-03-04 14:42:45] (step=0057278) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.206808843670515, LR: 0.0003 +[2026-03-04 14:42:53] (step=0057279) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.207004500097828, LR: 0.0003 +[2026-03-04 14:43:01] (step=0057280) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.207200156525142, LR: 0.0003 +[2026-03-04 14:43:09] (step=0057281) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.207395812952456, LR: 0.0003 +[2026-03-04 14:43:16] (step=0057282) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.20759146937977, LR: 0.0003 +[2026-03-04 14:43:24] (step=0057283) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.207787125807084, LR: 0.0003 +[2026-03-04 14:43:32] (step=0057284) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.207982782234396, LR: 0.0003 +[2026-03-04 14:43:40] (step=0057285) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.20817843866171, LR: 0.0003 +[2026-03-04 14:43:48] (step=0057286) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.208374095089024, LR: 0.0003 +[2026-03-04 14:43:56] (step=0057287) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.208569751516338, LR: 0.0003 +[2026-03-04 14:44:04] (step=0057288) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 11.20876540794365, LR: 0.0003 +[2026-03-04 14:44:12] (step=0057289) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 11.208961064370964, LR: 0.0003 +[2026-03-04 14:44:19] (step=0057290) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.209156720798278, LR: 0.0003 +[2026-03-04 14:44:27] (step=0057291) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.209352377225592, LR: 0.0003 +[2026-03-04 14:44:35] (step=0057292) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.209548033652906, LR: 0.0003 +[2026-03-04 14:44:43] (step=0057293) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.209743690080218, LR: 0.0003 +[2026-03-04 14:44:51] (step=0057294) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.209939346507532, LR: 0.0003 +[2026-03-04 14:44:59] (step=0057295) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.210135002934846, LR: 0.0003 +[2026-03-04 14:45:07] (step=0057296) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.21033065936216, LR: 0.0003 +[2026-03-04 14:45:15] (step=0057297) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.210526315789474, LR: 0.0003 +[2026-03-04 14:45:22] (step=0057298) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.210721972216787, LR: 0.0003 +[2026-03-04 14:45:30] (step=0057299) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.2109176286441, LR: 0.0003 +[2026-03-04 14:45:38] (step=0057300) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.211113285071415, LR: 0.0003 +[2026-03-04 14:45:46] (step=0057301) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.211308941498729, LR: 0.0003 +[2026-03-04 14:45:54] (step=0057302) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.211504597926043, LR: 0.0003 +[2026-03-04 14:46:02] (step=0057303) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.211700254353355, LR: 0.0003 +[2026-03-04 14:46:10] (step=0057304) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.211895910780669, LR: 0.0003 +[2026-03-04 14:46:17] (step=0057305) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.212091567207983, LR: 0.0003 +[2026-03-04 14:46:25] (step=0057306) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.212287223635297, LR: 0.0003 +[2026-03-04 14:46:33] (step=0057307) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.21248288006261, LR: 0.0003 +[2026-03-04 14:46:41] (step=0057308) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.212678536489923, LR: 0.0003 +[2026-03-04 14:46:49] (step=0057309) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 11.212874192917237, LR: 0.0003 +[2026-03-04 14:46:57] (step=0057310) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.213069849344551, LR: 0.0003 +[2026-03-04 14:47:05] (step=0057311) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.213265505771865, LR: 0.0003 +[2026-03-04 14:47:12] (step=0057312) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.213461162199179, LR: 0.0003 +[2026-03-04 14:47:20] (step=0057313) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 11.213656818626491, LR: 0.0003 +[2026-03-04 14:47:28] (step=0057314) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.213852475053805, LR: 0.0003 +[2026-03-04 14:47:36] (step=0057315) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.21404813148112, LR: 0.0003 +[2026-03-04 14:47:44] (step=0057316) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.214243787908433, LR: 0.0003 +[2026-03-04 14:47:52] (step=0057317) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.214439444335746, LR: 0.0003 +[2026-03-04 14:48:00] (step=0057318) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.21463510076306, LR: 0.0003 +[2026-03-04 14:48:08] (step=0057319) Train Loss: 0.4461, Train Steps/Sec: 0.12, Epoch: 11.214830757190374, LR: 0.0003 +[2026-03-04 14:48:15] (step=0057320) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.215026413617688, LR: 0.0003 +[2026-03-04 14:48:23] (step=0057321) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.215222070045002, LR: 0.0003 +[2026-03-04 14:48:31] (step=0057322) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.215417726472314, LR: 0.0003 +[2026-03-04 14:48:39] (step=0057323) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.215613382899628, LR: 0.0003 +[2026-03-04 14:48:47] (step=0057324) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.215809039326942, LR: 0.0003 +[2026-03-04 14:48:55] (step=0057325) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.216004695754256, LR: 0.0003 +[2026-03-04 14:49:03] (step=0057326) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.21620035218157, LR: 0.0003 +[2026-03-04 14:49:10] (step=0057327) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.216396008608882, LR: 0.0003 +[2026-03-04 14:49:18] (step=0057328) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.216591665036196, LR: 0.0003 +[2026-03-04 14:49:26] (step=0057329) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.21678732146351, LR: 0.0003 +[2026-03-04 14:49:34] (step=0057330) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.216982977890824, LR: 0.0003 +[2026-03-04 14:49:42] (step=0057331) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.217178634318138, LR: 0.0003 +[2026-03-04 14:49:50] (step=0057332) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.21737429074545, LR: 0.0003 +[2026-03-04 14:49:58] (step=0057333) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.217569947172764, LR: 0.0003 +[2026-03-04 14:50:06] (step=0057334) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.217765603600078, LR: 0.0003 +[2026-03-04 14:50:13] (step=0057335) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.217961260027392, LR: 0.0003 +[2026-03-04 14:50:21] (step=0057336) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 11.218156916454706, LR: 0.0003 +[2026-03-04 14:50:29] (step=0057337) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.218352572882019, LR: 0.0003 +[2026-03-04 14:50:37] (step=0057338) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.218548229309333, LR: 0.0003 +[2026-03-04 14:50:45] (step=0057339) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.218743885736647, LR: 0.0003 +[2026-03-04 14:50:53] (step=0057340) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.21893954216396, LR: 0.0003 +[2026-03-04 14:51:01] (step=0057341) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 11.219135198591273, LR: 0.0003 +[2026-03-04 14:51:09] (step=0057342) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.219330855018587, LR: 0.0003 +[2026-03-04 14:51:17] (step=0057343) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.2195265114459, LR: 0.0003 +[2026-03-04 14:51:24] (step=0057344) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.219722167873215, LR: 0.0003 +[2026-03-04 14:51:32] (step=0057345) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.219917824300529, LR: 0.0003 +[2026-03-04 14:51:40] (step=0057346) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.220113480727841, LR: 0.0003 +[2026-03-04 14:51:48] (step=0057347) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.220309137155155, LR: 0.0003 +[2026-03-04 14:51:56] (step=0057348) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.220504793582469, LR: 0.0003 +[2026-03-04 14:52:04] (step=0057349) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.220700450009783, LR: 0.0003 +[2026-03-04 14:52:12] (step=0057350) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.220896106437097, LR: 0.0003 +[2026-03-04 14:52:19] (step=0057351) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.22109176286441, LR: 0.0003 +[2026-03-04 14:52:27] (step=0057352) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.221287419291723, LR: 0.0003 +[2026-03-04 14:52:35] (step=0057353) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.221483075719037, LR: 0.0003 +[2026-03-04 14:52:43] (step=0057354) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.221678732146351, LR: 0.0003 +[2026-03-04 14:52:51] (step=0057355) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.221874388573665, LR: 0.0003 +[2026-03-04 14:52:59] (step=0057356) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.222070045000978, LR: 0.0003 +[2026-03-04 14:53:07] (step=0057357) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.222265701428292, LR: 0.0003 +[2026-03-04 14:53:15] (step=0057358) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.222461357855606, LR: 0.0003 +[2026-03-04 14:53:22] (step=0057359) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.22265701428292, LR: 0.0003 +[2026-03-04 14:53:30] (step=0057360) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.222852670710234, LR: 0.0003 +[2026-03-04 14:53:38] (step=0057361) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.223048327137546, LR: 0.0003 +[2026-03-04 14:53:46] (step=0057362) Train Loss: 0.4237, Train Steps/Sec: 0.13, Epoch: 11.22324398356486, LR: 0.0003 +[2026-03-04 14:53:54] (step=0057363) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.223439639992174, LR: 0.0003 +[2026-03-04 14:54:02] (step=0057364) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.223635296419488, LR: 0.0003 +[2026-03-04 14:54:10] (step=0057365) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.223830952846802, LR: 0.0003 +[2026-03-04 14:54:17] (step=0057366) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.224026609274114, LR: 0.0003 +[2026-03-04 14:54:25] (step=0057367) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.224222265701428, LR: 0.0003 +[2026-03-04 14:54:33] (step=0057368) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.224417922128742, LR: 0.0003 +[2026-03-04 14:54:41] (step=0057369) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.224613578556056, LR: 0.0003 +[2026-03-04 14:54:49] (step=0057370) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 11.224809234983368, LR: 0.0003 +[2026-03-04 14:54:57] (step=0057371) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.225004891410682, LR: 0.0003 +[2026-03-04 14:55:05] (step=0057372) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.225200547837996, LR: 0.0003 +[2026-03-04 14:55:13] (step=0057373) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.22539620426531, LR: 0.0003 +[2026-03-04 14:55:20] (step=0057374) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.225591860692624, LR: 0.0003 +[2026-03-04 14:55:28] (step=0057375) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.225787517119937, LR: 0.0003 +[2026-03-04 14:55:36] (step=0057376) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.22598317354725, LR: 0.0003 +[2026-03-04 14:55:44] (step=0057377) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.226178829974565, LR: 0.0003 +[2026-03-04 14:55:52] (step=0057378) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.226374486401879, LR: 0.0003 +[2026-03-04 14:56:00] (step=0057379) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 11.226570142829193, LR: 0.0003 +[2026-03-04 14:56:08] (step=0057380) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.226765799256505, LR: 0.0003 +[2026-03-04 14:56:15] (step=0057381) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.226961455683819, LR: 0.0003 +[2026-03-04 14:56:23] (step=0057382) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.227157112111133, LR: 0.0003 +[2026-03-04 14:56:31] (step=0057383) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.227352768538447, LR: 0.0003 +[2026-03-04 14:56:39] (step=0057384) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.22754842496576, LR: 0.0003 +[2026-03-04 14:56:47] (step=0057385) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.227744081393073, LR: 0.0003 +[2026-03-04 14:56:55] (step=0057386) Train Loss: 0.4594, Train Steps/Sec: 0.12, Epoch: 11.227939737820387, LR: 0.0003 +[2026-03-04 14:57:03] (step=0057387) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.228135394247701, LR: 0.0003 +[2026-03-04 14:57:11] (step=0057388) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.228331050675015, LR: 0.0003 +[2026-03-04 14:57:18] (step=0057389) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.22852670710233, LR: 0.0003 +[2026-03-04 14:57:26] (step=0057390) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.228722363529641, LR: 0.0003 +[2026-03-04 14:57:34] (step=0057391) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.228918019956955, LR: 0.0003 +[2026-03-04 14:57:42] (step=0057392) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.22911367638427, LR: 0.0003 +[2026-03-04 14:57:50] (step=0057393) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.229309332811583, LR: 0.0003 +[2026-03-04 14:57:58] (step=0057394) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.229504989238896, LR: 0.0003 +[2026-03-04 14:58:06] (step=0057395) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.22970064566621, LR: 0.0003 +[2026-03-04 14:58:14] (step=0057396) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.229896302093524, LR: 0.0003 +[2026-03-04 14:58:21] (step=0057397) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.230091958520838, LR: 0.0003 +[2026-03-04 14:58:29] (step=0057398) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.230287614948152, LR: 0.0003 +[2026-03-04 14:58:37] (step=0057399) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.230483271375464, LR: 0.0003 +[2026-03-04 14:58:45] (step=0057400) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 11.230678927802778, LR: 0.0003 +[2026-03-04 14:58:53] (step=0057401) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.230874584230092, LR: 0.0003 +[2026-03-04 14:59:01] (step=0057402) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.231070240657406, LR: 0.0003 +[2026-03-04 14:59:09] (step=0057403) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.23126589708472, LR: 0.0003 +[2026-03-04 14:59:16] (step=0057404) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.231461553512032, LR: 0.0003 +[2026-03-04 14:59:24] (step=0057405) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.231657209939346, LR: 0.0003 +[2026-03-04 14:59:32] (step=0057406) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.23185286636666, LR: 0.0003 +[2026-03-04 14:59:40] (step=0057407) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.232048522793974, LR: 0.0003 +[2026-03-04 14:59:48] (step=0057408) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 11.232244179221288, LR: 0.0003 +[2026-03-04 14:59:56] (step=0057409) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.2324398356486, LR: 0.0003 +[2026-03-04 15:00:04] (step=0057410) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.232635492075914, LR: 0.0003 +[2026-03-04 15:00:11] (step=0057411) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.232831148503228, LR: 0.0003 +[2026-03-04 15:00:19] (step=0057412) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.233026804930542, LR: 0.0003 +[2026-03-04 15:00:27] (step=0057413) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.233222461357856, LR: 0.0003 +[2026-03-04 15:00:35] (step=0057414) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.233418117785169, LR: 0.0003 +[2026-03-04 15:00:43] (step=0057415) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.233613774212483, LR: 0.0003 +[2026-03-04 15:00:51] (step=0057416) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 11.233809430639797, LR: 0.0003 +[2026-03-04 15:00:59] (step=0057417) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.23400508706711, LR: 0.0003 +[2026-03-04 15:01:06] (step=0057418) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 11.234200743494425, LR: 0.0003 +[2026-03-04 15:01:14] (step=0057419) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.234396399921737, LR: 0.0003 +[2026-03-04 15:01:22] (step=0057420) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.234592056349051, LR: 0.0003 +[2026-03-04 15:01:30] (step=0057421) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.234787712776365, LR: 0.0003 +[2026-03-04 15:01:38] (step=0057422) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.234983369203679, LR: 0.0003 +[2026-03-04 15:01:46] (step=0057423) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.235179025630991, LR: 0.0003 +[2026-03-04 15:01:54] (step=0057424) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.235374682058305, LR: 0.0003 +[2026-03-04 15:02:02] (step=0057425) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.23557033848562, LR: 0.0003 +[2026-03-04 15:02:09] (step=0057426) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.235765994912933, LR: 0.0003 +[2026-03-04 15:02:17] (step=0057427) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.235961651340247, LR: 0.0003 +[2026-03-04 15:02:25] (step=0057428) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 11.23615730776756, LR: 0.0003 +[2026-03-04 15:02:33] (step=0057429) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.236352964194873, LR: 0.0003 +[2026-03-04 15:02:41] (step=0057430) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.236548620622187, LR: 0.0003 +[2026-03-04 15:02:49] (step=0057431) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.236744277049501, LR: 0.0003 +[2026-03-04 15:02:57] (step=0057432) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.236939933476815, LR: 0.0003 +[2026-03-04 15:03:05] (step=0057433) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.237135589904128, LR: 0.0003 +[2026-03-04 15:03:12] (step=0057434) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.237331246331442, LR: 0.0003 +[2026-03-04 15:03:20] (step=0057435) Train Loss: 0.4429, Train Steps/Sec: 0.12, Epoch: 11.237526902758756, LR: 0.0003 +[2026-03-04 15:03:28] (step=0057436) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.23772255918607, LR: 0.0003 +[2026-03-04 15:03:36] (step=0057437) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.237918215613384, LR: 0.0003 +[2026-03-04 15:03:44] (step=0057438) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.238113872040696, LR: 0.0003 +[2026-03-04 15:03:52] (step=0057439) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.23830952846801, LR: 0.0003 +[2026-03-04 15:04:00] (step=0057440) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.238505184895324, LR: 0.0003 +[2026-03-04 15:04:08] (step=0057441) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.238700841322638, LR: 0.0003 +[2026-03-04 15:04:15] (step=0057442) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.238896497749952, LR: 0.0003 +[2026-03-04 15:04:23] (step=0057443) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.239092154177264, LR: 0.0003 +[2026-03-04 15:04:31] (step=0057444) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.239287810604578, LR: 0.0003 +[2026-03-04 15:04:39] (step=0057445) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.239483467031892, LR: 0.0003 +[2026-03-04 15:04:47] (step=0057446) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.239679123459206, LR: 0.0003 +[2026-03-04 15:04:55] (step=0057447) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.239874779886518, LR: 0.0003 +[2026-03-04 15:05:03] (step=0057448) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.240070436313832, LR: 0.0003 +[2026-03-04 15:05:11] (step=0057449) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.240266092741146, LR: 0.0003 +[2026-03-04 15:05:18] (step=0057450) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 11.24046174916846, LR: 0.0003 +[2026-03-04 15:05:26] (step=0057451) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.240657405595774, LR: 0.0003 +[2026-03-04 15:05:34] (step=0057452) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.240853062023087, LR: 0.0003 +[2026-03-04 15:05:42] (step=0057453) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.2410487184504, LR: 0.0003 +[2026-03-04 15:05:50] (step=0057454) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 11.241244374877715, LR: 0.0003 +[2026-03-04 15:05:58] (step=0057455) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.241440031305029, LR: 0.0003 +[2026-03-04 15:06:06] (step=0057456) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.241635687732343, LR: 0.0003 +[2026-03-04 15:06:13] (step=0057457) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.241831344159655, LR: 0.0003 +[2026-03-04 15:06:21] (step=0057458) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.242027000586969, LR: 0.0003 +[2026-03-04 15:06:29] (step=0057459) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.242222657014283, LR: 0.0003 +[2026-03-04 15:06:37] (step=0057460) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.242418313441597, LR: 0.0003 +[2026-03-04 15:06:45] (step=0057461) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.242613969868911, LR: 0.0003 +[2026-03-04 15:06:53] (step=0057462) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.242809626296223, LR: 0.0003 +[2026-03-04 15:07:01] (step=0057463) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.243005282723537, LR: 0.0003 +[2026-03-04 15:07:09] (step=0057464) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.243200939150851, LR: 0.0003 +[2026-03-04 15:07:16] (step=0057465) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.243396595578165, LR: 0.0003 +[2026-03-04 15:07:24] (step=0057466) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.24359225200548, LR: 0.0003 +[2026-03-04 15:07:32] (step=0057467) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.243787908432791, LR: 0.0003 +[2026-03-04 15:07:40] (step=0057468) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.243983564860105, LR: 0.0003 +[2026-03-04 15:07:48] (step=0057469) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 11.24417922128742, LR: 0.0003 +[2026-03-04 15:07:56] (step=0057470) Train Loss: 0.4387, Train Steps/Sec: 0.12, Epoch: 11.244374877714733, LR: 0.0003 +[2026-03-04 15:08:04] (step=0057471) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 11.244570534142047, LR: 0.0003 +[2026-03-04 15:08:12] (step=0057472) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.24476619056936, LR: 0.0003 +[2026-03-04 15:08:19] (step=0057473) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.244961846996674, LR: 0.0003 +[2026-03-04 15:08:27] (step=0057474) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.245157503423988, LR: 0.0003 +[2026-03-04 15:08:35] (step=0057475) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.245353159851302, LR: 0.0003 +[2026-03-04 15:08:43] (step=0057476) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.245548816278614, LR: 0.0003 +[2026-03-04 15:08:51] (step=0057477) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.245744472705928, LR: 0.0003 +[2026-03-04 15:08:59] (step=0057478) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.245940129133242, LR: 0.0003 +[2026-03-04 15:09:07] (step=0057479) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.246135785560556, LR: 0.0003 +[2026-03-04 15:09:14] (step=0057480) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.24633144198787, LR: 0.0003 +[2026-03-04 15:09:22] (step=0057481) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 11.246527098415182, LR: 0.0003 +[2026-03-04 15:09:30] (step=0057482) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.246722754842496, LR: 0.0003 +[2026-03-04 15:09:38] (step=0057483) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.24691841126981, LR: 0.0003 +[2026-03-04 15:09:46] (step=0057484) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.247114067697124, LR: 0.0003 +[2026-03-04 15:09:54] (step=0057485) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.247309724124438, LR: 0.0003 +[2026-03-04 15:10:02] (step=0057486) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.24750538055175, LR: 0.0003 +[2026-03-04 15:10:10] (step=0057487) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.247701036979064, LR: 0.0003 +[2026-03-04 15:10:17] (step=0057488) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.247896693406378, LR: 0.0003 +[2026-03-04 15:10:25] (step=0057489) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.248092349833692, LR: 0.0003 +[2026-03-04 15:10:33] (step=0057490) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.248288006261006, LR: 0.0003 +[2026-03-04 15:10:41] (step=0057491) Train Loss: 0.4236, Train Steps/Sec: 0.13, Epoch: 11.248483662688319, LR: 0.0003 +[2026-03-04 15:10:49] (step=0057492) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.248679319115633, LR: 0.0003 +[2026-03-04 15:10:57] (step=0057493) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.248874975542947, LR: 0.0003 +[2026-03-04 15:11:05] (step=0057494) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 11.24907063197026, LR: 0.0003 +[2026-03-04 15:11:13] (step=0057495) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.249266288397575, LR: 0.0003 +[2026-03-04 15:11:20] (step=0057496) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.249461944824887, LR: 0.0003 +[2026-03-04 15:11:28] (step=0057497) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.249657601252201, LR: 0.0003 +[2026-03-04 15:11:36] (step=0057498) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.249853257679515, LR: 0.0003 +[2026-03-04 15:11:44] (step=0057499) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.250048914106829, LR: 0.0003 +[2026-03-04 15:11:52] (step=0057500) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 11.250244570534141, LR: 0.0003 +[2026-03-04 15:11:52] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0057500/ +[2026-03-04 15:12:00] (step=0057501) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.250440226961455, LR: 0.0003 +[2026-03-04 15:12:08] (step=0057502) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.25063588338877, LR: 0.0003 +[2026-03-04 15:12:15] (step=0057503) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.250831539816083, LR: 0.0003 +[2026-03-04 15:12:23] (step=0057504) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.251027196243397, LR: 0.0003 +[2026-03-04 15:12:31] (step=0057505) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 11.25122285267071, LR: 0.0003 +[2026-03-04 15:12:39] (step=0057506) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.251418509098023, LR: 0.0003 +[2026-03-04 15:12:47] (step=0057507) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.251614165525337, LR: 0.0003 +[2026-03-04 15:12:55] (step=0057508) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.251809821952651, LR: 0.0003 +[2026-03-04 15:13:03] (step=0057509) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.252005478379965, LR: 0.0003 +[2026-03-04 15:13:11] (step=0057510) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.252201134807278, LR: 0.0003 +[2026-03-04 15:13:18] (step=0057511) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.252396791234592, LR: 0.0003 +[2026-03-04 15:13:26] (step=0057512) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.252592447661906, LR: 0.0003 +[2026-03-04 15:13:34] (step=0057513) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.25278810408922, LR: 0.0003 +[2026-03-04 15:13:42] (step=0057514) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 11.252983760516534, LR: 0.0003 +[2026-03-04 15:13:50] (step=0057515) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.253179416943846, LR: 0.0003 +[2026-03-04 15:13:58] (step=0057516) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.25337507337116, LR: 0.0003 +[2026-03-04 15:14:06] (step=0057517) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.253570729798474, LR: 0.0003 +[2026-03-04 15:14:13] (step=0057518) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.253766386225788, LR: 0.0003 +[2026-03-04 15:14:21] (step=0057519) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.253962042653102, LR: 0.0003 +[2026-03-04 15:14:29] (step=0057520) Train Loss: 0.4459, Train Steps/Sec: 0.12, Epoch: 11.254157699080414, LR: 0.0003 +[2026-03-04 15:14:37] (step=0057521) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.254353355507728, LR: 0.0003 +[2026-03-04 15:14:45] (step=0057522) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.254549011935042, LR: 0.0003 +[2026-03-04 15:14:53] (step=0057523) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.254744668362356, LR: 0.0003 +[2026-03-04 15:15:01] (step=0057524) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.25494032478967, LR: 0.0003 +[2026-03-04 15:15:09] (step=0057525) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.255135981216982, LR: 0.0003 +[2026-03-04 15:15:16] (step=0057526) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.255331637644296, LR: 0.0003 +[2026-03-04 15:15:24] (step=0057527) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.25552729407161, LR: 0.0003 +[2026-03-04 15:15:32] (step=0057528) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.255722950498924, LR: 0.0003 +[2026-03-04 15:15:40] (step=0057529) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.255918606926237, LR: 0.0003 +[2026-03-04 15:15:48] (step=0057530) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.25611426335355, LR: 0.0003 +[2026-03-04 15:15:56] (step=0057531) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.256309919780865, LR: 0.0003 +[2026-03-04 15:16:04] (step=0057532) Train Loss: 0.4396, Train Steps/Sec: 0.12, Epoch: 11.256505576208179, LR: 0.0003 +[2026-03-04 15:16:12] (step=0057533) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.256701232635493, LR: 0.0003 +[2026-03-04 15:16:19] (step=0057534) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.256896889062805, LR: 0.0003 +[2026-03-04 15:16:27] (step=0057535) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.257092545490119, LR: 0.0003 +[2026-03-04 15:16:35] (step=0057536) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.257288201917433, LR: 0.0003 +[2026-03-04 15:16:43] (step=0057537) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.257483858344747, LR: 0.0003 +[2026-03-04 15:16:51] (step=0057538) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.257679514772061, LR: 0.0003 +[2026-03-04 15:16:59] (step=0057539) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 11.257875171199373, LR: 0.0003 +[2026-03-04 15:17:07] (step=0057540) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.258070827626687, LR: 0.0003 +[2026-03-04 15:17:14] (step=0057541) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.258266484054001, LR: 0.0003 +[2026-03-04 15:17:22] (step=0057542) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.258462140481315, LR: 0.0003 +[2026-03-04 15:17:30] (step=0057543) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.25865779690863, LR: 0.0003 +[2026-03-04 15:17:38] (step=0057544) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.258853453335941, LR: 0.0003 +[2026-03-04 15:17:46] (step=0057545) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.259049109763255, LR: 0.0003 +[2026-03-04 15:17:54] (step=0057546) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.25924476619057, LR: 0.0003 +[2026-03-04 15:18:02] (step=0057547) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.259440422617883, LR: 0.0003 +[2026-03-04 15:18:10] (step=0057548) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.259636079045197, LR: 0.0003 +[2026-03-04 15:18:17] (step=0057549) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 11.25983173547251, LR: 0.0003 +[2026-03-04 15:18:25] (step=0057550) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.260027391899824, LR: 0.0003 +[2026-03-04 15:18:33] (step=0057551) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.260223048327138, LR: 0.0003 +[2026-03-04 15:18:41] (step=0057552) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.260418704754452, LR: 0.0003 +[2026-03-04 15:18:49] (step=0057553) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 11.260614361181764, LR: 0.0003 +[2026-03-04 15:18:57] (step=0057554) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.260810017609078, LR: 0.0003 +[2026-03-04 15:19:05] (step=0057555) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.261005674036392, LR: 0.0003 +[2026-03-04 15:19:13] (step=0057556) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.261201330463706, LR: 0.0003 +[2026-03-04 15:19:20] (step=0057557) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.26139698689102, LR: 0.0003 +[2026-03-04 15:19:28] (step=0057558) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 11.261592643318332, LR: 0.0003 +[2026-03-04 15:19:36] (step=0057559) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.261788299745646, LR: 0.0003 +[2026-03-04 15:19:44] (step=0057560) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.26198395617296, LR: 0.0003 +[2026-03-04 15:19:52] (step=0057561) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.262179612600274, LR: 0.0003 +[2026-03-04 15:20:00] (step=0057562) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.262375269027588, LR: 0.0003 +[2026-03-04 15:20:08] (step=0057563) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.2625709254549, LR: 0.0003 +[2026-03-04 15:20:15] (step=0057564) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 11.262766581882214, LR: 0.0003 +[2026-03-04 15:20:23] (step=0057565) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.262962238309528, LR: 0.0003 +[2026-03-04 15:20:31] (step=0057566) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.263157894736842, LR: 0.0003 +[2026-03-04 15:20:39] (step=0057567) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.263353551164156, LR: 0.0003 +[2026-03-04 15:20:47] (step=0057568) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.263549207591469, LR: 0.0003 +[2026-03-04 15:20:55] (step=0057569) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.263744864018783, LR: 0.0003 +[2026-03-04 15:21:03] (step=0057570) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.263940520446097, LR: 0.0003 +[2026-03-04 15:21:11] (step=0057571) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.26413617687341, LR: 0.0003 +[2026-03-04 15:21:19] (step=0057572) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.264331833300725, LR: 0.0003 +[2026-03-04 15:21:26] (step=0057573) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.264527489728037, LR: 0.0003 +[2026-03-04 15:21:34] (step=0057574) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.264723146155351, LR: 0.0003 +[2026-03-04 15:21:42] (step=0057575) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.264918802582665, LR: 0.0003 +[2026-03-04 15:21:50] (step=0057576) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.265114459009979, LR: 0.0003 +[2026-03-04 15:21:58] (step=0057577) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.265310115437293, LR: 0.0003 +[2026-03-04 15:22:06] (step=0057578) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.265505771864605, LR: 0.0003 +[2026-03-04 15:22:14] (step=0057579) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.26570142829192, LR: 0.0003 +[2026-03-04 15:22:22] (step=0057580) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 11.265897084719233, LR: 0.0003 +[2026-03-04 15:22:30] (step=0057581) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.266092741146547, LR: 0.0003 +[2026-03-04 15:22:37] (step=0057582) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.26628839757386, LR: 0.0003 +[2026-03-04 15:22:45] (step=0057583) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.266484054001173, LR: 0.0003 +[2026-03-04 15:22:53] (step=0057584) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.266679710428487, LR: 0.0003 +[2026-03-04 15:23:01] (step=0057585) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.266875366855801, LR: 0.0003 +[2026-03-04 15:23:09] (step=0057586) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.267071023283115, LR: 0.0003 +[2026-03-04 15:23:17] (step=0057587) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.267266679710428, LR: 0.0003 +[2026-03-04 15:23:25] (step=0057588) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.267462336137742, LR: 0.0003 +[2026-03-04 15:23:33] (step=0057589) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 11.267657992565056, LR: 0.0003 +[2026-03-04 15:23:40] (step=0057590) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.26785364899237, LR: 0.0003 +[2026-03-04 15:23:48] (step=0057591) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 11.268049305419684, LR: 0.0003 +[2026-03-04 15:23:56] (step=0057592) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.268244961846996, LR: 0.0003 +[2026-03-04 15:24:04] (step=0057593) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.26844061827431, LR: 0.0003 +[2026-03-04 15:24:12] (step=0057594) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.268636274701624, LR: 0.0003 +[2026-03-04 15:24:20] (step=0057595) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.268831931128938, LR: 0.0003 +[2026-03-04 15:24:28] (step=0057596) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.269027587556252, LR: 0.0003 +[2026-03-04 15:24:35] (step=0057597) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.269223243983564, LR: 0.0003 +[2026-03-04 15:24:43] (step=0057598) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.269418900410878, LR: 0.0003 +[2026-03-04 15:24:51] (step=0057599) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.269614556838192, LR: 0.0003 +[2026-03-04 15:24:59] (step=0057600) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.269810213265506, LR: 0.0003 +[2026-03-04 15:25:07] (step=0057601) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.27000586969282, LR: 0.0003 +[2026-03-04 15:25:15] (step=0057602) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.270201526120132, LR: 0.0003 +[2026-03-04 15:25:23] (step=0057603) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.270397182547446, LR: 0.0003 +[2026-03-04 15:25:30] (step=0057604) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.27059283897476, LR: 0.0003 +[2026-03-04 15:25:38] (step=0057605) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.270788495402075, LR: 0.0003 +[2026-03-04 15:25:46] (step=0057606) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.270984151829387, LR: 0.0003 +[2026-03-04 15:25:54] (step=0057607) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 11.2711798082567, LR: 0.0003 +[2026-03-04 15:26:02] (step=0057608) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 11.271375464684015, LR: 0.0003 +[2026-03-04 15:26:10] (step=0057609) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 11.271571121111329, LR: 0.0003 +[2026-03-04 15:26:18] (step=0057610) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.271766777538643, LR: 0.0003 +[2026-03-04 15:26:26] (step=0057611) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.271962433965955, LR: 0.0003 +[2026-03-04 15:26:33] (step=0057612) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.272158090393269, LR: 0.0003 +[2026-03-04 15:26:41] (step=0057613) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.272353746820583, LR: 0.0003 +[2026-03-04 15:26:49] (step=0057614) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.272549403247897, LR: 0.0003 +[2026-03-04 15:26:57] (step=0057615) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.272745059675211, LR: 0.0003 +[2026-03-04 15:27:05] (step=0057616) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 11.272940716102523, LR: 0.0003 +[2026-03-04 15:27:13] (step=0057617) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.273136372529837, LR: 0.0003 +[2026-03-04 15:27:21] (step=0057618) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.273332028957151, LR: 0.0003 +[2026-03-04 15:27:28] (step=0057619) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.273527685384465, LR: 0.0003 +[2026-03-04 15:27:36] (step=0057620) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.27372334181178, LR: 0.0003 +[2026-03-04 15:27:44] (step=0057621) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 11.273918998239091, LR: 0.0003 +[2026-03-04 15:27:52] (step=0057622) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.274114654666405, LR: 0.0003 +[2026-03-04 15:28:00] (step=0057623) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.27431031109372, LR: 0.0003 +[2026-03-04 15:28:08] (step=0057624) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.274505967521034, LR: 0.0003 +[2026-03-04 15:28:16] (step=0057625) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 11.274701623948348, LR: 0.0003 +[2026-03-04 15:28:24] (step=0057626) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.27489728037566, LR: 0.0003 +[2026-03-04 15:28:32] (step=0057627) Train Loss: 0.4364, Train Steps/Sec: 0.12, Epoch: 11.275092936802974, LR: 0.0003 +[2026-03-04 15:28:40] (step=0057628) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.275288593230288, LR: 0.0003 +[2026-03-04 15:28:47] (step=0057629) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.275484249657602, LR: 0.0003 +[2026-03-04 15:28:55] (step=0057630) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.275679906084916, LR: 0.0003 +[2026-03-04 15:29:03] (step=0057631) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.275875562512228, LR: 0.0003 +[2026-03-04 15:29:11] (step=0057632) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.276071218939542, LR: 0.0003 +[2026-03-04 15:29:19] (step=0057633) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.276266875366856, LR: 0.0003 +[2026-03-04 15:29:27] (step=0057634) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.27646253179417, LR: 0.0003 +[2026-03-04 15:29:35] (step=0057635) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.276658188221482, LR: 0.0003 +[2026-03-04 15:29:42] (step=0057636) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.276853844648796, LR: 0.0003 +[2026-03-04 15:29:50] (step=0057637) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.27704950107611, LR: 0.0003 +[2026-03-04 15:29:58] (step=0057638) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.277245157503424, LR: 0.0003 +[2026-03-04 15:30:06] (step=0057639) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.277440813930738, LR: 0.0003 +[2026-03-04 15:30:14] (step=0057640) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.27763647035805, LR: 0.0003 +[2026-03-04 15:30:22] (step=0057641) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.277832126785365, LR: 0.0003 +[2026-03-04 15:30:30] (step=0057642) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.278027783212679, LR: 0.0003 +[2026-03-04 15:30:37] (step=0057643) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.278223439639993, LR: 0.0003 +[2026-03-04 15:30:45] (step=0057644) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.278419096067307, LR: 0.0003 +[2026-03-04 15:30:53] (step=0057645) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.278614752494619, LR: 0.0003 +[2026-03-04 15:31:01] (step=0057646) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.278810408921933, LR: 0.0003 +[2026-03-04 15:31:09] (step=0057647) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.279006065349247, LR: 0.0003 +[2026-03-04 15:31:17] (step=0057648) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.27920172177656, LR: 0.0003 +[2026-03-04 15:31:25] (step=0057649) Train Loss: 0.4227, Train Steps/Sec: 0.13, Epoch: 11.279397378203875, LR: 0.0003 +[2026-03-04 15:31:32] (step=0057650) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.279593034631187, LR: 0.0003 +[2026-03-04 15:31:40] (step=0057651) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.279788691058501, LR: 0.0003 +[2026-03-04 15:31:48] (step=0057652) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.279984347485815, LR: 0.0003 +[2026-03-04 15:31:56] (step=0057653) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.280180003913129, LR: 0.0003 +[2026-03-04 15:32:04] (step=0057654) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.280375660340443, LR: 0.0003 +[2026-03-04 15:32:12] (step=0057655) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.280571316767755, LR: 0.0003 +[2026-03-04 15:32:20] (step=0057656) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.28076697319507, LR: 0.0003 +[2026-03-04 15:32:27] (step=0057657) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.280962629622383, LR: 0.0003 +[2026-03-04 15:32:35] (step=0057658) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.281158286049697, LR: 0.0003 +[2026-03-04 15:32:43] (step=0057659) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.28135394247701, LR: 0.0003 +[2026-03-04 15:32:51] (step=0057660) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.281549598904324, LR: 0.0003 +[2026-03-04 15:32:59] (step=0057661) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.281745255331638, LR: 0.0003 +[2026-03-04 15:33:07] (step=0057662) Train Loss: 0.4242, Train Steps/Sec: 0.13, Epoch: 11.281940911758952, LR: 0.0003 +[2026-03-04 15:33:15] (step=0057663) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 11.282136568186266, LR: 0.0003 +[2026-03-04 15:33:23] (step=0057664) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.282332224613578, LR: 0.0003 +[2026-03-04 15:33:30] (step=0057665) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.282527881040892, LR: 0.0003 +[2026-03-04 15:33:38] (step=0057666) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.282723537468206, LR: 0.0003 +[2026-03-04 15:33:46] (step=0057667) Train Loss: 0.4469, Train Steps/Sec: 0.12, Epoch: 11.28291919389552, LR: 0.0003 +[2026-03-04 15:33:54] (step=0057668) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.283114850322834, LR: 0.0003 +[2026-03-04 15:34:02] (step=0057669) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.283310506750146, LR: 0.0003 +[2026-03-04 15:34:10] (step=0057670) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.28350616317746, LR: 0.0003 +[2026-03-04 15:34:18] (step=0057671) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.283701819604774, LR: 0.0003 +[2026-03-04 15:34:26] (step=0057672) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.283897476032088, LR: 0.0003 +[2026-03-04 15:34:33] (step=0057673) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.284093132459402, LR: 0.0003 +[2026-03-04 15:34:41] (step=0057674) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.284288788886714, LR: 0.0003 +[2026-03-04 15:34:49] (step=0057675) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.284484445314028, LR: 0.0003 +[2026-03-04 15:34:57] (step=0057676) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.284680101741342, LR: 0.0003 +[2026-03-04 15:35:05] (step=0057677) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 11.284875758168656, LR: 0.0003 +[2026-03-04 15:35:13] (step=0057678) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 11.28507141459597, LR: 0.0003 +[2026-03-04 15:35:21] (step=0057679) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.285267071023283, LR: 0.0003 +[2026-03-04 15:35:28] (step=0057680) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.285462727450597, LR: 0.0003 +[2026-03-04 15:35:36] (step=0057681) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.28565838387791, LR: 0.0003 +[2026-03-04 15:35:44] (step=0057682) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.285854040305225, LR: 0.0003 +[2026-03-04 15:35:52] (step=0057683) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.286049696732539, LR: 0.0003 +[2026-03-04 15:36:00] (step=0057684) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.28624535315985, LR: 0.0003 +[2026-03-04 15:36:08] (step=0057685) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 11.286441009587165, LR: 0.0003 +[2026-03-04 15:36:16] (step=0057686) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.286636666014479, LR: 0.0003 +[2026-03-04 15:36:23] (step=0057687) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.286832322441793, LR: 0.0003 +[2026-03-04 15:36:31] (step=0057688) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.287027978869105, LR: 0.0003 +[2026-03-04 15:36:39] (step=0057689) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.287223635296419, LR: 0.0003 +[2026-03-04 15:36:47] (step=0057690) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.287419291723733, LR: 0.0003 +[2026-03-04 15:36:55] (step=0057691) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.287614948151047, LR: 0.0003 +[2026-03-04 15:37:03] (step=0057692) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.287810604578361, LR: 0.0003 +[2026-03-04 15:37:10] (step=0057693) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.288006261005673, LR: 0.0003 +[2026-03-04 15:37:18] (step=0057694) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.288201917432987, LR: 0.0003 +[2026-03-04 15:37:26] (step=0057695) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.288397573860301, LR: 0.0003 +[2026-03-04 15:37:34] (step=0057696) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.288593230287615, LR: 0.0003 +[2026-03-04 15:37:42] (step=0057697) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.28878888671493, LR: 0.0003 +[2026-03-04 15:37:50] (step=0057698) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.288984543142242, LR: 0.0003 +[2026-03-04 15:37:58] (step=0057699) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.289180199569556, LR: 0.0003 +[2026-03-04 15:38:05] (step=0057700) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.28937585599687, LR: 0.0003 +[2026-03-04 15:38:13] (step=0057701) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.289571512424184, LR: 0.0003 +[2026-03-04 15:38:21] (step=0057702) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.289767168851498, LR: 0.0003 +[2026-03-04 15:38:29] (step=0057703) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.28996282527881, LR: 0.0003 +[2026-03-04 15:38:37] (step=0057704) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.290158481706124, LR: 0.0003 +[2026-03-04 15:38:45] (step=0057705) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.290354138133438, LR: 0.0003 +[2026-03-04 15:38:52] (step=0057706) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.290549794560752, LR: 0.0003 +[2026-03-04 15:39:00] (step=0057707) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 11.290745450988066, LR: 0.0003 +[2026-03-04 15:39:08] (step=0057708) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.290941107415378, LR: 0.0003 +[2026-03-04 15:39:16] (step=0057709) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.291136763842692, LR: 0.0003 +[2026-03-04 15:39:24] (step=0057710) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.291332420270006, LR: 0.0003 +[2026-03-04 15:39:32] (step=0057711) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 11.29152807669732, LR: 0.0003 +[2026-03-04 15:39:40] (step=0057712) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.291723733124632, LR: 0.0003 +[2026-03-04 15:39:47] (step=0057713) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.291919389551946, LR: 0.0003 +[2026-03-04 15:39:55] (step=0057714) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.29211504597926, LR: 0.0003 +[2026-03-04 15:40:03] (step=0057715) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.292310702406574, LR: 0.0003 +[2026-03-04 15:40:11] (step=0057716) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 11.292506358833888, LR: 0.0003 +[2026-03-04 15:40:19] (step=0057717) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 11.2927020152612, LR: 0.0003 +[2026-03-04 15:40:27] (step=0057718) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.292897671688515, LR: 0.0003 +[2026-03-04 15:40:35] (step=0057719) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.293093328115829, LR: 0.0003 +[2026-03-04 15:40:43] (step=0057720) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 11.293288984543143, LR: 0.0003 +[2026-03-04 15:40:51] (step=0057721) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.293484640970457, LR: 0.0003 +[2026-03-04 15:40:58] (step=0057722) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.293680297397769, LR: 0.0003 +[2026-03-04 15:41:06] (step=0057723) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.293875953825083, LR: 0.0003 +[2026-03-04 15:41:14] (step=0057724) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.294071610252397, LR: 0.0003 +[2026-03-04 15:41:22] (step=0057725) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 11.29426726667971, LR: 0.0003 +[2026-03-04 15:41:30] (step=0057726) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.294462923107025, LR: 0.0003 +[2026-03-04 15:41:38] (step=0057727) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.294658579534337, LR: 0.0003 +[2026-03-04 15:41:46] (step=0057728) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.294854235961651, LR: 0.0003 +[2026-03-04 15:41:53] (step=0057729) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.295049892388965, LR: 0.0003 +[2026-03-04 15:42:01] (step=0057730) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.295245548816279, LR: 0.0003 +[2026-03-04 15:42:09] (step=0057731) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.295441205243593, LR: 0.0003 +[2026-03-04 15:42:17] (step=0057732) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.295636861670905, LR: 0.0003 +[2026-03-04 15:42:25] (step=0057733) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.29583251809822, LR: 0.0003 +[2026-03-04 15:42:33] (step=0057734) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 11.296028174525533, LR: 0.0003 +[2026-03-04 15:42:40] (step=0057735) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.296223830952847, LR: 0.0003 +[2026-03-04 15:42:48] (step=0057736) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.29641948738016, LR: 0.0003 +[2026-03-04 15:42:56] (step=0057737) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.296615143807474, LR: 0.0003 +[2026-03-04 15:43:04] (step=0057738) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.296810800234788, LR: 0.0003 +[2026-03-04 15:43:12] (step=0057739) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.297006456662102, LR: 0.0003 +[2026-03-04 15:43:20] (step=0057740) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.297202113089416, LR: 0.0003 +[2026-03-04 15:43:28] (step=0057741) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.297397769516728, LR: 0.0003 +[2026-03-04 15:43:35] (step=0057742) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.297593425944042, LR: 0.0003 +[2026-03-04 15:43:43] (step=0057743) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.297789082371356, LR: 0.0003 +[2026-03-04 15:43:51] (step=0057744) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 11.29798473879867, LR: 0.0003 +[2026-03-04 15:43:59] (step=0057745) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.298180395225984, LR: 0.0003 +[2026-03-04 15:44:07] (step=0057746) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.298376051653296, LR: 0.0003 +[2026-03-04 15:44:15] (step=0057747) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.29857170808061, LR: 0.0003 +[2026-03-04 15:44:23] (step=0057748) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.298767364507924, LR: 0.0003 +[2026-03-04 15:44:31] (step=0057749) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.298963020935238, LR: 0.0003 +[2026-03-04 15:44:38] (step=0057750) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.299158677362552, LR: 0.0003 +[2026-03-04 15:44:46] (step=0057751) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 11.299354333789864, LR: 0.0003 +[2026-03-04 15:44:54] (step=0057752) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.299549990217178, LR: 0.0003 +[2026-03-04 15:45:02] (step=0057753) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.299745646644492, LR: 0.0003 +[2026-03-04 15:45:10] (step=0057754) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.299941303071806, LR: 0.0003 +[2026-03-04 15:45:18] (step=0057755) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 11.30013695949912, LR: 0.0003 +[2026-03-04 15:45:26] (step=0057756) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.300332615926433, LR: 0.0003 +[2026-03-04 15:45:33] (step=0057757) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.300528272353747, LR: 0.0003 +[2026-03-04 15:45:41] (step=0057758) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.30072392878106, LR: 0.0003 +[2026-03-04 15:45:49] (step=0057759) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.300919585208375, LR: 0.0003 +[2026-03-04 15:45:57] (step=0057760) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.301115241635689, LR: 0.0003 +[2026-03-04 15:46:05] (step=0057761) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.301310898063, LR: 0.0003 +[2026-03-04 15:46:13] (step=0057762) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.301506554490315, LR: 0.0003 +[2026-03-04 15:46:21] (step=0057763) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.301702210917629, LR: 0.0003 +[2026-03-04 15:46:29] (step=0057764) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.301897867344943, LR: 0.0003 +[2026-03-04 15:46:36] (step=0057765) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 11.302093523772255, LR: 0.0003 +[2026-03-04 15:46:44] (step=0057766) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.302289180199569, LR: 0.0003 +[2026-03-04 15:46:52] (step=0057767) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 11.302484836626883, LR: 0.0003 +[2026-03-04 15:47:00] (step=0057768) Train Loss: 0.4449, Train Steps/Sec: 0.12, Epoch: 11.302680493054197, LR: 0.0003 +[2026-03-04 15:47:08] (step=0057769) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.302876149481511, LR: 0.0003 +[2026-03-04 15:47:16] (step=0057770) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.303071805908823, LR: 0.0003 +[2026-03-04 15:47:24] (step=0057771) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.303267462336137, LR: 0.0003 +[2026-03-04 15:47:32] (step=0057772) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.303463118763451, LR: 0.0003 +[2026-03-04 15:47:39] (step=0057773) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.303658775190765, LR: 0.0003 +[2026-03-04 15:47:47] (step=0057774) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.30385443161808, LR: 0.0003 +[2026-03-04 15:47:55] (step=0057775) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.304050088045392, LR: 0.0003 +[2026-03-04 15:48:03] (step=0057776) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.304245744472706, LR: 0.0003 +[2026-03-04 15:48:11] (step=0057777) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.30444140090002, LR: 0.0003 +[2026-03-04 15:48:19] (step=0057778) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.304637057327334, LR: 0.0003 +[2026-03-04 15:48:27] (step=0057779) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.304832713754648, LR: 0.0003 +[2026-03-04 15:48:34] (step=0057780) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.30502837018196, LR: 0.0003 +[2026-03-04 15:48:42] (step=0057781) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.305224026609274, LR: 0.0003 +[2026-03-04 15:48:50] (step=0057782) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.305419683036588, LR: 0.0003 +[2026-03-04 15:48:58] (step=0057783) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.305615339463902, LR: 0.0003 +[2026-03-04 15:49:06] (step=0057784) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.305810995891216, LR: 0.0003 +[2026-03-04 15:49:14] (step=0057785) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.306006652318528, LR: 0.0003 +[2026-03-04 15:49:22] (step=0057786) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.306202308745842, LR: 0.0003 +[2026-03-04 15:49:29] (step=0057787) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.306397965173156, LR: 0.0003 +[2026-03-04 15:49:37] (step=0057788) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.30659362160047, LR: 0.0003 +[2026-03-04 15:49:45] (step=0057789) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.306789278027782, LR: 0.0003 +[2026-03-04 15:49:53] (step=0057790) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 11.306984934455096, LR: 0.0003 +[2026-03-04 15:50:01] (step=0057791) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.30718059088241, LR: 0.0003 +[2026-03-04 15:50:09] (step=0057792) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.307376247309724, LR: 0.0003 +[2026-03-04 15:50:17] (step=0057793) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 11.307571903737038, LR: 0.0003 +[2026-03-04 15:50:25] (step=0057794) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.30776756016435, LR: 0.0003 +[2026-03-04 15:50:32] (step=0057795) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.307963216591665, LR: 0.0003 +[2026-03-04 15:50:40] (step=0057796) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.308158873018979, LR: 0.0003 +[2026-03-04 15:50:48] (step=0057797) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.308354529446293, LR: 0.0003 +[2026-03-04 15:50:56] (step=0057798) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.308550185873607, LR: 0.0003 +[2026-03-04 15:51:04] (step=0057799) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.308745842300919, LR: 0.0003 +[2026-03-04 15:51:12] (step=0057800) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.308941498728233, LR: 0.0003 +[2026-03-04 15:51:20] (step=0057801) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.309137155155547, LR: 0.0003 +[2026-03-04 15:51:27] (step=0057802) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.30933281158286, LR: 0.0003 +[2026-03-04 15:51:35] (step=0057803) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.309528468010175, LR: 0.0003 +[2026-03-04 15:51:43] (step=0057804) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.309724124437487, LR: 0.0003 +[2026-03-04 15:51:51] (step=0057805) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 11.309919780864801, LR: 0.0003 +[2026-03-04 15:51:59] (step=0057806) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.310115437292115, LR: 0.0003 +[2026-03-04 15:52:07] (step=0057807) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.310311093719429, LR: 0.0003 +[2026-03-04 15:52:15] (step=0057808) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 11.310506750146743, LR: 0.0003 +[2026-03-04 15:52:22] (step=0057809) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.310702406574055, LR: 0.0003 +[2026-03-04 15:52:30] (step=0057810) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.31089806300137, LR: 0.0003 +[2026-03-04 15:52:38] (step=0057811) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.311093719428683, LR: 0.0003 +[2026-03-04 15:52:46] (step=0057812) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.311289375855997, LR: 0.0003 +[2026-03-04 15:52:54] (step=0057813) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.311485032283311, LR: 0.0003 +[2026-03-04 15:53:02] (step=0057814) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.311680688710624, LR: 0.0003 +[2026-03-04 15:53:10] (step=0057815) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.311876345137938, LR: 0.0003 +[2026-03-04 15:53:18] (step=0057816) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.312072001565252, LR: 0.0003 +[2026-03-04 15:53:26] (step=0057817) Train Loss: 0.4253, Train Steps/Sec: 0.12, Epoch: 11.312267657992566, LR: 0.0003 +[2026-03-04 15:53:33] (step=0057818) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.312463314419878, LR: 0.0003 +[2026-03-04 15:53:41] (step=0057819) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.312658970847192, LR: 0.0003 +[2026-03-04 15:53:49] (step=0057820) Train Loss: 0.4450, Train Steps/Sec: 0.12, Epoch: 11.312854627274506, LR: 0.0003 +[2026-03-04 15:53:57] (step=0057821) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.31305028370182, LR: 0.0003 +[2026-03-04 15:54:05] (step=0057822) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.313245940129134, LR: 0.0003 +[2026-03-04 15:54:13] (step=0057823) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.313441596556446, LR: 0.0003 +[2026-03-04 15:54:21] (step=0057824) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.31363725298376, LR: 0.0003 +[2026-03-04 15:54:29] (step=0057825) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.313832909411074, LR: 0.0003 +[2026-03-04 15:54:37] (step=0057826) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.314028565838388, LR: 0.0003 +[2026-03-04 15:54:44] (step=0057827) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.314224222265702, LR: 0.0003 +[2026-03-04 15:54:52] (step=0057828) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.314419878693014, LR: 0.0003 +[2026-03-04 15:55:00] (step=0057829) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.314615535120328, LR: 0.0003 +[2026-03-04 15:55:08] (step=0057830) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.314811191547642, LR: 0.0003 +[2026-03-04 15:55:16] (step=0057831) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.315006847974956, LR: 0.0003 +[2026-03-04 15:55:24] (step=0057832) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.31520250440227, LR: 0.0003 +[2026-03-04 15:55:32] (step=0057833) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.315398160829583, LR: 0.0003 +[2026-03-04 15:55:39] (step=0057834) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.315593817256897, LR: 0.0003 +[2026-03-04 15:55:47] (step=0057835) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.31578947368421, LR: 0.0003 +[2026-03-04 15:55:55] (step=0057836) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 11.315985130111525, LR: 0.0003 +[2026-03-04 15:56:03] (step=0057837) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.316180786538839, LR: 0.0003 +[2026-03-04 15:56:11] (step=0057838) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.31637644296615, LR: 0.0003 +[2026-03-04 15:56:19] (step=0057839) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.316572099393465, LR: 0.0003 +[2026-03-04 15:56:27] (step=0057840) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.316767755820779, LR: 0.0003 +[2026-03-04 15:56:34] (step=0057841) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.316963412248093, LR: 0.0003 +[2026-03-04 15:56:42] (step=0057842) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 11.317159068675405, LR: 0.0003 +[2026-03-04 15:56:50] (step=0057843) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.31735472510272, LR: 0.0003 +[2026-03-04 15:56:58] (step=0057844) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.317550381530033, LR: 0.0003 +[2026-03-04 15:57:06] (step=0057845) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.317746037957347, LR: 0.0003 +[2026-03-04 15:57:14] (step=0057846) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 11.317941694384661, LR: 0.0003 +[2026-03-04 15:57:22] (step=0057847) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.318137350811973, LR: 0.0003 +[2026-03-04 15:57:29] (step=0057848) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.318333007239287, LR: 0.0003 +[2026-03-04 15:57:37] (step=0057849) Train Loss: 0.4247, Train Steps/Sec: 0.13, Epoch: 11.318528663666601, LR: 0.0003 +[2026-03-04 15:57:45] (step=0057850) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.318724320093915, LR: 0.0003 +[2026-03-04 15:57:53] (step=0057851) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.31891997652123, LR: 0.0003 +[2026-03-04 15:58:01] (step=0057852) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.319115632948542, LR: 0.0003 +[2026-03-04 15:58:09] (step=0057853) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.319311289375856, LR: 0.0003 +[2026-03-04 15:58:17] (step=0057854) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.31950694580317, LR: 0.0003 +[2026-03-04 15:58:24] (step=0057855) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.319702602230484, LR: 0.0003 +[2026-03-04 15:58:32] (step=0057856) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.319898258657798, LR: 0.0003 +[2026-03-04 15:58:40] (step=0057857) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.32009391508511, LR: 0.0003 +[2026-03-04 15:58:48] (step=0057858) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.320289571512424, LR: 0.0003 +[2026-03-04 15:58:56] (step=0057859) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.320485227939738, LR: 0.0003 +[2026-03-04 15:59:04] (step=0057860) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 11.320680884367052, LR: 0.0003 +[2026-03-04 15:59:12] (step=0057861) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.320876540794366, LR: 0.0003 +[2026-03-04 15:59:19] (step=0057862) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.321072197221678, LR: 0.0003 +[2026-03-04 15:59:27] (step=0057863) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.321267853648992, LR: 0.0003 +[2026-03-04 15:59:35] (step=0057864) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.321463510076306, LR: 0.0003 +[2026-03-04 15:59:43] (step=0057865) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.32165916650362, LR: 0.0003 +[2026-03-04 15:59:51] (step=0057866) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.321854822930934, LR: 0.0003 +[2026-03-04 15:59:59] (step=0057867) Train Loss: 0.4366, Train Steps/Sec: 0.12, Epoch: 11.322050479358246, LR: 0.0003 +[2026-03-04 16:00:07] (step=0057868) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.32224613578556, LR: 0.0003 +[2026-03-04 16:00:15] (step=0057869) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.322441792212874, LR: 0.0003 +[2026-03-04 16:00:22] (step=0057870) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.322637448640188, LR: 0.0003 +[2026-03-04 16:00:30] (step=0057871) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.3228331050675, LR: 0.0003 +[2026-03-04 16:00:38] (step=0057872) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.323028761494815, LR: 0.0003 +[2026-03-04 16:00:46] (step=0057873) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.323224417922129, LR: 0.0003 +[2026-03-04 16:00:54] (step=0057874) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.323420074349443, LR: 0.0003 +[2026-03-04 16:01:02] (step=0057875) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.323615730776757, LR: 0.0003 +[2026-03-04 16:01:10] (step=0057876) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.323811387204069, LR: 0.0003 +[2026-03-04 16:01:18] (step=0057877) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.324007043631383, LR: 0.0003 +[2026-03-04 16:01:25] (step=0057878) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.324202700058697, LR: 0.0003 +[2026-03-04 16:01:33] (step=0057879) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.324398356486011, LR: 0.0003 +[2026-03-04 16:01:41] (step=0057880) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.324594012913325, LR: 0.0003 +[2026-03-04 16:01:49] (step=0057881) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.324789669340637, LR: 0.0003 +[2026-03-04 16:01:57] (step=0057882) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.324985325767951, LR: 0.0003 +[2026-03-04 16:02:05] (step=0057883) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.325180982195265, LR: 0.0003 +[2026-03-04 16:02:13] (step=0057884) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.32537663862258, LR: 0.0003 +[2026-03-04 16:02:20] (step=0057885) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.325572295049893, LR: 0.0003 +[2026-03-04 16:02:28] (step=0057886) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 11.325767951477205, LR: 0.0003 +[2026-03-04 16:02:36] (step=0057887) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 11.32596360790452, LR: 0.0003 +[2026-03-04 16:02:44] (step=0057888) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.326159264331833, LR: 0.0003 +[2026-03-04 16:02:52] (step=0057889) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.326354920759147, LR: 0.0003 +[2026-03-04 16:03:00] (step=0057890) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.326550577186461, LR: 0.0003 +[2026-03-04 16:03:08] (step=0057891) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 11.326746233613774, LR: 0.0003 +[2026-03-04 16:03:15] (step=0057892) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 11.326941890041088, LR: 0.0003 +[2026-03-04 16:03:23] (step=0057893) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.327137546468402, LR: 0.0003 +[2026-03-04 16:03:31] (step=0057894) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.327333202895716, LR: 0.0003 +[2026-03-04 16:03:39] (step=0057895) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 11.327528859323028, LR: 0.0003 +[2026-03-04 16:03:47] (step=0057896) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.327724515750342, LR: 0.0003 +[2026-03-04 16:03:55] (step=0057897) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.327920172177656, LR: 0.0003 +[2026-03-04 16:04:03] (step=0057898) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.32811582860497, LR: 0.0003 +[2026-03-04 16:04:10] (step=0057899) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 11.328311485032284, LR: 0.0003 +[2026-03-04 16:04:18] (step=0057900) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.328507141459596, LR: 0.0003 +[2026-03-04 16:04:26] (step=0057901) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.32870279788691, LR: 0.0003 +[2026-03-04 16:04:34] (step=0057902) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.328898454314224, LR: 0.0003 +[2026-03-04 16:04:42] (step=0057903) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 11.329094110741538, LR: 0.0003 +[2026-03-04 16:04:50] (step=0057904) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.329289767168852, LR: 0.0003 +[2026-03-04 16:04:58] (step=0057905) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.329485423596164, LR: 0.0003 +[2026-03-04 16:05:06] (step=0057906) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.329681080023478, LR: 0.0003 +[2026-03-04 16:05:13] (step=0057907) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 11.329876736450792, LR: 0.0003 +[2026-03-04 16:05:21] (step=0057908) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 11.330072392878106, LR: 0.0003 +[2026-03-04 16:05:29] (step=0057909) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.33026804930542, LR: 0.0003 +[2026-03-04 16:05:37] (step=0057910) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.330463705732733, LR: 0.0003 +[2026-03-04 16:05:45] (step=0057911) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.330659362160047, LR: 0.0003 +[2026-03-04 16:05:53] (step=0057912) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.33085501858736, LR: 0.0003 +[2026-03-04 16:06:01] (step=0057913) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.331050675014675, LR: 0.0003 +[2026-03-04 16:06:08] (step=0057914) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.331246331441989, LR: 0.0003 +[2026-03-04 16:06:16] (step=0057915) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.331441987869301, LR: 0.0003 +[2026-03-04 16:06:24] (step=0057916) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.331637644296615, LR: 0.0003 +[2026-03-04 16:06:32] (step=0057917) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.331833300723929, LR: 0.0003 +[2026-03-04 16:06:40] (step=0057918) Train Loss: 0.4616, Train Steps/Sec: 0.12, Epoch: 11.332028957151243, LR: 0.0003 +[2026-03-04 16:06:48] (step=0057919) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.332224613578557, LR: 0.0003 +[2026-03-04 16:06:56] (step=0057920) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.33242027000587, LR: 0.0003 +[2026-03-04 16:07:04] (step=0057921) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.332615926433183, LR: 0.0003 +[2026-03-04 16:07:12] (step=0057922) Train Loss: 0.4482, Train Steps/Sec: 0.12, Epoch: 11.332811582860497, LR: 0.0003 +[2026-03-04 16:07:20] (step=0057923) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.333007239287811, LR: 0.0003 +[2026-03-04 16:07:27] (step=0057924) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.333202895715123, LR: 0.0003 +[2026-03-04 16:07:35] (step=0057925) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.333398552142437, LR: 0.0003 +[2026-03-04 16:07:43] (step=0057926) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.333594208569751, LR: 0.0003 +[2026-03-04 16:07:51] (step=0057927) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.333789864997065, LR: 0.0003 +[2026-03-04 16:07:59] (step=0057928) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.33398552142438, LR: 0.0003 +[2026-03-04 16:08:07] (step=0057929) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.334181177851692, LR: 0.0003 +[2026-03-04 16:08:15] (step=0057930) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.334376834279006, LR: 0.0003 +[2026-03-04 16:08:22] (step=0057931) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.33457249070632, LR: 0.0003 +[2026-03-04 16:08:30] (step=0057932) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.334768147133634, LR: 0.0003 +[2026-03-04 16:08:38] (step=0057933) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.334963803560948, LR: 0.0003 +[2026-03-04 16:08:46] (step=0057934) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.33515945998826, LR: 0.0003 +[2026-03-04 16:08:54] (step=0057935) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.335355116415574, LR: 0.0003 +[2026-03-04 16:09:02] (step=0057936) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.335550772842888, LR: 0.0003 +[2026-03-04 16:09:10] (step=0057937) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.335746429270202, LR: 0.0003 +[2026-03-04 16:09:18] (step=0057938) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.335942085697516, LR: 0.0003 +[2026-03-04 16:09:25] (step=0057939) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.336137742124828, LR: 0.0003 +[2026-03-04 16:09:33] (step=0057940) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.336333398552142, LR: 0.0003 +[2026-03-04 16:09:41] (step=0057941) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.336529054979456, LR: 0.0003 +[2026-03-04 16:09:49] (step=0057942) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.33672471140677, LR: 0.0003 +[2026-03-04 16:09:57] (step=0057943) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.336920367834084, LR: 0.0003 +[2026-03-04 16:10:05] (step=0057944) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.337116024261396, LR: 0.0003 +[2026-03-04 16:10:13] (step=0057945) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.33731168068871, LR: 0.0003 +[2026-03-04 16:10:20] (step=0057946) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.337507337116024, LR: 0.0003 +[2026-03-04 16:10:28] (step=0057947) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.337702993543338, LR: 0.0003 +[2026-03-04 16:10:36] (step=0057948) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.33789864997065, LR: 0.0003 +[2026-03-04 16:10:44] (step=0057949) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.338094306397965, LR: 0.0003 +[2026-03-04 16:10:52] (step=0057950) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 11.338289962825279, LR: 0.0003 +[2026-03-04 16:11:00] (step=0057951) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.338485619252593, LR: 0.0003 +[2026-03-04 16:11:08] (step=0057952) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.338681275679907, LR: 0.0003 +[2026-03-04 16:11:15] (step=0057953) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.338876932107219, LR: 0.0003 +[2026-03-04 16:11:23] (step=0057954) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.339072588534533, LR: 0.0003 +[2026-03-04 16:11:31] (step=0057955) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.339268244961847, LR: 0.0003 +[2026-03-04 16:11:39] (step=0057956) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.339463901389161, LR: 0.0003 +[2026-03-04 16:11:47] (step=0057957) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.339659557816475, LR: 0.0003 +[2026-03-04 16:11:55] (step=0057958) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.339855214243787, LR: 0.0003 +[2026-03-04 16:12:03] (step=0057959) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.340050870671101, LR: 0.0003 +[2026-03-04 16:12:10] (step=0057960) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 11.340246527098415, LR: 0.0003 +[2026-03-04 16:12:18] (step=0057961) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.34044218352573, LR: 0.0003 +[2026-03-04 16:12:26] (step=0057962) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.340637839953043, LR: 0.0003 +[2026-03-04 16:12:34] (step=0057963) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.340833496380355, LR: 0.0003 +[2026-03-04 16:12:42] (step=0057964) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.34102915280767, LR: 0.0003 +[2026-03-04 16:12:50] (step=0057965) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.341224809234983, LR: 0.0003 +[2026-03-04 16:12:58] (step=0057966) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.341420465662297, LR: 0.0003 +[2026-03-04 16:13:06] (step=0057967) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.341616122089611, LR: 0.0003 +[2026-03-04 16:13:13] (step=0057968) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.341811778516924, LR: 0.0003 +[2026-03-04 16:13:21] (step=0057969) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 11.342007434944238, LR: 0.0003 +[2026-03-04 16:13:29] (step=0057970) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 11.342203091371552, LR: 0.0003 +[2026-03-04 16:13:37] (step=0057971) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 11.342398747798866, LR: 0.0003 +[2026-03-04 16:13:45] (step=0057972) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.34259440422618, LR: 0.0003 +[2026-03-04 16:13:53] (step=0057973) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 11.342790060653492, LR: 0.0003 +[2026-03-04 16:14:01] (step=0057974) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.342985717080806, LR: 0.0003 +[2026-03-04 16:14:08] (step=0057975) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.34318137350812, LR: 0.0003 +[2026-03-04 16:14:16] (step=0057976) Train Loss: 0.4381, Train Steps/Sec: 0.12, Epoch: 11.343377029935434, LR: 0.0003 +[2026-03-04 16:14:24] (step=0057977) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.343572686362746, LR: 0.0003 +[2026-03-04 16:14:32] (step=0057978) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.34376834279006, LR: 0.0003 +[2026-03-04 16:14:40] (step=0057979) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.343963999217374, LR: 0.0003 +[2026-03-04 16:14:48] (step=0057980) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.344159655644688, LR: 0.0003 +[2026-03-04 16:14:56] (step=0057981) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.344355312072002, LR: 0.0003 +[2026-03-04 16:15:04] (step=0057982) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.344550968499314, LR: 0.0003 +[2026-03-04 16:15:12] (step=0057983) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.344746624926628, LR: 0.0003 +[2026-03-04 16:15:19] (step=0057984) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.344942281353942, LR: 0.0003 +[2026-03-04 16:15:27] (step=0057985) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.345137937781256, LR: 0.0003 +[2026-03-04 16:15:35] (step=0057986) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.34533359420857, LR: 0.0003 +[2026-03-04 16:15:43] (step=0057987) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.345529250635883, LR: 0.0003 +[2026-03-04 16:15:51] (step=0057988) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.345724907063197, LR: 0.0003 +[2026-03-04 16:15:59] (step=0057989) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.34592056349051, LR: 0.0003 +[2026-03-04 16:16:07] (step=0057990) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.346116219917825, LR: 0.0003 +[2026-03-04 16:16:14] (step=0057991) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.346311876345139, LR: 0.0003 +[2026-03-04 16:16:22] (step=0057992) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.346507532772451, LR: 0.0003 +[2026-03-04 16:16:30] (step=0057993) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.346703189199765, LR: 0.0003 +[2026-03-04 16:16:38] (step=0057994) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.346898845627079, LR: 0.0003 +[2026-03-04 16:16:46] (step=0057995) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.347094502054393, LR: 0.0003 +[2026-03-04 16:16:54] (step=0057996) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.347290158481707, LR: 0.0003 +[2026-03-04 16:17:02] (step=0057997) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.34748581490902, LR: 0.0003 +[2026-03-04 16:17:09] (step=0057998) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.347681471336333, LR: 0.0003 +[2026-03-04 16:17:17] (step=0057999) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.347877127763647, LR: 0.0003 +[2026-03-04 16:17:25] (step=0058000) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.348072784190961, LR: 0.0003 +[2026-03-04 16:17:25] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0058000/ +[2026-03-04 16:17:33] (step=0058001) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.348268440618273, LR: 0.0003 +[2026-03-04 16:17:41] (step=0058002) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.348464097045587, LR: 0.0003 +[2026-03-04 16:17:49] (step=0058003) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.348659753472901, LR: 0.0003 +[2026-03-04 16:17:57] (step=0058004) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.348855409900215, LR: 0.0003 +[2026-03-04 16:18:05] (step=0058005) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.34905106632753, LR: 0.0003 +[2026-03-04 16:18:12] (step=0058006) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.349246722754842, LR: 0.0003 +[2026-03-04 16:18:20] (step=0058007) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.349442379182156, LR: 0.0003 +[2026-03-04 16:18:28] (step=0058008) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.34963803560947, LR: 0.0003 +[2026-03-04 16:18:36] (step=0058009) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.349833692036784, LR: 0.0003 +[2026-03-04 16:18:44] (step=0058010) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 11.350029348464098, LR: 0.0003 +[2026-03-04 16:18:52] (step=0058011) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.35022500489141, LR: 0.0003 +[2026-03-04 16:19:00] (step=0058012) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.350420661318724, LR: 0.0003 +[2026-03-04 16:19:08] (step=0058013) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 11.350616317746038, LR: 0.0003 +[2026-03-04 16:19:15] (step=0058014) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 11.350811974173352, LR: 0.0003 +[2026-03-04 16:19:23] (step=0058015) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.351007630600666, LR: 0.0003 +[2026-03-04 16:19:31] (step=0058016) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 11.351203287027978, LR: 0.0003 +[2026-03-04 16:19:39] (step=0058017) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.351398943455292, LR: 0.0003 +[2026-03-04 16:19:47] (step=0058018) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 11.351594599882606, LR: 0.0003 +[2026-03-04 16:19:55] (step=0058019) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.35179025630992, LR: 0.0003 +[2026-03-04 16:20:03] (step=0058020) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.351985912737234, LR: 0.0003 +[2026-03-04 16:20:10] (step=0058021) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.352181569164546, LR: 0.0003 +[2026-03-04 16:20:18] (step=0058022) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.35237722559186, LR: 0.0003 +[2026-03-04 16:20:26] (step=0058023) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.352572882019174, LR: 0.0003 +[2026-03-04 16:20:34] (step=0058024) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.352768538446488, LR: 0.0003 +[2026-03-04 16:20:42] (step=0058025) Train Loss: 0.4423, Train Steps/Sec: 0.12, Epoch: 11.352964194873802, LR: 0.0003 +[2026-03-04 16:20:50] (step=0058026) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.353159851301115, LR: 0.0003 +[2026-03-04 16:20:58] (step=0058027) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 11.353355507728429, LR: 0.0003 +[2026-03-04 16:21:06] (step=0058028) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.353551164155743, LR: 0.0003 +[2026-03-04 16:21:14] (step=0058029) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.353746820583057, LR: 0.0003 +[2026-03-04 16:21:21] (step=0058030) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.353942477010369, LR: 0.0003 +[2026-03-04 16:21:29] (step=0058031) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.354138133437683, LR: 0.0003 +[2026-03-04 16:21:37] (step=0058032) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.354333789864997, LR: 0.0003 +[2026-03-04 16:21:45] (step=0058033) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.354529446292311, LR: 0.0003 +[2026-03-04 16:21:53] (step=0058034) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.354725102719625, LR: 0.0003 +[2026-03-04 16:22:01] (step=0058035) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.354920759146937, LR: 0.0003 +[2026-03-04 16:22:09] (step=0058036) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.355116415574251, LR: 0.0003 +[2026-03-04 16:22:16] (step=0058037) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.355312072001565, LR: 0.0003 +[2026-03-04 16:22:24] (step=0058038) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.35550772842888, LR: 0.0003 +[2026-03-04 16:22:32] (step=0058039) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.355703384856193, LR: 0.0003 +[2026-03-04 16:22:40] (step=0058040) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.355899041283505, LR: 0.0003 +[2026-03-04 16:22:48] (step=0058041) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.35609469771082, LR: 0.0003 +[2026-03-04 16:22:56] (step=0058042) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.356290354138133, LR: 0.0003 +[2026-03-04 16:23:04] (step=0058043) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.356486010565447, LR: 0.0003 +[2026-03-04 16:23:12] (step=0058044) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.356681666992761, LR: 0.0003 +[2026-03-04 16:23:19] (step=0058045) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.356877323420074, LR: 0.0003 +[2026-03-04 16:23:27] (step=0058046) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.357072979847388, LR: 0.0003 +[2026-03-04 16:23:35] (step=0058047) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.357268636274702, LR: 0.0003 +[2026-03-04 16:23:43] (step=0058048) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.357464292702016, LR: 0.0003 +[2026-03-04 16:23:51] (step=0058049) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.35765994912933, LR: 0.0003 +[2026-03-04 16:23:59] (step=0058050) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.357855605556642, LR: 0.0003 +[2026-03-04 16:24:07] (step=0058051) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.358051261983956, LR: 0.0003 +[2026-03-04 16:24:14] (step=0058052) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.35824691841127, LR: 0.0003 +[2026-03-04 16:24:22] (step=0058053) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.358442574838584, LR: 0.0003 +[2026-03-04 16:24:30] (step=0058054) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.358638231265896, LR: 0.0003 +[2026-03-04 16:24:38] (step=0058055) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.35883388769321, LR: 0.0003 +[2026-03-04 16:24:46] (step=0058056) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.359029544120524, LR: 0.0003 +[2026-03-04 16:24:54] (step=0058057) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.359225200547838, LR: 0.0003 +[2026-03-04 16:25:02] (step=0058058) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.359420856975152, LR: 0.0003 +[2026-03-04 16:25:09] (step=0058059) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.359616513402464, LR: 0.0003 +[2026-03-04 16:25:17] (step=0058060) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.359812169829778, LR: 0.0003 +[2026-03-04 16:25:25] (step=0058061) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.360007826257092, LR: 0.0003 +[2026-03-04 16:25:33] (step=0058062) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.360203482684406, LR: 0.0003 +[2026-03-04 16:25:41] (step=0058063) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.36039913911172, LR: 0.0003 +[2026-03-04 16:25:49] (step=0058064) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.360594795539033, LR: 0.0003 +[2026-03-04 16:25:57] (step=0058065) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.360790451966347, LR: 0.0003 +[2026-03-04 16:26:05] (step=0058066) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.36098610839366, LR: 0.0003 +[2026-03-04 16:26:13] (step=0058067) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.361181764820975, LR: 0.0003 +[2026-03-04 16:26:20] (step=0058068) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.361377421248289, LR: 0.0003 +[2026-03-04 16:26:28] (step=0058069) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.361573077675601, LR: 0.0003 +[2026-03-04 16:26:36] (step=0058070) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.361768734102915, LR: 0.0003 +[2026-03-04 16:26:44] (step=0058071) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.361964390530229, LR: 0.0003 +[2026-03-04 16:26:52] (step=0058072) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 11.362160046957543, LR: 0.0003 +[2026-03-04 16:27:00] (step=0058073) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.362355703384857, LR: 0.0003 +[2026-03-04 16:27:07] (step=0058074) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.36255135981217, LR: 0.0003 +[2026-03-04 16:27:16] (step=0058075) Train Loss: 0.4434, Train Steps/Sec: 0.12, Epoch: 11.362747016239483, LR: 0.0003 +[2026-03-04 16:27:23] (step=0058076) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 11.362942672666797, LR: 0.0003 +[2026-03-04 16:27:31] (step=0058077) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.363138329094111, LR: 0.0003 +[2026-03-04 16:27:39] (step=0058078) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 11.363333985521425, LR: 0.0003 +[2026-03-04 16:27:47] (step=0058079) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.363529641948737, LR: 0.0003 +[2026-03-04 16:27:55] (step=0058080) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.363725298376051, LR: 0.0003 +[2026-03-04 16:28:03] (step=0058081) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.363920954803366, LR: 0.0003 +[2026-03-04 16:28:11] (step=0058082) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.36411661123068, LR: 0.0003 +[2026-03-04 16:28:18] (step=0058083) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.364312267657992, LR: 0.0003 +[2026-03-04 16:28:26] (step=0058084) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.364507924085306, LR: 0.0003 +[2026-03-04 16:28:34] (step=0058085) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.36470358051262, LR: 0.0003 +[2026-03-04 16:28:42] (step=0058086) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.364899236939934, LR: 0.0003 +[2026-03-04 16:28:50] (step=0058087) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.365094893367248, LR: 0.0003 +[2026-03-04 16:28:58] (step=0058088) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.36529054979456, LR: 0.0003 +[2026-03-04 16:29:06] (step=0058089) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.365486206221874, LR: 0.0003 +[2026-03-04 16:29:13] (step=0058090) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.365681862649188, LR: 0.0003 +[2026-03-04 16:29:21] (step=0058091) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.365877519076502, LR: 0.0003 +[2026-03-04 16:29:29] (step=0058092) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.366073175503816, LR: 0.0003 +[2026-03-04 16:29:37] (step=0058093) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.366268831931128, LR: 0.0003 +[2026-03-04 16:29:45] (step=0058094) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.366464488358442, LR: 0.0003 +[2026-03-04 16:29:53] (step=0058095) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.366660144785756, LR: 0.0003 +[2026-03-04 16:30:01] (step=0058096) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.36685580121307, LR: 0.0003 +[2026-03-04 16:30:08] (step=0058097) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.367051457640384, LR: 0.0003 +[2026-03-04 16:30:16] (step=0058098) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.367247114067697, LR: 0.0003 +[2026-03-04 16:30:24] (step=0058099) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.36744277049501, LR: 0.0003 +[2026-03-04 16:30:32] (step=0058100) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.367638426922325, LR: 0.0003 +[2026-03-04 16:30:40] (step=0058101) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.367834083349639, LR: 0.0003 +[2026-03-04 16:30:48] (step=0058102) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.368029739776953, LR: 0.0003 +[2026-03-04 16:30:56] (step=0058103) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.368225396204265, LR: 0.0003 +[2026-03-04 16:31:03] (step=0058104) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.368421052631579, LR: 0.0003 +[2026-03-04 16:31:11] (step=0058105) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.368616709058893, LR: 0.0003 +[2026-03-04 16:31:19] (step=0058106) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.368812365486207, LR: 0.0003 +[2026-03-04 16:31:27] (step=0058107) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.369008021913519, LR: 0.0003 +[2026-03-04 16:31:35] (step=0058108) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.369203678340833, LR: 0.0003 +[2026-03-04 16:31:43] (step=0058109) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.369399334768147, LR: 0.0003 +[2026-03-04 16:31:51] (step=0058110) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 11.369594991195461, LR: 0.0003 +[2026-03-04 16:31:59] (step=0058111) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 11.369790647622775, LR: 0.0003 +[2026-03-04 16:32:06] (step=0058112) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.369986304050087, LR: 0.0003 +[2026-03-04 16:32:14] (step=0058113) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 11.370181960477401, LR: 0.0003 +[2026-03-04 16:32:22] (step=0058114) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.370377616904715, LR: 0.0003 +[2026-03-04 16:32:30] (step=0058115) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.37057327333203, LR: 0.0003 +[2026-03-04 16:32:38] (step=0058116) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 11.370768929759343, LR: 0.0003 +[2026-03-04 16:32:46] (step=0058117) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.370964586186656, LR: 0.0003 +[2026-03-04 16:32:54] (step=0058118) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.37116024261397, LR: 0.0003 +[2026-03-04 16:33:02] (step=0058119) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.371355899041284, LR: 0.0003 +[2026-03-04 16:33:09] (step=0058120) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.371551555468598, LR: 0.0003 +[2026-03-04 16:33:17] (step=0058121) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 11.371747211895912, LR: 0.0003 +[2026-03-04 16:33:25] (step=0058122) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.371942868323224, LR: 0.0003 +[2026-03-04 16:33:33] (step=0058123) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.372138524750538, LR: 0.0003 +[2026-03-04 16:33:41] (step=0058124) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.372334181177852, LR: 0.0003 +[2026-03-04 16:33:49] (step=0058125) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.372529837605166, LR: 0.0003 +[2026-03-04 16:33:57] (step=0058126) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.37272549403248, LR: 0.0003 +[2026-03-04 16:34:05] (step=0058127) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.372921150459792, LR: 0.0003 +[2026-03-04 16:34:13] (step=0058128) Train Loss: 0.4572, Train Steps/Sec: 0.12, Epoch: 11.373116806887106, LR: 0.0003 +[2026-03-04 16:34:20] (step=0058129) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.37331246331442, LR: 0.0003 +[2026-03-04 16:34:28] (step=0058130) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.373508119741734, LR: 0.0003 +[2026-03-04 16:34:36] (step=0058131) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.373703776169048, LR: 0.0003 +[2026-03-04 16:34:44] (step=0058132) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.37389943259636, LR: 0.0003 +[2026-03-04 16:34:52] (step=0058133) Train Loss: 0.4237, Train Steps/Sec: 0.13, Epoch: 11.374095089023674, LR: 0.0003 +[2026-03-04 16:35:00] (step=0058134) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.374290745450988, LR: 0.0003 +[2026-03-04 16:35:08] (step=0058135) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.374486401878302, LR: 0.0003 +[2026-03-04 16:35:16] (step=0058136) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.374682058305615, LR: 0.0003 +[2026-03-04 16:35:23] (step=0058137) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.374877714732929, LR: 0.0003 +[2026-03-04 16:35:31] (step=0058138) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.375073371160243, LR: 0.0003 +[2026-03-04 16:35:39] (step=0058139) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.375269027587557, LR: 0.0003 +[2026-03-04 16:35:47] (step=0058140) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.37546468401487, LR: 0.0003 +[2026-03-04 16:35:55] (step=0058141) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.375660340442183, LR: 0.0003 +[2026-03-04 16:36:03] (step=0058142) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 11.375855996869497, LR: 0.0003 +[2026-03-04 16:36:11] (step=0058143) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.37605165329681, LR: 0.0003 +[2026-03-04 16:36:18] (step=0058144) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.376247309724125, LR: 0.0003 +[2026-03-04 16:36:26] (step=0058145) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.376442966151439, LR: 0.0003 +[2026-03-04 16:36:34] (step=0058146) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 11.376638622578751, LR: 0.0003 +[2026-03-04 16:36:42] (step=0058147) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 11.376834279006065, LR: 0.0003 +[2026-03-04 16:36:50] (step=0058148) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.377029935433379, LR: 0.0003 +[2026-03-04 16:36:58] (step=0058149) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.377225591860693, LR: 0.0003 +[2026-03-04 16:37:06] (step=0058150) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.377421248288007, LR: 0.0003 +[2026-03-04 16:37:13] (step=0058151) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.37761690471532, LR: 0.0003 +[2026-03-04 16:37:21] (step=0058152) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.377812561142633, LR: 0.0003 +[2026-03-04 16:37:29] (step=0058153) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.378008217569947, LR: 0.0003 +[2026-03-04 16:37:37] (step=0058154) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.378203873997261, LR: 0.0003 +[2026-03-04 16:37:45] (step=0058155) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.378399530424575, LR: 0.0003 +[2026-03-04 16:37:53] (step=0058156) Train Loss: 0.4414, Train Steps/Sec: 0.12, Epoch: 11.378595186851888, LR: 0.0003 +[2026-03-04 16:38:01] (step=0058157) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.378790843279202, LR: 0.0003 +[2026-03-04 16:38:09] (step=0058158) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.378986499706516, LR: 0.0003 +[2026-03-04 16:38:16] (step=0058159) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 11.37918215613383, LR: 0.0003 +[2026-03-04 16:38:24] (step=0058160) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.379377812561142, LR: 0.0003 +[2026-03-04 16:38:32] (step=0058161) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.379573468988456, LR: 0.0003 +[2026-03-04 16:38:40] (step=0058162) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.37976912541577, LR: 0.0003 +[2026-03-04 16:38:48] (step=0058163) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.379964781843084, LR: 0.0003 +[2026-03-04 16:38:56] (step=0058164) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.380160438270398, LR: 0.0003 +[2026-03-04 16:39:04] (step=0058165) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.38035609469771, LR: 0.0003 +[2026-03-04 16:39:12] (step=0058166) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.380551751125024, LR: 0.0003 +[2026-03-04 16:39:19] (step=0058167) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.380747407552338, LR: 0.0003 +[2026-03-04 16:39:27] (step=0058168) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.380943063979652, LR: 0.0003 +[2026-03-04 16:39:35] (step=0058169) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.381138720406966, LR: 0.0003 +[2026-03-04 16:39:43] (step=0058170) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.381334376834278, LR: 0.0003 +[2026-03-04 16:39:51] (step=0058171) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 11.381530033261592, LR: 0.0003 +[2026-03-04 16:39:59] (step=0058172) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.381725689688906, LR: 0.0003 +[2026-03-04 16:40:07] (step=0058173) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.38192134611622, LR: 0.0003 +[2026-03-04 16:40:14] (step=0058174) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.382117002543534, LR: 0.0003 +[2026-03-04 16:40:22] (step=0058175) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.382312658970847, LR: 0.0003 +[2026-03-04 16:40:30] (step=0058176) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.38250831539816, LR: 0.0003 +[2026-03-04 16:40:38] (step=0058177) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.382703971825475, LR: 0.0003 +[2026-03-04 16:40:46] (step=0058178) Train Loss: 0.4529, Train Steps/Sec: 0.12, Epoch: 11.382899628252789, LR: 0.0003 +[2026-03-04 16:40:54] (step=0058179) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.383095284680103, LR: 0.0003 +[2026-03-04 16:41:02] (step=0058180) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 11.383290941107415, LR: 0.0003 +[2026-03-04 16:41:10] (step=0058181) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.383486597534729, LR: 0.0003 +[2026-03-04 16:41:18] (step=0058182) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.383682253962043, LR: 0.0003 +[2026-03-04 16:41:25] (step=0058183) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.383877910389357, LR: 0.0003 +[2026-03-04 16:41:33] (step=0058184) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.384073566816669, LR: 0.0003 +[2026-03-04 16:41:41] (step=0058185) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.384269223243983, LR: 0.0003 +[2026-03-04 16:41:49] (step=0058186) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.384464879671297, LR: 0.0003 +[2026-03-04 16:41:57] (step=0058187) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.384660536098611, LR: 0.0003 +[2026-03-04 16:42:05] (step=0058188) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.384856192525925, LR: 0.0003 +[2026-03-04 16:42:13] (step=0058189) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.385051848953237, LR: 0.0003 +[2026-03-04 16:42:20] (step=0058190) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 11.385247505380551, LR: 0.0003 +[2026-03-04 16:42:28] (step=0058191) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.385443161807865, LR: 0.0003 +[2026-03-04 16:42:36] (step=0058192) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.38563881823518, LR: 0.0003 +[2026-03-04 16:42:44] (step=0058193) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.385834474662493, LR: 0.0003 +[2026-03-04 16:42:52] (step=0058194) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.386030131089806, LR: 0.0003 +[2026-03-04 16:43:00] (step=0058195) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.38622578751712, LR: 0.0003 +[2026-03-04 16:43:08] (step=0058196) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.386421443944434, LR: 0.0003 +[2026-03-04 16:43:15] (step=0058197) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.386617100371748, LR: 0.0003 +[2026-03-04 16:43:23] (step=0058198) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.386812756799062, LR: 0.0003 +[2026-03-04 16:43:31] (step=0058199) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.387008413226374, LR: 0.0003 +[2026-03-04 16:43:39] (step=0058200) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.387204069653688, LR: 0.0003 +[2026-03-04 16:43:47] (step=0058201) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.387399726081002, LR: 0.0003 +[2026-03-04 16:43:55] (step=0058202) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.387595382508316, LR: 0.0003 +[2026-03-04 16:44:03] (step=0058203) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 11.38779103893563, LR: 0.0003 +[2026-03-04 16:44:11] (step=0058204) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.387986695362942, LR: 0.0003 +[2026-03-04 16:44:18] (step=0058205) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.388182351790256, LR: 0.0003 +[2026-03-04 16:44:26] (step=0058206) Train Loss: 0.4494, Train Steps/Sec: 0.12, Epoch: 11.38837800821757, LR: 0.0003 +[2026-03-04 16:44:34] (step=0058207) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.388573664644884, LR: 0.0003 +[2026-03-04 16:44:42] (step=0058208) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.388769321072198, LR: 0.0003 +[2026-03-04 16:44:50] (step=0058209) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.38896497749951, LR: 0.0003 +[2026-03-04 16:44:58] (step=0058210) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.389160633926824, LR: 0.0003 +[2026-03-04 16:45:06] (step=0058211) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.389356290354138, LR: 0.0003 +[2026-03-04 16:45:14] (step=0058212) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.389551946781452, LR: 0.0003 +[2026-03-04 16:45:22] (step=0058213) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.389747603208765, LR: 0.0003 +[2026-03-04 16:45:29] (step=0058214) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.389943259636079, LR: 0.0003 +[2026-03-04 16:45:37] (step=0058215) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.390138916063393, LR: 0.0003 +[2026-03-04 16:45:45] (step=0058216) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.390334572490707, LR: 0.0003 +[2026-03-04 16:45:53] (step=0058217) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.39053022891802, LR: 0.0003 +[2026-03-04 16:46:01] (step=0058218) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.390725885345333, LR: 0.0003 +[2026-03-04 16:46:09] (step=0058219) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.390921541772647, LR: 0.0003 +[2026-03-04 16:46:17] (step=0058220) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.39111719819996, LR: 0.0003 +[2026-03-04 16:46:24] (step=0058221) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.391312854627275, LR: 0.0003 +[2026-03-04 16:46:32] (step=0058222) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.391508511054589, LR: 0.0003 +[2026-03-04 16:46:40] (step=0058223) Train Loss: 0.4496, Train Steps/Sec: 0.12, Epoch: 11.391704167481901, LR: 0.0003 +[2026-03-04 16:46:48] (step=0058224) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 11.391899823909215, LR: 0.0003 +[2026-03-04 16:46:56] (step=0058225) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.392095480336529, LR: 0.0003 +[2026-03-04 16:47:04] (step=0058226) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.392291136763843, LR: 0.0003 +[2026-03-04 16:47:12] (step=0058227) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.392486793191157, LR: 0.0003 +[2026-03-04 16:47:20] (step=0058228) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.39268244961847, LR: 0.0003 +[2026-03-04 16:47:28] (step=0058229) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.392878106045783, LR: 0.0003 +[2026-03-04 16:47:35] (step=0058230) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.393073762473097, LR: 0.0003 +[2026-03-04 16:47:43] (step=0058231) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.393269418900411, LR: 0.0003 +[2026-03-04 16:47:51] (step=0058232) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.393465075327725, LR: 0.0003 +[2026-03-04 16:47:59] (step=0058233) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.393660731755038, LR: 0.0003 +[2026-03-04 16:48:07] (step=0058234) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.393856388182352, LR: 0.0003 +[2026-03-04 16:48:15] (step=0058235) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.394052044609666, LR: 0.0003 +[2026-03-04 16:48:23] (step=0058236) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.39424770103698, LR: 0.0003 +[2026-03-04 16:48:30] (step=0058237) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 11.394443357464292, LR: 0.0003 +[2026-03-04 16:48:38] (step=0058238) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.394639013891606, LR: 0.0003 +[2026-03-04 16:48:46] (step=0058239) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.39483467031892, LR: 0.0003 +[2026-03-04 16:48:54] (step=0058240) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.395030326746234, LR: 0.0003 +[2026-03-04 16:49:02] (step=0058241) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.395225983173548, LR: 0.0003 +[2026-03-04 16:49:10] (step=0058242) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.39542163960086, LR: 0.0003 +[2026-03-04 16:49:18] (step=0058243) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.395617296028174, LR: 0.0003 +[2026-03-04 16:49:25] (step=0058244) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.395812952455488, LR: 0.0003 +[2026-03-04 16:49:33] (step=0058245) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.396008608882802, LR: 0.0003 +[2026-03-04 16:49:41] (step=0058246) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 11.396204265310116, LR: 0.0003 +[2026-03-04 16:49:49] (step=0058247) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.396399921737428, LR: 0.0003 +[2026-03-04 16:49:57] (step=0058248) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.396595578164742, LR: 0.0003 +[2026-03-04 16:50:05] (step=0058249) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.396791234592056, LR: 0.0003 +[2026-03-04 16:50:13] (step=0058250) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.39698689101937, LR: 0.0003 +[2026-03-04 16:50:20] (step=0058251) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.397182547446684, LR: 0.0003 +[2026-03-04 16:50:28] (step=0058252) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.397378203873997, LR: 0.0003 +[2026-03-04 16:50:36] (step=0058253) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.39757386030131, LR: 0.0003 +[2026-03-04 16:50:44] (step=0058254) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.397769516728625, LR: 0.0003 +[2026-03-04 16:50:52] (step=0058255) Train Loss: 0.4462, Train Steps/Sec: 0.12, Epoch: 11.397965173155939, LR: 0.0003 +[2026-03-04 16:51:00] (step=0058256) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.398160829583253, LR: 0.0003 +[2026-03-04 16:51:08] (step=0058257) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.398356486010565, LR: 0.0003 +[2026-03-04 16:51:16] (step=0058258) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.398552142437879, LR: 0.0003 +[2026-03-04 16:51:24] (step=0058259) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.398747798865193, LR: 0.0003 +[2026-03-04 16:51:31] (step=0058260) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.398943455292507, LR: 0.0003 +[2026-03-04 16:51:39] (step=0058261) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.39913911171982, LR: 0.0003 +[2026-03-04 16:51:47] (step=0058262) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.399334768147133, LR: 0.0003 +[2026-03-04 16:51:55] (step=0058263) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.399530424574447, LR: 0.0003 +[2026-03-04 16:52:03] (step=0058264) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.399726081001761, LR: 0.0003 +[2026-03-04 16:52:11] (step=0058265) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.399921737429075, LR: 0.0003 +[2026-03-04 16:52:19] (step=0058266) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.400117393856387, LR: 0.0003 +[2026-03-04 16:52:27] (step=0058267) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.400313050283701, LR: 0.0003 +[2026-03-04 16:52:34] (step=0058268) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 11.400508706711015, LR: 0.0003 +[2026-03-04 16:52:42] (step=0058269) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.40070436313833, LR: 0.0003 +[2026-03-04 16:52:50] (step=0058270) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.400900019565643, LR: 0.0003 +[2026-03-04 16:52:58] (step=0058271) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.401095675992956, LR: 0.0003 +[2026-03-04 16:53:06] (step=0058272) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.40129133242027, LR: 0.0003 +[2026-03-04 16:53:14] (step=0058273) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.401486988847584, LR: 0.0003 +[2026-03-04 16:53:22] (step=0058274) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.401682645274898, LR: 0.0003 +[2026-03-04 16:53:30] (step=0058275) Train Loss: 0.4537, Train Steps/Sec: 0.12, Epoch: 11.401878301702212, LR: 0.0003 +[2026-03-04 16:53:38] (step=0058276) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.402073958129524, LR: 0.0003 +[2026-03-04 16:53:45] (step=0058277) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.402269614556838, LR: 0.0003 +[2026-03-04 16:53:53] (step=0058278) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.402465270984152, LR: 0.0003 +[2026-03-04 16:54:01] (step=0058279) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.402660927411466, LR: 0.0003 +[2026-03-04 16:54:09] (step=0058280) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 11.40285658383878, LR: 0.0003 +[2026-03-04 16:54:17] (step=0058281) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.403052240266092, LR: 0.0003 +[2026-03-04 16:54:25] (step=0058282) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.403247896693406, LR: 0.0003 +[2026-03-04 16:54:33] (step=0058283) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.40344355312072, LR: 0.0003 +[2026-03-04 16:54:40] (step=0058284) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.403639209548034, LR: 0.0003 +[2026-03-04 16:54:48] (step=0058285) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.403834865975348, LR: 0.0003 +[2026-03-04 16:54:56] (step=0058286) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.40403052240266, LR: 0.0003 +[2026-03-04 16:55:04] (step=0058287) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.404226178829974, LR: 0.0003 +[2026-03-04 16:55:12] (step=0058288) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.404421835257288, LR: 0.0003 +[2026-03-04 16:55:20] (step=0058289) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.404617491684602, LR: 0.0003 +[2026-03-04 16:55:28] (step=0058290) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.404813148111915, LR: 0.0003 +[2026-03-04 16:55:35] (step=0058291) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.405008804539229, LR: 0.0003 +[2026-03-04 16:55:43] (step=0058292) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.405204460966543, LR: 0.0003 +[2026-03-04 16:55:51] (step=0058293) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.405400117393857, LR: 0.0003 +[2026-03-04 16:55:59] (step=0058294) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 11.40559577382117, LR: 0.0003 +[2026-03-04 16:56:07] (step=0058295) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.405791430248483, LR: 0.0003 +[2026-03-04 16:56:15] (step=0058296) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 11.405987086675797, LR: 0.0003 +[2026-03-04 16:56:23] (step=0058297) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 11.40618274310311, LR: 0.0003 +[2026-03-04 16:56:30] (step=0058298) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.406378399530425, LR: 0.0003 +[2026-03-04 16:56:38] (step=0058299) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.406574055957739, LR: 0.0003 +[2026-03-04 16:56:46] (step=0058300) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.406769712385051, LR: 0.0003 +[2026-03-04 16:56:54] (step=0058301) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.406965368812365, LR: 0.0003 +[2026-03-04 16:57:02] (step=0058302) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.40716102523968, LR: 0.0003 +[2026-03-04 16:57:10] (step=0058303) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.407356681666993, LR: 0.0003 +[2026-03-04 16:57:18] (step=0058304) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.407552338094307, LR: 0.0003 +[2026-03-04 16:57:26] (step=0058305) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 11.40774799452162, LR: 0.0003 +[2026-03-04 16:57:33] (step=0058306) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.407943650948933, LR: 0.0003 +[2026-03-04 16:57:41] (step=0058307) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 11.408139307376247, LR: 0.0003 +[2026-03-04 16:57:49] (step=0058308) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.408334963803561, LR: 0.0003 +[2026-03-04 16:57:57] (step=0058309) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.408530620230875, LR: 0.0003 +[2026-03-04 16:58:05] (step=0058310) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.408726276658188, LR: 0.0003 +[2026-03-04 16:58:13] (step=0058311) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.408921933085502, LR: 0.0003 +[2026-03-04 16:58:21] (step=0058312) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.409117589512816, LR: 0.0003 +[2026-03-04 16:58:29] (step=0058313) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 11.40931324594013, LR: 0.0003 +[2026-03-04 16:58:36] (step=0058314) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.409508902367444, LR: 0.0003 +[2026-03-04 16:58:44] (step=0058315) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.409704558794756, LR: 0.0003 +[2026-03-04 16:58:52] (step=0058316) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.40990021522207, LR: 0.0003 +[2026-03-04 16:59:00] (step=0058317) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.410095871649384, LR: 0.0003 +[2026-03-04 16:59:08] (step=0058318) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.410291528076698, LR: 0.0003 +[2026-03-04 16:59:16] (step=0058319) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.41048718450401, LR: 0.0003 +[2026-03-04 16:59:24] (step=0058320) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.410682840931324, LR: 0.0003 +[2026-03-04 16:59:32] (step=0058321) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.410878497358638, LR: 0.0003 +[2026-03-04 16:59:39] (step=0058322) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 11.411074153785952, LR: 0.0003 +[2026-03-04 16:59:47] (step=0058323) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.411269810213266, LR: 0.0003 +[2026-03-04 16:59:55] (step=0058324) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.411465466640578, LR: 0.0003 +[2026-03-04 17:00:03] (step=0058325) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.411661123067892, LR: 0.0003 +[2026-03-04 17:00:11] (step=0058326) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.411856779495206, LR: 0.0003 +[2026-03-04 17:00:19] (step=0058327) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.41205243592252, LR: 0.0003 +[2026-03-04 17:00:27] (step=0058328) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.412248092349834, LR: 0.0003 +[2026-03-04 17:00:35] (step=0058329) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 11.412443748777147, LR: 0.0003 +[2026-03-04 17:00:42] (step=0058330) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.41263940520446, LR: 0.0003 +[2026-03-04 17:00:50] (step=0058331) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.412835061631775, LR: 0.0003 +[2026-03-04 17:00:58] (step=0058332) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.413030718059089, LR: 0.0003 +[2026-03-04 17:01:06] (step=0058333) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.413226374486403, LR: 0.0003 +[2026-03-04 17:01:14] (step=0058334) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.413422030913715, LR: 0.0003 +[2026-03-04 17:01:22] (step=0058335) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.413617687341029, LR: 0.0003 +[2026-03-04 17:01:30] (step=0058336) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.413813343768343, LR: 0.0003 +[2026-03-04 17:01:37] (step=0058337) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 11.414009000195657, LR: 0.0003 +[2026-03-04 17:01:45] (step=0058338) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.414204656622971, LR: 0.0003 +[2026-03-04 17:01:53] (step=0058339) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.414400313050283, LR: 0.0003 +[2026-03-04 17:02:01] (step=0058340) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.414595969477597, LR: 0.0003 +[2026-03-04 17:02:09] (step=0058341) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.414791625904911, LR: 0.0003 +[2026-03-04 17:02:17] (step=0058342) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 11.414987282332225, LR: 0.0003 +[2026-03-04 17:02:25] (step=0058343) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.415182938759537, LR: 0.0003 +[2026-03-04 17:02:33] (step=0058344) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.415378595186851, LR: 0.0003 +[2026-03-04 17:02:40] (step=0058345) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.415574251614165, LR: 0.0003 +[2026-03-04 17:02:48] (step=0058346) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.41576990804148, LR: 0.0003 +[2026-03-04 17:02:56] (step=0058347) Train Loss: 0.4617, Train Steps/Sec: 0.13, Epoch: 11.415965564468793, LR: 0.0003 +[2026-03-04 17:03:04] (step=0058348) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.416161220896106, LR: 0.0003 +[2026-03-04 17:03:12] (step=0058349) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.41635687732342, LR: 0.0003 +[2026-03-04 17:03:20] (step=0058350) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 11.416552533750734, LR: 0.0003 +[2026-03-04 17:03:28] (step=0058351) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.416748190178048, LR: 0.0003 +[2026-03-04 17:03:35] (step=0058352) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.416943846605362, LR: 0.0003 +[2026-03-04 17:03:43] (step=0058353) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.417139503032674, LR: 0.0003 +[2026-03-04 17:03:51] (step=0058354) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.417335159459988, LR: 0.0003 +[2026-03-04 17:03:59] (step=0058355) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.417530815887302, LR: 0.0003 +[2026-03-04 17:04:07] (step=0058356) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.417726472314616, LR: 0.0003 +[2026-03-04 17:04:15] (step=0058357) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.41792212874193, LR: 0.0003 +[2026-03-04 17:04:23] (step=0058358) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.418117785169242, LR: 0.0003 +[2026-03-04 17:04:30] (step=0058359) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.418313441596556, LR: 0.0003 +[2026-03-04 17:04:38] (step=0058360) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.41850909802387, LR: 0.0003 +[2026-03-04 17:04:46] (step=0058361) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.418704754451184, LR: 0.0003 +[2026-03-04 17:04:54] (step=0058362) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 11.418900410878498, LR: 0.0003 +[2026-03-04 17:05:02] (step=0058363) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.41909606730581, LR: 0.0003 +[2026-03-04 17:05:10] (step=0058364) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.419291723733124, LR: 0.0003 +[2026-03-04 17:05:18] (step=0058365) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.419487380160438, LR: 0.0003 +[2026-03-04 17:05:26] (step=0058366) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.419683036587752, LR: 0.0003 +[2026-03-04 17:05:33] (step=0058367) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.419878693015066, LR: 0.0003 +[2026-03-04 17:05:41] (step=0058368) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 11.420074349442379, LR: 0.0003 +[2026-03-04 17:05:49] (step=0058369) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.420270005869693, LR: 0.0003 +[2026-03-04 17:05:57] (step=0058370) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.420465662297007, LR: 0.0003 +[2026-03-04 17:06:05] (step=0058371) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.42066131872432, LR: 0.0003 +[2026-03-04 17:06:13] (step=0058372) Train Loss: 0.4514, Train Steps/Sec: 0.12, Epoch: 11.420856975151633, LR: 0.0003 +[2026-03-04 17:06:21] (step=0058373) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.421052631578947, LR: 0.0003 +[2026-03-04 17:06:29] (step=0058374) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.421248288006261, LR: 0.0003 +[2026-03-04 17:06:36] (step=0058375) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.421443944433575, LR: 0.0003 +[2026-03-04 17:06:44] (step=0058376) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.421639600860889, LR: 0.0003 +[2026-03-04 17:06:52] (step=0058377) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.421835257288201, LR: 0.0003 +[2026-03-04 17:07:00] (step=0058378) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.422030913715515, LR: 0.0003 +[2026-03-04 17:07:08] (step=0058379) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 11.42222657014283, LR: 0.0003 +[2026-03-04 17:07:16] (step=0058380) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 11.422422226570143, LR: 0.0003 +[2026-03-04 17:07:24] (step=0058381) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.422617882997457, LR: 0.0003 +[2026-03-04 17:07:31] (step=0058382) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.42281353942477, LR: 0.0003 +[2026-03-04 17:07:39] (step=0058383) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.423009195852083, LR: 0.0003 +[2026-03-04 17:07:47] (step=0058384) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.423204852279397, LR: 0.0003 +[2026-03-04 17:07:55] (step=0058385) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.423400508706711, LR: 0.0003 +[2026-03-04 17:08:03] (step=0058386) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.423596165134025, LR: 0.0003 +[2026-03-04 17:08:11] (step=0058387) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.423791821561338, LR: 0.0003 +[2026-03-04 17:08:19] (step=0058388) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.423987477988652, LR: 0.0003 +[2026-03-04 17:08:26] (step=0058389) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.424183134415966, LR: 0.0003 +[2026-03-04 17:08:34] (step=0058390) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.42437879084328, LR: 0.0003 +[2026-03-04 17:08:42] (step=0058391) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.424574447270594, LR: 0.0003 +[2026-03-04 17:08:50] (step=0058392) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.424770103697906, LR: 0.0003 +[2026-03-04 17:08:58] (step=0058393) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 11.42496576012522, LR: 0.0003 +[2026-03-04 17:09:06] (step=0058394) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.425161416552534, LR: 0.0003 +[2026-03-04 17:09:14] (step=0058395) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.425357072979848, LR: 0.0003 +[2026-03-04 17:09:21] (step=0058396) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.42555272940716, LR: 0.0003 +[2026-03-04 17:09:29] (step=0058397) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.425748385834474, LR: 0.0003 +[2026-03-04 17:09:37] (step=0058398) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.425944042261788, LR: 0.0003 +[2026-03-04 17:09:45] (step=0058399) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.426139698689102, LR: 0.0003 +[2026-03-04 17:09:53] (step=0058400) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.426335355116416, LR: 0.0003 +[2026-03-04 17:10:01] (step=0058401) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.426531011543728, LR: 0.0003 +[2026-03-04 17:10:09] (step=0058402) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.426726667971042, LR: 0.0003 +[2026-03-04 17:10:17] (step=0058403) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.426922324398356, LR: 0.0003 +[2026-03-04 17:10:25] (step=0058404) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.42711798082567, LR: 0.0003 +[2026-03-04 17:10:32] (step=0058405) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.427313637252984, LR: 0.0003 +[2026-03-04 17:10:40] (step=0058406) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.427509293680297, LR: 0.0003 +[2026-03-04 17:10:48] (step=0058407) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.42770495010761, LR: 0.0003 +[2026-03-04 17:10:56] (step=0058408) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.427900606534925, LR: 0.0003 +[2026-03-04 17:11:04] (step=0058409) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 11.428096262962239, LR: 0.0003 +[2026-03-04 17:11:12] (step=0058410) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.428291919389553, LR: 0.0003 +[2026-03-04 17:11:20] (step=0058411) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.428487575816865, LR: 0.0003 +[2026-03-04 17:11:27] (step=0058412) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 11.428683232244179, LR: 0.0003 +[2026-03-04 17:11:35] (step=0058413) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.428878888671493, LR: 0.0003 +[2026-03-04 17:11:43] (step=0058414) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.429074545098807, LR: 0.0003 +[2026-03-04 17:11:51] (step=0058415) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.429270201526121, LR: 0.0003 +[2026-03-04 17:11:59] (step=0058416) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.429465857953433, LR: 0.0003 +[2026-03-04 17:12:07] (step=0058417) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 11.429661514380747, LR: 0.0003 +[2026-03-04 17:12:15] (step=0058418) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 11.429857170808061, LR: 0.0003 +[2026-03-04 17:12:23] (step=0058419) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.430052827235375, LR: 0.0003 +[2026-03-04 17:12:30] (step=0058420) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.43024848366269, LR: 0.0003 +[2026-03-04 17:12:38] (step=0058421) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 11.430444140090001, LR: 0.0003 +[2026-03-04 17:12:46] (step=0058422) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.430639796517315, LR: 0.0003 +[2026-03-04 17:12:54] (step=0058423) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.43083545294463, LR: 0.0003 +[2026-03-04 17:13:02] (step=0058424) Train Loss: 0.4400, Train Steps/Sec: 0.12, Epoch: 11.431031109371943, LR: 0.0003 +[2026-03-04 17:13:10] (step=0058425) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 11.431226765799256, LR: 0.0003 +[2026-03-04 17:13:18] (step=0058426) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.43142242222657, LR: 0.0003 +[2026-03-04 17:13:26] (step=0058427) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.431618078653884, LR: 0.0003 +[2026-03-04 17:13:34] (step=0058428) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.431813735081198, LR: 0.0003 +[2026-03-04 17:13:41] (step=0058429) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.432009391508512, LR: 0.0003 +[2026-03-04 17:13:49] (step=0058430) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.432205047935824, LR: 0.0003 +[2026-03-04 17:13:57] (step=0058431) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.432400704363138, LR: 0.0003 +[2026-03-04 17:14:05] (step=0058432) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.432596360790452, LR: 0.0003 +[2026-03-04 17:14:13] (step=0058433) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.432792017217766, LR: 0.0003 +[2026-03-04 17:14:21] (step=0058434) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 11.43298767364508, LR: 0.0003 +[2026-03-04 17:14:29] (step=0058435) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.433183330072392, LR: 0.0003 +[2026-03-04 17:14:36] (step=0058436) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.433378986499706, LR: 0.0003 +[2026-03-04 17:14:44] (step=0058437) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.43357464292702, LR: 0.0003 +[2026-03-04 17:14:52] (step=0058438) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 11.433770299354334, LR: 0.0003 +[2026-03-04 17:15:00] (step=0058439) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.433965955781648, LR: 0.0003 +[2026-03-04 17:15:08] (step=0058440) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.43416161220896, LR: 0.0003 +[2026-03-04 17:15:16] (step=0058441) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.434357268636274, LR: 0.0003 +[2026-03-04 17:15:24] (step=0058442) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.434552925063588, LR: 0.0003 +[2026-03-04 17:15:31] (step=0058443) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.434748581490902, LR: 0.0003 +[2026-03-04 17:15:39] (step=0058444) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.434944237918216, LR: 0.0003 +[2026-03-04 17:15:47] (step=0058445) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 11.435139894345529, LR: 0.0003 +[2026-03-04 17:15:55] (step=0058446) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.435335550772843, LR: 0.0003 +[2026-03-04 17:16:03] (step=0058447) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.435531207200157, LR: 0.0003 +[2026-03-04 17:16:11] (step=0058448) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.43572686362747, LR: 0.0003 +[2026-03-04 17:16:19] (step=0058449) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 11.435922520054783, LR: 0.0003 +[2026-03-04 17:16:27] (step=0058450) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.436118176482097, LR: 0.0003 +[2026-03-04 17:16:34] (step=0058451) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.436313832909411, LR: 0.0003 +[2026-03-04 17:16:42] (step=0058452) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.436509489336725, LR: 0.0003 +[2026-03-04 17:16:50] (step=0058453) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.436705145764039, LR: 0.0003 +[2026-03-04 17:16:58] (step=0058454) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.436900802191351, LR: 0.0003 +[2026-03-04 17:17:06] (step=0058455) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.437096458618665, LR: 0.0003 +[2026-03-04 17:17:14] (step=0058456) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.43729211504598, LR: 0.0003 +[2026-03-04 17:17:22] (step=0058457) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.437487771473293, LR: 0.0003 +[2026-03-04 17:17:29] (step=0058458) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.437683427900607, LR: 0.0003 +[2026-03-04 17:17:37] (step=0058459) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.43787908432792, LR: 0.0003 +[2026-03-04 17:17:45] (step=0058460) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.438074740755233, LR: 0.0003 +[2026-03-04 17:17:53] (step=0058461) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.438270397182547, LR: 0.0003 +[2026-03-04 17:18:01] (step=0058462) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.438466053609861, LR: 0.0003 +[2026-03-04 17:18:09] (step=0058463) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.438661710037175, LR: 0.0003 +[2026-03-04 17:18:17] (step=0058464) Train Loss: 0.4218, Train Steps/Sec: 0.13, Epoch: 11.438857366464488, LR: 0.0003 +[2026-03-04 17:18:24] (step=0058465) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.439053022891802, LR: 0.0003 +[2026-03-04 17:18:32] (step=0058466) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.439248679319116, LR: 0.0003 +[2026-03-04 17:18:40] (step=0058467) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.43944433574643, LR: 0.0003 +[2026-03-04 17:18:48] (step=0058468) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.439639992173744, LR: 0.0003 +[2026-03-04 17:18:56] (step=0058469) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.439835648601056, LR: 0.0003 +[2026-03-04 17:19:04] (step=0058470) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.44003130502837, LR: 0.0003 +[2026-03-04 17:19:12] (step=0058471) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.440226961455684, LR: 0.0003 +[2026-03-04 17:19:19] (step=0058472) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.440422617882998, LR: 0.0003 +[2026-03-04 17:19:27] (step=0058473) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.440618274310312, LR: 0.0003 +[2026-03-04 17:19:35] (step=0058474) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.440813930737624, LR: 0.0003 +[2026-03-04 17:19:43] (step=0058475) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.441009587164938, LR: 0.0003 +[2026-03-04 17:19:51] (step=0058476) Train Loss: 0.4443, Train Steps/Sec: 0.12, Epoch: 11.441205243592252, LR: 0.0003 +[2026-03-04 17:19:59] (step=0058477) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.441400900019566, LR: 0.0003 +[2026-03-04 17:20:07] (step=0058478) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.441596556446878, LR: 0.0003 +[2026-03-04 17:20:15] (step=0058479) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.441792212874192, LR: 0.0003 +[2026-03-04 17:20:23] (step=0058480) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.441987869301506, LR: 0.0003 +[2026-03-04 17:20:30] (step=0058481) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.44218352572882, LR: 0.0003 +[2026-03-04 17:20:38] (step=0058482) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.442379182156134, LR: 0.0003 +[2026-03-04 17:20:46] (step=0058483) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.442574838583447, LR: 0.0003 +[2026-03-04 17:20:54] (step=0058484) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.44277049501076, LR: 0.0003 +[2026-03-04 17:21:02] (step=0058485) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.442966151438075, LR: 0.0003 +[2026-03-04 17:21:10] (step=0058486) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.443161807865389, LR: 0.0003 +[2026-03-04 17:21:18] (step=0058487) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.443357464292703, LR: 0.0003 +[2026-03-04 17:21:26] (step=0058488) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.443553120720015, LR: 0.0003 +[2026-03-04 17:21:33] (step=0058489) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.443748777147329, LR: 0.0003 +[2026-03-04 17:21:41] (step=0058490) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.443944433574643, LR: 0.0003 +[2026-03-04 17:21:49] (step=0058491) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.444140090001957, LR: 0.0003 +[2026-03-04 17:21:57] (step=0058492) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.444335746429271, LR: 0.0003 +[2026-03-04 17:22:05] (step=0058493) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 11.444531402856583, LR: 0.0003 +[2026-03-04 17:22:13] (step=0058494) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.444727059283897, LR: 0.0003 +[2026-03-04 17:22:21] (step=0058495) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.444922715711211, LR: 0.0003 +[2026-03-04 17:22:29] (step=0058496) Train Loss: 0.4589, Train Steps/Sec: 0.12, Epoch: 11.445118372138525, LR: 0.0003 +[2026-03-04 17:22:36] (step=0058497) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.44531402856584, LR: 0.0003 +[2026-03-04 17:22:44] (step=0058498) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 11.445509684993151, LR: 0.0003 +[2026-03-04 17:22:52] (step=0058499) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.445705341420465, LR: 0.0003 +[2026-03-04 17:23:00] (step=0058500) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.44590099784778, LR: 0.0003 +[2026-03-04 17:23:00] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0058500/ +[2026-03-04 17:23:08] (step=0058501) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.446096654275093, LR: 0.0003 +[2026-03-04 17:23:16] (step=0058502) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.446292310702406, LR: 0.0003 +[2026-03-04 17:23:24] (step=0058503) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.44648796712972, LR: 0.0003 +[2026-03-04 17:23:32] (step=0058504) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.446683623557034, LR: 0.0003 +[2026-03-04 17:23:39] (step=0058505) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.446879279984348, LR: 0.0003 +[2026-03-04 17:23:47] (step=0058506) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.447074936411662, LR: 0.0003 +[2026-03-04 17:23:55] (step=0058507) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.447270592838974, LR: 0.0003 +[2026-03-04 17:24:03] (step=0058508) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.447466249266288, LR: 0.0003 +[2026-03-04 17:24:11] (step=0058509) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.447661905693602, LR: 0.0003 +[2026-03-04 17:24:19] (step=0058510) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.447857562120916, LR: 0.0003 +[2026-03-04 17:24:27] (step=0058511) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.44805321854823, LR: 0.0003 +[2026-03-04 17:24:34] (step=0058512) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.448248874975542, LR: 0.0003 +[2026-03-04 17:24:42] (step=0058513) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.448444531402856, LR: 0.0003 +[2026-03-04 17:24:50] (step=0058514) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.44864018783017, LR: 0.0003 +[2026-03-04 17:24:58] (step=0058515) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.448835844257484, LR: 0.0003 +[2026-03-04 17:25:06] (step=0058516) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.449031500684798, LR: 0.0003 +[2026-03-04 17:25:14] (step=0058517) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.44922715711211, LR: 0.0003 +[2026-03-04 17:25:22] (step=0058518) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.449422813539424, LR: 0.0003 +[2026-03-04 17:25:30] (step=0058519) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.449618469966738, LR: 0.0003 +[2026-03-04 17:25:37] (step=0058520) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.449814126394052, LR: 0.0003 +[2026-03-04 17:25:45] (step=0058521) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.450009782821366, LR: 0.0003 +[2026-03-04 17:25:53] (step=0058522) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.450205439248679, LR: 0.0003 +[2026-03-04 17:26:01] (step=0058523) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.450401095675993, LR: 0.0003 +[2026-03-04 17:26:09] (step=0058524) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.450596752103307, LR: 0.0003 +[2026-03-04 17:26:17] (step=0058525) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.45079240853062, LR: 0.0003 +[2026-03-04 17:26:25] (step=0058526) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.450988064957935, LR: 0.0003 +[2026-03-04 17:26:33] (step=0058527) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.451183721385247, LR: 0.0003 +[2026-03-04 17:26:40] (step=0058528) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.451379377812561, LR: 0.0003 +[2026-03-04 17:26:48] (step=0058529) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.451575034239875, LR: 0.0003 +[2026-03-04 17:26:56] (step=0058530) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.451770690667189, LR: 0.0003 +[2026-03-04 17:27:04] (step=0058531) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.451966347094501, LR: 0.0003 +[2026-03-04 17:27:12] (step=0058532) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.452162003521815, LR: 0.0003 +[2026-03-04 17:27:20] (step=0058533) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.45235765994913, LR: 0.0003 +[2026-03-04 17:27:28] (step=0058534) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.452553316376443, LR: 0.0003 +[2026-03-04 17:27:35] (step=0058535) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.452748972803757, LR: 0.0003 +[2026-03-04 17:27:43] (step=0058536) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.45294462923107, LR: 0.0003 +[2026-03-04 17:27:51] (step=0058537) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.453140285658383, LR: 0.0003 +[2026-03-04 17:27:59] (step=0058538) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.453335942085697, LR: 0.0003 +[2026-03-04 17:28:07] (step=0058539) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.453531598513012, LR: 0.0003 +[2026-03-04 17:28:15] (step=0058540) Train Loss: 0.4497, Train Steps/Sec: 0.12, Epoch: 11.453727254940326, LR: 0.0003 +[2026-03-04 17:28:23] (step=0058541) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.453922911367638, LR: 0.0003 +[2026-03-04 17:28:31] (step=0058542) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.454118567794952, LR: 0.0003 +[2026-03-04 17:28:39] (step=0058543) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.454314224222266, LR: 0.0003 +[2026-03-04 17:28:46] (step=0058544) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.45450988064958, LR: 0.0003 +[2026-03-04 17:28:54] (step=0058545) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.454705537076894, LR: 0.0003 +[2026-03-04 17:29:02] (step=0058546) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.454901193504206, LR: 0.0003 +[2026-03-04 17:29:10] (step=0058547) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.45509684993152, LR: 0.0003 +[2026-03-04 17:29:18] (step=0058548) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.455292506358834, LR: 0.0003 +[2026-03-04 17:29:26] (step=0058549) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.455488162786148, LR: 0.0003 +[2026-03-04 17:29:34] (step=0058550) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.455683819213462, LR: 0.0003 +[2026-03-04 17:29:41] (step=0058551) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.455879475640774, LR: 0.0003 +[2026-03-04 17:29:49] (step=0058552) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.456075132068088, LR: 0.0003 +[2026-03-04 17:29:57] (step=0058553) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.456270788495402, LR: 0.0003 +[2026-03-04 17:30:05] (step=0058554) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.456466444922716, LR: 0.0003 +[2026-03-04 17:30:13] (step=0058555) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.456662101350028, LR: 0.0003 +[2026-03-04 17:30:21] (step=0058556) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.456857757777342, LR: 0.0003 +[2026-03-04 17:30:29] (step=0058557) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.457053414204657, LR: 0.0003 +[2026-03-04 17:30:36] (step=0058558) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.45724907063197, LR: 0.0003 +[2026-03-04 17:30:44] (step=0058559) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.457444727059285, LR: 0.0003 +[2026-03-04 17:30:52] (step=0058560) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.457640383486597, LR: 0.0003 +[2026-03-04 17:31:00] (step=0058561) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.45783603991391, LR: 0.0003 +[2026-03-04 17:31:08] (step=0058562) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.458031696341225, LR: 0.0003 +[2026-03-04 17:31:16] (step=0058563) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 11.458227352768539, LR: 0.0003 +[2026-03-04 17:31:24] (step=0058564) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 11.458423009195853, LR: 0.0003 +[2026-03-04 17:31:31] (step=0058565) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.458618665623165, LR: 0.0003 +[2026-03-04 17:31:39] (step=0058566) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.458814322050479, LR: 0.0003 +[2026-03-04 17:31:47] (step=0058567) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.459009978477793, LR: 0.0003 +[2026-03-04 17:31:55] (step=0058568) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.459205634905107, LR: 0.0003 +[2026-03-04 17:32:03] (step=0058569) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.459401291332421, LR: 0.0003 +[2026-03-04 17:32:11] (step=0058570) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.459596947759733, LR: 0.0003 +[2026-03-04 17:32:19] (step=0058571) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.459792604187047, LR: 0.0003 +[2026-03-04 17:32:26] (step=0058572) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.459988260614361, LR: 0.0003 +[2026-03-04 17:32:34] (step=0058573) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.460183917041675, LR: 0.0003 +[2026-03-04 17:32:42] (step=0058574) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 11.46037957346899, LR: 0.0003 +[2026-03-04 17:32:50] (step=0058575) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.460575229896302, LR: 0.0003 +[2026-03-04 17:32:58] (step=0058576) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.460770886323616, LR: 0.0003 +[2026-03-04 17:33:06] (step=0058577) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.46096654275093, LR: 0.0003 +[2026-03-04 17:33:14] (step=0058578) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.461162199178244, LR: 0.0003 +[2026-03-04 17:33:22] (step=0058579) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.461357855605556, LR: 0.0003 +[2026-03-04 17:33:29] (step=0058580) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.46155351203287, LR: 0.0003 +[2026-03-04 17:33:37] (step=0058581) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.461749168460184, LR: 0.0003 +[2026-03-04 17:33:45] (step=0058582) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.461944824887498, LR: 0.0003 +[2026-03-04 17:33:53] (step=0058583) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.462140481314812, LR: 0.0003 +[2026-03-04 17:34:01] (step=0058584) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.462336137742124, LR: 0.0003 +[2026-03-04 17:34:09] (step=0058585) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.462531794169438, LR: 0.0003 +[2026-03-04 17:34:17] (step=0058586) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.462727450596752, LR: 0.0003 +[2026-03-04 17:34:24] (step=0058587) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.462923107024066, LR: 0.0003 +[2026-03-04 17:34:32] (step=0058588) Train Loss: 0.4545, Train Steps/Sec: 0.12, Epoch: 11.46311876345138, LR: 0.0003 +[2026-03-04 17:34:40] (step=0058589) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.463314419878692, LR: 0.0003 +[2026-03-04 17:34:48] (step=0058590) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.463510076306006, LR: 0.0003 +[2026-03-04 17:34:56] (step=0058591) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 11.46370573273332, LR: 0.0003 +[2026-03-04 17:35:04] (step=0058592) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.463901389160634, LR: 0.0003 +[2026-03-04 17:35:12] (step=0058593) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.464097045587948, LR: 0.0003 +[2026-03-04 17:35:20] (step=0058594) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.46429270201526, LR: 0.0003 +[2026-03-04 17:35:27] (step=0058595) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.464488358442575, LR: 0.0003 +[2026-03-04 17:35:35] (step=0058596) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 11.464684014869889, LR: 0.0003 +[2026-03-04 17:35:43] (step=0058597) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.464879671297203, LR: 0.0003 +[2026-03-04 17:35:51] (step=0058598) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.465075327724517, LR: 0.0003 +[2026-03-04 17:35:59] (step=0058599) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.465270984151829, LR: 0.0003 +[2026-03-04 17:36:07] (step=0058600) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.465466640579143, LR: 0.0003 +[2026-03-04 17:36:15] (step=0058601) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.465662297006457, LR: 0.0003 +[2026-03-04 17:36:22] (step=0058602) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.46585795343377, LR: 0.0003 +[2026-03-04 17:36:30] (step=0058603) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.466053609861085, LR: 0.0003 +[2026-03-04 17:36:38] (step=0058604) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.466249266288397, LR: 0.0003 +[2026-03-04 17:36:46] (step=0058605) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.466444922715711, LR: 0.0003 +[2026-03-04 17:36:54] (step=0058606) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.466640579143025, LR: 0.0003 +[2026-03-04 17:37:02] (step=0058607) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.466836235570339, LR: 0.0003 +[2026-03-04 17:37:10] (step=0058608) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.467031891997651, LR: 0.0003 +[2026-03-04 17:37:18] (step=0058609) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.467227548424965, LR: 0.0003 +[2026-03-04 17:37:25] (step=0058610) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.46742320485228, LR: 0.0003 +[2026-03-04 17:37:33] (step=0058611) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 11.467618861279593, LR: 0.0003 +[2026-03-04 17:37:41] (step=0058612) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.467814517706907, LR: 0.0003 +[2026-03-04 17:37:49] (step=0058613) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.46801017413422, LR: 0.0003 +[2026-03-04 17:37:57] (step=0058614) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.468205830561534, LR: 0.0003 +[2026-03-04 17:38:05] (step=0058615) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.468401486988848, LR: 0.0003 +[2026-03-04 17:38:13] (step=0058616) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.468597143416162, LR: 0.0003 +[2026-03-04 17:38:20] (step=0058617) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.468792799843476, LR: 0.0003 +[2026-03-04 17:38:28] (step=0058618) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 11.468988456270788, LR: 0.0003 +[2026-03-04 17:38:36] (step=0058619) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.469184112698102, LR: 0.0003 +[2026-03-04 17:38:44] (step=0058620) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.469379769125416, LR: 0.0003 +[2026-03-04 17:38:52] (step=0058621) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.46957542555273, LR: 0.0003 +[2026-03-04 17:39:00] (step=0058622) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.469771081980044, LR: 0.0003 +[2026-03-04 17:39:08] (step=0058623) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.469966738407356, LR: 0.0003 +[2026-03-04 17:39:16] (step=0058624) Train Loss: 0.4535, Train Steps/Sec: 0.12, Epoch: 11.47016239483467, LR: 0.0003 +[2026-03-04 17:39:24] (step=0058625) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.470358051261984, LR: 0.0003 +[2026-03-04 17:39:31] (step=0058626) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.470553707689298, LR: 0.0003 +[2026-03-04 17:39:39] (step=0058627) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 11.470749364116612, LR: 0.0003 +[2026-03-04 17:39:47] (step=0058628) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.470945020543924, LR: 0.0003 +[2026-03-04 17:39:55] (step=0058629) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.471140676971238, LR: 0.0003 +[2026-03-04 17:40:03] (step=0058630) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.471336333398552, LR: 0.0003 +[2026-03-04 17:40:11] (step=0058631) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.471531989825866, LR: 0.0003 +[2026-03-04 17:40:19] (step=0058632) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 11.471727646253179, LR: 0.0003 +[2026-03-04 17:40:26] (step=0058633) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.471923302680493, LR: 0.0003 +[2026-03-04 17:40:34] (step=0058634) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.472118959107807, LR: 0.0003 +[2026-03-04 17:40:42] (step=0058635) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 11.47231461553512, LR: 0.0003 +[2026-03-04 17:40:50] (step=0058636) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.472510271962435, LR: 0.0003 +[2026-03-04 17:40:58] (step=0058637) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.472705928389747, LR: 0.0003 +[2026-03-04 17:41:06] (step=0058638) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.47290158481706, LR: 0.0003 +[2026-03-04 17:41:14] (step=0058639) Train Loss: 0.4376, Train Steps/Sec: 0.12, Epoch: 11.473097241244375, LR: 0.0003 +[2026-03-04 17:41:22] (step=0058640) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.473292897671689, LR: 0.0003 +[2026-03-04 17:41:30] (step=0058641) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 11.473488554099003, LR: 0.0003 +[2026-03-04 17:41:37] (step=0058642) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.473684210526315, LR: 0.0003 +[2026-03-04 17:41:45] (step=0058643) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.473879866953629, LR: 0.0003 +[2026-03-04 17:41:53] (step=0058644) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 11.474075523380943, LR: 0.0003 +[2026-03-04 17:42:01] (step=0058645) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.474271179808257, LR: 0.0003 +[2026-03-04 17:42:09] (step=0058646) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.474466836235571, LR: 0.0003 +[2026-03-04 17:42:17] (step=0058647) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.474662492662883, LR: 0.0003 +[2026-03-04 17:42:25] (step=0058648) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.474858149090197, LR: 0.0003 +[2026-03-04 17:42:32] (step=0058649) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.475053805517511, LR: 0.0003 +[2026-03-04 17:42:40] (step=0058650) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.475249461944825, LR: 0.0003 +[2026-03-04 17:42:48] (step=0058651) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.47544511837214, LR: 0.0003 +[2026-03-04 17:42:56] (step=0058652) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.475640774799452, LR: 0.0003 +[2026-03-04 17:43:04] (step=0058653) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.475836431226766, LR: 0.0003 +[2026-03-04 17:43:12] (step=0058654) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.47603208765408, LR: 0.0003 +[2026-03-04 17:43:20] (step=0058655) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.476227744081394, LR: 0.0003 +[2026-03-04 17:43:28] (step=0058656) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.476423400508708, LR: 0.0003 +[2026-03-04 17:43:35] (step=0058657) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.47661905693602, LR: 0.0003 +[2026-03-04 17:43:43] (step=0058658) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.476814713363334, LR: 0.0003 +[2026-03-04 17:43:51] (step=0058659) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.477010369790648, LR: 0.0003 +[2026-03-04 17:43:59] (step=0058660) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.477206026217962, LR: 0.0003 +[2026-03-04 17:44:07] (step=0058661) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.477401682645274, LR: 0.0003 +[2026-03-04 17:44:15] (step=0058662) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.477597339072588, LR: 0.0003 +[2026-03-04 17:44:23] (step=0058663) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.477792995499902, LR: 0.0003 +[2026-03-04 17:44:30] (step=0058664) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.477988651927216, LR: 0.0003 +[2026-03-04 17:44:38] (step=0058665) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.47818430835453, LR: 0.0003 +[2026-03-04 17:44:46] (step=0058666) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.478379964781842, LR: 0.0003 +[2026-03-04 17:44:54] (step=0058667) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.478575621209156, LR: 0.0003 +[2026-03-04 17:45:02] (step=0058668) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.47877127763647, LR: 0.0003 +[2026-03-04 17:45:10] (step=0058669) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.478966934063784, LR: 0.0003 +[2026-03-04 17:45:18] (step=0058670) Train Loss: 0.4487, Train Steps/Sec: 0.12, Epoch: 11.479162590491098, LR: 0.0003 +[2026-03-04 17:45:26] (step=0058671) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 11.47935824691841, LR: 0.0003 +[2026-03-04 17:45:34] (step=0058672) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.479553903345725, LR: 0.0003 +[2026-03-04 17:45:41] (step=0058673) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.479749559773039, LR: 0.0003 +[2026-03-04 17:45:49] (step=0058674) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.479945216200353, LR: 0.0003 +[2026-03-04 17:45:57] (step=0058675) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.480140872627667, LR: 0.0003 +[2026-03-04 17:46:05] (step=0058676) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.480336529054979, LR: 0.0003 +[2026-03-04 17:46:13] (step=0058677) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.480532185482293, LR: 0.0003 +[2026-03-04 17:46:21] (step=0058678) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.480727841909607, LR: 0.0003 +[2026-03-04 17:46:29] (step=0058679) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 11.48092349833692, LR: 0.0003 +[2026-03-04 17:46:36] (step=0058680) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.481119154764235, LR: 0.0003 +[2026-03-04 17:46:44] (step=0058681) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.481314811191547, LR: 0.0003 +[2026-03-04 17:46:52] (step=0058682) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.481510467618861, LR: 0.0003 +[2026-03-04 17:47:00] (step=0058683) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.481706124046175, LR: 0.0003 +[2026-03-04 17:47:08] (step=0058684) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 11.481901780473489, LR: 0.0003 +[2026-03-04 17:47:16] (step=0058685) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.482097436900801, LR: 0.0003 +[2026-03-04 17:47:24] (step=0058686) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.482293093328115, LR: 0.0003 +[2026-03-04 17:47:32] (step=0058687) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.48248874975543, LR: 0.0003 +[2026-03-04 17:47:40] (step=0058688) Train Loss: 0.4393, Train Steps/Sec: 0.12, Epoch: 11.482684406182743, LR: 0.0003 +[2026-03-04 17:47:47] (step=0058689) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.482880062610057, LR: 0.0003 +[2026-03-04 17:47:55] (step=0058690) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.48307571903737, LR: 0.0003 +[2026-03-04 17:48:03] (step=0058691) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.483271375464684, LR: 0.0003 +[2026-03-04 17:48:11] (step=0058692) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 11.483467031891998, LR: 0.0003 +[2026-03-04 17:48:19] (step=0058693) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.483662688319312, LR: 0.0003 +[2026-03-04 17:48:27] (step=0058694) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.483858344746626, LR: 0.0003 +[2026-03-04 17:48:35] (step=0058695) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.484054001173938, LR: 0.0003 +[2026-03-04 17:48:42] (step=0058696) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.484249657601252, LR: 0.0003 +[2026-03-04 17:48:50] (step=0058697) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.484445314028566, LR: 0.0003 +[2026-03-04 17:48:58] (step=0058698) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.48464097045588, LR: 0.0003 +[2026-03-04 17:49:06] (step=0058699) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.484836626883194, LR: 0.0003 +[2026-03-04 17:49:14] (step=0058700) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.485032283310506, LR: 0.0003 +[2026-03-04 17:49:22] (step=0058701) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.48522793973782, LR: 0.0003 +[2026-03-04 17:49:30] (step=0058702) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.485423596165134, LR: 0.0003 +[2026-03-04 17:49:37] (step=0058703) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.485619252592448, LR: 0.0003 +[2026-03-04 17:49:45] (step=0058704) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.485814909019762, LR: 0.0003 +[2026-03-04 17:49:53] (step=0058705) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.486010565447074, LR: 0.0003 +[2026-03-04 17:50:01] (step=0058706) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 11.486206221874388, LR: 0.0003 +[2026-03-04 17:50:09] (step=0058707) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.486401878301702, LR: 0.0003 +[2026-03-04 17:50:17] (step=0058708) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.486597534729016, LR: 0.0003 +[2026-03-04 17:50:25] (step=0058709) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.48679319115633, LR: 0.0003 +[2026-03-04 17:50:32] (step=0058710) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.486988847583643, LR: 0.0003 +[2026-03-04 17:50:40] (step=0058711) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.487184504010957, LR: 0.0003 +[2026-03-04 17:50:48] (step=0058712) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.48738016043827, LR: 0.0003 +[2026-03-04 17:50:56] (step=0058713) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.487575816865585, LR: 0.0003 +[2026-03-04 17:51:04] (step=0058714) Train Loss: 0.4376, Train Steps/Sec: 0.12, Epoch: 11.487771473292897, LR: 0.0003 +[2026-03-04 17:51:12] (step=0058715) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.48796712972021, LR: 0.0003 +[2026-03-04 17:51:20] (step=0058716) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.488162786147525, LR: 0.0003 +[2026-03-04 17:51:28] (step=0058717) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 11.488358442574839, LR: 0.0003 +[2026-03-04 17:51:36] (step=0058718) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.488554099002153, LR: 0.0003 +[2026-03-04 17:51:43] (step=0058719) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.488749755429465, LR: 0.0003 +[2026-03-04 17:51:51] (step=0058720) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.488945411856779, LR: 0.0003 +[2026-03-04 17:51:59] (step=0058721) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 11.489141068284093, LR: 0.0003 +[2026-03-04 17:52:07] (step=0058722) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.489336724711407, LR: 0.0003 +[2026-03-04 17:52:15] (step=0058723) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.489532381138721, LR: 0.0003 +[2026-03-04 17:52:23] (step=0058724) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.489728037566033, LR: 0.0003 +[2026-03-04 17:52:31] (step=0058725) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.489923693993347, LR: 0.0003 +[2026-03-04 17:52:38] (step=0058726) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.490119350420661, LR: 0.0003 +[2026-03-04 17:52:46] (step=0058727) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.490315006847975, LR: 0.0003 +[2026-03-04 17:52:54] (step=0058728) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.49051066327529, LR: 0.0003 +[2026-03-04 17:53:02] (step=0058729) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.490706319702602, LR: 0.0003 +[2026-03-04 17:53:10] (step=0058730) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.490901976129916, LR: 0.0003 +[2026-03-04 17:53:18] (step=0058731) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.49109763255723, LR: 0.0003 +[2026-03-04 17:53:26] (step=0058732) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.491293288984544, LR: 0.0003 +[2026-03-04 17:53:34] (step=0058733) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.491488945411858, LR: 0.0003 +[2026-03-04 17:53:41] (step=0058734) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.49168460183917, LR: 0.0003 +[2026-03-04 17:53:49] (step=0058735) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.491880258266484, LR: 0.0003 +[2026-03-04 17:53:57] (step=0058736) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 11.492075914693798, LR: 0.0003 +[2026-03-04 17:54:05] (step=0058737) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 11.492271571121112, LR: 0.0003 +[2026-03-04 17:54:13] (step=0058738) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.492467227548424, LR: 0.0003 +[2026-03-04 17:54:21] (step=0058739) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.492662883975738, LR: 0.0003 +[2026-03-04 17:54:29] (step=0058740) Train Loss: 0.4365, Train Steps/Sec: 0.12, Epoch: 11.492858540403052, LR: 0.0003 +[2026-03-04 17:54:37] (step=0058741) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.493054196830366, LR: 0.0003 +[2026-03-04 17:54:45] (step=0058742) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.49324985325768, LR: 0.0003 +[2026-03-04 17:54:52] (step=0058743) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.493445509684992, LR: 0.0003 +[2026-03-04 17:55:00] (step=0058744) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.493641166112306, LR: 0.0003 +[2026-03-04 17:55:08] (step=0058745) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.49383682253962, LR: 0.0003 +[2026-03-04 17:55:16] (step=0058746) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.494032478966934, LR: 0.0003 +[2026-03-04 17:55:24] (step=0058747) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 11.494228135394248, LR: 0.0003 +[2026-03-04 17:55:32] (step=0058748) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.49442379182156, LR: 0.0003 +[2026-03-04 17:55:40] (step=0058749) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.494619448248875, LR: 0.0003 +[2026-03-04 17:55:47] (step=0058750) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.494815104676189, LR: 0.0003 +[2026-03-04 17:55:55] (step=0058751) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.495010761103503, LR: 0.0003 +[2026-03-04 17:56:03] (step=0058752) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.495206417530817, LR: 0.0003 +[2026-03-04 17:56:11] (step=0058753) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 11.495402073958129, LR: 0.0003 +[2026-03-04 17:56:19] (step=0058754) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.495597730385443, LR: 0.0003 +[2026-03-04 17:56:27] (step=0058755) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.495793386812757, LR: 0.0003 +[2026-03-04 17:56:35] (step=0058756) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.49598904324007, LR: 0.0003 +[2026-03-04 17:56:42] (step=0058757) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.496184699667385, LR: 0.0003 +[2026-03-04 17:56:50] (step=0058758) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.496380356094697, LR: 0.0003 +[2026-03-04 17:56:58] (step=0058759) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 11.496576012522011, LR: 0.0003 +[2026-03-04 17:57:06] (step=0058760) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.496771668949325, LR: 0.0003 +[2026-03-04 17:57:14] (step=0058761) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.49696732537664, LR: 0.0003 +[2026-03-04 17:57:22] (step=0058762) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 11.497162981803953, LR: 0.0003 +[2026-03-04 17:57:30] (step=0058763) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.497358638231265, LR: 0.0003 +[2026-03-04 17:57:38] (step=0058764) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 11.49755429465858, LR: 0.0003 +[2026-03-04 17:57:45] (step=0058765) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.497749951085893, LR: 0.0003 +[2026-03-04 17:57:53] (step=0058766) Train Loss: 0.4316, Train Steps/Sec: 0.12, Epoch: 11.497945607513207, LR: 0.0003 +[2026-03-04 17:58:01] (step=0058767) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.49814126394052, LR: 0.0003 +[2026-03-04 17:58:09] (step=0058768) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.498336920367834, LR: 0.0003 +[2026-03-04 17:58:17] (step=0058769) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.498532576795148, LR: 0.0003 +[2026-03-04 17:58:25] (step=0058770) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.498728233222462, LR: 0.0003 +[2026-03-04 17:58:33] (step=0058771) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.498923889649776, LR: 0.0003 +[2026-03-04 17:58:41] (step=0058772) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.499119546077088, LR: 0.0003 +[2026-03-04 17:58:49] (step=0058773) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.499315202504402, LR: 0.0003 +[2026-03-04 17:58:56] (step=0058774) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.499510858931716, LR: 0.0003 +[2026-03-04 17:59:04] (step=0058775) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.49970651535903, LR: 0.0003 +[2026-03-04 17:59:12] (step=0058776) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.499902171786344, LR: 0.0003 +[2026-03-04 17:59:20] (step=0058777) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.500097828213656, LR: 0.0003 +[2026-03-04 17:59:28] (step=0058778) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.50029348464097, LR: 0.0003 +[2026-03-04 17:59:36] (step=0058779) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.500489141068284, LR: 0.0003 +[2026-03-04 17:59:44] (step=0058780) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.500684797495598, LR: 0.0003 +[2026-03-04 17:59:51] (step=0058781) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.500880453922912, LR: 0.0003 +[2026-03-04 17:59:59] (step=0058782) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.501076110350224, LR: 0.0003 +[2026-03-04 18:00:07] (step=0058783) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.501271766777538, LR: 0.0003 +[2026-03-04 18:00:15] (step=0058784) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.501467423204852, LR: 0.0003 +[2026-03-04 18:00:23] (step=0058785) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.501663079632166, LR: 0.0003 +[2026-03-04 18:00:31] (step=0058786) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.50185873605948, LR: 0.0003 +[2026-03-04 18:00:39] (step=0058787) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.502054392486793, LR: 0.0003 +[2026-03-04 18:00:47] (step=0058788) Train Loss: 0.4274, Train Steps/Sec: 0.12, Epoch: 11.502250048914107, LR: 0.0003 +[2026-03-04 18:00:54] (step=0058789) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.50244570534142, LR: 0.0003 +[2026-03-04 18:01:02] (step=0058790) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.502641361768735, LR: 0.0003 +[2026-03-04 18:01:10] (step=0058791) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.502837018196047, LR: 0.0003 +[2026-03-04 18:01:18] (step=0058792) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 11.50303267462336, LR: 0.0003 +[2026-03-04 18:01:26] (step=0058793) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.503228331050675, LR: 0.0003 +[2026-03-04 18:01:34] (step=0058794) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 11.503423987477989, LR: 0.0003 +[2026-03-04 18:01:42] (step=0058795) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 11.503619643905303, LR: 0.0003 +[2026-03-04 18:01:50] (step=0058796) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.503815300332615, LR: 0.0003 +[2026-03-04 18:01:57] (step=0058797) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.50401095675993, LR: 0.0003 +[2026-03-04 18:02:05] (step=0058798) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.504206613187243, LR: 0.0003 +[2026-03-04 18:02:13] (step=0058799) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.504402269614557, LR: 0.0003 +[2026-03-04 18:02:21] (step=0058800) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.504597926041871, LR: 0.0003 +[2026-03-04 18:02:29] (step=0058801) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.504793582469183, LR: 0.0003 +[2026-03-04 18:02:37] (step=0058802) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 11.504989238896497, LR: 0.0003 +[2026-03-04 18:02:45] (step=0058803) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.505184895323811, LR: 0.0003 +[2026-03-04 18:02:52] (step=0058804) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.505380551751125, LR: 0.0003 +[2026-03-04 18:03:00] (step=0058805) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.50557620817844, LR: 0.0003 +[2026-03-04 18:03:08] (step=0058806) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.505771864605752, LR: 0.0003 +[2026-03-04 18:03:16] (step=0058807) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.505967521033066, LR: 0.0003 +[2026-03-04 18:03:24] (step=0058808) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 11.50616317746038, LR: 0.0003 +[2026-03-04 18:03:32] (step=0058809) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.506358833887694, LR: 0.0003 +[2026-03-04 18:03:40] (step=0058810) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.506554490315008, LR: 0.0003 +[2026-03-04 18:03:47] (step=0058811) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 11.50675014674232, LR: 0.0003 +[2026-03-04 18:03:55] (step=0058812) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.506945803169634, LR: 0.0003 +[2026-03-04 18:04:03] (step=0058813) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 11.507141459596948, LR: 0.0003 +[2026-03-04 18:04:11] (step=0058814) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.507337116024262, LR: 0.0003 +[2026-03-04 18:04:19] (step=0058815) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.507532772451576, LR: 0.0003 +[2026-03-04 18:04:27] (step=0058816) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.507728428878888, LR: 0.0003 +[2026-03-04 18:04:35] (step=0058817) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.507924085306202, LR: 0.0003 +[2026-03-04 18:04:43] (step=0058818) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 11.508119741733516, LR: 0.0003 +[2026-03-04 18:04:51] (step=0058819) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 11.50831539816083, LR: 0.0003 +[2026-03-04 18:04:58] (step=0058820) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.508511054588142, LR: 0.0003 +[2026-03-04 18:05:06] (step=0058821) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.508706711015456, LR: 0.0003 +[2026-03-04 18:05:14] (step=0058822) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.50890236744277, LR: 0.0003 +[2026-03-04 18:05:22] (step=0058823) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.509098023870084, LR: 0.0003 +[2026-03-04 18:05:30] (step=0058824) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.509293680297398, LR: 0.0003 +[2026-03-04 18:05:38] (step=0058825) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.50948933672471, LR: 0.0003 +[2026-03-04 18:05:46] (step=0058826) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 11.509684993152025, LR: 0.0003 +[2026-03-04 18:05:53] (step=0058827) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 11.509880649579339, LR: 0.0003 +[2026-03-04 18:06:01] (step=0058828) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 11.510076306006653, LR: 0.0003 +[2026-03-04 18:06:09] (step=0058829) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.510271962433967, LR: 0.0003 +[2026-03-04 18:06:17] (step=0058830) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.510467618861279, LR: 0.0003 +[2026-03-04 18:06:25] (step=0058831) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.510663275288593, LR: 0.0003 +[2026-03-04 18:06:33] (step=0058832) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.510858931715907, LR: 0.0003 +[2026-03-04 18:06:41] (step=0058833) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.511054588143221, LR: 0.0003 +[2026-03-04 18:06:48] (step=0058834) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.511250244570535, LR: 0.0003 +[2026-03-04 18:06:56] (step=0058835) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.511445900997847, LR: 0.0003 +[2026-03-04 18:07:04] (step=0058836) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.511641557425161, LR: 0.0003 +[2026-03-04 18:07:12] (step=0058837) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.511837213852475, LR: 0.0003 +[2026-03-04 18:07:20] (step=0058838) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.51203287027979, LR: 0.0003 +[2026-03-04 18:07:28] (step=0058839) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 11.512228526707103, LR: 0.0003 +[2026-03-04 18:07:36] (step=0058840) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.512424183134415, LR: 0.0003 +[2026-03-04 18:07:44] (step=0058841) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.51261983956173, LR: 0.0003 +[2026-03-04 18:07:51] (step=0058842) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.512815495989043, LR: 0.0003 +[2026-03-04 18:07:59] (step=0058843) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.513011152416357, LR: 0.0003 +[2026-03-04 18:08:07] (step=0058844) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 11.51320680884367, LR: 0.0003 +[2026-03-04 18:08:15] (step=0058845) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.513402465270984, LR: 0.0003 +[2026-03-04 18:08:23] (step=0058846) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.513598121698298, LR: 0.0003 +[2026-03-04 18:08:31] (step=0058847) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 11.513793778125612, LR: 0.0003 +[2026-03-04 18:08:39] (step=0058848) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.513989434552926, LR: 0.0003 +[2026-03-04 18:08:46] (step=0058849) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.514185090980238, LR: 0.0003 +[2026-03-04 18:08:54] (step=0058850) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.514380747407552, LR: 0.0003 +[2026-03-04 18:09:02] (step=0058851) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.514576403834866, LR: 0.0003 +[2026-03-04 18:09:10] (step=0058852) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 11.51477206026218, LR: 0.0003 +[2026-03-04 18:09:18] (step=0058853) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.514967716689494, LR: 0.0003 +[2026-03-04 18:09:26] (step=0058854) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.515163373116806, LR: 0.0003 +[2026-03-04 18:09:34] (step=0058855) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 11.51535902954412, LR: 0.0003 +[2026-03-04 18:09:42] (step=0058856) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.515554685971434, LR: 0.0003 +[2026-03-04 18:09:49] (step=0058857) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.515750342398748, LR: 0.0003 +[2026-03-04 18:09:57] (step=0058858) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 11.515945998826062, LR: 0.0003 +[2026-03-04 18:10:05] (step=0058859) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.516141655253374, LR: 0.0003 +[2026-03-04 18:10:13] (step=0058860) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.516337311680688, LR: 0.0003 +[2026-03-04 18:10:21] (step=0058861) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.516532968108002, LR: 0.0003 +[2026-03-04 18:10:29] (step=0058862) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.516728624535316, LR: 0.0003 +[2026-03-04 18:10:37] (step=0058863) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.51692428096263, LR: 0.0003 +[2026-03-04 18:10:45] (step=0058864) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.517119937389943, LR: 0.0003 +[2026-03-04 18:10:52] (step=0058865) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.517315593817257, LR: 0.0003 +[2026-03-04 18:11:00] (step=0058866) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.51751125024457, LR: 0.0003 +[2026-03-04 18:11:08] (step=0058867) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.517706906671885, LR: 0.0003 +[2026-03-04 18:11:16] (step=0058868) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.517902563099199, LR: 0.0003 +[2026-03-04 18:11:24] (step=0058869) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.518098219526511, LR: 0.0003 +[2026-03-04 18:11:32] (step=0058870) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.518293875953825, LR: 0.0003 +[2026-03-04 18:11:40] (step=0058871) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.518489532381139, LR: 0.0003 +[2026-03-04 18:11:48] (step=0058872) Train Loss: 0.4437, Train Steps/Sec: 0.12, Epoch: 11.518685188808453, LR: 0.0003 +[2026-03-04 18:11:56] (step=0058873) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.518880845235765, LR: 0.0003 +[2026-03-04 18:12:03] (step=0058874) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.51907650166308, LR: 0.0003 +[2026-03-04 18:12:11] (step=0058875) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.519272158090393, LR: 0.0003 +[2026-03-04 18:12:19] (step=0058876) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.519467814517707, LR: 0.0003 +[2026-03-04 18:12:27] (step=0058877) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.519663470945021, LR: 0.0003 +[2026-03-04 18:12:35] (step=0058878) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.519859127372333, LR: 0.0003 +[2026-03-04 18:12:43] (step=0058879) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.520054783799647, LR: 0.0003 +[2026-03-04 18:12:51] (step=0058880) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.520250440226961, LR: 0.0003 +[2026-03-04 18:12:58] (step=0058881) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.520446096654275, LR: 0.0003 +[2026-03-04 18:13:06] (step=0058882) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.52064175308159, LR: 0.0003 +[2026-03-04 18:13:14] (step=0058883) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 11.520837409508902, LR: 0.0003 +[2026-03-04 18:13:22] (step=0058884) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.521033065936216, LR: 0.0003 +[2026-03-04 18:13:30] (step=0058885) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.52122872236353, LR: 0.0003 +[2026-03-04 18:13:38] (step=0058886) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 11.521424378790844, LR: 0.0003 +[2026-03-04 18:13:46] (step=0058887) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.521620035218158, LR: 0.0003 +[2026-03-04 18:13:53] (step=0058888) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.52181569164547, LR: 0.0003 +[2026-03-04 18:14:01] (step=0058889) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.522011348072784, LR: 0.0003 +[2026-03-04 18:14:09] (step=0058890) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.522207004500098, LR: 0.0003 +[2026-03-04 18:14:17] (step=0058891) Train Loss: 0.4461, Train Steps/Sec: 0.12, Epoch: 11.522402660927412, LR: 0.0003 +[2026-03-04 18:14:25] (step=0058892) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.522598317354726, LR: 0.0003 +[2026-03-04 18:14:33] (step=0058893) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.522793973782038, LR: 0.0003 +[2026-03-04 18:14:41] (step=0058894) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.522989630209352, LR: 0.0003 +[2026-03-04 18:14:49] (step=0058895) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 11.523185286636666, LR: 0.0003 +[2026-03-04 18:14:56] (step=0058896) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.52338094306398, LR: 0.0003 +[2026-03-04 18:15:04] (step=0058897) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.523576599491292, LR: 0.0003 +[2026-03-04 18:15:12] (step=0058898) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.523772255918606, LR: 0.0003 +[2026-03-04 18:15:20] (step=0058899) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.52396791234592, LR: 0.0003 +[2026-03-04 18:15:28] (step=0058900) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.524163568773234, LR: 0.0003 +[2026-03-04 18:15:36] (step=0058901) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.524359225200548, LR: 0.0003 +[2026-03-04 18:15:44] (step=0058902) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.52455488162786, LR: 0.0003 +[2026-03-04 18:15:52] (step=0058903) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.524750538055175, LR: 0.0003 +[2026-03-04 18:15:59] (step=0058904) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.524946194482489, LR: 0.0003 +[2026-03-04 18:16:07] (step=0058905) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.525141850909803, LR: 0.0003 +[2026-03-04 18:16:15] (step=0058906) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.525337507337117, LR: 0.0003 +[2026-03-04 18:16:23] (step=0058907) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.525533163764429, LR: 0.0003 +[2026-03-04 18:16:31] (step=0058908) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.525728820191743, LR: 0.0003 +[2026-03-04 18:16:39] (step=0058909) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.525924476619057, LR: 0.0003 +[2026-03-04 18:16:47] (step=0058910) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.526120133046371, LR: 0.0003 +[2026-03-04 18:16:54] (step=0058911) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.526315789473685, LR: 0.0003 +[2026-03-04 18:17:02] (step=0058912) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.526511445900997, LR: 0.0003 +[2026-03-04 18:17:10] (step=0058913) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.526707102328311, LR: 0.0003 +[2026-03-04 18:17:18] (step=0058914) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.526902758755625, LR: 0.0003 +[2026-03-04 18:17:26] (step=0058915) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.52709841518294, LR: 0.0003 +[2026-03-04 18:17:34] (step=0058916) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.527294071610253, LR: 0.0003 +[2026-03-04 18:17:42] (step=0058917) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.527489728037565, LR: 0.0003 +[2026-03-04 18:17:49] (step=0058918) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.52768538446488, LR: 0.0003 +[2026-03-04 18:17:57] (step=0058919) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.527881040892193, LR: 0.0003 +[2026-03-04 18:18:05] (step=0058920) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.528076697319507, LR: 0.0003 +[2026-03-04 18:18:13] (step=0058921) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.528272353746821, LR: 0.0003 +[2026-03-04 18:18:21] (step=0058922) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.528468010174134, LR: 0.0003 +[2026-03-04 18:18:29] (step=0058923) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.528663666601448, LR: 0.0003 +[2026-03-04 18:18:37] (step=0058924) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 11.528859323028762, LR: 0.0003 +[2026-03-04 18:18:44] (step=0058925) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.529054979456076, LR: 0.0003 +[2026-03-04 18:18:52] (step=0058926) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.529250635883388, LR: 0.0003 +[2026-03-04 18:19:00] (step=0058927) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.529446292310702, LR: 0.0003 +[2026-03-04 18:19:08] (step=0058928) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.529641948738016, LR: 0.0003 +[2026-03-04 18:19:16] (step=0058929) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.52983760516533, LR: 0.0003 +[2026-03-04 18:19:24] (step=0058930) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.530033261592644, LR: 0.0003 +[2026-03-04 18:19:32] (step=0058931) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.530228918019956, LR: 0.0003 +[2026-03-04 18:19:40] (step=0058932) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.53042457444727, LR: 0.0003 +[2026-03-04 18:19:47] (step=0058933) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.530620230874584, LR: 0.0003 +[2026-03-04 18:19:55] (step=0058934) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.530815887301898, LR: 0.0003 +[2026-03-04 18:20:03] (step=0058935) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.531011543729212, LR: 0.0003 +[2026-03-04 18:20:11] (step=0058936) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.531207200156524, LR: 0.0003 +[2026-03-04 18:20:19] (step=0058937) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.531402856583838, LR: 0.0003 +[2026-03-04 18:20:27] (step=0058938) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.531598513011152, LR: 0.0003 +[2026-03-04 18:20:35] (step=0058939) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.531794169438466, LR: 0.0003 +[2026-03-04 18:20:43] (step=0058940) Train Loss: 0.4415, Train Steps/Sec: 0.12, Epoch: 11.53198982586578, LR: 0.0003 +[2026-03-04 18:20:50] (step=0058941) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.532185482293093, LR: 0.0003 +[2026-03-04 18:20:58] (step=0058942) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.532381138720407, LR: 0.0003 +[2026-03-04 18:21:06] (step=0058943) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 11.53257679514772, LR: 0.0003 +[2026-03-04 18:21:14] (step=0058944) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.532772451575035, LR: 0.0003 +[2026-03-04 18:21:22] (step=0058945) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.532968108002349, LR: 0.0003 +[2026-03-04 18:21:30] (step=0058946) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.533163764429661, LR: 0.0003 +[2026-03-04 18:21:38] (step=0058947) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.533359420856975, LR: 0.0003 +[2026-03-04 18:21:45] (step=0058948) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 11.533555077284289, LR: 0.0003 +[2026-03-04 18:21:53] (step=0058949) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.533750733711603, LR: 0.0003 +[2026-03-04 18:22:01] (step=0058950) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.533946390138915, LR: 0.0003 +[2026-03-04 18:22:09] (step=0058951) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.53414204656623, LR: 0.0003 +[2026-03-04 18:22:17] (step=0058952) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 11.534337702993543, LR: 0.0003 +[2026-03-04 18:22:25] (step=0058953) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.534533359420857, LR: 0.0003 +[2026-03-04 18:22:33] (step=0058954) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.534729015848171, LR: 0.0003 +[2026-03-04 18:22:40] (step=0058955) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 11.534924672275483, LR: 0.0003 +[2026-03-04 18:22:48] (step=0058956) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.535120328702797, LR: 0.0003 +[2026-03-04 18:22:56] (step=0058957) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 11.535315985130111, LR: 0.0003 +[2026-03-04 18:23:04] (step=0058958) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.535511641557425, LR: 0.0003 +[2026-03-04 18:23:12] (step=0058959) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.53570729798474, LR: 0.0003 +[2026-03-04 18:23:20] (step=0058960) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.535902954412052, LR: 0.0003 +[2026-03-04 18:23:28] (step=0058961) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.536098610839366, LR: 0.0003 +[2026-03-04 18:23:35] (step=0058962) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.53629426726668, LR: 0.0003 +[2026-03-04 18:23:43] (step=0058963) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.536489923693994, LR: 0.0003 +[2026-03-04 18:23:51] (step=0058964) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.536685580121308, LR: 0.0003 +[2026-03-04 18:23:59] (step=0058965) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.53688123654862, LR: 0.0003 +[2026-03-04 18:24:07] (step=0058966) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.537076892975934, LR: 0.0003 +[2026-03-04 18:24:15] (step=0058967) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 11.537272549403248, LR: 0.0003 +[2026-03-04 18:24:23] (step=0058968) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.537468205830562, LR: 0.0003 +[2026-03-04 18:24:31] (step=0058969) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 11.537663862257876, LR: 0.0003 +[2026-03-04 18:24:38] (step=0058970) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.537859518685188, LR: 0.0003 +[2026-03-04 18:24:46] (step=0058971) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.538055175112502, LR: 0.0003 +[2026-03-04 18:24:54] (step=0058972) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.538250831539816, LR: 0.0003 +[2026-03-04 18:25:02] (step=0058973) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.53844648796713, LR: 0.0003 +[2026-03-04 18:25:10] (step=0058974) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.538642144394444, LR: 0.0003 +[2026-03-04 18:25:18] (step=0058975) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.538837800821756, LR: 0.0003 +[2026-03-04 18:25:26] (step=0058976) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.53903345724907, LR: 0.0003 +[2026-03-04 18:25:34] (step=0058977) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.539229113676384, LR: 0.0003 +[2026-03-04 18:25:41] (step=0058978) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.539424770103698, LR: 0.0003 +[2026-03-04 18:25:49] (step=0058979) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.53962042653101, LR: 0.0003 +[2026-03-04 18:25:57] (step=0058980) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.539816082958325, LR: 0.0003 +[2026-03-04 18:26:05] (step=0058981) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.540011739385639, LR: 0.0003 +[2026-03-04 18:26:13] (step=0058982) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.540207395812953, LR: 0.0003 +[2026-03-04 18:26:21] (step=0058983) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.540403052240267, LR: 0.0003 +[2026-03-04 18:26:29] (step=0058984) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.540598708667579, LR: 0.0003 +[2026-03-04 18:26:36] (step=0058985) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.540794365094893, LR: 0.0003 +[2026-03-04 18:26:44] (step=0058986) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.540990021522207, LR: 0.0003 +[2026-03-04 18:26:52] (step=0058987) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.541185677949521, LR: 0.0003 +[2026-03-04 18:27:00] (step=0058988) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.541381334376835, LR: 0.0003 +[2026-03-04 18:27:08] (step=0058989) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.541576990804147, LR: 0.0003 +[2026-03-04 18:27:16] (step=0058990) Train Loss: 0.4302, Train Steps/Sec: 0.12, Epoch: 11.541772647231461, LR: 0.0003 +[2026-03-04 18:27:24] (step=0058991) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.541968303658775, LR: 0.0003 +[2026-03-04 18:27:32] (step=0058992) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 11.54216396008609, LR: 0.0003 +[2026-03-04 18:27:40] (step=0058993) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.542359616513403, LR: 0.0003 +[2026-03-04 18:27:47] (step=0058994) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.542555272940715, LR: 0.0003 +[2026-03-04 18:27:55] (step=0058995) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.54275092936803, LR: 0.0003 +[2026-03-04 18:28:03] (step=0058996) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.542946585795343, LR: 0.0003 +[2026-03-04 18:28:11] (step=0058997) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.543142242222658, LR: 0.0003 +[2026-03-04 18:28:19] (step=0058998) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.543337898649972, LR: 0.0003 +[2026-03-04 18:28:27] (step=0058999) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.543533555077284, LR: 0.0003 +[2026-03-04 18:28:35] (step=0059000) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 11.543729211504598, LR: 0.0003 +[2026-03-04 18:28:35] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0059000/ +[2026-03-04 18:28:42] (step=0059001) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.543924867931912, LR: 0.0003 +[2026-03-04 18:28:50] (step=0059002) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.544120524359226, LR: 0.0003 +[2026-03-04 18:28:58] (step=0059003) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.544316180786538, LR: 0.0003 +[2026-03-04 18:29:06] (step=0059004) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.544511837213852, LR: 0.0003 +[2026-03-04 18:29:14] (step=0059005) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.544707493641166, LR: 0.0003 +[2026-03-04 18:29:22] (step=0059006) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 11.54490315006848, LR: 0.0003 +[2026-03-04 18:29:30] (step=0059007) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.545098806495794, LR: 0.0003 +[2026-03-04 18:29:37] (step=0059008) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.545294462923106, LR: 0.0003 +[2026-03-04 18:29:45] (step=0059009) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.54549011935042, LR: 0.0003 +[2026-03-04 18:29:53] (step=0059010) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.545685775777734, LR: 0.0003 +[2026-03-04 18:30:01] (step=0059011) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.545881432205048, LR: 0.0003 +[2026-03-04 18:30:09] (step=0059012) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.546077088632362, LR: 0.0003 +[2026-03-04 18:30:17] (step=0059013) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 11.546272745059674, LR: 0.0003 +[2026-03-04 18:30:25] (step=0059014) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.546468401486988, LR: 0.0003 +[2026-03-04 18:30:33] (step=0059015) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.546664057914303, LR: 0.0003 +[2026-03-04 18:30:40] (step=0059016) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.546859714341617, LR: 0.0003 +[2026-03-04 18:30:48] (step=0059017) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.54705537076893, LR: 0.0003 +[2026-03-04 18:30:56] (step=0059018) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.547251027196243, LR: 0.0003 +[2026-03-04 18:31:04] (step=0059019) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.547446683623557, LR: 0.0003 +[2026-03-04 18:31:12] (step=0059020) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 11.54764234005087, LR: 0.0003 +[2026-03-04 18:31:20] (step=0059021) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.547837996478185, LR: 0.0003 +[2026-03-04 18:31:27] (step=0059022) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.548033652905499, LR: 0.0003 +[2026-03-04 18:31:35] (step=0059023) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.548229309332811, LR: 0.0003 +[2026-03-04 18:31:43] (step=0059024) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.548424965760125, LR: 0.0003 +[2026-03-04 18:31:51] (step=0059025) Train Loss: 0.4518, Train Steps/Sec: 0.12, Epoch: 11.548620622187439, LR: 0.0003 +[2026-03-04 18:31:59] (step=0059026) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.548816278614753, LR: 0.0003 +[2026-03-04 18:32:07] (step=0059027) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.549011935042065, LR: 0.0003 +[2026-03-04 18:32:15] (step=0059028) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.54920759146938, LR: 0.0003 +[2026-03-04 18:32:23] (step=0059029) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.549403247896693, LR: 0.0003 +[2026-03-04 18:32:31] (step=0059030) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.549598904324007, LR: 0.0003 +[2026-03-04 18:32:38] (step=0059031) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.549794560751321, LR: 0.0003 +[2026-03-04 18:32:46] (step=0059032) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.549990217178634, LR: 0.0003 +[2026-03-04 18:32:54] (step=0059033) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.550185873605948, LR: 0.0003 +[2026-03-04 18:33:02] (step=0059034) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.550381530033262, LR: 0.0003 +[2026-03-04 18:33:10] (step=0059035) Train Loss: 0.4221, Train Steps/Sec: 0.13, Epoch: 11.550577186460576, LR: 0.0003 +[2026-03-04 18:33:18] (step=0059036) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.55077284288789, LR: 0.0003 +[2026-03-04 18:33:25] (step=0059037) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.550968499315202, LR: 0.0003 +[2026-03-04 18:33:33] (step=0059038) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.551164155742516, LR: 0.0003 +[2026-03-04 18:33:41] (step=0059039) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.55135981216983, LR: 0.0003 +[2026-03-04 18:33:49] (step=0059040) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 11.551555468597144, LR: 0.0003 +[2026-03-04 18:33:57] (step=0059041) Train Loss: 0.4292, Train Steps/Sec: 0.12, Epoch: 11.551751125024458, LR: 0.0003 +[2026-03-04 18:34:05] (step=0059042) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 11.55194678145177, LR: 0.0003 +[2026-03-04 18:34:13] (step=0059043) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.552142437879084, LR: 0.0003 +[2026-03-04 18:34:21] (step=0059044) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 11.552338094306398, LR: 0.0003 +[2026-03-04 18:34:29] (step=0059045) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.552533750733712, LR: 0.0003 +[2026-03-04 18:34:36] (step=0059046) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.552729407161026, LR: 0.0003 +[2026-03-04 18:34:44] (step=0059047) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.552925063588338, LR: 0.0003 +[2026-03-04 18:34:52] (step=0059048) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.553120720015652, LR: 0.0003 +[2026-03-04 18:35:00] (step=0059049) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.553316376442966, LR: 0.0003 +[2026-03-04 18:35:08] (step=0059050) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.55351203287028, LR: 0.0003 +[2026-03-04 18:35:16] (step=0059051) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.553707689297594, LR: 0.0003 +[2026-03-04 18:35:24] (step=0059052) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.553903345724907, LR: 0.0003 +[2026-03-04 18:35:31] (step=0059053) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.55409900215222, LR: 0.0003 +[2026-03-04 18:35:39] (step=0059054) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 11.554294658579535, LR: 0.0003 +[2026-03-04 18:35:47] (step=0059055) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.554490315006849, LR: 0.0003 +[2026-03-04 18:35:55] (step=0059056) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 11.55468597143416, LR: 0.0003 +[2026-03-04 18:36:03] (step=0059057) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.554881627861475, LR: 0.0003 +[2026-03-04 18:36:11] (step=0059058) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.555077284288789, LR: 0.0003 +[2026-03-04 18:36:19] (step=0059059) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.555272940716103, LR: 0.0003 +[2026-03-04 18:36:26] (step=0059060) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.555468597143417, LR: 0.0003 +[2026-03-04 18:36:34] (step=0059061) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.555664253570729, LR: 0.0003 +[2026-03-04 18:36:42] (step=0059062) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.555859909998043, LR: 0.0003 +[2026-03-04 18:36:50] (step=0059063) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.556055566425357, LR: 0.0003 +[2026-03-04 18:36:58] (step=0059064) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.556251222852671, LR: 0.0003 +[2026-03-04 18:37:06] (step=0059065) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.556446879279985, LR: 0.0003 +[2026-03-04 18:37:14] (step=0059066) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.556642535707297, LR: 0.0003 +[2026-03-04 18:37:21] (step=0059067) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.556838192134611, LR: 0.0003 +[2026-03-04 18:37:29] (step=0059068) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.557033848561925, LR: 0.0003 +[2026-03-04 18:37:37] (step=0059069) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 11.55722950498924, LR: 0.0003 +[2026-03-04 18:37:45] (step=0059070) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 11.557425161416553, LR: 0.0003 +[2026-03-04 18:37:53] (step=0059071) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.557620817843866, LR: 0.0003 +[2026-03-04 18:38:01] (step=0059072) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.55781647427118, LR: 0.0003 +[2026-03-04 18:38:09] (step=0059073) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.558012130698494, LR: 0.0003 +[2026-03-04 18:38:17] (step=0059074) Train Loss: 0.4428, Train Steps/Sec: 0.12, Epoch: 11.558207787125808, LR: 0.0003 +[2026-03-04 18:38:25] (step=0059075) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.558403443553122, LR: 0.0003 +[2026-03-04 18:38:32] (step=0059076) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.558599099980434, LR: 0.0003 +[2026-03-04 18:38:40] (step=0059077) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.558794756407748, LR: 0.0003 +[2026-03-04 18:38:48] (step=0059078) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.558990412835062, LR: 0.0003 +[2026-03-04 18:38:56] (step=0059079) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.559186069262376, LR: 0.0003 +[2026-03-04 18:39:04] (step=0059080) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 11.559381725689688, LR: 0.0003 +[2026-03-04 18:39:12] (step=0059081) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.559577382117002, LR: 0.0003 +[2026-03-04 18:39:20] (step=0059082) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.559773038544316, LR: 0.0003 +[2026-03-04 18:39:27] (step=0059083) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.55996869497163, LR: 0.0003 +[2026-03-04 18:39:35] (step=0059084) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 11.560164351398944, LR: 0.0003 +[2026-03-04 18:39:43] (step=0059085) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.560360007826256, LR: 0.0003 +[2026-03-04 18:39:51] (step=0059086) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.56055566425357, LR: 0.0003 +[2026-03-04 18:39:59] (step=0059087) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.560751320680884, LR: 0.0003 +[2026-03-04 18:40:07] (step=0059088) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.560946977108198, LR: 0.0003 +[2026-03-04 18:40:15] (step=0059089) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.561142633535512, LR: 0.0003 +[2026-03-04 18:40:23] (step=0059090) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.561338289962825, LR: 0.0003 +[2026-03-04 18:40:30] (step=0059091) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.561533946390139, LR: 0.0003 +[2026-03-04 18:40:38] (step=0059092) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.561729602817453, LR: 0.0003 +[2026-03-04 18:40:46] (step=0059093) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.561925259244767, LR: 0.0003 +[2026-03-04 18:40:54] (step=0059094) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.56212091567208, LR: 0.0003 +[2026-03-04 18:41:02] (step=0059095) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.562316572099393, LR: 0.0003 +[2026-03-04 18:41:10] (step=0059096) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.562512228526707, LR: 0.0003 +[2026-03-04 18:41:18] (step=0059097) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.56270788495402, LR: 0.0003 +[2026-03-04 18:41:26] (step=0059098) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.562903541381335, LR: 0.0003 +[2026-03-04 18:41:33] (step=0059099) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.563099197808649, LR: 0.0003 +[2026-03-04 18:41:41] (step=0059100) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.563294854235961, LR: 0.0003 +[2026-03-04 18:41:49] (step=0059101) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.563490510663275, LR: 0.0003 +[2026-03-04 18:41:57] (step=0059102) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.563686167090589, LR: 0.0003 +[2026-03-04 18:42:05] (step=0059103) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.563881823517903, LR: 0.0003 +[2026-03-04 18:42:13] (step=0059104) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 11.564077479945217, LR: 0.0003 +[2026-03-04 18:42:21] (step=0059105) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.56427313637253, LR: 0.0003 +[2026-03-04 18:42:28] (step=0059106) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 11.564468792799843, LR: 0.0003 +[2026-03-04 18:42:36] (step=0059107) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.564664449227157, LR: 0.0003 +[2026-03-04 18:42:44] (step=0059108) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.564860105654471, LR: 0.0003 +[2026-03-04 18:42:52] (step=0059109) Train Loss: 0.4200, Train Steps/Sec: 0.13, Epoch: 11.565055762081784, LR: 0.0003 +[2026-03-04 18:43:00] (step=0059110) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 11.565251418509098, LR: 0.0003 +[2026-03-04 18:43:08] (step=0059111) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.565447074936412, LR: 0.0003 +[2026-03-04 18:43:16] (step=0059112) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.565642731363726, LR: 0.0003 +[2026-03-04 18:43:23] (step=0059113) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.56583838779104, LR: 0.0003 +[2026-03-04 18:43:31] (step=0059114) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 11.566034044218352, LR: 0.0003 +[2026-03-04 18:43:39] (step=0059115) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.566229700645666, LR: 0.0003 +[2026-03-04 18:43:47] (step=0059116) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.56642535707298, LR: 0.0003 +[2026-03-04 18:43:55] (step=0059117) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.566621013500294, LR: 0.0003 +[2026-03-04 18:44:03] (step=0059118) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.566816669927608, LR: 0.0003 +[2026-03-04 18:44:11] (step=0059119) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.56701232635492, LR: 0.0003 +[2026-03-04 18:44:19] (step=0059120) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.567207982782234, LR: 0.0003 +[2026-03-04 18:44:27] (step=0059121) Train Loss: 0.4456, Train Steps/Sec: 0.12, Epoch: 11.567403639209548, LR: 0.0003 +[2026-03-04 18:44:34] (step=0059122) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.567599295636862, LR: 0.0003 +[2026-03-04 18:44:42] (step=0059123) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.567794952064176, LR: 0.0003 +[2026-03-04 18:44:50] (step=0059124) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.567990608491488, LR: 0.0003 +[2026-03-04 18:44:58] (step=0059125) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.568186264918802, LR: 0.0003 +[2026-03-04 18:45:06] (step=0059126) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.568381921346116, LR: 0.0003 +[2026-03-04 18:45:14] (step=0059127) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.56857757777343, LR: 0.0003 +[2026-03-04 18:45:22] (step=0059128) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.568773234200744, LR: 0.0003 +[2026-03-04 18:45:29] (step=0059129) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.568968890628057, LR: 0.0003 +[2026-03-04 18:45:37] (step=0059130) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.56916454705537, LR: 0.0003 +[2026-03-04 18:45:45] (step=0059131) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.569360203482685, LR: 0.0003 +[2026-03-04 18:45:53] (step=0059132) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.569555859909999, LR: 0.0003 +[2026-03-04 18:46:01] (step=0059133) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.56975151633731, LR: 0.0003 +[2026-03-04 18:46:09] (step=0059134) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.569947172764625, LR: 0.0003 +[2026-03-04 18:46:17] (step=0059135) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.570142829191939, LR: 0.0003 +[2026-03-04 18:46:24] (step=0059136) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.570338485619253, LR: 0.0003 +[2026-03-04 18:46:32] (step=0059137) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.570534142046567, LR: 0.0003 +[2026-03-04 18:46:40] (step=0059138) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.570729798473879, LR: 0.0003 +[2026-03-04 18:46:48] (step=0059139) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.570925454901193, LR: 0.0003 +[2026-03-04 18:46:56] (step=0059140) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.571121111328507, LR: 0.0003 +[2026-03-04 18:47:04] (step=0059141) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.571316767755821, LR: 0.0003 +[2026-03-04 18:47:12] (step=0059142) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.571512424183135, LR: 0.0003 +[2026-03-04 18:47:20] (step=0059143) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.571708080610447, LR: 0.0003 +[2026-03-04 18:47:27] (step=0059144) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.571903737037761, LR: 0.0003 +[2026-03-04 18:47:35] (step=0059145) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.572099393465075, LR: 0.0003 +[2026-03-04 18:47:43] (step=0059146) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 11.57229504989239, LR: 0.0003 +[2026-03-04 18:47:51] (step=0059147) Train Loss: 0.4645, Train Steps/Sec: 0.13, Epoch: 11.572490706319703, LR: 0.0003 +[2026-03-04 18:47:59] (step=0059148) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.572686362747016, LR: 0.0003 +[2026-03-04 18:48:07] (step=0059149) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.57288201917433, LR: 0.0003 +[2026-03-04 18:48:15] (step=0059150) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 11.573077675601644, LR: 0.0003 +[2026-03-04 18:48:22] (step=0059151) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 11.573273332028958, LR: 0.0003 +[2026-03-04 18:48:30] (step=0059152) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.573468988456272, LR: 0.0003 +[2026-03-04 18:48:38] (step=0059153) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.573664644883584, LR: 0.0003 +[2026-03-04 18:48:46] (step=0059154) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 11.573860301310898, LR: 0.0003 +[2026-03-04 18:48:54] (step=0059155) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.574055957738212, LR: 0.0003 +[2026-03-04 18:49:02] (step=0059156) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.574251614165526, LR: 0.0003 +[2026-03-04 18:49:10] (step=0059157) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.57444727059284, LR: 0.0003 +[2026-03-04 18:49:18] (step=0059158) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.574642927020152, LR: 0.0003 +[2026-03-04 18:49:25] (step=0059159) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 11.574838583447466, LR: 0.0003 +[2026-03-04 18:49:33] (step=0059160) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.57503423987478, LR: 0.0003 +[2026-03-04 18:49:41] (step=0059161) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.575229896302094, LR: 0.0003 +[2026-03-04 18:49:49] (step=0059162) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.575425552729406, LR: 0.0003 +[2026-03-04 18:49:57] (step=0059163) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.57562120915672, LR: 0.0003 +[2026-03-04 18:50:05] (step=0059164) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.575816865584034, LR: 0.0003 +[2026-03-04 18:50:13] (step=0059165) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.576012522011348, LR: 0.0003 +[2026-03-04 18:50:20] (step=0059166) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.576208178438662, LR: 0.0003 +[2026-03-04 18:50:28] (step=0059167) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 11.576403834865975, LR: 0.0003 +[2026-03-04 18:50:36] (step=0059168) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.576599491293289, LR: 0.0003 +[2026-03-04 18:50:44] (step=0059169) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.576795147720603, LR: 0.0003 +[2026-03-04 18:50:52] (step=0059170) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.576990804147917, LR: 0.0003 +[2026-03-04 18:51:00] (step=0059171) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.57718646057523, LR: 0.0003 +[2026-03-04 18:51:08] (step=0059172) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.577382117002543, LR: 0.0003 +[2026-03-04 18:51:16] (step=0059173) Train Loss: 0.4443, Train Steps/Sec: 0.12, Epoch: 11.577577773429857, LR: 0.0003 +[2026-03-04 18:51:23] (step=0059174) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.57777342985717, LR: 0.0003 +[2026-03-04 18:51:31] (step=0059175) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.577969086284485, LR: 0.0003 +[2026-03-04 18:51:39] (step=0059176) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.578164742711799, LR: 0.0003 +[2026-03-04 18:51:47] (step=0059177) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.578360399139111, LR: 0.0003 +[2026-03-04 18:51:55] (step=0059178) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 11.578556055566425, LR: 0.0003 +[2026-03-04 18:52:03] (step=0059179) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.578751711993739, LR: 0.0003 +[2026-03-04 18:52:11] (step=0059180) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.578947368421053, LR: 0.0003 +[2026-03-04 18:52:18] (step=0059181) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.579143024848367, LR: 0.0003 +[2026-03-04 18:52:26] (step=0059182) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.57933868127568, LR: 0.0003 +[2026-03-04 18:52:34] (step=0059183) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.579534337702993, LR: 0.0003 +[2026-03-04 18:52:42] (step=0059184) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.579729994130307, LR: 0.0003 +[2026-03-04 18:52:50] (step=0059185) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.579925650557621, LR: 0.0003 +[2026-03-04 18:52:58] (step=0059186) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.580121306984934, LR: 0.0003 +[2026-03-04 18:53:06] (step=0059187) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.580316963412248, LR: 0.0003 +[2026-03-04 18:53:14] (step=0059188) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.580512619839562, LR: 0.0003 +[2026-03-04 18:53:21] (step=0059189) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.580708276266876, LR: 0.0003 +[2026-03-04 18:53:29] (step=0059190) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.58090393269419, LR: 0.0003 +[2026-03-04 18:53:37] (step=0059191) Train Loss: 0.4502, Train Steps/Sec: 0.12, Epoch: 11.581099589121502, LR: 0.0003 +[2026-03-04 18:53:45] (step=0059192) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.581295245548816, LR: 0.0003 +[2026-03-04 18:53:53] (step=0059193) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.58149090197613, LR: 0.0003 +[2026-03-04 18:54:01] (step=0059194) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.581686558403444, LR: 0.0003 +[2026-03-04 18:54:09] (step=0059195) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.581882214830758, LR: 0.0003 +[2026-03-04 18:54:17] (step=0059196) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.58207787125807, LR: 0.0003 +[2026-03-04 18:54:24] (step=0059197) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.582273527685384, LR: 0.0003 +[2026-03-04 18:54:32] (step=0059198) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.582469184112698, LR: 0.0003 +[2026-03-04 18:54:40] (step=0059199) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.582664840540012, LR: 0.0003 +[2026-03-04 18:54:48] (step=0059200) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.582860496967326, LR: 0.0003 +[2026-03-04 18:54:56] (step=0059201) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.583056153394638, LR: 0.0003 +[2026-03-04 18:55:04] (step=0059202) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.583251809821952, LR: 0.0003 +[2026-03-04 18:55:12] (step=0059203) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.583447466249266, LR: 0.0003 +[2026-03-04 18:55:19] (step=0059204) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 11.58364312267658, LR: 0.0003 +[2026-03-04 18:55:27] (step=0059205) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.583838779103894, LR: 0.0003 +[2026-03-04 18:55:35] (step=0059206) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.584034435531207, LR: 0.0003 +[2026-03-04 18:55:43] (step=0059207) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.58423009195852, LR: 0.0003 +[2026-03-04 18:55:51] (step=0059208) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.584425748385835, LR: 0.0003 +[2026-03-04 18:55:59] (step=0059209) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.584621404813149, LR: 0.0003 +[2026-03-04 18:56:07] (step=0059210) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.584817061240463, LR: 0.0003 +[2026-03-04 18:56:14] (step=0059211) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.585012717667775, LR: 0.0003 +[2026-03-04 18:56:22] (step=0059212) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.585208374095089, LR: 0.0003 +[2026-03-04 18:56:30] (step=0059213) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.585404030522403, LR: 0.0003 +[2026-03-04 18:56:38] (step=0059214) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.585599686949717, LR: 0.0003 +[2026-03-04 18:56:46] (step=0059215) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.585795343377029, LR: 0.0003 +[2026-03-04 18:56:54] (step=0059216) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.585990999804343, LR: 0.0003 +[2026-03-04 18:57:02] (step=0059217) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.586186656231657, LR: 0.0003 +[2026-03-04 18:57:09] (step=0059218) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.586382312658971, LR: 0.0003 +[2026-03-04 18:57:17] (step=0059219) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.586577969086285, LR: 0.0003 +[2026-03-04 18:57:25] (step=0059220) Train Loss: 0.4577, Train Steps/Sec: 0.12, Epoch: 11.586773625513597, LR: 0.0003 +[2026-03-04 18:57:33] (step=0059221) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 11.586969281940911, LR: 0.0003 +[2026-03-04 18:57:41] (step=0059222) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.587164938368225, LR: 0.0003 +[2026-03-04 18:57:49] (step=0059223) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.58736059479554, LR: 0.0003 +[2026-03-04 18:57:57] (step=0059224) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.587556251222853, LR: 0.0003 +[2026-03-04 18:58:05] (step=0059225) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.587751907650166, LR: 0.0003 +[2026-03-04 18:58:12] (step=0059226) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.58794756407748, LR: 0.0003 +[2026-03-04 18:58:20] (step=0059227) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.588143220504794, LR: 0.0003 +[2026-03-04 18:58:28] (step=0059228) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.588338876932108, LR: 0.0003 +[2026-03-04 18:58:36] (step=0059229) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.588534533359422, LR: 0.0003 +[2026-03-04 18:58:44] (step=0059230) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.588730189786734, LR: 0.0003 +[2026-03-04 18:58:52] (step=0059231) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.588925846214048, LR: 0.0003 +[2026-03-04 18:59:00] (step=0059232) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.589121502641362, LR: 0.0003 +[2026-03-04 18:59:08] (step=0059233) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.589317159068676, LR: 0.0003 +[2026-03-04 18:59:15] (step=0059234) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.58951281549599, LR: 0.0003 +[2026-03-04 18:59:23] (step=0059235) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.589708471923302, LR: 0.0003 +[2026-03-04 18:59:31] (step=0059236) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.589904128350616, LR: 0.0003 +[2026-03-04 18:59:39] (step=0059237) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 11.59009978477793, LR: 0.0003 +[2026-03-04 18:59:47] (step=0059238) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.590295441205244, LR: 0.0003 +[2026-03-04 18:59:55] (step=0059239) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.590491097632556, LR: 0.0003 +[2026-03-04 19:00:03] (step=0059240) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.59068675405987, LR: 0.0003 +[2026-03-04 19:00:11] (step=0059241) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.590882410487184, LR: 0.0003 +[2026-03-04 19:00:18] (step=0059242) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.591078066914498, LR: 0.0003 +[2026-03-04 19:00:26] (step=0059243) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.591273723341812, LR: 0.0003 +[2026-03-04 19:00:34] (step=0059244) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 11.591469379769125, LR: 0.0003 +[2026-03-04 19:00:42] (step=0059245) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.591665036196439, LR: 0.0003 +[2026-03-04 19:00:50] (step=0059246) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 11.591860692623753, LR: 0.0003 +[2026-03-04 19:00:58] (step=0059247) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.592056349051067, LR: 0.0003 +[2026-03-04 19:01:05] (step=0059248) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.59225200547838, LR: 0.0003 +[2026-03-04 19:01:13] (step=0059249) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.592447661905693, LR: 0.0003 +[2026-03-04 19:01:21] (step=0059250) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.592643318333007, LR: 0.0003 +[2026-03-04 19:01:29] (step=0059251) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.59283897476032, LR: 0.0003 +[2026-03-04 19:01:37] (step=0059252) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.593034631187635, LR: 0.0003 +[2026-03-04 19:01:45] (step=0059253) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.593230287614949, LR: 0.0003 +[2026-03-04 19:01:53] (step=0059254) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.593425944042261, LR: 0.0003 +[2026-03-04 19:02:00] (step=0059255) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.593621600469575, LR: 0.0003 +[2026-03-04 19:02:08] (step=0059256) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.59381725689689, LR: 0.0003 +[2026-03-04 19:02:16] (step=0059257) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.594012913324203, LR: 0.0003 +[2026-03-04 19:02:24] (step=0059258) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.594208569751517, LR: 0.0003 +[2026-03-04 19:02:32] (step=0059259) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.59440422617883, LR: 0.0003 +[2026-03-04 19:02:40] (step=0059260) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.594599882606143, LR: 0.0003 +[2026-03-04 19:02:48] (step=0059261) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 11.594795539033457, LR: 0.0003 +[2026-03-04 19:02:55] (step=0059262) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.594991195460771, LR: 0.0003 +[2026-03-04 19:03:03] (step=0059263) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 11.595186851888085, LR: 0.0003 +[2026-03-04 19:03:11] (step=0059264) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.595382508315398, LR: 0.0003 +[2026-03-04 19:03:19] (step=0059265) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.595578164742712, LR: 0.0003 +[2026-03-04 19:03:27] (step=0059266) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.595773821170026, LR: 0.0003 +[2026-03-04 19:03:35] (step=0059267) Train Loss: 0.4596, Train Steps/Sec: 0.12, Epoch: 11.59596947759734, LR: 0.0003 +[2026-03-04 19:03:43] (step=0059268) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.596165134024652, LR: 0.0003 +[2026-03-04 19:03:51] (step=0059269) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.596360790451966, LR: 0.0003 +[2026-03-04 19:03:58] (step=0059270) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 11.59655644687928, LR: 0.0003 +[2026-03-04 19:04:06] (step=0059271) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.596752103306594, LR: 0.0003 +[2026-03-04 19:04:14] (step=0059272) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 11.596947759733908, LR: 0.0003 +[2026-03-04 19:04:22] (step=0059273) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.59714341616122, LR: 0.0003 +[2026-03-04 19:04:30] (step=0059274) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.597339072588534, LR: 0.0003 +[2026-03-04 19:04:38] (step=0059275) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.597534729015848, LR: 0.0003 +[2026-03-04 19:04:46] (step=0059276) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.597730385443162, LR: 0.0003 +[2026-03-04 19:04:53] (step=0059277) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.597926041870476, LR: 0.0003 +[2026-03-04 19:05:01] (step=0059278) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.598121698297788, LR: 0.0003 +[2026-03-04 19:05:09] (step=0059279) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.598317354725102, LR: 0.0003 +[2026-03-04 19:05:17] (step=0059280) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.598513011152416, LR: 0.0003 +[2026-03-04 19:05:25] (step=0059281) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.59870866757973, LR: 0.0003 +[2026-03-04 19:05:33] (step=0059282) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.598904324007044, LR: 0.0003 +[2026-03-04 19:05:41] (step=0059283) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.599099980434357, LR: 0.0003 +[2026-03-04 19:05:49] (step=0059284) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.59929563686167, LR: 0.0003 +[2026-03-04 19:05:57] (step=0059285) Train Loss: 0.4456, Train Steps/Sec: 0.12, Epoch: 11.599491293288985, LR: 0.0003 +[2026-03-04 19:06:04] (step=0059286) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.599686949716299, LR: 0.0003 +[2026-03-04 19:06:12] (step=0059287) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 11.599882606143613, LR: 0.0003 +[2026-03-04 19:06:20] (step=0059288) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.600078262570925, LR: 0.0003 +[2026-03-04 19:06:28] (step=0059289) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.600273918998239, LR: 0.0003 +[2026-03-04 19:06:36] (step=0059290) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 11.600469575425553, LR: 0.0003 +[2026-03-04 19:06:44] (step=0059291) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.600665231852867, LR: 0.0003 +[2026-03-04 19:06:52] (step=0059292) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.60086088828018, LR: 0.0003 +[2026-03-04 19:06:59] (step=0059293) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.601056544707493, LR: 0.0003 +[2026-03-04 19:07:07] (step=0059294) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.601252201134807, LR: 0.0003 +[2026-03-04 19:07:15] (step=0059295) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.601447857562121, LR: 0.0003 +[2026-03-04 19:07:23] (step=0059296) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.601643513989435, LR: 0.0003 +[2026-03-04 19:07:31] (step=0059297) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.601839170416747, LR: 0.0003 +[2026-03-04 19:07:39] (step=0059298) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.602034826844061, LR: 0.0003 +[2026-03-04 19:07:46] (step=0059299) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.602230483271375, LR: 0.0003 +[2026-03-04 19:07:54] (step=0059300) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.60242613969869, LR: 0.0003 +[2026-03-04 19:08:02] (step=0059301) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.602621796126003, LR: 0.0003 +[2026-03-04 19:08:10] (step=0059302) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.602817452553316, LR: 0.0003 +[2026-03-04 19:08:18] (step=0059303) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.60301310898063, LR: 0.0003 +[2026-03-04 19:08:26] (step=0059304) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.603208765407944, LR: 0.0003 +[2026-03-04 19:08:34] (step=0059305) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.603404421835258, LR: 0.0003 +[2026-03-04 19:08:41] (step=0059306) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.603600078262572, LR: 0.0003 +[2026-03-04 19:08:49] (step=0059307) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.603795734689884, LR: 0.0003 +[2026-03-04 19:08:57] (step=0059308) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.603991391117198, LR: 0.0003 +[2026-03-04 19:09:05] (step=0059309) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.604187047544512, LR: 0.0003 +[2026-03-04 19:09:13] (step=0059310) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 11.604382703971826, LR: 0.0003 +[2026-03-04 19:09:21] (step=0059311) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 11.60457836039914, LR: 0.0003 +[2026-03-04 19:09:29] (step=0059312) Train Loss: 0.4545, Train Steps/Sec: 0.12, Epoch: 11.604774016826452, LR: 0.0003 +[2026-03-04 19:09:37] (step=0059313) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.604969673253766, LR: 0.0003 +[2026-03-04 19:09:44] (step=0059314) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.60516532968108, LR: 0.0003 +[2026-03-04 19:09:52] (step=0059315) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.605360986108394, LR: 0.0003 +[2026-03-04 19:10:00] (step=0059316) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.605556642535708, LR: 0.0003 +[2026-03-04 19:10:08] (step=0059317) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.60575229896302, LR: 0.0003 +[2026-03-04 19:10:16] (step=0059318) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.605947955390334, LR: 0.0003 +[2026-03-04 19:10:24] (step=0059319) Train Loss: 0.4230, Train Steps/Sec: 0.13, Epoch: 11.606143611817648, LR: 0.0003 +[2026-03-04 19:10:32] (step=0059320) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.606339268244962, LR: 0.0003 +[2026-03-04 19:10:39] (step=0059321) Train Loss: 0.4240, Train Steps/Sec: 0.13, Epoch: 11.606534924672275, LR: 0.0003 +[2026-03-04 19:10:47] (step=0059322) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.606730581099589, LR: 0.0003 +[2026-03-04 19:10:55] (step=0059323) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.606926237526903, LR: 0.0003 +[2026-03-04 19:11:03] (step=0059324) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.607121893954217, LR: 0.0003 +[2026-03-04 19:11:11] (step=0059325) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.60731755038153, LR: 0.0003 +[2026-03-04 19:11:19] (step=0059326) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.607513206808843, LR: 0.0003 +[2026-03-04 19:11:27] (step=0059327) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.607708863236157, LR: 0.0003 +[2026-03-04 19:11:35] (step=0059328) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.607904519663471, LR: 0.0003 +[2026-03-04 19:11:42] (step=0059329) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.608100176090785, LR: 0.0003 +[2026-03-04 19:11:50] (step=0059330) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.608295832518099, LR: 0.0003 +[2026-03-04 19:11:58] (step=0059331) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.608491488945411, LR: 0.0003 +[2026-03-04 19:12:06] (step=0059332) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.608687145372725, LR: 0.0003 +[2026-03-04 19:12:14] (step=0059333) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.60888280180004, LR: 0.0003 +[2026-03-04 19:12:22] (step=0059334) Train Loss: 0.4536, Train Steps/Sec: 0.12, Epoch: 11.609078458227353, LR: 0.0003 +[2026-03-04 19:12:30] (step=0059335) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.609274114654667, LR: 0.0003 +[2026-03-04 19:12:38] (step=0059336) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.60946977108198, LR: 0.0003 +[2026-03-04 19:12:45] (step=0059337) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.609665427509293, LR: 0.0003 +[2026-03-04 19:12:53] (step=0059338) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.609861083936607, LR: 0.0003 +[2026-03-04 19:13:01] (step=0059339) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.610056740363921, LR: 0.0003 +[2026-03-04 19:13:09] (step=0059340) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.610252396791235, LR: 0.0003 +[2026-03-04 19:13:17] (step=0059341) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.610448053218548, LR: 0.0003 +[2026-03-04 19:13:25] (step=0059342) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.610643709645862, LR: 0.0003 +[2026-03-04 19:13:33] (step=0059343) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.610839366073176, LR: 0.0003 +[2026-03-04 19:13:40] (step=0059344) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.61103502250049, LR: 0.0003 +[2026-03-04 19:13:48] (step=0059345) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.611230678927802, LR: 0.0003 +[2026-03-04 19:13:56] (step=0059346) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.611426335355116, LR: 0.0003 +[2026-03-04 19:14:04] (step=0059347) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.61162199178243, LR: 0.0003 +[2026-03-04 19:14:12] (step=0059348) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.611817648209744, LR: 0.0003 +[2026-03-04 19:14:20] (step=0059349) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.612013304637058, LR: 0.0003 +[2026-03-04 19:14:28] (step=0059350) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.61220896106437, LR: 0.0003 +[2026-03-04 19:14:35] (step=0059351) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.612404617491684, LR: 0.0003 +[2026-03-04 19:14:43] (step=0059352) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.612600273918998, LR: 0.0003 +[2026-03-04 19:14:51] (step=0059353) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.612795930346312, LR: 0.0003 +[2026-03-04 19:14:59] (step=0059354) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.612991586773626, LR: 0.0003 +[2026-03-04 19:15:07] (step=0059355) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.613187243200938, LR: 0.0003 +[2026-03-04 19:15:15] (step=0059356) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.613382899628252, LR: 0.0003 +[2026-03-04 19:15:23] (step=0059357) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.613578556055566, LR: 0.0003 +[2026-03-04 19:15:30] (step=0059358) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.61377421248288, LR: 0.0003 +[2026-03-04 19:15:38] (step=0059359) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 11.613969868910194, LR: 0.0003 +[2026-03-04 19:15:46] (step=0059360) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.614165525337507, LR: 0.0003 +[2026-03-04 19:15:54] (step=0059361) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.61436118176482, LR: 0.0003 +[2026-03-04 19:16:02] (step=0059362) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.614556838192135, LR: 0.0003 +[2026-03-04 19:16:10] (step=0059363) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.614752494619449, LR: 0.0003 +[2026-03-04 19:16:18] (step=0059364) Train Loss: 0.4208, Train Steps/Sec: 0.13, Epoch: 11.614948151046763, LR: 0.0003 +[2026-03-04 19:16:25] (step=0059365) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.615143807474075, LR: 0.0003 +[2026-03-04 19:16:33] (step=0059366) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.615339463901389, LR: 0.0003 +[2026-03-04 19:16:41] (step=0059367) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.615535120328703, LR: 0.0003 +[2026-03-04 19:16:49] (step=0059368) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.615730776756017, LR: 0.0003 +[2026-03-04 19:16:57] (step=0059369) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.615926433183331, LR: 0.0003 +[2026-03-04 19:17:05] (step=0059370) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.616122089610643, LR: 0.0003 +[2026-03-04 19:17:13] (step=0059371) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.616317746037957, LR: 0.0003 +[2026-03-04 19:17:21] (step=0059372) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.616513402465271, LR: 0.0003 +[2026-03-04 19:17:28] (step=0059373) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.616709058892585, LR: 0.0003 +[2026-03-04 19:17:36] (step=0059374) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.616904715319897, LR: 0.0003 +[2026-03-04 19:17:44] (step=0059375) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.617100371747211, LR: 0.0003 +[2026-03-04 19:17:52] (step=0059376) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.617296028174525, LR: 0.0003 +[2026-03-04 19:18:00] (step=0059377) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.61749168460184, LR: 0.0003 +[2026-03-04 19:18:08] (step=0059378) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.617687341029153, LR: 0.0003 +[2026-03-04 19:18:16] (step=0059379) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 11.617882997456466, LR: 0.0003 +[2026-03-04 19:18:23] (step=0059380) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.61807865388378, LR: 0.0003 +[2026-03-04 19:18:31] (step=0059381) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.618274310311094, LR: 0.0003 +[2026-03-04 19:18:39] (step=0059382) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.618469966738408, LR: 0.0003 +[2026-03-04 19:18:47] (step=0059383) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.618665623165722, LR: 0.0003 +[2026-03-04 19:18:55] (step=0059384) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.618861279593034, LR: 0.0003 +[2026-03-04 19:19:03] (step=0059385) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.619056936020348, LR: 0.0003 +[2026-03-04 19:19:11] (step=0059386) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.619252592447662, LR: 0.0003 +[2026-03-04 19:19:19] (step=0059387) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.619448248874976, LR: 0.0003 +[2026-03-04 19:19:26] (step=0059388) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.61964390530229, LR: 0.0003 +[2026-03-04 19:19:34] (step=0059389) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.619839561729602, LR: 0.0003 +[2026-03-04 19:19:42] (step=0059390) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.620035218156916, LR: 0.0003 +[2026-03-04 19:19:50] (step=0059391) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.62023087458423, LR: 0.0003 +[2026-03-04 19:19:58] (step=0059392) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 11.620426531011544, LR: 0.0003 +[2026-03-04 19:20:06] (step=0059393) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.620622187438858, LR: 0.0003 +[2026-03-04 19:20:14] (step=0059394) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.62081784386617, LR: 0.0003 +[2026-03-04 19:20:22] (step=0059395) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.621013500293484, LR: 0.0003 +[2026-03-04 19:20:29] (step=0059396) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.621209156720798, LR: 0.0003 +[2026-03-04 19:20:37] (step=0059397) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 11.621404813148112, LR: 0.0003 +[2026-03-04 19:20:45] (step=0059398) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.621600469575425, LR: 0.0003 +[2026-03-04 19:20:53] (step=0059399) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.621796126002739, LR: 0.0003 +[2026-03-04 19:21:01] (step=0059400) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.621991782430053, LR: 0.0003 +[2026-03-04 19:21:09] (step=0059401) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 11.622187438857367, LR: 0.0003 +[2026-03-04 19:21:17] (step=0059402) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 11.62238309528468, LR: 0.0003 +[2026-03-04 19:21:24] (step=0059403) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.622578751711993, LR: 0.0003 +[2026-03-04 19:21:32] (step=0059404) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.622774408139307, LR: 0.0003 +[2026-03-04 19:21:40] (step=0059405) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.622970064566621, LR: 0.0003 +[2026-03-04 19:21:48] (step=0059406) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.623165720993935, LR: 0.0003 +[2026-03-04 19:21:56] (step=0059407) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.623361377421249, LR: 0.0003 +[2026-03-04 19:22:04] (step=0059408) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.623557033848561, LR: 0.0003 +[2026-03-04 19:22:12] (step=0059409) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.623752690275875, LR: 0.0003 +[2026-03-04 19:22:19] (step=0059410) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.62394834670319, LR: 0.0003 +[2026-03-04 19:22:27] (step=0059411) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.624144003130503, LR: 0.0003 +[2026-03-04 19:22:35] (step=0059412) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.624339659557817, LR: 0.0003 +[2026-03-04 19:22:43] (step=0059413) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.62453531598513, LR: 0.0003 +[2026-03-04 19:22:51] (step=0059414) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.624730972412443, LR: 0.0003 +[2026-03-04 19:22:59] (step=0059415) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.624926628839757, LR: 0.0003 +[2026-03-04 19:23:07] (step=0059416) Train Loss: 0.4383, Train Steps/Sec: 0.12, Epoch: 11.625122285267071, LR: 0.0003 +[2026-03-04 19:23:15] (step=0059417) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.625317941694385, LR: 0.0003 +[2026-03-04 19:23:23] (step=0059418) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.625513598121698, LR: 0.0003 +[2026-03-04 19:23:30] (step=0059419) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.625709254549012, LR: 0.0003 +[2026-03-04 19:23:38] (step=0059420) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.625904910976326, LR: 0.0003 +[2026-03-04 19:23:46] (step=0059421) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.62610056740364, LR: 0.0003 +[2026-03-04 19:23:54] (step=0059422) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.626296223830952, LR: 0.0003 +[2026-03-04 19:24:02] (step=0059423) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.626491880258266, LR: 0.0003 +[2026-03-04 19:24:10] (step=0059424) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.62668753668558, LR: 0.0003 +[2026-03-04 19:24:17] (step=0059425) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.626883193112894, LR: 0.0003 +[2026-03-04 19:24:25] (step=0059426) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.627078849540208, LR: 0.0003 +[2026-03-04 19:24:33] (step=0059427) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.62727450596752, LR: 0.0003 +[2026-03-04 19:24:41] (step=0059428) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.627470162394834, LR: 0.0003 +[2026-03-04 19:24:49] (step=0059429) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.627665818822148, LR: 0.0003 +[2026-03-04 19:24:57] (step=0059430) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.627861475249462, LR: 0.0003 +[2026-03-04 19:25:05] (step=0059431) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.628057131676776, LR: 0.0003 +[2026-03-04 19:25:12] (step=0059432) Train Loss: 0.4636, Train Steps/Sec: 0.13, Epoch: 11.628252788104088, LR: 0.0003 +[2026-03-04 19:25:20] (step=0059433) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.628448444531402, LR: 0.0003 +[2026-03-04 19:25:28] (step=0059434) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 11.628644100958716, LR: 0.0003 +[2026-03-04 19:25:36] (step=0059435) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.62883975738603, LR: 0.0003 +[2026-03-04 19:25:44] (step=0059436) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 11.629035413813344, LR: 0.0003 +[2026-03-04 19:25:52] (step=0059437) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.629231070240657, LR: 0.0003 +[2026-03-04 19:26:00] (step=0059438) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.62942672666797, LR: 0.0003 +[2026-03-04 19:26:08] (step=0059439) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 11.629622383095285, LR: 0.0003 +[2026-03-04 19:26:16] (step=0059440) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.629818039522599, LR: 0.0003 +[2026-03-04 19:26:23] (step=0059441) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.630013695949913, LR: 0.0003 +[2026-03-04 19:26:31] (step=0059442) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 11.630209352377225, LR: 0.0003 +[2026-03-04 19:26:39] (step=0059443) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.630405008804539, LR: 0.0003 +[2026-03-04 19:26:47] (step=0059444) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 11.630600665231853, LR: 0.0003 +[2026-03-04 19:26:55] (step=0059445) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.630796321659167, LR: 0.0003 +[2026-03-04 19:27:03] (step=0059446) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.630991978086481, LR: 0.0003 +[2026-03-04 19:27:11] (step=0059447) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.631187634513793, LR: 0.0003 +[2026-03-04 19:27:18] (step=0059448) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.631383290941107, LR: 0.0003 +[2026-03-04 19:27:26] (step=0059449) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.631578947368421, LR: 0.0003 +[2026-03-04 19:27:34] (step=0059450) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.631774603795735, LR: 0.0003 +[2026-03-04 19:27:42] (step=0059451) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.631970260223047, LR: 0.0003 +[2026-03-04 19:27:50] (step=0059452) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.632165916650361, LR: 0.0003 +[2026-03-04 19:27:58] (step=0059453) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.632361573077675, LR: 0.0003 +[2026-03-04 19:28:06] (step=0059454) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.63255722950499, LR: 0.0003 +[2026-03-04 19:28:13] (step=0059455) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.632752885932303, LR: 0.0003 +[2026-03-04 19:28:21] (step=0059456) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.632948542359616, LR: 0.0003 +[2026-03-04 19:28:29] (step=0059457) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.63314419878693, LR: 0.0003 +[2026-03-04 19:28:37] (step=0059458) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 11.633339855214244, LR: 0.0003 +[2026-03-04 19:28:45] (step=0059459) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.633535511641558, LR: 0.0003 +[2026-03-04 19:28:53] (step=0059460) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.633731168068872, LR: 0.0003 +[2026-03-04 19:29:01] (step=0059461) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.633926824496184, LR: 0.0003 +[2026-03-04 19:29:08] (step=0059462) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.634122480923498, LR: 0.0003 +[2026-03-04 19:29:16] (step=0059463) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.634318137350812, LR: 0.0003 +[2026-03-04 19:29:24] (step=0059464) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.634513793778126, LR: 0.0003 +[2026-03-04 19:29:32] (step=0059465) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.63470945020544, LR: 0.0003 +[2026-03-04 19:29:40] (step=0059466) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 11.634905106632752, LR: 0.0003 +[2026-03-04 19:29:48] (step=0059467) Train Loss: 0.4344, Train Steps/Sec: 0.12, Epoch: 11.635100763060066, LR: 0.0003 +[2026-03-04 19:29:56] (step=0059468) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.63529641948738, LR: 0.0003 +[2026-03-04 19:30:04] (step=0059469) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 11.635492075914694, LR: 0.0003 +[2026-03-04 19:30:12] (step=0059470) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.635687732342008, LR: 0.0003 +[2026-03-04 19:30:19] (step=0059471) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.63588338876932, LR: 0.0003 +[2026-03-04 19:30:27] (step=0059472) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.636079045196634, LR: 0.0003 +[2026-03-04 19:30:35] (step=0059473) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.636274701623949, LR: 0.0003 +[2026-03-04 19:30:43] (step=0059474) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.636470358051263, LR: 0.0003 +[2026-03-04 19:30:51] (step=0059475) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.636666014478575, LR: 0.0003 +[2026-03-04 19:30:59] (step=0059476) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.636861670905889, LR: 0.0003 +[2026-03-04 19:31:07] (step=0059477) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 11.637057327333203, LR: 0.0003 +[2026-03-04 19:31:14] (step=0059478) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.637252983760517, LR: 0.0003 +[2026-03-04 19:31:22] (step=0059479) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 11.63744864018783, LR: 0.0003 +[2026-03-04 19:31:30] (step=0059480) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.637644296615143, LR: 0.0003 +[2026-03-04 19:31:38] (step=0059481) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.637839953042457, LR: 0.0003 +[2026-03-04 19:31:46] (step=0059482) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.638035609469771, LR: 0.0003 +[2026-03-04 19:31:54] (step=0059483) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 11.638231265897085, LR: 0.0003 +[2026-03-04 19:32:02] (step=0059484) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.638426922324399, LR: 0.0003 +[2026-03-04 19:32:10] (step=0059485) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.638622578751711, LR: 0.0003 +[2026-03-04 19:32:18] (step=0059486) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 11.638818235179025, LR: 0.0003 +[2026-03-04 19:32:25] (step=0059487) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.63901389160634, LR: 0.0003 +[2026-03-04 19:32:33] (step=0059488) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 11.639209548033653, LR: 0.0003 +[2026-03-04 19:32:41] (step=0059489) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.639405204460967, LR: 0.0003 +[2026-03-04 19:32:49] (step=0059490) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.63960086088828, LR: 0.0003 +[2026-03-04 19:32:57] (step=0059491) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.639796517315594, LR: 0.0003 +[2026-03-04 19:33:05] (step=0059492) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.639992173742908, LR: 0.0003 +[2026-03-04 19:33:13] (step=0059493) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.640187830170222, LR: 0.0003 +[2026-03-04 19:33:20] (step=0059494) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.640383486597536, LR: 0.0003 +[2026-03-04 19:33:28] (step=0059495) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.640579143024848, LR: 0.0003 +[2026-03-04 19:33:36] (step=0059496) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.640774799452162, LR: 0.0003 +[2026-03-04 19:33:44] (step=0059497) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 11.640970455879476, LR: 0.0003 +[2026-03-04 19:33:52] (step=0059498) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.64116611230679, LR: 0.0003 +[2026-03-04 19:34:00] (step=0059499) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.641361768734104, LR: 0.0003 +[2026-03-04 19:34:08] (step=0059500) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.641557425161416, LR: 0.0003 +[2026-03-04 19:34:08] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0059500/ +[2026-03-04 19:34:15] (step=0059501) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.64175308158873, LR: 0.0003 +[2026-03-04 19:34:23] (step=0059502) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.641948738016044, LR: 0.0003 +[2026-03-04 19:34:31] (step=0059503) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.642144394443358, LR: 0.0003 +[2026-03-04 19:34:39] (step=0059504) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.64234005087067, LR: 0.0003 +[2026-03-04 19:34:47] (step=0059505) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.642535707297984, LR: 0.0003 +[2026-03-04 19:34:55] (step=0059506) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.642731363725298, LR: 0.0003 +[2026-03-04 19:35:03] (step=0059507) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.642927020152612, LR: 0.0003 +[2026-03-04 19:35:10] (step=0059508) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.643122676579926, LR: 0.0003 +[2026-03-04 19:35:18] (step=0059509) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.643318333007239, LR: 0.0003 +[2026-03-04 19:35:26] (step=0059510) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.643513989434553, LR: 0.0003 +[2026-03-04 19:35:34] (step=0059511) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.643709645861867, LR: 0.0003 +[2026-03-04 19:35:42] (step=0059512) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.64390530228918, LR: 0.0003 +[2026-03-04 19:35:50] (step=0059513) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 11.644100958716495, LR: 0.0003 +[2026-03-04 19:35:58] (step=0059514) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.644296615143807, LR: 0.0003 +[2026-03-04 19:36:06] (step=0059515) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.64449227157112, LR: 0.0003 +[2026-03-04 19:36:14] (step=0059516) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.644687927998435, LR: 0.0003 +[2026-03-04 19:36:21] (step=0059517) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.644883584425749, LR: 0.0003 +[2026-03-04 19:36:29] (step=0059518) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.645079240853063, LR: 0.0003 +[2026-03-04 19:36:37] (step=0059519) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 11.645274897280375, LR: 0.0003 +[2026-03-04 19:36:45] (step=0059520) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.645470553707689, LR: 0.0003 +[2026-03-04 19:36:53] (step=0059521) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 11.645666210135003, LR: 0.0003 +[2026-03-04 19:37:01] (step=0059522) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.645861866562317, LR: 0.0003 +[2026-03-04 19:37:08] (step=0059523) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.646057522989631, LR: 0.0003 +[2026-03-04 19:37:16] (step=0059524) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.646253179416943, LR: 0.0003 +[2026-03-04 19:37:24] (step=0059525) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.646448835844257, LR: 0.0003 +[2026-03-04 19:37:32] (step=0059526) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.646644492271571, LR: 0.0003 +[2026-03-04 19:37:40] (step=0059527) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.646840148698885, LR: 0.0003 +[2026-03-04 19:37:48] (step=0059528) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.647035805126198, LR: 0.0003 +[2026-03-04 19:37:56] (step=0059529) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.647231461553512, LR: 0.0003 +[2026-03-04 19:38:04] (step=0059530) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.647427117980826, LR: 0.0003 +[2026-03-04 19:38:11] (step=0059531) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.64762277440814, LR: 0.0003 +[2026-03-04 19:38:19] (step=0059532) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.647818430835454, LR: 0.0003 +[2026-03-04 19:38:27] (step=0059533) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.648014087262766, LR: 0.0003 +[2026-03-04 19:38:35] (step=0059534) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.64820974369008, LR: 0.0003 +[2026-03-04 19:38:43] (step=0059535) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.648405400117394, LR: 0.0003 +[2026-03-04 19:38:51] (step=0059536) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.648601056544708, LR: 0.0003 +[2026-03-04 19:38:59] (step=0059537) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.648796712972022, LR: 0.0003 +[2026-03-04 19:39:06] (step=0059538) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.648992369399334, LR: 0.0003 +[2026-03-04 19:39:14] (step=0059539) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 11.649188025826648, LR: 0.0003 +[2026-03-04 19:39:22] (step=0059540) Train Loss: 0.4266, Train Steps/Sec: 0.13, Epoch: 11.649383682253962, LR: 0.0003 +[2026-03-04 19:39:30] (step=0059541) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.649579338681276, LR: 0.0003 +[2026-03-04 19:39:38] (step=0059542) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.64977499510859, LR: 0.0003 +[2026-03-04 19:39:46] (step=0059543) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.649970651535902, LR: 0.0003 +[2026-03-04 19:39:54] (step=0059544) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 11.650166307963216, LR: 0.0003 +[2026-03-04 19:40:01] (step=0059545) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.65036196439053, LR: 0.0003 +[2026-03-04 19:40:09] (step=0059546) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.650557620817844, LR: 0.0003 +[2026-03-04 19:40:17] (step=0059547) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.650753277245158, LR: 0.0003 +[2026-03-04 19:40:25] (step=0059548) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.65094893367247, LR: 0.0003 +[2026-03-04 19:40:33] (step=0059549) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.651144590099785, LR: 0.0003 +[2026-03-04 19:40:41] (step=0059550) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.651340246527099, LR: 0.0003 +[2026-03-04 19:40:49] (step=0059551) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.651535902954413, LR: 0.0003 +[2026-03-04 19:40:56] (step=0059552) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.651731559381727, LR: 0.0003 +[2026-03-04 19:41:04] (step=0059553) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 11.651927215809039, LR: 0.0003 +[2026-03-04 19:41:12] (step=0059554) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.652122872236353, LR: 0.0003 +[2026-03-04 19:41:20] (step=0059555) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.652318528663667, LR: 0.0003 +[2026-03-04 19:41:28] (step=0059556) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.65251418509098, LR: 0.0003 +[2026-03-04 19:41:36] (step=0059557) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.652709841518293, LR: 0.0003 +[2026-03-04 19:41:44] (step=0059558) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.652905497945607, LR: 0.0003 +[2026-03-04 19:41:51] (step=0059559) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 11.653101154372921, LR: 0.0003 +[2026-03-04 19:41:59] (step=0059560) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 11.653296810800235, LR: 0.0003 +[2026-03-04 19:42:07] (step=0059561) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.653492467227549, LR: 0.0003 +[2026-03-04 19:42:15] (step=0059562) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.653688123654861, LR: 0.0003 +[2026-03-04 19:42:23] (step=0059563) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.653883780082175, LR: 0.0003 +[2026-03-04 19:42:31] (step=0059564) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.65407943650949, LR: 0.0003 +[2026-03-04 19:42:39] (step=0059565) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.654275092936803, LR: 0.0003 +[2026-03-04 19:42:47] (step=0059566) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 11.654470749364117, LR: 0.0003 +[2026-03-04 19:42:55] (step=0059567) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.65466640579143, LR: 0.0003 +[2026-03-04 19:43:02] (step=0059568) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.654862062218744, LR: 0.0003 +[2026-03-04 19:43:10] (step=0059569) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.655057718646058, LR: 0.0003 +[2026-03-04 19:43:18] (step=0059570) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.655253375073372, LR: 0.0003 +[2026-03-04 19:43:26] (step=0059571) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.655449031500686, LR: 0.0003 +[2026-03-04 19:43:34] (step=0059572) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.655644687927998, LR: 0.0003 +[2026-03-04 19:43:42] (step=0059573) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.655840344355312, LR: 0.0003 +[2026-03-04 19:43:50] (step=0059574) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.656036000782626, LR: 0.0003 +[2026-03-04 19:43:57] (step=0059575) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.65623165720994, LR: 0.0003 +[2026-03-04 19:44:05] (step=0059576) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 11.656427313637254, LR: 0.0003 +[2026-03-04 19:44:13] (step=0059577) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.656622970064566, LR: 0.0003 +[2026-03-04 19:44:21] (step=0059578) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.65681862649188, LR: 0.0003 +[2026-03-04 19:44:29] (step=0059579) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.657014282919194, LR: 0.0003 +[2026-03-04 19:44:37] (step=0059580) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.657209939346508, LR: 0.0003 +[2026-03-04 19:44:45] (step=0059581) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.65740559577382, LR: 0.0003 +[2026-03-04 19:44:53] (step=0059582) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.657601252201134, LR: 0.0003 +[2026-03-04 19:45:01] (step=0059583) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.657796908628448, LR: 0.0003 +[2026-03-04 19:45:08] (step=0059584) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.657992565055762, LR: 0.0003 +[2026-03-04 19:45:16] (step=0059585) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.658188221483076, LR: 0.0003 +[2026-03-04 19:45:24] (step=0059586) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.658383877910389, LR: 0.0003 +[2026-03-04 19:45:32] (step=0059587) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.658579534337703, LR: 0.0003 +[2026-03-04 19:45:40] (step=0059588) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.658775190765017, LR: 0.0003 +[2026-03-04 19:45:48] (step=0059589) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.65897084719233, LR: 0.0003 +[2026-03-04 19:45:56] (step=0059590) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.659166503619645, LR: 0.0003 +[2026-03-04 19:46:03] (step=0059591) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.659362160046957, LR: 0.0003 +[2026-03-04 19:46:11] (step=0059592) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.65955781647427, LR: 0.0003 +[2026-03-04 19:46:19] (step=0059593) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 11.659753472901585, LR: 0.0003 +[2026-03-04 19:46:27] (step=0059594) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.659949129328899, LR: 0.0003 +[2026-03-04 19:46:35] (step=0059595) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.660144785756213, LR: 0.0003 +[2026-03-04 19:46:43] (step=0059596) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.660340442183525, LR: 0.0003 +[2026-03-04 19:46:51] (step=0059597) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.660536098610839, LR: 0.0003 +[2026-03-04 19:46:58] (step=0059598) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.660731755038153, LR: 0.0003 +[2026-03-04 19:47:06] (step=0059599) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.660927411465467, LR: 0.0003 +[2026-03-04 19:47:14] (step=0059600) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.661123067892781, LR: 0.0003 +[2026-03-04 19:47:22] (step=0059601) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.661318724320093, LR: 0.0003 +[2026-03-04 19:47:30] (step=0059602) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.661514380747407, LR: 0.0003 +[2026-03-04 19:47:38] (step=0059603) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.661710037174721, LR: 0.0003 +[2026-03-04 19:47:46] (step=0059604) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.661905693602035, LR: 0.0003 +[2026-03-04 19:47:54] (step=0059605) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.66210135002935, LR: 0.0003 +[2026-03-04 19:48:01] (step=0059606) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.662297006456662, LR: 0.0003 +[2026-03-04 19:48:09] (step=0059607) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.662492662883976, LR: 0.0003 +[2026-03-04 19:48:17] (step=0059608) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.66268831931129, LR: 0.0003 +[2026-03-04 19:48:25] (step=0059609) Train Loss: 0.4406, Train Steps/Sec: 0.12, Epoch: 11.662883975738604, LR: 0.0003 +[2026-03-04 19:48:33] (step=0059610) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.663079632165916, LR: 0.0003 +[2026-03-04 19:48:41] (step=0059611) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.66327528859323, LR: 0.0003 +[2026-03-04 19:48:49] (step=0059612) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.663470945020544, LR: 0.0003 +[2026-03-04 19:48:57] (step=0059613) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.663666601447858, LR: 0.0003 +[2026-03-04 19:49:04] (step=0059614) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.663862257875172, LR: 0.0003 +[2026-03-04 19:49:12] (step=0059615) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.664057914302484, LR: 0.0003 +[2026-03-04 19:49:20] (step=0059616) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.664253570729798, LR: 0.0003 +[2026-03-04 19:49:28] (step=0059617) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.664449227157112, LR: 0.0003 +[2026-03-04 19:49:36] (step=0059618) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.664644883584426, LR: 0.0003 +[2026-03-04 19:49:44] (step=0059619) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.66484054001174, LR: 0.0003 +[2026-03-04 19:49:52] (step=0059620) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.665036196439052, LR: 0.0003 +[2026-03-04 19:50:00] (step=0059621) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.665231852866366, LR: 0.0003 +[2026-03-04 19:50:07] (step=0059622) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.66542750929368, LR: 0.0003 +[2026-03-04 19:50:15] (step=0059623) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.665623165720994, LR: 0.0003 +[2026-03-04 19:50:23] (step=0059624) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.665818822148308, LR: 0.0003 +[2026-03-04 19:50:31] (step=0059625) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 11.66601447857562, LR: 0.0003 +[2026-03-04 19:50:39] (step=0059626) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.666210135002935, LR: 0.0003 +[2026-03-04 19:50:47] (step=0059627) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.666405791430249, LR: 0.0003 +[2026-03-04 19:50:55] (step=0059628) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.666601447857563, LR: 0.0003 +[2026-03-04 19:51:03] (step=0059629) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.666797104284877, LR: 0.0003 +[2026-03-04 19:51:10] (step=0059630) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.666992760712189, LR: 0.0003 +[2026-03-04 19:51:18] (step=0059631) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.667188417139503, LR: 0.0003 +[2026-03-04 19:51:26] (step=0059632) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.667384073566817, LR: 0.0003 +[2026-03-04 19:51:34] (step=0059633) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 11.66757972999413, LR: 0.0003 +[2026-03-04 19:51:42] (step=0059634) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.667775386421443, LR: 0.0003 +[2026-03-04 19:51:50] (step=0059635) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.667971042848757, LR: 0.0003 +[2026-03-04 19:51:57] (step=0059636) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.668166699276071, LR: 0.0003 +[2026-03-04 19:52:05] (step=0059637) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.668362355703385, LR: 0.0003 +[2026-03-04 19:52:13] (step=0059638) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.668558012130699, LR: 0.0003 +[2026-03-04 19:52:21] (step=0059639) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.668753668558011, LR: 0.0003 +[2026-03-04 19:52:29] (step=0059640) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.668949324985325, LR: 0.0003 +[2026-03-04 19:52:37] (step=0059641) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.66914498141264, LR: 0.0003 +[2026-03-04 19:52:45] (step=0059642) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.669340637839953, LR: 0.0003 +[2026-03-04 19:52:52] (step=0059643) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.669536294267267, LR: 0.0003 +[2026-03-04 19:53:00] (step=0059644) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.66973195069458, LR: 0.0003 +[2026-03-04 19:53:08] (step=0059645) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.669927607121894, LR: 0.0003 +[2026-03-04 19:53:16] (step=0059646) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.670123263549208, LR: 0.0003 +[2026-03-04 19:53:24] (step=0059647) Train Loss: 0.4221, Train Steps/Sec: 0.13, Epoch: 11.670318919976522, LR: 0.0003 +[2026-03-04 19:53:32] (step=0059648) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.670514576403836, LR: 0.0003 +[2026-03-04 19:53:40] (step=0059649) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.670710232831148, LR: 0.0003 +[2026-03-04 19:53:47] (step=0059650) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.670905889258462, LR: 0.0003 +[2026-03-04 19:53:55] (step=0059651) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.671101545685776, LR: 0.0003 +[2026-03-04 19:54:03] (step=0059652) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.67129720211309, LR: 0.0003 +[2026-03-04 19:54:11] (step=0059653) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.671492858540404, LR: 0.0003 +[2026-03-04 19:54:19] (step=0059654) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.671688514967716, LR: 0.0003 +[2026-03-04 19:54:27] (step=0059655) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.67188417139503, LR: 0.0003 +[2026-03-04 19:54:35] (step=0059656) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.672079827822344, LR: 0.0003 +[2026-03-04 19:54:42] (step=0059657) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 11.672275484249658, LR: 0.0003 +[2026-03-04 19:54:50] (step=0059658) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.672471140676972, LR: 0.0003 +[2026-03-04 19:54:58] (step=0059659) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.672666797104284, LR: 0.0003 +[2026-03-04 19:55:06] (step=0059660) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.672862453531598, LR: 0.0003 +[2026-03-04 19:55:14] (step=0059661) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.673058109958912, LR: 0.0003 +[2026-03-04 19:55:22] (step=0059662) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.673253766386226, LR: 0.0003 +[2026-03-04 19:55:30] (step=0059663) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.673449422813539, LR: 0.0003 +[2026-03-04 19:55:38] (step=0059664) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.673645079240853, LR: 0.0003 +[2026-03-04 19:55:45] (step=0059665) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.673840735668167, LR: 0.0003 +[2026-03-04 19:55:53] (step=0059666) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.67403639209548, LR: 0.0003 +[2026-03-04 19:56:01] (step=0059667) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.674232048522795, LR: 0.0003 +[2026-03-04 19:56:09] (step=0059668) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.674427704950107, LR: 0.0003 +[2026-03-04 19:56:17] (step=0059669) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.67462336137742, LR: 0.0003 +[2026-03-04 19:56:25] (step=0059670) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.674819017804735, LR: 0.0003 +[2026-03-04 19:56:33] (step=0059671) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.675014674232049, LR: 0.0003 +[2026-03-04 19:56:41] (step=0059672) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.675210330659363, LR: 0.0003 +[2026-03-04 19:56:48] (step=0059673) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.675405987086675, LR: 0.0003 +[2026-03-04 19:56:56] (step=0059674) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.675601643513989, LR: 0.0003 +[2026-03-04 19:57:04] (step=0059675) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.675797299941303, LR: 0.0003 +[2026-03-04 19:57:12] (step=0059676) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.675992956368617, LR: 0.0003 +[2026-03-04 19:57:20] (step=0059677) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 11.676188612795931, LR: 0.0003 +[2026-03-04 19:57:28] (step=0059678) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.676384269223243, LR: 0.0003 +[2026-03-04 19:57:36] (step=0059679) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 11.676579925650557, LR: 0.0003 +[2026-03-04 19:57:44] (step=0059680) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.676775582077871, LR: 0.0003 +[2026-03-04 19:57:51] (step=0059681) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.676971238505185, LR: 0.0003 +[2026-03-04 19:57:59] (step=0059682) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.6771668949325, LR: 0.0003 +[2026-03-04 19:58:07] (step=0059683) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.677362551359812, LR: 0.0003 +[2026-03-04 19:58:15] (step=0059684) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 11.677558207787126, LR: 0.0003 +[2026-03-04 19:58:23] (step=0059685) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 11.67775386421444, LR: 0.0003 +[2026-03-04 19:58:31] (step=0059686) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.677949520641754, LR: 0.0003 +[2026-03-04 19:58:39] (step=0059687) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.678145177069066, LR: 0.0003 +[2026-03-04 19:58:46] (step=0059688) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.67834083349638, LR: 0.0003 +[2026-03-04 19:58:54] (step=0059689) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.678536489923694, LR: 0.0003 +[2026-03-04 19:59:02] (step=0059690) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.678732146351008, LR: 0.0003 +[2026-03-04 19:59:10] (step=0059691) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 11.678927802778322, LR: 0.0003 +[2026-03-04 19:59:18] (step=0059692) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.679123459205634, LR: 0.0003 +[2026-03-04 19:59:26] (step=0059693) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.679319115632948, LR: 0.0003 +[2026-03-04 19:59:34] (step=0059694) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.679514772060262, LR: 0.0003 +[2026-03-04 19:59:41] (step=0059695) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.679710428487576, LR: 0.0003 +[2026-03-04 19:59:49] (step=0059696) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 11.67990608491489, LR: 0.0003 +[2026-03-04 19:59:57] (step=0059697) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.680101741342202, LR: 0.0003 +[2026-03-04 20:00:05] (step=0059698) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.680297397769516, LR: 0.0003 +[2026-03-04 20:00:13] (step=0059699) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.68049305419683, LR: 0.0003 +[2026-03-04 20:00:21] (step=0059700) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.680688710624144, LR: 0.0003 +[2026-03-04 20:00:29] (step=0059701) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.680884367051458, LR: 0.0003 +[2026-03-04 20:00:36] (step=0059702) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.68108002347877, LR: 0.0003 +[2026-03-04 20:00:44] (step=0059703) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.681275679906085, LR: 0.0003 +[2026-03-04 20:00:52] (step=0059704) Train Loss: 0.4386, Train Steps/Sec: 0.12, Epoch: 11.681471336333399, LR: 0.0003 +[2026-03-04 20:01:00] (step=0059705) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.681666992760713, LR: 0.0003 +[2026-03-04 20:01:08] (step=0059706) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.681862649188027, LR: 0.0003 +[2026-03-04 20:01:16] (step=0059707) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.682058305615339, LR: 0.0003 +[2026-03-04 20:01:24] (step=0059708) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.682253962042653, LR: 0.0003 +[2026-03-04 20:01:32] (step=0059709) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.682449618469967, LR: 0.0003 +[2026-03-04 20:01:40] (step=0059710) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.68264527489728, LR: 0.0003 +[2026-03-04 20:01:47] (step=0059711) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.682840931324595, LR: 0.0003 +[2026-03-04 20:01:55] (step=0059712) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.683036587751907, LR: 0.0003 +[2026-03-04 20:02:03] (step=0059713) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.683232244179221, LR: 0.0003 +[2026-03-04 20:02:11] (step=0059714) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.683427900606535, LR: 0.0003 +[2026-03-04 20:02:19] (step=0059715) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.68362355703385, LR: 0.0003 +[2026-03-04 20:02:27] (step=0059716) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.683819213461161, LR: 0.0003 +[2026-03-04 20:02:35] (step=0059717) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.684014869888475, LR: 0.0003 +[2026-03-04 20:02:42] (step=0059718) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.68421052631579, LR: 0.0003 +[2026-03-04 20:02:50] (step=0059719) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.684406182743103, LR: 0.0003 +[2026-03-04 20:02:58] (step=0059720) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.684601839170417, LR: 0.0003 +[2026-03-04 20:03:06] (step=0059721) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.68479749559773, LR: 0.0003 +[2026-03-04 20:03:14] (step=0059722) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 11.684993152025044, LR: 0.0003 +[2026-03-04 20:03:22] (step=0059723) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.685188808452358, LR: 0.0003 +[2026-03-04 20:03:30] (step=0059724) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.685384464879672, LR: 0.0003 +[2026-03-04 20:03:37] (step=0059725) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 11.685580121306986, LR: 0.0003 +[2026-03-04 20:03:45] (step=0059726) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.685775777734298, LR: 0.0003 +[2026-03-04 20:03:53] (step=0059727) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.685971434161612, LR: 0.0003 +[2026-03-04 20:04:01] (step=0059728) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.686167090588926, LR: 0.0003 +[2026-03-04 20:04:09] (step=0059729) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.68636274701624, LR: 0.0003 +[2026-03-04 20:04:17] (step=0059730) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.686558403443554, LR: 0.0003 +[2026-03-04 20:04:25] (step=0059731) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.686754059870866, LR: 0.0003 +[2026-03-04 20:04:32] (step=0059732) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.68694971629818, LR: 0.0003 +[2026-03-04 20:04:40] (step=0059733) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.687145372725494, LR: 0.0003 +[2026-03-04 20:04:48] (step=0059734) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.687341029152808, LR: 0.0003 +[2026-03-04 20:04:56] (step=0059735) Train Loss: 0.4614, Train Steps/Sec: 0.13, Epoch: 11.687536685580122, LR: 0.0003 +[2026-03-04 20:05:04] (step=0059736) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.687732342007434, LR: 0.0003 +[2026-03-04 20:05:12] (step=0059737) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.687927998434748, LR: 0.0003 +[2026-03-04 20:05:20] (step=0059738) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.688123654862062, LR: 0.0003 +[2026-03-04 20:05:27] (step=0059739) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.688319311289376, LR: 0.0003 +[2026-03-04 20:05:35] (step=0059740) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.688514967716689, LR: 0.0003 +[2026-03-04 20:05:43] (step=0059741) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.688710624144003, LR: 0.0003 +[2026-03-04 20:05:51] (step=0059742) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.688906280571317, LR: 0.0003 +[2026-03-04 20:05:59] (step=0059743) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.68910193699863, LR: 0.0003 +[2026-03-04 20:06:07] (step=0059744) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.689297593425945, LR: 0.0003 +[2026-03-04 20:06:15] (step=0059745) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.689493249853257, LR: 0.0003 +[2026-03-04 20:06:22] (step=0059746) Train Loss: 0.4227, Train Steps/Sec: 0.13, Epoch: 11.689688906280571, LR: 0.0003 +[2026-03-04 20:06:30] (step=0059747) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.689884562707885, LR: 0.0003 +[2026-03-04 20:06:38] (step=0059748) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.690080219135199, LR: 0.0003 +[2026-03-04 20:06:46] (step=0059749) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.690275875562513, LR: 0.0003 +[2026-03-04 20:06:54] (step=0059750) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.690471531989825, LR: 0.0003 +[2026-03-04 20:07:02] (step=0059751) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.69066718841714, LR: 0.0003 +[2026-03-04 20:07:10] (step=0059752) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.690862844844453, LR: 0.0003 +[2026-03-04 20:07:18] (step=0059753) Train Loss: 0.4474, Train Steps/Sec: 0.12, Epoch: 11.691058501271767, LR: 0.0003 +[2026-03-04 20:07:26] (step=0059754) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.691254157699081, LR: 0.0003 +[2026-03-04 20:07:33] (step=0059755) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.691449814126393, LR: 0.0003 +[2026-03-04 20:07:41] (step=0059756) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.691645470553707, LR: 0.0003 +[2026-03-04 20:07:49] (step=0059757) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.691841126981021, LR: 0.0003 +[2026-03-04 20:07:57] (step=0059758) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.692036783408335, LR: 0.0003 +[2026-03-04 20:08:05] (step=0059759) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.69223243983565, LR: 0.0003 +[2026-03-04 20:08:13] (step=0059760) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.692428096262962, LR: 0.0003 +[2026-03-04 20:08:21] (step=0059761) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.692623752690276, LR: 0.0003 +[2026-03-04 20:08:28] (step=0059762) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.69281940911759, LR: 0.0003 +[2026-03-04 20:08:36] (step=0059763) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.693015065544904, LR: 0.0003 +[2026-03-04 20:08:44] (step=0059764) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.693210721972218, LR: 0.0003 +[2026-03-04 20:08:52] (step=0059765) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.69340637839953, LR: 0.0003 +[2026-03-04 20:09:00] (step=0059766) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.693602034826844, LR: 0.0003 +[2026-03-04 20:09:08] (step=0059767) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.693797691254158, LR: 0.0003 +[2026-03-04 20:09:16] (step=0059768) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.693993347681472, LR: 0.0003 +[2026-03-04 20:09:24] (step=0059769) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 11.694189004108784, LR: 0.0003 +[2026-03-04 20:09:31] (step=0059770) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.694384660536098, LR: 0.0003 +[2026-03-04 20:09:39] (step=0059771) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.694580316963412, LR: 0.0003 +[2026-03-04 20:09:47] (step=0059772) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.694775973390726, LR: 0.0003 +[2026-03-04 20:09:55] (step=0059773) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.69497162981804, LR: 0.0003 +[2026-03-04 20:10:03] (step=0059774) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.695167286245352, LR: 0.0003 +[2026-03-04 20:10:11] (step=0059775) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.695362942672666, LR: 0.0003 +[2026-03-04 20:10:19] (step=0059776) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.69555859909998, LR: 0.0003 +[2026-03-04 20:10:26] (step=0059777) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.695754255527294, LR: 0.0003 +[2026-03-04 20:10:34] (step=0059778) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.695949911954608, LR: 0.0003 +[2026-03-04 20:10:42] (step=0059779) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.69614556838192, LR: 0.0003 +[2026-03-04 20:10:50] (step=0059780) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 11.696341224809235, LR: 0.0003 +[2026-03-04 20:10:58] (step=0059781) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.696536881236549, LR: 0.0003 +[2026-03-04 20:11:06] (step=0059782) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.696732537663863, LR: 0.0003 +[2026-03-04 20:11:14] (step=0059783) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.696928194091177, LR: 0.0003 +[2026-03-04 20:11:21] (step=0059784) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.697123850518489, LR: 0.0003 +[2026-03-04 20:11:29] (step=0059785) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 11.697319506945803, LR: 0.0003 +[2026-03-04 20:11:37] (step=0059786) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.697515163373117, LR: 0.0003 +[2026-03-04 20:11:45] (step=0059787) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.697710819800431, LR: 0.0003 +[2026-03-04 20:11:53] (step=0059788) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.697906476227745, LR: 0.0003 +[2026-03-04 20:12:01] (step=0059789) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.698102132655057, LR: 0.0003 +[2026-03-04 20:12:09] (step=0059790) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.698297789082371, LR: 0.0003 +[2026-03-04 20:12:16] (step=0059791) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 11.698493445509685, LR: 0.0003 +[2026-03-04 20:12:24] (step=0059792) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.698689101937, LR: 0.0003 +[2026-03-04 20:12:32] (step=0059793) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.698884758364311, LR: 0.0003 +[2026-03-04 20:12:40] (step=0059794) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.699080414791625, LR: 0.0003 +[2026-03-04 20:12:48] (step=0059795) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.69927607121894, LR: 0.0003 +[2026-03-04 20:12:56] (step=0059796) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.699471727646253, LR: 0.0003 +[2026-03-04 20:13:04] (step=0059797) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 11.699667384073567, LR: 0.0003 +[2026-03-04 20:13:12] (step=0059798) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.69986304050088, LR: 0.0003 +[2026-03-04 20:13:19] (step=0059799) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.700058696928194, LR: 0.0003 +[2026-03-04 20:13:27] (step=0059800) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.700254353355508, LR: 0.0003 +[2026-03-04 20:13:35] (step=0059801) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.700450009782822, LR: 0.0003 +[2026-03-04 20:13:43] (step=0059802) Train Loss: 0.4254, Train Steps/Sec: 0.12, Epoch: 11.700645666210136, LR: 0.0003 +[2026-03-04 20:13:51] (step=0059803) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.700841322637448, LR: 0.0003 +[2026-03-04 20:13:59] (step=0059804) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.701036979064762, LR: 0.0003 +[2026-03-04 20:14:07] (step=0059805) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.701232635492076, LR: 0.0003 +[2026-03-04 20:14:15] (step=0059806) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 11.70142829191939, LR: 0.0003 +[2026-03-04 20:14:22] (step=0059807) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 11.701623948346704, LR: 0.0003 +[2026-03-04 20:14:30] (step=0059808) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.701819604774016, LR: 0.0003 +[2026-03-04 20:14:38] (step=0059809) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.70201526120133, LR: 0.0003 +[2026-03-04 20:14:46] (step=0059810) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.702210917628644, LR: 0.0003 +[2026-03-04 20:14:54] (step=0059811) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.702406574055958, LR: 0.0003 +[2026-03-04 20:15:02] (step=0059812) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.702602230483272, LR: 0.0003 +[2026-03-04 20:15:10] (step=0059813) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.702797886910584, LR: 0.0003 +[2026-03-04 20:15:17] (step=0059814) Train Loss: 0.4228, Train Steps/Sec: 0.13, Epoch: 11.702993543337898, LR: 0.0003 +[2026-03-04 20:15:25] (step=0059815) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 11.703189199765212, LR: 0.0003 +[2026-03-04 20:15:33] (step=0059816) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.703384856192526, LR: 0.0003 +[2026-03-04 20:15:41] (step=0059817) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.70358051261984, LR: 0.0003 +[2026-03-04 20:15:49] (step=0059818) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.703776169047153, LR: 0.0003 +[2026-03-04 20:15:57] (step=0059819) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.703971825474467, LR: 0.0003 +[2026-03-04 20:16:05] (step=0059820) Train Loss: 0.4389, Train Steps/Sec: 0.12, Epoch: 11.70416748190178, LR: 0.0003 +[2026-03-04 20:16:13] (step=0059821) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.704363138329095, LR: 0.0003 +[2026-03-04 20:16:21] (step=0059822) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.704558794756407, LR: 0.0003 +[2026-03-04 20:16:28] (step=0059823) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.704754451183721, LR: 0.0003 +[2026-03-04 20:16:36] (step=0059824) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.704950107611035, LR: 0.0003 +[2026-03-04 20:16:44] (step=0059825) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.705145764038349, LR: 0.0003 +[2026-03-04 20:16:52] (step=0059826) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.705341420465663, LR: 0.0003 +[2026-03-04 20:17:00] (step=0059827) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.705537076892975, LR: 0.0003 +[2026-03-04 20:17:08] (step=0059828) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.70573273332029, LR: 0.0003 +[2026-03-04 20:17:16] (step=0059829) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.705928389747603, LR: 0.0003 +[2026-03-04 20:17:23] (step=0059830) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.706124046174917, LR: 0.0003 +[2026-03-04 20:17:31] (step=0059831) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.706319702602231, LR: 0.0003 +[2026-03-04 20:17:39] (step=0059832) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.706515359029543, LR: 0.0003 +[2026-03-04 20:17:47] (step=0059833) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.706711015456857, LR: 0.0003 +[2026-03-04 20:17:55] (step=0059834) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.706906671884171, LR: 0.0003 +[2026-03-04 20:18:03] (step=0059835) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.707102328311485, LR: 0.0003 +[2026-03-04 20:18:11] (step=0059836) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.7072979847388, LR: 0.0003 +[2026-03-04 20:18:18] (step=0059837) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.707493641166112, LR: 0.0003 +[2026-03-04 20:18:26] (step=0059838) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 11.707689297593426, LR: 0.0003 +[2026-03-04 20:18:34] (step=0059839) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.70788495402074, LR: 0.0003 +[2026-03-04 20:18:42] (step=0059840) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.708080610448054, LR: 0.0003 +[2026-03-04 20:18:50] (step=0059841) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.708276266875368, LR: 0.0003 +[2026-03-04 20:18:58] (step=0059842) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.70847192330268, LR: 0.0003 +[2026-03-04 20:19:06] (step=0059843) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.708667579729994, LR: 0.0003 +[2026-03-04 20:19:14] (step=0059844) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.708863236157308, LR: 0.0003 +[2026-03-04 20:19:21] (step=0059845) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.709058892584622, LR: 0.0003 +[2026-03-04 20:19:29] (step=0059846) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.709254549011934, LR: 0.0003 +[2026-03-04 20:19:37] (step=0059847) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.709450205439248, LR: 0.0003 +[2026-03-04 20:19:45] (step=0059848) Train Loss: 0.4455, Train Steps/Sec: 0.12, Epoch: 11.709645861866562, LR: 0.0003 +[2026-03-04 20:19:53] (step=0059849) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.709841518293876, LR: 0.0003 +[2026-03-04 20:20:01] (step=0059850) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 11.71003717472119, LR: 0.0003 +[2026-03-04 20:20:09] (step=0059851) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.710232831148502, LR: 0.0003 +[2026-03-04 20:20:17] (step=0059852) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.710428487575816, LR: 0.0003 +[2026-03-04 20:20:24] (step=0059853) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.71062414400313, LR: 0.0003 +[2026-03-04 20:20:32] (step=0059854) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.710819800430444, LR: 0.0003 +[2026-03-04 20:20:40] (step=0059855) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.711015456857758, LR: 0.0003 +[2026-03-04 20:20:48] (step=0059856) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 11.71121111328507, LR: 0.0003 +[2026-03-04 20:20:56] (step=0059857) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.711406769712385, LR: 0.0003 +[2026-03-04 20:21:04] (step=0059858) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.711602426139699, LR: 0.0003 +[2026-03-04 20:21:12] (step=0059859) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.711798082567013, LR: 0.0003 +[2026-03-04 20:21:19] (step=0059860) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.711993738994327, LR: 0.0003 +[2026-03-04 20:21:27] (step=0059861) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.712189395421639, LR: 0.0003 +[2026-03-04 20:21:35] (step=0059862) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.712385051848953, LR: 0.0003 +[2026-03-04 20:21:43] (step=0059863) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.712580708276267, LR: 0.0003 +[2026-03-04 20:21:51] (step=0059864) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.712776364703581, LR: 0.0003 +[2026-03-04 20:21:59] (step=0059865) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 11.712972021130895, LR: 0.0003 +[2026-03-04 20:22:07] (step=0059866) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.713167677558207, LR: 0.0003 +[2026-03-04 20:22:14] (step=0059867) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.713363333985521, LR: 0.0003 +[2026-03-04 20:22:22] (step=0059868) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.713558990412835, LR: 0.0003 +[2026-03-04 20:22:30] (step=0059869) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 11.71375464684015, LR: 0.0003 +[2026-03-04 20:22:38] (step=0059870) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.713950303267461, LR: 0.0003 +[2026-03-04 20:22:46] (step=0059871) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.714145959694775, LR: 0.0003 +[2026-03-04 20:22:54] (step=0059872) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.71434161612209, LR: 0.0003 +[2026-03-04 20:23:02] (step=0059873) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.714537272549403, LR: 0.0003 +[2026-03-04 20:23:10] (step=0059874) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.714732928976717, LR: 0.0003 +[2026-03-04 20:23:17] (step=0059875) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.71492858540403, LR: 0.0003 +[2026-03-04 20:23:25] (step=0059876) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.715124241831344, LR: 0.0003 +[2026-03-04 20:23:33] (step=0059877) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.715319898258658, LR: 0.0003 +[2026-03-04 20:23:41] (step=0059878) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.715515554685972, LR: 0.0003 +[2026-03-04 20:23:49] (step=0059879) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.715711211113286, LR: 0.0003 +[2026-03-04 20:23:57] (step=0059880) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.715906867540598, LR: 0.0003 +[2026-03-04 20:24:05] (step=0059881) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.716102523967912, LR: 0.0003 +[2026-03-04 20:24:12] (step=0059882) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.716298180395226, LR: 0.0003 +[2026-03-04 20:24:20] (step=0059883) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.71649383682254, LR: 0.0003 +[2026-03-04 20:24:28] (step=0059884) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.716689493249854, LR: 0.0003 +[2026-03-04 20:24:36] (step=0059885) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.716885149677166, LR: 0.0003 +[2026-03-04 20:24:44] (step=0059886) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.71708080610448, LR: 0.0003 +[2026-03-04 20:24:52] (step=0059887) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.717276462531794, LR: 0.0003 +[2026-03-04 20:25:00] (step=0059888) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.717472118959108, LR: 0.0003 +[2026-03-04 20:25:08] (step=0059889) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.717667775386422, LR: 0.0003 +[2026-03-04 20:25:15] (step=0059890) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.717863431813734, LR: 0.0003 +[2026-03-04 20:25:23] (step=0059891) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.718059088241048, LR: 0.0003 +[2026-03-04 20:25:31] (step=0059892) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.718254744668362, LR: 0.0003 +[2026-03-04 20:25:39] (step=0059893) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.718450401095676, LR: 0.0003 +[2026-03-04 20:25:47] (step=0059894) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 11.71864605752299, LR: 0.0003 +[2026-03-04 20:25:55] (step=0059895) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.718841713950303, LR: 0.0003 +[2026-03-04 20:26:02] (step=0059896) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.719037370377617, LR: 0.0003 +[2026-03-04 20:26:10] (step=0059897) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.71923302680493, LR: 0.0003 +[2026-03-04 20:26:18] (step=0059898) Train Loss: 0.4500, Train Steps/Sec: 0.12, Epoch: 11.719428683232245, LR: 0.0003 +[2026-03-04 20:26:26] (step=0059899) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.719624339659557, LR: 0.0003 +[2026-03-04 20:26:34] (step=0059900) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.719819996086871, LR: 0.0003 +[2026-03-04 20:26:42] (step=0059901) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.720015652514185, LR: 0.0003 +[2026-03-04 20:26:50] (step=0059902) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.720211308941499, LR: 0.0003 +[2026-03-04 20:26:58] (step=0059903) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.720406965368813, LR: 0.0003 +[2026-03-04 20:27:06] (step=0059904) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 11.720602621796125, LR: 0.0003 +[2026-03-04 20:27:13] (step=0059905) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 11.72079827822344, LR: 0.0003 +[2026-03-04 20:27:21] (step=0059906) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.720993934650753, LR: 0.0003 +[2026-03-04 20:27:29] (step=0059907) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.721189591078067, LR: 0.0003 +[2026-03-04 20:27:37] (step=0059908) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.721385247505381, LR: 0.0003 +[2026-03-04 20:27:45] (step=0059909) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.721580903932693, LR: 0.0003 +[2026-03-04 20:27:53] (step=0059910) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.721776560360007, LR: 0.0003 +[2026-03-04 20:28:01] (step=0059911) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.721972216787321, LR: 0.0003 +[2026-03-04 20:28:08] (step=0059912) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.722167873214635, LR: 0.0003 +[2026-03-04 20:28:16] (step=0059913) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.72236352964195, LR: 0.0003 +[2026-03-04 20:28:24] (step=0059914) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.722559186069262, LR: 0.0003 +[2026-03-04 20:28:32] (step=0059915) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.722754842496576, LR: 0.0003 +[2026-03-04 20:28:40] (step=0059916) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 11.72295049892389, LR: 0.0003 +[2026-03-04 20:28:48] (step=0059917) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.723146155351204, LR: 0.0003 +[2026-03-04 20:28:56] (step=0059918) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 11.723341811778518, LR: 0.0003 +[2026-03-04 20:29:03] (step=0059919) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.72353746820583, LR: 0.0003 +[2026-03-04 20:29:11] (step=0059920) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.723733124633144, LR: 0.0003 +[2026-03-04 20:29:19] (step=0059921) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.723928781060458, LR: 0.0003 +[2026-03-04 20:29:27] (step=0059922) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.724124437487772, LR: 0.0003 +[2026-03-04 20:29:35] (step=0059923) Train Loss: 0.4467, Train Steps/Sec: 0.12, Epoch: 11.724320093915084, LR: 0.0003 +[2026-03-04 20:29:43] (step=0059924) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.724515750342398, LR: 0.0003 +[2026-03-04 20:29:51] (step=0059925) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.724711406769712, LR: 0.0003 +[2026-03-04 20:29:59] (step=0059926) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.724907063197026, LR: 0.0003 +[2026-03-04 20:30:07] (step=0059927) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.72510271962434, LR: 0.0003 +[2026-03-04 20:30:14] (step=0059928) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.725298376051652, LR: 0.0003 +[2026-03-04 20:30:22] (step=0059929) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.725494032478966, LR: 0.0003 +[2026-03-04 20:30:30] (step=0059930) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.72568968890628, LR: 0.0003 +[2026-03-04 20:30:38] (step=0059931) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 11.725885345333595, LR: 0.0003 +[2026-03-04 20:30:46] (step=0059932) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.726081001760909, LR: 0.0003 +[2026-03-04 20:30:54] (step=0059933) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.72627665818822, LR: 0.0003 +[2026-03-04 20:31:02] (step=0059934) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.726472314615535, LR: 0.0003 +[2026-03-04 20:31:09] (step=0059935) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 11.726667971042849, LR: 0.0003 +[2026-03-04 20:31:17] (step=0059936) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.726863627470163, LR: 0.0003 +[2026-03-04 20:31:25] (step=0059937) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.727059283897477, LR: 0.0003 +[2026-03-04 20:31:33] (step=0059938) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.727254940324789, LR: 0.0003 +[2026-03-04 20:31:41] (step=0059939) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.727450596752103, LR: 0.0003 +[2026-03-04 20:31:49] (step=0059940) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.727646253179417, LR: 0.0003 +[2026-03-04 20:31:57] (step=0059941) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.727841909606731, LR: 0.0003 +[2026-03-04 20:32:05] (step=0059942) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.728037566034045, LR: 0.0003 +[2026-03-04 20:32:12] (step=0059943) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.728233222461357, LR: 0.0003 +[2026-03-04 20:32:20] (step=0059944) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.728428878888671, LR: 0.0003 +[2026-03-04 20:32:28] (step=0059945) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.728624535315985, LR: 0.0003 +[2026-03-04 20:32:36] (step=0059946) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.7288201917433, LR: 0.0003 +[2026-03-04 20:32:44] (step=0059947) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.729015848170613, LR: 0.0003 +[2026-03-04 20:32:52] (step=0059948) Train Loss: 0.4371, Train Steps/Sec: 0.12, Epoch: 11.729211504597925, LR: 0.0003 +[2026-03-04 20:33:00] (step=0059949) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 11.72940716102524, LR: 0.0003 +[2026-03-04 20:33:08] (step=0059950) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.729602817452554, LR: 0.0003 +[2026-03-04 20:33:16] (step=0059951) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.729798473879868, LR: 0.0003 +[2026-03-04 20:33:23] (step=0059952) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.72999413030718, LR: 0.0003 +[2026-03-04 20:33:31] (step=0059953) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.730189786734494, LR: 0.0003 +[2026-03-04 20:33:39] (step=0059954) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.730385443161808, LR: 0.0003 +[2026-03-04 20:33:47] (step=0059955) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.730581099589122, LR: 0.0003 +[2026-03-04 20:33:55] (step=0059956) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.730776756016436, LR: 0.0003 +[2026-03-04 20:34:03] (step=0059957) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.730972412443748, LR: 0.0003 +[2026-03-04 20:34:11] (step=0059958) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 11.731168068871062, LR: 0.0003 +[2026-03-04 20:34:18] (step=0059959) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.731363725298376, LR: 0.0003 +[2026-03-04 20:34:26] (step=0059960) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.73155938172569, LR: 0.0003 +[2026-03-04 20:34:34] (step=0059961) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.731755038153004, LR: 0.0003 +[2026-03-04 20:34:42] (step=0059962) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.731950694580316, LR: 0.0003 +[2026-03-04 20:34:50] (step=0059963) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.73214635100763, LR: 0.0003 +[2026-03-04 20:34:58] (step=0059964) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.732342007434944, LR: 0.0003 +[2026-03-04 20:35:06] (step=0059965) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.732537663862258, LR: 0.0003 +[2026-03-04 20:35:13] (step=0059966) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.732733320289572, LR: 0.0003 +[2026-03-04 20:35:21] (step=0059967) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 11.732928976716885, LR: 0.0003 +[2026-03-04 20:35:29] (step=0059968) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.733124633144199, LR: 0.0003 +[2026-03-04 20:35:37] (step=0059969) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 11.733320289571513, LR: 0.0003 +[2026-03-04 20:35:45] (step=0059970) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.733515945998827, LR: 0.0003 +[2026-03-04 20:35:53] (step=0059971) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.73371160242614, LR: 0.0003 +[2026-03-04 20:36:01] (step=0059972) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.733907258853453, LR: 0.0003 +[2026-03-04 20:36:09] (step=0059973) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 11.734102915280767, LR: 0.0003 +[2026-03-04 20:36:16] (step=0059974) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.73429857170808, LR: 0.0003 +[2026-03-04 20:36:24] (step=0059975) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.734494228135395, LR: 0.0003 +[2026-03-04 20:36:32] (step=0059976) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.734689884562707, LR: 0.0003 +[2026-03-04 20:36:40] (step=0059977) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 11.734885540990021, LR: 0.0003 +[2026-03-04 20:36:48] (step=0059978) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.735081197417335, LR: 0.0003 +[2026-03-04 20:36:56] (step=0059979) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.735276853844649, LR: 0.0003 +[2026-03-04 20:37:04] (step=0059980) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.735472510271963, LR: 0.0003 +[2026-03-04 20:37:11] (step=0059981) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.735668166699275, LR: 0.0003 +[2026-03-04 20:37:19] (step=0059982) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.73586382312659, LR: 0.0003 +[2026-03-04 20:37:27] (step=0059983) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.736059479553903, LR: 0.0003 +[2026-03-04 20:37:35] (step=0059984) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.736255135981217, LR: 0.0003 +[2026-03-04 20:37:43] (step=0059985) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.736450792408531, LR: 0.0003 +[2026-03-04 20:37:51] (step=0059986) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.736646448835844, LR: 0.0003 +[2026-03-04 20:37:59] (step=0059987) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.736842105263158, LR: 0.0003 +[2026-03-04 20:38:06] (step=0059988) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.737037761690472, LR: 0.0003 +[2026-03-04 20:38:14] (step=0059989) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.737233418117786, LR: 0.0003 +[2026-03-04 20:38:22] (step=0059990) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.7374290745451, LR: 0.0003 +[2026-03-04 20:38:30] (step=0059991) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.737624730972412, LR: 0.0003 +[2026-03-04 20:38:38] (step=0059992) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.737820387399726, LR: 0.0003 +[2026-03-04 20:38:46] (step=0059993) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.73801604382704, LR: 0.0003 +[2026-03-04 20:38:54] (step=0059994) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.738211700254354, LR: 0.0003 +[2026-03-04 20:39:01] (step=0059995) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.738407356681668, LR: 0.0003 +[2026-03-04 20:39:09] (step=0059996) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.73860301310898, LR: 0.0003 +[2026-03-04 20:39:17] (step=0059997) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.738798669536294, LR: 0.0003 +[2026-03-04 20:39:25] (step=0059998) Train Loss: 0.4474, Train Steps/Sec: 0.12, Epoch: 11.738994325963608, LR: 0.0003 +[2026-03-04 20:39:33] (step=0059999) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.739189982390922, LR: 0.0003 +[2026-03-04 20:39:41] (step=0060000) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 11.739385638818236, LR: 0.0003 +[2026-03-04 20:39:41] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0060000/ +[2026-03-04 20:39:49] (step=0060001) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.739581295245548, LR: 0.0003 +[2026-03-04 20:39:57] (step=0060002) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.739776951672862, LR: 0.0003 +[2026-03-04 20:40:04] (step=0060003) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 11.739972608100176, LR: 0.0003 +[2026-03-04 20:40:12] (step=0060004) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.74016826452749, LR: 0.0003 +[2026-03-04 20:40:20] (step=0060005) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.740363920954803, LR: 0.0003 +[2026-03-04 20:40:28] (step=0060006) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.740559577382117, LR: 0.0003 +[2026-03-04 20:40:36] (step=0060007) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.74075523380943, LR: 0.0003 +[2026-03-04 20:40:44] (step=0060008) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.740950890236745, LR: 0.0003 +[2026-03-04 20:40:52] (step=0060009) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.741146546664059, LR: 0.0003 +[2026-03-04 20:40:59] (step=0060010) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 11.74134220309137, LR: 0.0003 +[2026-03-04 20:41:07] (step=0060011) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.741537859518685, LR: 0.0003 +[2026-03-04 20:41:15] (step=0060012) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.741733515945999, LR: 0.0003 +[2026-03-04 20:41:23] (step=0060013) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.741929172373313, LR: 0.0003 +[2026-03-04 20:41:31] (step=0060014) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.742124828800627, LR: 0.0003 +[2026-03-04 20:41:39] (step=0060015) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.742320485227939, LR: 0.0003 +[2026-03-04 20:41:47] (step=0060016) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.742516141655253, LR: 0.0003 +[2026-03-04 20:41:54] (step=0060017) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.742711798082567, LR: 0.0003 +[2026-03-04 20:42:02] (step=0060018) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.742907454509881, LR: 0.0003 +[2026-03-04 20:42:10] (step=0060019) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.743103110937195, LR: 0.0003 +[2026-03-04 20:42:18] (step=0060020) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.743298767364507, LR: 0.0003 +[2026-03-04 20:42:26] (step=0060021) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 11.743494423791821, LR: 0.0003 +[2026-03-04 20:42:34] (step=0060022) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.743690080219135, LR: 0.0003 +[2026-03-04 20:42:42] (step=0060023) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 11.74388573664645, LR: 0.0003 +[2026-03-04 20:42:50] (step=0060024) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.744081393073763, LR: 0.0003 +[2026-03-04 20:42:57] (step=0060025) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.744277049501076, LR: 0.0003 +[2026-03-04 20:43:05] (step=0060026) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 11.74447270592839, LR: 0.0003 +[2026-03-04 20:43:13] (step=0060027) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.744668362355704, LR: 0.0003 +[2026-03-04 20:43:21] (step=0060028) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.744864018783018, LR: 0.0003 +[2026-03-04 20:43:29] (step=0060029) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.74505967521033, LR: 0.0003 +[2026-03-04 20:43:37] (step=0060030) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.745255331637644, LR: 0.0003 +[2026-03-04 20:43:45] (step=0060031) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.745450988064958, LR: 0.0003 +[2026-03-04 20:43:53] (step=0060032) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 11.745646644492272, LR: 0.0003 +[2026-03-04 20:44:00] (step=0060033) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.745842300919586, LR: 0.0003 +[2026-03-04 20:44:08] (step=0060034) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.746037957346898, LR: 0.0003 +[2026-03-04 20:44:16] (step=0060035) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.746233613774212, LR: 0.0003 +[2026-03-04 20:44:24] (step=0060036) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.746429270201526, LR: 0.0003 +[2026-03-04 20:44:32] (step=0060037) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.74662492662884, LR: 0.0003 +[2026-03-04 20:44:40] (step=0060038) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 11.746820583056154, LR: 0.0003 +[2026-03-04 20:44:48] (step=0060039) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.747016239483466, LR: 0.0003 +[2026-03-04 20:44:55] (step=0060040) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.74721189591078, LR: 0.0003 +[2026-03-04 20:45:03] (step=0060041) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.747407552338094, LR: 0.0003 +[2026-03-04 20:45:11] (step=0060042) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 11.747603208765408, LR: 0.0003 +[2026-03-04 20:45:19] (step=0060043) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.747798865192722, LR: 0.0003 +[2026-03-04 20:45:27] (step=0060044) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.747994521620035, LR: 0.0003 +[2026-03-04 20:45:35] (step=0060045) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.748190178047349, LR: 0.0003 +[2026-03-04 20:45:43] (step=0060046) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.748385834474663, LR: 0.0003 +[2026-03-04 20:45:50] (step=0060047) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.748581490901977, LR: 0.0003 +[2026-03-04 20:45:58] (step=0060048) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.74877714732929, LR: 0.0003 +[2026-03-04 20:46:06] (step=0060049) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.748972803756603, LR: 0.0003 +[2026-03-04 20:46:14] (step=0060050) Train Loss: 0.4460, Train Steps/Sec: 0.12, Epoch: 11.749168460183917, LR: 0.0003 +[2026-03-04 20:46:22] (step=0060051) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.74936411661123, LR: 0.0003 +[2026-03-04 20:46:30] (step=0060052) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 11.749559773038545, LR: 0.0003 +[2026-03-04 20:46:38] (step=0060053) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.749755429465859, LR: 0.0003 +[2026-03-04 20:46:46] (step=0060054) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.749951085893171, LR: 0.0003 +[2026-03-04 20:46:54] (step=0060055) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.750146742320485, LR: 0.0003 +[2026-03-04 20:47:01] (step=0060056) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.750342398747799, LR: 0.0003 +[2026-03-04 20:47:09] (step=0060057) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.750538055175113, LR: 0.0003 +[2026-03-04 20:47:17] (step=0060058) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 11.750733711602425, LR: 0.0003 +[2026-03-04 20:47:25] (step=0060059) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.75092936802974, LR: 0.0003 +[2026-03-04 20:47:33] (step=0060060) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.751125024457053, LR: 0.0003 +[2026-03-04 20:47:41] (step=0060061) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 11.751320680884367, LR: 0.0003 +[2026-03-04 20:47:49] (step=0060062) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.751516337311681, LR: 0.0003 +[2026-03-04 20:47:56] (step=0060063) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.751711993738994, LR: 0.0003 +[2026-03-04 20:48:04] (step=0060064) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.751907650166308, LR: 0.0003 +[2026-03-04 20:48:12] (step=0060065) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.752103306593622, LR: 0.0003 +[2026-03-04 20:48:20] (step=0060066) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.752298963020936, LR: 0.0003 +[2026-03-04 20:48:28] (step=0060067) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.75249461944825, LR: 0.0003 +[2026-03-04 20:48:36] (step=0060068) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.752690275875562, LR: 0.0003 +[2026-03-04 20:48:44] (step=0060069) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.752885932302876, LR: 0.0003 +[2026-03-04 20:48:52] (step=0060070) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.75308158873019, LR: 0.0003 +[2026-03-04 20:48:59] (step=0060071) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 11.753277245157504, LR: 0.0003 +[2026-03-04 20:49:07] (step=0060072) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.753472901584818, LR: 0.0003 +[2026-03-04 20:49:15] (step=0060073) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.75366855801213, LR: 0.0003 +[2026-03-04 20:49:23] (step=0060074) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.753864214439444, LR: 0.0003 +[2026-03-04 20:49:31] (step=0060075) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.754059870866758, LR: 0.0003 +[2026-03-04 20:49:39] (step=0060076) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.754255527294072, LR: 0.0003 +[2026-03-04 20:49:47] (step=0060077) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.754451183721386, LR: 0.0003 +[2026-03-04 20:49:55] (step=0060078) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.754646840148698, LR: 0.0003 +[2026-03-04 20:50:02] (step=0060079) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.754842496576012, LR: 0.0003 +[2026-03-04 20:50:10] (step=0060080) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.755038153003326, LR: 0.0003 +[2026-03-04 20:50:18] (step=0060081) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 11.75523380943064, LR: 0.0003 +[2026-03-04 20:50:26] (step=0060082) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 11.755429465857953, LR: 0.0003 +[2026-03-04 20:50:34] (step=0060083) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.755625122285267, LR: 0.0003 +[2026-03-04 20:50:42] (step=0060084) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.75582077871258, LR: 0.0003 +[2026-03-04 20:50:50] (step=0060085) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.756016435139895, LR: 0.0003 +[2026-03-04 20:50:58] (step=0060086) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.756212091567209, LR: 0.0003 +[2026-03-04 20:51:05] (step=0060087) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 11.75640774799452, LR: 0.0003 +[2026-03-04 20:51:13] (step=0060088) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.756603404421835, LR: 0.0003 +[2026-03-04 20:51:21] (step=0060089) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 11.756799060849149, LR: 0.0003 +[2026-03-04 20:51:29] (step=0060090) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.756994717276463, LR: 0.0003 +[2026-03-04 20:51:37] (step=0060091) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.757190373703777, LR: 0.0003 +[2026-03-04 20:51:45] (step=0060092) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.757386030131089, LR: 0.0003 +[2026-03-04 20:51:53] (step=0060093) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.757581686558403, LR: 0.0003 +[2026-03-04 20:52:00] (step=0060094) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.757777342985717, LR: 0.0003 +[2026-03-04 20:52:08] (step=0060095) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.757972999413031, LR: 0.0003 +[2026-03-04 20:52:16] (step=0060096) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.758168655840345, LR: 0.0003 +[2026-03-04 20:52:24] (step=0060097) Train Loss: 0.4310, Train Steps/Sec: 0.12, Epoch: 11.758364312267657, LR: 0.0003 +[2026-03-04 20:52:32] (step=0060098) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.758559968694971, LR: 0.0003 +[2026-03-04 20:52:40] (step=0060099) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 11.758755625122285, LR: 0.0003 +[2026-03-04 20:52:48] (step=0060100) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.7589512815496, LR: 0.0003 +[2026-03-04 20:52:56] (step=0060101) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 11.759146937976913, LR: 0.0003 +[2026-03-04 20:53:04] (step=0060102) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.759342594404226, LR: 0.0003 +[2026-03-04 20:53:11] (step=0060103) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.75953825083154, LR: 0.0003 +[2026-03-04 20:53:19] (step=0060104) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 11.759733907258854, LR: 0.0003 +[2026-03-04 20:53:27] (step=0060105) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 11.759929563686168, LR: 0.0003 +[2026-03-04 20:53:35] (step=0060106) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.760125220113482, LR: 0.0003 +[2026-03-04 20:53:43] (step=0060107) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.760320876540794, LR: 0.0003 +[2026-03-04 20:53:51] (step=0060108) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.760516532968108, LR: 0.0003 +[2026-03-04 20:53:59] (step=0060109) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.760712189395422, LR: 0.0003 +[2026-03-04 20:54:07] (step=0060110) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.760907845822736, LR: 0.0003 +[2026-03-04 20:54:14] (step=0060111) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 11.761103502250048, LR: 0.0003 +[2026-03-04 20:54:22] (step=0060112) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.761299158677362, LR: 0.0003 +[2026-03-04 20:54:30] (step=0060113) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.761494815104676, LR: 0.0003 +[2026-03-04 20:54:38] (step=0060114) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.76169047153199, LR: 0.0003 +[2026-03-04 20:54:46] (step=0060115) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.761886127959304, LR: 0.0003 +[2026-03-04 20:54:54] (step=0060116) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.762081784386616, LR: 0.0003 +[2026-03-04 20:55:01] (step=0060117) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.76227744081393, LR: 0.0003 +[2026-03-04 20:55:09] (step=0060118) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.762473097241244, LR: 0.0003 +[2026-03-04 20:55:17] (step=0060119) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.762668753668558, LR: 0.0003 +[2026-03-04 20:55:25] (step=0060120) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.762864410095872, LR: 0.0003 +[2026-03-04 20:55:33] (step=0060121) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.763060066523185, LR: 0.0003 +[2026-03-04 20:55:41] (step=0060122) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.763255722950499, LR: 0.0003 +[2026-03-04 20:55:49] (step=0060123) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.763451379377813, LR: 0.0003 +[2026-03-04 20:55:56] (step=0060124) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.763647035805127, LR: 0.0003 +[2026-03-04 20:56:04] (step=0060125) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.76384269223244, LR: 0.0003 +[2026-03-04 20:56:12] (step=0060126) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.764038348659753, LR: 0.0003 +[2026-03-04 20:56:20] (step=0060127) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 11.764234005087067, LR: 0.0003 +[2026-03-04 20:56:28] (step=0060128) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.76442966151438, LR: 0.0003 +[2026-03-04 20:56:36] (step=0060129) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.764625317941695, LR: 0.0003 +[2026-03-04 20:56:44] (step=0060130) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.764820974369009, LR: 0.0003 +[2026-03-04 20:56:52] (step=0060131) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.765016630796321, LR: 0.0003 +[2026-03-04 20:57:00] (step=0060132) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.765212287223635, LR: 0.0003 +[2026-03-04 20:57:07] (step=0060133) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.765407943650949, LR: 0.0003 +[2026-03-04 20:57:15] (step=0060134) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.765603600078263, LR: 0.0003 +[2026-03-04 20:57:23] (step=0060135) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.765799256505575, LR: 0.0003 +[2026-03-04 20:57:31] (step=0060136) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.76599491293289, LR: 0.0003 +[2026-03-04 20:57:39] (step=0060137) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.766190569360203, LR: 0.0003 +[2026-03-04 20:57:47] (step=0060138) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.766386225787517, LR: 0.0003 +[2026-03-04 20:57:55] (step=0060139) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.766581882214831, LR: 0.0003 +[2026-03-04 20:58:02] (step=0060140) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.766777538642144, LR: 0.0003 +[2026-03-04 20:58:10] (step=0060141) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 11.766973195069458, LR: 0.0003 +[2026-03-04 20:58:18] (step=0060142) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.767168851496772, LR: 0.0003 +[2026-03-04 20:58:26] (step=0060143) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.767364507924086, LR: 0.0003 +[2026-03-04 20:58:34] (step=0060144) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.7675601643514, LR: 0.0003 +[2026-03-04 20:58:42] (step=0060145) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.767755820778712, LR: 0.0003 +[2026-03-04 20:58:50] (step=0060146) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.767951477206026, LR: 0.0003 +[2026-03-04 20:58:58] (step=0060147) Train Loss: 0.4535, Train Steps/Sec: 0.12, Epoch: 11.76814713363334, LR: 0.0003 +[2026-03-04 20:59:05] (step=0060148) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.768342790060654, LR: 0.0003 +[2026-03-04 20:59:13] (step=0060149) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 11.768538446487968, LR: 0.0003 +[2026-03-04 20:59:21] (step=0060150) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.76873410291528, LR: 0.0003 +[2026-03-04 20:59:29] (step=0060151) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.768929759342594, LR: 0.0003 +[2026-03-04 20:59:37] (step=0060152) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.769125415769908, LR: 0.0003 +[2026-03-04 20:59:45] (step=0060153) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.769321072197222, LR: 0.0003 +[2026-03-04 20:59:53] (step=0060154) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.769516728624536, LR: 0.0003 +[2026-03-04 21:00:00] (step=0060155) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.769712385051848, LR: 0.0003 +[2026-03-04 21:00:08] (step=0060156) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 11.769908041479162, LR: 0.0003 +[2026-03-04 21:00:16] (step=0060157) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.770103697906476, LR: 0.0003 +[2026-03-04 21:00:24] (step=0060158) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.77029935433379, LR: 0.0003 +[2026-03-04 21:00:32] (step=0060159) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.770495010761104, LR: 0.0003 +[2026-03-04 21:00:40] (step=0060160) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.770690667188417, LR: 0.0003 +[2026-03-04 21:00:48] (step=0060161) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.77088632361573, LR: 0.0003 +[2026-03-04 21:00:55] (step=0060162) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.771081980043045, LR: 0.0003 +[2026-03-04 21:01:03] (step=0060163) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.771277636470359, LR: 0.0003 +[2026-03-04 21:01:11] (step=0060164) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.77147329289767, LR: 0.0003 +[2026-03-04 21:01:19] (step=0060165) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.771668949324985, LR: 0.0003 +[2026-03-04 21:01:27] (step=0060166) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.771864605752299, LR: 0.0003 +[2026-03-04 21:01:35] (step=0060167) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.772060262179613, LR: 0.0003 +[2026-03-04 21:01:43] (step=0060168) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.772255918606927, LR: 0.0003 +[2026-03-04 21:01:50] (step=0060169) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.77245157503424, LR: 0.0003 +[2026-03-04 21:01:58] (step=0060170) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 11.772647231461553, LR: 0.0003 +[2026-03-04 21:02:06] (step=0060171) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 11.772842887888867, LR: 0.0003 +[2026-03-04 21:02:14] (step=0060172) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.773038544316181, LR: 0.0003 +[2026-03-04 21:02:22] (step=0060173) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.773234200743495, LR: 0.0003 +[2026-03-04 21:02:30] (step=0060174) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.773429857170807, LR: 0.0003 +[2026-03-04 21:02:38] (step=0060175) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 11.773625513598121, LR: 0.0003 +[2026-03-04 21:02:46] (step=0060176) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.773821170025435, LR: 0.0003 +[2026-03-04 21:02:53] (step=0060177) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.77401682645275, LR: 0.0003 +[2026-03-04 21:03:01] (step=0060178) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.774212482880063, LR: 0.0003 +[2026-03-04 21:03:09] (step=0060179) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.774408139307376, LR: 0.0003 +[2026-03-04 21:03:17] (step=0060180) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.77460379573469, LR: 0.0003 +[2026-03-04 21:03:25] (step=0060181) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.774799452162004, LR: 0.0003 +[2026-03-04 21:03:33] (step=0060182) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.774995108589318, LR: 0.0003 +[2026-03-04 21:03:41] (step=0060183) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.775190765016632, LR: 0.0003 +[2026-03-04 21:03:49] (step=0060184) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.775386421443944, LR: 0.0003 +[2026-03-04 21:03:56] (step=0060185) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.775582077871258, LR: 0.0003 +[2026-03-04 21:04:04] (step=0060186) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.775777734298572, LR: 0.0003 +[2026-03-04 21:04:12] (step=0060187) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.775973390725886, LR: 0.0003 +[2026-03-04 21:04:20] (step=0060188) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.776169047153198, LR: 0.0003 +[2026-03-04 21:04:28] (step=0060189) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.776364703580512, LR: 0.0003 +[2026-03-04 21:04:36] (step=0060190) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.776560360007826, LR: 0.0003 +[2026-03-04 21:04:44] (step=0060191) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.77675601643514, LR: 0.0003 +[2026-03-04 21:04:51] (step=0060192) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.776951672862454, LR: 0.0003 +[2026-03-04 21:04:59] (step=0060193) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.777147329289766, LR: 0.0003 +[2026-03-04 21:05:07] (step=0060194) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.77734298571708, LR: 0.0003 +[2026-03-04 21:05:15] (step=0060195) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.777538642144394, LR: 0.0003 +[2026-03-04 21:05:23] (step=0060196) Train Loss: 0.4509, Train Steps/Sec: 0.12, Epoch: 11.777734298571708, LR: 0.0003 +[2026-03-04 21:05:31] (step=0060197) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.777929954999022, LR: 0.0003 +[2026-03-04 21:05:39] (step=0060198) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.778125611426335, LR: 0.0003 +[2026-03-04 21:05:47] (step=0060199) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.778321267853649, LR: 0.0003 +[2026-03-04 21:05:54] (step=0060200) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.778516924280963, LR: 0.0003 +[2026-03-04 21:06:02] (step=0060201) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.778712580708277, LR: 0.0003 +[2026-03-04 21:06:10] (step=0060202) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.77890823713559, LR: 0.0003 +[2026-03-04 21:06:18] (step=0060203) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.779103893562903, LR: 0.0003 +[2026-03-04 21:06:26] (step=0060204) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.779299549990217, LR: 0.0003 +[2026-03-04 21:06:34] (step=0060205) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 11.779495206417531, LR: 0.0003 +[2026-03-04 21:06:42] (step=0060206) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.779690862844845, LR: 0.0003 +[2026-03-04 21:06:49] (step=0060207) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.779886519272159, LR: 0.0003 +[2026-03-04 21:06:57] (step=0060208) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.780082175699471, LR: 0.0003 +[2026-03-04 21:07:05] (step=0060209) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 11.780277832126785, LR: 0.0003 +[2026-03-04 21:07:13] (step=0060210) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.7804734885541, LR: 0.0003 +[2026-03-04 21:07:21] (step=0060211) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.780669144981413, LR: 0.0003 +[2026-03-04 21:07:29] (step=0060212) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 11.780864801408727, LR: 0.0003 +[2026-03-04 21:07:37] (step=0060213) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 11.78106045783604, LR: 0.0003 +[2026-03-04 21:07:44] (step=0060214) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.781256114263353, LR: 0.0003 +[2026-03-04 21:07:52] (step=0060215) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.781451770690667, LR: 0.0003 +[2026-03-04 21:08:00] (step=0060216) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.781647427117981, LR: 0.0003 +[2026-03-04 21:08:08] (step=0060217) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.781843083545294, LR: 0.0003 +[2026-03-04 21:08:16] (step=0060218) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.782038739972608, LR: 0.0003 +[2026-03-04 21:08:24] (step=0060219) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.782234396399922, LR: 0.0003 +[2026-03-04 21:08:32] (step=0060220) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.782430052827236, LR: 0.0003 +[2026-03-04 21:08:39] (step=0060221) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 11.78262570925455, LR: 0.0003 +[2026-03-04 21:08:47] (step=0060222) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.782821365681862, LR: 0.0003 +[2026-03-04 21:08:55] (step=0060223) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 11.783017022109176, LR: 0.0003 +[2026-03-04 21:09:03] (step=0060224) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 11.78321267853649, LR: 0.0003 +[2026-03-04 21:09:11] (step=0060225) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.783408334963804, LR: 0.0003 +[2026-03-04 21:09:19] (step=0060226) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.783603991391118, LR: 0.0003 +[2026-03-04 21:09:27] (step=0060227) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.78379964781843, LR: 0.0003 +[2026-03-04 21:09:34] (step=0060228) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.783995304245744, LR: 0.0003 +[2026-03-04 21:09:42] (step=0060229) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.784190960673058, LR: 0.0003 +[2026-03-04 21:09:50] (step=0060230) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.784386617100372, LR: 0.0003 +[2026-03-04 21:09:58] (step=0060231) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.784582273527686, LR: 0.0003 +[2026-03-04 21:10:06] (step=0060232) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.784777929954998, LR: 0.0003 +[2026-03-04 21:10:14] (step=0060233) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.784973586382312, LR: 0.0003 +[2026-03-04 21:10:22] (step=0060234) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.785169242809626, LR: 0.0003 +[2026-03-04 21:10:30] (step=0060235) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.78536489923694, LR: 0.0003 +[2026-03-04 21:10:37] (step=0060236) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 11.785560555664254, LR: 0.0003 +[2026-03-04 21:10:45] (step=0060237) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.785756212091567, LR: 0.0003 +[2026-03-04 21:10:53] (step=0060238) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 11.78595186851888, LR: 0.0003 +[2026-03-04 21:11:01] (step=0060239) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.786147524946195, LR: 0.0003 +[2026-03-04 21:11:09] (step=0060240) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.786343181373509, LR: 0.0003 +[2026-03-04 21:11:17] (step=0060241) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.786538837800821, LR: 0.0003 +[2026-03-04 21:11:25] (step=0060242) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.786734494228135, LR: 0.0003 +[2026-03-04 21:11:32] (step=0060243) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.786930150655449, LR: 0.0003 +[2026-03-04 21:11:40] (step=0060244) Train Loss: 0.4257, Train Steps/Sec: 0.13, Epoch: 11.787125807082763, LR: 0.0003 +[2026-03-04 21:11:48] (step=0060245) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.787321463510077, LR: 0.0003 +[2026-03-04 21:11:56] (step=0060246) Train Loss: 0.4320, Train Steps/Sec: 0.12, Epoch: 11.78751711993739, LR: 0.0003 +[2026-03-04 21:12:04] (step=0060247) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.787712776364703, LR: 0.0003 +[2026-03-04 21:12:12] (step=0060248) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.787908432792017, LR: 0.0003 +[2026-03-04 21:12:20] (step=0060249) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.788104089219331, LR: 0.0003 +[2026-03-04 21:12:28] (step=0060250) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.788299745646645, LR: 0.0003 +[2026-03-04 21:12:35] (step=0060251) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.788495402073957, LR: 0.0003 +[2026-03-04 21:12:43] (step=0060252) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.788691058501271, LR: 0.0003 +[2026-03-04 21:12:51] (step=0060253) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.788886714928585, LR: 0.0003 +[2026-03-04 21:12:59] (step=0060254) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 11.7890823713559, LR: 0.0003 +[2026-03-04 21:13:07] (step=0060255) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.789278027783213, LR: 0.0003 +[2026-03-04 21:13:15] (step=0060256) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.789473684210526, LR: 0.0003 +[2026-03-04 21:13:23] (step=0060257) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.78966934063784, LR: 0.0003 +[2026-03-04 21:13:30] (step=0060258) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.789864997065154, LR: 0.0003 +[2026-03-04 21:13:38] (step=0060259) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 11.790060653492468, LR: 0.0003 +[2026-03-04 21:13:46] (step=0060260) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.790256309919782, LR: 0.0003 +[2026-03-04 21:13:54] (step=0060261) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.790451966347094, LR: 0.0003 +[2026-03-04 21:14:02] (step=0060262) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.790647622774408, LR: 0.0003 +[2026-03-04 21:14:10] (step=0060263) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.790843279201722, LR: 0.0003 +[2026-03-04 21:14:18] (step=0060264) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.791038935629036, LR: 0.0003 +[2026-03-04 21:14:26] (step=0060265) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.79123459205635, LR: 0.0003 +[2026-03-04 21:14:33] (step=0060266) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.791430248483662, LR: 0.0003 +[2026-03-04 21:14:41] (step=0060267) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.791625904910976, LR: 0.0003 +[2026-03-04 21:14:49] (step=0060268) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.79182156133829, LR: 0.0003 +[2026-03-04 21:14:57] (step=0060269) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.792017217765604, LR: 0.0003 +[2026-03-04 21:15:05] (step=0060270) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.792212874192916, LR: 0.0003 +[2026-03-04 21:15:13] (step=0060271) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.79240853062023, LR: 0.0003 +[2026-03-04 21:15:21] (step=0060272) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.792604187047544, LR: 0.0003 +[2026-03-04 21:15:28] (step=0060273) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.792799843474858, LR: 0.0003 +[2026-03-04 21:15:36] (step=0060274) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.792995499902172, LR: 0.0003 +[2026-03-04 21:15:44] (step=0060275) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.793191156329485, LR: 0.0003 +[2026-03-04 21:15:52] (step=0060276) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 11.793386812756799, LR: 0.0003 +[2026-03-04 21:16:00] (step=0060277) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.793582469184113, LR: 0.0003 +[2026-03-04 21:16:08] (step=0060278) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 11.793778125611427, LR: 0.0003 +[2026-03-04 21:16:16] (step=0060279) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.79397378203874, LR: 0.0003 +[2026-03-04 21:16:24] (step=0060280) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.794169438466053, LR: 0.0003 +[2026-03-04 21:16:31] (step=0060281) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.794365094893367, LR: 0.0003 +[2026-03-04 21:16:39] (step=0060282) Train Loss: 0.4461, Train Steps/Sec: 0.12, Epoch: 11.794560751320681, LR: 0.0003 +[2026-03-04 21:16:47] (step=0060283) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 11.794756407747995, LR: 0.0003 +[2026-03-04 21:16:55] (step=0060284) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.794952064175309, LR: 0.0003 +[2026-03-04 21:17:03] (step=0060285) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 11.795147720602621, LR: 0.0003 +[2026-03-04 21:17:11] (step=0060286) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 11.795343377029935, LR: 0.0003 +[2026-03-04 21:17:19] (step=0060287) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.79553903345725, LR: 0.0003 +[2026-03-04 21:17:27] (step=0060288) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.795734689884563, LR: 0.0003 +[2026-03-04 21:17:34] (step=0060289) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.795930346311877, LR: 0.0003 +[2026-03-04 21:17:42] (step=0060290) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.79612600273919, LR: 0.0003 +[2026-03-04 21:17:50] (step=0060291) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.796321659166503, LR: 0.0003 +[2026-03-04 21:17:58] (step=0060292) Train Loss: 0.4484, Train Steps/Sec: 0.12, Epoch: 11.796517315593817, LR: 0.0003 +[2026-03-04 21:18:06] (step=0060293) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.796712972021131, LR: 0.0003 +[2026-03-04 21:18:14] (step=0060294) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.796908628448444, LR: 0.0003 +[2026-03-04 21:18:22] (step=0060295) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.797104284875758, LR: 0.0003 +[2026-03-04 21:18:30] (step=0060296) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.797299941303072, LR: 0.0003 +[2026-03-04 21:18:37] (step=0060297) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.797495597730386, LR: 0.0003 +[2026-03-04 21:18:45] (step=0060298) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.7976912541577, LR: 0.0003 +[2026-03-04 21:18:53] (step=0060299) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.797886910585012, LR: 0.0003 +[2026-03-04 21:19:01] (step=0060300) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.798082567012326, LR: 0.0003 +[2026-03-04 21:19:09] (step=0060301) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.79827822343964, LR: 0.0003 +[2026-03-04 21:19:17] (step=0060302) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.798473879866954, LR: 0.0003 +[2026-03-04 21:19:25] (step=0060303) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.798669536294268, LR: 0.0003 +[2026-03-04 21:19:32] (step=0060304) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.79886519272158, LR: 0.0003 +[2026-03-04 21:19:40] (step=0060305) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.799060849148894, LR: 0.0003 +[2026-03-04 21:19:48] (step=0060306) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.799256505576208, LR: 0.0003 +[2026-03-04 21:19:56] (step=0060307) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.799452162003522, LR: 0.0003 +[2026-03-04 21:20:04] (step=0060308) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.799647818430836, LR: 0.0003 +[2026-03-04 21:20:12] (step=0060309) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.799843474858148, LR: 0.0003 +[2026-03-04 21:20:20] (step=0060310) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.800039131285462, LR: 0.0003 +[2026-03-04 21:20:27] (step=0060311) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.800234787712776, LR: 0.0003 +[2026-03-04 21:20:35] (step=0060312) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.80043044414009, LR: 0.0003 +[2026-03-04 21:20:43] (step=0060313) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.800626100567404, LR: 0.0003 +[2026-03-04 21:20:51] (step=0060314) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.800821756994717, LR: 0.0003 +[2026-03-04 21:20:59] (step=0060315) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.80101741342203, LR: 0.0003 +[2026-03-04 21:21:07] (step=0060316) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.801213069849345, LR: 0.0003 +[2026-03-04 21:21:15] (step=0060317) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.801408726276659, LR: 0.0003 +[2026-03-04 21:21:22] (step=0060318) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 11.801604382703971, LR: 0.0003 +[2026-03-04 21:21:30] (step=0060319) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 11.801800039131285, LR: 0.0003 +[2026-03-04 21:21:38] (step=0060320) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.801995695558599, LR: 0.0003 +[2026-03-04 21:21:46] (step=0060321) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.802191351985913, LR: 0.0003 +[2026-03-04 21:21:54] (step=0060322) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.802387008413227, LR: 0.0003 +[2026-03-04 21:22:02] (step=0060323) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.80258266484054, LR: 0.0003 +[2026-03-04 21:22:10] (step=0060324) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.802778321267853, LR: 0.0003 +[2026-03-04 21:22:17] (step=0060325) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.802973977695167, LR: 0.0003 +[2026-03-04 21:22:25] (step=0060326) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.803169634122481, LR: 0.0003 +[2026-03-04 21:22:33] (step=0060327) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.803365290549795, LR: 0.0003 +[2026-03-04 21:22:41] (step=0060328) Train Loss: 0.4454, Train Steps/Sec: 0.12, Epoch: 11.803560946977107, LR: 0.0003 +[2026-03-04 21:22:49] (step=0060329) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.803756603404421, LR: 0.0003 +[2026-03-04 21:22:57] (step=0060330) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 11.803952259831735, LR: 0.0003 +[2026-03-04 21:23:05] (step=0060331) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.80414791625905, LR: 0.0003 +[2026-03-04 21:23:13] (step=0060332) Train Loss: 0.4233, Train Steps/Sec: 0.13, Epoch: 11.804343572686363, LR: 0.0003 +[2026-03-04 21:23:20] (step=0060333) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.804539229113676, LR: 0.0003 +[2026-03-04 21:23:28] (step=0060334) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 11.80473488554099, LR: 0.0003 +[2026-03-04 21:23:36] (step=0060335) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.804930541968304, LR: 0.0003 +[2026-03-04 21:23:44] (step=0060336) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.805126198395618, LR: 0.0003 +[2026-03-04 21:23:52] (step=0060337) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.805321854822932, LR: 0.0003 +[2026-03-04 21:24:00] (step=0060338) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.805517511250244, LR: 0.0003 +[2026-03-04 21:24:08] (step=0060339) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.805713167677558, LR: 0.0003 +[2026-03-04 21:24:15] (step=0060340) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.805908824104872, LR: 0.0003 +[2026-03-04 21:24:23] (step=0060341) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.806104480532186, LR: 0.0003 +[2026-03-04 21:24:31] (step=0060342) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 11.8063001369595, LR: 0.0003 +[2026-03-04 21:24:39] (step=0060343) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.806495793386812, LR: 0.0003 +[2026-03-04 21:24:47] (step=0060344) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.806691449814126, LR: 0.0003 +[2026-03-04 21:24:55] (step=0060345) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.80688710624144, LR: 0.0003 +[2026-03-04 21:25:03] (step=0060346) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 11.807082762668754, LR: 0.0003 +[2026-03-04 21:25:11] (step=0060347) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.807278419096066, LR: 0.0003 +[2026-03-04 21:25:18] (step=0060348) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.80747407552338, LR: 0.0003 +[2026-03-04 21:25:26] (step=0060349) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.807669731950694, LR: 0.0003 +[2026-03-04 21:25:34] (step=0060350) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.807865388378008, LR: 0.0003 +[2026-03-04 21:25:42] (step=0060351) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.808061044805322, LR: 0.0003 +[2026-03-04 21:25:50] (step=0060352) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.808256701232635, LR: 0.0003 +[2026-03-04 21:25:58] (step=0060353) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.808452357659949, LR: 0.0003 +[2026-03-04 21:26:06] (step=0060354) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.808648014087263, LR: 0.0003 +[2026-03-04 21:26:14] (step=0060355) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.808843670514577, LR: 0.0003 +[2026-03-04 21:26:21] (step=0060356) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.80903932694189, LR: 0.0003 +[2026-03-04 21:26:29] (step=0060357) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.809234983369203, LR: 0.0003 +[2026-03-04 21:26:37] (step=0060358) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.809430639796517, LR: 0.0003 +[2026-03-04 21:26:45] (step=0060359) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.809626296223831, LR: 0.0003 +[2026-03-04 21:26:53] (step=0060360) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.809821952651145, LR: 0.0003 +[2026-03-04 21:27:01] (step=0060361) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.810017609078459, LR: 0.0003 +[2026-03-04 21:27:09] (step=0060362) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.810213265505771, LR: 0.0003 +[2026-03-04 21:27:16] (step=0060363) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.810408921933085, LR: 0.0003 +[2026-03-04 21:27:24] (step=0060364) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.8106045783604, LR: 0.0003 +[2026-03-04 21:27:32] (step=0060365) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 11.810800234787713, LR: 0.0003 +[2026-03-04 21:27:40] (step=0060366) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.810995891215027, LR: 0.0003 +[2026-03-04 21:27:48] (step=0060367) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.81119154764234, LR: 0.0003 +[2026-03-04 21:27:56] (step=0060368) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.811387204069653, LR: 0.0003 +[2026-03-04 21:28:04] (step=0060369) Train Loss: 0.4261, Train Steps/Sec: 0.13, Epoch: 11.811582860496967, LR: 0.0003 +[2026-03-04 21:28:11] (step=0060370) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.811778516924281, LR: 0.0003 +[2026-03-04 21:28:19] (step=0060371) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.811974173351594, LR: 0.0003 +[2026-03-04 21:28:27] (step=0060372) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.812169829778908, LR: 0.0003 +[2026-03-04 21:28:35] (step=0060373) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.812365486206222, LR: 0.0003 +[2026-03-04 21:28:43] (step=0060374) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.812561142633536, LR: 0.0003 +[2026-03-04 21:28:51] (step=0060375) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.81275679906085, LR: 0.0003 +[2026-03-04 21:28:59] (step=0060376) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.812952455488162, LR: 0.0003 +[2026-03-04 21:29:06] (step=0060377) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.813148111915476, LR: 0.0003 +[2026-03-04 21:29:14] (step=0060378) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.81334376834279, LR: 0.0003 +[2026-03-04 21:29:22] (step=0060379) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.813539424770104, LR: 0.0003 +[2026-03-04 21:29:30] (step=0060380) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.813735081197418, LR: 0.0003 +[2026-03-04 21:29:38] (step=0060381) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.81393073762473, LR: 0.0003 +[2026-03-04 21:29:46] (step=0060382) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.814126394052044, LR: 0.0003 +[2026-03-04 21:29:54] (step=0060383) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.814322050479358, LR: 0.0003 +[2026-03-04 21:30:02] (step=0060384) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 11.814517706906672, LR: 0.0003 +[2026-03-04 21:30:09] (step=0060385) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.814713363333986, LR: 0.0003 +[2026-03-04 21:30:17] (step=0060386) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.814909019761298, LR: 0.0003 +[2026-03-04 21:30:25] (step=0060387) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.815104676188612, LR: 0.0003 +[2026-03-04 21:30:33] (step=0060388) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.815300332615926, LR: 0.0003 +[2026-03-04 21:30:41] (step=0060389) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.81549598904324, LR: 0.0003 +[2026-03-04 21:30:49] (step=0060390) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.815691645470555, LR: 0.0003 +[2026-03-04 21:30:57] (step=0060391) Train Loss: 0.4416, Train Steps/Sec: 0.12, Epoch: 11.815887301897867, LR: 0.0003 +[2026-03-04 21:31:05] (step=0060392) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 11.81608295832518, LR: 0.0003 +[2026-03-04 21:31:13] (step=0060393) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.816278614752495, LR: 0.0003 +[2026-03-04 21:31:20] (step=0060394) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.816474271179809, LR: 0.0003 +[2026-03-04 21:31:28] (step=0060395) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 11.816669927607123, LR: 0.0003 +[2026-03-04 21:31:36] (step=0060396) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.816865584034435, LR: 0.0003 +[2026-03-04 21:31:44] (step=0060397) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 11.817061240461749, LR: 0.0003 +[2026-03-04 21:31:52] (step=0060398) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.817256896889063, LR: 0.0003 +[2026-03-04 21:32:00] (step=0060399) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.817452553316377, LR: 0.0003 +[2026-03-04 21:32:08] (step=0060400) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.81764820974369, LR: 0.0003 +[2026-03-04 21:32:15] (step=0060401) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.817843866171003, LR: 0.0003 +[2026-03-04 21:32:23] (step=0060402) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.818039522598317, LR: 0.0003 +[2026-03-04 21:32:31] (step=0060403) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 11.818235179025631, LR: 0.0003 +[2026-03-04 21:32:39] (step=0060404) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.818430835452945, LR: 0.0003 +[2026-03-04 21:32:47] (step=0060405) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.818626491880257, LR: 0.0003 +[2026-03-04 21:32:55] (step=0060406) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.818822148307571, LR: 0.0003 +[2026-03-04 21:33:03] (step=0060407) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.819017804734886, LR: 0.0003 +[2026-03-04 21:33:11] (step=0060408) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.8192134611622, LR: 0.0003 +[2026-03-04 21:33:18] (step=0060409) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.819409117589514, LR: 0.0003 +[2026-03-04 21:33:26] (step=0060410) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.819604774016826, LR: 0.0003 +[2026-03-04 21:33:34] (step=0060411) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.81980043044414, LR: 0.0003 +[2026-03-04 21:33:42] (step=0060412) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.819996086871454, LR: 0.0003 +[2026-03-04 21:33:50] (step=0060413) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 11.820191743298768, LR: 0.0003 +[2026-03-04 21:33:58] (step=0060414) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 11.820387399726082, LR: 0.0003 +[2026-03-04 21:34:06] (step=0060415) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.820583056153394, LR: 0.0003 +[2026-03-04 21:34:13] (step=0060416) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.820778712580708, LR: 0.0003 +[2026-03-04 21:34:21] (step=0060417) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.820974369008022, LR: 0.0003 +[2026-03-04 21:34:29] (step=0060418) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.821170025435336, LR: 0.0003 +[2026-03-04 21:34:37] (step=0060419) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.82136568186265, LR: 0.0003 +[2026-03-04 21:34:45] (step=0060420) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.821561338289962, LR: 0.0003 +[2026-03-04 21:34:53] (step=0060421) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.821756994717276, LR: 0.0003 +[2026-03-04 21:35:01] (step=0060422) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.82195265114459, LR: 0.0003 +[2026-03-04 21:35:08] (step=0060423) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 11.822148307571904, LR: 0.0003 +[2026-03-04 21:35:16] (step=0060424) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.822343963999216, LR: 0.0003 +[2026-03-04 21:35:24] (step=0060425) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 11.82253962042653, LR: 0.0003 +[2026-03-04 21:35:32] (step=0060426) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.822735276853845, LR: 0.0003 +[2026-03-04 21:35:40] (step=0060427) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.822930933281159, LR: 0.0003 +[2026-03-04 21:35:48] (step=0060428) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.823126589708473, LR: 0.0003 +[2026-03-04 21:35:56] (step=0060429) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.823322246135785, LR: 0.0003 +[2026-03-04 21:36:04] (step=0060430) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.823517902563099, LR: 0.0003 +[2026-03-04 21:36:11] (step=0060431) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.823713558990413, LR: 0.0003 +[2026-03-04 21:36:19] (step=0060432) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.823909215417727, LR: 0.0003 +[2026-03-04 21:36:27] (step=0060433) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.82410487184504, LR: 0.0003 +[2026-03-04 21:36:35] (step=0060434) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.824300528272353, LR: 0.0003 +[2026-03-04 21:36:43] (step=0060435) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.824496184699667, LR: 0.0003 +[2026-03-04 21:36:51] (step=0060436) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.824691841126981, LR: 0.0003 +[2026-03-04 21:36:58] (step=0060437) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 11.824887497554295, LR: 0.0003 +[2026-03-04 21:37:06] (step=0060438) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.825083153981609, LR: 0.0003 +[2026-03-04 21:37:14] (step=0060439) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.825278810408921, LR: 0.0003 +[2026-03-04 21:37:22] (step=0060440) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 11.825474466836235, LR: 0.0003 +[2026-03-04 21:37:30] (step=0060441) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.82567012326355, LR: 0.0003 +[2026-03-04 21:37:38] (step=0060442) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.825865779690863, LR: 0.0003 +[2026-03-04 21:37:46] (step=0060443) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.826061436118177, LR: 0.0003 +[2026-03-04 21:37:54] (step=0060444) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.82625709254549, LR: 0.0003 +[2026-03-04 21:38:01] (step=0060445) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.826452748972804, LR: 0.0003 +[2026-03-04 21:38:09] (step=0060446) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.826648405400118, LR: 0.0003 +[2026-03-04 21:38:17] (step=0060447) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.826844061827432, LR: 0.0003 +[2026-03-04 21:38:25] (step=0060448) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.827039718254746, LR: 0.0003 +[2026-03-04 21:38:33] (step=0060449) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.827235374682058, LR: 0.0003 +[2026-03-04 21:38:41] (step=0060450) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.827431031109372, LR: 0.0003 +[2026-03-04 21:38:49] (step=0060451) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.827626687536686, LR: 0.0003 +[2026-03-04 21:38:57] (step=0060452) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.827822343964, LR: 0.0003 +[2026-03-04 21:39:04] (step=0060453) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.828018000391312, LR: 0.0003 +[2026-03-04 21:39:12] (step=0060454) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.828213656818626, LR: 0.0003 +[2026-03-04 21:39:20] (step=0060455) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.82840931324594, LR: 0.0003 +[2026-03-04 21:39:28] (step=0060456) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.828604969673254, LR: 0.0003 +[2026-03-04 21:39:36] (step=0060457) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 11.828800626100568, LR: 0.0003 +[2026-03-04 21:39:44] (step=0060458) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 11.82899628252788, LR: 0.0003 +[2026-03-04 21:39:52] (step=0060459) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.829191938955194, LR: 0.0003 +[2026-03-04 21:39:59] (step=0060460) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.829387595382508, LR: 0.0003 +[2026-03-04 21:40:07] (step=0060461) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.829583251809822, LR: 0.0003 +[2026-03-04 21:40:15] (step=0060462) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.829778908237136, LR: 0.0003 +[2026-03-04 21:40:23] (step=0060463) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.829974564664449, LR: 0.0003 +[2026-03-04 21:40:31] (step=0060464) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.830170221091763, LR: 0.0003 +[2026-03-04 21:40:39] (step=0060465) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.830365877519077, LR: 0.0003 +[2026-03-04 21:40:47] (step=0060466) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.83056153394639, LR: 0.0003 +[2026-03-04 21:40:54] (step=0060467) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.830757190373705, LR: 0.0003 +[2026-03-04 21:41:02] (step=0060468) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.830952846801017, LR: 0.0003 +[2026-03-04 21:41:10] (step=0060469) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.83114850322833, LR: 0.0003 +[2026-03-04 21:41:18] (step=0060470) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.831344159655645, LR: 0.0003 +[2026-03-04 21:41:26] (step=0060471) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.831539816082959, LR: 0.0003 +[2026-03-04 21:41:34] (step=0060472) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 11.831735472510273, LR: 0.0003 +[2026-03-04 21:41:42] (step=0060473) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.831931128937585, LR: 0.0003 +[2026-03-04 21:41:49] (step=0060474) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.832126785364899, LR: 0.0003 +[2026-03-04 21:41:57] (step=0060475) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.832322441792213, LR: 0.0003 +[2026-03-04 21:42:05] (step=0060476) Train Loss: 0.4334, Train Steps/Sec: 0.12, Epoch: 11.832518098219527, LR: 0.0003 +[2026-03-04 21:42:13] (step=0060477) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.83271375464684, LR: 0.0003 +[2026-03-04 21:42:21] (step=0060478) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 11.832909411074153, LR: 0.0003 +[2026-03-04 21:42:29] (step=0060479) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.833105067501467, LR: 0.0003 +[2026-03-04 21:42:37] (step=0060480) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.833300723928781, LR: 0.0003 +[2026-03-04 21:42:45] (step=0060481) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.833496380356095, LR: 0.0003 +[2026-03-04 21:42:53] (step=0060482) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.833692036783408, LR: 0.0003 +[2026-03-04 21:43:00] (step=0060483) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.833887693210722, LR: 0.0003 +[2026-03-04 21:43:08] (step=0060484) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.834083349638036, LR: 0.0003 +[2026-03-04 21:43:16] (step=0060485) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.83427900606535, LR: 0.0003 +[2026-03-04 21:43:24] (step=0060486) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.834474662492664, LR: 0.0003 +[2026-03-04 21:43:32] (step=0060487) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.834670318919976, LR: 0.0003 +[2026-03-04 21:43:40] (step=0060488) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.83486597534729, LR: 0.0003 +[2026-03-04 21:43:48] (step=0060489) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.835061631774604, LR: 0.0003 +[2026-03-04 21:43:56] (step=0060490) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.835257288201918, LR: 0.0003 +[2026-03-04 21:44:03] (step=0060491) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.835452944629232, LR: 0.0003 +[2026-03-04 21:44:11] (step=0060492) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 11.835648601056544, LR: 0.0003 +[2026-03-04 21:44:19] (step=0060493) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.835844257483858, LR: 0.0003 +[2026-03-04 21:44:27] (step=0060494) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.836039913911172, LR: 0.0003 +[2026-03-04 21:44:35] (step=0060495) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.836235570338486, LR: 0.0003 +[2026-03-04 21:44:43] (step=0060496) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 11.8364312267658, LR: 0.0003 +[2026-03-04 21:44:51] (step=0060497) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.836626883193112, LR: 0.0003 +[2026-03-04 21:44:58] (step=0060498) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.836822539620426, LR: 0.0003 +[2026-03-04 21:45:06] (step=0060499) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.83701819604774, LR: 0.0003 +[2026-03-04 21:45:14] (step=0060500) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.837213852475054, LR: 0.0003 +[2026-03-04 21:45:14] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0060500/ +[2026-03-04 21:45:22] (step=0060501) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.837409508902368, LR: 0.0003 +[2026-03-04 21:45:30] (step=0060502) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.83760516532968, LR: 0.0003 +[2026-03-04 21:45:38] (step=0060503) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.837800821756995, LR: 0.0003 +[2026-03-04 21:45:46] (step=0060504) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 11.837996478184309, LR: 0.0003 +[2026-03-04 21:45:54] (step=0060505) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.838192134611623, LR: 0.0003 +[2026-03-04 21:46:01] (step=0060506) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.838387791038935, LR: 0.0003 +[2026-03-04 21:46:09] (step=0060507) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 11.838583447466249, LR: 0.0003 +[2026-03-04 21:46:17] (step=0060508) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.838779103893563, LR: 0.0003 +[2026-03-04 21:46:25] (step=0060509) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 11.838974760320877, LR: 0.0003 +[2026-03-04 21:46:33] (step=0060510) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.83917041674819, LR: 0.0003 +[2026-03-04 21:46:41] (step=0060511) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.839366073175503, LR: 0.0003 +[2026-03-04 21:46:49] (step=0060512) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.839561729602817, LR: 0.0003 +[2026-03-04 21:46:56] (step=0060513) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.839757386030131, LR: 0.0003 +[2026-03-04 21:47:04] (step=0060514) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 11.839953042457445, LR: 0.0003 +[2026-03-04 21:47:12] (step=0060515) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.840148698884759, LR: 0.0003 +[2026-03-04 21:47:20] (step=0060516) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.840344355312071, LR: 0.0003 +[2026-03-04 21:47:28] (step=0060517) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.840540011739385, LR: 0.0003 +[2026-03-04 21:47:36] (step=0060518) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.8407356681667, LR: 0.0003 +[2026-03-04 21:47:44] (step=0060519) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.840931324594013, LR: 0.0003 +[2026-03-04 21:47:51] (step=0060520) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.841126981021327, LR: 0.0003 +[2026-03-04 21:47:59] (step=0060521) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.84132263744864, LR: 0.0003 +[2026-03-04 21:48:07] (step=0060522) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.841518293875954, LR: 0.0003 +[2026-03-04 21:48:15] (step=0060523) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 11.841713950303268, LR: 0.0003 +[2026-03-04 21:48:23] (step=0060524) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.841909606730582, LR: 0.0003 +[2026-03-04 21:48:31] (step=0060525) Train Loss: 0.4229, Train Steps/Sec: 0.13, Epoch: 11.842105263157896, LR: 0.0003 +[2026-03-04 21:48:39] (step=0060526) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.842300919585208, LR: 0.0003 +[2026-03-04 21:48:47] (step=0060527) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.842496576012522, LR: 0.0003 +[2026-03-04 21:48:55] (step=0060528) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.842692232439836, LR: 0.0003 +[2026-03-04 21:49:02] (step=0060529) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.84288788886715, LR: 0.0003 +[2026-03-04 21:49:10] (step=0060530) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.843083545294462, LR: 0.0003 +[2026-03-04 21:49:18] (step=0060531) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.843279201721776, LR: 0.0003 +[2026-03-04 21:49:26] (step=0060532) Train Loss: 0.4400, Train Steps/Sec: 0.12, Epoch: 11.84347485814909, LR: 0.0003 +[2026-03-04 21:49:34] (step=0060533) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.843670514576404, LR: 0.0003 +[2026-03-04 21:49:42] (step=0060534) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.843866171003718, LR: 0.0003 +[2026-03-04 21:49:50] (step=0060535) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.84406182743103, LR: 0.0003 +[2026-03-04 21:49:58] (step=0060536) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.844257483858344, LR: 0.0003 +[2026-03-04 21:50:05] (step=0060537) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.844453140285658, LR: 0.0003 +[2026-03-04 21:50:13] (step=0060538) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.844648796712972, LR: 0.0003 +[2026-03-04 21:50:21] (step=0060539) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.844844453140286, LR: 0.0003 +[2026-03-04 21:50:29] (step=0060540) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.845040109567599, LR: 0.0003 +[2026-03-04 21:50:37] (step=0060541) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.845235765994913, LR: 0.0003 +[2026-03-04 21:50:45] (step=0060542) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.845431422422227, LR: 0.0003 +[2026-03-04 21:50:53] (step=0060543) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.84562707884954, LR: 0.0003 +[2026-03-04 21:51:00] (step=0060544) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.845822735276855, LR: 0.0003 +[2026-03-04 21:51:08] (step=0060545) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.846018391704167, LR: 0.0003 +[2026-03-04 21:51:16] (step=0060546) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 11.84621404813148, LR: 0.0003 +[2026-03-04 21:51:24] (step=0060547) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.846409704558795, LR: 0.0003 +[2026-03-04 21:51:32] (step=0060548) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.846605360986109, LR: 0.0003 +[2026-03-04 21:51:40] (step=0060549) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.846801017413423, LR: 0.0003 +[2026-03-04 21:51:48] (step=0060550) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.846996673840735, LR: 0.0003 +[2026-03-04 21:51:55] (step=0060551) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.847192330268049, LR: 0.0003 +[2026-03-04 21:52:03] (step=0060552) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.847387986695363, LR: 0.0003 +[2026-03-04 21:52:11] (step=0060553) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 11.847583643122677, LR: 0.0003 +[2026-03-04 21:52:19] (step=0060554) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.847779299549991, LR: 0.0003 +[2026-03-04 21:52:27] (step=0060555) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.847974955977303, LR: 0.0003 +[2026-03-04 21:52:35] (step=0060556) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 11.848170612404617, LR: 0.0003 +[2026-03-04 21:52:43] (step=0060557) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.848366268831931, LR: 0.0003 +[2026-03-04 21:52:50] (step=0060558) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.848561925259245, LR: 0.0003 +[2026-03-04 21:52:58] (step=0060559) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.848757581686558, LR: 0.0003 +[2026-03-04 21:53:06] (step=0060560) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.848953238113872, LR: 0.0003 +[2026-03-04 21:53:14] (step=0060561) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.849148894541186, LR: 0.0003 +[2026-03-04 21:53:22] (step=0060562) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.8493445509685, LR: 0.0003 +[2026-03-04 21:53:30] (step=0060563) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 11.849540207395814, LR: 0.0003 +[2026-03-04 21:53:38] (step=0060564) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.849735863823126, LR: 0.0003 +[2026-03-04 21:53:45] (step=0060565) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.84993152025044, LR: 0.0003 +[2026-03-04 21:53:53] (step=0060566) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.850127176677754, LR: 0.0003 +[2026-03-04 21:54:01] (step=0060567) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.850322833105068, LR: 0.0003 +[2026-03-04 21:54:09] (step=0060568) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.850518489532382, LR: 0.0003 +[2026-03-04 21:54:17] (step=0060569) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.850714145959694, LR: 0.0003 +[2026-03-04 21:54:25] (step=0060570) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.850909802387008, LR: 0.0003 +[2026-03-04 21:54:33] (step=0060571) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.851105458814322, LR: 0.0003 +[2026-03-04 21:54:40] (step=0060572) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.851301115241636, LR: 0.0003 +[2026-03-04 21:54:48] (step=0060573) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.85149677166895, LR: 0.0003 +[2026-03-04 21:54:56] (step=0060574) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 11.851692428096262, LR: 0.0003 +[2026-03-04 21:55:04] (step=0060575) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.851888084523576, LR: 0.0003 +[2026-03-04 21:55:12] (step=0060576) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.85208374095089, LR: 0.0003 +[2026-03-04 21:55:20] (step=0060577) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 11.852279397378204, LR: 0.0003 +[2026-03-04 21:55:28] (step=0060578) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.852475053805518, LR: 0.0003 +[2026-03-04 21:55:36] (step=0060579) Train Loss: 0.4549, Train Steps/Sec: 0.12, Epoch: 11.85267071023283, LR: 0.0003 +[2026-03-04 21:55:44] (step=0060580) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.852866366660145, LR: 0.0003 +[2026-03-04 21:55:52] (step=0060581) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.853062023087459, LR: 0.0003 +[2026-03-04 21:55:59] (step=0060582) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.853257679514773, LR: 0.0003 +[2026-03-04 21:56:07] (step=0060583) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.853453335942085, LR: 0.0003 +[2026-03-04 21:56:15] (step=0060584) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.853648992369399, LR: 0.0003 +[2026-03-04 21:56:23] (step=0060585) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 11.853844648796713, LR: 0.0003 +[2026-03-04 21:56:31] (step=0060586) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 11.854040305224027, LR: 0.0003 +[2026-03-04 21:56:39] (step=0060587) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.85423596165134, LR: 0.0003 +[2026-03-04 21:56:47] (step=0060588) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.854431618078653, LR: 0.0003 +[2026-03-04 21:56:54] (step=0060589) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.854627274505967, LR: 0.0003 +[2026-03-04 21:57:02] (step=0060590) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.854822930933281, LR: 0.0003 +[2026-03-04 21:57:10] (step=0060591) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 11.855018587360595, LR: 0.0003 +[2026-03-04 21:57:18] (step=0060592) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.85521424378791, LR: 0.0003 +[2026-03-04 21:57:26] (step=0060593) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.855409900215221, LR: 0.0003 +[2026-03-04 21:57:34] (step=0060594) Train Loss: 0.4262, Train Steps/Sec: 0.13, Epoch: 11.855605556642535, LR: 0.0003 +[2026-03-04 21:57:42] (step=0060595) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.85580121306985, LR: 0.0003 +[2026-03-04 21:57:49] (step=0060596) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.855996869497163, LR: 0.0003 +[2026-03-04 21:57:57] (step=0060597) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.856192525924477, LR: 0.0003 +[2026-03-04 21:58:05] (step=0060598) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.85638818235179, LR: 0.0003 +[2026-03-04 21:58:13] (step=0060599) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.856583838779104, LR: 0.0003 +[2026-03-04 21:58:21] (step=0060600) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.856779495206418, LR: 0.0003 +[2026-03-04 21:58:29] (step=0060601) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.856975151633732, LR: 0.0003 +[2026-03-04 21:58:37] (step=0060602) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.857170808061046, LR: 0.0003 +[2026-03-04 21:58:44] (step=0060603) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.857366464488358, LR: 0.0003 +[2026-03-04 21:58:52] (step=0060604) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 11.857562120915672, LR: 0.0003 +[2026-03-04 21:59:00] (step=0060605) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.857757777342986, LR: 0.0003 +[2026-03-04 21:59:08] (step=0060606) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.8579534337703, LR: 0.0003 +[2026-03-04 21:59:16] (step=0060607) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 11.858149090197614, LR: 0.0003 +[2026-03-04 21:59:24] (step=0060608) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 11.858344746624926, LR: 0.0003 +[2026-03-04 21:59:32] (step=0060609) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.85854040305224, LR: 0.0003 +[2026-03-04 21:59:39] (step=0060610) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 11.858736059479554, LR: 0.0003 +[2026-03-04 21:59:47] (step=0060611) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.858931715906868, LR: 0.0003 +[2026-03-04 21:59:55] (step=0060612) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.85912737233418, LR: 0.0003 +[2026-03-04 22:00:03] (step=0060613) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.859323028761494, LR: 0.0003 +[2026-03-04 22:00:11] (step=0060614) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.859518685188808, LR: 0.0003 +[2026-03-04 22:00:19] (step=0060615) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.859714341616122, LR: 0.0003 +[2026-03-04 22:00:27] (step=0060616) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.859909998043436, LR: 0.0003 +[2026-03-04 22:00:34] (step=0060617) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.860105654470749, LR: 0.0003 +[2026-03-04 22:00:42] (step=0060618) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.860301310898063, LR: 0.0003 +[2026-03-04 22:00:50] (step=0060619) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.860496967325377, LR: 0.0003 +[2026-03-04 22:00:58] (step=0060620) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.86069262375269, LR: 0.0003 +[2026-03-04 22:01:06] (step=0060621) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.860888280180005, LR: 0.0003 +[2026-03-04 22:01:14] (step=0060622) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.861083936607317, LR: 0.0003 +[2026-03-04 22:01:22] (step=0060623) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.86127959303463, LR: 0.0003 +[2026-03-04 22:01:29] (step=0060624) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.861475249461945, LR: 0.0003 +[2026-03-04 22:01:37] (step=0060625) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.861670905889259, LR: 0.0003 +[2026-03-04 22:01:45] (step=0060626) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.861866562316573, LR: 0.0003 +[2026-03-04 22:01:53] (step=0060627) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.862062218743885, LR: 0.0003 +[2026-03-04 22:02:01] (step=0060628) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.8622578751712, LR: 0.0003 +[2026-03-04 22:02:09] (step=0060629) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.862453531598513, LR: 0.0003 +[2026-03-04 22:02:17] (step=0060630) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.862649188025827, LR: 0.0003 +[2026-03-04 22:02:24] (step=0060631) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.862844844453141, LR: 0.0003 +[2026-03-04 22:02:32] (step=0060632) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.863040500880453, LR: 0.0003 +[2026-03-04 22:02:40] (step=0060633) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 11.863236157307767, LR: 0.0003 +[2026-03-04 22:02:48] (step=0060634) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.863431813735081, LR: 0.0003 +[2026-03-04 22:02:56] (step=0060635) Train Loss: 0.4462, Train Steps/Sec: 0.12, Epoch: 11.863627470162395, LR: 0.0003 +[2026-03-04 22:03:04] (step=0060636) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.863823126589708, LR: 0.0003 +[2026-03-04 22:03:12] (step=0060637) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.864018783017022, LR: 0.0003 +[2026-03-04 22:03:20] (step=0060638) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.864214439444336, LR: 0.0003 +[2026-03-04 22:03:28] (step=0060639) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 11.86441009587165, LR: 0.0003 +[2026-03-04 22:03:35] (step=0060640) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 11.864605752298964, LR: 0.0003 +[2026-03-04 22:03:43] (step=0060641) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.864801408726276, LR: 0.0003 +[2026-03-04 22:03:51] (step=0060642) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.86499706515359, LR: 0.0003 +[2026-03-04 22:03:59] (step=0060643) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.865192721580904, LR: 0.0003 +[2026-03-04 22:04:07] (step=0060644) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.865388378008218, LR: 0.0003 +[2026-03-04 22:04:15] (step=0060645) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.865584034435532, LR: 0.0003 +[2026-03-04 22:04:23] (step=0060646) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 11.865779690862844, LR: 0.0003 +[2026-03-04 22:04:30] (step=0060647) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.865975347290158, LR: 0.0003 +[2026-03-04 22:04:38] (step=0060648) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.866171003717472, LR: 0.0003 +[2026-03-04 22:04:46] (step=0060649) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.866366660144786, LR: 0.0003 +[2026-03-04 22:04:54] (step=0060650) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.8665623165721, LR: 0.0003 +[2026-03-04 22:05:02] (step=0060651) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.866757972999412, LR: 0.0003 +[2026-03-04 22:05:10] (step=0060652) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.866953629426726, LR: 0.0003 +[2026-03-04 22:05:18] (step=0060653) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.86714928585404, LR: 0.0003 +[2026-03-04 22:05:25] (step=0060654) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 11.867344942281354, LR: 0.0003 +[2026-03-04 22:05:33] (step=0060655) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.867540598708668, LR: 0.0003 +[2026-03-04 22:05:41] (step=0060656) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.86773625513598, LR: 0.0003 +[2026-03-04 22:05:49] (step=0060657) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.867931911563295, LR: 0.0003 +[2026-03-04 22:05:57] (step=0060658) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.868127567990609, LR: 0.0003 +[2026-03-04 22:06:05] (step=0060659) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.868323224417923, LR: 0.0003 +[2026-03-04 22:06:13] (step=0060660) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.868518880845237, LR: 0.0003 +[2026-03-04 22:06:20] (step=0060661) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.868714537272549, LR: 0.0003 +[2026-03-04 22:06:28] (step=0060662) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.868910193699863, LR: 0.0003 +[2026-03-04 22:06:36] (step=0060663) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.869105850127177, LR: 0.0003 +[2026-03-04 22:06:44] (step=0060664) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.869301506554491, LR: 0.0003 +[2026-03-04 22:06:52] (step=0060665) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.869497162981803, LR: 0.0003 +[2026-03-04 22:07:00] (step=0060666) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 11.869692819409117, LR: 0.0003 +[2026-03-04 22:07:08] (step=0060667) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.869888475836431, LR: 0.0003 +[2026-03-04 22:07:15] (step=0060668) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.870084132263745, LR: 0.0003 +[2026-03-04 22:07:23] (step=0060669) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 11.87027978869106, LR: 0.0003 +[2026-03-04 22:07:31] (step=0060670) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.870475445118371, LR: 0.0003 +[2026-03-04 22:07:39] (step=0060671) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.870671101545685, LR: 0.0003 +[2026-03-04 22:07:47] (step=0060672) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.870866757973, LR: 0.0003 +[2026-03-04 22:07:55] (step=0060673) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.871062414400313, LR: 0.0003 +[2026-03-04 22:08:03] (step=0060674) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.871258070827627, LR: 0.0003 +[2026-03-04 22:08:10] (step=0060675) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.87145372725494, LR: 0.0003 +[2026-03-04 22:08:18] (step=0060676) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.871649383682254, LR: 0.0003 +[2026-03-04 22:08:26] (step=0060677) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.871845040109568, LR: 0.0003 +[2026-03-04 22:08:34] (step=0060678) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.872040696536882, LR: 0.0003 +[2026-03-04 22:08:42] (step=0060679) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.872236352964196, LR: 0.0003 +[2026-03-04 22:08:50] (step=0060680) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.872432009391508, LR: 0.0003 +[2026-03-04 22:08:58] (step=0060681) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 11.872627665818822, LR: 0.0003 +[2026-03-04 22:09:06] (step=0060682) Train Loss: 0.4533, Train Steps/Sec: 0.12, Epoch: 11.872823322246136, LR: 0.0003 +[2026-03-04 22:09:13] (step=0060683) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.87301897867345, LR: 0.0003 +[2026-03-04 22:09:21] (step=0060684) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.873214635100764, LR: 0.0003 +[2026-03-04 22:09:29] (step=0060685) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 11.873410291528076, LR: 0.0003 +[2026-03-04 22:09:37] (step=0060686) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 11.87360594795539, LR: 0.0003 +[2026-03-04 22:09:45] (step=0060687) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.873801604382704, LR: 0.0003 +[2026-03-04 22:09:53] (step=0060688) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.873997260810018, LR: 0.0003 +[2026-03-04 22:10:01] (step=0060689) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.87419291723733, LR: 0.0003 +[2026-03-04 22:10:09] (step=0060690) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.874388573664644, LR: 0.0003 +[2026-03-04 22:10:16] (step=0060691) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.874584230091958, LR: 0.0003 +[2026-03-04 22:10:24] (step=0060692) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.874779886519272, LR: 0.0003 +[2026-03-04 22:10:32] (step=0060693) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.874975542946586, LR: 0.0003 +[2026-03-04 22:10:40] (step=0060694) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.875171199373899, LR: 0.0003 +[2026-03-04 22:10:48] (step=0060695) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 11.875366855801213, LR: 0.0003 +[2026-03-04 22:10:56] (step=0060696) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.875562512228527, LR: 0.0003 +[2026-03-04 22:11:04] (step=0060697) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.87575816865584, LR: 0.0003 +[2026-03-04 22:11:11] (step=0060698) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 11.875953825083155, LR: 0.0003 +[2026-03-04 22:11:19] (step=0060699) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.876149481510467, LR: 0.0003 +[2026-03-04 22:11:27] (step=0060700) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.876345137937781, LR: 0.0003 +[2026-03-04 22:11:35] (step=0060701) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.876540794365095, LR: 0.0003 +[2026-03-04 22:11:43] (step=0060702) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.876736450792409, LR: 0.0003 +[2026-03-04 22:11:51] (step=0060703) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 11.876932107219723, LR: 0.0003 +[2026-03-04 22:11:59] (step=0060704) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.877127763647035, LR: 0.0003 +[2026-03-04 22:12:06] (step=0060705) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.87732342007435, LR: 0.0003 +[2026-03-04 22:12:14] (step=0060706) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.877519076501663, LR: 0.0003 +[2026-03-04 22:12:22] (step=0060707) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.877714732928977, LR: 0.0003 +[2026-03-04 22:12:30] (step=0060708) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.877910389356291, LR: 0.0003 +[2026-03-04 22:12:38] (step=0060709) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.878106045783603, LR: 0.0003 +[2026-03-04 22:12:46] (step=0060710) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.878301702210917, LR: 0.0003 +[2026-03-04 22:12:54] (step=0060711) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.878497358638231, LR: 0.0003 +[2026-03-04 22:13:01] (step=0060712) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.878693015065545, LR: 0.0003 +[2026-03-04 22:13:09] (step=0060713) Train Loss: 0.4651, Train Steps/Sec: 0.13, Epoch: 11.878888671492858, LR: 0.0003 +[2026-03-04 22:13:17] (step=0060714) Train Loss: 0.4240, Train Steps/Sec: 0.13, Epoch: 11.879084327920172, LR: 0.0003 +[2026-03-04 22:13:25] (step=0060715) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.879279984347486, LR: 0.0003 +[2026-03-04 22:13:33] (step=0060716) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.8794756407748, LR: 0.0003 +[2026-03-04 22:13:41] (step=0060717) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.879671297202114, LR: 0.0003 +[2026-03-04 22:13:49] (step=0060718) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.879866953629426, LR: 0.0003 +[2026-03-04 22:13:56] (step=0060719) Train Loss: 0.4237, Train Steps/Sec: 0.13, Epoch: 11.88006261005674, LR: 0.0003 +[2026-03-04 22:14:04] (step=0060720) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.880258266484054, LR: 0.0003 +[2026-03-04 22:14:12] (step=0060721) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.880453922911368, LR: 0.0003 +[2026-03-04 22:14:20] (step=0060722) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.880649579338682, LR: 0.0003 +[2026-03-04 22:14:28] (step=0060723) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.880845235765994, LR: 0.0003 +[2026-03-04 22:14:36] (step=0060724) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.881040892193308, LR: 0.0003 +[2026-03-04 22:14:44] (step=0060725) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.881236548620622, LR: 0.0003 +[2026-03-04 22:14:51] (step=0060726) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.881432205047936, LR: 0.0003 +[2026-03-04 22:14:59] (step=0060727) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.88162786147525, LR: 0.0003 +[2026-03-04 22:15:07] (step=0060728) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 11.881823517902562, LR: 0.0003 +[2026-03-04 22:15:15] (step=0060729) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.882019174329876, LR: 0.0003 +[2026-03-04 22:15:23] (step=0060730) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.88221483075719, LR: 0.0003 +[2026-03-04 22:15:31] (step=0060731) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.882410487184504, LR: 0.0003 +[2026-03-04 22:15:39] (step=0060732) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.882606143611818, LR: 0.0003 +[2026-03-04 22:15:47] (step=0060733) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.88280180003913, LR: 0.0003 +[2026-03-04 22:15:54] (step=0060734) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.882997456466445, LR: 0.0003 +[2026-03-04 22:16:02] (step=0060735) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.883193112893759, LR: 0.0003 +[2026-03-04 22:16:10] (step=0060736) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.883388769321073, LR: 0.0003 +[2026-03-04 22:16:18] (step=0060737) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.883584425748387, LR: 0.0003 +[2026-03-04 22:16:26] (step=0060738) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 11.883780082175699, LR: 0.0003 +[2026-03-04 22:16:34] (step=0060739) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.883975738603013, LR: 0.0003 +[2026-03-04 22:16:42] (step=0060740) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.884171395030327, LR: 0.0003 +[2026-03-04 22:16:49] (step=0060741) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.884367051457641, LR: 0.0003 +[2026-03-04 22:16:57] (step=0060742) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.884562707884953, LR: 0.0003 +[2026-03-04 22:17:05] (step=0060743) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 11.884758364312267, LR: 0.0003 +[2026-03-04 22:17:13] (step=0060744) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.884954020739581, LR: 0.0003 +[2026-03-04 22:17:21] (step=0060745) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.885149677166895, LR: 0.0003 +[2026-03-04 22:17:29] (step=0060746) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.88534533359421, LR: 0.0003 +[2026-03-04 22:17:36] (step=0060747) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.885540990021521, LR: 0.0003 +[2026-03-04 22:17:44] (step=0060748) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.885736646448835, LR: 0.0003 +[2026-03-04 22:17:52] (step=0060749) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 11.88593230287615, LR: 0.0003 +[2026-03-04 22:18:00] (step=0060750) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.886127959303463, LR: 0.0003 +[2026-03-04 22:18:08] (step=0060751) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.886323615730777, LR: 0.0003 +[2026-03-04 22:18:16] (step=0060752) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 11.88651927215809, LR: 0.0003 +[2026-03-04 22:18:24] (step=0060753) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 11.886714928585404, LR: 0.0003 +[2026-03-04 22:18:32] (step=0060754) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.886910585012718, LR: 0.0003 +[2026-03-04 22:18:39] (step=0060755) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.887106241440032, LR: 0.0003 +[2026-03-04 22:18:47] (step=0060756) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.887301897867346, LR: 0.0003 +[2026-03-04 22:18:55] (step=0060757) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.887497554294658, LR: 0.0003 +[2026-03-04 22:19:03] (step=0060758) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 11.887693210721972, LR: 0.0003 +[2026-03-04 22:19:11] (step=0060759) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 11.887888867149286, LR: 0.0003 +[2026-03-04 22:19:19] (step=0060760) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.8880845235766, LR: 0.0003 +[2026-03-04 22:19:27] (step=0060761) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.888280180003914, LR: 0.0003 +[2026-03-04 22:19:34] (step=0060762) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 11.888475836431226, LR: 0.0003 +[2026-03-04 22:19:42] (step=0060763) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 11.88867149285854, LR: 0.0003 +[2026-03-04 22:19:50] (step=0060764) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.888867149285854, LR: 0.0003 +[2026-03-04 22:19:58] (step=0060765) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 11.889062805713168, LR: 0.0003 +[2026-03-04 22:20:06] (step=0060766) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.88925846214048, LR: 0.0003 +[2026-03-04 22:20:14] (step=0060767) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.889454118567794, LR: 0.0003 +[2026-03-04 22:20:22] (step=0060768) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.889649774995108, LR: 0.0003 +[2026-03-04 22:20:29] (step=0060769) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.889845431422422, LR: 0.0003 +[2026-03-04 22:20:37] (step=0060770) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.890041087849736, LR: 0.0003 +[2026-03-04 22:20:45] (step=0060771) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.890236744277049, LR: 0.0003 +[2026-03-04 22:20:53] (step=0060772) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.890432400704363, LR: 0.0003 +[2026-03-04 22:21:01] (step=0060773) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.890628057131677, LR: 0.0003 +[2026-03-04 22:21:09] (step=0060774) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.89082371355899, LR: 0.0003 +[2026-03-04 22:21:16] (step=0060775) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.891019369986305, LR: 0.0003 +[2026-03-04 22:21:24] (step=0060776) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.891215026413617, LR: 0.0003 +[2026-03-04 22:21:32] (step=0060777) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.891410682840931, LR: 0.0003 +[2026-03-04 22:21:40] (step=0060778) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.891606339268245, LR: 0.0003 +[2026-03-04 22:21:48] (step=0060779) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.891801995695559, LR: 0.0003 +[2026-03-04 22:21:56] (step=0060780) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.891997652122873, LR: 0.0003 +[2026-03-04 22:22:04] (step=0060781) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 11.892193308550185, LR: 0.0003 +[2026-03-04 22:22:12] (step=0060782) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.8923889649775, LR: 0.0003 +[2026-03-04 22:22:20] (step=0060783) Train Loss: 0.4368, Train Steps/Sec: 0.12, Epoch: 11.892584621404813, LR: 0.0003 +[2026-03-04 22:22:27] (step=0060784) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.892780277832127, LR: 0.0003 +[2026-03-04 22:22:35] (step=0060785) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.892975934259441, LR: 0.0003 +[2026-03-04 22:22:43] (step=0060786) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.893171590686753, LR: 0.0003 +[2026-03-04 22:22:51] (step=0060787) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.893367247114067, LR: 0.0003 +[2026-03-04 22:22:59] (step=0060788) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.893562903541381, LR: 0.0003 +[2026-03-04 22:23:07] (step=0060789) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.893758559968695, LR: 0.0003 +[2026-03-04 22:23:15] (step=0060790) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.89395421639601, LR: 0.0003 +[2026-03-04 22:23:22] (step=0060791) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 11.894149872823322, LR: 0.0003 +[2026-03-04 22:23:30] (step=0060792) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 11.894345529250636, LR: 0.0003 +[2026-03-04 22:23:38] (step=0060793) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 11.89454118567795, LR: 0.0003 +[2026-03-04 22:23:46] (step=0060794) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 11.894736842105264, LR: 0.0003 +[2026-03-04 22:23:54] (step=0060795) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 11.894932498532576, LR: 0.0003 +[2026-03-04 22:24:02] (step=0060796) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 11.89512815495989, LR: 0.0003 +[2026-03-04 22:24:10] (step=0060797) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 11.895323811387204, LR: 0.0003 +[2026-03-04 22:24:17] (step=0060798) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.895519467814518, LR: 0.0003 +[2026-03-04 22:24:25] (step=0060799) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.895715124241832, LR: 0.0003 +[2026-03-04 22:24:33] (step=0060800) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 11.895910780669144, LR: 0.0003 +[2026-03-04 22:24:41] (step=0060801) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.896106437096458, LR: 0.0003 +[2026-03-04 22:24:49] (step=0060802) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.896302093523772, LR: 0.0003 +[2026-03-04 22:24:57] (step=0060803) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 11.896497749951086, LR: 0.0003 +[2026-03-04 22:25:04] (step=0060804) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.8966934063784, LR: 0.0003 +[2026-03-04 22:25:12] (step=0060805) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.896889062805712, LR: 0.0003 +[2026-03-04 22:25:20] (step=0060806) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.897084719233026, LR: 0.0003 +[2026-03-04 22:25:28] (step=0060807) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.89728037566034, LR: 0.0003 +[2026-03-04 22:25:36] (step=0060808) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.897476032087654, LR: 0.0003 +[2026-03-04 22:25:44] (step=0060809) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 11.897671688514968, LR: 0.0003 +[2026-03-04 22:25:52] (step=0060810) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.89786734494228, LR: 0.0003 +[2026-03-04 22:25:59] (step=0060811) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.898063001369595, LR: 0.0003 +[2026-03-04 22:26:07] (step=0060812) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.898258657796909, LR: 0.0003 +[2026-03-04 22:26:15] (step=0060813) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.898454314224223, LR: 0.0003 +[2026-03-04 22:26:23] (step=0060814) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.898649970651537, LR: 0.0003 +[2026-03-04 22:26:31] (step=0060815) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 11.898845627078849, LR: 0.0003 +[2026-03-04 22:26:39] (step=0060816) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.899041283506163, LR: 0.0003 +[2026-03-04 22:26:47] (step=0060817) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.899236939933477, LR: 0.0003 +[2026-03-04 22:26:54] (step=0060818) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.899432596360791, LR: 0.0003 +[2026-03-04 22:27:02] (step=0060819) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.899628252788103, LR: 0.0003 +[2026-03-04 22:27:10] (step=0060820) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.899823909215417, LR: 0.0003 +[2026-03-04 22:27:18] (step=0060821) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.900019565642731, LR: 0.0003 +[2026-03-04 22:27:26] (step=0060822) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.900215222070045, LR: 0.0003 +[2026-03-04 22:27:34] (step=0060823) Train Loss: 0.4406, Train Steps/Sec: 0.12, Epoch: 11.90041087849736, LR: 0.0003 +[2026-03-04 22:27:42] (step=0060824) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.900606534924671, LR: 0.0003 +[2026-03-04 22:27:50] (step=0060825) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.900802191351985, LR: 0.0003 +[2026-03-04 22:27:57] (step=0060826) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.9009978477793, LR: 0.0003 +[2026-03-04 22:28:05] (step=0060827) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.901193504206613, LR: 0.0003 +[2026-03-04 22:28:13] (step=0060828) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.901389160633927, LR: 0.0003 +[2026-03-04 22:28:21] (step=0060829) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.90158481706124, LR: 0.0003 +[2026-03-04 22:28:29] (step=0060830) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.901780473488554, LR: 0.0003 +[2026-03-04 22:28:37] (step=0060831) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.901976129915868, LR: 0.0003 +[2026-03-04 22:28:45] (step=0060832) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.902171786343182, LR: 0.0003 +[2026-03-04 22:28:52] (step=0060833) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.902367442770496, LR: 0.0003 +[2026-03-04 22:29:00] (step=0060834) Train Loss: 0.4441, Train Steps/Sec: 0.12, Epoch: 11.902563099197808, LR: 0.0003 +[2026-03-04 22:29:08] (step=0060835) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.902758755625122, LR: 0.0003 +[2026-03-04 22:29:16] (step=0060836) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 11.902954412052436, LR: 0.0003 +[2026-03-04 22:29:24] (step=0060837) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.90315006847975, LR: 0.0003 +[2026-03-04 22:29:32] (step=0060838) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.903345724907064, LR: 0.0003 +[2026-03-04 22:29:40] (step=0060839) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.903541381334376, LR: 0.0003 +[2026-03-04 22:29:48] (step=0060840) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 11.90373703776169, LR: 0.0003 +[2026-03-04 22:29:55] (step=0060841) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 11.903932694189004, LR: 0.0003 +[2026-03-04 22:30:03] (step=0060842) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 11.904128350616318, LR: 0.0003 +[2026-03-04 22:30:11] (step=0060843) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.904324007043632, LR: 0.0003 +[2026-03-04 22:30:19] (step=0060844) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.904519663470944, LR: 0.0003 +[2026-03-04 22:30:27] (step=0060845) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 11.904715319898258, LR: 0.0003 +[2026-03-04 22:30:35] (step=0060846) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.904910976325572, LR: 0.0003 +[2026-03-04 22:30:43] (step=0060847) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 11.905106632752886, LR: 0.0003 +[2026-03-04 22:30:50] (step=0060848) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 11.905302289180199, LR: 0.0003 +[2026-03-04 22:30:58] (step=0060849) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.905497945607513, LR: 0.0003 +[2026-03-04 22:31:06] (step=0060850) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.905693602034827, LR: 0.0003 +[2026-03-04 22:31:14] (step=0060851) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.90588925846214, LR: 0.0003 +[2026-03-04 22:31:22] (step=0060852) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 11.906084914889455, LR: 0.0003 +[2026-03-04 22:31:30] (step=0060853) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.906280571316767, LR: 0.0003 +[2026-03-04 22:31:37] (step=0060854) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.906476227744081, LR: 0.0003 +[2026-03-04 22:31:45] (step=0060855) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.906671884171395, LR: 0.0003 +[2026-03-04 22:31:53] (step=0060856) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.906867540598709, LR: 0.0003 +[2026-03-04 22:32:01] (step=0060857) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 11.907063197026023, LR: 0.0003 +[2026-03-04 22:32:09] (step=0060858) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.907258853453335, LR: 0.0003 +[2026-03-04 22:32:17] (step=0060859) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.90745450988065, LR: 0.0003 +[2026-03-04 22:32:25] (step=0060860) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.907650166307963, LR: 0.0003 +[2026-03-04 22:32:32] (step=0060861) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.907845822735277, LR: 0.0003 +[2026-03-04 22:32:40] (step=0060862) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.908041479162591, LR: 0.0003 +[2026-03-04 22:32:48] (step=0060863) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.908237135589903, LR: 0.0003 +[2026-03-04 22:32:56] (step=0060864) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.908432792017217, LR: 0.0003 +[2026-03-04 22:33:04] (step=0060865) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.908628448444532, LR: 0.0003 +[2026-03-04 22:33:12] (step=0060866) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 11.908824104871846, LR: 0.0003 +[2026-03-04 22:33:20] (step=0060867) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.90901976129916, LR: 0.0003 +[2026-03-04 22:33:27] (step=0060868) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.909215417726472, LR: 0.0003 +[2026-03-04 22:33:35] (step=0060869) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 11.909411074153786, LR: 0.0003 +[2026-03-04 22:33:43] (step=0060870) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 11.9096067305811, LR: 0.0003 +[2026-03-04 22:33:51] (step=0060871) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 11.909802387008414, LR: 0.0003 +[2026-03-04 22:33:59] (step=0060872) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.909998043435726, LR: 0.0003 +[2026-03-04 22:34:07] (step=0060873) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.91019369986304, LR: 0.0003 +[2026-03-04 22:34:15] (step=0060874) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.910389356290354, LR: 0.0003 +[2026-03-04 22:34:23] (step=0060875) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.910585012717668, LR: 0.0003 +[2026-03-04 22:34:30] (step=0060876) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.910780669144982, LR: 0.0003 +[2026-03-04 22:34:38] (step=0060877) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.910976325572294, LR: 0.0003 +[2026-03-04 22:34:46] (step=0060878) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 11.911171981999608, LR: 0.0003 +[2026-03-04 22:34:54] (step=0060879) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.911367638426922, LR: 0.0003 +[2026-03-04 22:35:02] (step=0060880) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 11.911563294854236, LR: 0.0003 +[2026-03-04 22:35:10] (step=0060881) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.91175895128155, LR: 0.0003 +[2026-03-04 22:35:18] (step=0060882) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.911954607708862, LR: 0.0003 +[2026-03-04 22:35:25] (step=0060883) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 11.912150264136177, LR: 0.0003 +[2026-03-04 22:35:33] (step=0060884) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.91234592056349, LR: 0.0003 +[2026-03-04 22:35:41] (step=0060885) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.912541576990805, LR: 0.0003 +[2026-03-04 22:35:49] (step=0060886) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.912737233418119, LR: 0.0003 +[2026-03-04 22:35:57] (step=0060887) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.91293288984543, LR: 0.0003 +[2026-03-04 22:36:05] (step=0060888) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 11.913128546272745, LR: 0.0003 +[2026-03-04 22:36:13] (step=0060889) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 11.913324202700059, LR: 0.0003 +[2026-03-04 22:36:20] (step=0060890) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.913519859127373, LR: 0.0003 +[2026-03-04 22:36:28] (step=0060891) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.913715515554687, LR: 0.0003 +[2026-03-04 22:36:36] (step=0060892) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.913911171981999, LR: 0.0003 +[2026-03-04 22:36:44] (step=0060893) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.914106828409313, LR: 0.0003 +[2026-03-04 22:36:52] (step=0060894) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 11.914302484836627, LR: 0.0003 +[2026-03-04 22:37:00] (step=0060895) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 11.914498141263941, LR: 0.0003 +[2026-03-04 22:37:08] (step=0060896) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 11.914693797691255, LR: 0.0003 +[2026-03-04 22:37:15] (step=0060897) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.914889454118567, LR: 0.0003 +[2026-03-04 22:37:23] (step=0060898) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.915085110545881, LR: 0.0003 +[2026-03-04 22:37:31] (step=0060899) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.915280766973195, LR: 0.0003 +[2026-03-04 22:37:39] (step=0060900) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 11.91547642340051, LR: 0.0003 +[2026-03-04 22:37:47] (step=0060901) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.915672079827822, LR: 0.0003 +[2026-03-04 22:37:55] (step=0060902) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 11.915867736255136, LR: 0.0003 +[2026-03-04 22:38:03] (step=0060903) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.91606339268245, LR: 0.0003 +[2026-03-04 22:38:10] (step=0060904) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.916259049109764, LR: 0.0003 +[2026-03-04 22:38:18] (step=0060905) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.916454705537078, LR: 0.0003 +[2026-03-04 22:38:26] (step=0060906) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.91665036196439, LR: 0.0003 +[2026-03-04 22:38:34] (step=0060907) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.916846018391704, LR: 0.0003 +[2026-03-04 22:38:42] (step=0060908) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.917041674819018, LR: 0.0003 +[2026-03-04 22:38:50] (step=0060909) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 11.917237331246332, LR: 0.0003 +[2026-03-04 22:38:58] (step=0060910) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.917432987673646, LR: 0.0003 +[2026-03-04 22:39:05] (step=0060911) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.917628644100958, LR: 0.0003 +[2026-03-04 22:39:13] (step=0060912) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 11.917824300528272, LR: 0.0003 +[2026-03-04 22:39:21] (step=0060913) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.918019956955586, LR: 0.0003 +[2026-03-04 22:39:29] (step=0060914) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 11.9182156133829, LR: 0.0003 +[2026-03-04 22:39:37] (step=0060915) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.918411269810214, LR: 0.0003 +[2026-03-04 22:39:45] (step=0060916) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.918606926237526, LR: 0.0003 +[2026-03-04 22:39:52] (step=0060917) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.91880258266484, LR: 0.0003 +[2026-03-04 22:40:00] (step=0060918) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.918998239092154, LR: 0.0003 +[2026-03-04 22:40:08] (step=0060919) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.919193895519468, LR: 0.0003 +[2026-03-04 22:40:16] (step=0060920) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.919389551946782, LR: 0.0003 +[2026-03-04 22:40:24] (step=0060921) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 11.919585208374095, LR: 0.0003 +[2026-03-04 22:40:32] (step=0060922) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 11.919780864801409, LR: 0.0003 +[2026-03-04 22:40:40] (step=0060923) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.919976521228723, LR: 0.0003 +[2026-03-04 22:40:48] (step=0060924) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.920172177656037, LR: 0.0003 +[2026-03-04 22:40:55] (step=0060925) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.920367834083349, LR: 0.0003 +[2026-03-04 22:41:03] (step=0060926) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 11.920563490510663, LR: 0.0003 +[2026-03-04 22:41:11] (step=0060927) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.920759146937977, LR: 0.0003 +[2026-03-04 22:41:19] (step=0060928) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.92095480336529, LR: 0.0003 +[2026-03-04 22:41:27] (step=0060929) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.921150459792605, LR: 0.0003 +[2026-03-04 22:41:35] (step=0060930) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.921346116219917, LR: 0.0003 +[2026-03-04 22:41:43] (step=0060931) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.921541772647231, LR: 0.0003 +[2026-03-04 22:41:51] (step=0060932) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.921737429074545, LR: 0.0003 +[2026-03-04 22:41:58] (step=0060933) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.921933085501859, LR: 0.0003 +[2026-03-04 22:42:06] (step=0060934) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 11.922128741929173, LR: 0.0003 +[2026-03-04 22:42:14] (step=0060935) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 11.922324398356485, LR: 0.0003 +[2026-03-04 22:42:22] (step=0060936) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.9225200547838, LR: 0.0003 +[2026-03-04 22:42:30] (step=0060937) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 11.922715711211113, LR: 0.0003 +[2026-03-04 22:42:38] (step=0060938) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.922911367638427, LR: 0.0003 +[2026-03-04 22:42:46] (step=0060939) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.923107024065741, LR: 0.0003 +[2026-03-04 22:42:53] (step=0060940) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.923302680493054, LR: 0.0003 +[2026-03-04 22:43:01] (step=0060941) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.923498336920368, LR: 0.0003 +[2026-03-04 22:43:09] (step=0060942) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 11.923693993347682, LR: 0.0003 +[2026-03-04 22:43:17] (step=0060943) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.923889649774996, LR: 0.0003 +[2026-03-04 22:43:25] (step=0060944) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.92408530620231, LR: 0.0003 +[2026-03-04 22:43:33] (step=0060945) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.924280962629622, LR: 0.0003 +[2026-03-04 22:43:41] (step=0060946) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 11.924476619056936, LR: 0.0003 +[2026-03-04 22:43:48] (step=0060947) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.92467227548425, LR: 0.0003 +[2026-03-04 22:43:56] (step=0060948) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 11.924867931911564, LR: 0.0003 +[2026-03-04 22:44:04] (step=0060949) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.925063588338878, LR: 0.0003 +[2026-03-04 22:44:12] (step=0060950) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.92525924476619, LR: 0.0003 +[2026-03-04 22:44:20] (step=0060951) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 11.925454901193504, LR: 0.0003 +[2026-03-04 22:44:28] (step=0060952) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 11.925650557620818, LR: 0.0003 +[2026-03-04 22:44:35] (step=0060953) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.925846214048132, LR: 0.0003 +[2026-03-04 22:44:43] (step=0060954) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 11.926041870475444, LR: 0.0003 +[2026-03-04 22:44:51] (step=0060955) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.926237526902758, LR: 0.0003 +[2026-03-04 22:44:59] (step=0060956) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.926433183330072, LR: 0.0003 +[2026-03-04 22:45:07] (step=0060957) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.926628839757386, LR: 0.0003 +[2026-03-04 22:45:15] (step=0060958) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 11.9268244961847, LR: 0.0003 +[2026-03-04 22:45:23] (step=0060959) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 11.927020152612013, LR: 0.0003 +[2026-03-04 22:45:30] (step=0060960) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.927215809039327, LR: 0.0003 +[2026-03-04 22:45:38] (step=0060961) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.92741146546664, LR: 0.0003 +[2026-03-04 22:45:46] (step=0060962) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.927607121893955, LR: 0.0003 +[2026-03-04 22:45:54] (step=0060963) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 11.927802778321269, LR: 0.0003 +[2026-03-04 22:46:02] (step=0060964) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.92799843474858, LR: 0.0003 +[2026-03-04 22:46:10] (step=0060965) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.928194091175895, LR: 0.0003 +[2026-03-04 22:46:18] (step=0060966) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.928389747603209, LR: 0.0003 +[2026-03-04 22:46:25] (step=0060967) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.928585404030523, LR: 0.0003 +[2026-03-04 22:46:33] (step=0060968) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.928781060457837, LR: 0.0003 +[2026-03-04 22:46:41] (step=0060969) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 11.928976716885149, LR: 0.0003 +[2026-03-04 22:46:49] (step=0060970) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 11.929172373312463, LR: 0.0003 +[2026-03-04 22:46:57] (step=0060971) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.929368029739777, LR: 0.0003 +[2026-03-04 22:47:05] (step=0060972) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.929563686167091, LR: 0.0003 +[2026-03-04 22:47:13] (step=0060973) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.929759342594405, LR: 0.0003 +[2026-03-04 22:47:20] (step=0060974) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.929954999021717, LR: 0.0003 +[2026-03-04 22:47:28] (step=0060975) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.930150655449031, LR: 0.0003 +[2026-03-04 22:47:36] (step=0060976) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.930346311876345, LR: 0.0003 +[2026-03-04 22:47:44] (step=0060977) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.93054196830366, LR: 0.0003 +[2026-03-04 22:47:52] (step=0060978) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.930737624730972, LR: 0.0003 +[2026-03-04 22:48:00] (step=0060979) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.930933281158286, LR: 0.0003 +[2026-03-04 22:48:08] (step=0060980) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 11.9311289375856, LR: 0.0003 +[2026-03-04 22:48:15] (step=0060981) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 11.931324594012914, LR: 0.0003 +[2026-03-04 22:48:23] (step=0060982) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 11.931520250440228, LR: 0.0003 +[2026-03-04 22:48:31] (step=0060983) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.93171590686754, LR: 0.0003 +[2026-03-04 22:48:39] (step=0060984) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 11.931911563294854, LR: 0.0003 +[2026-03-04 22:48:47] (step=0060985) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 11.932107219722168, LR: 0.0003 +[2026-03-04 22:48:55] (step=0060986) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.932302876149482, LR: 0.0003 +[2026-03-04 22:49:03] (step=0060987) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.932498532576796, LR: 0.0003 +[2026-03-04 22:49:10] (step=0060988) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.932694189004108, LR: 0.0003 +[2026-03-04 22:49:18] (step=0060989) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 11.932889845431422, LR: 0.0003 +[2026-03-04 22:49:26] (step=0060990) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.933085501858736, LR: 0.0003 +[2026-03-04 22:49:34] (step=0060991) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 11.93328115828605, LR: 0.0003 +[2026-03-04 22:49:42] (step=0060992) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.933476814713364, LR: 0.0003 +[2026-03-04 22:49:50] (step=0060993) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.933672471140676, LR: 0.0003 +[2026-03-04 22:49:58] (step=0060994) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.93386812756799, LR: 0.0003 +[2026-03-04 22:50:05] (step=0060995) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 11.934063783995304, LR: 0.0003 +[2026-03-04 22:50:13] (step=0060996) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.934259440422618, LR: 0.0003 +[2026-03-04 22:50:21] (step=0060997) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.934455096849932, LR: 0.0003 +[2026-03-04 22:50:29] (step=0060998) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 11.934650753277245, LR: 0.0003 +[2026-03-04 22:50:37] (step=0060999) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.934846409704559, LR: 0.0003 +[2026-03-04 22:50:45] (step=0061000) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.935042066131873, LR: 0.0003 +[2026-03-04 22:50:45] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0061000/ +[2026-03-04 22:50:53] (step=0061001) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 11.935237722559187, LR: 0.0003 +[2026-03-04 22:51:00] (step=0061002) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.9354333789865, LR: 0.0003 +[2026-03-04 22:51:08] (step=0061003) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.935629035413813, LR: 0.0003 +[2026-03-04 22:51:16] (step=0061004) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 11.935824691841127, LR: 0.0003 +[2026-03-04 22:51:24] (step=0061005) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 11.93602034826844, LR: 0.0003 +[2026-03-04 22:51:32] (step=0061006) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.936216004695755, LR: 0.0003 +[2026-03-04 22:51:40] (step=0061007) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.936411661123067, LR: 0.0003 +[2026-03-04 22:51:48] (step=0061008) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 11.936607317550381, LR: 0.0003 +[2026-03-04 22:51:55] (step=0061009) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 11.936802973977695, LR: 0.0003 +[2026-03-04 22:52:03] (step=0061010) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.936998630405009, LR: 0.0003 +[2026-03-04 22:52:11] (step=0061011) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.937194286832323, LR: 0.0003 +[2026-03-04 22:52:19] (step=0061012) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.937389943259635, LR: 0.0003 +[2026-03-04 22:52:27] (step=0061013) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 11.93758559968695, LR: 0.0003 +[2026-03-04 22:52:35] (step=0061014) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.937781256114263, LR: 0.0003 +[2026-03-04 22:52:43] (step=0061015) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.937976912541577, LR: 0.0003 +[2026-03-04 22:52:50] (step=0061016) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.938172568968891, LR: 0.0003 +[2026-03-04 22:52:58] (step=0061017) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 11.938368225396204, LR: 0.0003 +[2026-03-04 22:53:06] (step=0061018) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.938563881823518, LR: 0.0003 +[2026-03-04 22:53:14] (step=0061019) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 11.938759538250832, LR: 0.0003 +[2026-03-04 22:53:22] (step=0061020) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.938955194678146, LR: 0.0003 +[2026-03-04 22:53:30] (step=0061021) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.93915085110546, LR: 0.0003 +[2026-03-04 22:53:38] (step=0061022) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.939346507532772, LR: 0.0003 +[2026-03-04 22:53:45] (step=0061023) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.939542163960086, LR: 0.0003 +[2026-03-04 22:53:53] (step=0061024) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.9397378203874, LR: 0.0003 +[2026-03-04 22:54:01] (step=0061025) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 11.939933476814714, LR: 0.0003 +[2026-03-04 22:54:09] (step=0061026) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 11.940129133242028, LR: 0.0003 +[2026-03-04 22:54:17] (step=0061027) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 11.94032478966934, LR: 0.0003 +[2026-03-04 22:54:25] (step=0061028) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.940520446096654, LR: 0.0003 +[2026-03-04 22:54:33] (step=0061029) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.940716102523968, LR: 0.0003 +[2026-03-04 22:54:40] (step=0061030) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.940911758951282, LR: 0.0003 +[2026-03-04 22:54:48] (step=0061031) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 11.941107415378594, LR: 0.0003 +[2026-03-04 22:54:56] (step=0061032) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 11.941303071805908, LR: 0.0003 +[2026-03-04 22:55:04] (step=0061033) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.941498728233222, LR: 0.0003 +[2026-03-04 22:55:12] (step=0061034) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.941694384660536, LR: 0.0003 +[2026-03-04 22:55:20] (step=0061035) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.94189004108785, LR: 0.0003 +[2026-03-04 22:55:28] (step=0061036) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 11.942085697515163, LR: 0.0003 +[2026-03-04 22:55:36] (step=0061037) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.942281353942477, LR: 0.0003 +[2026-03-04 22:55:43] (step=0061038) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 11.94247701036979, LR: 0.0003 +[2026-03-04 22:55:51] (step=0061039) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.942672666797105, LR: 0.0003 +[2026-03-04 22:55:59] (step=0061040) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.942868323224419, LR: 0.0003 +[2026-03-04 22:56:07] (step=0061041) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.94306397965173, LR: 0.0003 +[2026-03-04 22:56:15] (step=0061042) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.943259636079045, LR: 0.0003 +[2026-03-04 22:56:23] (step=0061043) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.943455292506359, LR: 0.0003 +[2026-03-04 22:56:31] (step=0061044) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.943650948933673, LR: 0.0003 +[2026-03-04 22:56:38] (step=0061045) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 11.943846605360987, LR: 0.0003 +[2026-03-04 22:56:46] (step=0061046) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.944042261788299, LR: 0.0003 +[2026-03-04 22:56:54] (step=0061047) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 11.944237918215613, LR: 0.0003 +[2026-03-04 22:57:02] (step=0061048) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 11.944433574642927, LR: 0.0003 +[2026-03-04 22:57:10] (step=0061049) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.944629231070241, LR: 0.0003 +[2026-03-04 22:57:18] (step=0061050) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.944824887497555, LR: 0.0003 +[2026-03-04 22:57:26] (step=0061051) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 11.945020543924867, LR: 0.0003 +[2026-03-04 22:57:33] (step=0061052) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 11.945216200352181, LR: 0.0003 +[2026-03-04 22:57:41] (step=0061053) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.945411856779495, LR: 0.0003 +[2026-03-04 22:57:49] (step=0061054) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.94560751320681, LR: 0.0003 +[2026-03-04 22:57:57] (step=0061055) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 11.945803169634123, LR: 0.0003 +[2026-03-04 22:58:05] (step=0061056) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 11.945998826061436, LR: 0.0003 +[2026-03-04 22:58:13] (step=0061057) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.94619448248875, LR: 0.0003 +[2026-03-04 22:58:21] (step=0061058) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.946390138916064, LR: 0.0003 +[2026-03-04 22:58:28] (step=0061059) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.946585795343378, LR: 0.0003 +[2026-03-04 22:58:36] (step=0061060) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 11.94678145177069, LR: 0.0003 +[2026-03-04 22:58:44] (step=0061061) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 11.946977108198004, LR: 0.0003 +[2026-03-04 22:58:52] (step=0061062) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 11.947172764625318, LR: 0.0003 +[2026-03-04 22:59:00] (step=0061063) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.947368421052632, LR: 0.0003 +[2026-03-04 22:59:08] (step=0061064) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 11.947564077479946, LR: 0.0003 +[2026-03-04 22:59:16] (step=0061065) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.947759733907258, LR: 0.0003 +[2026-03-04 22:59:24] (step=0061066) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.947955390334572, LR: 0.0003 +[2026-03-04 22:59:31] (step=0061067) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.948151046761886, LR: 0.0003 +[2026-03-04 22:59:39] (step=0061068) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.9483467031892, LR: 0.0003 +[2026-03-04 22:59:47] (step=0061069) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 11.948542359616514, LR: 0.0003 +[2026-03-04 22:59:55] (step=0061070) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 11.948738016043826, LR: 0.0003 +[2026-03-04 23:00:03] (step=0061071) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 11.94893367247114, LR: 0.0003 +[2026-03-04 23:00:11] (step=0061072) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 11.949129328898454, LR: 0.0003 +[2026-03-04 23:00:19] (step=0061073) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.949324985325768, LR: 0.0003 +[2026-03-04 23:00:26] (step=0061074) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 11.949520641753082, LR: 0.0003 +[2026-03-04 23:00:34] (step=0061075) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 11.949716298180395, LR: 0.0003 +[2026-03-04 23:00:42] (step=0061076) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 11.949911954607709, LR: 0.0003 +[2026-03-04 23:00:50] (step=0061077) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 11.950107611035023, LR: 0.0003 +[2026-03-04 23:00:58] (step=0061078) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.950303267462337, LR: 0.0003 +[2026-03-04 23:01:06] (step=0061079) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.95049892388965, LR: 0.0003 +[2026-03-04 23:01:13] (step=0061080) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.950694580316963, LR: 0.0003 +[2026-03-04 23:01:21] (step=0061081) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 11.950890236744277, LR: 0.0003 +[2026-03-04 23:01:29] (step=0061082) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.95108589317159, LR: 0.0003 +[2026-03-04 23:01:37] (step=0061083) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.951281549598905, LR: 0.0003 +[2026-03-04 23:01:45] (step=0061084) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 11.951477206026217, LR: 0.0003 +[2026-03-04 23:01:53] (step=0061085) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.951672862453531, LR: 0.0003 +[2026-03-04 23:02:01] (step=0061086) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.951868518880845, LR: 0.0003 +[2026-03-04 23:02:09] (step=0061087) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.95206417530816, LR: 0.0003 +[2026-03-04 23:02:16] (step=0061088) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 11.952259831735473, LR: 0.0003 +[2026-03-04 23:02:24] (step=0061089) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.952455488162785, LR: 0.0003 +[2026-03-04 23:02:32] (step=0061090) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 11.9526511445901, LR: 0.0003 +[2026-03-04 23:02:40] (step=0061091) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 11.952846801017413, LR: 0.0003 +[2026-03-04 23:02:48] (step=0061092) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 11.953042457444727, LR: 0.0003 +[2026-03-04 23:02:56] (step=0061093) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 11.953238113872041, LR: 0.0003 +[2026-03-04 23:03:03] (step=0061094) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 11.953433770299354, LR: 0.0003 +[2026-03-04 23:03:11] (step=0061095) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 11.953629426726668, LR: 0.0003 +[2026-03-04 23:03:19] (step=0061096) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 11.953825083153982, LR: 0.0003 +[2026-03-04 23:03:27] (step=0061097) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 11.954020739581296, LR: 0.0003 +[2026-03-04 23:03:35] (step=0061098) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 11.95421639600861, LR: 0.0003 +[2026-03-04 23:03:43] (step=0061099) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.954412052435922, LR: 0.0003 +[2026-03-04 23:03:51] (step=0061100) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.954607708863236, LR: 0.0003 +[2026-03-04 23:03:58] (step=0061101) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.95480336529055, LR: 0.0003 +[2026-03-04 23:04:06] (step=0061102) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 11.954999021717864, LR: 0.0003 +[2026-03-04 23:04:14] (step=0061103) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 11.955194678145178, LR: 0.0003 +[2026-03-04 23:04:22] (step=0061104) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.95539033457249, LR: 0.0003 +[2026-03-04 23:04:30] (step=0061105) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 11.955585990999804, LR: 0.0003 +[2026-03-04 23:04:38] (step=0061106) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 11.955781647427118, LR: 0.0003 +[2026-03-04 23:04:45] (step=0061107) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.955977303854432, LR: 0.0003 +[2026-03-04 23:04:53] (step=0061108) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 11.956172960281746, LR: 0.0003 +[2026-03-04 23:05:01] (step=0061109) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 11.956368616709058, LR: 0.0003 +[2026-03-04 23:05:09] (step=0061110) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.956564273136372, LR: 0.0003 +[2026-03-04 23:05:17] (step=0061111) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.956759929563686, LR: 0.0003 +[2026-03-04 23:05:25] (step=0061112) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.956955585991, LR: 0.0003 +[2026-03-04 23:05:33] (step=0061113) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 11.957151242418313, LR: 0.0003 +[2026-03-04 23:05:40] (step=0061114) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 11.957346898845627, LR: 0.0003 +[2026-03-04 23:05:48] (step=0061115) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 11.95754255527294, LR: 0.0003 +[2026-03-04 23:05:56] (step=0061116) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 11.957738211700255, LR: 0.0003 +[2026-03-04 23:06:04] (step=0061117) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.957933868127569, LR: 0.0003 +[2026-03-04 23:06:12] (step=0061118) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 11.95812952455488, LR: 0.0003 +[2026-03-04 23:06:20] (step=0061119) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.958325180982195, LR: 0.0003 +[2026-03-04 23:06:28] (step=0061120) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 11.958520837409509, LR: 0.0003 +[2026-03-04 23:06:35] (step=0061121) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.958716493836823, LR: 0.0003 +[2026-03-04 23:06:43] (step=0061122) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.958912150264137, LR: 0.0003 +[2026-03-04 23:06:51] (step=0061123) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 11.95910780669145, LR: 0.0003 +[2026-03-04 23:06:59] (step=0061124) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.959303463118763, LR: 0.0003 +[2026-03-04 23:07:07] (step=0061125) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.959499119546077, LR: 0.0003 +[2026-03-04 23:07:15] (step=0061126) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.959694775973391, LR: 0.0003 +[2026-03-04 23:07:23] (step=0061127) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.959890432400705, LR: 0.0003 +[2026-03-04 23:07:30] (step=0061128) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 11.960086088828017, LR: 0.0003 +[2026-03-04 23:07:38] (step=0061129) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.960281745255331, LR: 0.0003 +[2026-03-04 23:07:46] (step=0061130) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 11.960477401682645, LR: 0.0003 +[2026-03-04 23:07:54] (step=0061131) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 11.96067305810996, LR: 0.0003 +[2026-03-04 23:08:02] (step=0061132) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.960868714537273, LR: 0.0003 +[2026-03-04 23:08:10] (step=0061133) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.961064370964586, LR: 0.0003 +[2026-03-04 23:08:18] (step=0061134) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 11.9612600273919, LR: 0.0003 +[2026-03-04 23:08:25] (step=0061135) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 11.961455683819214, LR: 0.0003 +[2026-03-04 23:08:33] (step=0061136) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.961651340246528, LR: 0.0003 +[2026-03-04 23:08:41] (step=0061137) Train Loss: 0.4468, Train Steps/Sec: 0.12, Epoch: 11.96184699667384, LR: 0.0003 +[2026-03-04 23:08:49] (step=0061138) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.962042653101154, LR: 0.0003 +[2026-03-04 23:08:57] (step=0061139) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 11.962238309528468, LR: 0.0003 +[2026-03-04 23:09:05] (step=0061140) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.962433965955782, LR: 0.0003 +[2026-03-04 23:09:13] (step=0061141) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 11.962629622383096, LR: 0.0003 +[2026-03-04 23:09:21] (step=0061142) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 11.962825278810408, LR: 0.0003 +[2026-03-04 23:09:29] (step=0061143) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 11.963020935237722, LR: 0.0003 +[2026-03-04 23:09:36] (step=0061144) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 11.963216591665036, LR: 0.0003 +[2026-03-04 23:09:44] (step=0061145) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.96341224809235, LR: 0.0003 +[2026-03-04 23:09:52] (step=0061146) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 11.963607904519664, LR: 0.0003 +[2026-03-04 23:10:00] (step=0061147) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 11.963803560946976, LR: 0.0003 +[2026-03-04 23:10:08] (step=0061148) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 11.96399921737429, LR: 0.0003 +[2026-03-04 23:10:16] (step=0061149) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 11.964194873801604, LR: 0.0003 +[2026-03-04 23:10:24] (step=0061150) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 11.964390530228918, LR: 0.0003 +[2026-03-04 23:10:31] (step=0061151) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.964586186656232, LR: 0.0003 +[2026-03-04 23:10:39] (step=0061152) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.964781843083545, LR: 0.0003 +[2026-03-04 23:10:47] (step=0061153) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.964977499510859, LR: 0.0003 +[2026-03-04 23:10:55] (step=0061154) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 11.965173155938173, LR: 0.0003 +[2026-03-04 23:11:03] (step=0061155) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 11.965368812365487, LR: 0.0003 +[2026-03-04 23:11:11] (step=0061156) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.9655644687928, LR: 0.0003 +[2026-03-04 23:11:18] (step=0061157) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 11.965760125220113, LR: 0.0003 +[2026-03-04 23:11:26] (step=0061158) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 11.965955781647427, LR: 0.0003 +[2026-03-04 23:11:34] (step=0061159) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.966151438074741, LR: 0.0003 +[2026-03-04 23:11:42] (step=0061160) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.966347094502055, LR: 0.0003 +[2026-03-04 23:11:50] (step=0061161) Train Loss: 0.4471, Train Steps/Sec: 0.12, Epoch: 11.966542750929367, LR: 0.0003 +[2026-03-04 23:11:58] (step=0061162) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.966738407356681, LR: 0.0003 +[2026-03-04 23:12:06] (step=0061163) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.966934063783995, LR: 0.0003 +[2026-03-04 23:12:14] (step=0061164) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.96712972021131, LR: 0.0003 +[2026-03-04 23:12:22] (step=0061165) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 11.967325376638623, LR: 0.0003 +[2026-03-04 23:12:29] (step=0061166) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 11.967521033065935, LR: 0.0003 +[2026-03-04 23:12:37] (step=0061167) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.96771668949325, LR: 0.0003 +[2026-03-04 23:12:45] (step=0061168) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.967912345920563, LR: 0.0003 +[2026-03-04 23:12:53] (step=0061169) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 11.968108002347877, LR: 0.0003 +[2026-03-04 23:13:01] (step=0061170) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 11.968303658775191, LR: 0.0003 +[2026-03-04 23:13:09] (step=0061171) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 11.968499315202504, LR: 0.0003 +[2026-03-04 23:13:16] (step=0061172) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.968694971629818, LR: 0.0003 +[2026-03-04 23:13:24] (step=0061173) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 11.968890628057132, LR: 0.0003 +[2026-03-04 23:13:32] (step=0061174) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.969086284484446, LR: 0.0003 +[2026-03-04 23:13:40] (step=0061175) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 11.96928194091176, LR: 0.0003 +[2026-03-04 23:13:48] (step=0061176) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 11.969477597339072, LR: 0.0003 +[2026-03-04 23:13:56] (step=0061177) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 11.969673253766386, LR: 0.0003 +[2026-03-04 23:14:04] (step=0061178) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 11.9698689101937, LR: 0.0003 +[2026-03-04 23:14:11] (step=0061179) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.970064566621014, LR: 0.0003 +[2026-03-04 23:14:19] (step=0061180) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.970260223048328, LR: 0.0003 +[2026-03-04 23:14:27] (step=0061181) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.97045587947564, LR: 0.0003 +[2026-03-04 23:14:35] (step=0061182) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.970651535902954, LR: 0.0003 +[2026-03-04 23:14:43] (step=0061183) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 11.970847192330268, LR: 0.0003 +[2026-03-04 23:14:51] (step=0061184) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.971042848757582, LR: 0.0003 +[2026-03-04 23:14:59] (step=0061185) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 11.971238505184896, LR: 0.0003 +[2026-03-04 23:15:06] (step=0061186) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 11.971434161612208, LR: 0.0003 +[2026-03-04 23:15:14] (step=0061187) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.971629818039522, LR: 0.0003 +[2026-03-04 23:15:22] (step=0061188) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.971825474466836, LR: 0.0003 +[2026-03-04 23:15:30] (step=0061189) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.97202113089415, LR: 0.0003 +[2026-03-04 23:15:38] (step=0061190) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.972216787321463, LR: 0.0003 +[2026-03-04 23:15:46] (step=0061191) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 11.972412443748777, LR: 0.0003 +[2026-03-04 23:15:54] (step=0061192) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.97260810017609, LR: 0.0003 +[2026-03-04 23:16:01] (step=0061193) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 11.972803756603405, LR: 0.0003 +[2026-03-04 23:16:09] (step=0061194) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 11.972999413030719, LR: 0.0003 +[2026-03-04 23:16:17] (step=0061195) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 11.973195069458031, LR: 0.0003 +[2026-03-04 23:16:25] (step=0061196) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.973390725885345, LR: 0.0003 +[2026-03-04 23:16:33] (step=0061197) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 11.973586382312659, LR: 0.0003 +[2026-03-04 23:16:41] (step=0061198) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.973782038739973, LR: 0.0003 +[2026-03-04 23:16:49] (step=0061199) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.973977695167287, LR: 0.0003 +[2026-03-04 23:16:56] (step=0061200) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 11.9741733515946, LR: 0.0003 +[2026-03-04 23:17:04] (step=0061201) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.974369008021913, LR: 0.0003 +[2026-03-04 23:17:12] (step=0061202) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.974564664449227, LR: 0.0003 +[2026-03-04 23:17:20] (step=0061203) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.974760320876541, LR: 0.0003 +[2026-03-04 23:17:28] (step=0061204) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 11.974955977303855, LR: 0.0003 +[2026-03-04 23:17:36] (step=0061205) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.975151633731167, LR: 0.0003 +[2026-03-04 23:17:44] (step=0061206) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 11.975347290158481, LR: 0.0003 +[2026-03-04 23:17:51] (step=0061207) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.975542946585795, LR: 0.0003 +[2026-03-04 23:17:59] (step=0061208) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 11.97573860301311, LR: 0.0003 +[2026-03-04 23:18:07] (step=0061209) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 11.975934259440423, LR: 0.0003 +[2026-03-04 23:18:15] (step=0061210) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.976129915867736, LR: 0.0003 +[2026-03-04 23:18:23] (step=0061211) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.97632557229505, LR: 0.0003 +[2026-03-04 23:18:31] (step=0061212) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.976521228722364, LR: 0.0003 +[2026-03-04 23:18:39] (step=0061213) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.976716885149678, LR: 0.0003 +[2026-03-04 23:18:47] (step=0061214) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 11.97691254157699, LR: 0.0003 +[2026-03-04 23:18:54] (step=0061215) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 11.977108198004304, LR: 0.0003 +[2026-03-04 23:19:02] (step=0061216) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 11.977303854431618, LR: 0.0003 +[2026-03-04 23:19:10] (step=0061217) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 11.977499510858932, LR: 0.0003 +[2026-03-04 23:19:18] (step=0061218) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.977695167286246, LR: 0.0003 +[2026-03-04 23:19:26] (step=0061219) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 11.977890823713558, LR: 0.0003 +[2026-03-04 23:19:34] (step=0061220) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.978086480140872, LR: 0.0003 +[2026-03-04 23:19:42] (step=0061221) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 11.978282136568186, LR: 0.0003 +[2026-03-04 23:19:49] (step=0061222) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 11.9784777929955, LR: 0.0003 +[2026-03-04 23:19:57] (step=0061223) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 11.978673449422814, LR: 0.0003 +[2026-03-04 23:20:05] (step=0061224) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 11.978869105850126, LR: 0.0003 +[2026-03-04 23:20:13] (step=0061225) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 11.97906476227744, LR: 0.0003 +[2026-03-04 23:20:21] (step=0061226) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 11.979260418704754, LR: 0.0003 +[2026-03-04 23:20:29] (step=0061227) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 11.979456075132068, LR: 0.0003 +[2026-03-04 23:20:36] (step=0061228) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 11.979651731559382, LR: 0.0003 +[2026-03-04 23:20:44] (step=0061229) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 11.979847387986695, LR: 0.0003 +[2026-03-04 23:20:52] (step=0061230) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 11.980043044414009, LR: 0.0003 +[2026-03-04 23:21:00] (step=0061231) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 11.980238700841323, LR: 0.0003 +[2026-03-04 23:21:08] (step=0061232) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 11.980434357268637, LR: 0.0003 +[2026-03-04 23:21:16] (step=0061233) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 11.98063001369595, LR: 0.0003 +[2026-03-04 23:21:24] (step=0061234) Train Loss: 0.4598, Train Steps/Sec: 0.13, Epoch: 11.980825670123263, LR: 0.0003 +[2026-03-04 23:21:31] (step=0061235) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.981021326550577, LR: 0.0003 +[2026-03-04 23:21:39] (step=0061236) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 11.981216982977891, LR: 0.0003 +[2026-03-04 23:21:47] (step=0061237) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.981412639405205, LR: 0.0003 +[2026-03-04 23:21:55] (step=0061238) Train Loss: 0.4462, Train Steps/Sec: 0.12, Epoch: 11.981608295832519, LR: 0.0003 +[2026-03-04 23:22:03] (step=0061239) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.981803952259831, LR: 0.0003 +[2026-03-04 23:22:11] (step=0061240) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.981999608687145, LR: 0.0003 +[2026-03-04 23:22:19] (step=0061241) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.98219526511446, LR: 0.0003 +[2026-03-04 23:22:27] (step=0061242) Train Loss: 0.4253, Train Steps/Sec: 0.13, Epoch: 11.982390921541773, LR: 0.0003 +[2026-03-04 23:22:34] (step=0061243) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 11.982586577969085, LR: 0.0003 +[2026-03-04 23:22:42] (step=0061244) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 11.9827822343964, LR: 0.0003 +[2026-03-04 23:22:50] (step=0061245) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 11.982977890823713, LR: 0.0003 +[2026-03-04 23:22:58] (step=0061246) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 11.983173547251027, LR: 0.0003 +[2026-03-04 23:23:06] (step=0061247) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 11.983369203678341, LR: 0.0003 +[2026-03-04 23:23:14] (step=0061248) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.983564860105654, LR: 0.0003 +[2026-03-04 23:23:22] (step=0061249) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.983760516532968, LR: 0.0003 +[2026-03-04 23:23:29] (step=0061250) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.983956172960282, LR: 0.0003 +[2026-03-04 23:23:37] (step=0061251) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 11.984151829387596, LR: 0.0003 +[2026-03-04 23:23:45] (step=0061252) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.98434748581491, LR: 0.0003 +[2026-03-04 23:23:53] (step=0061253) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.984543142242222, LR: 0.0003 +[2026-03-04 23:24:01] (step=0061254) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 11.984738798669536, LR: 0.0003 +[2026-03-04 23:24:09] (step=0061255) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.98493445509685, LR: 0.0003 +[2026-03-04 23:24:17] (step=0061256) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 11.985130111524164, LR: 0.0003 +[2026-03-04 23:24:24] (step=0061257) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 11.985325767951478, LR: 0.0003 +[2026-03-04 23:24:32] (step=0061258) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 11.98552142437879, LR: 0.0003 +[2026-03-04 23:24:40] (step=0061259) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.985717080806104, LR: 0.0003 +[2026-03-04 23:24:48] (step=0061260) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 11.985912737233418, LR: 0.0003 +[2026-03-04 23:24:56] (step=0061261) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.986108393660732, LR: 0.0003 +[2026-03-04 23:25:04] (step=0061262) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 11.986304050088046, LR: 0.0003 +[2026-03-04 23:25:12] (step=0061263) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 11.986499706515358, LR: 0.0003 +[2026-03-04 23:25:19] (step=0061264) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 11.986695362942672, LR: 0.0003 +[2026-03-04 23:25:27] (step=0061265) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 11.986891019369986, LR: 0.0003 +[2026-03-04 23:25:35] (step=0061266) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 11.9870866757973, LR: 0.0003 +[2026-03-04 23:25:43] (step=0061267) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 11.987282332224613, LR: 0.0003 +[2026-03-04 23:25:51] (step=0061268) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 11.987477988651927, LR: 0.0003 +[2026-03-04 23:25:59] (step=0061269) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.98767364507924, LR: 0.0003 +[2026-03-04 23:26:07] (step=0061270) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.987869301506555, LR: 0.0003 +[2026-03-04 23:26:14] (step=0061271) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 11.988064957933869, LR: 0.0003 +[2026-03-04 23:26:22] (step=0061272) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 11.988260614361181, LR: 0.0003 +[2026-03-04 23:26:30] (step=0061273) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 11.988456270788495, LR: 0.0003 +[2026-03-04 23:26:38] (step=0061274) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 11.988651927215809, LR: 0.0003 +[2026-03-04 23:26:46] (step=0061275) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 11.988847583643123, LR: 0.0003 +[2026-03-04 23:26:54] (step=0061276) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 11.989043240070437, LR: 0.0003 +[2026-03-04 23:27:02] (step=0061277) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 11.98923889649775, LR: 0.0003 +[2026-03-04 23:27:09] (step=0061278) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.989434552925063, LR: 0.0003 +[2026-03-04 23:27:17] (step=0061279) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 11.989630209352377, LR: 0.0003 +[2026-03-04 23:27:25] (step=0061280) Train Loss: 0.4246, Train Steps/Sec: 0.13, Epoch: 11.989825865779691, LR: 0.0003 +[2026-03-04 23:27:33] (step=0061281) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 11.990021522207005, LR: 0.0003 +[2026-03-04 23:27:41] (step=0061282) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.990217178634317, LR: 0.0003 +[2026-03-04 23:27:49] (step=0061283) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 11.990412835061631, LR: 0.0003 +[2026-03-04 23:27:57] (step=0061284) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 11.990608491488945, LR: 0.0003 +[2026-03-04 23:28:05] (step=0061285) Train Loss: 0.4511, Train Steps/Sec: 0.12, Epoch: 11.99080414791626, LR: 0.0003 +[2026-03-04 23:28:12] (step=0061286) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.990999804343573, LR: 0.0003 +[2026-03-04 23:28:20] (step=0061287) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 11.991195460770886, LR: 0.0003 +[2026-03-04 23:28:28] (step=0061288) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.9913911171982, LR: 0.0003 +[2026-03-04 23:28:36] (step=0061289) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.991586773625514, LR: 0.0003 +[2026-03-04 23:28:44] (step=0061290) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 11.991782430052828, LR: 0.0003 +[2026-03-04 23:28:52] (step=0061291) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 11.991978086480142, LR: 0.0003 +[2026-03-04 23:29:00] (step=0061292) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 11.992173742907454, LR: 0.0003 +[2026-03-04 23:29:07] (step=0061293) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 11.992369399334768, LR: 0.0003 +[2026-03-04 23:29:15] (step=0061294) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 11.992565055762082, LR: 0.0003 +[2026-03-04 23:29:23] (step=0061295) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 11.992760712189396, LR: 0.0003 +[2026-03-04 23:29:31] (step=0061296) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 11.992956368616708, LR: 0.0003 +[2026-03-04 23:29:39] (step=0061297) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 11.993152025044022, LR: 0.0003 +[2026-03-04 23:29:47] (step=0061298) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 11.993347681471336, LR: 0.0003 +[2026-03-04 23:29:55] (step=0061299) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 11.99354333789865, LR: 0.0003 +[2026-03-04 23:30:02] (step=0061300) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.993738994325964, LR: 0.0003 +[2026-03-04 23:30:10] (step=0061301) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 11.993934650753276, LR: 0.0003 +[2026-03-04 23:30:18] (step=0061302) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.99413030718059, LR: 0.0003 +[2026-03-04 23:30:26] (step=0061303) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 11.994325963607904, LR: 0.0003 +[2026-03-04 23:30:34] (step=0061304) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 11.994521620035218, LR: 0.0003 +[2026-03-04 23:30:42] (step=0061305) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.994717276462532, LR: 0.0003 +[2026-03-04 23:30:50] (step=0061306) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 11.994912932889845, LR: 0.0003 +[2026-03-04 23:30:58] (step=0061307) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 11.995108589317159, LR: 0.0003 +[2026-03-04 23:31:05] (step=0061308) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 11.995304245744473, LR: 0.0003 +[2026-03-04 23:31:13] (step=0061309) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.995499902171787, LR: 0.0003 +[2026-03-04 23:31:21] (step=0061310) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 11.9956955585991, LR: 0.0003 +[2026-03-04 23:31:29] (step=0061311) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 11.995891215026413, LR: 0.0003 +[2026-03-04 23:31:37] (step=0061312) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 11.996086871453727, LR: 0.0003 +[2026-03-04 23:31:45] (step=0061313) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 11.996282527881041, LR: 0.0003 +[2026-03-04 23:31:52] (step=0061314) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 11.996478184308355, LR: 0.0003 +[2026-03-04 23:32:00] (step=0061315) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 11.996673840735669, LR: 0.0003 +[2026-03-04 23:32:08] (step=0061316) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 11.996869497162981, LR: 0.0003 +[2026-03-04 23:32:16] (step=0061317) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 11.997065153590295, LR: 0.0003 +[2026-03-04 23:32:24] (step=0061318) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 11.99726081001761, LR: 0.0003 +[2026-03-04 23:32:32] (step=0061319) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 11.997456466444923, LR: 0.0003 +[2026-03-04 23:32:40] (step=0061320) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 11.997652122872235, LR: 0.0003 +[2026-03-04 23:32:47] (step=0061321) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 11.99784777929955, LR: 0.0003 +[2026-03-04 23:32:55] (step=0061322) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 11.998043435726863, LR: 0.0003 +[2026-03-04 23:33:03] (step=0061323) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 11.998239092154177, LR: 0.0003 +[2026-03-04 23:33:11] (step=0061324) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 11.998434748581492, LR: 0.0003 +[2026-03-04 23:33:19] (step=0061325) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 11.998630405008804, LR: 0.0003 +[2026-03-04 23:33:27] (step=0061326) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 11.998826061436118, LR: 0.0003 +[2026-03-04 23:33:35] (step=0061327) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 11.999021717863432, LR: 0.0003 +[2026-03-04 23:33:43] (step=0061328) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 11.999217374290746, LR: 0.0003 +[2026-03-04 23:33:50] (step=0061329) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 11.99941303071806, LR: 0.0003 +[2026-03-04 23:33:58] (step=0061330) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 11.999608687145372, LR: 0.0003 +[2026-03-04 23:34:06] (step=0061331) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 11.999804343572686, LR: 0.0003 +[2026-03-04 23:34:14] (step=0061332) Train Loss: 0.4469, Train Steps/Sec: 0.12, Epoch: 12.0, LR: 0.0003 +[2026-03-04 23:34:14] Beginning epoch 12... +[2026-03-04 23:34:24] (step=0061333) Train Loss: 0.4384, Train Steps/Sec: 0.10, Epoch: 12.000195656427314, LR: 0.0003 +[2026-03-04 23:34:32] (step=0061334) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.000391312854628, LR: 0.0003 +[2026-03-04 23:34:40] (step=0061335) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.00058696928194, LR: 0.0003 +[2026-03-04 23:34:47] (step=0061336) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.000782625709254, LR: 0.0003 +[2026-03-04 23:34:55] (step=0061337) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.000978282136568, LR: 0.0003 +[2026-03-04 23:35:03] (step=0061338) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.001173938563882, LR: 0.0003 +[2026-03-04 23:35:11] (step=0061339) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.001369594991196, LR: 0.0003 +[2026-03-04 23:35:19] (step=0061340) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.001565251418508, LR: 0.0003 +[2026-03-04 23:35:27] (step=0061341) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.001760907845823, LR: 0.0003 +[2026-03-04 23:35:35] (step=0061342) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.001956564273137, LR: 0.0003 +[2026-03-04 23:35:42] (step=0061343) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.00215222070045, LR: 0.0003 +[2026-03-04 23:35:50] (step=0061344) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.002347877127765, LR: 0.0003 +[2026-03-04 23:35:58] (step=0061345) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.002543533555077, LR: 0.0003 +[2026-03-04 23:36:06] (step=0061346) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.00273918998239, LR: 0.0003 +[2026-03-04 23:36:14] (step=0061347) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.002934846409705, LR: 0.0003 +[2026-03-04 23:36:22] (step=0061348) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.003130502837019, LR: 0.0003 +[2026-03-04 23:36:30] (step=0061349) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.003326159264331, LR: 0.0003 +[2026-03-04 23:36:37] (step=0061350) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.003521815691645, LR: 0.0003 +[2026-03-04 23:36:45] (step=0061351) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.003717472118959, LR: 0.0003 +[2026-03-04 23:36:53] (step=0061352) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.003913128546273, LR: 0.0003 +[2026-03-04 23:37:01] (step=0061353) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.004108784973587, LR: 0.0003 +[2026-03-04 23:37:09] (step=0061354) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.0043044414009, LR: 0.0003 +[2026-03-04 23:37:17] (step=0061355) Train Loss: 0.4292, Train Steps/Sec: 0.12, Epoch: 12.004500097828213, LR: 0.0003 +[2026-03-04 23:37:25] (step=0061356) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.004695754255527, LR: 0.0003 +[2026-03-04 23:37:33] (step=0061357) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.004891410682841, LR: 0.0003 +[2026-03-04 23:37:40] (step=0061358) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.005087067110155, LR: 0.0003 +[2026-03-04 23:37:48] (step=0061359) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.005282723537468, LR: 0.0003 +[2026-03-04 23:37:56] (step=0061360) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.005478379964782, LR: 0.0003 +[2026-03-04 23:38:04] (step=0061361) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.005674036392096, LR: 0.0003 +[2026-03-04 23:38:12] (step=0061362) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.00586969281941, LR: 0.0003 +[2026-03-04 23:38:20] (step=0061363) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.006065349246724, LR: 0.0003 +[2026-03-04 23:38:28] (step=0061364) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 12.006261005674036, LR: 0.0003 +[2026-03-04 23:38:35] (step=0061365) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.00645666210135, LR: 0.0003 +[2026-03-04 23:38:43] (step=0061366) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.006652318528664, LR: 0.0003 +[2026-03-04 23:38:51] (step=0061367) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.006847974955978, LR: 0.0003 +[2026-03-04 23:38:59] (step=0061368) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 12.007043631383292, LR: 0.0003 +[2026-03-04 23:39:07] (step=0061369) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.007239287810604, LR: 0.0003 +[2026-03-04 23:39:15] (step=0061370) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.007434944237918, LR: 0.0003 +[2026-03-04 23:39:23] (step=0061371) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.007630600665232, LR: 0.0003 +[2026-03-04 23:39:30] (step=0061372) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.007826257092546, LR: 0.0003 +[2026-03-04 23:39:38] (step=0061373) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 12.008021913519858, LR: 0.0003 +[2026-03-04 23:39:46] (step=0061374) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 12.008217569947172, LR: 0.0003 +[2026-03-04 23:39:54] (step=0061375) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 12.008413226374486, LR: 0.0003 +[2026-03-04 23:40:02] (step=0061376) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.0086088828018, LR: 0.0003 +[2026-03-04 23:40:10] (step=0061377) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.008804539229114, LR: 0.0003 +[2026-03-04 23:40:18] (step=0061378) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.009000195656427, LR: 0.0003 +[2026-03-04 23:40:26] (step=0061379) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.00919585208374, LR: 0.0003 +[2026-03-04 23:40:33] (step=0061380) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.009391508511055, LR: 0.0003 +[2026-03-04 23:40:41] (step=0061381) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.009587164938369, LR: 0.0003 +[2026-03-04 23:40:49] (step=0061382) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.009782821365683, LR: 0.0003 +[2026-03-04 23:40:57] (step=0061383) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.009978477792995, LR: 0.0003 +[2026-03-04 23:41:05] (step=0061384) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.010174134220309, LR: 0.0003 +[2026-03-04 23:41:13] (step=0061385) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.010369790647623, LR: 0.0003 +[2026-03-04 23:41:21] (step=0061386) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.010565447074937, LR: 0.0003 +[2026-03-04 23:41:28] (step=0061387) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.01076110350225, LR: 0.0003 +[2026-03-04 23:41:36] (step=0061388) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.010956759929563, LR: 0.0003 +[2026-03-04 23:41:44] (step=0061389) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.011152416356877, LR: 0.0003 +[2026-03-04 23:41:52] (step=0061390) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.011348072784191, LR: 0.0003 +[2026-03-04 23:42:00] (step=0061391) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.011543729211505, LR: 0.0003 +[2026-03-04 23:42:08] (step=0061392) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.011739385638819, LR: 0.0003 +[2026-03-04 23:42:16] (step=0061393) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.011935042066131, LR: 0.0003 +[2026-03-04 23:42:23] (step=0061394) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.012130698493445, LR: 0.0003 +[2026-03-04 23:42:31] (step=0061395) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.01232635492076, LR: 0.0003 +[2026-03-04 23:42:39] (step=0061396) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.012522011348073, LR: 0.0003 +[2026-03-04 23:42:47] (step=0061397) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.012717667775387, LR: 0.0003 +[2026-03-04 23:42:55] (step=0061398) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.0129133242027, LR: 0.0003 +[2026-03-04 23:43:03] (step=0061399) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.013108980630014, LR: 0.0003 +[2026-03-04 23:43:11] (step=0061400) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.013304637057328, LR: 0.0003 +[2026-03-04 23:43:18] (step=0061401) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 12.013500293484642, LR: 0.0003 +[2026-03-04 23:43:26] (step=0061402) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.013695949911954, LR: 0.0003 +[2026-03-04 23:43:34] (step=0061403) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.013891606339268, LR: 0.0003 +[2026-03-04 23:43:42] (step=0061404) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.014087262766582, LR: 0.0003 +[2026-03-04 23:43:50] (step=0061405) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.014282919193896, LR: 0.0003 +[2026-03-04 23:43:58] (step=0061406) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.01447857562121, LR: 0.0003 +[2026-03-04 23:44:06] (step=0061407) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 12.014674232048522, LR: 0.0003 +[2026-03-04 23:44:14] (step=0061408) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.014869888475836, LR: 0.0003 +[2026-03-04 23:44:21] (step=0061409) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.01506554490315, LR: 0.0003 +[2026-03-04 23:44:29] (step=0061410) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.015261201330464, LR: 0.0003 +[2026-03-04 23:44:37] (step=0061411) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.015456857757778, LR: 0.0003 +[2026-03-04 23:44:45] (step=0061412) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 12.01565251418509, LR: 0.0003 +[2026-03-04 23:44:53] (step=0061413) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.015848170612404, LR: 0.0003 +[2026-03-04 23:45:01] (step=0061414) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.016043827039718, LR: 0.0003 +[2026-03-04 23:45:09] (step=0061415) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.016239483467032, LR: 0.0003 +[2026-03-04 23:45:16] (step=0061416) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.016435139894346, LR: 0.0003 +[2026-03-04 23:45:24] (step=0061417) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.016630796321659, LR: 0.0003 +[2026-03-04 23:45:32] (step=0061418) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 12.016826452748973, LR: 0.0003 +[2026-03-04 23:45:40] (step=0061419) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.017022109176287, LR: 0.0003 +[2026-03-04 23:45:48] (step=0061420) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.0172177656036, LR: 0.0003 +[2026-03-04 23:45:56] (step=0061421) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.017413422030915, LR: 0.0003 +[2026-03-04 23:46:04] (step=0061422) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.017609078458227, LR: 0.0003 +[2026-03-04 23:46:11] (step=0061423) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.01780473488554, LR: 0.0003 +[2026-03-04 23:46:19] (step=0061424) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.018000391312855, LR: 0.0003 +[2026-03-04 23:46:27] (step=0061425) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 12.018196047740169, LR: 0.0003 +[2026-03-04 23:46:35] (step=0061426) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.018391704167481, LR: 0.0003 +[2026-03-04 23:46:43] (step=0061427) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.018587360594795, LR: 0.0003 +[2026-03-04 23:46:51] (step=0061428) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.018783017022109, LR: 0.0003 +[2026-03-04 23:46:58] (step=0061429) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.018978673449423, LR: 0.0003 +[2026-03-04 23:47:06] (step=0061430) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.019174329876737, LR: 0.0003 +[2026-03-04 23:47:14] (step=0061431) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.01936998630405, LR: 0.0003 +[2026-03-04 23:47:22] (step=0061432) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.019565642731363, LR: 0.0003 +[2026-03-04 23:47:30] (step=0061433) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.019761299158677, LR: 0.0003 +[2026-03-04 23:47:38] (step=0061434) Train Loss: 0.4553, Train Steps/Sec: 0.12, Epoch: 12.019956955585991, LR: 0.0003 +[2026-03-04 23:47:46] (step=0061435) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.020152612013305, LR: 0.0003 +[2026-03-04 23:47:54] (step=0061436) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.020348268440618, LR: 0.0003 +[2026-03-04 23:48:02] (step=0061437) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.020543924867932, LR: 0.0003 +[2026-03-04 23:48:09] (step=0061438) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.020739581295246, LR: 0.0003 +[2026-03-04 23:48:17] (step=0061439) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.02093523772256, LR: 0.0003 +[2026-03-04 23:48:25] (step=0061440) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.021130894149874, LR: 0.0003 +[2026-03-04 23:48:33] (step=0061441) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.021326550577186, LR: 0.0003 +[2026-03-04 23:48:41] (step=0061442) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.0215222070045, LR: 0.0003 +[2026-03-04 23:48:49] (step=0061443) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.021717863431814, LR: 0.0003 +[2026-03-04 23:48:57] (step=0061444) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.021913519859128, LR: 0.0003 +[2026-03-04 23:49:04] (step=0061445) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.022109176286442, LR: 0.0003 +[2026-03-04 23:49:12] (step=0061446) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.022304832713754, LR: 0.0003 +[2026-03-04 23:49:20] (step=0061447) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.022500489141068, LR: 0.0003 +[2026-03-04 23:49:28] (step=0061448) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.022696145568382, LR: 0.0003 +[2026-03-04 23:49:36] (step=0061449) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.022891801995696, LR: 0.0003 +[2026-03-04 23:49:44] (step=0061450) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.02308745842301, LR: 0.0003 +[2026-03-04 23:49:52] (step=0061451) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.023283114850322, LR: 0.0003 +[2026-03-04 23:49:59] (step=0061452) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.023478771277636, LR: 0.0003 +[2026-03-04 23:50:07] (step=0061453) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.02367442770495, LR: 0.0003 +[2026-03-04 23:50:15] (step=0061454) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 12.023870084132264, LR: 0.0003 +[2026-03-04 23:50:23] (step=0061455) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.024065740559577, LR: 0.0003 +[2026-03-04 23:50:31] (step=0061456) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.02426139698689, LR: 0.0003 +[2026-03-04 23:50:39] (step=0061457) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.024457053414205, LR: 0.0003 +[2026-03-04 23:50:47] (step=0061458) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.024652709841519, LR: 0.0003 +[2026-03-04 23:50:54] (step=0061459) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 12.024848366268833, LR: 0.0003 +[2026-03-04 23:51:02] (step=0061460) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.025044022696145, LR: 0.0003 +[2026-03-04 23:51:10] (step=0061461) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 12.025239679123459, LR: 0.0003 +[2026-03-04 23:51:18] (step=0061462) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.025435335550773, LR: 0.0003 +[2026-03-04 23:51:26] (step=0061463) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.025630991978087, LR: 0.0003 +[2026-03-04 23:51:34] (step=0061464) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.0258266484054, LR: 0.0003 +[2026-03-04 23:51:42] (step=0061465) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.026022304832713, LR: 0.0003 +[2026-03-04 23:51:49] (step=0061466) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.026217961260027, LR: 0.0003 +[2026-03-04 23:51:57] (step=0061467) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.026413617687341, LR: 0.0003 +[2026-03-04 23:52:05] (step=0061468) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.026609274114655, LR: 0.0003 +[2026-03-04 23:52:13] (step=0061469) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.026804930541969, LR: 0.0003 +[2026-03-04 23:52:21] (step=0061470) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.027000586969281, LR: 0.0003 +[2026-03-04 23:52:29] (step=0061471) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 12.027196243396595, LR: 0.0003 +[2026-03-04 23:52:36] (step=0061472) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.02739189982391, LR: 0.0003 +[2026-03-04 23:52:44] (step=0061473) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.027587556251223, LR: 0.0003 +[2026-03-04 23:52:52] (step=0061474) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.027783212678537, LR: 0.0003 +[2026-03-04 23:53:00] (step=0061475) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.02797886910585, LR: 0.0003 +[2026-03-04 23:53:08] (step=0061476) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.028174525533164, LR: 0.0003 +[2026-03-04 23:53:16] (step=0061477) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.028370181960478, LR: 0.0003 +[2026-03-04 23:53:24] (step=0061478) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.028565838387792, LR: 0.0003 +[2026-03-04 23:53:31] (step=0061479) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.028761494815104, LR: 0.0003 +[2026-03-04 23:53:39] (step=0061480) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.028957151242418, LR: 0.0003 +[2026-03-04 23:53:47] (step=0061481) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.029152807669732, LR: 0.0003 +[2026-03-04 23:53:55] (step=0061482) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.029348464097046, LR: 0.0003 +[2026-03-04 23:54:03] (step=0061483) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.02954412052436, LR: 0.0003 +[2026-03-04 23:54:11] (step=0061484) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.029739776951672, LR: 0.0003 +[2026-03-04 23:54:19] (step=0061485) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.029935433378986, LR: 0.0003 +[2026-03-04 23:54:27] (step=0061486) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.0301310898063, LR: 0.0003 +[2026-03-04 23:54:34] (step=0061487) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 12.030326746233614, LR: 0.0003 +[2026-03-04 23:54:42] (step=0061488) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.030522402660928, LR: 0.0003 +[2026-03-04 23:54:50] (step=0061489) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.03071805908824, LR: 0.0003 +[2026-03-04 23:54:58] (step=0061490) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.030913715515554, LR: 0.0003 +[2026-03-04 23:55:06] (step=0061491) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.031109371942868, LR: 0.0003 +[2026-03-04 23:55:14] (step=0061492) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.031305028370182, LR: 0.0003 +[2026-03-04 23:55:22] (step=0061493) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 12.031500684797496, LR: 0.0003 +[2026-03-04 23:55:30] (step=0061494) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.031696341224809, LR: 0.0003 +[2026-03-04 23:55:37] (step=0061495) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 12.031891997652123, LR: 0.0003 +[2026-03-04 23:55:45] (step=0061496) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.032087654079437, LR: 0.0003 +[2026-03-04 23:55:53] (step=0061497) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.03228331050675, LR: 0.0003 +[2026-03-04 23:56:01] (step=0061498) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.032478966934065, LR: 0.0003 +[2026-03-04 23:56:09] (step=0061499) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.032674623361377, LR: 0.0003 +[2026-03-04 23:56:17] (step=0061500) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.03287027978869, LR: 0.0003 +[2026-03-04 23:56:17] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0061500/ +[2026-03-04 23:56:25] (step=0061501) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 12.033065936216005, LR: 0.0003 +[2026-03-04 23:56:33] (step=0061502) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.033261592643319, LR: 0.0003 +[2026-03-04 23:56:40] (step=0061503) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.033457249070633, LR: 0.0003 +[2026-03-04 23:56:48] (step=0061504) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.033652905497945, LR: 0.0003 +[2026-03-04 23:56:56] (step=0061505) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.033848561925259, LR: 0.0003 +[2026-03-04 23:57:04] (step=0061506) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.034044218352573, LR: 0.0003 +[2026-03-04 23:57:12] (step=0061507) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.034239874779887, LR: 0.0003 +[2026-03-04 23:57:20] (step=0061508) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.0344355312072, LR: 0.0003 +[2026-03-04 23:57:28] (step=0061509) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.034631187634513, LR: 0.0003 +[2026-03-04 23:57:36] (step=0061510) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.034826844061827, LR: 0.0003 +[2026-03-04 23:57:43] (step=0061511) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.035022500489141, LR: 0.0003 +[2026-03-04 23:57:51] (step=0061512) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.035218156916455, LR: 0.0003 +[2026-03-04 23:57:59] (step=0061513) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.035413813343768, LR: 0.0003 +[2026-03-04 23:58:07] (step=0061514) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.035609469771082, LR: 0.0003 +[2026-03-04 23:58:15] (step=0061515) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.035805126198396, LR: 0.0003 +[2026-03-04 23:58:23] (step=0061516) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.03600078262571, LR: 0.0003 +[2026-03-04 23:58:31] (step=0061517) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.036196439053024, LR: 0.0003 +[2026-03-04 23:58:39] (step=0061518) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.036392095480336, LR: 0.0003 +[2026-03-04 23:58:46] (step=0061519) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.03658775190765, LR: 0.0003 +[2026-03-04 23:58:54] (step=0061520) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.036783408334964, LR: 0.0003 +[2026-03-04 23:59:02] (step=0061521) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 12.036979064762278, LR: 0.0003 +[2026-03-04 23:59:10] (step=0061522) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.037174721189592, LR: 0.0003 +[2026-03-04 23:59:18] (step=0061523) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.037370377616904, LR: 0.0003 +[2026-03-04 23:59:26] (step=0061524) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.037566034044218, LR: 0.0003 +[2026-03-04 23:59:34] (step=0061525) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.037761690471532, LR: 0.0003 +[2026-03-04 23:59:41] (step=0061526) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.037957346898846, LR: 0.0003 +[2026-03-04 23:59:49] (step=0061527) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.03815300332616, LR: 0.0003 +[2026-03-04 23:59:57] (step=0061528) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.038348659753472, LR: 0.0003 +[2026-03-05 00:00:05] (step=0061529) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 12.038544316180786, LR: 0.0003 +[2026-03-05 00:00:13] (step=0061530) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.0387399726081, LR: 0.0003 +[2026-03-05 00:00:21] (step=0061531) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.038935629035414, LR: 0.0003 +[2026-03-05 00:00:29] (step=0061532) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.039131285462727, LR: 0.0003 +[2026-03-05 00:00:37] (step=0061533) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.03932694189004, LR: 0.0003 +[2026-03-05 00:00:45] (step=0061534) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.039522598317355, LR: 0.0003 +[2026-03-05 00:00:52] (step=0061535) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.039718254744669, LR: 0.0003 +[2026-03-05 00:01:00] (step=0061536) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 12.039913911171983, LR: 0.0003 +[2026-03-05 00:01:08] (step=0061537) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.040109567599295, LR: 0.0003 +[2026-03-05 00:01:16] (step=0061538) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.040305224026609, LR: 0.0003 +[2026-03-05 00:01:24] (step=0061539) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.040500880453923, LR: 0.0003 +[2026-03-05 00:01:32] (step=0061540) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.040696536881237, LR: 0.0003 +[2026-03-05 00:01:40] (step=0061541) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.04089219330855, LR: 0.0003 +[2026-03-05 00:01:47] (step=0061542) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.041087849735863, LR: 0.0003 +[2026-03-05 00:01:55] (step=0061543) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.041283506163177, LR: 0.0003 +[2026-03-05 00:02:03] (step=0061544) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 12.041479162590491, LR: 0.0003 +[2026-03-05 00:02:11] (step=0061545) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.041674819017805, LR: 0.0003 +[2026-03-05 00:02:19] (step=0061546) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.04187047544512, LR: 0.0003 +[2026-03-05 00:02:27] (step=0061547) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.042066131872431, LR: 0.0003 +[2026-03-05 00:02:35] (step=0061548) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.042261788299745, LR: 0.0003 +[2026-03-05 00:02:42] (step=0061549) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.04245744472706, LR: 0.0003 +[2026-03-05 00:02:50] (step=0061550) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 12.042653101154373, LR: 0.0003 +[2026-03-05 00:02:58] (step=0061551) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.042848757581687, LR: 0.0003 +[2026-03-05 00:03:06] (step=0061552) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 12.043044414009, LR: 0.0003 +[2026-03-05 00:03:14] (step=0061553) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.043240070436314, LR: 0.0003 +[2026-03-05 00:03:22] (step=0061554) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.043435726863628, LR: 0.0003 +[2026-03-05 00:03:30] (step=0061555) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.043631383290942, LR: 0.0003 +[2026-03-05 00:03:38] (step=0061556) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.043827039718254, LR: 0.0003 +[2026-03-05 00:03:45] (step=0061557) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.044022696145568, LR: 0.0003 +[2026-03-05 00:03:53] (step=0061558) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.044218352572882, LR: 0.0003 +[2026-03-05 00:04:01] (step=0061559) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.044414009000196, LR: 0.0003 +[2026-03-05 00:04:09] (step=0061560) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 12.04460966542751, LR: 0.0003 +[2026-03-05 00:04:17] (step=0061561) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.044805321854822, LR: 0.0003 +[2026-03-05 00:04:25] (step=0061562) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.045000978282136, LR: 0.0003 +[2026-03-05 00:04:33] (step=0061563) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.04519663470945, LR: 0.0003 +[2026-03-05 00:04:40] (step=0061564) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.045392291136764, LR: 0.0003 +[2026-03-05 00:04:48] (step=0061565) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.045587947564078, LR: 0.0003 +[2026-03-05 00:04:56] (step=0061566) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.04578360399139, LR: 0.0003 +[2026-03-05 00:05:04] (step=0061567) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.045979260418704, LR: 0.0003 +[2026-03-05 00:05:12] (step=0061568) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.046174916846018, LR: 0.0003 +[2026-03-05 00:05:20] (step=0061569) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.046370573273332, LR: 0.0003 +[2026-03-05 00:05:28] (step=0061570) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.046566229700646, LR: 0.0003 +[2026-03-05 00:05:35] (step=0061571) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.046761886127959, LR: 0.0003 +[2026-03-05 00:05:43] (step=0061572) Train Loss: 0.4621, Train Steps/Sec: 0.13, Epoch: 12.046957542555273, LR: 0.0003 +[2026-03-05 00:05:51] (step=0061573) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.047153198982587, LR: 0.0003 +[2026-03-05 00:05:59] (step=0061574) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.0473488554099, LR: 0.0003 +[2026-03-05 00:06:07] (step=0061575) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.047544511837215, LR: 0.0003 +[2026-03-05 00:06:15] (step=0061576) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.047740168264527, LR: 0.0003 +[2026-03-05 00:06:23] (step=0061577) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.04793582469184, LR: 0.0003 +[2026-03-05 00:06:31] (step=0061578) Train Loss: 0.4554, Train Steps/Sec: 0.12, Epoch: 12.048131481119155, LR: 0.0003 +[2026-03-05 00:06:38] (step=0061579) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.048327137546469, LR: 0.0003 +[2026-03-05 00:06:46] (step=0061580) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.048522793973783, LR: 0.0003 +[2026-03-05 00:06:54] (step=0061581) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.048718450401095, LR: 0.0003 +[2026-03-05 00:07:02] (step=0061582) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.04891410682841, LR: 0.0003 +[2026-03-05 00:07:10] (step=0061583) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.049109763255723, LR: 0.0003 +[2026-03-05 00:07:18] (step=0061584) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.049305419683037, LR: 0.0003 +[2026-03-05 00:07:26] (step=0061585) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.04950107611035, LR: 0.0003 +[2026-03-05 00:07:34] (step=0061586) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.049696732537663, LR: 0.0003 +[2026-03-05 00:07:41] (step=0061587) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.049892388964977, LR: 0.0003 +[2026-03-05 00:07:49] (step=0061588) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.050088045392291, LR: 0.0003 +[2026-03-05 00:07:57] (step=0061589) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.050283701819605, LR: 0.0003 +[2026-03-05 00:08:05] (step=0061590) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.050479358246918, LR: 0.0003 +[2026-03-05 00:08:13] (step=0061591) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.050675014674232, LR: 0.0003 +[2026-03-05 00:08:21] (step=0061592) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.050870671101546, LR: 0.0003 +[2026-03-05 00:08:29] (step=0061593) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.05106632752886, LR: 0.0003 +[2026-03-05 00:08:36] (step=0061594) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.051261983956174, LR: 0.0003 +[2026-03-05 00:08:44] (step=0061595) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.051457640383486, LR: 0.0003 +[2026-03-05 00:08:52] (step=0061596) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.0516532968108, LR: 0.0003 +[2026-03-05 00:09:00] (step=0061597) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 12.051848953238114, LR: 0.0003 +[2026-03-05 00:09:08] (step=0061598) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.052044609665428, LR: 0.0003 +[2026-03-05 00:09:16] (step=0061599) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.052240266092742, LR: 0.0003 +[2026-03-05 00:09:24] (step=0061600) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.052435922520054, LR: 0.0003 +[2026-03-05 00:09:31] (step=0061601) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 12.052631578947368, LR: 0.0003 +[2026-03-05 00:09:39] (step=0061602) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.052827235374682, LR: 0.0003 +[2026-03-05 00:09:47] (step=0061603) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.053022891801996, LR: 0.0003 +[2026-03-05 00:09:55] (step=0061604) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.05321854822931, LR: 0.0003 +[2026-03-05 00:10:03] (step=0061605) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.053414204656622, LR: 0.0003 +[2026-03-05 00:10:11] (step=0061606) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.053609861083936, LR: 0.0003 +[2026-03-05 00:10:19] (step=0061607) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.05380551751125, LR: 0.0003 +[2026-03-05 00:10:27] (step=0061608) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.054001173938564, LR: 0.0003 +[2026-03-05 00:10:34] (step=0061609) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.054196830365877, LR: 0.0003 +[2026-03-05 00:10:42] (step=0061610) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.05439248679319, LR: 0.0003 +[2026-03-05 00:10:50] (step=0061611) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.054588143220505, LR: 0.0003 +[2026-03-05 00:10:58] (step=0061612) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.054783799647819, LR: 0.0003 +[2026-03-05 00:11:06] (step=0061613) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.054979456075133, LR: 0.0003 +[2026-03-05 00:11:14] (step=0061614) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.055175112502445, LR: 0.0003 +[2026-03-05 00:11:22] (step=0061615) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.055370768929759, LR: 0.0003 +[2026-03-05 00:11:30] (step=0061616) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 12.055566425357073, LR: 0.0003 +[2026-03-05 00:11:37] (step=0061617) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.055762081784387, LR: 0.0003 +[2026-03-05 00:11:45] (step=0061618) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.055957738211701, LR: 0.0003 +[2026-03-05 00:11:53] (step=0061619) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.056153394639013, LR: 0.0003 +[2026-03-05 00:12:01] (step=0061620) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.056349051066327, LR: 0.0003 +[2026-03-05 00:12:09] (step=0061621) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.056544707493641, LR: 0.0003 +[2026-03-05 00:12:17] (step=0061622) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.056740363920955, LR: 0.0003 +[2026-03-05 00:12:25] (step=0061623) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.05693602034827, LR: 0.0003 +[2026-03-05 00:12:32] (step=0061624) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.057131676775581, LR: 0.0003 +[2026-03-05 00:12:40] (step=0061625) Train Loss: 0.4214, Train Steps/Sec: 0.13, Epoch: 12.057327333202895, LR: 0.0003 +[2026-03-05 00:12:48] (step=0061626) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.05752298963021, LR: 0.0003 +[2026-03-05 00:12:56] (step=0061627) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 12.057718646057523, LR: 0.0003 +[2026-03-05 00:13:04] (step=0061628) Train Loss: 0.4571, Train Steps/Sec: 0.12, Epoch: 12.057914302484837, LR: 0.0003 +[2026-03-05 00:13:12] (step=0061629) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.05810995891215, LR: 0.0003 +[2026-03-05 00:13:20] (step=0061630) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 12.058305615339464, LR: 0.0003 +[2026-03-05 00:13:28] (step=0061631) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.058501271766778, LR: 0.0003 +[2026-03-05 00:13:35] (step=0061632) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.058696928194092, LR: 0.0003 +[2026-03-05 00:13:43] (step=0061633) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.058892584621406, LR: 0.0003 +[2026-03-05 00:13:51] (step=0061634) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.059088241048718, LR: 0.0003 +[2026-03-05 00:13:59] (step=0061635) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.059283897476032, LR: 0.0003 +[2026-03-05 00:14:07] (step=0061636) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.059479553903346, LR: 0.0003 +[2026-03-05 00:14:15] (step=0061637) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 12.05967521033066, LR: 0.0003 +[2026-03-05 00:14:23] (step=0061638) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.059870866757972, LR: 0.0003 +[2026-03-05 00:14:31] (step=0061639) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 12.060066523185286, LR: 0.0003 +[2026-03-05 00:14:38] (step=0061640) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.0602621796126, LR: 0.0003 +[2026-03-05 00:14:46] (step=0061641) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.060457836039914, LR: 0.0003 +[2026-03-05 00:14:54] (step=0061642) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.060653492467228, LR: 0.0003 +[2026-03-05 00:15:02] (step=0061643) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.06084914889454, LR: 0.0003 +[2026-03-05 00:15:10] (step=0061644) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.061044805321854, LR: 0.0003 +[2026-03-05 00:15:18] (step=0061645) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.061240461749168, LR: 0.0003 +[2026-03-05 00:15:26] (step=0061646) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 12.061436118176482, LR: 0.0003 +[2026-03-05 00:15:33] (step=0061647) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.061631774603796, LR: 0.0003 +[2026-03-05 00:15:41] (step=0061648) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.061827431031109, LR: 0.0003 +[2026-03-05 00:15:49] (step=0061649) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.062023087458423, LR: 0.0003 +[2026-03-05 00:15:57] (step=0061650) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 12.062218743885737, LR: 0.0003 +[2026-03-05 00:16:05] (step=0061651) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.06241440031305, LR: 0.0003 +[2026-03-05 00:16:13] (step=0061652) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 12.062610056740365, LR: 0.0003 +[2026-03-05 00:16:21] (step=0061653) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.062805713167677, LR: 0.0003 +[2026-03-05 00:16:29] (step=0061654) Train Loss: 0.4533, Train Steps/Sec: 0.12, Epoch: 12.063001369594991, LR: 0.0003 +[2026-03-05 00:16:37] (step=0061655) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 12.063197026022305, LR: 0.0003 +[2026-03-05 00:16:44] (step=0061656) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.063392682449619, LR: 0.0003 +[2026-03-05 00:16:52] (step=0061657) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.063588338876933, LR: 0.0003 +[2026-03-05 00:17:00] (step=0061658) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.063783995304245, LR: 0.0003 +[2026-03-05 00:17:08] (step=0061659) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.06397965173156, LR: 0.0003 +[2026-03-05 00:17:16] (step=0061660) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.064175308158873, LR: 0.0003 +[2026-03-05 00:17:24] (step=0061661) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.064370964586187, LR: 0.0003 +[2026-03-05 00:17:32] (step=0061662) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.0645666210135, LR: 0.0003 +[2026-03-05 00:17:40] (step=0061663) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.064762277440813, LR: 0.0003 +[2026-03-05 00:17:47] (step=0061664) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 12.064957933868127, LR: 0.0003 +[2026-03-05 00:17:55] (step=0061665) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.065153590295441, LR: 0.0003 +[2026-03-05 00:18:03] (step=0061666) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.065349246722755, LR: 0.0003 +[2026-03-05 00:18:11] (step=0061667) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.065544903150068, LR: 0.0003 +[2026-03-05 00:18:19] (step=0061668) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.065740559577382, LR: 0.0003 +[2026-03-05 00:18:27] (step=0061669) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.065936216004696, LR: 0.0003 +[2026-03-05 00:18:35] (step=0061670) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.06613187243201, LR: 0.0003 +[2026-03-05 00:18:42] (step=0061671) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.066327528859324, LR: 0.0003 +[2026-03-05 00:18:50] (step=0061672) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.066523185286636, LR: 0.0003 +[2026-03-05 00:18:58] (step=0061673) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.06671884171395, LR: 0.0003 +[2026-03-05 00:19:06] (step=0061674) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 12.066914498141264, LR: 0.0003 +[2026-03-05 00:19:14] (step=0061675) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.067110154568578, LR: 0.0003 +[2026-03-05 00:19:22] (step=0061676) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.067305810995892, LR: 0.0003 +[2026-03-05 00:19:30] (step=0061677) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.067501467423204, LR: 0.0003 +[2026-03-05 00:19:37] (step=0061678) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.067697123850518, LR: 0.0003 +[2026-03-05 00:19:45] (step=0061679) Train Loss: 0.4458, Train Steps/Sec: 0.12, Epoch: 12.067892780277832, LR: 0.0003 +[2026-03-05 00:19:53] (step=0061680) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.068088436705146, LR: 0.0003 +[2026-03-05 00:20:01] (step=0061681) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.06828409313246, LR: 0.0003 +[2026-03-05 00:20:09] (step=0061682) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.068479749559772, LR: 0.0003 +[2026-03-05 00:20:17] (step=0061683) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.068675405987086, LR: 0.0003 +[2026-03-05 00:20:25] (step=0061684) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.0688710624144, LR: 0.0003 +[2026-03-05 00:20:33] (step=0061685) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.069066718841714, LR: 0.0003 +[2026-03-05 00:20:40] (step=0061686) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.069262375269028, LR: 0.0003 +[2026-03-05 00:20:48] (step=0061687) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.06945803169634, LR: 0.0003 +[2026-03-05 00:20:56] (step=0061688) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 12.069653688123655, LR: 0.0003 +[2026-03-05 00:21:04] (step=0061689) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.069849344550969, LR: 0.0003 +[2026-03-05 00:21:12] (step=0061690) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.070045000978283, LR: 0.0003 +[2026-03-05 00:21:20] (step=0061691) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.070240657405595, LR: 0.0003 +[2026-03-05 00:21:28] (step=0061692) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.070436313832909, LR: 0.0003 +[2026-03-05 00:21:35] (step=0061693) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.070631970260223, LR: 0.0003 +[2026-03-05 00:21:43] (step=0061694) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.070827626687537, LR: 0.0003 +[2026-03-05 00:21:51] (step=0061695) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.071023283114851, LR: 0.0003 +[2026-03-05 00:21:59] (step=0061696) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.071218939542163, LR: 0.0003 +[2026-03-05 00:22:07] (step=0061697) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.071414595969477, LR: 0.0003 +[2026-03-05 00:22:15] (step=0061698) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.071610252396791, LR: 0.0003 +[2026-03-05 00:22:23] (step=0061699) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.071805908824105, LR: 0.0003 +[2026-03-05 00:22:30] (step=0061700) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.07200156525142, LR: 0.0003 +[2026-03-05 00:22:38] (step=0061701) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 12.072197221678731, LR: 0.0003 +[2026-03-05 00:22:46] (step=0061702) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 12.072392878106045, LR: 0.0003 +[2026-03-05 00:22:54] (step=0061703) Train Loss: 0.4413, Train Steps/Sec: 0.12, Epoch: 12.07258853453336, LR: 0.0003 +[2026-03-05 00:23:02] (step=0061704) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.072784190960673, LR: 0.0003 +[2026-03-05 00:23:10] (step=0061705) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.072979847387987, LR: 0.0003 +[2026-03-05 00:23:18] (step=0061706) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.0731755038153, LR: 0.0003 +[2026-03-05 00:23:26] (step=0061707) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.073371160242614, LR: 0.0003 +[2026-03-05 00:23:34] (step=0061708) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.073566816669928, LR: 0.0003 +[2026-03-05 00:23:41] (step=0061709) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.073762473097242, LR: 0.0003 +[2026-03-05 00:23:49] (step=0061710) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.073958129524556, LR: 0.0003 +[2026-03-05 00:23:57] (step=0061711) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.074153785951868, LR: 0.0003 +[2026-03-05 00:24:05] (step=0061712) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.074349442379182, LR: 0.0003 +[2026-03-05 00:24:13] (step=0061713) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 12.074545098806496, LR: 0.0003 +[2026-03-05 00:24:21] (step=0061714) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.07474075523381, LR: 0.0003 +[2026-03-05 00:24:29] (step=0061715) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.074936411661122, LR: 0.0003 +[2026-03-05 00:24:36] (step=0061716) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.075132068088436, LR: 0.0003 +[2026-03-05 00:24:44] (step=0061717) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.07532772451575, LR: 0.0003 +[2026-03-05 00:24:52] (step=0061718) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.075523380943064, LR: 0.0003 +[2026-03-05 00:25:00] (step=0061719) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.075719037370378, LR: 0.0003 +[2026-03-05 00:25:08] (step=0061720) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.07591469379769, LR: 0.0003 +[2026-03-05 00:25:16] (step=0061721) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.076110350225004, LR: 0.0003 +[2026-03-05 00:25:24] (step=0061722) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.076306006652318, LR: 0.0003 +[2026-03-05 00:25:31] (step=0061723) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.076501663079632, LR: 0.0003 +[2026-03-05 00:25:39] (step=0061724) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.076697319506946, LR: 0.0003 +[2026-03-05 00:25:47] (step=0061725) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.076892975934259, LR: 0.0003 +[2026-03-05 00:25:55] (step=0061726) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.077088632361573, LR: 0.0003 +[2026-03-05 00:26:03] (step=0061727) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.077284288788887, LR: 0.0003 +[2026-03-05 00:26:11] (step=0061728) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.0774799452162, LR: 0.0003 +[2026-03-05 00:26:19] (step=0061729) Train Loss: 0.4272, Train Steps/Sec: 0.12, Epoch: 12.077675601643515, LR: 0.0003 +[2026-03-05 00:26:27] (step=0061730) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.077871258070827, LR: 0.0003 +[2026-03-05 00:26:35] (step=0061731) Train Loss: 0.4658, Train Steps/Sec: 0.13, Epoch: 12.078066914498141, LR: 0.0003 +[2026-03-05 00:26:42] (step=0061732) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.078262570925455, LR: 0.0003 +[2026-03-05 00:26:50] (step=0061733) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.078458227352769, LR: 0.0003 +[2026-03-05 00:26:58] (step=0061734) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.078653883780083, LR: 0.0003 +[2026-03-05 00:27:06] (step=0061735) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.078849540207395, LR: 0.0003 +[2026-03-05 00:27:14] (step=0061736) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.07904519663471, LR: 0.0003 +[2026-03-05 00:27:22] (step=0061737) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.079240853062023, LR: 0.0003 +[2026-03-05 00:27:30] (step=0061738) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 12.079436509489337, LR: 0.0003 +[2026-03-05 00:27:37] (step=0061739) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.079632165916651, LR: 0.0003 +[2026-03-05 00:27:45] (step=0061740) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.079827822343963, LR: 0.0003 +[2026-03-05 00:27:53] (step=0061741) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.080023478771277, LR: 0.0003 +[2026-03-05 00:28:01] (step=0061742) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.080219135198591, LR: 0.0003 +[2026-03-05 00:28:09] (step=0061743) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.080414791625905, LR: 0.0003 +[2026-03-05 00:28:17] (step=0061744) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.080610448053218, LR: 0.0003 +[2026-03-05 00:28:25] (step=0061745) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.080806104480532, LR: 0.0003 +[2026-03-05 00:28:32] (step=0061746) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.081001760907846, LR: 0.0003 +[2026-03-05 00:28:40] (step=0061747) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.08119741733516, LR: 0.0003 +[2026-03-05 00:28:48] (step=0061748) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.081393073762474, LR: 0.0003 +[2026-03-05 00:28:56] (step=0061749) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.081588730189786, LR: 0.0003 +[2026-03-05 00:29:04] (step=0061750) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.0817843866171, LR: 0.0003 +[2026-03-05 00:29:12] (step=0061751) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.081980043044414, LR: 0.0003 +[2026-03-05 00:29:20] (step=0061752) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.082175699471728, LR: 0.0003 +[2026-03-05 00:29:28] (step=0061753) Train Loss: 0.4659, Train Steps/Sec: 0.12, Epoch: 12.082371355899042, LR: 0.0003 +[2026-03-05 00:29:35] (step=0061754) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.082567012326354, LR: 0.0003 +[2026-03-05 00:29:43] (step=0061755) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.082762668753668, LR: 0.0003 +[2026-03-05 00:29:51] (step=0061756) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.082958325180982, LR: 0.0003 +[2026-03-05 00:29:59] (step=0061757) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.083153981608296, LR: 0.0003 +[2026-03-05 00:30:07] (step=0061758) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.08334963803561, LR: 0.0003 +[2026-03-05 00:30:15] (step=0061759) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.083545294462922, LR: 0.0003 +[2026-03-05 00:30:23] (step=0061760) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.083740950890236, LR: 0.0003 +[2026-03-05 00:30:31] (step=0061761) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.08393660731755, LR: 0.0003 +[2026-03-05 00:30:38] (step=0061762) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.084132263744864, LR: 0.0003 +[2026-03-05 00:30:46] (step=0061763) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.084327920172178, LR: 0.0003 +[2026-03-05 00:30:54] (step=0061764) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.08452357659949, LR: 0.0003 +[2026-03-05 00:31:02] (step=0061765) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.084719233026805, LR: 0.0003 +[2026-03-05 00:31:10] (step=0061766) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 12.084914889454119, LR: 0.0003 +[2026-03-05 00:31:18] (step=0061767) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.085110545881433, LR: 0.0003 +[2026-03-05 00:31:26] (step=0061768) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.085306202308745, LR: 0.0003 +[2026-03-05 00:31:33] (step=0061769) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.085501858736059, LR: 0.0003 +[2026-03-05 00:31:41] (step=0061770) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.085697515163373, LR: 0.0003 +[2026-03-05 00:31:49] (step=0061771) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.085893171590687, LR: 0.0003 +[2026-03-05 00:31:57] (step=0061772) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.086088828018001, LR: 0.0003 +[2026-03-05 00:32:05] (step=0061773) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.086284484445313, LR: 0.0003 +[2026-03-05 00:32:13] (step=0061774) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.086480140872627, LR: 0.0003 +[2026-03-05 00:32:21] (step=0061775) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.086675797299941, LR: 0.0003 +[2026-03-05 00:32:29] (step=0061776) Train Loss: 0.4412, Train Steps/Sec: 0.12, Epoch: 12.086871453727255, LR: 0.0003 +[2026-03-05 00:32:36] (step=0061777) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 12.08706711015457, LR: 0.0003 +[2026-03-05 00:32:44] (step=0061778) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.087262766581881, LR: 0.0003 +[2026-03-05 00:32:52] (step=0061779) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.087458423009195, LR: 0.0003 +[2026-03-05 00:33:00] (step=0061780) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 12.08765407943651, LR: 0.0003 +[2026-03-05 00:33:08] (step=0061781) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.087849735863823, LR: 0.0003 +[2026-03-05 00:33:16] (step=0061782) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.088045392291138, LR: 0.0003 +[2026-03-05 00:33:24] (step=0061783) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.08824104871845, LR: 0.0003 +[2026-03-05 00:33:32] (step=0061784) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.088436705145764, LR: 0.0003 +[2026-03-05 00:33:39] (step=0061785) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.088632361573078, LR: 0.0003 +[2026-03-05 00:33:47] (step=0061786) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.088828018000392, LR: 0.0003 +[2026-03-05 00:33:55] (step=0061787) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.089023674427706, LR: 0.0003 +[2026-03-05 00:34:03] (step=0061788) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.089219330855018, LR: 0.0003 +[2026-03-05 00:34:11] (step=0061789) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.089414987282332, LR: 0.0003 +[2026-03-05 00:34:19] (step=0061790) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.089610643709646, LR: 0.0003 +[2026-03-05 00:34:27] (step=0061791) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.08980630013696, LR: 0.0003 +[2026-03-05 00:34:34] (step=0061792) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.090001956564274, LR: 0.0003 +[2026-03-05 00:34:42] (step=0061793) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.090197612991586, LR: 0.0003 +[2026-03-05 00:34:50] (step=0061794) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.0903932694189, LR: 0.0003 +[2026-03-05 00:34:58] (step=0061795) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 12.090588925846214, LR: 0.0003 +[2026-03-05 00:35:06] (step=0061796) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.090784582273528, LR: 0.0003 +[2026-03-05 00:35:14] (step=0061797) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 12.09098023870084, LR: 0.0003 +[2026-03-05 00:35:22] (step=0061798) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.091175895128154, LR: 0.0003 +[2026-03-05 00:35:30] (step=0061799) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 12.091371551555468, LR: 0.0003 +[2026-03-05 00:35:37] (step=0061800) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.091567207982783, LR: 0.0003 +[2026-03-05 00:35:45] (step=0061801) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.091762864410097, LR: 0.0003 +[2026-03-05 00:35:53] (step=0061802) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.091958520837409, LR: 0.0003 +[2026-03-05 00:36:01] (step=0061803) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.092154177264723, LR: 0.0003 +[2026-03-05 00:36:09] (step=0061804) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.092349833692037, LR: 0.0003 +[2026-03-05 00:36:17] (step=0061805) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.09254549011935, LR: 0.0003 +[2026-03-05 00:36:25] (step=0061806) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.092741146546665, LR: 0.0003 +[2026-03-05 00:36:33] (step=0061807) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.092936802973977, LR: 0.0003 +[2026-03-05 00:36:40] (step=0061808) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.093132459401291, LR: 0.0003 +[2026-03-05 00:36:48] (step=0061809) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.093328115828605, LR: 0.0003 +[2026-03-05 00:36:56] (step=0061810) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.093523772255919, LR: 0.0003 +[2026-03-05 00:37:04] (step=0061811) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.093719428683233, LR: 0.0003 +[2026-03-05 00:37:12] (step=0061812) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 12.093915085110545, LR: 0.0003 +[2026-03-05 00:37:20] (step=0061813) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.09411074153786, LR: 0.0003 +[2026-03-05 00:37:27] (step=0061814) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.094306397965173, LR: 0.0003 +[2026-03-05 00:37:35] (step=0061815) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.094502054392487, LR: 0.0003 +[2026-03-05 00:37:43] (step=0061816) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.094697710819801, LR: 0.0003 +[2026-03-05 00:37:51] (step=0061817) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.094893367247114, LR: 0.0003 +[2026-03-05 00:37:59] (step=0061818) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.095089023674428, LR: 0.0003 +[2026-03-05 00:38:07] (step=0061819) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.095284680101742, LR: 0.0003 +[2026-03-05 00:38:15] (step=0061820) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.095480336529056, LR: 0.0003 +[2026-03-05 00:38:23] (step=0061821) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.095675992956368, LR: 0.0003 +[2026-03-05 00:38:30] (step=0061822) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.095871649383682, LR: 0.0003 +[2026-03-05 00:38:38] (step=0061823) Train Loss: 0.4549, Train Steps/Sec: 0.12, Epoch: 12.096067305810996, LR: 0.0003 +[2026-03-05 00:38:46] (step=0061824) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.09626296223831, LR: 0.0003 +[2026-03-05 00:38:54] (step=0061825) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.096458618665624, LR: 0.0003 +[2026-03-05 00:39:02] (step=0061826) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.096654275092936, LR: 0.0003 +[2026-03-05 00:39:10] (step=0061827) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.09684993152025, LR: 0.0003 +[2026-03-05 00:39:18] (step=0061828) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.097045587947564, LR: 0.0003 +[2026-03-05 00:39:26] (step=0061829) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.097241244374878, LR: 0.0003 +[2026-03-05 00:39:34] (step=0061830) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.097436900802192, LR: 0.0003 +[2026-03-05 00:39:41] (step=0061831) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.097632557229504, LR: 0.0003 +[2026-03-05 00:39:49] (step=0061832) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.097828213656818, LR: 0.0003 +[2026-03-05 00:39:57] (step=0061833) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.098023870084132, LR: 0.0003 +[2026-03-05 00:40:05] (step=0061834) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.098219526511446, LR: 0.0003 +[2026-03-05 00:40:13] (step=0061835) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.09841518293876, LR: 0.0003 +[2026-03-05 00:40:21] (step=0061836) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.098610839366073, LR: 0.0003 +[2026-03-05 00:40:29] (step=0061837) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 12.098806495793387, LR: 0.0003 +[2026-03-05 00:40:36] (step=0061838) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 12.0990021522207, LR: 0.0003 +[2026-03-05 00:40:44] (step=0061839) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.099197808648015, LR: 0.0003 +[2026-03-05 00:40:52] (step=0061840) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.099393465075329, LR: 0.0003 +[2026-03-05 00:41:00] (step=0061841) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.09958912150264, LR: 0.0003 +[2026-03-05 00:41:08] (step=0061842) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.099784777929955, LR: 0.0003 +[2026-03-05 00:41:16] (step=0061843) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.099980434357269, LR: 0.0003 +[2026-03-05 00:41:24] (step=0061844) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.100176090784583, LR: 0.0003 +[2026-03-05 00:41:32] (step=0061845) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.100371747211897, LR: 0.0003 +[2026-03-05 00:41:39] (step=0061846) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 12.100567403639209, LR: 0.0003 +[2026-03-05 00:41:47] (step=0061847) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.100763060066523, LR: 0.0003 +[2026-03-05 00:41:55] (step=0061848) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.100958716493837, LR: 0.0003 +[2026-03-05 00:42:03] (step=0061849) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.101154372921151, LR: 0.0003 +[2026-03-05 00:42:11] (step=0061850) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.101350029348463, LR: 0.0003 +[2026-03-05 00:42:19] (step=0061851) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.101545685775777, LR: 0.0003 +[2026-03-05 00:42:27] (step=0061852) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.101741342203091, LR: 0.0003 +[2026-03-05 00:42:35] (step=0061853) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.101936998630405, LR: 0.0003 +[2026-03-05 00:42:42] (step=0061854) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.10213265505772, LR: 0.0003 +[2026-03-05 00:42:50] (step=0061855) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.102328311485032, LR: 0.0003 +[2026-03-05 00:42:58] (step=0061856) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.102523967912346, LR: 0.0003 +[2026-03-05 00:43:06] (step=0061857) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.10271962433966, LR: 0.0003 +[2026-03-05 00:43:14] (step=0061858) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.102915280766974, LR: 0.0003 +[2026-03-05 00:43:22] (step=0061859) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.103110937194288, LR: 0.0003 +[2026-03-05 00:43:30] (step=0061860) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.1033065936216, LR: 0.0003 +[2026-03-05 00:43:37] (step=0061861) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 12.103502250048914, LR: 0.0003 +[2026-03-05 00:43:45] (step=0061862) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 12.103697906476228, LR: 0.0003 +[2026-03-05 00:43:53] (step=0061863) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.103893562903542, LR: 0.0003 +[2026-03-05 00:44:01] (step=0061864) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.104089219330856, LR: 0.0003 +[2026-03-05 00:44:09] (step=0061865) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.104284875758168, LR: 0.0003 +[2026-03-05 00:44:17] (step=0061866) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.104480532185482, LR: 0.0003 +[2026-03-05 00:44:25] (step=0061867) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 12.104676188612796, LR: 0.0003 +[2026-03-05 00:44:33] (step=0061868) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.10487184504011, LR: 0.0003 +[2026-03-05 00:44:41] (step=0061869) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.105067501467424, LR: 0.0003 +[2026-03-05 00:44:48] (step=0061870) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.105263157894736, LR: 0.0003 +[2026-03-05 00:44:56] (step=0061871) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.10545881432205, LR: 0.0003 +[2026-03-05 00:45:04] (step=0061872) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.105654470749364, LR: 0.0003 +[2026-03-05 00:45:12] (step=0061873) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.105850127176678, LR: 0.0003 +[2026-03-05 00:45:20] (step=0061874) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.10604578360399, LR: 0.0003 +[2026-03-05 00:45:28] (step=0061875) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.106241440031305, LR: 0.0003 +[2026-03-05 00:45:36] (step=0061876) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.106437096458619, LR: 0.0003 +[2026-03-05 00:45:43] (step=0061877) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.106632752885933, LR: 0.0003 +[2026-03-05 00:45:51] (step=0061878) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.106828409313247, LR: 0.0003 +[2026-03-05 00:45:59] (step=0061879) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.107024065740559, LR: 0.0003 +[2026-03-05 00:46:07] (step=0061880) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.107219722167873, LR: 0.0003 +[2026-03-05 00:46:15] (step=0061881) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.107415378595187, LR: 0.0003 +[2026-03-05 00:46:23] (step=0061882) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.1076110350225, LR: 0.0003 +[2026-03-05 00:46:31] (step=0061883) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.107806691449815, LR: 0.0003 +[2026-03-05 00:46:38] (step=0061884) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.108002347877127, LR: 0.0003 +[2026-03-05 00:46:46] (step=0061885) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.108198004304441, LR: 0.0003 +[2026-03-05 00:46:54] (step=0061886) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.108393660731755, LR: 0.0003 +[2026-03-05 00:47:02] (step=0061887) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 12.108589317159069, LR: 0.0003 +[2026-03-05 00:47:10] (step=0061888) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.108784973586383, LR: 0.0003 +[2026-03-05 00:47:18] (step=0061889) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.108980630013695, LR: 0.0003 +[2026-03-05 00:47:26] (step=0061890) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.10917628644101, LR: 0.0003 +[2026-03-05 00:47:34] (step=0061891) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.109371942868323, LR: 0.0003 +[2026-03-05 00:47:41] (step=0061892) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 12.109567599295637, LR: 0.0003 +[2026-03-05 00:47:49] (step=0061893) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.109763255722951, LR: 0.0003 +[2026-03-05 00:47:57] (step=0061894) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.109958912150264, LR: 0.0003 +[2026-03-05 00:48:05] (step=0061895) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.110154568577578, LR: 0.0003 +[2026-03-05 00:48:13] (step=0061896) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.110350225004892, LR: 0.0003 +[2026-03-05 00:48:21] (step=0061897) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.110545881432206, LR: 0.0003 +[2026-03-05 00:48:29] (step=0061898) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.11074153785952, LR: 0.0003 +[2026-03-05 00:48:36] (step=0061899) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.110937194286832, LR: 0.0003 +[2026-03-05 00:48:44] (step=0061900) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.111132850714146, LR: 0.0003 +[2026-03-05 00:48:52] (step=0061901) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.11132850714146, LR: 0.0003 +[2026-03-05 00:49:00] (step=0061902) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.111524163568774, LR: 0.0003 +[2026-03-05 00:49:08] (step=0061903) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.111719819996086, LR: 0.0003 +[2026-03-05 00:49:16] (step=0061904) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.1119154764234, LR: 0.0003 +[2026-03-05 00:49:24] (step=0061905) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.112111132850714, LR: 0.0003 +[2026-03-05 00:49:32] (step=0061906) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 12.112306789278028, LR: 0.0003 +[2026-03-05 00:49:39] (step=0061907) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 12.112502445705342, LR: 0.0003 +[2026-03-05 00:49:47] (step=0061908) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.112698102132654, LR: 0.0003 +[2026-03-05 00:49:55] (step=0061909) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.112893758559968, LR: 0.0003 +[2026-03-05 00:50:03] (step=0061910) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.113089414987282, LR: 0.0003 +[2026-03-05 00:50:11] (step=0061911) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.113285071414596, LR: 0.0003 +[2026-03-05 00:50:19] (step=0061912) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.11348072784191, LR: 0.0003 +[2026-03-05 00:50:27] (step=0061913) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.113676384269223, LR: 0.0003 +[2026-03-05 00:50:34] (step=0061914) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.113872040696537, LR: 0.0003 +[2026-03-05 00:50:42] (step=0061915) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.11406769712385, LR: 0.0003 +[2026-03-05 00:50:50] (step=0061916) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.114263353551165, LR: 0.0003 +[2026-03-05 00:50:58] (step=0061917) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 12.114459009978479, LR: 0.0003 +[2026-03-05 00:51:06] (step=0061918) Train Loss: 0.4390, Train Steps/Sec: 0.12, Epoch: 12.11465466640579, LR: 0.0003 +[2026-03-05 00:51:14] (step=0061919) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 12.114850322833105, LR: 0.0003 +[2026-03-05 00:51:22] (step=0061920) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 12.115045979260419, LR: 0.0003 +[2026-03-05 00:51:30] (step=0061921) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.115241635687733, LR: 0.0003 +[2026-03-05 00:51:38] (step=0061922) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.115437292115047, LR: 0.0003 +[2026-03-05 00:51:45] (step=0061923) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.115632948542359, LR: 0.0003 +[2026-03-05 00:51:53] (step=0061924) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.115828604969673, LR: 0.0003 +[2026-03-05 00:52:01] (step=0061925) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.116024261396987, LR: 0.0003 +[2026-03-05 00:52:09] (step=0061926) Train Loss: 0.4633, Train Steps/Sec: 0.13, Epoch: 12.116219917824301, LR: 0.0003 +[2026-03-05 00:52:17] (step=0061927) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.116415574251613, LR: 0.0003 +[2026-03-05 00:52:25] (step=0061928) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.116611230678927, LR: 0.0003 +[2026-03-05 00:52:33] (step=0061929) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.116806887106241, LR: 0.0003 +[2026-03-05 00:52:40] (step=0061930) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.117002543533555, LR: 0.0003 +[2026-03-05 00:52:48] (step=0061931) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.11719819996087, LR: 0.0003 +[2026-03-05 00:52:56] (step=0061932) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.117393856388182, LR: 0.0003 +[2026-03-05 00:53:04] (step=0061933) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.117589512815496, LR: 0.0003 +[2026-03-05 00:53:12] (step=0061934) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.11778516924281, LR: 0.0003 +[2026-03-05 00:53:20] (step=0061935) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.117980825670124, LR: 0.0003 +[2026-03-05 00:53:28] (step=0061936) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.118176482097438, LR: 0.0003 +[2026-03-05 00:53:35] (step=0061937) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.11837213852475, LR: 0.0003 +[2026-03-05 00:53:43] (step=0061938) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.118567794952064, LR: 0.0003 +[2026-03-05 00:53:51] (step=0061939) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.118763451379378, LR: 0.0003 +[2026-03-05 00:53:59] (step=0061940) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.118959107806692, LR: 0.0003 +[2026-03-05 00:54:07] (step=0061941) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.119154764234006, LR: 0.0003 +[2026-03-05 00:54:15] (step=0061942) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 12.119350420661318, LR: 0.0003 +[2026-03-05 00:54:23] (step=0061943) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.119546077088632, LR: 0.0003 +[2026-03-05 00:54:30] (step=0061944) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 12.119741733515946, LR: 0.0003 +[2026-03-05 00:54:38] (step=0061945) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.11993738994326, LR: 0.0003 +[2026-03-05 00:54:46] (step=0061946) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.120133046370574, LR: 0.0003 +[2026-03-05 00:54:54] (step=0061947) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.120328702797886, LR: 0.0003 +[2026-03-05 00:55:02] (step=0061948) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.1205243592252, LR: 0.0003 +[2026-03-05 00:55:10] (step=0061949) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.120720015652514, LR: 0.0003 +[2026-03-05 00:55:18] (step=0061950) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.120915672079828, LR: 0.0003 +[2026-03-05 00:55:25] (step=0061951) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.121111328507142, LR: 0.0003 +[2026-03-05 00:55:33] (step=0061952) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.121306984934455, LR: 0.0003 +[2026-03-05 00:55:41] (step=0061953) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.121502641361769, LR: 0.0003 +[2026-03-05 00:55:49] (step=0061954) Train Loss: 0.4648, Train Steps/Sec: 0.13, Epoch: 12.121698297789083, LR: 0.0003 +[2026-03-05 00:55:57] (step=0061955) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.121893954216397, LR: 0.0003 +[2026-03-05 00:56:05] (step=0061956) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.122089610643709, LR: 0.0003 +[2026-03-05 00:56:13] (step=0061957) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.122285267071023, LR: 0.0003 +[2026-03-05 00:56:21] (step=0061958) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.122480923498337, LR: 0.0003 +[2026-03-05 00:56:28] (step=0061959) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.12267657992565, LR: 0.0003 +[2026-03-05 00:56:36] (step=0061960) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.122872236352965, LR: 0.0003 +[2026-03-05 00:56:44] (step=0061961) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.123067892780277, LR: 0.0003 +[2026-03-05 00:56:52] (step=0061962) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.123263549207591, LR: 0.0003 +[2026-03-05 00:57:00] (step=0061963) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.123459205634905, LR: 0.0003 +[2026-03-05 00:57:08] (step=0061964) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.123654862062219, LR: 0.0003 +[2026-03-05 00:57:16] (step=0061965) Train Loss: 0.4413, Train Steps/Sec: 0.12, Epoch: 12.123850518489533, LR: 0.0003 +[2026-03-05 00:57:24] (step=0061966) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.124046174916845, LR: 0.0003 +[2026-03-05 00:57:31] (step=0061967) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.12424183134416, LR: 0.0003 +[2026-03-05 00:57:39] (step=0061968) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.124437487771473, LR: 0.0003 +[2026-03-05 00:57:47] (step=0061969) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.124633144198787, LR: 0.0003 +[2026-03-05 00:57:55] (step=0061970) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.124828800626101, LR: 0.0003 +[2026-03-05 00:58:03] (step=0061971) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.125024457053414, LR: 0.0003 +[2026-03-05 00:58:11] (step=0061972) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.125220113480728, LR: 0.0003 +[2026-03-05 00:58:19] (step=0061973) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.125415769908042, LR: 0.0003 +[2026-03-05 00:58:27] (step=0061974) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.125611426335356, LR: 0.0003 +[2026-03-05 00:58:34] (step=0061975) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.12580708276267, LR: 0.0003 +[2026-03-05 00:58:42] (step=0061976) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.126002739189982, LR: 0.0003 +[2026-03-05 00:58:50] (step=0061977) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 12.126198395617296, LR: 0.0003 +[2026-03-05 00:58:58] (step=0061978) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.12639405204461, LR: 0.0003 +[2026-03-05 00:59:06] (step=0061979) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.126589708471924, LR: 0.0003 +[2026-03-05 00:59:14] (step=0061980) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.126785364899236, LR: 0.0003 +[2026-03-05 00:59:22] (step=0061981) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.12698102132655, LR: 0.0003 +[2026-03-05 00:59:29] (step=0061982) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.127176677753864, LR: 0.0003 +[2026-03-05 00:59:37] (step=0061983) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.127372334181178, LR: 0.0003 +[2026-03-05 00:59:45] (step=0061984) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.127567990608492, LR: 0.0003 +[2026-03-05 00:59:53] (step=0061985) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.127763647035804, LR: 0.0003 +[2026-03-05 01:00:01] (step=0061986) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.127959303463118, LR: 0.0003 +[2026-03-05 01:00:09] (step=0061987) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.128154959890432, LR: 0.0003 +[2026-03-05 01:00:17] (step=0061988) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.128350616317746, LR: 0.0003 +[2026-03-05 01:00:25] (step=0061989) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.12854627274506, LR: 0.0003 +[2026-03-05 01:00:32] (step=0061990) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.128741929172373, LR: 0.0003 +[2026-03-05 01:00:40] (step=0061991) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 12.128937585599687, LR: 0.0003 +[2026-03-05 01:00:48] (step=0061992) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.129133242027, LR: 0.0003 +[2026-03-05 01:00:56] (step=0061993) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 12.129328898454315, LR: 0.0003 +[2026-03-05 01:01:04] (step=0061994) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.129524554881629, LR: 0.0003 +[2026-03-05 01:01:12] (step=0061995) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.12972021130894, LR: 0.0003 +[2026-03-05 01:01:20] (step=0061996) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.129915867736255, LR: 0.0003 +[2026-03-05 01:01:27] (step=0061997) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.130111524163569, LR: 0.0003 +[2026-03-05 01:01:35] (step=0061998) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.130307180590883, LR: 0.0003 +[2026-03-05 01:01:43] (step=0061999) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 12.130502837018197, LR: 0.0003 +[2026-03-05 01:01:51] (step=0062000) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 12.130698493445509, LR: 0.0003 +[2026-03-05 01:01:51] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0062000/ +[2026-03-05 01:01:59] (step=0062001) Train Loss: 0.4443, Train Steps/Sec: 0.12, Epoch: 12.130894149872823, LR: 0.0003 +[2026-03-05 01:02:07] (step=0062002) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.131089806300137, LR: 0.0003 +[2026-03-05 01:02:15] (step=0062003) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 12.131285462727451, LR: 0.0003 +[2026-03-05 01:02:23] (step=0062004) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.131481119154763, LR: 0.0003 +[2026-03-05 01:02:31] (step=0062005) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.131676775582077, LR: 0.0003 +[2026-03-05 01:02:38] (step=0062006) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.131872432009391, LR: 0.0003 +[2026-03-05 01:02:46] (step=0062007) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.132068088436705, LR: 0.0003 +[2026-03-05 01:02:54] (step=0062008) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 12.13226374486402, LR: 0.0003 +[2026-03-05 01:03:02] (step=0062009) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.132459401291332, LR: 0.0003 +[2026-03-05 01:03:10] (step=0062010) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.132655057718646, LR: 0.0003 +[2026-03-05 01:03:18] (step=0062011) Train Loss: 0.4376, Train Steps/Sec: 0.12, Epoch: 12.13285071414596, LR: 0.0003 +[2026-03-05 01:03:26] (step=0062012) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.133046370573274, LR: 0.0003 +[2026-03-05 01:03:34] (step=0062013) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.133242027000588, LR: 0.0003 +[2026-03-05 01:03:42] (step=0062014) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.1334376834279, LR: 0.0003 +[2026-03-05 01:03:49] (step=0062015) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.133633339855214, LR: 0.0003 +[2026-03-05 01:03:57] (step=0062016) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.133828996282528, LR: 0.0003 +[2026-03-05 01:04:05] (step=0062017) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.134024652709842, LR: 0.0003 +[2026-03-05 01:04:13] (step=0062018) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.134220309137156, LR: 0.0003 +[2026-03-05 01:04:21] (step=0062019) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.134415965564468, LR: 0.0003 +[2026-03-05 01:04:29] (step=0062020) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.134611621991782, LR: 0.0003 +[2026-03-05 01:04:37] (step=0062021) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.134807278419096, LR: 0.0003 +[2026-03-05 01:04:44] (step=0062022) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.13500293484641, LR: 0.0003 +[2026-03-05 01:04:52] (step=0062023) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 12.135198591273724, LR: 0.0003 +[2026-03-05 01:05:00] (step=0062024) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.135394247701036, LR: 0.0003 +[2026-03-05 01:05:08] (step=0062025) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.13558990412835, LR: 0.0003 +[2026-03-05 01:05:16] (step=0062026) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.135785560555664, LR: 0.0003 +[2026-03-05 01:05:24] (step=0062027) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 12.135981216982978, LR: 0.0003 +[2026-03-05 01:05:32] (step=0062028) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.136176873410292, LR: 0.0003 +[2026-03-05 01:05:39] (step=0062029) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.136372529837605, LR: 0.0003 +[2026-03-05 01:05:47] (step=0062030) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.136568186264919, LR: 0.0003 +[2026-03-05 01:05:55] (step=0062031) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.136763842692233, LR: 0.0003 +[2026-03-05 01:06:03] (step=0062032) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.136959499119547, LR: 0.0003 +[2026-03-05 01:06:11] (step=0062033) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.137155155546859, LR: 0.0003 +[2026-03-05 01:06:19] (step=0062034) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.137350811974173, LR: 0.0003 +[2026-03-05 01:06:27] (step=0062035) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.137546468401487, LR: 0.0003 +[2026-03-05 01:06:34] (step=0062036) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.1377421248288, LR: 0.0003 +[2026-03-05 01:06:42] (step=0062037) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.137937781256115, LR: 0.0003 +[2026-03-05 01:06:50] (step=0062038) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 12.138133437683427, LR: 0.0003 +[2026-03-05 01:06:58] (step=0062039) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.138329094110741, LR: 0.0003 +[2026-03-05 01:07:06] (step=0062040) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.138524750538055, LR: 0.0003 +[2026-03-05 01:07:14] (step=0062041) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.13872040696537, LR: 0.0003 +[2026-03-05 01:07:22] (step=0062042) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.138916063392683, LR: 0.0003 +[2026-03-05 01:07:29] (step=0062043) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.139111719819995, LR: 0.0003 +[2026-03-05 01:07:37] (step=0062044) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.13930737624731, LR: 0.0003 +[2026-03-05 01:07:45] (step=0062045) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.139503032674623, LR: 0.0003 +[2026-03-05 01:07:53] (step=0062046) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.139698689101937, LR: 0.0003 +[2026-03-05 01:08:01] (step=0062047) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.139894345529251, LR: 0.0003 +[2026-03-05 01:08:09] (step=0062048) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.140090001956564, LR: 0.0003 +[2026-03-05 01:08:17] (step=0062049) Train Loss: 0.4416, Train Steps/Sec: 0.12, Epoch: 12.140285658383878, LR: 0.0003 +[2026-03-05 01:08:25] (step=0062050) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.140481314811192, LR: 0.0003 +[2026-03-05 01:08:32] (step=0062051) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.140676971238506, LR: 0.0003 +[2026-03-05 01:08:40] (step=0062052) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.14087262766582, LR: 0.0003 +[2026-03-05 01:08:48] (step=0062053) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.141068284093132, LR: 0.0003 +[2026-03-05 01:08:56] (step=0062054) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.141263940520446, LR: 0.0003 +[2026-03-05 01:09:04] (step=0062055) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.14145959694776, LR: 0.0003 +[2026-03-05 01:09:12] (step=0062056) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.141655253375074, LR: 0.0003 +[2026-03-05 01:09:20] (step=0062057) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.141850909802386, LR: 0.0003 +[2026-03-05 01:09:28] (step=0062058) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 12.1420465662297, LR: 0.0003 +[2026-03-05 01:09:36] (step=0062059) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.142242222657014, LR: 0.0003 +[2026-03-05 01:09:43] (step=0062060) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.142437879084328, LR: 0.0003 +[2026-03-05 01:09:51] (step=0062061) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.142633535511642, LR: 0.0003 +[2026-03-05 01:09:59] (step=0062062) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.142829191938954, LR: 0.0003 +[2026-03-05 01:10:07] (step=0062063) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.143024848366268, LR: 0.0003 +[2026-03-05 01:10:15] (step=0062064) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.143220504793582, LR: 0.0003 +[2026-03-05 01:10:23] (step=0062065) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.143416161220896, LR: 0.0003 +[2026-03-05 01:10:31] (step=0062066) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.14361181764821, LR: 0.0003 +[2026-03-05 01:10:38] (step=0062067) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.143807474075523, LR: 0.0003 +[2026-03-05 01:10:46] (step=0062068) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.144003130502837, LR: 0.0003 +[2026-03-05 01:10:54] (step=0062069) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.14419878693015, LR: 0.0003 +[2026-03-05 01:11:02] (step=0062070) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.144394443357465, LR: 0.0003 +[2026-03-05 01:11:10] (step=0062071) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.144590099784779, LR: 0.0003 +[2026-03-05 01:11:18] (step=0062072) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.14478575621209, LR: 0.0003 +[2026-03-05 01:11:25] (step=0062073) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.144981412639405, LR: 0.0003 +[2026-03-05 01:11:33] (step=0062074) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.145177069066719, LR: 0.0003 +[2026-03-05 01:11:41] (step=0062075) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.145372725494033, LR: 0.0003 +[2026-03-05 01:11:49] (step=0062076) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 12.145568381921347, LR: 0.0003 +[2026-03-05 01:11:57] (step=0062077) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.14576403834866, LR: 0.0003 +[2026-03-05 01:12:05] (step=0062078) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.145959694775973, LR: 0.0003 +[2026-03-05 01:12:13] (step=0062079) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.146155351203287, LR: 0.0003 +[2026-03-05 01:12:20] (step=0062080) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.146351007630601, LR: 0.0003 +[2026-03-05 01:12:28] (step=0062081) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.146546664057915, LR: 0.0003 +[2026-03-05 01:12:36] (step=0062082) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.146742320485227, LR: 0.0003 +[2026-03-05 01:12:44] (step=0062083) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.146937976912541, LR: 0.0003 +[2026-03-05 01:12:52] (step=0062084) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.147133633339855, LR: 0.0003 +[2026-03-05 01:13:00] (step=0062085) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.14732928976717, LR: 0.0003 +[2026-03-05 01:13:08] (step=0062086) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.147524946194482, LR: 0.0003 +[2026-03-05 01:13:16] (step=0062087) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.147720602621796, LR: 0.0003 +[2026-03-05 01:13:23] (step=0062088) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.14791625904911, LR: 0.0003 +[2026-03-05 01:13:31] (step=0062089) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.148111915476424, LR: 0.0003 +[2026-03-05 01:13:39] (step=0062090) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.148307571903738, LR: 0.0003 +[2026-03-05 01:13:47] (step=0062091) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.14850322833105, LR: 0.0003 +[2026-03-05 01:13:55] (step=0062092) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.148698884758364, LR: 0.0003 +[2026-03-05 01:14:03] (step=0062093) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.148894541185678, LR: 0.0003 +[2026-03-05 01:14:11] (step=0062094) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.149090197612992, LR: 0.0003 +[2026-03-05 01:14:18] (step=0062095) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.149285854040306, LR: 0.0003 +[2026-03-05 01:14:26] (step=0062096) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.149481510467618, LR: 0.0003 +[2026-03-05 01:14:34] (step=0062097) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.149677166894932, LR: 0.0003 +[2026-03-05 01:14:42] (step=0062098) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.149872823322246, LR: 0.0003 +[2026-03-05 01:14:50] (step=0062099) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.15006847974956, LR: 0.0003 +[2026-03-05 01:14:58] (step=0062100) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.150264136176874, LR: 0.0003 +[2026-03-05 01:15:06] (step=0062101) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.150459792604186, LR: 0.0003 +[2026-03-05 01:15:13] (step=0062102) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.1506554490315, LR: 0.0003 +[2026-03-05 01:15:21] (step=0062103) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.150851105458814, LR: 0.0003 +[2026-03-05 01:15:29] (step=0062104) Train Loss: 0.4491, Train Steps/Sec: 0.12, Epoch: 12.151046761886128, LR: 0.0003 +[2026-03-05 01:15:37] (step=0062105) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.151242418313442, LR: 0.0003 +[2026-03-05 01:15:45] (step=0062106) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.151438074740755, LR: 0.0003 +[2026-03-05 01:15:53] (step=0062107) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.151633731168069, LR: 0.0003 +[2026-03-05 01:16:01] (step=0062108) Train Loss: 0.4540, Train Steps/Sec: 0.12, Epoch: 12.151829387595383, LR: 0.0003 +[2026-03-05 01:16:09] (step=0062109) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.152025044022697, LR: 0.0003 +[2026-03-05 01:16:17] (step=0062110) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.152220700450009, LR: 0.0003 +[2026-03-05 01:16:24] (step=0062111) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.152416356877323, LR: 0.0003 +[2026-03-05 01:16:32] (step=0062112) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.152612013304637, LR: 0.0003 +[2026-03-05 01:16:40] (step=0062113) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.152807669731951, LR: 0.0003 +[2026-03-05 01:16:48] (step=0062114) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.153003326159265, LR: 0.0003 +[2026-03-05 01:16:56] (step=0062115) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.153198982586577, LR: 0.0003 +[2026-03-05 01:17:04] (step=0062116) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 12.153394639013891, LR: 0.0003 +[2026-03-05 01:17:12] (step=0062117) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.153590295441205, LR: 0.0003 +[2026-03-05 01:17:20] (step=0062118) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.15378595186852, LR: 0.0003 +[2026-03-05 01:17:27] (step=0062119) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.153981608295833, LR: 0.0003 +[2026-03-05 01:17:35] (step=0062120) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.154177264723145, LR: 0.0003 +[2026-03-05 01:17:43] (step=0062121) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.15437292115046, LR: 0.0003 +[2026-03-05 01:17:51] (step=0062122) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.154568577577773, LR: 0.0003 +[2026-03-05 01:17:59] (step=0062123) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.154764234005087, LR: 0.0003 +[2026-03-05 01:18:07] (step=0062124) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.154959890432401, LR: 0.0003 +[2026-03-05 01:18:15] (step=0062125) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.155155546859714, LR: 0.0003 +[2026-03-05 01:18:22] (step=0062126) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.155351203287028, LR: 0.0003 +[2026-03-05 01:18:30] (step=0062127) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.155546859714342, LR: 0.0003 +[2026-03-05 01:18:38] (step=0062128) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.155742516141656, LR: 0.0003 +[2026-03-05 01:18:46] (step=0062129) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.15593817256897, LR: 0.0003 +[2026-03-05 01:18:54] (step=0062130) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 12.156133828996282, LR: 0.0003 +[2026-03-05 01:19:02] (step=0062131) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.156329485423596, LR: 0.0003 +[2026-03-05 01:19:10] (step=0062132) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.15652514185091, LR: 0.0003 +[2026-03-05 01:19:18] (step=0062133) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.156720798278224, LR: 0.0003 +[2026-03-05 01:19:25] (step=0062134) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.156916454705538, LR: 0.0003 +[2026-03-05 01:19:33] (step=0062135) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.15711211113285, LR: 0.0003 +[2026-03-05 01:19:41] (step=0062136) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.157307767560164, LR: 0.0003 +[2026-03-05 01:19:49] (step=0062137) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.157503423987478, LR: 0.0003 +[2026-03-05 01:19:57] (step=0062138) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.157699080414792, LR: 0.0003 +[2026-03-05 01:20:05] (step=0062139) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.157894736842104, LR: 0.0003 +[2026-03-05 01:20:13] (step=0062140) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.158090393269418, LR: 0.0003 +[2026-03-05 01:20:20] (step=0062141) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.158286049696732, LR: 0.0003 +[2026-03-05 01:20:28] (step=0062142) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.158481706124046, LR: 0.0003 +[2026-03-05 01:20:36] (step=0062143) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.15867736255136, LR: 0.0003 +[2026-03-05 01:20:44] (step=0062144) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 12.158873018978673, LR: 0.0003 +[2026-03-05 01:20:52] (step=0062145) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.159068675405987, LR: 0.0003 +[2026-03-05 01:21:00] (step=0062146) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.1592643318333, LR: 0.0003 +[2026-03-05 01:21:08] (step=0062147) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.159459988260615, LR: 0.0003 +[2026-03-05 01:21:15] (step=0062148) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.159655644687929, LR: 0.0003 +[2026-03-05 01:21:23] (step=0062149) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.159851301115241, LR: 0.0003 +[2026-03-05 01:21:31] (step=0062150) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.160046957542555, LR: 0.0003 +[2026-03-05 01:21:39] (step=0062151) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 12.160242613969869, LR: 0.0003 +[2026-03-05 01:21:47] (step=0062152) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.160438270397183, LR: 0.0003 +[2026-03-05 01:21:55] (step=0062153) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.160633926824497, LR: 0.0003 +[2026-03-05 01:22:03] (step=0062154) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.16082958325181, LR: 0.0003 +[2026-03-05 01:22:11] (step=0062155) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.161025239679123, LR: 0.0003 +[2026-03-05 01:22:19] (step=0062156) Train Loss: 0.4459, Train Steps/Sec: 0.12, Epoch: 12.161220896106437, LR: 0.0003 +[2026-03-05 01:22:27] (step=0062157) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.161416552533751, LR: 0.0003 +[2026-03-05 01:22:34] (step=0062158) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.161612208961065, LR: 0.0003 +[2026-03-05 01:22:42] (step=0062159) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.161807865388377, LR: 0.0003 +[2026-03-05 01:22:50] (step=0062160) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.162003521815691, LR: 0.0003 +[2026-03-05 01:22:58] (step=0062161) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.162199178243005, LR: 0.0003 +[2026-03-05 01:23:06] (step=0062162) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.16239483467032, LR: 0.0003 +[2026-03-05 01:23:14] (step=0062163) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.162590491097632, LR: 0.0003 +[2026-03-05 01:23:22] (step=0062164) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.162786147524946, LR: 0.0003 +[2026-03-05 01:23:29] (step=0062165) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.16298180395226, LR: 0.0003 +[2026-03-05 01:23:37] (step=0062166) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.163177460379574, LR: 0.0003 +[2026-03-05 01:23:45] (step=0062167) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.163373116806888, LR: 0.0003 +[2026-03-05 01:23:53] (step=0062168) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.1635687732342, LR: 0.0003 +[2026-03-05 01:24:01] (step=0062169) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.163764429661514, LR: 0.0003 +[2026-03-05 01:24:09] (step=0062170) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.163960086088828, LR: 0.0003 +[2026-03-05 01:24:17] (step=0062171) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.164155742516142, LR: 0.0003 +[2026-03-05 01:24:24] (step=0062172) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.164351398943456, LR: 0.0003 +[2026-03-05 01:24:32] (step=0062173) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.164547055370768, LR: 0.0003 +[2026-03-05 01:24:40] (step=0062174) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.164742711798082, LR: 0.0003 +[2026-03-05 01:24:48] (step=0062175) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.164938368225396, LR: 0.0003 +[2026-03-05 01:24:56] (step=0062176) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.16513402465271, LR: 0.0003 +[2026-03-05 01:25:04] (step=0062177) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.165329681080024, LR: 0.0003 +[2026-03-05 01:25:12] (step=0062178) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.165525337507336, LR: 0.0003 +[2026-03-05 01:25:19] (step=0062179) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.16572099393465, LR: 0.0003 +[2026-03-05 01:25:27] (step=0062180) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.165916650361964, LR: 0.0003 +[2026-03-05 01:25:35] (step=0062181) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.166112306789278, LR: 0.0003 +[2026-03-05 01:25:43] (step=0062182) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.166307963216592, LR: 0.0003 +[2026-03-05 01:25:51] (step=0062183) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.166503619643905, LR: 0.0003 +[2026-03-05 01:25:59] (step=0062184) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.166699276071219, LR: 0.0003 +[2026-03-05 01:26:07] (step=0062185) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.166894932498533, LR: 0.0003 +[2026-03-05 01:26:14] (step=0062186) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.167090588925847, LR: 0.0003 +[2026-03-05 01:26:22] (step=0062187) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.16728624535316, LR: 0.0003 +[2026-03-05 01:26:30] (step=0062188) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.167481901780473, LR: 0.0003 +[2026-03-05 01:26:38] (step=0062189) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.167677558207787, LR: 0.0003 +[2026-03-05 01:26:46] (step=0062190) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.167873214635101, LR: 0.0003 +[2026-03-05 01:26:54] (step=0062191) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.168068871062415, LR: 0.0003 +[2026-03-05 01:27:02] (step=0062192) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.168264527489727, LR: 0.0003 +[2026-03-05 01:27:09] (step=0062193) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.168460183917041, LR: 0.0003 +[2026-03-05 01:27:17] (step=0062194) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.168655840344355, LR: 0.0003 +[2026-03-05 01:27:25] (step=0062195) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.16885149677167, LR: 0.0003 +[2026-03-05 01:27:33] (step=0062196) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.169047153198983, LR: 0.0003 +[2026-03-05 01:27:41] (step=0062197) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.169242809626295, LR: 0.0003 +[2026-03-05 01:27:49] (step=0062198) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.16943846605361, LR: 0.0003 +[2026-03-05 01:27:57] (step=0062199) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.169634122480923, LR: 0.0003 +[2026-03-05 01:28:04] (step=0062200) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.169829778908237, LR: 0.0003 +[2026-03-05 01:28:12] (step=0062201) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.170025435335551, LR: 0.0003 +[2026-03-05 01:28:20] (step=0062202) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 12.170221091762864, LR: 0.0003 +[2026-03-05 01:28:28] (step=0062203) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.170416748190178, LR: 0.0003 +[2026-03-05 01:28:36] (step=0062204) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.170612404617492, LR: 0.0003 +[2026-03-05 01:28:44] (step=0062205) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.170808061044806, LR: 0.0003 +[2026-03-05 01:28:52] (step=0062206) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.17100371747212, LR: 0.0003 +[2026-03-05 01:29:00] (step=0062207) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.171199373899432, LR: 0.0003 +[2026-03-05 01:29:07] (step=0062208) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.171395030326746, LR: 0.0003 +[2026-03-05 01:29:15] (step=0062209) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.17159068675406, LR: 0.0003 +[2026-03-05 01:29:23] (step=0062210) Train Loss: 0.4513, Train Steps/Sec: 0.12, Epoch: 12.171786343181374, LR: 0.0003 +[2026-03-05 01:29:31] (step=0062211) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.171981999608688, LR: 0.0003 +[2026-03-05 01:29:39] (step=0062212) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.172177656036, LR: 0.0003 +[2026-03-05 01:29:47] (step=0062213) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.172373312463314, LR: 0.0003 +[2026-03-05 01:29:55] (step=0062214) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.172568968890628, LR: 0.0003 +[2026-03-05 01:30:03] (step=0062215) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 12.172764625317942, LR: 0.0003 +[2026-03-05 01:30:10] (step=0062216) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.172960281745254, LR: 0.0003 +[2026-03-05 01:30:18] (step=0062217) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.173155938172568, LR: 0.0003 +[2026-03-05 01:30:26] (step=0062218) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.173351594599882, LR: 0.0003 +[2026-03-05 01:30:34] (step=0062219) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.173547251027196, LR: 0.0003 +[2026-03-05 01:30:42] (step=0062220) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.17374290745451, LR: 0.0003 +[2026-03-05 01:30:50] (step=0062221) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 12.173938563881823, LR: 0.0003 +[2026-03-05 01:30:58] (step=0062222) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.174134220309137, LR: 0.0003 +[2026-03-05 01:31:06] (step=0062223) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.17432987673645, LR: 0.0003 +[2026-03-05 01:31:13] (step=0062224) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.174525533163765, LR: 0.0003 +[2026-03-05 01:31:21] (step=0062225) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.174721189591079, LR: 0.0003 +[2026-03-05 01:31:29] (step=0062226) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.174916846018391, LR: 0.0003 +[2026-03-05 01:31:37] (step=0062227) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.175112502445705, LR: 0.0003 +[2026-03-05 01:31:45] (step=0062228) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.175308158873019, LR: 0.0003 +[2026-03-05 01:31:53] (step=0062229) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.175503815300333, LR: 0.0003 +[2026-03-05 01:32:01] (step=0062230) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.175699471727647, LR: 0.0003 +[2026-03-05 01:32:08] (step=0062231) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.17589512815496, LR: 0.0003 +[2026-03-05 01:32:16] (step=0062232) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.176090784582273, LR: 0.0003 +[2026-03-05 01:32:24] (step=0062233) Train Loss: 0.4156, Train Steps/Sec: 0.13, Epoch: 12.176286441009587, LR: 0.0003 +[2026-03-05 01:32:32] (step=0062234) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.176482097436901, LR: 0.0003 +[2026-03-05 01:32:40] (step=0062235) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.176677753864215, LR: 0.0003 +[2026-03-05 01:32:48] (step=0062236) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.176873410291527, LR: 0.0003 +[2026-03-05 01:32:56] (step=0062237) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.177069066718841, LR: 0.0003 +[2026-03-05 01:33:03] (step=0062238) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 12.177264723146155, LR: 0.0003 +[2026-03-05 01:33:11] (step=0062239) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.17746037957347, LR: 0.0003 +[2026-03-05 01:33:19] (step=0062240) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.177656036000784, LR: 0.0003 +[2026-03-05 01:33:27] (step=0062241) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.177851692428096, LR: 0.0003 +[2026-03-05 01:33:35] (step=0062242) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.17804734885541, LR: 0.0003 +[2026-03-05 01:33:43] (step=0062243) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 12.178243005282724, LR: 0.0003 +[2026-03-05 01:33:51] (step=0062244) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.178438661710038, LR: 0.0003 +[2026-03-05 01:33:59] (step=0062245) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.17863431813735, LR: 0.0003 +[2026-03-05 01:34:06] (step=0062246) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.178829974564664, LR: 0.0003 +[2026-03-05 01:34:14] (step=0062247) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.179025630991978, LR: 0.0003 +[2026-03-05 01:34:22] (step=0062248) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.179221287419292, LR: 0.0003 +[2026-03-05 01:34:30] (step=0062249) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.179416943846606, LR: 0.0003 +[2026-03-05 01:34:38] (step=0062250) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.179612600273918, LR: 0.0003 +[2026-03-05 01:34:46] (step=0062251) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.179808256701232, LR: 0.0003 +[2026-03-05 01:34:54] (step=0062252) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.180003913128546, LR: 0.0003 +[2026-03-05 01:35:01] (step=0062253) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.18019956955586, LR: 0.0003 +[2026-03-05 01:35:09] (step=0062254) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.180395225983174, LR: 0.0003 +[2026-03-05 01:35:17] (step=0062255) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.180590882410486, LR: 0.0003 +[2026-03-05 01:35:25] (step=0062256) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.1807865388378, LR: 0.0003 +[2026-03-05 01:35:33] (step=0062257) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.180982195265114, LR: 0.0003 +[2026-03-05 01:35:41] (step=0062258) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.181177851692429, LR: 0.0003 +[2026-03-05 01:35:49] (step=0062259) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.181373508119743, LR: 0.0003 +[2026-03-05 01:35:57] (step=0062260) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.181569164547055, LR: 0.0003 +[2026-03-05 01:36:05] (step=0062261) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 12.181764820974369, LR: 0.0003 +[2026-03-05 01:36:12] (step=0062262) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.181960477401683, LR: 0.0003 +[2026-03-05 01:36:20] (step=0062263) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.182156133828997, LR: 0.0003 +[2026-03-05 01:36:28] (step=0062264) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.18235179025631, LR: 0.0003 +[2026-03-05 01:36:36] (step=0062265) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.182547446683623, LR: 0.0003 +[2026-03-05 01:36:44] (step=0062266) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 12.182743103110937, LR: 0.0003 +[2026-03-05 01:36:52] (step=0062267) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.182938759538251, LR: 0.0003 +[2026-03-05 01:37:00] (step=0062268) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.183134415965565, LR: 0.0003 +[2026-03-05 01:37:08] (step=0062269) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.183330072392877, LR: 0.0003 +[2026-03-05 01:37:15] (step=0062270) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 12.183525728820191, LR: 0.0003 +[2026-03-05 01:37:23] (step=0062271) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.183721385247505, LR: 0.0003 +[2026-03-05 01:37:31] (step=0062272) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 12.18391704167482, LR: 0.0003 +[2026-03-05 01:37:39] (step=0062273) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.184112698102133, LR: 0.0003 +[2026-03-05 01:37:47] (step=0062274) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.184308354529445, LR: 0.0003 +[2026-03-05 01:37:55] (step=0062275) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.18450401095676, LR: 0.0003 +[2026-03-05 01:38:03] (step=0062276) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.184699667384074, LR: 0.0003 +[2026-03-05 01:38:11] (step=0062277) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.184895323811388, LR: 0.0003 +[2026-03-05 01:38:18] (step=0062278) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.185090980238702, LR: 0.0003 +[2026-03-05 01:38:26] (step=0062279) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.185286636666014, LR: 0.0003 +[2026-03-05 01:38:34] (step=0062280) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.185482293093328, LR: 0.0003 +[2026-03-05 01:38:42] (step=0062281) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.185677949520642, LR: 0.0003 +[2026-03-05 01:38:50] (step=0062282) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.185873605947956, LR: 0.0003 +[2026-03-05 01:38:58] (step=0062283) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.18606926237527, LR: 0.0003 +[2026-03-05 01:39:06] (step=0062284) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.186264918802582, LR: 0.0003 +[2026-03-05 01:39:13] (step=0062285) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.186460575229896, LR: 0.0003 +[2026-03-05 01:39:21] (step=0062286) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.18665623165721, LR: 0.0003 +[2026-03-05 01:39:29] (step=0062287) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.186851888084524, LR: 0.0003 +[2026-03-05 01:39:37] (step=0062288) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.187047544511838, LR: 0.0003 +[2026-03-05 01:39:45] (step=0062289) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.18724320093915, LR: 0.0003 +[2026-03-05 01:39:53] (step=0062290) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.187438857366464, LR: 0.0003 +[2026-03-05 01:40:01] (step=0062291) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.187634513793778, LR: 0.0003 +[2026-03-05 01:40:08] (step=0062292) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.187830170221092, LR: 0.0003 +[2026-03-05 01:40:16] (step=0062293) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.188025826648406, LR: 0.0003 +[2026-03-05 01:40:24] (step=0062294) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.188221483075719, LR: 0.0003 +[2026-03-05 01:40:32] (step=0062295) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.188417139503033, LR: 0.0003 +[2026-03-05 01:40:40] (step=0062296) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.188612795930347, LR: 0.0003 +[2026-03-05 01:40:48] (step=0062297) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.18880845235766, LR: 0.0003 +[2026-03-05 01:40:56] (step=0062298) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.189004108784973, LR: 0.0003 +[2026-03-05 01:41:03] (step=0062299) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.189199765212287, LR: 0.0003 +[2026-03-05 01:41:11] (step=0062300) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.1893954216396, LR: 0.0003 +[2026-03-05 01:41:19] (step=0062301) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.189591078066915, LR: 0.0003 +[2026-03-05 01:41:27] (step=0062302) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.189786734494229, LR: 0.0003 +[2026-03-05 01:41:35] (step=0062303) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.189982390921541, LR: 0.0003 +[2026-03-05 01:41:43] (step=0062304) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.190178047348855, LR: 0.0003 +[2026-03-05 01:41:51] (step=0062305) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.190373703776169, LR: 0.0003 +[2026-03-05 01:41:59] (step=0062306) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.190569360203483, LR: 0.0003 +[2026-03-05 01:42:06] (step=0062307) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.190765016630797, LR: 0.0003 +[2026-03-05 01:42:14] (step=0062308) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 12.19096067305811, LR: 0.0003 +[2026-03-05 01:42:22] (step=0062309) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.191156329485423, LR: 0.0003 +[2026-03-05 01:42:30] (step=0062310) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.191351985912737, LR: 0.0003 +[2026-03-05 01:42:38] (step=0062311) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.191547642340051, LR: 0.0003 +[2026-03-05 01:42:46] (step=0062312) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.191743298767365, LR: 0.0003 +[2026-03-05 01:42:54] (step=0062313) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.191938955194678, LR: 0.0003 +[2026-03-05 01:43:02] (step=0062314) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.192134611621992, LR: 0.0003 +[2026-03-05 01:43:09] (step=0062315) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.192330268049306, LR: 0.0003 +[2026-03-05 01:43:17] (step=0062316) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.19252592447662, LR: 0.0003 +[2026-03-05 01:43:25] (step=0062317) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 12.192721580903934, LR: 0.0003 +[2026-03-05 01:43:33] (step=0062318) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.192917237331246, LR: 0.0003 +[2026-03-05 01:43:41] (step=0062319) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.19311289375856, LR: 0.0003 +[2026-03-05 01:43:49] (step=0062320) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 12.193308550185874, LR: 0.0003 +[2026-03-05 01:43:57] (step=0062321) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.193504206613188, LR: 0.0003 +[2026-03-05 01:44:05] (step=0062322) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.1936998630405, LR: 0.0003 +[2026-03-05 01:44:12] (step=0062323) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.193895519467814, LR: 0.0003 +[2026-03-05 01:44:20] (step=0062324) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.194091175895128, LR: 0.0003 +[2026-03-05 01:44:28] (step=0062325) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 12.194286832322442, LR: 0.0003 +[2026-03-05 01:44:36] (step=0062326) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.194482488749756, LR: 0.0003 +[2026-03-05 01:44:44] (step=0062327) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.194678145177068, LR: 0.0003 +[2026-03-05 01:44:52] (step=0062328) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.194873801604382, LR: 0.0003 +[2026-03-05 01:45:00] (step=0062329) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.195069458031696, LR: 0.0003 +[2026-03-05 01:45:07] (step=0062330) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.19526511445901, LR: 0.0003 +[2026-03-05 01:45:15] (step=0062331) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 12.195460770886324, LR: 0.0003 +[2026-03-05 01:45:23] (step=0062332) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.195656427313637, LR: 0.0003 +[2026-03-05 01:45:31] (step=0062333) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.19585208374095, LR: 0.0003 +[2026-03-05 01:45:39] (step=0062334) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.196047740168265, LR: 0.0003 +[2026-03-05 01:45:47] (step=0062335) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.196243396595579, LR: 0.0003 +[2026-03-05 01:45:54] (step=0062336) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.196439053022893, LR: 0.0003 +[2026-03-05 01:46:02] (step=0062337) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.196634709450205, LR: 0.0003 +[2026-03-05 01:46:10] (step=0062338) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.196830365877519, LR: 0.0003 +[2026-03-05 01:46:18] (step=0062339) Train Loss: 0.4623, Train Steps/Sec: 0.13, Epoch: 12.197026022304833, LR: 0.0003 +[2026-03-05 01:46:26] (step=0062340) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.197221678732147, LR: 0.0003 +[2026-03-05 01:46:34] (step=0062341) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.19741733515946, LR: 0.0003 +[2026-03-05 01:46:42] (step=0062342) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.197612991586773, LR: 0.0003 +[2026-03-05 01:46:49] (step=0062343) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.197808648014087, LR: 0.0003 +[2026-03-05 01:46:57] (step=0062344) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.198004304441401, LR: 0.0003 +[2026-03-05 01:47:05] (step=0062345) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.198199960868715, LR: 0.0003 +[2026-03-05 01:47:13] (step=0062346) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.198395617296029, LR: 0.0003 +[2026-03-05 01:47:21] (step=0062347) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.198591273723341, LR: 0.0003 +[2026-03-05 01:47:29] (step=0062348) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.198786930150655, LR: 0.0003 +[2026-03-05 01:47:37] (step=0062349) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.19898258657797, LR: 0.0003 +[2026-03-05 01:47:44] (step=0062350) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.199178243005283, LR: 0.0003 +[2026-03-05 01:47:52] (step=0062351) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.199373899432596, LR: 0.0003 +[2026-03-05 01:48:00] (step=0062352) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.19956955585991, LR: 0.0003 +[2026-03-05 01:48:08] (step=0062353) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.199765212287224, LR: 0.0003 +[2026-03-05 01:48:16] (step=0062354) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.199960868714538, LR: 0.0003 +[2026-03-05 01:48:24] (step=0062355) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.200156525141852, LR: 0.0003 +[2026-03-05 01:48:32] (step=0062356) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.200352181569164, LR: 0.0003 +[2026-03-05 01:48:40] (step=0062357) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.200547837996478, LR: 0.0003 +[2026-03-05 01:48:47] (step=0062358) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 12.200743494423792, LR: 0.0003 +[2026-03-05 01:48:55] (step=0062359) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.200939150851106, LR: 0.0003 +[2026-03-05 01:49:03] (step=0062360) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.20113480727842, LR: 0.0003 +[2026-03-05 01:49:11] (step=0062361) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.201330463705732, LR: 0.0003 +[2026-03-05 01:49:19] (step=0062362) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 12.201526120133046, LR: 0.0003 +[2026-03-05 01:49:27] (step=0062363) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 12.20172177656036, LR: 0.0003 +[2026-03-05 01:49:35] (step=0062364) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 12.201917432987674, LR: 0.0003 +[2026-03-05 01:49:43] (step=0062365) Train Loss: 0.4348, Train Steps/Sec: 0.12, Epoch: 12.202113089414988, LR: 0.0003 +[2026-03-05 01:49:51] (step=0062366) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.2023087458423, LR: 0.0003 +[2026-03-05 01:49:58] (step=0062367) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.202504402269614, LR: 0.0003 +[2026-03-05 01:50:06] (step=0062368) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.202700058696928, LR: 0.0003 +[2026-03-05 01:50:14] (step=0062369) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.202895715124242, LR: 0.0003 +[2026-03-05 01:50:22] (step=0062370) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.203091371551556, LR: 0.0003 +[2026-03-05 01:50:30] (step=0062371) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.203287027978869, LR: 0.0003 +[2026-03-05 01:50:38] (step=0062372) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.203482684406183, LR: 0.0003 +[2026-03-05 01:50:46] (step=0062373) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.203678340833497, LR: 0.0003 +[2026-03-05 01:50:53] (step=0062374) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.20387399726081, LR: 0.0003 +[2026-03-05 01:51:01] (step=0062375) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.204069653688123, LR: 0.0003 +[2026-03-05 01:51:09] (step=0062376) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 12.204265310115437, LR: 0.0003 +[2026-03-05 01:51:17] (step=0062377) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 12.20446096654275, LR: 0.0003 +[2026-03-05 01:51:25] (step=0062378) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.204656622970065, LR: 0.0003 +[2026-03-05 01:51:33] (step=0062379) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.204852279397379, LR: 0.0003 +[2026-03-05 01:51:41] (step=0062380) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 12.205047935824691, LR: 0.0003 +[2026-03-05 01:51:49] (step=0062381) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.205243592252005, LR: 0.0003 +[2026-03-05 01:51:56] (step=0062382) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.205439248679319, LR: 0.0003 +[2026-03-05 01:52:04] (step=0062383) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.205634905106633, LR: 0.0003 +[2026-03-05 01:52:12] (step=0062384) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.205830561533947, LR: 0.0003 +[2026-03-05 01:52:20] (step=0062385) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.20602621796126, LR: 0.0003 +[2026-03-05 01:52:28] (step=0062386) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 12.206221874388573, LR: 0.0003 +[2026-03-05 01:52:36] (step=0062387) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.206417530815887, LR: 0.0003 +[2026-03-05 01:52:44] (step=0062388) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.206613187243201, LR: 0.0003 +[2026-03-05 01:52:51] (step=0062389) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.206808843670515, LR: 0.0003 +[2026-03-05 01:52:59] (step=0062390) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.207004500097828, LR: 0.0003 +[2026-03-05 01:53:07] (step=0062391) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.207200156525142, LR: 0.0003 +[2026-03-05 01:53:15] (step=0062392) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.207395812952456, LR: 0.0003 +[2026-03-05 01:53:23] (step=0062393) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.20759146937977, LR: 0.0003 +[2026-03-05 01:53:31] (step=0062394) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.207787125807084, LR: 0.0003 +[2026-03-05 01:53:39] (step=0062395) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.207982782234396, LR: 0.0003 +[2026-03-05 01:53:47] (step=0062396) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.20817843866171, LR: 0.0003 +[2026-03-05 01:53:54] (step=0062397) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 12.208374095089024, LR: 0.0003 +[2026-03-05 01:54:02] (step=0062398) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.208569751516338, LR: 0.0003 +[2026-03-05 01:54:10] (step=0062399) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.20876540794365, LR: 0.0003 +[2026-03-05 01:54:18] (step=0062400) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.208961064370964, LR: 0.0003 +[2026-03-05 01:54:26] (step=0062401) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 12.209156720798278, LR: 0.0003 +[2026-03-05 01:54:34] (step=0062402) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.209352377225592, LR: 0.0003 +[2026-03-05 01:54:42] (step=0062403) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.209548033652906, LR: 0.0003 +[2026-03-05 01:54:49] (step=0062404) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.209743690080218, LR: 0.0003 +[2026-03-05 01:54:57] (step=0062405) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.209939346507532, LR: 0.0003 +[2026-03-05 01:55:05] (step=0062406) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.210135002934846, LR: 0.0003 +[2026-03-05 01:55:13] (step=0062407) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 12.21033065936216, LR: 0.0003 +[2026-03-05 01:55:21] (step=0062408) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.210526315789474, LR: 0.0003 +[2026-03-05 01:55:29] (step=0062409) Train Loss: 0.4457, Train Steps/Sec: 0.12, Epoch: 12.210721972216787, LR: 0.0003 +[2026-03-05 01:55:37] (step=0062410) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.2109176286441, LR: 0.0003 +[2026-03-05 01:55:45] (step=0062411) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.211113285071415, LR: 0.0003 +[2026-03-05 01:55:53] (step=0062412) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.211308941498729, LR: 0.0003 +[2026-03-05 01:56:00] (step=0062413) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.211504597926043, LR: 0.0003 +[2026-03-05 01:56:08] (step=0062414) Train Loss: 0.4342, Train Steps/Sec: 0.12, Epoch: 12.211700254353355, LR: 0.0003 +[2026-03-05 01:56:16] (step=0062415) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.211895910780669, LR: 0.0003 +[2026-03-05 01:56:24] (step=0062416) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.212091567207983, LR: 0.0003 +[2026-03-05 01:56:32] (step=0062417) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.212287223635297, LR: 0.0003 +[2026-03-05 01:56:40] (step=0062418) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.21248288006261, LR: 0.0003 +[2026-03-05 01:56:48] (step=0062419) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.212678536489923, LR: 0.0003 +[2026-03-05 01:56:56] (step=0062420) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.212874192917237, LR: 0.0003 +[2026-03-05 01:57:03] (step=0062421) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.213069849344551, LR: 0.0003 +[2026-03-05 01:57:11] (step=0062422) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.213265505771865, LR: 0.0003 +[2026-03-05 01:57:19] (step=0062423) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.213461162199179, LR: 0.0003 +[2026-03-05 01:57:27] (step=0062424) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.213656818626491, LR: 0.0003 +[2026-03-05 01:57:35] (step=0062425) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.213852475053805, LR: 0.0003 +[2026-03-05 01:57:43] (step=0062426) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.21404813148112, LR: 0.0003 +[2026-03-05 01:57:51] (step=0062427) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.214243787908433, LR: 0.0003 +[2026-03-05 01:57:59] (step=0062428) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.214439444335746, LR: 0.0003 +[2026-03-05 01:58:06] (step=0062429) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.21463510076306, LR: 0.0003 +[2026-03-05 01:58:14] (step=0062430) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.214830757190374, LR: 0.0003 +[2026-03-05 01:58:22] (step=0062431) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.215026413617688, LR: 0.0003 +[2026-03-05 01:58:30] (step=0062432) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.215222070045002, LR: 0.0003 +[2026-03-05 01:58:38] (step=0062433) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.215417726472314, LR: 0.0003 +[2026-03-05 01:58:46] (step=0062434) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.215613382899628, LR: 0.0003 +[2026-03-05 01:58:54] (step=0062435) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.215809039326942, LR: 0.0003 +[2026-03-05 01:59:01] (step=0062436) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.216004695754256, LR: 0.0003 +[2026-03-05 01:59:09] (step=0062437) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.21620035218157, LR: 0.0003 +[2026-03-05 01:59:17] (step=0062438) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.216396008608882, LR: 0.0003 +[2026-03-05 01:59:25] (step=0062439) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.216591665036196, LR: 0.0003 +[2026-03-05 01:59:33] (step=0062440) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.21678732146351, LR: 0.0003 +[2026-03-05 01:59:41] (step=0062441) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.216982977890824, LR: 0.0003 +[2026-03-05 01:59:49] (step=0062442) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.217178634318138, LR: 0.0003 +[2026-03-05 01:59:56] (step=0062443) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.21737429074545, LR: 0.0003 +[2026-03-05 02:00:04] (step=0062444) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.217569947172764, LR: 0.0003 +[2026-03-05 02:00:12] (step=0062445) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 12.217765603600078, LR: 0.0003 +[2026-03-05 02:00:20] (step=0062446) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.217961260027392, LR: 0.0003 +[2026-03-05 02:00:28] (step=0062447) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.218156916454706, LR: 0.0003 +[2026-03-05 02:00:36] (step=0062448) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.218352572882019, LR: 0.0003 +[2026-03-05 02:00:44] (step=0062449) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.218548229309333, LR: 0.0003 +[2026-03-05 02:00:51] (step=0062450) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.218743885736647, LR: 0.0003 +[2026-03-05 02:00:59] (step=0062451) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 12.21893954216396, LR: 0.0003 +[2026-03-05 02:01:07] (step=0062452) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.219135198591273, LR: 0.0003 +[2026-03-05 02:01:15] (step=0062453) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.219330855018587, LR: 0.0003 +[2026-03-05 02:01:23] (step=0062454) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.2195265114459, LR: 0.0003 +[2026-03-05 02:01:31] (step=0062455) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.219722167873215, LR: 0.0003 +[2026-03-05 02:01:39] (step=0062456) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.219917824300529, LR: 0.0003 +[2026-03-05 02:01:47] (step=0062457) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.220113480727841, LR: 0.0003 +[2026-03-05 02:01:55] (step=0062458) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.220309137155155, LR: 0.0003 +[2026-03-05 02:02:02] (step=0062459) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 12.220504793582469, LR: 0.0003 +[2026-03-05 02:02:10] (step=0062460) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.220700450009783, LR: 0.0003 +[2026-03-05 02:02:18] (step=0062461) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 12.220896106437097, LR: 0.0003 +[2026-03-05 02:02:26] (step=0062462) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.22109176286441, LR: 0.0003 +[2026-03-05 02:02:34] (step=0062463) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.221287419291723, LR: 0.0003 +[2026-03-05 02:02:42] (step=0062464) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.221483075719037, LR: 0.0003 +[2026-03-05 02:02:50] (step=0062465) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.221678732146351, LR: 0.0003 +[2026-03-05 02:02:58] (step=0062466) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.221874388573665, LR: 0.0003 +[2026-03-05 02:03:05] (step=0062467) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.222070045000978, LR: 0.0003 +[2026-03-05 02:03:13] (step=0062468) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.222265701428292, LR: 0.0003 +[2026-03-05 02:03:21] (step=0062469) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.222461357855606, LR: 0.0003 +[2026-03-05 02:03:29] (step=0062470) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.22265701428292, LR: 0.0003 +[2026-03-05 02:03:37] (step=0062471) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.222852670710234, LR: 0.0003 +[2026-03-05 02:03:45] (step=0062472) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.223048327137546, LR: 0.0003 +[2026-03-05 02:03:53] (step=0062473) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.22324398356486, LR: 0.0003 +[2026-03-05 02:04:00] (step=0062474) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 12.223439639992174, LR: 0.0003 +[2026-03-05 02:04:08] (step=0062475) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.223635296419488, LR: 0.0003 +[2026-03-05 02:04:16] (step=0062476) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.223830952846802, LR: 0.0003 +[2026-03-05 02:04:24] (step=0062477) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 12.224026609274114, LR: 0.0003 +[2026-03-05 02:04:32] (step=0062478) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.224222265701428, LR: 0.0003 +[2026-03-05 02:04:40] (step=0062479) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.224417922128742, LR: 0.0003 +[2026-03-05 02:04:48] (step=0062480) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.224613578556056, LR: 0.0003 +[2026-03-05 02:04:55] (step=0062481) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.224809234983368, LR: 0.0003 +[2026-03-05 02:05:03] (step=0062482) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 12.225004891410682, LR: 0.0003 +[2026-03-05 02:05:11] (step=0062483) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.225200547837996, LR: 0.0003 +[2026-03-05 02:05:19] (step=0062484) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 12.22539620426531, LR: 0.0003 +[2026-03-05 02:05:27] (step=0062485) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.225591860692624, LR: 0.0003 +[2026-03-05 02:05:35] (step=0062486) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.225787517119937, LR: 0.0003 +[2026-03-05 02:05:42] (step=0062487) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.22598317354725, LR: 0.0003 +[2026-03-05 02:05:50] (step=0062488) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.226178829974565, LR: 0.0003 +[2026-03-05 02:05:58] (step=0062489) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.226374486401879, LR: 0.0003 +[2026-03-05 02:06:06] (step=0062490) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.226570142829193, LR: 0.0003 +[2026-03-05 02:06:14] (step=0062491) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.226765799256505, LR: 0.0003 +[2026-03-05 02:06:22] (step=0062492) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.226961455683819, LR: 0.0003 +[2026-03-05 02:06:30] (step=0062493) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.227157112111133, LR: 0.0003 +[2026-03-05 02:06:37] (step=0062494) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.227352768538447, LR: 0.0003 +[2026-03-05 02:06:45] (step=0062495) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.22754842496576, LR: 0.0003 +[2026-03-05 02:06:53] (step=0062496) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.227744081393073, LR: 0.0003 +[2026-03-05 02:07:01] (step=0062497) Train Loss: 0.4652, Train Steps/Sec: 0.13, Epoch: 12.227939737820387, LR: 0.0003 +[2026-03-05 02:07:09] (step=0062498) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 12.228135394247701, LR: 0.0003 +[2026-03-05 02:07:17] (step=0062499) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 12.228331050675015, LR: 0.0003 +[2026-03-05 02:07:25] (step=0062500) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.22852670710233, LR: 0.0003 +[2026-03-05 02:07:25] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0062500/ +[2026-03-05 02:07:33] (step=0062501) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.228722363529641, LR: 0.0003 +[2026-03-05 02:07:40] (step=0062502) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 12.228918019956955, LR: 0.0003 +[2026-03-05 02:07:48] (step=0062503) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.22911367638427, LR: 0.0003 +[2026-03-05 02:07:56] (step=0062504) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.229309332811583, LR: 0.0003 +[2026-03-05 02:08:04] (step=0062505) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 12.229504989238896, LR: 0.0003 +[2026-03-05 02:08:12] (step=0062506) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.22970064566621, LR: 0.0003 +[2026-03-05 02:08:20] (step=0062507) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.229896302093524, LR: 0.0003 +[2026-03-05 02:08:28] (step=0062508) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.230091958520838, LR: 0.0003 +[2026-03-05 02:08:35] (step=0062509) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.230287614948152, LR: 0.0003 +[2026-03-05 02:08:43] (step=0062510) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.230483271375464, LR: 0.0003 +[2026-03-05 02:08:51] (step=0062511) Train Loss: 0.4401, Train Steps/Sec: 0.12, Epoch: 12.230678927802778, LR: 0.0003 +[2026-03-05 02:08:59] (step=0062512) Train Loss: 0.4501, Train Steps/Sec: 0.12, Epoch: 12.230874584230092, LR: 0.0003 +[2026-03-05 02:09:07] (step=0062513) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.231070240657406, LR: 0.0003 +[2026-03-05 02:09:15] (step=0062514) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.23126589708472, LR: 0.0003 +[2026-03-05 02:09:23] (step=0062515) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.231461553512032, LR: 0.0003 +[2026-03-05 02:09:31] (step=0062516) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.231657209939346, LR: 0.0003 +[2026-03-05 02:09:39] (step=0062517) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.23185286636666, LR: 0.0003 +[2026-03-05 02:09:47] (step=0062518) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.232048522793974, LR: 0.0003 +[2026-03-05 02:09:54] (step=0062519) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.232244179221288, LR: 0.0003 +[2026-03-05 02:10:02] (step=0062520) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.2324398356486, LR: 0.0003 +[2026-03-05 02:10:10] (step=0062521) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.232635492075914, LR: 0.0003 +[2026-03-05 02:10:18] (step=0062522) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.232831148503228, LR: 0.0003 +[2026-03-05 02:10:26] (step=0062523) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 12.233026804930542, LR: 0.0003 +[2026-03-05 02:10:34] (step=0062524) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.233222461357856, LR: 0.0003 +[2026-03-05 02:10:42] (step=0062525) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.233418117785169, LR: 0.0003 +[2026-03-05 02:10:50] (step=0062526) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.233613774212483, LR: 0.0003 +[2026-03-05 02:10:57] (step=0062527) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.233809430639797, LR: 0.0003 +[2026-03-05 02:11:05] (step=0062528) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.23400508706711, LR: 0.0003 +[2026-03-05 02:11:13] (step=0062529) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.234200743494425, LR: 0.0003 +[2026-03-05 02:11:21] (step=0062530) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.234396399921737, LR: 0.0003 +[2026-03-05 02:11:29] (step=0062531) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.234592056349051, LR: 0.0003 +[2026-03-05 02:11:37] (step=0062532) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.234787712776365, LR: 0.0003 +[2026-03-05 02:11:45] (step=0062533) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.234983369203679, LR: 0.0003 +[2026-03-05 02:11:52] (step=0062534) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.235179025630991, LR: 0.0003 +[2026-03-05 02:12:00] (step=0062535) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.235374682058305, LR: 0.0003 +[2026-03-05 02:12:08] (step=0062536) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.23557033848562, LR: 0.0003 +[2026-03-05 02:12:16] (step=0062537) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.235765994912933, LR: 0.0003 +[2026-03-05 02:12:24] (step=0062538) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 12.235961651340247, LR: 0.0003 +[2026-03-05 02:12:32] (step=0062539) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 12.23615730776756, LR: 0.0003 +[2026-03-05 02:12:40] (step=0062540) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.236352964194873, LR: 0.0003 +[2026-03-05 02:12:47] (step=0062541) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.236548620622187, LR: 0.0003 +[2026-03-05 02:12:55] (step=0062542) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.236744277049501, LR: 0.0003 +[2026-03-05 02:13:03] (step=0062543) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.236939933476815, LR: 0.0003 +[2026-03-05 02:13:11] (step=0062544) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.237135589904128, LR: 0.0003 +[2026-03-05 02:13:19] (step=0062545) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.237331246331442, LR: 0.0003 +[2026-03-05 02:13:27] (step=0062546) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.237526902758756, LR: 0.0003 +[2026-03-05 02:13:35] (step=0062547) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.23772255918607, LR: 0.0003 +[2026-03-05 02:13:42] (step=0062548) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.237918215613384, LR: 0.0003 +[2026-03-05 02:13:50] (step=0062549) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.238113872040696, LR: 0.0003 +[2026-03-05 02:13:58] (step=0062550) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 12.23830952846801, LR: 0.0003 +[2026-03-05 02:14:06] (step=0062551) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.238505184895324, LR: 0.0003 +[2026-03-05 02:14:14] (step=0062552) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.238700841322638, LR: 0.0003 +[2026-03-05 02:14:22] (step=0062553) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.238896497749952, LR: 0.0003 +[2026-03-05 02:14:30] (step=0062554) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.239092154177264, LR: 0.0003 +[2026-03-05 02:14:38] (step=0062555) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.239287810604578, LR: 0.0003 +[2026-03-05 02:14:45] (step=0062556) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.239483467031892, LR: 0.0003 +[2026-03-05 02:14:53] (step=0062557) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.239679123459206, LR: 0.0003 +[2026-03-05 02:15:01] (step=0062558) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.239874779886518, LR: 0.0003 +[2026-03-05 02:15:09] (step=0062559) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.240070436313832, LR: 0.0003 +[2026-03-05 02:15:17] (step=0062560) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.240266092741146, LR: 0.0003 +[2026-03-05 02:15:25] (step=0062561) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.24046174916846, LR: 0.0003 +[2026-03-05 02:15:33] (step=0062562) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.240657405595774, LR: 0.0003 +[2026-03-05 02:15:41] (step=0062563) Train Loss: 0.4614, Train Steps/Sec: 0.12, Epoch: 12.240853062023087, LR: 0.0003 +[2026-03-05 02:15:49] (step=0062564) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.2410487184504, LR: 0.0003 +[2026-03-05 02:15:57] (step=0062565) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.241244374877715, LR: 0.0003 +[2026-03-05 02:16:04] (step=0062566) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.241440031305029, LR: 0.0003 +[2026-03-05 02:16:12] (step=0062567) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.241635687732343, LR: 0.0003 +[2026-03-05 02:16:20] (step=0062568) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.241831344159655, LR: 0.0003 +[2026-03-05 02:16:28] (step=0062569) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.242027000586969, LR: 0.0003 +[2026-03-05 02:16:36] (step=0062570) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.242222657014283, LR: 0.0003 +[2026-03-05 02:16:44] (step=0062571) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.242418313441597, LR: 0.0003 +[2026-03-05 02:16:52] (step=0062572) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.242613969868911, LR: 0.0003 +[2026-03-05 02:16:59] (step=0062573) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.242809626296223, LR: 0.0003 +[2026-03-05 02:17:07] (step=0062574) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.243005282723537, LR: 0.0003 +[2026-03-05 02:17:15] (step=0062575) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.243200939150851, LR: 0.0003 +[2026-03-05 02:17:23] (step=0062576) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.243396595578165, LR: 0.0003 +[2026-03-05 02:17:31] (step=0062577) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.24359225200548, LR: 0.0003 +[2026-03-05 02:17:39] (step=0062578) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.243787908432791, LR: 0.0003 +[2026-03-05 02:17:47] (step=0062579) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.243983564860105, LR: 0.0003 +[2026-03-05 02:17:54] (step=0062580) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 12.24417922128742, LR: 0.0003 +[2026-03-05 02:18:02] (step=0062581) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.244374877714733, LR: 0.0003 +[2026-03-05 02:18:10] (step=0062582) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 12.244570534142047, LR: 0.0003 +[2026-03-05 02:18:18] (step=0062583) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.24476619056936, LR: 0.0003 +[2026-03-05 02:18:26] (step=0062584) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.244961846996674, LR: 0.0003 +[2026-03-05 02:18:34] (step=0062585) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.245157503423988, LR: 0.0003 +[2026-03-05 02:18:42] (step=0062586) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 12.245353159851302, LR: 0.0003 +[2026-03-05 02:18:49] (step=0062587) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.245548816278614, LR: 0.0003 +[2026-03-05 02:18:57] (step=0062588) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.245744472705928, LR: 0.0003 +[2026-03-05 02:19:05] (step=0062589) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.245940129133242, LR: 0.0003 +[2026-03-05 02:19:13] (step=0062590) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.246135785560556, LR: 0.0003 +[2026-03-05 02:19:21] (step=0062591) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.24633144198787, LR: 0.0003 +[2026-03-05 02:19:29] (step=0062592) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.246527098415182, LR: 0.0003 +[2026-03-05 02:19:37] (step=0062593) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.246722754842496, LR: 0.0003 +[2026-03-05 02:19:45] (step=0062594) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.24691841126981, LR: 0.0003 +[2026-03-05 02:19:52] (step=0062595) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.247114067697124, LR: 0.0003 +[2026-03-05 02:20:00] (step=0062596) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.247309724124438, LR: 0.0003 +[2026-03-05 02:20:08] (step=0062597) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.24750538055175, LR: 0.0003 +[2026-03-05 02:20:16] (step=0062598) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.247701036979064, LR: 0.0003 +[2026-03-05 02:20:24] (step=0062599) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.247896693406378, LR: 0.0003 +[2026-03-05 02:20:32] (step=0062600) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.248092349833692, LR: 0.0003 +[2026-03-05 02:20:40] (step=0062601) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.248288006261006, LR: 0.0003 +[2026-03-05 02:20:47] (step=0062602) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.248483662688319, LR: 0.0003 +[2026-03-05 02:20:55] (step=0062603) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 12.248679319115633, LR: 0.0003 +[2026-03-05 02:21:03] (step=0062604) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.248874975542947, LR: 0.0003 +[2026-03-05 02:21:11] (step=0062605) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.24907063197026, LR: 0.0003 +[2026-03-05 02:21:19] (step=0062606) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.249266288397575, LR: 0.0003 +[2026-03-05 02:21:27] (step=0062607) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.249461944824887, LR: 0.0003 +[2026-03-05 02:21:35] (step=0062608) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.249657601252201, LR: 0.0003 +[2026-03-05 02:21:42] (step=0062609) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.249853257679515, LR: 0.0003 +[2026-03-05 02:21:51] (step=0062610) Train Loss: 0.4391, Train Steps/Sec: 0.12, Epoch: 12.250048914106829, LR: 0.0003 +[2026-03-05 02:21:58] (step=0062611) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.250244570534141, LR: 0.0003 +[2026-03-05 02:22:06] (step=0062612) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.250440226961455, LR: 0.0003 +[2026-03-05 02:22:14] (step=0062613) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.25063588338877, LR: 0.0003 +[2026-03-05 02:22:22] (step=0062614) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.250831539816083, LR: 0.0003 +[2026-03-05 02:22:30] (step=0062615) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.251027196243397, LR: 0.0003 +[2026-03-05 02:22:38] (step=0062616) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.25122285267071, LR: 0.0003 +[2026-03-05 02:22:46] (step=0062617) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.251418509098023, LR: 0.0003 +[2026-03-05 02:22:54] (step=0062618) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 12.251614165525337, LR: 0.0003 +[2026-03-05 02:23:01] (step=0062619) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.251809821952651, LR: 0.0003 +[2026-03-05 02:23:09] (step=0062620) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.252005478379965, LR: 0.0003 +[2026-03-05 02:23:17] (step=0062621) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.252201134807278, LR: 0.0003 +[2026-03-05 02:23:25] (step=0062622) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.252396791234592, LR: 0.0003 +[2026-03-05 02:23:33] (step=0062623) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.252592447661906, LR: 0.0003 +[2026-03-05 02:23:41] (step=0062624) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.25278810408922, LR: 0.0003 +[2026-03-05 02:23:49] (step=0062625) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.252983760516534, LR: 0.0003 +[2026-03-05 02:23:56] (step=0062626) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.253179416943846, LR: 0.0003 +[2026-03-05 02:24:04] (step=0062627) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.25337507337116, LR: 0.0003 +[2026-03-05 02:24:12] (step=0062628) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.253570729798474, LR: 0.0003 +[2026-03-05 02:24:20] (step=0062629) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.253766386225788, LR: 0.0003 +[2026-03-05 02:24:28] (step=0062630) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.253962042653102, LR: 0.0003 +[2026-03-05 02:24:36] (step=0062631) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.254157699080414, LR: 0.0003 +[2026-03-05 02:24:44] (step=0062632) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.254353355507728, LR: 0.0003 +[2026-03-05 02:24:51] (step=0062633) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.254549011935042, LR: 0.0003 +[2026-03-05 02:24:59] (step=0062634) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.254744668362356, LR: 0.0003 +[2026-03-05 02:25:07] (step=0062635) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.25494032478967, LR: 0.0003 +[2026-03-05 02:25:15] (step=0062636) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.255135981216982, LR: 0.0003 +[2026-03-05 02:25:23] (step=0062637) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.255331637644296, LR: 0.0003 +[2026-03-05 02:25:31] (step=0062638) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.25552729407161, LR: 0.0003 +[2026-03-05 02:25:39] (step=0062639) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.255722950498924, LR: 0.0003 +[2026-03-05 02:25:46] (step=0062640) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.255918606926237, LR: 0.0003 +[2026-03-05 02:25:54] (step=0062641) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.25611426335355, LR: 0.0003 +[2026-03-05 02:26:02] (step=0062642) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.256309919780865, LR: 0.0003 +[2026-03-05 02:26:10] (step=0062643) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.256505576208179, LR: 0.0003 +[2026-03-05 02:26:18] (step=0062644) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.256701232635493, LR: 0.0003 +[2026-03-05 02:26:26] (step=0062645) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.256896889062805, LR: 0.0003 +[2026-03-05 02:26:34] (step=0062646) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.257092545490119, LR: 0.0003 +[2026-03-05 02:26:41] (step=0062647) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.257288201917433, LR: 0.0003 +[2026-03-05 02:26:49] (step=0062648) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.257483858344747, LR: 0.0003 +[2026-03-05 02:26:57] (step=0062649) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.257679514772061, LR: 0.0003 +[2026-03-05 02:27:05] (step=0062650) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.257875171199373, LR: 0.0003 +[2026-03-05 02:27:13] (step=0062651) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 12.258070827626687, LR: 0.0003 +[2026-03-05 02:27:21] (step=0062652) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.258266484054001, LR: 0.0003 +[2026-03-05 02:27:29] (step=0062653) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.258462140481315, LR: 0.0003 +[2026-03-05 02:27:36] (step=0062654) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.25865779690863, LR: 0.0003 +[2026-03-05 02:27:44] (step=0062655) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.258853453335941, LR: 0.0003 +[2026-03-05 02:27:52] (step=0062656) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.259049109763255, LR: 0.0003 +[2026-03-05 02:28:00] (step=0062657) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.25924476619057, LR: 0.0003 +[2026-03-05 02:28:08] (step=0062658) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.259440422617883, LR: 0.0003 +[2026-03-05 02:28:16] (step=0062659) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.259636079045197, LR: 0.0003 +[2026-03-05 02:28:24] (step=0062660) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.25983173547251, LR: 0.0003 +[2026-03-05 02:28:32] (step=0062661) Train Loss: 0.4375, Train Steps/Sec: 0.12, Epoch: 12.260027391899824, LR: 0.0003 +[2026-03-05 02:28:40] (step=0062662) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.260223048327138, LR: 0.0003 +[2026-03-05 02:28:47] (step=0062663) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.260418704754452, LR: 0.0003 +[2026-03-05 02:28:55] (step=0062664) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.260614361181764, LR: 0.0003 +[2026-03-05 02:29:03] (step=0062665) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.260810017609078, LR: 0.0003 +[2026-03-05 02:29:11] (step=0062666) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.261005674036392, LR: 0.0003 +[2026-03-05 02:29:19] (step=0062667) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 12.261201330463706, LR: 0.0003 +[2026-03-05 02:29:27] (step=0062668) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.26139698689102, LR: 0.0003 +[2026-03-05 02:29:35] (step=0062669) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.261592643318332, LR: 0.0003 +[2026-03-05 02:29:43] (step=0062670) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.261788299745646, LR: 0.0003 +[2026-03-05 02:29:50] (step=0062671) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 12.26198395617296, LR: 0.0003 +[2026-03-05 02:29:58] (step=0062672) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.262179612600274, LR: 0.0003 +[2026-03-05 02:30:06] (step=0062673) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.262375269027588, LR: 0.0003 +[2026-03-05 02:30:14] (step=0062674) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.2625709254549, LR: 0.0003 +[2026-03-05 02:30:22] (step=0062675) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.262766581882214, LR: 0.0003 +[2026-03-05 02:30:30] (step=0062676) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.262962238309528, LR: 0.0003 +[2026-03-05 02:30:38] (step=0062677) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.263157894736842, LR: 0.0003 +[2026-03-05 02:30:45] (step=0062678) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.263353551164156, LR: 0.0003 +[2026-03-05 02:30:53] (step=0062679) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.263549207591469, LR: 0.0003 +[2026-03-05 02:31:01] (step=0062680) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.263744864018783, LR: 0.0003 +[2026-03-05 02:31:09] (step=0062681) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.263940520446097, LR: 0.0003 +[2026-03-05 02:31:17] (step=0062682) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.26413617687341, LR: 0.0003 +[2026-03-05 02:31:25] (step=0062683) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 12.264331833300725, LR: 0.0003 +[2026-03-05 02:31:33] (step=0062684) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.264527489728037, LR: 0.0003 +[2026-03-05 02:31:40] (step=0062685) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.264723146155351, LR: 0.0003 +[2026-03-05 02:31:48] (step=0062686) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.264918802582665, LR: 0.0003 +[2026-03-05 02:31:56] (step=0062687) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 12.265114459009979, LR: 0.0003 +[2026-03-05 02:32:04] (step=0062688) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.265310115437293, LR: 0.0003 +[2026-03-05 02:32:12] (step=0062689) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.265505771864605, LR: 0.0003 +[2026-03-05 02:32:20] (step=0062690) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.26570142829192, LR: 0.0003 +[2026-03-05 02:32:28] (step=0062691) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.265897084719233, LR: 0.0003 +[2026-03-05 02:32:36] (step=0062692) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 12.266092741146547, LR: 0.0003 +[2026-03-05 02:32:44] (step=0062693) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.26628839757386, LR: 0.0003 +[2026-03-05 02:32:51] (step=0062694) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.266484054001173, LR: 0.0003 +[2026-03-05 02:32:59] (step=0062695) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.266679710428487, LR: 0.0003 +[2026-03-05 02:33:07] (step=0062696) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.266875366855801, LR: 0.0003 +[2026-03-05 02:33:15] (step=0062697) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.267071023283115, LR: 0.0003 +[2026-03-05 02:33:23] (step=0062698) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.267266679710428, LR: 0.0003 +[2026-03-05 02:33:31] (step=0062699) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 12.267462336137742, LR: 0.0003 +[2026-03-05 02:33:39] (step=0062700) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.267657992565056, LR: 0.0003 +[2026-03-05 02:33:46] (step=0062701) Train Loss: 0.4237, Train Steps/Sec: 0.13, Epoch: 12.26785364899237, LR: 0.0003 +[2026-03-05 02:33:54] (step=0062702) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.268049305419684, LR: 0.0003 +[2026-03-05 02:34:02] (step=0062703) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 12.268244961846996, LR: 0.0003 +[2026-03-05 02:34:10] (step=0062704) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.26844061827431, LR: 0.0003 +[2026-03-05 02:34:18] (step=0062705) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.268636274701624, LR: 0.0003 +[2026-03-05 02:34:26] (step=0062706) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.268831931128938, LR: 0.0003 +[2026-03-05 02:34:34] (step=0062707) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.269027587556252, LR: 0.0003 +[2026-03-05 02:34:42] (step=0062708) Train Loss: 0.4441, Train Steps/Sec: 0.12, Epoch: 12.269223243983564, LR: 0.0003 +[2026-03-05 02:34:50] (step=0062709) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.269418900410878, LR: 0.0003 +[2026-03-05 02:34:58] (step=0062710) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.269614556838192, LR: 0.0003 +[2026-03-05 02:35:05] (step=0062711) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.269810213265506, LR: 0.0003 +[2026-03-05 02:35:13] (step=0062712) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.27000586969282, LR: 0.0003 +[2026-03-05 02:35:21] (step=0062713) Train Loss: 0.4567, Train Steps/Sec: 0.12, Epoch: 12.270201526120132, LR: 0.0003 +[2026-03-05 02:35:29] (step=0062714) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 12.270397182547446, LR: 0.0003 +[2026-03-05 02:35:37] (step=0062715) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.27059283897476, LR: 0.0003 +[2026-03-05 02:35:45] (step=0062716) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.270788495402075, LR: 0.0003 +[2026-03-05 02:35:53] (step=0062717) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.270984151829387, LR: 0.0003 +[2026-03-05 02:36:01] (step=0062718) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.2711798082567, LR: 0.0003 +[2026-03-05 02:36:09] (step=0062719) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.271375464684015, LR: 0.0003 +[2026-03-05 02:36:17] (step=0062720) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.271571121111329, LR: 0.0003 +[2026-03-05 02:36:24] (step=0062721) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.271766777538643, LR: 0.0003 +[2026-03-05 02:36:32] (step=0062722) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.271962433965955, LR: 0.0003 +[2026-03-05 02:36:40] (step=0062723) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.272158090393269, LR: 0.0003 +[2026-03-05 02:36:48] (step=0062724) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.272353746820583, LR: 0.0003 +[2026-03-05 02:36:56] (step=0062725) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.272549403247897, LR: 0.0003 +[2026-03-05 02:37:04] (step=0062726) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 12.272745059675211, LR: 0.0003 +[2026-03-05 02:37:12] (step=0062727) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.272940716102523, LR: 0.0003 +[2026-03-05 02:37:20] (step=0062728) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.273136372529837, LR: 0.0003 +[2026-03-05 02:37:27] (step=0062729) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.273332028957151, LR: 0.0003 +[2026-03-05 02:37:35] (step=0062730) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.273527685384465, LR: 0.0003 +[2026-03-05 02:37:43] (step=0062731) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.27372334181178, LR: 0.0003 +[2026-03-05 02:37:51] (step=0062732) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.273918998239091, LR: 0.0003 +[2026-03-05 02:37:59] (step=0062733) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.274114654666405, LR: 0.0003 +[2026-03-05 02:38:07] (step=0062734) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.27431031109372, LR: 0.0003 +[2026-03-05 02:38:15] (step=0062735) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 12.274505967521034, LR: 0.0003 +[2026-03-05 02:38:23] (step=0062736) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.274701623948348, LR: 0.0003 +[2026-03-05 02:38:30] (step=0062737) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.27489728037566, LR: 0.0003 +[2026-03-05 02:38:38] (step=0062738) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.275092936802974, LR: 0.0003 +[2026-03-05 02:38:46] (step=0062739) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.275288593230288, LR: 0.0003 +[2026-03-05 02:38:54] (step=0062740) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.275484249657602, LR: 0.0003 +[2026-03-05 02:39:02] (step=0062741) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 12.275679906084916, LR: 0.0003 +[2026-03-05 02:39:10] (step=0062742) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.275875562512228, LR: 0.0003 +[2026-03-05 02:39:18] (step=0062743) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.276071218939542, LR: 0.0003 +[2026-03-05 02:39:26] (step=0062744) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.276266875366856, LR: 0.0003 +[2026-03-05 02:39:33] (step=0062745) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.27646253179417, LR: 0.0003 +[2026-03-05 02:39:41] (step=0062746) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.276658188221482, LR: 0.0003 +[2026-03-05 02:39:49] (step=0062747) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.276853844648796, LR: 0.0003 +[2026-03-05 02:39:57] (step=0062748) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.27704950107611, LR: 0.0003 +[2026-03-05 02:40:05] (step=0062749) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.277245157503424, LR: 0.0003 +[2026-03-05 02:40:13] (step=0062750) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 12.277440813930738, LR: 0.0003 +[2026-03-05 02:40:21] (step=0062751) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.27763647035805, LR: 0.0003 +[2026-03-05 02:40:29] (step=0062752) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.277832126785365, LR: 0.0003 +[2026-03-05 02:40:36] (step=0062753) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.278027783212679, LR: 0.0003 +[2026-03-05 02:40:44] (step=0062754) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.278223439639993, LR: 0.0003 +[2026-03-05 02:40:52] (step=0062755) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.278419096067307, LR: 0.0003 +[2026-03-05 02:41:00] (step=0062756) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.278614752494619, LR: 0.0003 +[2026-03-05 02:41:08] (step=0062757) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.278810408921933, LR: 0.0003 +[2026-03-05 02:41:16] (step=0062758) Train Loss: 0.4312, Train Steps/Sec: 0.12, Epoch: 12.279006065349247, LR: 0.0003 +[2026-03-05 02:41:24] (step=0062759) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.27920172177656, LR: 0.0003 +[2026-03-05 02:41:32] (step=0062760) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.279397378203875, LR: 0.0003 +[2026-03-05 02:41:40] (step=0062761) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.279593034631187, LR: 0.0003 +[2026-03-05 02:41:48] (step=0062762) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 12.279788691058501, LR: 0.0003 +[2026-03-05 02:41:56] (step=0062763) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.279984347485815, LR: 0.0003 +[2026-03-05 02:42:03] (step=0062764) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 12.280180003913129, LR: 0.0003 +[2026-03-05 02:42:11] (step=0062765) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.280375660340443, LR: 0.0003 +[2026-03-05 02:42:19] (step=0062766) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.280571316767755, LR: 0.0003 +[2026-03-05 02:42:27] (step=0062767) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.28076697319507, LR: 0.0003 +[2026-03-05 02:42:35] (step=0062768) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.280962629622383, LR: 0.0003 +[2026-03-05 02:42:43] (step=0062769) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.281158286049697, LR: 0.0003 +[2026-03-05 02:42:51] (step=0062770) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.28135394247701, LR: 0.0003 +[2026-03-05 02:42:59] (step=0062771) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.281549598904324, LR: 0.0003 +[2026-03-05 02:43:06] (step=0062772) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.281745255331638, LR: 0.0003 +[2026-03-05 02:43:14] (step=0062773) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.281940911758952, LR: 0.0003 +[2026-03-05 02:43:22] (step=0062774) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.282136568186266, LR: 0.0003 +[2026-03-05 02:43:30] (step=0062775) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.282332224613578, LR: 0.0003 +[2026-03-05 02:43:38] (step=0062776) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.282527881040892, LR: 0.0003 +[2026-03-05 02:43:46] (step=0062777) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.282723537468206, LR: 0.0003 +[2026-03-05 02:43:54] (step=0062778) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.28291919389552, LR: 0.0003 +[2026-03-05 02:44:02] (step=0062779) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.283114850322834, LR: 0.0003 +[2026-03-05 02:44:10] (step=0062780) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.283310506750146, LR: 0.0003 +[2026-03-05 02:44:17] (step=0062781) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.28350616317746, LR: 0.0003 +[2026-03-05 02:44:25] (step=0062782) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.283701819604774, LR: 0.0003 +[2026-03-05 02:44:33] (step=0062783) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.283897476032088, LR: 0.0003 +[2026-03-05 02:44:41] (step=0062784) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.284093132459402, LR: 0.0003 +[2026-03-05 02:44:49] (step=0062785) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 12.284288788886714, LR: 0.0003 +[2026-03-05 02:44:57] (step=0062786) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.284484445314028, LR: 0.0003 +[2026-03-05 02:45:05] (step=0062787) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.284680101741342, LR: 0.0003 +[2026-03-05 02:45:12] (step=0062788) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.284875758168656, LR: 0.0003 +[2026-03-05 02:45:20] (step=0062789) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.28507141459597, LR: 0.0003 +[2026-03-05 02:45:28] (step=0062790) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.285267071023283, LR: 0.0003 +[2026-03-05 02:45:36] (step=0062791) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 12.285462727450597, LR: 0.0003 +[2026-03-05 02:45:44] (step=0062792) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.28565838387791, LR: 0.0003 +[2026-03-05 02:45:52] (step=0062793) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.285854040305225, LR: 0.0003 +[2026-03-05 02:46:00] (step=0062794) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.286049696732539, LR: 0.0003 +[2026-03-05 02:46:08] (step=0062795) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.28624535315985, LR: 0.0003 +[2026-03-05 02:46:15] (step=0062796) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.286441009587165, LR: 0.0003 +[2026-03-05 02:46:23] (step=0062797) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 12.286636666014479, LR: 0.0003 +[2026-03-05 02:46:31] (step=0062798) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.286832322441793, LR: 0.0003 +[2026-03-05 02:46:39] (step=0062799) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.287027978869105, LR: 0.0003 +[2026-03-05 02:46:47] (step=0062800) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.287223635296419, LR: 0.0003 +[2026-03-05 02:46:55] (step=0062801) Train Loss: 0.4510, Train Steps/Sec: 0.12, Epoch: 12.287419291723733, LR: 0.0003 +[2026-03-05 02:47:03] (step=0062802) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.287614948151047, LR: 0.0003 +[2026-03-05 02:47:11] (step=0062803) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 12.287810604578361, LR: 0.0003 +[2026-03-05 02:47:19] (step=0062804) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.288006261005673, LR: 0.0003 +[2026-03-05 02:47:27] (step=0062805) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.288201917432987, LR: 0.0003 +[2026-03-05 02:47:34] (step=0062806) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.288397573860301, LR: 0.0003 +[2026-03-05 02:47:42] (step=0062807) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.288593230287615, LR: 0.0003 +[2026-03-05 02:47:50] (step=0062808) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.28878888671493, LR: 0.0003 +[2026-03-05 02:47:58] (step=0062809) Train Loss: 0.4422, Train Steps/Sec: 0.12, Epoch: 12.288984543142242, LR: 0.0003 +[2026-03-05 02:48:06] (step=0062810) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 12.289180199569556, LR: 0.0003 +[2026-03-05 02:48:14] (step=0062811) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.28937585599687, LR: 0.0003 +[2026-03-05 02:48:22] (step=0062812) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.289571512424184, LR: 0.0003 +[2026-03-05 02:48:30] (step=0062813) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.289767168851498, LR: 0.0003 +[2026-03-05 02:48:38] (step=0062814) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.28996282527881, LR: 0.0003 +[2026-03-05 02:48:46] (step=0062815) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.290158481706124, LR: 0.0003 +[2026-03-05 02:48:53] (step=0062816) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.290354138133438, LR: 0.0003 +[2026-03-05 02:49:01] (step=0062817) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.290549794560752, LR: 0.0003 +[2026-03-05 02:49:09] (step=0062818) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.290745450988066, LR: 0.0003 +[2026-03-05 02:49:17] (step=0062819) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.290941107415378, LR: 0.0003 +[2026-03-05 02:49:25] (step=0062820) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.291136763842692, LR: 0.0003 +[2026-03-05 02:49:33] (step=0062821) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.291332420270006, LR: 0.0003 +[2026-03-05 02:49:41] (step=0062822) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.29152807669732, LR: 0.0003 +[2026-03-05 02:49:49] (step=0062823) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 12.291723733124632, LR: 0.0003 +[2026-03-05 02:49:56] (step=0062824) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.291919389551946, LR: 0.0003 +[2026-03-05 02:50:04] (step=0062825) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.29211504597926, LR: 0.0003 +[2026-03-05 02:50:12] (step=0062826) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.292310702406574, LR: 0.0003 +[2026-03-05 02:50:20] (step=0062827) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 12.292506358833888, LR: 0.0003 +[2026-03-05 02:50:28] (step=0062828) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.2927020152612, LR: 0.0003 +[2026-03-05 02:50:36] (step=0062829) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.292897671688515, LR: 0.0003 +[2026-03-05 02:50:44] (step=0062830) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.293093328115829, LR: 0.0003 +[2026-03-05 02:50:51] (step=0062831) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.293288984543143, LR: 0.0003 +[2026-03-05 02:50:59] (step=0062832) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.293484640970457, LR: 0.0003 +[2026-03-05 02:51:07] (step=0062833) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 12.293680297397769, LR: 0.0003 +[2026-03-05 02:51:15] (step=0062834) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.293875953825083, LR: 0.0003 +[2026-03-05 02:51:23] (step=0062835) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.294071610252397, LR: 0.0003 +[2026-03-05 02:51:31] (step=0062836) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.29426726667971, LR: 0.0003 +[2026-03-05 02:51:39] (step=0062837) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.294462923107025, LR: 0.0003 +[2026-03-05 02:51:47] (step=0062838) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.294658579534337, LR: 0.0003 +[2026-03-05 02:51:55] (step=0062839) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.294854235961651, LR: 0.0003 +[2026-03-05 02:52:03] (step=0062840) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 12.295049892388965, LR: 0.0003 +[2026-03-05 02:52:10] (step=0062841) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.295245548816279, LR: 0.0003 +[2026-03-05 02:52:18] (step=0062842) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.295441205243593, LR: 0.0003 +[2026-03-05 02:52:26] (step=0062843) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.295636861670905, LR: 0.0003 +[2026-03-05 02:52:34] (step=0062844) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.29583251809822, LR: 0.0003 +[2026-03-05 02:52:42] (step=0062845) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.296028174525533, LR: 0.0003 +[2026-03-05 02:52:50] (step=0062846) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.296223830952847, LR: 0.0003 +[2026-03-05 02:52:58] (step=0062847) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.29641948738016, LR: 0.0003 +[2026-03-05 02:53:06] (step=0062848) Train Loss: 0.4212, Train Steps/Sec: 0.13, Epoch: 12.296615143807474, LR: 0.0003 +[2026-03-05 02:53:13] (step=0062849) Train Loss: 0.4246, Train Steps/Sec: 0.13, Epoch: 12.296810800234788, LR: 0.0003 +[2026-03-05 02:53:21] (step=0062850) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.297006456662102, LR: 0.0003 +[2026-03-05 02:53:29] (step=0062851) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.297202113089416, LR: 0.0003 +[2026-03-05 02:53:37] (step=0062852) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.297397769516728, LR: 0.0003 +[2026-03-05 02:53:45] (step=0062853) Train Loss: 0.4323, Train Steps/Sec: 0.12, Epoch: 12.297593425944042, LR: 0.0003 +[2026-03-05 02:53:53] (step=0062854) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.297789082371356, LR: 0.0003 +[2026-03-05 02:54:01] (step=0062855) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.29798473879867, LR: 0.0003 +[2026-03-05 02:54:09] (step=0062856) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.298180395225984, LR: 0.0003 +[2026-03-05 02:54:16] (step=0062857) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.298376051653296, LR: 0.0003 +[2026-03-05 02:54:24] (step=0062858) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 12.29857170808061, LR: 0.0003 +[2026-03-05 02:54:32] (step=0062859) Train Loss: 0.4448, Train Steps/Sec: 0.12, Epoch: 12.298767364507924, LR: 0.0003 +[2026-03-05 02:54:40] (step=0062860) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.298963020935238, LR: 0.0003 +[2026-03-05 02:54:48] (step=0062861) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.299158677362552, LR: 0.0003 +[2026-03-05 02:54:56] (step=0062862) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.299354333789864, LR: 0.0003 +[2026-03-05 02:55:04] (step=0062863) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.299549990217178, LR: 0.0003 +[2026-03-05 02:55:12] (step=0062864) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 12.299745646644492, LR: 0.0003 +[2026-03-05 02:55:20] (step=0062865) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.299941303071806, LR: 0.0003 +[2026-03-05 02:55:27] (step=0062866) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 12.30013695949912, LR: 0.0003 +[2026-03-05 02:55:35] (step=0062867) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.300332615926433, LR: 0.0003 +[2026-03-05 02:55:43] (step=0062868) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.300528272353747, LR: 0.0003 +[2026-03-05 02:55:51] (step=0062869) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.30072392878106, LR: 0.0003 +[2026-03-05 02:55:59] (step=0062870) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.300919585208375, LR: 0.0003 +[2026-03-05 02:56:07] (step=0062871) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.301115241635689, LR: 0.0003 +[2026-03-05 02:56:15] (step=0062872) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.301310898063, LR: 0.0003 +[2026-03-05 02:56:23] (step=0062873) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.301506554490315, LR: 0.0003 +[2026-03-05 02:56:30] (step=0062874) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.301702210917629, LR: 0.0003 +[2026-03-05 02:56:38] (step=0062875) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.301897867344943, LR: 0.0003 +[2026-03-05 02:56:46] (step=0062876) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.302093523772255, LR: 0.0003 +[2026-03-05 02:56:54] (step=0062877) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.302289180199569, LR: 0.0003 +[2026-03-05 02:57:02] (step=0062878) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.302484836626883, LR: 0.0003 +[2026-03-05 02:57:10] (step=0062879) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.302680493054197, LR: 0.0003 +[2026-03-05 02:57:18] (step=0062880) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.302876149481511, LR: 0.0003 +[2026-03-05 02:57:25] (step=0062881) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.303071805908823, LR: 0.0003 +[2026-03-05 02:57:33] (step=0062882) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.303267462336137, LR: 0.0003 +[2026-03-05 02:57:41] (step=0062883) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.303463118763451, LR: 0.0003 +[2026-03-05 02:57:49] (step=0062884) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.303658775190765, LR: 0.0003 +[2026-03-05 02:57:57] (step=0062885) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 12.30385443161808, LR: 0.0003 +[2026-03-05 02:58:05] (step=0062886) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.304050088045392, LR: 0.0003 +[2026-03-05 02:58:13] (step=0062887) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.304245744472706, LR: 0.0003 +[2026-03-05 02:58:20] (step=0062888) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.30444140090002, LR: 0.0003 +[2026-03-05 02:58:28] (step=0062889) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.304637057327334, LR: 0.0003 +[2026-03-05 02:58:36] (step=0062890) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.304832713754648, LR: 0.0003 +[2026-03-05 02:58:44] (step=0062891) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 12.30502837018196, LR: 0.0003 +[2026-03-05 02:58:52] (step=0062892) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 12.305224026609274, LR: 0.0003 +[2026-03-05 02:59:00] (step=0062893) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 12.305419683036588, LR: 0.0003 +[2026-03-05 02:59:08] (step=0062894) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.305615339463902, LR: 0.0003 +[2026-03-05 02:59:15] (step=0062895) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 12.305810995891216, LR: 0.0003 +[2026-03-05 02:59:23] (step=0062896) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.306006652318528, LR: 0.0003 +[2026-03-05 02:59:31] (step=0062897) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 12.306202308745842, LR: 0.0003 +[2026-03-05 02:59:39] (step=0062898) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.306397965173156, LR: 0.0003 +[2026-03-05 02:59:47] (step=0062899) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.30659362160047, LR: 0.0003 +[2026-03-05 02:59:55] (step=0062900) Train Loss: 0.4465, Train Steps/Sec: 0.12, Epoch: 12.306789278027782, LR: 0.0003 +[2026-03-05 03:00:03] (step=0062901) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 12.306984934455096, LR: 0.0003 +[2026-03-05 03:00:11] (step=0062902) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 12.30718059088241, LR: 0.0003 +[2026-03-05 03:00:19] (step=0062903) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.307376247309724, LR: 0.0003 +[2026-03-05 03:00:27] (step=0062904) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.307571903737038, LR: 0.0003 +[2026-03-05 03:00:34] (step=0062905) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.30776756016435, LR: 0.0003 +[2026-03-05 03:00:42] (step=0062906) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.307963216591665, LR: 0.0003 +[2026-03-05 03:00:50] (step=0062907) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.308158873018979, LR: 0.0003 +[2026-03-05 03:00:58] (step=0062908) Train Loss: 0.4481, Train Steps/Sec: 0.12, Epoch: 12.308354529446293, LR: 0.0003 +[2026-03-05 03:01:06] (step=0062909) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.308550185873607, LR: 0.0003 +[2026-03-05 03:01:14] (step=0062910) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.308745842300919, LR: 0.0003 +[2026-03-05 03:01:22] (step=0062911) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 12.308941498728233, LR: 0.0003 +[2026-03-05 03:01:30] (step=0062912) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.309137155155547, LR: 0.0003 +[2026-03-05 03:01:38] (step=0062913) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.30933281158286, LR: 0.0003 +[2026-03-05 03:01:45] (step=0062914) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.309528468010175, LR: 0.0003 +[2026-03-05 03:01:53] (step=0062915) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.309724124437487, LR: 0.0003 +[2026-03-05 03:02:01] (step=0062916) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 12.309919780864801, LR: 0.0003 +[2026-03-05 03:02:09] (step=0062917) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.310115437292115, LR: 0.0003 +[2026-03-05 03:02:17] (step=0062918) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.310311093719429, LR: 0.0003 +[2026-03-05 03:02:25] (step=0062919) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.310506750146743, LR: 0.0003 +[2026-03-05 03:02:33] (step=0062920) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.310702406574055, LR: 0.0003 +[2026-03-05 03:02:41] (step=0062921) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.31089806300137, LR: 0.0003 +[2026-03-05 03:02:48] (step=0062922) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.311093719428683, LR: 0.0003 +[2026-03-05 03:02:56] (step=0062923) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.311289375855997, LR: 0.0003 +[2026-03-05 03:03:04] (step=0062924) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.311485032283311, LR: 0.0003 +[2026-03-05 03:03:12] (step=0062925) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.311680688710624, LR: 0.0003 +[2026-03-05 03:03:20] (step=0062926) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.311876345137938, LR: 0.0003 +[2026-03-05 03:03:28] (step=0062927) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.312072001565252, LR: 0.0003 +[2026-03-05 03:03:36] (step=0062928) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.312267657992566, LR: 0.0003 +[2026-03-05 03:03:43] (step=0062929) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.312463314419878, LR: 0.0003 +[2026-03-05 03:03:51] (step=0062930) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 12.312658970847192, LR: 0.0003 +[2026-03-05 03:03:59] (step=0062931) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.312854627274506, LR: 0.0003 +[2026-03-05 03:04:07] (step=0062932) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 12.31305028370182, LR: 0.0003 +[2026-03-05 03:04:15] (step=0062933) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.313245940129134, LR: 0.0003 +[2026-03-05 03:04:23] (step=0062934) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.313441596556446, LR: 0.0003 +[2026-03-05 03:04:31] (step=0062935) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.31363725298376, LR: 0.0003 +[2026-03-05 03:04:38] (step=0062936) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.313832909411074, LR: 0.0003 +[2026-03-05 03:04:46] (step=0062937) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.314028565838388, LR: 0.0003 +[2026-03-05 03:04:54] (step=0062938) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.314224222265702, LR: 0.0003 +[2026-03-05 03:05:02] (step=0062939) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.314419878693014, LR: 0.0003 +[2026-03-05 03:05:10] (step=0062940) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.314615535120328, LR: 0.0003 +[2026-03-05 03:05:18] (step=0062941) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.314811191547642, LR: 0.0003 +[2026-03-05 03:05:26] (step=0062942) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.315006847974956, LR: 0.0003 +[2026-03-05 03:05:34] (step=0062943) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.31520250440227, LR: 0.0003 +[2026-03-05 03:05:41] (step=0062944) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.315398160829583, LR: 0.0003 +[2026-03-05 03:05:49] (step=0062945) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.315593817256897, LR: 0.0003 +[2026-03-05 03:05:57] (step=0062946) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.31578947368421, LR: 0.0003 +[2026-03-05 03:06:05] (step=0062947) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.315985130111525, LR: 0.0003 +[2026-03-05 03:06:13] (step=0062948) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 12.316180786538839, LR: 0.0003 +[2026-03-05 03:06:21] (step=0062949) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.31637644296615, LR: 0.0003 +[2026-03-05 03:06:29] (step=0062950) Train Loss: 0.4433, Train Steps/Sec: 0.12, Epoch: 12.316572099393465, LR: 0.0003 +[2026-03-05 03:06:37] (step=0062951) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.316767755820779, LR: 0.0003 +[2026-03-05 03:06:45] (step=0062952) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.316963412248093, LR: 0.0003 +[2026-03-05 03:06:52] (step=0062953) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.317159068675405, LR: 0.0003 +[2026-03-05 03:07:00] (step=0062954) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.31735472510272, LR: 0.0003 +[2026-03-05 03:07:08] (step=0062955) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.317550381530033, LR: 0.0003 +[2026-03-05 03:07:16] (step=0062956) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.317746037957347, LR: 0.0003 +[2026-03-05 03:07:24] (step=0062957) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.317941694384661, LR: 0.0003 +[2026-03-05 03:07:32] (step=0062958) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.318137350811973, LR: 0.0003 +[2026-03-05 03:07:40] (step=0062959) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.318333007239287, LR: 0.0003 +[2026-03-05 03:07:48] (step=0062960) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.318528663666601, LR: 0.0003 +[2026-03-05 03:07:55] (step=0062961) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.318724320093915, LR: 0.0003 +[2026-03-05 03:08:03] (step=0062962) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.31891997652123, LR: 0.0003 +[2026-03-05 03:08:11] (step=0062963) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 12.319115632948542, LR: 0.0003 +[2026-03-05 03:08:19] (step=0062964) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.319311289375856, LR: 0.0003 +[2026-03-05 03:08:27] (step=0062965) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.31950694580317, LR: 0.0003 +[2026-03-05 03:08:35] (step=0062966) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.319702602230484, LR: 0.0003 +[2026-03-05 03:08:43] (step=0062967) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 12.319898258657798, LR: 0.0003 +[2026-03-05 03:08:50] (step=0062968) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 12.32009391508511, LR: 0.0003 +[2026-03-05 03:08:58] (step=0062969) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.320289571512424, LR: 0.0003 +[2026-03-05 03:09:06] (step=0062970) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.320485227939738, LR: 0.0003 +[2026-03-05 03:09:14] (step=0062971) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.320680884367052, LR: 0.0003 +[2026-03-05 03:09:22] (step=0062972) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.320876540794366, LR: 0.0003 +[2026-03-05 03:09:30] (step=0062973) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.321072197221678, LR: 0.0003 +[2026-03-05 03:09:38] (step=0062974) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.321267853648992, LR: 0.0003 +[2026-03-05 03:09:45] (step=0062975) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.321463510076306, LR: 0.0003 +[2026-03-05 03:09:53] (step=0062976) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.32165916650362, LR: 0.0003 +[2026-03-05 03:10:01] (step=0062977) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.321854822930934, LR: 0.0003 +[2026-03-05 03:10:09] (step=0062978) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.322050479358246, LR: 0.0003 +[2026-03-05 03:10:17] (step=0062979) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.32224613578556, LR: 0.0003 +[2026-03-05 03:10:25] (step=0062980) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.322441792212874, LR: 0.0003 +[2026-03-05 03:10:33] (step=0062981) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.322637448640188, LR: 0.0003 +[2026-03-05 03:10:41] (step=0062982) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 12.3228331050675, LR: 0.0003 +[2026-03-05 03:10:48] (step=0062983) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.323028761494815, LR: 0.0003 +[2026-03-05 03:10:56] (step=0062984) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.323224417922129, LR: 0.0003 +[2026-03-05 03:11:04] (step=0062985) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.323420074349443, LR: 0.0003 +[2026-03-05 03:11:12] (step=0062986) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.323615730776757, LR: 0.0003 +[2026-03-05 03:11:20] (step=0062987) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.323811387204069, LR: 0.0003 +[2026-03-05 03:11:28] (step=0062988) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.324007043631383, LR: 0.0003 +[2026-03-05 03:11:36] (step=0062989) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.324202700058697, LR: 0.0003 +[2026-03-05 03:11:43] (step=0062990) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.324398356486011, LR: 0.0003 +[2026-03-05 03:11:51] (step=0062991) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.324594012913325, LR: 0.0003 +[2026-03-05 03:11:59] (step=0062992) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.324789669340637, LR: 0.0003 +[2026-03-05 03:12:07] (step=0062993) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.324985325767951, LR: 0.0003 +[2026-03-05 03:12:15] (step=0062994) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.325180982195265, LR: 0.0003 +[2026-03-05 03:12:23] (step=0062995) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.32537663862258, LR: 0.0003 +[2026-03-05 03:12:31] (step=0062996) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.325572295049893, LR: 0.0003 +[2026-03-05 03:12:39] (step=0062997) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 12.325767951477205, LR: 0.0003 +[2026-03-05 03:12:47] (step=0062998) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.32596360790452, LR: 0.0003 +[2026-03-05 03:12:54] (step=0062999) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.326159264331833, LR: 0.0003 +[2026-03-05 03:13:02] (step=0063000) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.326354920759147, LR: 0.0003 +[2026-03-05 03:13:02] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0063000/ +[2026-03-05 03:13:10] (step=0063001) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.326550577186461, LR: 0.0003 +[2026-03-05 03:13:18] (step=0063002) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 12.326746233613774, LR: 0.0003 +[2026-03-05 03:13:26] (step=0063003) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.326941890041088, LR: 0.0003 +[2026-03-05 03:13:34] (step=0063004) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.327137546468402, LR: 0.0003 +[2026-03-05 03:13:42] (step=0063005) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.327333202895716, LR: 0.0003 +[2026-03-05 03:13:49] (step=0063006) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.327528859323028, LR: 0.0003 +[2026-03-05 03:13:57] (step=0063007) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.327724515750342, LR: 0.0003 +[2026-03-05 03:14:05] (step=0063008) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.327920172177656, LR: 0.0003 +[2026-03-05 03:14:13] (step=0063009) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.32811582860497, LR: 0.0003 +[2026-03-05 03:14:21] (step=0063010) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 12.328311485032284, LR: 0.0003 +[2026-03-05 03:14:29] (step=0063011) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.328507141459596, LR: 0.0003 +[2026-03-05 03:14:37] (step=0063012) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.32870279788691, LR: 0.0003 +[2026-03-05 03:14:45] (step=0063013) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.328898454314224, LR: 0.0003 +[2026-03-05 03:14:52] (step=0063014) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.329094110741538, LR: 0.0003 +[2026-03-05 03:15:00] (step=0063015) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.329289767168852, LR: 0.0003 +[2026-03-05 03:15:08] (step=0063016) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.329485423596164, LR: 0.0003 +[2026-03-05 03:15:16] (step=0063017) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.329681080023478, LR: 0.0003 +[2026-03-05 03:15:24] (step=0063018) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 12.329876736450792, LR: 0.0003 +[2026-03-05 03:15:32] (step=0063019) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.330072392878106, LR: 0.0003 +[2026-03-05 03:15:40] (step=0063020) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 12.33026804930542, LR: 0.0003 +[2026-03-05 03:15:48] (step=0063021) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 12.330463705732733, LR: 0.0003 +[2026-03-05 03:15:55] (step=0063022) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.330659362160047, LR: 0.0003 +[2026-03-05 03:16:03] (step=0063023) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.33085501858736, LR: 0.0003 +[2026-03-05 03:16:11] (step=0063024) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.331050675014675, LR: 0.0003 +[2026-03-05 03:16:19] (step=0063025) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.331246331441989, LR: 0.0003 +[2026-03-05 03:16:27] (step=0063026) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.331441987869301, LR: 0.0003 +[2026-03-05 03:16:35] (step=0063027) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.331637644296615, LR: 0.0003 +[2026-03-05 03:16:43] (step=0063028) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.331833300723929, LR: 0.0003 +[2026-03-05 03:16:50] (step=0063029) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.332028957151243, LR: 0.0003 +[2026-03-05 03:16:58] (step=0063030) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.332224613578557, LR: 0.0003 +[2026-03-05 03:17:06] (step=0063031) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.33242027000587, LR: 0.0003 +[2026-03-05 03:17:14] (step=0063032) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.332615926433183, LR: 0.0003 +[2026-03-05 03:17:22] (step=0063033) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.332811582860497, LR: 0.0003 +[2026-03-05 03:17:30] (step=0063034) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.333007239287811, LR: 0.0003 +[2026-03-05 03:17:38] (step=0063035) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.333202895715123, LR: 0.0003 +[2026-03-05 03:17:45] (step=0063036) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.333398552142437, LR: 0.0003 +[2026-03-05 03:17:53] (step=0063037) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.333594208569751, LR: 0.0003 +[2026-03-05 03:18:01] (step=0063038) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.333789864997065, LR: 0.0003 +[2026-03-05 03:18:09] (step=0063039) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.33398552142438, LR: 0.0003 +[2026-03-05 03:18:17] (step=0063040) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.334181177851692, LR: 0.0003 +[2026-03-05 03:18:25] (step=0063041) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.334376834279006, LR: 0.0003 +[2026-03-05 03:18:33] (step=0063042) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.33457249070632, LR: 0.0003 +[2026-03-05 03:18:40] (step=0063043) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 12.334768147133634, LR: 0.0003 +[2026-03-05 03:18:48] (step=0063044) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.334963803560948, LR: 0.0003 +[2026-03-05 03:18:56] (step=0063045) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.33515945998826, LR: 0.0003 +[2026-03-05 03:19:04] (step=0063046) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.335355116415574, LR: 0.0003 +[2026-03-05 03:19:12] (step=0063047) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.335550772842888, LR: 0.0003 +[2026-03-05 03:19:20] (step=0063048) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.335746429270202, LR: 0.0003 +[2026-03-05 03:19:28] (step=0063049) Train Loss: 0.4450, Train Steps/Sec: 0.12, Epoch: 12.335942085697516, LR: 0.0003 +[2026-03-05 03:19:36] (step=0063050) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.336137742124828, LR: 0.0003 +[2026-03-05 03:19:44] (step=0063051) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.336333398552142, LR: 0.0003 +[2026-03-05 03:19:51] (step=0063052) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.336529054979456, LR: 0.0003 +[2026-03-05 03:19:59] (step=0063053) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.33672471140677, LR: 0.0003 +[2026-03-05 03:20:07] (step=0063054) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.336920367834084, LR: 0.0003 +[2026-03-05 03:20:15] (step=0063055) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.337116024261396, LR: 0.0003 +[2026-03-05 03:20:23] (step=0063056) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.33731168068871, LR: 0.0003 +[2026-03-05 03:20:31] (step=0063057) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.337507337116024, LR: 0.0003 +[2026-03-05 03:20:39] (step=0063058) Train Loss: 0.4478, Train Steps/Sec: 0.12, Epoch: 12.337702993543338, LR: 0.0003 +[2026-03-05 03:20:47] (step=0063059) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.33789864997065, LR: 0.0003 +[2026-03-05 03:20:54] (step=0063060) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.338094306397965, LR: 0.0003 +[2026-03-05 03:21:02] (step=0063061) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 12.338289962825279, LR: 0.0003 +[2026-03-05 03:21:10] (step=0063062) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.338485619252593, LR: 0.0003 +[2026-03-05 03:21:18] (step=0063063) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.338681275679907, LR: 0.0003 +[2026-03-05 03:21:26] (step=0063064) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.338876932107219, LR: 0.0003 +[2026-03-05 03:21:34] (step=0063065) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.339072588534533, LR: 0.0003 +[2026-03-05 03:21:42] (step=0063066) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.339268244961847, LR: 0.0003 +[2026-03-05 03:21:50] (step=0063067) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.339463901389161, LR: 0.0003 +[2026-03-05 03:21:57] (step=0063068) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.339659557816475, LR: 0.0003 +[2026-03-05 03:22:05] (step=0063069) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.339855214243787, LR: 0.0003 +[2026-03-05 03:22:13] (step=0063070) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.340050870671101, LR: 0.0003 +[2026-03-05 03:22:21] (step=0063071) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.340246527098415, LR: 0.0003 +[2026-03-05 03:22:29] (step=0063072) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.34044218352573, LR: 0.0003 +[2026-03-05 03:22:37] (step=0063073) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.340637839953043, LR: 0.0003 +[2026-03-05 03:22:45] (step=0063074) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 12.340833496380355, LR: 0.0003 +[2026-03-05 03:22:52] (step=0063075) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.34102915280767, LR: 0.0003 +[2026-03-05 03:23:00] (step=0063076) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.341224809234983, LR: 0.0003 +[2026-03-05 03:23:08] (step=0063077) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.341420465662297, LR: 0.0003 +[2026-03-05 03:23:16] (step=0063078) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.341616122089611, LR: 0.0003 +[2026-03-05 03:23:24] (step=0063079) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 12.341811778516924, LR: 0.0003 +[2026-03-05 03:23:32] (step=0063080) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.342007434944238, LR: 0.0003 +[2026-03-05 03:23:40] (step=0063081) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 12.342203091371552, LR: 0.0003 +[2026-03-05 03:23:48] (step=0063082) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.342398747798866, LR: 0.0003 +[2026-03-05 03:23:55] (step=0063083) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 12.34259440422618, LR: 0.0003 +[2026-03-05 03:24:03] (step=0063084) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.342790060653492, LR: 0.0003 +[2026-03-05 03:24:11] (step=0063085) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.342985717080806, LR: 0.0003 +[2026-03-05 03:24:19] (step=0063086) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.34318137350812, LR: 0.0003 +[2026-03-05 03:24:27] (step=0063087) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.343377029935434, LR: 0.0003 +[2026-03-05 03:24:35] (step=0063088) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.343572686362746, LR: 0.0003 +[2026-03-05 03:24:43] (step=0063089) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.34376834279006, LR: 0.0003 +[2026-03-05 03:24:50] (step=0063090) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.343963999217374, LR: 0.0003 +[2026-03-05 03:24:58] (step=0063091) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.344159655644688, LR: 0.0003 +[2026-03-05 03:25:06] (step=0063092) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.344355312072002, LR: 0.0003 +[2026-03-05 03:25:14] (step=0063093) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.344550968499314, LR: 0.0003 +[2026-03-05 03:25:22] (step=0063094) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.344746624926628, LR: 0.0003 +[2026-03-05 03:25:30] (step=0063095) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.344942281353942, LR: 0.0003 +[2026-03-05 03:25:38] (step=0063096) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.345137937781256, LR: 0.0003 +[2026-03-05 03:25:46] (step=0063097) Train Loss: 0.4456, Train Steps/Sec: 0.12, Epoch: 12.34533359420857, LR: 0.0003 +[2026-03-05 03:25:53] (step=0063098) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.345529250635883, LR: 0.0003 +[2026-03-05 03:26:01] (step=0063099) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.345724907063197, LR: 0.0003 +[2026-03-05 03:26:09] (step=0063100) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.34592056349051, LR: 0.0003 +[2026-03-05 03:26:17] (step=0063101) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.346116219917825, LR: 0.0003 +[2026-03-05 03:26:25] (step=0063102) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.346311876345139, LR: 0.0003 +[2026-03-05 03:26:33] (step=0063103) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.346507532772451, LR: 0.0003 +[2026-03-05 03:26:41] (step=0063104) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.346703189199765, LR: 0.0003 +[2026-03-05 03:26:49] (step=0063105) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.346898845627079, LR: 0.0003 +[2026-03-05 03:26:56] (step=0063106) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.347094502054393, LR: 0.0003 +[2026-03-05 03:27:04] (step=0063107) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.347290158481707, LR: 0.0003 +[2026-03-05 03:27:12] (step=0063108) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.34748581490902, LR: 0.0003 +[2026-03-05 03:27:20] (step=0063109) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.347681471336333, LR: 0.0003 +[2026-03-05 03:27:28] (step=0063110) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.347877127763647, LR: 0.0003 +[2026-03-05 03:27:36] (step=0063111) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.348072784190961, LR: 0.0003 +[2026-03-05 03:27:44] (step=0063112) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.348268440618273, LR: 0.0003 +[2026-03-05 03:27:52] (step=0063113) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.348464097045587, LR: 0.0003 +[2026-03-05 03:27:59] (step=0063114) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.348659753472901, LR: 0.0003 +[2026-03-05 03:28:07] (step=0063115) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.348855409900215, LR: 0.0003 +[2026-03-05 03:28:15] (step=0063116) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.34905106632753, LR: 0.0003 +[2026-03-05 03:28:23] (step=0063117) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.349246722754842, LR: 0.0003 +[2026-03-05 03:28:31] (step=0063118) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.349442379182156, LR: 0.0003 +[2026-03-05 03:28:39] (step=0063119) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.34963803560947, LR: 0.0003 +[2026-03-05 03:28:47] (step=0063120) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.349833692036784, LR: 0.0003 +[2026-03-05 03:28:55] (step=0063121) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.350029348464098, LR: 0.0003 +[2026-03-05 03:29:02] (step=0063122) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.35022500489141, LR: 0.0003 +[2026-03-05 03:29:10] (step=0063123) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.350420661318724, LR: 0.0003 +[2026-03-05 03:29:18] (step=0063124) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.350616317746038, LR: 0.0003 +[2026-03-05 03:29:26] (step=0063125) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.350811974173352, LR: 0.0003 +[2026-03-05 03:29:34] (step=0063126) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.351007630600666, LR: 0.0003 +[2026-03-05 03:29:42] (step=0063127) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.351203287027978, LR: 0.0003 +[2026-03-05 03:29:50] (step=0063128) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 12.351398943455292, LR: 0.0003 +[2026-03-05 03:29:57] (step=0063129) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.351594599882606, LR: 0.0003 +[2026-03-05 03:30:05] (step=0063130) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.35179025630992, LR: 0.0003 +[2026-03-05 03:30:13] (step=0063131) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.351985912737234, LR: 0.0003 +[2026-03-05 03:30:21] (step=0063132) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.352181569164546, LR: 0.0003 +[2026-03-05 03:30:29] (step=0063133) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.35237722559186, LR: 0.0003 +[2026-03-05 03:30:37] (step=0063134) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.352572882019174, LR: 0.0003 +[2026-03-05 03:30:45] (step=0063135) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.352768538446488, LR: 0.0003 +[2026-03-05 03:30:52] (step=0063136) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.352964194873802, LR: 0.0003 +[2026-03-05 03:31:00] (step=0063137) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.353159851301115, LR: 0.0003 +[2026-03-05 03:31:08] (step=0063138) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.353355507728429, LR: 0.0003 +[2026-03-05 03:31:16] (step=0063139) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.353551164155743, LR: 0.0003 +[2026-03-05 03:31:24] (step=0063140) Train Loss: 0.4149, Train Steps/Sec: 0.13, Epoch: 12.353746820583057, LR: 0.0003 +[2026-03-05 03:31:32] (step=0063141) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.353942477010369, LR: 0.0003 +[2026-03-05 03:31:40] (step=0063142) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.354138133437683, LR: 0.0003 +[2026-03-05 03:31:47] (step=0063143) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.354333789864997, LR: 0.0003 +[2026-03-05 03:31:56] (step=0063144) Train Loss: 0.4433, Train Steps/Sec: 0.12, Epoch: 12.354529446292311, LR: 0.0003 +[2026-03-05 03:32:03] (step=0063145) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.354725102719625, LR: 0.0003 +[2026-03-05 03:32:11] (step=0063146) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.354920759146937, LR: 0.0003 +[2026-03-05 03:32:19] (step=0063147) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.355116415574251, LR: 0.0003 +[2026-03-05 03:32:27] (step=0063148) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.355312072001565, LR: 0.0003 +[2026-03-05 03:32:35] (step=0063149) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.35550772842888, LR: 0.0003 +[2026-03-05 03:32:43] (step=0063150) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.355703384856193, LR: 0.0003 +[2026-03-05 03:32:51] (step=0063151) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.355899041283505, LR: 0.0003 +[2026-03-05 03:32:58] (step=0063152) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.35609469771082, LR: 0.0003 +[2026-03-05 03:33:06] (step=0063153) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.356290354138133, LR: 0.0003 +[2026-03-05 03:33:14] (step=0063154) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.356486010565447, LR: 0.0003 +[2026-03-05 03:33:22] (step=0063155) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.356681666992761, LR: 0.0003 +[2026-03-05 03:33:30] (step=0063156) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 12.356877323420074, LR: 0.0003 +[2026-03-05 03:33:38] (step=0063157) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.357072979847388, LR: 0.0003 +[2026-03-05 03:33:46] (step=0063158) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.357268636274702, LR: 0.0003 +[2026-03-05 03:33:54] (step=0063159) Train Loss: 0.4348, Train Steps/Sec: 0.12, Epoch: 12.357464292702016, LR: 0.0003 +[2026-03-05 03:34:01] (step=0063160) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.35765994912933, LR: 0.0003 +[2026-03-05 03:34:09] (step=0063161) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.357855605556642, LR: 0.0003 +[2026-03-05 03:34:17] (step=0063162) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.358051261983956, LR: 0.0003 +[2026-03-05 03:34:25] (step=0063163) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.35824691841127, LR: 0.0003 +[2026-03-05 03:34:33] (step=0063164) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.358442574838584, LR: 0.0003 +[2026-03-05 03:34:41] (step=0063165) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.358638231265896, LR: 0.0003 +[2026-03-05 03:34:49] (step=0063166) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.35883388769321, LR: 0.0003 +[2026-03-05 03:34:57] (step=0063167) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.359029544120524, LR: 0.0003 +[2026-03-05 03:35:04] (step=0063168) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.359225200547838, LR: 0.0003 +[2026-03-05 03:35:12] (step=0063169) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 12.359420856975152, LR: 0.0003 +[2026-03-05 03:35:20] (step=0063170) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 12.359616513402464, LR: 0.0003 +[2026-03-05 03:35:28] (step=0063171) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.359812169829778, LR: 0.0003 +[2026-03-05 03:35:36] (step=0063172) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.360007826257092, LR: 0.0003 +[2026-03-05 03:35:44] (step=0063173) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.360203482684406, LR: 0.0003 +[2026-03-05 03:35:51] (step=0063174) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.36039913911172, LR: 0.0003 +[2026-03-05 03:35:59] (step=0063175) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.360594795539033, LR: 0.0003 +[2026-03-05 03:36:07] (step=0063176) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.360790451966347, LR: 0.0003 +[2026-03-05 03:36:15] (step=0063177) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.36098610839366, LR: 0.0003 +[2026-03-05 03:36:23] (step=0063178) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.361181764820975, LR: 0.0003 +[2026-03-05 03:36:31] (step=0063179) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 12.361377421248289, LR: 0.0003 +[2026-03-05 03:36:39] (step=0063180) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.361573077675601, LR: 0.0003 +[2026-03-05 03:36:47] (step=0063181) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.361768734102915, LR: 0.0003 +[2026-03-05 03:36:54] (step=0063182) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.361964390530229, LR: 0.0003 +[2026-03-05 03:37:02] (step=0063183) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.362160046957543, LR: 0.0003 +[2026-03-05 03:37:10] (step=0063184) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.362355703384857, LR: 0.0003 +[2026-03-05 03:37:18] (step=0063185) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.36255135981217, LR: 0.0003 +[2026-03-05 03:37:26] (step=0063186) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.362747016239483, LR: 0.0003 +[2026-03-05 03:37:34] (step=0063187) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.362942672666797, LR: 0.0003 +[2026-03-05 03:37:42] (step=0063188) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.363138329094111, LR: 0.0003 +[2026-03-05 03:37:49] (step=0063189) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.363333985521425, LR: 0.0003 +[2026-03-05 03:37:57] (step=0063190) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.363529641948737, LR: 0.0003 +[2026-03-05 03:38:05] (step=0063191) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.363725298376051, LR: 0.0003 +[2026-03-05 03:38:13] (step=0063192) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.363920954803366, LR: 0.0003 +[2026-03-05 03:38:21] (step=0063193) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.36411661123068, LR: 0.0003 +[2026-03-05 03:38:29] (step=0063194) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.364312267657992, LR: 0.0003 +[2026-03-05 03:38:37] (step=0063195) Train Loss: 0.4434, Train Steps/Sec: 0.12, Epoch: 12.364507924085306, LR: 0.0003 +[2026-03-05 03:38:45] (step=0063196) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.36470358051262, LR: 0.0003 +[2026-03-05 03:38:52] (step=0063197) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.364899236939934, LR: 0.0003 +[2026-03-05 03:39:00] (step=0063198) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 12.365094893367248, LR: 0.0003 +[2026-03-05 03:39:08] (step=0063199) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.36529054979456, LR: 0.0003 +[2026-03-05 03:39:16] (step=0063200) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 12.365486206221874, LR: 0.0003 +[2026-03-05 03:39:24] (step=0063201) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.365681862649188, LR: 0.0003 +[2026-03-05 03:39:32] (step=0063202) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.365877519076502, LR: 0.0003 +[2026-03-05 03:39:40] (step=0063203) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.366073175503816, LR: 0.0003 +[2026-03-05 03:39:47] (step=0063204) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.366268831931128, LR: 0.0003 +[2026-03-05 03:39:55] (step=0063205) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 12.366464488358442, LR: 0.0003 +[2026-03-05 03:40:03] (step=0063206) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.366660144785756, LR: 0.0003 +[2026-03-05 03:40:11] (step=0063207) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.36685580121307, LR: 0.0003 +[2026-03-05 03:40:19] (step=0063208) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.367051457640384, LR: 0.0003 +[2026-03-05 03:40:27] (step=0063209) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.367247114067697, LR: 0.0003 +[2026-03-05 03:40:35] (step=0063210) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.36744277049501, LR: 0.0003 +[2026-03-05 03:40:43] (step=0063211) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.367638426922325, LR: 0.0003 +[2026-03-05 03:40:50] (step=0063212) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.367834083349639, LR: 0.0003 +[2026-03-05 03:40:58] (step=0063213) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.368029739776953, LR: 0.0003 +[2026-03-05 03:41:06] (step=0063214) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.368225396204265, LR: 0.0003 +[2026-03-05 03:41:14] (step=0063215) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 12.368421052631579, LR: 0.0003 +[2026-03-05 03:41:22] (step=0063216) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.368616709058893, LR: 0.0003 +[2026-03-05 03:41:30] (step=0063217) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 12.368812365486207, LR: 0.0003 +[2026-03-05 03:41:38] (step=0063218) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.369008021913519, LR: 0.0003 +[2026-03-05 03:41:45] (step=0063219) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.369203678340833, LR: 0.0003 +[2026-03-05 03:41:53] (step=0063220) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.369399334768147, LR: 0.0003 +[2026-03-05 03:42:01] (step=0063221) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.369594991195461, LR: 0.0003 +[2026-03-05 03:42:09] (step=0063222) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.369790647622775, LR: 0.0003 +[2026-03-05 03:42:17] (step=0063223) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.369986304050087, LR: 0.0003 +[2026-03-05 03:42:25] (step=0063224) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.370181960477401, LR: 0.0003 +[2026-03-05 03:42:33] (step=0063225) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.370377616904715, LR: 0.0003 +[2026-03-05 03:42:40] (step=0063226) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.37057327333203, LR: 0.0003 +[2026-03-05 03:42:48] (step=0063227) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 12.370768929759343, LR: 0.0003 +[2026-03-05 03:42:56] (step=0063228) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.370964586186656, LR: 0.0003 +[2026-03-05 03:43:04] (step=0063229) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.37116024261397, LR: 0.0003 +[2026-03-05 03:43:12] (step=0063230) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.371355899041284, LR: 0.0003 +[2026-03-05 03:43:20] (step=0063231) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.371551555468598, LR: 0.0003 +[2026-03-05 03:43:28] (step=0063232) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.371747211895912, LR: 0.0003 +[2026-03-05 03:43:36] (step=0063233) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.371942868323224, LR: 0.0003 +[2026-03-05 03:43:43] (step=0063234) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.372138524750538, LR: 0.0003 +[2026-03-05 03:43:51] (step=0063235) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.372334181177852, LR: 0.0003 +[2026-03-05 03:43:59] (step=0063236) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.372529837605166, LR: 0.0003 +[2026-03-05 03:44:07] (step=0063237) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.37272549403248, LR: 0.0003 +[2026-03-05 03:44:15] (step=0063238) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.372921150459792, LR: 0.0003 +[2026-03-05 03:44:23] (step=0063239) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.373116806887106, LR: 0.0003 +[2026-03-05 03:44:31] (step=0063240) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 12.37331246331442, LR: 0.0003 +[2026-03-05 03:44:38] (step=0063241) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.373508119741734, LR: 0.0003 +[2026-03-05 03:44:46] (step=0063242) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.373703776169048, LR: 0.0003 +[2026-03-05 03:44:54] (step=0063243) Train Loss: 0.4368, Train Steps/Sec: 0.12, Epoch: 12.37389943259636, LR: 0.0003 +[2026-03-05 03:45:02] (step=0063244) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.374095089023674, LR: 0.0003 +[2026-03-05 03:45:10] (step=0063245) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.374290745450988, LR: 0.0003 +[2026-03-05 03:45:18] (step=0063246) Train Loss: 0.4669, Train Steps/Sec: 0.13, Epoch: 12.374486401878302, LR: 0.0003 +[2026-03-05 03:45:26] (step=0063247) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.374682058305615, LR: 0.0003 +[2026-03-05 03:45:34] (step=0063248) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.374877714732929, LR: 0.0003 +[2026-03-05 03:45:42] (step=0063249) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 12.375073371160243, LR: 0.0003 +[2026-03-05 03:45:49] (step=0063250) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.375269027587557, LR: 0.0003 +[2026-03-05 03:45:57] (step=0063251) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.37546468401487, LR: 0.0003 +[2026-03-05 03:46:05] (step=0063252) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.375660340442183, LR: 0.0003 +[2026-03-05 03:46:13] (step=0063253) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.375855996869497, LR: 0.0003 +[2026-03-05 03:46:21] (step=0063254) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.37605165329681, LR: 0.0003 +[2026-03-05 03:46:29] (step=0063255) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.376247309724125, LR: 0.0003 +[2026-03-05 03:46:37] (step=0063256) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.376442966151439, LR: 0.0003 +[2026-03-05 03:46:45] (step=0063257) Train Loss: 0.4456, Train Steps/Sec: 0.12, Epoch: 12.376638622578751, LR: 0.0003 +[2026-03-05 03:46:53] (step=0063258) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.376834279006065, LR: 0.0003 +[2026-03-05 03:47:00] (step=0063259) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.377029935433379, LR: 0.0003 +[2026-03-05 03:47:08] (step=0063260) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.377225591860693, LR: 0.0003 +[2026-03-05 03:47:16] (step=0063261) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.377421248288007, LR: 0.0003 +[2026-03-05 03:47:24] (step=0063262) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.37761690471532, LR: 0.0003 +[2026-03-05 03:47:32] (step=0063263) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.377812561142633, LR: 0.0003 +[2026-03-05 03:47:40] (step=0063264) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.378008217569947, LR: 0.0003 +[2026-03-05 03:47:48] (step=0063265) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 12.378203873997261, LR: 0.0003 +[2026-03-05 03:47:55] (step=0063266) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.378399530424575, LR: 0.0003 +[2026-03-05 03:48:03] (step=0063267) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.378595186851888, LR: 0.0003 +[2026-03-05 03:48:11] (step=0063268) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.378790843279202, LR: 0.0003 +[2026-03-05 03:48:19] (step=0063269) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.378986499706516, LR: 0.0003 +[2026-03-05 03:48:27] (step=0063270) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.37918215613383, LR: 0.0003 +[2026-03-05 03:48:35] (step=0063271) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.379377812561142, LR: 0.0003 +[2026-03-05 03:48:43] (step=0063272) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.379573468988456, LR: 0.0003 +[2026-03-05 03:48:50] (step=0063273) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.37976912541577, LR: 0.0003 +[2026-03-05 03:48:58] (step=0063274) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.379964781843084, LR: 0.0003 +[2026-03-05 03:49:06] (step=0063275) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.380160438270398, LR: 0.0003 +[2026-03-05 03:49:14] (step=0063276) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 12.38035609469771, LR: 0.0003 +[2026-03-05 03:49:22] (step=0063277) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.380551751125024, LR: 0.0003 +[2026-03-05 03:49:30] (step=0063278) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.380747407552338, LR: 0.0003 +[2026-03-05 03:49:38] (step=0063279) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.380943063979652, LR: 0.0003 +[2026-03-05 03:49:46] (step=0063280) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.381138720406966, LR: 0.0003 +[2026-03-05 03:49:53] (step=0063281) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.381334376834278, LR: 0.0003 +[2026-03-05 03:50:01] (step=0063282) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.381530033261592, LR: 0.0003 +[2026-03-05 03:50:09] (step=0063283) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 12.381725689688906, LR: 0.0003 +[2026-03-05 03:50:17] (step=0063284) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.38192134611622, LR: 0.0003 +[2026-03-05 03:50:25] (step=0063285) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.382117002543534, LR: 0.0003 +[2026-03-05 03:50:33] (step=0063286) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.382312658970847, LR: 0.0003 +[2026-03-05 03:50:41] (step=0063287) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.38250831539816, LR: 0.0003 +[2026-03-05 03:50:49] (step=0063288) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.382703971825475, LR: 0.0003 +[2026-03-05 03:50:57] (step=0063289) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.382899628252789, LR: 0.0003 +[2026-03-05 03:51:05] (step=0063290) Train Loss: 0.4420, Train Steps/Sec: 0.12, Epoch: 12.383095284680103, LR: 0.0003 +[2026-03-05 03:51:13] (step=0063291) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.383290941107415, LR: 0.0003 +[2026-03-05 03:51:20] (step=0063292) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 12.383486597534729, LR: 0.0003 +[2026-03-05 03:51:28] (step=0063293) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.383682253962043, LR: 0.0003 +[2026-03-05 03:51:36] (step=0063294) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.383877910389357, LR: 0.0003 +[2026-03-05 03:51:44] (step=0063295) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.384073566816669, LR: 0.0003 +[2026-03-05 03:51:52] (step=0063296) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.384269223243983, LR: 0.0003 +[2026-03-05 03:52:00] (step=0063297) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.384464879671297, LR: 0.0003 +[2026-03-05 03:52:08] (step=0063298) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.384660536098611, LR: 0.0003 +[2026-03-05 03:52:16] (step=0063299) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.384856192525925, LR: 0.0003 +[2026-03-05 03:52:23] (step=0063300) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.385051848953237, LR: 0.0003 +[2026-03-05 03:52:31] (step=0063301) Train Loss: 0.4361, Train Steps/Sec: 0.12, Epoch: 12.385247505380551, LR: 0.0003 +[2026-03-05 03:52:39] (step=0063302) Train Loss: 0.4250, Train Steps/Sec: 0.13, Epoch: 12.385443161807865, LR: 0.0003 +[2026-03-05 03:52:47] (step=0063303) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.38563881823518, LR: 0.0003 +[2026-03-05 03:52:55] (step=0063304) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.385834474662493, LR: 0.0003 +[2026-03-05 03:53:03] (step=0063305) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.386030131089806, LR: 0.0003 +[2026-03-05 03:53:11] (step=0063306) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.38622578751712, LR: 0.0003 +[2026-03-05 03:53:19] (step=0063307) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.386421443944434, LR: 0.0003 +[2026-03-05 03:53:27] (step=0063308) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.386617100371748, LR: 0.0003 +[2026-03-05 03:53:34] (step=0063309) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.386812756799062, LR: 0.0003 +[2026-03-05 03:53:42] (step=0063310) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.387008413226374, LR: 0.0003 +[2026-03-05 03:53:50] (step=0063311) Train Loss: 0.4217, Train Steps/Sec: 0.13, Epoch: 12.387204069653688, LR: 0.0003 +[2026-03-05 03:53:58] (step=0063312) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.387399726081002, LR: 0.0003 +[2026-03-05 03:54:06] (step=0063313) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.387595382508316, LR: 0.0003 +[2026-03-05 03:54:14] (step=0063314) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 12.38779103893563, LR: 0.0003 +[2026-03-05 03:54:22] (step=0063315) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.387986695362942, LR: 0.0003 +[2026-03-05 03:54:30] (step=0063316) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.388182351790256, LR: 0.0003 +[2026-03-05 03:54:38] (step=0063317) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.38837800821757, LR: 0.0003 +[2026-03-05 03:54:45] (step=0063318) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.388573664644884, LR: 0.0003 +[2026-03-05 03:54:53] (step=0063319) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.388769321072198, LR: 0.0003 +[2026-03-05 03:55:01] (step=0063320) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.38896497749951, LR: 0.0003 +[2026-03-05 03:55:09] (step=0063321) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.389160633926824, LR: 0.0003 +[2026-03-05 03:55:17] (step=0063322) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.389356290354138, LR: 0.0003 +[2026-03-05 03:55:25] (step=0063323) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.389551946781452, LR: 0.0003 +[2026-03-05 03:55:33] (step=0063324) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.389747603208765, LR: 0.0003 +[2026-03-05 03:55:41] (step=0063325) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.389943259636079, LR: 0.0003 +[2026-03-05 03:55:48] (step=0063326) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.390138916063393, LR: 0.0003 +[2026-03-05 03:55:56] (step=0063327) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.390334572490707, LR: 0.0003 +[2026-03-05 03:56:04] (step=0063328) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 12.39053022891802, LR: 0.0003 +[2026-03-05 03:56:12] (step=0063329) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.390725885345333, LR: 0.0003 +[2026-03-05 03:56:20] (step=0063330) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.390921541772647, LR: 0.0003 +[2026-03-05 03:56:28] (step=0063331) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.39111719819996, LR: 0.0003 +[2026-03-05 03:56:36] (step=0063332) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.391312854627275, LR: 0.0003 +[2026-03-05 03:56:44] (step=0063333) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.391508511054589, LR: 0.0003 +[2026-03-05 03:56:51] (step=0063334) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.391704167481901, LR: 0.0003 +[2026-03-05 03:56:59] (step=0063335) Train Loss: 0.4246, Train Steps/Sec: 0.13, Epoch: 12.391899823909215, LR: 0.0003 +[2026-03-05 03:57:07] (step=0063336) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.392095480336529, LR: 0.0003 +[2026-03-05 03:57:15] (step=0063337) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 12.392291136763843, LR: 0.0003 +[2026-03-05 03:57:23] (step=0063338) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 12.392486793191157, LR: 0.0003 +[2026-03-05 03:57:31] (step=0063339) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 12.39268244961847, LR: 0.0003 +[2026-03-05 03:57:39] (step=0063340) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.392878106045783, LR: 0.0003 +[2026-03-05 03:57:46] (step=0063341) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 12.393073762473097, LR: 0.0003 +[2026-03-05 03:57:54] (step=0063342) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.393269418900411, LR: 0.0003 +[2026-03-05 03:58:02] (step=0063343) Train Loss: 0.4435, Train Steps/Sec: 0.12, Epoch: 12.393465075327725, LR: 0.0003 +[2026-03-05 03:58:10] (step=0063344) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.393660731755038, LR: 0.0003 +[2026-03-05 03:58:18] (step=0063345) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.393856388182352, LR: 0.0003 +[2026-03-05 03:58:26] (step=0063346) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.394052044609666, LR: 0.0003 +[2026-03-05 03:58:34] (step=0063347) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.39424770103698, LR: 0.0003 +[2026-03-05 03:58:42] (step=0063348) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.394443357464292, LR: 0.0003 +[2026-03-05 03:58:50] (step=0063349) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.394639013891606, LR: 0.0003 +[2026-03-05 03:58:58] (step=0063350) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.39483467031892, LR: 0.0003 +[2026-03-05 03:59:05] (step=0063351) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.395030326746234, LR: 0.0003 +[2026-03-05 03:59:13] (step=0063352) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.395225983173548, LR: 0.0003 +[2026-03-05 03:59:21] (step=0063353) Train Loss: 0.4370, Train Steps/Sec: 0.12, Epoch: 12.39542163960086, LR: 0.0003 +[2026-03-05 03:59:29] (step=0063354) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.395617296028174, LR: 0.0003 +[2026-03-05 03:59:37] (step=0063355) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.395812952455488, LR: 0.0003 +[2026-03-05 03:59:45] (step=0063356) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.396008608882802, LR: 0.0003 +[2026-03-05 03:59:53] (step=0063357) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 12.396204265310116, LR: 0.0003 +[2026-03-05 04:00:01] (step=0063358) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.396399921737428, LR: 0.0003 +[2026-03-05 04:00:09] (step=0063359) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.396595578164742, LR: 0.0003 +[2026-03-05 04:00:16] (step=0063360) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.396791234592056, LR: 0.0003 +[2026-03-05 04:00:24] (step=0063361) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.39698689101937, LR: 0.0003 +[2026-03-05 04:00:32] (step=0063362) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.397182547446684, LR: 0.0003 +[2026-03-05 04:00:40] (step=0063363) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.397378203873997, LR: 0.0003 +[2026-03-05 04:00:48] (step=0063364) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.39757386030131, LR: 0.0003 +[2026-03-05 04:00:56] (step=0063365) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.397769516728625, LR: 0.0003 +[2026-03-05 04:01:04] (step=0063366) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 12.397965173155939, LR: 0.0003 +[2026-03-05 04:01:12] (step=0063367) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 12.398160829583253, LR: 0.0003 +[2026-03-05 04:01:19] (step=0063368) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.398356486010565, LR: 0.0003 +[2026-03-05 04:01:27] (step=0063369) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.398552142437879, LR: 0.0003 +[2026-03-05 04:01:35] (step=0063370) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.398747798865193, LR: 0.0003 +[2026-03-05 04:01:43] (step=0063371) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.398943455292507, LR: 0.0003 +[2026-03-05 04:01:51] (step=0063372) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.39913911171982, LR: 0.0003 +[2026-03-05 04:01:59] (step=0063373) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.399334768147133, LR: 0.0003 +[2026-03-05 04:02:07] (step=0063374) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.399530424574447, LR: 0.0003 +[2026-03-05 04:02:14] (step=0063375) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.399726081001761, LR: 0.0003 +[2026-03-05 04:02:22] (step=0063376) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.399921737429075, LR: 0.0003 +[2026-03-05 04:02:30] (step=0063377) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.400117393856387, LR: 0.0003 +[2026-03-05 04:02:38] (step=0063378) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.400313050283701, LR: 0.0003 +[2026-03-05 04:02:46] (step=0063379) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.400508706711015, LR: 0.0003 +[2026-03-05 04:02:54] (step=0063380) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.40070436313833, LR: 0.0003 +[2026-03-05 04:03:02] (step=0063381) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.400900019565643, LR: 0.0003 +[2026-03-05 04:03:10] (step=0063382) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.401095675992956, LR: 0.0003 +[2026-03-05 04:03:17] (step=0063383) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.40129133242027, LR: 0.0003 +[2026-03-05 04:03:25] (step=0063384) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.401486988847584, LR: 0.0003 +[2026-03-05 04:03:33] (step=0063385) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.401682645274898, LR: 0.0003 +[2026-03-05 04:03:41] (step=0063386) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 12.401878301702212, LR: 0.0003 +[2026-03-05 04:03:49] (step=0063387) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.402073958129524, LR: 0.0003 +[2026-03-05 04:03:57] (step=0063388) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.402269614556838, LR: 0.0003 +[2026-03-05 04:04:05] (step=0063389) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.402465270984152, LR: 0.0003 +[2026-03-05 04:04:12] (step=0063390) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.402660927411466, LR: 0.0003 +[2026-03-05 04:04:20] (step=0063391) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.40285658383878, LR: 0.0003 +[2026-03-05 04:04:28] (step=0063392) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 12.403052240266092, LR: 0.0003 +[2026-03-05 04:04:36] (step=0063393) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 12.403247896693406, LR: 0.0003 +[2026-03-05 04:04:44] (step=0063394) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.40344355312072, LR: 0.0003 +[2026-03-05 04:04:52] (step=0063395) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.403639209548034, LR: 0.0003 +[2026-03-05 04:05:00] (step=0063396) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.403834865975348, LR: 0.0003 +[2026-03-05 04:05:08] (step=0063397) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.40403052240266, LR: 0.0003 +[2026-03-05 04:05:15] (step=0063398) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.404226178829974, LR: 0.0003 +[2026-03-05 04:05:23] (step=0063399) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.404421835257288, LR: 0.0003 +[2026-03-05 04:05:31] (step=0063400) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.404617491684602, LR: 0.0003 +[2026-03-05 04:05:39] (step=0063401) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.404813148111915, LR: 0.0003 +[2026-03-05 04:05:47] (step=0063402) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.405008804539229, LR: 0.0003 +[2026-03-05 04:05:55] (step=0063403) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.405204460966543, LR: 0.0003 +[2026-03-05 04:06:03] (step=0063404) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.405400117393857, LR: 0.0003 +[2026-03-05 04:06:11] (step=0063405) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.40559577382117, LR: 0.0003 +[2026-03-05 04:06:18] (step=0063406) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.405791430248483, LR: 0.0003 +[2026-03-05 04:06:26] (step=0063407) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.405987086675797, LR: 0.0003 +[2026-03-05 04:06:34] (step=0063408) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.40618274310311, LR: 0.0003 +[2026-03-05 04:06:42] (step=0063409) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 12.406378399530425, LR: 0.0003 +[2026-03-05 04:06:50] (step=0063410) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.406574055957739, LR: 0.0003 +[2026-03-05 04:06:58] (step=0063411) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.406769712385051, LR: 0.0003 +[2026-03-05 04:07:06] (step=0063412) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.406965368812365, LR: 0.0003 +[2026-03-05 04:07:13] (step=0063413) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 12.40716102523968, LR: 0.0003 +[2026-03-05 04:07:21] (step=0063414) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.407356681666993, LR: 0.0003 +[2026-03-05 04:07:29] (step=0063415) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.407552338094307, LR: 0.0003 +[2026-03-05 04:07:37] (step=0063416) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.40774799452162, LR: 0.0003 +[2026-03-05 04:07:45] (step=0063417) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.407943650948933, LR: 0.0003 +[2026-03-05 04:07:53] (step=0063418) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.408139307376247, LR: 0.0003 +[2026-03-05 04:08:01] (step=0063419) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.408334963803561, LR: 0.0003 +[2026-03-05 04:08:08] (step=0063420) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.408530620230875, LR: 0.0003 +[2026-03-05 04:08:16] (step=0063421) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.408726276658188, LR: 0.0003 +[2026-03-05 04:08:24] (step=0063422) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.408921933085502, LR: 0.0003 +[2026-03-05 04:08:32] (step=0063423) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 12.409117589512816, LR: 0.0003 +[2026-03-05 04:08:40] (step=0063424) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.40931324594013, LR: 0.0003 +[2026-03-05 04:08:48] (step=0063425) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.409508902367444, LR: 0.0003 +[2026-03-05 04:08:56] (step=0063426) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.409704558794756, LR: 0.0003 +[2026-03-05 04:09:04] (step=0063427) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.40990021522207, LR: 0.0003 +[2026-03-05 04:09:11] (step=0063428) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.410095871649384, LR: 0.0003 +[2026-03-05 04:09:19] (step=0063429) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.410291528076698, LR: 0.0003 +[2026-03-05 04:09:27] (step=0063430) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.41048718450401, LR: 0.0003 +[2026-03-05 04:09:35] (step=0063431) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.410682840931324, LR: 0.0003 +[2026-03-05 04:09:43] (step=0063432) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.410878497358638, LR: 0.0003 +[2026-03-05 04:09:51] (step=0063433) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.411074153785952, LR: 0.0003 +[2026-03-05 04:09:59] (step=0063434) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.411269810213266, LR: 0.0003 +[2026-03-05 04:10:06] (step=0063435) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.411465466640578, LR: 0.0003 +[2026-03-05 04:10:14] (step=0063436) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.411661123067892, LR: 0.0003 +[2026-03-05 04:10:22] (step=0063437) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.411856779495206, LR: 0.0003 +[2026-03-05 04:10:30] (step=0063438) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.41205243592252, LR: 0.0003 +[2026-03-05 04:10:38] (step=0063439) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.412248092349834, LR: 0.0003 +[2026-03-05 04:10:46] (step=0063440) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.412443748777147, LR: 0.0003 +[2026-03-05 04:10:54] (step=0063441) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.41263940520446, LR: 0.0003 +[2026-03-05 04:11:02] (step=0063442) Train Loss: 0.4347, Train Steps/Sec: 0.12, Epoch: 12.412835061631775, LR: 0.0003 +[2026-03-05 04:11:10] (step=0063443) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.413030718059089, LR: 0.0003 +[2026-03-05 04:11:18] (step=0063444) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.413226374486403, LR: 0.0003 +[2026-03-05 04:11:25] (step=0063445) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.413422030913715, LR: 0.0003 +[2026-03-05 04:11:33] (step=0063446) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.413617687341029, LR: 0.0003 +[2026-03-05 04:11:41] (step=0063447) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.413813343768343, LR: 0.0003 +[2026-03-05 04:11:49] (step=0063448) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.414009000195657, LR: 0.0003 +[2026-03-05 04:11:57] (step=0063449) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.414204656622971, LR: 0.0003 +[2026-03-05 04:12:05] (step=0063450) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.414400313050283, LR: 0.0003 +[2026-03-05 04:12:13] (step=0063451) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.414595969477597, LR: 0.0003 +[2026-03-05 04:12:20] (step=0063452) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.414791625904911, LR: 0.0003 +[2026-03-05 04:12:28] (step=0063453) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.414987282332225, LR: 0.0003 +[2026-03-05 04:12:36] (step=0063454) Train Loss: 0.4477, Train Steps/Sec: 0.12, Epoch: 12.415182938759537, LR: 0.0003 +[2026-03-05 04:12:44] (step=0063455) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.415378595186851, LR: 0.0003 +[2026-03-05 04:12:52] (step=0063456) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.415574251614165, LR: 0.0003 +[2026-03-05 04:13:00] (step=0063457) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.41576990804148, LR: 0.0003 +[2026-03-05 04:13:08] (step=0063458) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.415965564468793, LR: 0.0003 +[2026-03-05 04:13:16] (step=0063459) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.416161220896106, LR: 0.0003 +[2026-03-05 04:13:24] (step=0063460) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 12.41635687732342, LR: 0.0003 +[2026-03-05 04:13:31] (step=0063461) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.416552533750734, LR: 0.0003 +[2026-03-05 04:13:39] (step=0063462) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.416748190178048, LR: 0.0003 +[2026-03-05 04:13:47] (step=0063463) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 12.416943846605362, LR: 0.0003 +[2026-03-05 04:13:55] (step=0063464) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.417139503032674, LR: 0.0003 +[2026-03-05 04:14:03] (step=0063465) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.417335159459988, LR: 0.0003 +[2026-03-05 04:14:11] (step=0063466) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 12.417530815887302, LR: 0.0003 +[2026-03-05 04:14:19] (step=0063467) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.417726472314616, LR: 0.0003 +[2026-03-05 04:14:26] (step=0063468) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.41792212874193, LR: 0.0003 +[2026-03-05 04:14:34] (step=0063469) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.418117785169242, LR: 0.0003 +[2026-03-05 04:14:42] (step=0063470) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.418313441596556, LR: 0.0003 +[2026-03-05 04:14:50] (step=0063471) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 12.41850909802387, LR: 0.0003 +[2026-03-05 04:14:58] (step=0063472) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.418704754451184, LR: 0.0003 +[2026-03-05 04:15:06] (step=0063473) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.418900410878498, LR: 0.0003 +[2026-03-05 04:15:14] (step=0063474) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.41909606730581, LR: 0.0003 +[2026-03-05 04:15:21] (step=0063475) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.419291723733124, LR: 0.0003 +[2026-03-05 04:15:29] (step=0063476) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.419487380160438, LR: 0.0003 +[2026-03-05 04:15:37] (step=0063477) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.419683036587752, LR: 0.0003 +[2026-03-05 04:15:45] (step=0063478) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.419878693015066, LR: 0.0003 +[2026-03-05 04:15:53] (step=0063479) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.420074349442379, LR: 0.0003 +[2026-03-05 04:16:01] (step=0063480) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.420270005869693, LR: 0.0003 +[2026-03-05 04:16:09] (step=0063481) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.420465662297007, LR: 0.0003 +[2026-03-05 04:16:17] (step=0063482) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.42066131872432, LR: 0.0003 +[2026-03-05 04:16:24] (step=0063483) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.420856975151633, LR: 0.0003 +[2026-03-05 04:16:32] (step=0063484) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.421052631578947, LR: 0.0003 +[2026-03-05 04:16:40] (step=0063485) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.421248288006261, LR: 0.0003 +[2026-03-05 04:16:48] (step=0063486) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 12.421443944433575, LR: 0.0003 +[2026-03-05 04:16:56] (step=0063487) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.421639600860889, LR: 0.0003 +[2026-03-05 04:17:04] (step=0063488) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.421835257288201, LR: 0.0003 +[2026-03-05 04:17:12] (step=0063489) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 12.422030913715515, LR: 0.0003 +[2026-03-05 04:17:20] (step=0063490) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.42222657014283, LR: 0.0003 +[2026-03-05 04:17:27] (step=0063491) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.422422226570143, LR: 0.0003 +[2026-03-05 04:17:35] (step=0063492) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.422617882997457, LR: 0.0003 +[2026-03-05 04:17:43] (step=0063493) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.42281353942477, LR: 0.0003 +[2026-03-05 04:17:51] (step=0063494) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.423009195852083, LR: 0.0003 +[2026-03-05 04:17:59] (step=0063495) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 12.423204852279397, LR: 0.0003 +[2026-03-05 04:18:07] (step=0063496) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 12.423400508706711, LR: 0.0003 +[2026-03-05 04:18:15] (step=0063497) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.423596165134025, LR: 0.0003 +[2026-03-05 04:18:23] (step=0063498) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.423791821561338, LR: 0.0003 +[2026-03-05 04:18:30] (step=0063499) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.423987477988652, LR: 0.0003 +[2026-03-05 04:18:38] (step=0063500) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.424183134415966, LR: 0.0003 +[2026-03-05 04:18:38] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0063500/ +[2026-03-05 04:18:46] (step=0063501) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.42437879084328, LR: 0.0003 +[2026-03-05 04:18:54] (step=0063502) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.424574447270594, LR: 0.0003 +[2026-03-05 04:19:02] (step=0063503) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.424770103697906, LR: 0.0003 +[2026-03-05 04:19:10] (step=0063504) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.42496576012522, LR: 0.0003 +[2026-03-05 04:19:18] (step=0063505) Train Loss: 0.4386, Train Steps/Sec: 0.12, Epoch: 12.425161416552534, LR: 0.0003 +[2026-03-05 04:19:26] (step=0063506) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.425357072979848, LR: 0.0003 +[2026-03-05 04:19:33] (step=0063507) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.42555272940716, LR: 0.0003 +[2026-03-05 04:19:41] (step=0063508) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.425748385834474, LR: 0.0003 +[2026-03-05 04:19:49] (step=0063509) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.425944042261788, LR: 0.0003 +[2026-03-05 04:19:57] (step=0063510) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.426139698689102, LR: 0.0003 +[2026-03-05 04:20:05] (step=0063511) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.426335355116416, LR: 0.0003 +[2026-03-05 04:20:13] (step=0063512) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.426531011543728, LR: 0.0003 +[2026-03-05 04:20:21] (step=0063513) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.426726667971042, LR: 0.0003 +[2026-03-05 04:20:29] (step=0063514) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.426922324398356, LR: 0.0003 +[2026-03-05 04:20:36] (step=0063515) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.42711798082567, LR: 0.0003 +[2026-03-05 04:20:44] (step=0063516) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.427313637252984, LR: 0.0003 +[2026-03-05 04:20:52] (step=0063517) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.427509293680297, LR: 0.0003 +[2026-03-05 04:21:00] (step=0063518) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.42770495010761, LR: 0.0003 +[2026-03-05 04:21:08] (step=0063519) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.427900606534925, LR: 0.0003 +[2026-03-05 04:21:16] (step=0063520) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 12.428096262962239, LR: 0.0003 +[2026-03-05 04:21:24] (step=0063521) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 12.428291919389553, LR: 0.0003 +[2026-03-05 04:21:31] (step=0063522) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.428487575816865, LR: 0.0003 +[2026-03-05 04:21:39] (step=0063523) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.428683232244179, LR: 0.0003 +[2026-03-05 04:21:47] (step=0063524) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.428878888671493, LR: 0.0003 +[2026-03-05 04:21:55] (step=0063525) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.429074545098807, LR: 0.0003 +[2026-03-05 04:22:03] (step=0063526) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 12.429270201526121, LR: 0.0003 +[2026-03-05 04:22:11] (step=0063527) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.429465857953433, LR: 0.0003 +[2026-03-05 04:22:19] (step=0063528) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.429661514380747, LR: 0.0003 +[2026-03-05 04:22:26] (step=0063529) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.429857170808061, LR: 0.0003 +[2026-03-05 04:22:34] (step=0063530) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.430052827235375, LR: 0.0003 +[2026-03-05 04:22:42] (step=0063531) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.43024848366269, LR: 0.0003 +[2026-03-05 04:22:50] (step=0063532) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.430444140090001, LR: 0.0003 +[2026-03-05 04:22:58] (step=0063533) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.430639796517315, LR: 0.0003 +[2026-03-05 04:23:06] (step=0063534) Train Loss: 0.4408, Train Steps/Sec: 0.12, Epoch: 12.43083545294463, LR: 0.0003 +[2026-03-05 04:23:14] (step=0063535) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.431031109371943, LR: 0.0003 +[2026-03-05 04:23:22] (step=0063536) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.431226765799256, LR: 0.0003 +[2026-03-05 04:23:30] (step=0063537) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.43142242222657, LR: 0.0003 +[2026-03-05 04:23:37] (step=0063538) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.431618078653884, LR: 0.0003 +[2026-03-05 04:23:45] (step=0063539) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.431813735081198, LR: 0.0003 +[2026-03-05 04:23:53] (step=0063540) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.432009391508512, LR: 0.0003 +[2026-03-05 04:24:01] (step=0063541) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.432205047935824, LR: 0.0003 +[2026-03-05 04:24:09] (step=0063542) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.432400704363138, LR: 0.0003 +[2026-03-05 04:24:17] (step=0063543) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.432596360790452, LR: 0.0003 +[2026-03-05 04:24:25] (step=0063544) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.432792017217766, LR: 0.0003 +[2026-03-05 04:24:32] (step=0063545) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.43298767364508, LR: 0.0003 +[2026-03-05 04:24:40] (step=0063546) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.433183330072392, LR: 0.0003 +[2026-03-05 04:24:48] (step=0063547) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.433378986499706, LR: 0.0003 +[2026-03-05 04:24:56] (step=0063548) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.43357464292702, LR: 0.0003 +[2026-03-05 04:25:04] (step=0063549) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.433770299354334, LR: 0.0003 +[2026-03-05 04:25:12] (step=0063550) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.433965955781648, LR: 0.0003 +[2026-03-05 04:25:20] (step=0063551) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.43416161220896, LR: 0.0003 +[2026-03-05 04:25:28] (step=0063552) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.434357268636274, LR: 0.0003 +[2026-03-05 04:25:35] (step=0063553) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.434552925063588, LR: 0.0003 +[2026-03-05 04:25:43] (step=0063554) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 12.434748581490902, LR: 0.0003 +[2026-03-05 04:25:51] (step=0063555) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 12.434944237918216, LR: 0.0003 +[2026-03-05 04:25:59] (step=0063556) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.435139894345529, LR: 0.0003 +[2026-03-05 04:26:07] (step=0063557) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.435335550772843, LR: 0.0003 +[2026-03-05 04:26:15] (step=0063558) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.435531207200157, LR: 0.0003 +[2026-03-05 04:26:23] (step=0063559) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.43572686362747, LR: 0.0003 +[2026-03-05 04:26:31] (step=0063560) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.435922520054783, LR: 0.0003 +[2026-03-05 04:26:39] (step=0063561) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.436118176482097, LR: 0.0003 +[2026-03-05 04:26:46] (step=0063562) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.436313832909411, LR: 0.0003 +[2026-03-05 04:26:54] (step=0063563) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.436509489336725, LR: 0.0003 +[2026-03-05 04:27:02] (step=0063564) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.436705145764039, LR: 0.0003 +[2026-03-05 04:27:10] (step=0063565) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.436900802191351, LR: 0.0003 +[2026-03-05 04:27:18] (step=0063566) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 12.437096458618665, LR: 0.0003 +[2026-03-05 04:27:26] (step=0063567) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.43729211504598, LR: 0.0003 +[2026-03-05 04:27:34] (step=0063568) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.437487771473293, LR: 0.0003 +[2026-03-05 04:27:41] (step=0063569) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.437683427900607, LR: 0.0003 +[2026-03-05 04:27:49] (step=0063570) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.43787908432792, LR: 0.0003 +[2026-03-05 04:27:57] (step=0063571) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.438074740755233, LR: 0.0003 +[2026-03-05 04:28:05] (step=0063572) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.438270397182547, LR: 0.0003 +[2026-03-05 04:28:13] (step=0063573) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.438466053609861, LR: 0.0003 +[2026-03-05 04:28:21] (step=0063574) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.438661710037175, LR: 0.0003 +[2026-03-05 04:28:29] (step=0063575) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.438857366464488, LR: 0.0003 +[2026-03-05 04:28:36] (step=0063576) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.439053022891802, LR: 0.0003 +[2026-03-05 04:28:44] (step=0063577) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.439248679319116, LR: 0.0003 +[2026-03-05 04:28:52] (step=0063578) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.43944433574643, LR: 0.0003 +[2026-03-05 04:29:00] (step=0063579) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.439639992173744, LR: 0.0003 +[2026-03-05 04:29:08] (step=0063580) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.439835648601056, LR: 0.0003 +[2026-03-05 04:29:16] (step=0063581) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.44003130502837, LR: 0.0003 +[2026-03-05 04:29:24] (step=0063582) Train Loss: 0.4327, Train Steps/Sec: 0.12, Epoch: 12.440226961455684, LR: 0.0003 +[2026-03-05 04:29:32] (step=0063583) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.440422617882998, LR: 0.0003 +[2026-03-05 04:29:39] (step=0063584) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 12.440618274310312, LR: 0.0003 +[2026-03-05 04:29:47] (step=0063585) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.440813930737624, LR: 0.0003 +[2026-03-05 04:29:55] (step=0063586) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.441009587164938, LR: 0.0003 +[2026-03-05 04:30:03] (step=0063587) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.441205243592252, LR: 0.0003 +[2026-03-05 04:30:11] (step=0063588) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.441400900019566, LR: 0.0003 +[2026-03-05 04:30:19] (step=0063589) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 12.441596556446878, LR: 0.0003 +[2026-03-05 04:30:27] (step=0063590) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.441792212874192, LR: 0.0003 +[2026-03-05 04:30:35] (step=0063591) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.441987869301506, LR: 0.0003 +[2026-03-05 04:30:42] (step=0063592) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.44218352572882, LR: 0.0003 +[2026-03-05 04:30:50] (step=0063593) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 12.442379182156134, LR: 0.0003 +[2026-03-05 04:30:58] (step=0063594) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.442574838583447, LR: 0.0003 +[2026-03-05 04:31:06] (step=0063595) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.44277049501076, LR: 0.0003 +[2026-03-05 04:31:14] (step=0063596) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 12.442966151438075, LR: 0.0003 +[2026-03-05 04:31:22] (step=0063597) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.443161807865389, LR: 0.0003 +[2026-03-05 04:31:30] (step=0063598) Train Loss: 0.4532, Train Steps/Sec: 0.12, Epoch: 12.443357464292703, LR: 0.0003 +[2026-03-05 04:31:38] (step=0063599) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.443553120720015, LR: 0.0003 +[2026-03-05 04:31:46] (step=0063600) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.443748777147329, LR: 0.0003 +[2026-03-05 04:31:53] (step=0063601) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.443944433574643, LR: 0.0003 +[2026-03-05 04:32:01] (step=0063602) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.444140090001957, LR: 0.0003 +[2026-03-05 04:32:09] (step=0063603) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.444335746429271, LR: 0.0003 +[2026-03-05 04:32:17] (step=0063604) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.444531402856583, LR: 0.0003 +[2026-03-05 04:32:25] (step=0063605) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 12.444727059283897, LR: 0.0003 +[2026-03-05 04:32:33] (step=0063606) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 12.444922715711211, LR: 0.0003 +[2026-03-05 04:32:41] (step=0063607) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.445118372138525, LR: 0.0003 +[2026-03-05 04:32:48] (step=0063608) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.44531402856584, LR: 0.0003 +[2026-03-05 04:32:56] (step=0063609) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.445509684993151, LR: 0.0003 +[2026-03-05 04:33:04] (step=0063610) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.445705341420465, LR: 0.0003 +[2026-03-05 04:33:12] (step=0063611) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.44590099784778, LR: 0.0003 +[2026-03-05 04:33:20] (step=0063612) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.446096654275093, LR: 0.0003 +[2026-03-05 04:33:28] (step=0063613) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.446292310702406, LR: 0.0003 +[2026-03-05 04:33:36] (step=0063614) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.44648796712972, LR: 0.0003 +[2026-03-05 04:33:43] (step=0063615) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.446683623557034, LR: 0.0003 +[2026-03-05 04:33:51] (step=0063616) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.446879279984348, LR: 0.0003 +[2026-03-05 04:33:59] (step=0063617) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.447074936411662, LR: 0.0003 +[2026-03-05 04:34:07] (step=0063618) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.447270592838974, LR: 0.0003 +[2026-03-05 04:34:15] (step=0063619) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.447466249266288, LR: 0.0003 +[2026-03-05 04:34:23] (step=0063620) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.447661905693602, LR: 0.0003 +[2026-03-05 04:34:31] (step=0063621) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.447857562120916, LR: 0.0003 +[2026-03-05 04:34:38] (step=0063622) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.44805321854823, LR: 0.0003 +[2026-03-05 04:34:46] (step=0063623) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 12.448248874975542, LR: 0.0003 +[2026-03-05 04:34:54] (step=0063624) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 12.448444531402856, LR: 0.0003 +[2026-03-05 04:35:02] (step=0063625) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 12.44864018783017, LR: 0.0003 +[2026-03-05 04:35:10] (step=0063626) Train Loss: 0.4183, Train Steps/Sec: 0.13, Epoch: 12.448835844257484, LR: 0.0003 +[2026-03-05 04:35:18] (step=0063627) Train Loss: 0.4182, Train Steps/Sec: 0.13, Epoch: 12.449031500684798, LR: 0.0003 +[2026-03-05 04:35:26] (step=0063628) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 12.44922715711211, LR: 0.0003 +[2026-03-05 04:35:33] (step=0063629) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.449422813539424, LR: 0.0003 +[2026-03-05 04:35:41] (step=0063630) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 12.449618469966738, LR: 0.0003 +[2026-03-05 04:35:49] (step=0063631) Train Loss: 0.4511, Train Steps/Sec: 0.12, Epoch: 12.449814126394052, LR: 0.0003 +[2026-03-05 04:35:57] (step=0063632) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.450009782821366, LR: 0.0003 +[2026-03-05 04:36:05] (step=0063633) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.450205439248679, LR: 0.0003 +[2026-03-05 04:36:13] (step=0063634) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.450401095675993, LR: 0.0003 +[2026-03-05 04:36:21] (step=0063635) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.450596752103307, LR: 0.0003 +[2026-03-05 04:36:29] (step=0063636) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.45079240853062, LR: 0.0003 +[2026-03-05 04:36:37] (step=0063637) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.450988064957935, LR: 0.0003 +[2026-03-05 04:36:44] (step=0063638) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.451183721385247, LR: 0.0003 +[2026-03-05 04:36:52] (step=0063639) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.451379377812561, LR: 0.0003 +[2026-03-05 04:37:00] (step=0063640) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.451575034239875, LR: 0.0003 +[2026-03-05 04:37:08] (step=0063641) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.451770690667189, LR: 0.0003 +[2026-03-05 04:37:16] (step=0063642) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.451966347094501, LR: 0.0003 +[2026-03-05 04:37:24] (step=0063643) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 12.452162003521815, LR: 0.0003 +[2026-03-05 04:37:32] (step=0063644) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.45235765994913, LR: 0.0003 +[2026-03-05 04:37:39] (step=0063645) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 12.452553316376443, LR: 0.0003 +[2026-03-05 04:37:47] (step=0063646) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.452748972803757, LR: 0.0003 +[2026-03-05 04:37:55] (step=0063647) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.45294462923107, LR: 0.0003 +[2026-03-05 04:38:03] (step=0063648) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.453140285658383, LR: 0.0003 +[2026-03-05 04:38:11] (step=0063649) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.453335942085697, LR: 0.0003 +[2026-03-05 04:38:19] (step=0063650) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.453531598513012, LR: 0.0003 +[2026-03-05 04:38:27] (step=0063651) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.453727254940326, LR: 0.0003 +[2026-03-05 04:38:35] (step=0063652) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.453922911367638, LR: 0.0003 +[2026-03-05 04:38:42] (step=0063653) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.454118567794952, LR: 0.0003 +[2026-03-05 04:38:50] (step=0063654) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.454314224222266, LR: 0.0003 +[2026-03-05 04:38:58] (step=0063655) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 12.45450988064958, LR: 0.0003 +[2026-03-05 04:39:06] (step=0063656) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.454705537076894, LR: 0.0003 +[2026-03-05 04:39:14] (step=0063657) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.454901193504206, LR: 0.0003 +[2026-03-05 04:39:22] (step=0063658) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.45509684993152, LR: 0.0003 +[2026-03-05 04:39:30] (step=0063659) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.455292506358834, LR: 0.0003 +[2026-03-05 04:39:38] (step=0063660) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 12.455488162786148, LR: 0.0003 +[2026-03-05 04:39:45] (step=0063661) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.455683819213462, LR: 0.0003 +[2026-03-05 04:39:53] (step=0063662) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.455879475640774, LR: 0.0003 +[2026-03-05 04:40:01] (step=0063663) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.456075132068088, LR: 0.0003 +[2026-03-05 04:40:09] (step=0063664) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.456270788495402, LR: 0.0003 +[2026-03-05 04:40:17] (step=0063665) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.456466444922716, LR: 0.0003 +[2026-03-05 04:40:25] (step=0063666) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 12.456662101350028, LR: 0.0003 +[2026-03-05 04:40:33] (step=0063667) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.456857757777342, LR: 0.0003 +[2026-03-05 04:40:40] (step=0063668) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.457053414204657, LR: 0.0003 +[2026-03-05 04:40:48] (step=0063669) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.45724907063197, LR: 0.0003 +[2026-03-05 04:40:56] (step=0063670) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.457444727059285, LR: 0.0003 +[2026-03-05 04:41:04] (step=0063671) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.457640383486597, LR: 0.0003 +[2026-03-05 04:41:12] (step=0063672) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 12.45783603991391, LR: 0.0003 +[2026-03-05 04:41:20] (step=0063673) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.458031696341225, LR: 0.0003 +[2026-03-05 04:41:28] (step=0063674) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.458227352768539, LR: 0.0003 +[2026-03-05 04:41:36] (step=0063675) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 12.458423009195853, LR: 0.0003 +[2026-03-05 04:41:43] (step=0063676) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.458618665623165, LR: 0.0003 +[2026-03-05 04:41:51] (step=0063677) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.458814322050479, LR: 0.0003 +[2026-03-05 04:41:59] (step=0063678) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.459009978477793, LR: 0.0003 +[2026-03-05 04:42:07] (step=0063679) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.459205634905107, LR: 0.0003 +[2026-03-05 04:42:15] (step=0063680) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.459401291332421, LR: 0.0003 +[2026-03-05 04:42:23] (step=0063681) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 12.459596947759733, LR: 0.0003 +[2026-03-05 04:42:31] (step=0063682) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.459792604187047, LR: 0.0003 +[2026-03-05 04:42:38] (step=0063683) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.459988260614361, LR: 0.0003 +[2026-03-05 04:42:46] (step=0063684) Train Loss: 0.4513, Train Steps/Sec: 0.12, Epoch: 12.460183917041675, LR: 0.0003 +[2026-03-05 04:42:54] (step=0063685) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.46037957346899, LR: 0.0003 +[2026-03-05 04:43:02] (step=0063686) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.460575229896302, LR: 0.0003 +[2026-03-05 04:43:10] (step=0063687) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.460770886323616, LR: 0.0003 +[2026-03-05 04:43:18] (step=0063688) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.46096654275093, LR: 0.0003 +[2026-03-05 04:43:26] (step=0063689) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 12.461162199178244, LR: 0.0003 +[2026-03-05 04:43:34] (step=0063690) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.461357855605556, LR: 0.0003 +[2026-03-05 04:43:41] (step=0063691) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 12.46155351203287, LR: 0.0003 +[2026-03-05 04:43:49] (step=0063692) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.461749168460184, LR: 0.0003 +[2026-03-05 04:43:57] (step=0063693) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.461944824887498, LR: 0.0003 +[2026-03-05 04:44:05] (step=0063694) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.462140481314812, LR: 0.0003 +[2026-03-05 04:44:13] (step=0063695) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.462336137742124, LR: 0.0003 +[2026-03-05 04:44:21] (step=0063696) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.462531794169438, LR: 0.0003 +[2026-03-05 04:44:28] (step=0063697) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.462727450596752, LR: 0.0003 +[2026-03-05 04:44:36] (step=0063698) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.462923107024066, LR: 0.0003 +[2026-03-05 04:44:44] (step=0063699) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.46311876345138, LR: 0.0003 +[2026-03-05 04:44:52] (step=0063700) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.463314419878692, LR: 0.0003 +[2026-03-05 04:45:00] (step=0063701) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.463510076306006, LR: 0.0003 +[2026-03-05 04:45:08] (step=0063702) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.46370573273332, LR: 0.0003 +[2026-03-05 04:45:16] (step=0063703) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.463901389160634, LR: 0.0003 +[2026-03-05 04:45:24] (step=0063704) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.464097045587948, LR: 0.0003 +[2026-03-05 04:45:31] (step=0063705) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.46429270201526, LR: 0.0003 +[2026-03-05 04:45:39] (step=0063706) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.464488358442575, LR: 0.0003 +[2026-03-05 04:45:47] (step=0063707) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.464684014869889, LR: 0.0003 +[2026-03-05 04:45:55] (step=0063708) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.464879671297203, LR: 0.0003 +[2026-03-05 04:46:03] (step=0063709) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 12.465075327724517, LR: 0.0003 +[2026-03-05 04:46:11] (step=0063710) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.465270984151829, LR: 0.0003 +[2026-03-05 04:46:18] (step=0063711) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.465466640579143, LR: 0.0003 +[2026-03-05 04:46:26] (step=0063712) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.465662297006457, LR: 0.0003 +[2026-03-05 04:46:34] (step=0063713) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.46585795343377, LR: 0.0003 +[2026-03-05 04:46:42] (step=0063714) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.466053609861085, LR: 0.0003 +[2026-03-05 04:46:50] (step=0063715) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.466249266288397, LR: 0.0003 +[2026-03-05 04:46:58] (step=0063716) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.466444922715711, LR: 0.0003 +[2026-03-05 04:47:06] (step=0063717) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.466640579143025, LR: 0.0003 +[2026-03-05 04:47:13] (step=0063718) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.466836235570339, LR: 0.0003 +[2026-03-05 04:47:21] (step=0063719) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.467031891997651, LR: 0.0003 +[2026-03-05 04:47:29] (step=0063720) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.467227548424965, LR: 0.0003 +[2026-03-05 04:47:37] (step=0063721) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.46742320485228, LR: 0.0003 +[2026-03-05 04:47:45] (step=0063722) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 12.467618861279593, LR: 0.0003 +[2026-03-05 04:47:53] (step=0063723) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.467814517706907, LR: 0.0003 +[2026-03-05 04:48:00] (step=0063724) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.46801017413422, LR: 0.0003 +[2026-03-05 04:48:08] (step=0063725) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.468205830561534, LR: 0.0003 +[2026-03-05 04:48:16] (step=0063726) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.468401486988848, LR: 0.0003 +[2026-03-05 04:48:24] (step=0063727) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.468597143416162, LR: 0.0003 +[2026-03-05 04:48:32] (step=0063728) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.468792799843476, LR: 0.0003 +[2026-03-05 04:48:40] (step=0063729) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.468988456270788, LR: 0.0003 +[2026-03-05 04:48:48] (step=0063730) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.469184112698102, LR: 0.0003 +[2026-03-05 04:48:55] (step=0063731) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.469379769125416, LR: 0.0003 +[2026-03-05 04:49:03] (step=0063732) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.46957542555273, LR: 0.0003 +[2026-03-05 04:49:11] (step=0063733) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.469771081980044, LR: 0.0003 +[2026-03-05 04:49:19] (step=0063734) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.469966738407356, LR: 0.0003 +[2026-03-05 04:49:27] (step=0063735) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.47016239483467, LR: 0.0003 +[2026-03-05 04:49:35] (step=0063736) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.470358051261984, LR: 0.0003 +[2026-03-05 04:49:43] (step=0063737) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.470553707689298, LR: 0.0003 +[2026-03-05 04:49:51] (step=0063738) Train Loss: 0.4513, Train Steps/Sec: 0.12, Epoch: 12.470749364116612, LR: 0.0003 +[2026-03-05 04:49:58] (step=0063739) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 12.470945020543924, LR: 0.0003 +[2026-03-05 04:50:06] (step=0063740) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.471140676971238, LR: 0.0003 +[2026-03-05 04:50:14] (step=0063741) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.471336333398552, LR: 0.0003 +[2026-03-05 04:50:22] (step=0063742) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.471531989825866, LR: 0.0003 +[2026-03-05 04:50:30] (step=0063743) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.471727646253179, LR: 0.0003 +[2026-03-05 04:50:38] (step=0063744) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.471923302680493, LR: 0.0003 +[2026-03-05 04:50:46] (step=0063745) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.472118959107807, LR: 0.0003 +[2026-03-05 04:50:53] (step=0063746) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 12.47231461553512, LR: 0.0003 +[2026-03-05 04:51:01] (step=0063747) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.472510271962435, LR: 0.0003 +[2026-03-05 04:51:09] (step=0063748) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.472705928389747, LR: 0.0003 +[2026-03-05 04:51:17] (step=0063749) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.47290158481706, LR: 0.0003 +[2026-03-05 04:51:25] (step=0063750) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.473097241244375, LR: 0.0003 +[2026-03-05 04:51:33] (step=0063751) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.473292897671689, LR: 0.0003 +[2026-03-05 04:51:41] (step=0063752) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.473488554099003, LR: 0.0003 +[2026-03-05 04:51:48] (step=0063753) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.473684210526315, LR: 0.0003 +[2026-03-05 04:51:57] (step=0063754) Train Loss: 0.4415, Train Steps/Sec: 0.12, Epoch: 12.473879866953629, LR: 0.0003 +[2026-03-05 04:52:04] (step=0063755) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.474075523380943, LR: 0.0003 +[2026-03-05 04:52:12] (step=0063756) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 12.474271179808257, LR: 0.0003 +[2026-03-05 04:52:20] (step=0063757) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.474466836235571, LR: 0.0003 +[2026-03-05 04:52:28] (step=0063758) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.474662492662883, LR: 0.0003 +[2026-03-05 04:52:36] (step=0063759) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.474858149090197, LR: 0.0003 +[2026-03-05 04:52:44] (step=0063760) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.475053805517511, LR: 0.0003 +[2026-03-05 04:52:52] (step=0063761) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.475249461944825, LR: 0.0003 +[2026-03-05 04:52:59] (step=0063762) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.47544511837214, LR: 0.0003 +[2026-03-05 04:53:07] (step=0063763) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.475640774799452, LR: 0.0003 +[2026-03-05 04:53:15] (step=0063764) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.475836431226766, LR: 0.0003 +[2026-03-05 04:53:23] (step=0063765) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.47603208765408, LR: 0.0003 +[2026-03-05 04:53:31] (step=0063766) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.476227744081394, LR: 0.0003 +[2026-03-05 04:53:39] (step=0063767) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 12.476423400508708, LR: 0.0003 +[2026-03-05 04:53:47] (step=0063768) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.47661905693602, LR: 0.0003 +[2026-03-05 04:53:54] (step=0063769) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.476814713363334, LR: 0.0003 +[2026-03-05 04:54:02] (step=0063770) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.477010369790648, LR: 0.0003 +[2026-03-05 04:54:10] (step=0063771) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.477206026217962, LR: 0.0003 +[2026-03-05 04:54:18] (step=0063772) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.477401682645274, LR: 0.0003 +[2026-03-05 04:54:26] (step=0063773) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.477597339072588, LR: 0.0003 +[2026-03-05 04:54:34] (step=0063774) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 12.477792995499902, LR: 0.0003 +[2026-03-05 04:54:42] (step=0063775) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.477988651927216, LR: 0.0003 +[2026-03-05 04:54:49] (step=0063776) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.47818430835453, LR: 0.0003 +[2026-03-05 04:54:57] (step=0063777) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.478379964781842, LR: 0.0003 +[2026-03-05 04:55:05] (step=0063778) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.478575621209156, LR: 0.0003 +[2026-03-05 04:55:13] (step=0063779) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.47877127763647, LR: 0.0003 +[2026-03-05 04:55:21] (step=0063780) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.478966934063784, LR: 0.0003 +[2026-03-05 04:55:29] (step=0063781) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.479162590491098, LR: 0.0003 +[2026-03-05 04:55:36] (step=0063782) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.47935824691841, LR: 0.0003 +[2026-03-05 04:55:44] (step=0063783) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.479553903345725, LR: 0.0003 +[2026-03-05 04:55:52] (step=0063784) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.479749559773039, LR: 0.0003 +[2026-03-05 04:56:00] (step=0063785) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.479945216200353, LR: 0.0003 +[2026-03-05 04:56:08] (step=0063786) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.480140872627667, LR: 0.0003 +[2026-03-05 04:56:16] (step=0063787) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.480336529054979, LR: 0.0003 +[2026-03-05 04:56:23] (step=0063788) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.480532185482293, LR: 0.0003 +[2026-03-05 04:56:31] (step=0063789) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 12.480727841909607, LR: 0.0003 +[2026-03-05 04:56:39] (step=0063790) Train Loss: 0.4424, Train Steps/Sec: 0.12, Epoch: 12.48092349833692, LR: 0.0003 +[2026-03-05 04:56:47] (step=0063791) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 12.481119154764235, LR: 0.0003 +[2026-03-05 04:56:55] (step=0063792) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.481314811191547, LR: 0.0003 +[2026-03-05 04:57:03] (step=0063793) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.481510467618861, LR: 0.0003 +[2026-03-05 04:57:11] (step=0063794) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.481706124046175, LR: 0.0003 +[2026-03-05 04:57:19] (step=0063795) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.481901780473489, LR: 0.0003 +[2026-03-05 04:57:26] (step=0063796) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.482097436900801, LR: 0.0003 +[2026-03-05 04:57:34] (step=0063797) Train Loss: 0.4193, Train Steps/Sec: 0.13, Epoch: 12.482293093328115, LR: 0.0003 +[2026-03-05 04:57:42] (step=0063798) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.48248874975543, LR: 0.0003 +[2026-03-05 04:57:50] (step=0063799) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 12.482684406182743, LR: 0.0003 +[2026-03-05 04:57:58] (step=0063800) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.482880062610057, LR: 0.0003 +[2026-03-05 04:58:06] (step=0063801) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 12.48307571903737, LR: 0.0003 +[2026-03-05 04:58:14] (step=0063802) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.483271375464684, LR: 0.0003 +[2026-03-05 04:58:21] (step=0063803) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.483467031891998, LR: 0.0003 +[2026-03-05 04:58:29] (step=0063804) Train Loss: 0.4262, Train Steps/Sec: 0.13, Epoch: 12.483662688319312, LR: 0.0003 +[2026-03-05 04:58:37] (step=0063805) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.483858344746626, LR: 0.0003 +[2026-03-05 04:58:45] (step=0063806) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.484054001173938, LR: 0.0003 +[2026-03-05 04:58:53] (step=0063807) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.484249657601252, LR: 0.0003 +[2026-03-05 04:59:01] (step=0063808) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 12.484445314028566, LR: 0.0003 +[2026-03-05 04:59:08] (step=0063809) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.48464097045588, LR: 0.0003 +[2026-03-05 04:59:16] (step=0063810) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 12.484836626883194, LR: 0.0003 +[2026-03-05 04:59:24] (step=0063811) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.485032283310506, LR: 0.0003 +[2026-03-05 04:59:32] (step=0063812) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.48522793973782, LR: 0.0003 +[2026-03-05 04:59:40] (step=0063813) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.485423596165134, LR: 0.0003 +[2026-03-05 04:59:48] (step=0063814) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.485619252592448, LR: 0.0003 +[2026-03-05 04:59:56] (step=0063815) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.485814909019762, LR: 0.0003 +[2026-03-05 05:00:03] (step=0063816) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.486010565447074, LR: 0.0003 +[2026-03-05 05:00:11] (step=0063817) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.486206221874388, LR: 0.0003 +[2026-03-05 05:00:19] (step=0063818) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.486401878301702, LR: 0.0003 +[2026-03-05 05:00:27] (step=0063819) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.486597534729016, LR: 0.0003 +[2026-03-05 05:00:35] (step=0063820) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.48679319115633, LR: 0.0003 +[2026-03-05 05:00:43] (step=0063821) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.486988847583643, LR: 0.0003 +[2026-03-05 05:00:51] (step=0063822) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.487184504010957, LR: 0.0003 +[2026-03-05 05:00:58] (step=0063823) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.48738016043827, LR: 0.0003 +[2026-03-05 05:01:06] (step=0063824) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.487575816865585, LR: 0.0003 +[2026-03-05 05:01:14] (step=0063825) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 12.487771473292897, LR: 0.0003 +[2026-03-05 05:01:22] (step=0063826) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.48796712972021, LR: 0.0003 +[2026-03-05 05:01:30] (step=0063827) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.488162786147525, LR: 0.0003 +[2026-03-05 05:01:38] (step=0063828) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.488358442574839, LR: 0.0003 +[2026-03-05 05:01:45] (step=0063829) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.488554099002153, LR: 0.0003 +[2026-03-05 05:01:53] (step=0063830) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.488749755429465, LR: 0.0003 +[2026-03-05 05:02:01] (step=0063831) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.488945411856779, LR: 0.0003 +[2026-03-05 05:02:09] (step=0063832) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.489141068284093, LR: 0.0003 +[2026-03-05 05:02:17] (step=0063833) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.489336724711407, LR: 0.0003 +[2026-03-05 05:02:25] (step=0063834) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.489532381138721, LR: 0.0003 +[2026-03-05 05:02:33] (step=0063835) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.489728037566033, LR: 0.0003 +[2026-03-05 05:02:40] (step=0063836) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.489923693993347, LR: 0.0003 +[2026-03-05 05:02:48] (step=0063837) Train Loss: 0.4399, Train Steps/Sec: 0.12, Epoch: 12.490119350420661, LR: 0.0003 +[2026-03-05 05:02:56] (step=0063838) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.490315006847975, LR: 0.0003 +[2026-03-05 05:03:04] (step=0063839) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 12.49051066327529, LR: 0.0003 +[2026-03-05 05:03:12] (step=0063840) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.490706319702602, LR: 0.0003 +[2026-03-05 05:03:20] (step=0063841) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.490901976129916, LR: 0.0003 +[2026-03-05 05:03:28] (step=0063842) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.49109763255723, LR: 0.0003 +[2026-03-05 05:03:35] (step=0063843) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.491293288984544, LR: 0.0003 +[2026-03-05 05:03:43] (step=0063844) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.491488945411858, LR: 0.0003 +[2026-03-05 05:03:51] (step=0063845) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 12.49168460183917, LR: 0.0003 +[2026-03-05 05:03:59] (step=0063846) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.491880258266484, LR: 0.0003 +[2026-03-05 05:04:07] (step=0063847) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.492075914693798, LR: 0.0003 +[2026-03-05 05:04:15] (step=0063848) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.492271571121112, LR: 0.0003 +[2026-03-05 05:04:23] (step=0063849) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.492467227548424, LR: 0.0003 +[2026-03-05 05:04:30] (step=0063850) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.492662883975738, LR: 0.0003 +[2026-03-05 05:04:38] (step=0063851) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 12.492858540403052, LR: 0.0003 +[2026-03-05 05:04:46] (step=0063852) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.493054196830366, LR: 0.0003 +[2026-03-05 05:04:54] (step=0063853) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.49324985325768, LR: 0.0003 +[2026-03-05 05:05:02] (step=0063854) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.493445509684992, LR: 0.0003 +[2026-03-05 05:05:10] (step=0063855) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 12.493641166112306, LR: 0.0003 +[2026-03-05 05:05:18] (step=0063856) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.49383682253962, LR: 0.0003 +[2026-03-05 05:05:25] (step=0063857) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.494032478966934, LR: 0.0003 +[2026-03-05 05:05:33] (step=0063858) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.494228135394248, LR: 0.0003 +[2026-03-05 05:05:41] (step=0063859) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.49442379182156, LR: 0.0003 +[2026-03-05 05:05:49] (step=0063860) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.494619448248875, LR: 0.0003 +[2026-03-05 05:05:57] (step=0063861) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 12.494815104676189, LR: 0.0003 +[2026-03-05 05:06:05] (step=0063862) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.495010761103503, LR: 0.0003 +[2026-03-05 05:06:13] (step=0063863) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.495206417530817, LR: 0.0003 +[2026-03-05 05:06:20] (step=0063864) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.495402073958129, LR: 0.0003 +[2026-03-05 05:06:28] (step=0063865) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.495597730385443, LR: 0.0003 +[2026-03-05 05:06:36] (step=0063866) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.495793386812757, LR: 0.0003 +[2026-03-05 05:06:44] (step=0063867) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.49598904324007, LR: 0.0003 +[2026-03-05 05:06:52] (step=0063868) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.496184699667385, LR: 0.0003 +[2026-03-05 05:07:00] (step=0063869) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.496380356094697, LR: 0.0003 +[2026-03-05 05:07:08] (step=0063870) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.496576012522011, LR: 0.0003 +[2026-03-05 05:07:15] (step=0063871) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.496771668949325, LR: 0.0003 +[2026-03-05 05:07:23] (step=0063872) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.49696732537664, LR: 0.0003 +[2026-03-05 05:07:31] (step=0063873) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.497162981803953, LR: 0.0003 +[2026-03-05 05:07:39] (step=0063874) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.497358638231265, LR: 0.0003 +[2026-03-05 05:07:47] (step=0063875) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.49755429465858, LR: 0.0003 +[2026-03-05 05:07:55] (step=0063876) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.497749951085893, LR: 0.0003 +[2026-03-05 05:08:03] (step=0063877) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.497945607513207, LR: 0.0003 +[2026-03-05 05:08:10] (step=0063878) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.49814126394052, LR: 0.0003 +[2026-03-05 05:08:18] (step=0063879) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.498336920367834, LR: 0.0003 +[2026-03-05 05:08:26] (step=0063880) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.498532576795148, LR: 0.0003 +[2026-03-05 05:08:34] (step=0063881) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.498728233222462, LR: 0.0003 +[2026-03-05 05:08:42] (step=0063882) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.498923889649776, LR: 0.0003 +[2026-03-05 05:08:50] (step=0063883) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.499119546077088, LR: 0.0003 +[2026-03-05 05:08:57] (step=0063884) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.499315202504402, LR: 0.0003 +[2026-03-05 05:09:05] (step=0063885) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 12.499510858931716, LR: 0.0003 +[2026-03-05 05:09:13] (step=0063886) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.49970651535903, LR: 0.0003 +[2026-03-05 05:09:21] (step=0063887) Train Loss: 0.4522, Train Steps/Sec: 0.12, Epoch: 12.499902171786344, LR: 0.0003 +[2026-03-05 05:09:29] (step=0063888) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.500097828213656, LR: 0.0003 +[2026-03-05 05:09:37] (step=0063889) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 12.50029348464097, LR: 0.0003 +[2026-03-05 05:09:45] (step=0063890) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.500489141068284, LR: 0.0003 +[2026-03-05 05:09:53] (step=0063891) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.500684797495598, LR: 0.0003 +[2026-03-05 05:10:01] (step=0063892) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.500880453922912, LR: 0.0003 +[2026-03-05 05:10:08] (step=0063893) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.501076110350224, LR: 0.0003 +[2026-03-05 05:10:16] (step=0063894) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.501271766777538, LR: 0.0003 +[2026-03-05 05:10:24] (step=0063895) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 12.501467423204852, LR: 0.0003 +[2026-03-05 05:10:32] (step=0063896) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.501663079632166, LR: 0.0003 +[2026-03-05 05:10:40] (step=0063897) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.50185873605948, LR: 0.0003 +[2026-03-05 05:10:48] (step=0063898) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.502054392486793, LR: 0.0003 +[2026-03-05 05:10:56] (step=0063899) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.502250048914107, LR: 0.0003 +[2026-03-05 05:11:04] (step=0063900) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.50244570534142, LR: 0.0003 +[2026-03-05 05:11:12] (step=0063901) Train Loss: 0.4365, Train Steps/Sec: 0.12, Epoch: 12.502641361768735, LR: 0.0003 +[2026-03-05 05:11:20] (step=0063902) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.502837018196047, LR: 0.0003 +[2026-03-05 05:11:27] (step=0063903) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.50303267462336, LR: 0.0003 +[2026-03-05 05:11:35] (step=0063904) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.503228331050675, LR: 0.0003 +[2026-03-05 05:11:43] (step=0063905) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.503423987477989, LR: 0.0003 +[2026-03-05 05:11:51] (step=0063906) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.503619643905303, LR: 0.0003 +[2026-03-05 05:11:59] (step=0063907) Train Loss: 0.4630, Train Steps/Sec: 0.13, Epoch: 12.503815300332615, LR: 0.0003 +[2026-03-05 05:12:07] (step=0063908) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 12.50401095675993, LR: 0.0003 +[2026-03-05 05:12:15] (step=0063909) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.504206613187243, LR: 0.0003 +[2026-03-05 05:12:22] (step=0063910) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.504402269614557, LR: 0.0003 +[2026-03-05 05:12:30] (step=0063911) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 12.504597926041871, LR: 0.0003 +[2026-03-05 05:12:38] (step=0063912) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.504793582469183, LR: 0.0003 +[2026-03-05 05:12:46] (step=0063913) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.504989238896497, LR: 0.0003 +[2026-03-05 05:12:54] (step=0063914) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.505184895323811, LR: 0.0003 +[2026-03-05 05:13:02] (step=0063915) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.505380551751125, LR: 0.0003 +[2026-03-05 05:13:09] (step=0063916) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 12.50557620817844, LR: 0.0003 +[2026-03-05 05:13:17] (step=0063917) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.505771864605752, LR: 0.0003 +[2026-03-05 05:13:25] (step=0063918) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.505967521033066, LR: 0.0003 +[2026-03-05 05:13:33] (step=0063919) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.50616317746038, LR: 0.0003 +[2026-03-05 05:13:41] (step=0063920) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.506358833887694, LR: 0.0003 +[2026-03-05 05:13:49] (step=0063921) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.506554490315008, LR: 0.0003 +[2026-03-05 05:13:57] (step=0063922) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.50675014674232, LR: 0.0003 +[2026-03-05 05:14:05] (step=0063923) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.506945803169634, LR: 0.0003 +[2026-03-05 05:14:12] (step=0063924) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.507141459596948, LR: 0.0003 +[2026-03-05 05:14:20] (step=0063925) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.507337116024262, LR: 0.0003 +[2026-03-05 05:14:28] (step=0063926) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.507532772451576, LR: 0.0003 +[2026-03-05 05:14:36] (step=0063927) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.507728428878888, LR: 0.0003 +[2026-03-05 05:14:44] (step=0063928) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.507924085306202, LR: 0.0003 +[2026-03-05 05:14:52] (step=0063929) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.508119741733516, LR: 0.0003 +[2026-03-05 05:15:00] (step=0063930) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.50831539816083, LR: 0.0003 +[2026-03-05 05:15:08] (step=0063931) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.508511054588142, LR: 0.0003 +[2026-03-05 05:15:15] (step=0063932) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.508706711015456, LR: 0.0003 +[2026-03-05 05:15:23] (step=0063933) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 12.50890236744277, LR: 0.0003 +[2026-03-05 05:15:31] (step=0063934) Train Loss: 0.4420, Train Steps/Sec: 0.12, Epoch: 12.509098023870084, LR: 0.0003 +[2026-03-05 05:15:39] (step=0063935) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.509293680297398, LR: 0.0003 +[2026-03-05 05:15:47] (step=0063936) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.50948933672471, LR: 0.0003 +[2026-03-05 05:15:55] (step=0063937) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.509684993152025, LR: 0.0003 +[2026-03-05 05:16:03] (step=0063938) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.509880649579339, LR: 0.0003 +[2026-03-05 05:16:11] (step=0063939) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.510076306006653, LR: 0.0003 +[2026-03-05 05:16:19] (step=0063940) Train Loss: 0.4431, Train Steps/Sec: 0.12, Epoch: 12.510271962433967, LR: 0.0003 +[2026-03-05 05:16:26] (step=0063941) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.510467618861279, LR: 0.0003 +[2026-03-05 05:16:34] (step=0063942) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.510663275288593, LR: 0.0003 +[2026-03-05 05:16:42] (step=0063943) Train Loss: 0.4433, Train Steps/Sec: 0.12, Epoch: 12.510858931715907, LR: 0.0003 +[2026-03-05 05:16:50] (step=0063944) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.511054588143221, LR: 0.0003 +[2026-03-05 05:16:58] (step=0063945) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.511250244570535, LR: 0.0003 +[2026-03-05 05:17:06] (step=0063946) Train Loss: 0.4262, Train Steps/Sec: 0.13, Epoch: 12.511445900997847, LR: 0.0003 +[2026-03-05 05:17:14] (step=0063947) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 12.511641557425161, LR: 0.0003 +[2026-03-05 05:17:22] (step=0063948) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.511837213852475, LR: 0.0003 +[2026-03-05 05:17:30] (step=0063949) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.51203287027979, LR: 0.0003 +[2026-03-05 05:17:38] (step=0063950) Train Loss: 0.4503, Train Steps/Sec: 0.12, Epoch: 12.512228526707103, LR: 0.0003 +[2026-03-05 05:17:46] (step=0063951) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 12.512424183134415, LR: 0.0003 +[2026-03-05 05:17:53] (step=0063952) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 12.51261983956173, LR: 0.0003 +[2026-03-05 05:18:01] (step=0063953) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.512815495989043, LR: 0.0003 +[2026-03-05 05:18:09] (step=0063954) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.513011152416357, LR: 0.0003 +[2026-03-05 05:18:17] (step=0063955) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 12.51320680884367, LR: 0.0003 +[2026-03-05 05:18:25] (step=0063956) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.513402465270984, LR: 0.0003 +[2026-03-05 05:18:33] (step=0063957) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.513598121698298, LR: 0.0003 +[2026-03-05 05:18:41] (step=0063958) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.513793778125612, LR: 0.0003 +[2026-03-05 05:18:48] (step=0063959) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.513989434552926, LR: 0.0003 +[2026-03-05 05:18:56] (step=0063960) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.514185090980238, LR: 0.0003 +[2026-03-05 05:19:04] (step=0063961) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.514380747407552, LR: 0.0003 +[2026-03-05 05:19:12] (step=0063962) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.514576403834866, LR: 0.0003 +[2026-03-05 05:19:20] (step=0063963) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.51477206026218, LR: 0.0003 +[2026-03-05 05:19:28] (step=0063964) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.514967716689494, LR: 0.0003 +[2026-03-05 05:19:35] (step=0063965) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.515163373116806, LR: 0.0003 +[2026-03-05 05:19:43] (step=0063966) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.51535902954412, LR: 0.0003 +[2026-03-05 05:19:51] (step=0063967) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.515554685971434, LR: 0.0003 +[2026-03-05 05:19:59] (step=0063968) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.515750342398748, LR: 0.0003 +[2026-03-05 05:20:07] (step=0063969) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.515945998826062, LR: 0.0003 +[2026-03-05 05:20:15] (step=0063970) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.516141655253374, LR: 0.0003 +[2026-03-05 05:20:23] (step=0063971) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.516337311680688, LR: 0.0003 +[2026-03-05 05:20:30] (step=0063972) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.516532968108002, LR: 0.0003 +[2026-03-05 05:20:38] (step=0063973) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.516728624535316, LR: 0.0003 +[2026-03-05 05:20:46] (step=0063974) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 12.51692428096263, LR: 0.0003 +[2026-03-05 05:20:54] (step=0063975) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.517119937389943, LR: 0.0003 +[2026-03-05 05:21:02] (step=0063976) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.517315593817257, LR: 0.0003 +[2026-03-05 05:21:10] (step=0063977) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.51751125024457, LR: 0.0003 +[2026-03-05 05:21:17] (step=0063978) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.517706906671885, LR: 0.0003 +[2026-03-05 05:21:25] (step=0063979) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.517902563099199, LR: 0.0003 +[2026-03-05 05:21:33] (step=0063980) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.518098219526511, LR: 0.0003 +[2026-03-05 05:21:41] (step=0063981) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.518293875953825, LR: 0.0003 +[2026-03-05 05:21:49] (step=0063982) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.518489532381139, LR: 0.0003 +[2026-03-05 05:21:57] (step=0063983) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.518685188808453, LR: 0.0003 +[2026-03-05 05:22:05] (step=0063984) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.518880845235765, LR: 0.0003 +[2026-03-05 05:22:12] (step=0063985) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 12.51907650166308, LR: 0.0003 +[2026-03-05 05:22:20] (step=0063986) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.519272158090393, LR: 0.0003 +[2026-03-05 05:22:28] (step=0063987) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.519467814517707, LR: 0.0003 +[2026-03-05 05:22:36] (step=0063988) Train Loss: 0.4418, Train Steps/Sec: 0.12, Epoch: 12.519663470945021, LR: 0.0003 +[2026-03-05 05:22:44] (step=0063989) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.519859127372333, LR: 0.0003 +[2026-03-05 05:22:52] (step=0063990) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.520054783799647, LR: 0.0003 +[2026-03-05 05:23:00] (step=0063991) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.520250440226961, LR: 0.0003 +[2026-03-05 05:23:08] (step=0063992) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.520446096654275, LR: 0.0003 +[2026-03-05 05:23:15] (step=0063993) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.52064175308159, LR: 0.0003 +[2026-03-05 05:23:23] (step=0063994) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.520837409508902, LR: 0.0003 +[2026-03-05 05:23:31] (step=0063995) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.521033065936216, LR: 0.0003 +[2026-03-05 05:23:39] (step=0063996) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.52122872236353, LR: 0.0003 +[2026-03-05 05:23:47] (step=0063997) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.521424378790844, LR: 0.0003 +[2026-03-05 05:23:55] (step=0063998) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.521620035218158, LR: 0.0003 +[2026-03-05 05:24:03] (step=0063999) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 12.52181569164547, LR: 0.0003 +[2026-03-05 05:24:11] (step=0064000) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.522011348072784, LR: 0.0003 +[2026-03-05 05:24:11] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0064000/ +[2026-03-05 05:24:18] (step=0064001) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.522207004500098, LR: 0.0003 +[2026-03-05 05:24:26] (step=0064002) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.522402660927412, LR: 0.0003 +[2026-03-05 05:24:34] (step=0064003) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.522598317354726, LR: 0.0003 +[2026-03-05 05:24:42] (step=0064004) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.522793973782038, LR: 0.0003 +[2026-03-05 05:24:50] (step=0064005) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.522989630209352, LR: 0.0003 +[2026-03-05 05:24:58] (step=0064006) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.523185286636666, LR: 0.0003 +[2026-03-05 05:25:06] (step=0064007) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.52338094306398, LR: 0.0003 +[2026-03-05 05:25:13] (step=0064008) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.523576599491292, LR: 0.0003 +[2026-03-05 05:25:21] (step=0064009) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 12.523772255918606, LR: 0.0003 +[2026-03-05 05:25:29] (step=0064010) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.52396791234592, LR: 0.0003 +[2026-03-05 05:25:37] (step=0064011) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.524163568773234, LR: 0.0003 +[2026-03-05 05:25:45] (step=0064012) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.524359225200548, LR: 0.0003 +[2026-03-05 05:25:53] (step=0064013) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.52455488162786, LR: 0.0003 +[2026-03-05 05:26:01] (step=0064014) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.524750538055175, LR: 0.0003 +[2026-03-05 05:26:08] (step=0064015) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 12.524946194482489, LR: 0.0003 +[2026-03-05 05:26:16] (step=0064016) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.525141850909803, LR: 0.0003 +[2026-03-05 05:26:24] (step=0064017) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.525337507337117, LR: 0.0003 +[2026-03-05 05:26:32] (step=0064018) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 12.525533163764429, LR: 0.0003 +[2026-03-05 05:26:40] (step=0064019) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.525728820191743, LR: 0.0003 +[2026-03-05 05:26:48] (step=0064020) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.525924476619057, LR: 0.0003 +[2026-03-05 05:26:56] (step=0064021) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.526120133046371, LR: 0.0003 +[2026-03-05 05:27:03] (step=0064022) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.526315789473685, LR: 0.0003 +[2026-03-05 05:27:11] (step=0064023) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.526511445900997, LR: 0.0003 +[2026-03-05 05:27:19] (step=0064024) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.526707102328311, LR: 0.0003 +[2026-03-05 05:27:27] (step=0064025) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.526902758755625, LR: 0.0003 +[2026-03-05 05:27:35] (step=0064026) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.52709841518294, LR: 0.0003 +[2026-03-05 05:27:43] (step=0064027) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.527294071610253, LR: 0.0003 +[2026-03-05 05:27:51] (step=0064028) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.527489728037565, LR: 0.0003 +[2026-03-05 05:27:58] (step=0064029) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.52768538446488, LR: 0.0003 +[2026-03-05 05:28:06] (step=0064030) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.527881040892193, LR: 0.0003 +[2026-03-05 05:28:14] (step=0064031) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 12.528076697319507, LR: 0.0003 +[2026-03-05 05:28:22] (step=0064032) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.528272353746821, LR: 0.0003 +[2026-03-05 05:28:30] (step=0064033) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.528468010174134, LR: 0.0003 +[2026-03-05 05:28:38] (step=0064034) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.528663666601448, LR: 0.0003 +[2026-03-05 05:28:45] (step=0064035) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.528859323028762, LR: 0.0003 +[2026-03-05 05:28:53] (step=0064036) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.529054979456076, LR: 0.0003 +[2026-03-05 05:29:01] (step=0064037) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.529250635883388, LR: 0.0003 +[2026-03-05 05:29:09] (step=0064038) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 12.529446292310702, LR: 0.0003 +[2026-03-05 05:29:17] (step=0064039) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.529641948738016, LR: 0.0003 +[2026-03-05 05:29:25] (step=0064040) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.52983760516533, LR: 0.0003 +[2026-03-05 05:29:33] (step=0064041) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.530033261592644, LR: 0.0003 +[2026-03-05 05:29:41] (step=0064042) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.530228918019956, LR: 0.0003 +[2026-03-05 05:29:48] (step=0064043) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.53042457444727, LR: 0.0003 +[2026-03-05 05:29:56] (step=0064044) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.530620230874584, LR: 0.0003 +[2026-03-05 05:30:04] (step=0064045) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.530815887301898, LR: 0.0003 +[2026-03-05 05:30:12] (step=0064046) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.531011543729212, LR: 0.0003 +[2026-03-05 05:30:20] (step=0064047) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.531207200156524, LR: 0.0003 +[2026-03-05 05:30:28] (step=0064048) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 12.531402856583838, LR: 0.0003 +[2026-03-05 05:30:36] (step=0064049) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.531598513011152, LR: 0.0003 +[2026-03-05 05:30:44] (step=0064050) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 12.531794169438466, LR: 0.0003 +[2026-03-05 05:30:51] (step=0064051) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.53198982586578, LR: 0.0003 +[2026-03-05 05:30:59] (step=0064052) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.532185482293093, LR: 0.0003 +[2026-03-05 05:31:07] (step=0064053) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 12.532381138720407, LR: 0.0003 +[2026-03-05 05:31:15] (step=0064054) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.53257679514772, LR: 0.0003 +[2026-03-05 05:31:23] (step=0064055) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.532772451575035, LR: 0.0003 +[2026-03-05 05:31:31] (step=0064056) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.532968108002349, LR: 0.0003 +[2026-03-05 05:31:39] (step=0064057) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.533163764429661, LR: 0.0003 +[2026-03-05 05:31:46] (step=0064058) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.533359420856975, LR: 0.0003 +[2026-03-05 05:31:54] (step=0064059) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.533555077284289, LR: 0.0003 +[2026-03-05 05:32:02] (step=0064060) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.533750733711603, LR: 0.0003 +[2026-03-05 05:32:10] (step=0064061) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.533946390138915, LR: 0.0003 +[2026-03-05 05:32:18] (step=0064062) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.53414204656623, LR: 0.0003 +[2026-03-05 05:32:26] (step=0064063) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 12.534337702993543, LR: 0.0003 +[2026-03-05 05:32:34] (step=0064064) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.534533359420857, LR: 0.0003 +[2026-03-05 05:32:41] (step=0064065) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.534729015848171, LR: 0.0003 +[2026-03-05 05:32:49] (step=0064066) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.534924672275483, LR: 0.0003 +[2026-03-05 05:32:57] (step=0064067) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 12.535120328702797, LR: 0.0003 +[2026-03-05 05:33:05] (step=0064068) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 12.535315985130111, LR: 0.0003 +[2026-03-05 05:33:13] (step=0064069) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.535511641557425, LR: 0.0003 +[2026-03-05 05:33:21] (step=0064070) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.53570729798474, LR: 0.0003 +[2026-03-05 05:33:28] (step=0064071) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.535902954412052, LR: 0.0003 +[2026-03-05 05:33:36] (step=0064072) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.536098610839366, LR: 0.0003 +[2026-03-05 05:33:44] (step=0064073) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.53629426726668, LR: 0.0003 +[2026-03-05 05:33:52] (step=0064074) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 12.536489923693994, LR: 0.0003 +[2026-03-05 05:34:00] (step=0064075) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.536685580121308, LR: 0.0003 +[2026-03-05 05:34:08] (step=0064076) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.53688123654862, LR: 0.0003 +[2026-03-05 05:34:15] (step=0064077) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.537076892975934, LR: 0.0003 +[2026-03-05 05:34:23] (step=0064078) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.537272549403248, LR: 0.0003 +[2026-03-05 05:34:31] (step=0064079) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 12.537468205830562, LR: 0.0003 +[2026-03-05 05:34:39] (step=0064080) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.537663862257876, LR: 0.0003 +[2026-03-05 05:34:47] (step=0064081) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.537859518685188, LR: 0.0003 +[2026-03-05 05:34:55] (step=0064082) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.538055175112502, LR: 0.0003 +[2026-03-05 05:35:02] (step=0064083) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.538250831539816, LR: 0.0003 +[2026-03-05 05:35:10] (step=0064084) Train Loss: 0.4643, Train Steps/Sec: 0.13, Epoch: 12.53844648796713, LR: 0.0003 +[2026-03-05 05:35:18] (step=0064085) Train Loss: 0.4335, Train Steps/Sec: 0.12, Epoch: 12.538642144394444, LR: 0.0003 +[2026-03-05 05:35:26] (step=0064086) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.538837800821756, LR: 0.0003 +[2026-03-05 05:35:34] (step=0064087) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.53903345724907, LR: 0.0003 +[2026-03-05 05:35:42] (step=0064088) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 12.539229113676384, LR: 0.0003 +[2026-03-05 05:35:50] (step=0064089) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.539424770103698, LR: 0.0003 +[2026-03-05 05:35:58] (step=0064090) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.53962042653101, LR: 0.0003 +[2026-03-05 05:36:05] (step=0064091) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.539816082958325, LR: 0.0003 +[2026-03-05 05:36:13] (step=0064092) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.540011739385639, LR: 0.0003 +[2026-03-05 05:36:21] (step=0064093) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 12.540207395812953, LR: 0.0003 +[2026-03-05 05:36:29] (step=0064094) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.540403052240267, LR: 0.0003 +[2026-03-05 05:36:37] (step=0064095) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.540598708667579, LR: 0.0003 +[2026-03-05 05:36:45] (step=0064096) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.540794365094893, LR: 0.0003 +[2026-03-05 05:36:53] (step=0064097) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.540990021522207, LR: 0.0003 +[2026-03-05 05:37:01] (step=0064098) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.541185677949521, LR: 0.0003 +[2026-03-05 05:37:08] (step=0064099) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.541381334376835, LR: 0.0003 +[2026-03-05 05:37:16] (step=0064100) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.541576990804147, LR: 0.0003 +[2026-03-05 05:37:24] (step=0064101) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.541772647231461, LR: 0.0003 +[2026-03-05 05:37:32] (step=0064102) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.541968303658775, LR: 0.0003 +[2026-03-05 05:37:40] (step=0064103) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.54216396008609, LR: 0.0003 +[2026-03-05 05:37:48] (step=0064104) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.542359616513403, LR: 0.0003 +[2026-03-05 05:37:56] (step=0064105) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.542555272940715, LR: 0.0003 +[2026-03-05 05:38:03] (step=0064106) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.54275092936803, LR: 0.0003 +[2026-03-05 05:38:11] (step=0064107) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.542946585795343, LR: 0.0003 +[2026-03-05 05:38:19] (step=0064108) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 12.543142242222658, LR: 0.0003 +[2026-03-05 05:38:27] (step=0064109) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.543337898649972, LR: 0.0003 +[2026-03-05 05:38:35] (step=0064110) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.543533555077284, LR: 0.0003 +[2026-03-05 05:38:43] (step=0064111) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.543729211504598, LR: 0.0003 +[2026-03-05 05:38:51] (step=0064112) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.543924867931912, LR: 0.0003 +[2026-03-05 05:38:58] (step=0064113) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.544120524359226, LR: 0.0003 +[2026-03-05 05:39:06] (step=0064114) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.544316180786538, LR: 0.0003 +[2026-03-05 05:39:14] (step=0064115) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.544511837213852, LR: 0.0003 +[2026-03-05 05:39:22] (step=0064116) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.544707493641166, LR: 0.0003 +[2026-03-05 05:39:30] (step=0064117) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.54490315006848, LR: 0.0003 +[2026-03-05 05:39:38] (step=0064118) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.545098806495794, LR: 0.0003 +[2026-03-05 05:39:45] (step=0064119) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.545294462923106, LR: 0.0003 +[2026-03-05 05:39:53] (step=0064120) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.54549011935042, LR: 0.0003 +[2026-03-05 05:40:01] (step=0064121) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 12.545685775777734, LR: 0.0003 +[2026-03-05 05:40:09] (step=0064122) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.545881432205048, LR: 0.0003 +[2026-03-05 05:40:17] (step=0064123) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.546077088632362, LR: 0.0003 +[2026-03-05 05:40:25] (step=0064124) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.546272745059674, LR: 0.0003 +[2026-03-05 05:40:33] (step=0064125) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.546468401486988, LR: 0.0003 +[2026-03-05 05:40:40] (step=0064126) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.546664057914303, LR: 0.0003 +[2026-03-05 05:40:48] (step=0064127) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.546859714341617, LR: 0.0003 +[2026-03-05 05:40:56] (step=0064128) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.54705537076893, LR: 0.0003 +[2026-03-05 05:41:04] (step=0064129) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.547251027196243, LR: 0.0003 +[2026-03-05 05:41:12] (step=0064130) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.547446683623557, LR: 0.0003 +[2026-03-05 05:41:20] (step=0064131) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.54764234005087, LR: 0.0003 +[2026-03-05 05:41:27] (step=0064132) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.547837996478185, LR: 0.0003 +[2026-03-05 05:41:35] (step=0064133) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.548033652905499, LR: 0.0003 +[2026-03-05 05:41:43] (step=0064134) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.548229309332811, LR: 0.0003 +[2026-03-05 05:41:51] (step=0064135) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.548424965760125, LR: 0.0003 +[2026-03-05 05:41:59] (step=0064136) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.548620622187439, LR: 0.0003 +[2026-03-05 05:42:07] (step=0064137) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.548816278614753, LR: 0.0003 +[2026-03-05 05:42:15] (step=0064138) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.549011935042065, LR: 0.0003 +[2026-03-05 05:42:22] (step=0064139) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.54920759146938, LR: 0.0003 +[2026-03-05 05:42:30] (step=0064140) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.549403247896693, LR: 0.0003 +[2026-03-05 05:42:38] (step=0064141) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.549598904324007, LR: 0.0003 +[2026-03-05 05:42:46] (step=0064142) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.549794560751321, LR: 0.0003 +[2026-03-05 05:42:54] (step=0064143) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.549990217178634, LR: 0.0003 +[2026-03-05 05:43:02] (step=0064144) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.550185873605948, LR: 0.0003 +[2026-03-05 05:43:10] (step=0064145) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 12.550381530033262, LR: 0.0003 +[2026-03-05 05:43:17] (step=0064146) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 12.550577186460576, LR: 0.0003 +[2026-03-05 05:43:25] (step=0064147) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 12.55077284288789, LR: 0.0003 +[2026-03-05 05:43:33] (step=0064148) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.550968499315202, LR: 0.0003 +[2026-03-05 05:43:41] (step=0064149) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.551164155742516, LR: 0.0003 +[2026-03-05 05:43:49] (step=0064150) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.55135981216983, LR: 0.0003 +[2026-03-05 05:43:57] (step=0064151) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.551555468597144, LR: 0.0003 +[2026-03-05 05:44:05] (step=0064152) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.551751125024458, LR: 0.0003 +[2026-03-05 05:44:12] (step=0064153) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.55194678145177, LR: 0.0003 +[2026-03-05 05:44:20] (step=0064154) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 12.552142437879084, LR: 0.0003 +[2026-03-05 05:44:28] (step=0064155) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.552338094306398, LR: 0.0003 +[2026-03-05 05:44:36] (step=0064156) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.552533750733712, LR: 0.0003 +[2026-03-05 05:44:44] (step=0064157) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 12.552729407161026, LR: 0.0003 +[2026-03-05 05:44:52] (step=0064158) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.552925063588338, LR: 0.0003 +[2026-03-05 05:44:59] (step=0064159) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.553120720015652, LR: 0.0003 +[2026-03-05 05:45:07] (step=0064160) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.553316376442966, LR: 0.0003 +[2026-03-05 05:45:15] (step=0064161) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.55351203287028, LR: 0.0003 +[2026-03-05 05:45:23] (step=0064162) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.553707689297594, LR: 0.0003 +[2026-03-05 05:45:31] (step=0064163) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.553903345724907, LR: 0.0003 +[2026-03-05 05:45:39] (step=0064164) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.55409900215222, LR: 0.0003 +[2026-03-05 05:45:47] (step=0064165) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.554294658579535, LR: 0.0003 +[2026-03-05 05:45:54] (step=0064166) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.554490315006849, LR: 0.0003 +[2026-03-05 05:46:02] (step=0064167) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.55468597143416, LR: 0.0003 +[2026-03-05 05:46:10] (step=0064168) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.554881627861475, LR: 0.0003 +[2026-03-05 05:46:18] (step=0064169) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 12.555077284288789, LR: 0.0003 +[2026-03-05 05:46:26] (step=0064170) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.555272940716103, LR: 0.0003 +[2026-03-05 05:46:34] (step=0064171) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.555468597143417, LR: 0.0003 +[2026-03-05 05:46:41] (step=0064172) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.555664253570729, LR: 0.0003 +[2026-03-05 05:46:49] (step=0064173) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.555859909998043, LR: 0.0003 +[2026-03-05 05:46:57] (step=0064174) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 12.556055566425357, LR: 0.0003 +[2026-03-05 05:47:05] (step=0064175) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.556251222852671, LR: 0.0003 +[2026-03-05 05:47:13] (step=0064176) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.556446879279985, LR: 0.0003 +[2026-03-05 05:47:21] (step=0064177) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.556642535707297, LR: 0.0003 +[2026-03-05 05:47:29] (step=0064178) Train Loss: 0.4440, Train Steps/Sec: 0.12, Epoch: 12.556838192134611, LR: 0.0003 +[2026-03-05 05:47:37] (step=0064179) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.557033848561925, LR: 0.0003 +[2026-03-05 05:47:44] (step=0064180) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.55722950498924, LR: 0.0003 +[2026-03-05 05:47:52] (step=0064181) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.557425161416553, LR: 0.0003 +[2026-03-05 05:48:00] (step=0064182) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.557620817843866, LR: 0.0003 +[2026-03-05 05:48:08] (step=0064183) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.55781647427118, LR: 0.0003 +[2026-03-05 05:48:16] (step=0064184) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.558012130698494, LR: 0.0003 +[2026-03-05 05:48:24] (step=0064185) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.558207787125808, LR: 0.0003 +[2026-03-05 05:48:32] (step=0064186) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.558403443553122, LR: 0.0003 +[2026-03-05 05:48:39] (step=0064187) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.558599099980434, LR: 0.0003 +[2026-03-05 05:48:47] (step=0064188) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.558794756407748, LR: 0.0003 +[2026-03-05 05:48:55] (step=0064189) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.558990412835062, LR: 0.0003 +[2026-03-05 05:49:03] (step=0064190) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 12.559186069262376, LR: 0.0003 +[2026-03-05 05:49:11] (step=0064191) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.559381725689688, LR: 0.0003 +[2026-03-05 05:49:19] (step=0064192) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.559577382117002, LR: 0.0003 +[2026-03-05 05:49:26] (step=0064193) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.559773038544316, LR: 0.0003 +[2026-03-05 05:49:34] (step=0064194) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.55996869497163, LR: 0.0003 +[2026-03-05 05:49:42] (step=0064195) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.560164351398944, LR: 0.0003 +[2026-03-05 05:49:50] (step=0064196) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 12.560360007826256, LR: 0.0003 +[2026-03-05 05:49:58] (step=0064197) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 12.56055566425357, LR: 0.0003 +[2026-03-05 05:50:06] (step=0064198) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.560751320680884, LR: 0.0003 +[2026-03-05 05:50:14] (step=0064199) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.560946977108198, LR: 0.0003 +[2026-03-05 05:50:22] (step=0064200) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.561142633535512, LR: 0.0003 +[2026-03-05 05:50:29] (step=0064201) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.561338289962825, LR: 0.0003 +[2026-03-05 05:50:37] (step=0064202) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.561533946390139, LR: 0.0003 +[2026-03-05 05:50:45] (step=0064203) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.561729602817453, LR: 0.0003 +[2026-03-05 05:50:53] (step=0064204) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.561925259244767, LR: 0.0003 +[2026-03-05 05:51:01] (step=0064205) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.56212091567208, LR: 0.0003 +[2026-03-05 05:51:09] (step=0064206) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.562316572099393, LR: 0.0003 +[2026-03-05 05:51:17] (step=0064207) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.562512228526707, LR: 0.0003 +[2026-03-05 05:51:24] (step=0064208) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.56270788495402, LR: 0.0003 +[2026-03-05 05:51:32] (step=0064209) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.562903541381335, LR: 0.0003 +[2026-03-05 05:51:40] (step=0064210) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.563099197808649, LR: 0.0003 +[2026-03-05 05:51:48] (step=0064211) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.563294854235961, LR: 0.0003 +[2026-03-05 05:51:56] (step=0064212) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.563490510663275, LR: 0.0003 +[2026-03-05 05:52:04] (step=0064213) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.563686167090589, LR: 0.0003 +[2026-03-05 05:52:11] (step=0064214) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.563881823517903, LR: 0.0003 +[2026-03-05 05:52:19] (step=0064215) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.564077479945217, LR: 0.0003 +[2026-03-05 05:52:27] (step=0064216) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.56427313637253, LR: 0.0003 +[2026-03-05 05:52:35] (step=0064217) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.564468792799843, LR: 0.0003 +[2026-03-05 05:52:43] (step=0064218) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.564664449227157, LR: 0.0003 +[2026-03-05 05:52:51] (step=0064219) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.564860105654471, LR: 0.0003 +[2026-03-05 05:52:58] (step=0064220) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.565055762081784, LR: 0.0003 +[2026-03-05 05:53:06] (step=0064221) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.565251418509098, LR: 0.0003 +[2026-03-05 05:53:14] (step=0064222) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.565447074936412, LR: 0.0003 +[2026-03-05 05:53:22] (step=0064223) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.565642731363726, LR: 0.0003 +[2026-03-05 05:53:30] (step=0064224) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.56583838779104, LR: 0.0003 +[2026-03-05 05:53:38] (step=0064225) Train Loss: 0.4476, Train Steps/Sec: 0.12, Epoch: 12.566034044218352, LR: 0.0003 +[2026-03-05 05:53:46] (step=0064226) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.566229700645666, LR: 0.0003 +[2026-03-05 05:53:54] (step=0064227) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.56642535707298, LR: 0.0003 +[2026-03-05 05:54:01] (step=0064228) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.566621013500294, LR: 0.0003 +[2026-03-05 05:54:09] (step=0064229) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 12.566816669927608, LR: 0.0003 +[2026-03-05 05:54:17] (step=0064230) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.56701232635492, LR: 0.0003 +[2026-03-05 05:54:25] (step=0064231) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.567207982782234, LR: 0.0003 +[2026-03-05 05:54:33] (step=0064232) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.567403639209548, LR: 0.0003 +[2026-03-05 05:54:41] (step=0064233) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 12.567599295636862, LR: 0.0003 +[2026-03-05 05:54:49] (step=0064234) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.567794952064176, LR: 0.0003 +[2026-03-05 05:54:56] (step=0064235) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.567990608491488, LR: 0.0003 +[2026-03-05 05:55:04] (step=0064236) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.568186264918802, LR: 0.0003 +[2026-03-05 05:55:12] (step=0064237) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.568381921346116, LR: 0.0003 +[2026-03-05 05:55:20] (step=0064238) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.56857757777343, LR: 0.0003 +[2026-03-05 05:55:28] (step=0064239) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.568773234200744, LR: 0.0003 +[2026-03-05 05:55:36] (step=0064240) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.568968890628057, LR: 0.0003 +[2026-03-05 05:55:44] (step=0064241) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.56916454705537, LR: 0.0003 +[2026-03-05 05:55:51] (step=0064242) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.569360203482685, LR: 0.0003 +[2026-03-05 05:55:59] (step=0064243) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.569555859909999, LR: 0.0003 +[2026-03-05 05:56:07] (step=0064244) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.56975151633731, LR: 0.0003 +[2026-03-05 05:56:15] (step=0064245) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 12.569947172764625, LR: 0.0003 +[2026-03-05 05:56:23] (step=0064246) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.570142829191939, LR: 0.0003 +[2026-03-05 05:56:31] (step=0064247) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.570338485619253, LR: 0.0003 +[2026-03-05 05:56:39] (step=0064248) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.570534142046567, LR: 0.0003 +[2026-03-05 05:56:46] (step=0064249) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.570729798473879, LR: 0.0003 +[2026-03-05 05:56:54] (step=0064250) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.570925454901193, LR: 0.0003 +[2026-03-05 05:57:02] (step=0064251) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.571121111328507, LR: 0.0003 +[2026-03-05 05:57:10] (step=0064252) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 12.571316767755821, LR: 0.0003 +[2026-03-05 05:57:18] (step=0064253) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 12.571512424183135, LR: 0.0003 +[2026-03-05 05:57:26] (step=0064254) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.571708080610447, LR: 0.0003 +[2026-03-05 05:57:33] (step=0064255) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.571903737037761, LR: 0.0003 +[2026-03-05 05:57:41] (step=0064256) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.572099393465075, LR: 0.0003 +[2026-03-05 05:57:49] (step=0064257) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.57229504989239, LR: 0.0003 +[2026-03-05 05:57:57] (step=0064258) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.572490706319703, LR: 0.0003 +[2026-03-05 05:58:05] (step=0064259) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.572686362747016, LR: 0.0003 +[2026-03-05 05:58:13] (step=0064260) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.57288201917433, LR: 0.0003 +[2026-03-05 05:58:20] (step=0064261) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.573077675601644, LR: 0.0003 +[2026-03-05 05:58:28] (step=0064262) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 12.573273332028958, LR: 0.0003 +[2026-03-05 05:58:36] (step=0064263) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.573468988456272, LR: 0.0003 +[2026-03-05 05:58:44] (step=0064264) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.573664644883584, LR: 0.0003 +[2026-03-05 05:58:52] (step=0064265) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.573860301310898, LR: 0.0003 +[2026-03-05 05:59:00] (step=0064266) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.574055957738212, LR: 0.0003 +[2026-03-05 05:59:07] (step=0064267) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.574251614165526, LR: 0.0003 +[2026-03-05 05:59:15] (step=0064268) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.57444727059284, LR: 0.0003 +[2026-03-05 05:59:23] (step=0064269) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.574642927020152, LR: 0.0003 +[2026-03-05 05:59:31] (step=0064270) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.574838583447466, LR: 0.0003 +[2026-03-05 05:59:39] (step=0064271) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 12.57503423987478, LR: 0.0003 +[2026-03-05 05:59:47] (step=0064272) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.575229896302094, LR: 0.0003 +[2026-03-05 05:59:55] (step=0064273) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.575425552729406, LR: 0.0003 +[2026-03-05 06:00:03] (step=0064274) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 12.57562120915672, LR: 0.0003 +[2026-03-05 06:00:10] (step=0064275) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.575816865584034, LR: 0.0003 +[2026-03-05 06:00:18] (step=0064276) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.576012522011348, LR: 0.0003 +[2026-03-05 06:00:26] (step=0064277) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.576208178438662, LR: 0.0003 +[2026-03-05 06:00:34] (step=0064278) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.576403834865975, LR: 0.0003 +[2026-03-05 06:00:42] (step=0064279) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.576599491293289, LR: 0.0003 +[2026-03-05 06:00:50] (step=0064280) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.576795147720603, LR: 0.0003 +[2026-03-05 06:00:57] (step=0064281) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.576990804147917, LR: 0.0003 +[2026-03-05 06:01:05] (step=0064282) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.57718646057523, LR: 0.0003 +[2026-03-05 06:01:13] (step=0064283) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.577382117002543, LR: 0.0003 +[2026-03-05 06:01:21] (step=0064284) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.577577773429857, LR: 0.0003 +[2026-03-05 06:01:29] (step=0064285) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.57777342985717, LR: 0.0003 +[2026-03-05 06:01:37] (step=0064286) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 12.577969086284485, LR: 0.0003 +[2026-03-05 06:01:45] (step=0064287) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.578164742711799, LR: 0.0003 +[2026-03-05 06:01:52] (step=0064288) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.578360399139111, LR: 0.0003 +[2026-03-05 06:02:00] (step=0064289) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.578556055566425, LR: 0.0003 +[2026-03-05 06:02:08] (step=0064290) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.578751711993739, LR: 0.0003 +[2026-03-05 06:02:16] (step=0064291) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.578947368421053, LR: 0.0003 +[2026-03-05 06:02:24] (step=0064292) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.579143024848367, LR: 0.0003 +[2026-03-05 06:02:32] (step=0064293) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.57933868127568, LR: 0.0003 +[2026-03-05 06:02:40] (step=0064294) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.579534337702993, LR: 0.0003 +[2026-03-05 06:02:48] (step=0064295) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.579729994130307, LR: 0.0003 +[2026-03-05 06:02:55] (step=0064296) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.579925650557621, LR: 0.0003 +[2026-03-05 06:03:03] (step=0064297) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.580121306984934, LR: 0.0003 +[2026-03-05 06:03:11] (step=0064298) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.580316963412248, LR: 0.0003 +[2026-03-05 06:03:19] (step=0064299) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.580512619839562, LR: 0.0003 +[2026-03-05 06:03:27] (step=0064300) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.580708276266876, LR: 0.0003 +[2026-03-05 06:03:35] (step=0064301) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.58090393269419, LR: 0.0003 +[2026-03-05 06:03:42] (step=0064302) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.581099589121502, LR: 0.0003 +[2026-03-05 06:03:50] (step=0064303) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.581295245548816, LR: 0.0003 +[2026-03-05 06:03:58] (step=0064304) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 12.58149090197613, LR: 0.0003 +[2026-03-05 06:04:06] (step=0064305) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.581686558403444, LR: 0.0003 +[2026-03-05 06:04:14] (step=0064306) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.581882214830758, LR: 0.0003 +[2026-03-05 06:04:22] (step=0064307) Train Loss: 0.4215, Train Steps/Sec: 0.13, Epoch: 12.58207787125807, LR: 0.0003 +[2026-03-05 06:04:30] (step=0064308) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.582273527685384, LR: 0.0003 +[2026-03-05 06:04:37] (step=0064309) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.582469184112698, LR: 0.0003 +[2026-03-05 06:04:45] (step=0064310) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.582664840540012, LR: 0.0003 +[2026-03-05 06:04:53] (step=0064311) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.582860496967326, LR: 0.0003 +[2026-03-05 06:05:01] (step=0064312) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.583056153394638, LR: 0.0003 +[2026-03-05 06:05:09] (step=0064313) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.583251809821952, LR: 0.0003 +[2026-03-05 06:05:17] (step=0064314) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.583447466249266, LR: 0.0003 +[2026-03-05 06:05:24] (step=0064315) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.58364312267658, LR: 0.0003 +[2026-03-05 06:05:32] (step=0064316) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.583838779103894, LR: 0.0003 +[2026-03-05 06:05:40] (step=0064317) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.584034435531207, LR: 0.0003 +[2026-03-05 06:05:48] (step=0064318) Train Loss: 0.4412, Train Steps/Sec: 0.12, Epoch: 12.58423009195852, LR: 0.0003 +[2026-03-05 06:05:56] (step=0064319) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 12.584425748385835, LR: 0.0003 +[2026-03-05 06:06:04] (step=0064320) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 12.584621404813149, LR: 0.0003 +[2026-03-05 06:06:12] (step=0064321) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.584817061240463, LR: 0.0003 +[2026-03-05 06:06:20] (step=0064322) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.585012717667775, LR: 0.0003 +[2026-03-05 06:06:27] (step=0064323) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 12.585208374095089, LR: 0.0003 +[2026-03-05 06:06:35] (step=0064324) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 12.585404030522403, LR: 0.0003 +[2026-03-05 06:06:43] (step=0064325) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 12.585599686949717, LR: 0.0003 +[2026-03-05 06:06:51] (step=0064326) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.585795343377029, LR: 0.0003 +[2026-03-05 06:06:59] (step=0064327) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.585990999804343, LR: 0.0003 +[2026-03-05 06:07:07] (step=0064328) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.586186656231657, LR: 0.0003 +[2026-03-05 06:07:14] (step=0064329) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.586382312658971, LR: 0.0003 +[2026-03-05 06:07:22] (step=0064330) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.586577969086285, LR: 0.0003 +[2026-03-05 06:07:30] (step=0064331) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.586773625513597, LR: 0.0003 +[2026-03-05 06:07:38] (step=0064332) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.586969281940911, LR: 0.0003 +[2026-03-05 06:07:46] (step=0064333) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.587164938368225, LR: 0.0003 +[2026-03-05 06:07:54] (step=0064334) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.58736059479554, LR: 0.0003 +[2026-03-05 06:08:02] (step=0064335) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.587556251222853, LR: 0.0003 +[2026-03-05 06:08:09] (step=0064336) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.587751907650166, LR: 0.0003 +[2026-03-05 06:08:17] (step=0064337) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.58794756407748, LR: 0.0003 +[2026-03-05 06:08:25] (step=0064338) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 12.588143220504794, LR: 0.0003 +[2026-03-05 06:08:33] (step=0064339) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.588338876932108, LR: 0.0003 +[2026-03-05 06:08:41] (step=0064340) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.588534533359422, LR: 0.0003 +[2026-03-05 06:08:49] (step=0064341) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 12.588730189786734, LR: 0.0003 +[2026-03-05 06:08:57] (step=0064342) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 12.588925846214048, LR: 0.0003 +[2026-03-05 06:09:04] (step=0064343) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.589121502641362, LR: 0.0003 +[2026-03-05 06:09:12] (step=0064344) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.589317159068676, LR: 0.0003 +[2026-03-05 06:09:20] (step=0064345) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.58951281549599, LR: 0.0003 +[2026-03-05 06:09:28] (step=0064346) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.589708471923302, LR: 0.0003 +[2026-03-05 06:09:36] (step=0064347) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.589904128350616, LR: 0.0003 +[2026-03-05 06:09:44] (step=0064348) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.59009978477793, LR: 0.0003 +[2026-03-05 06:09:52] (step=0064349) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.590295441205244, LR: 0.0003 +[2026-03-05 06:09:59] (step=0064350) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.590491097632556, LR: 0.0003 +[2026-03-05 06:10:07] (step=0064351) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.59068675405987, LR: 0.0003 +[2026-03-05 06:10:15] (step=0064352) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 12.590882410487184, LR: 0.0003 +[2026-03-05 06:10:23] (step=0064353) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.591078066914498, LR: 0.0003 +[2026-03-05 06:10:31] (step=0064354) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 12.591273723341812, LR: 0.0003 +[2026-03-05 06:10:39] (step=0064355) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.591469379769125, LR: 0.0003 +[2026-03-05 06:10:46] (step=0064356) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.591665036196439, LR: 0.0003 +[2026-03-05 06:10:54] (step=0064357) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.591860692623753, LR: 0.0003 +[2026-03-05 06:11:02] (step=0064358) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 12.592056349051067, LR: 0.0003 +[2026-03-05 06:11:10] (step=0064359) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.59225200547838, LR: 0.0003 +[2026-03-05 06:11:18] (step=0064360) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.592447661905693, LR: 0.0003 +[2026-03-05 06:11:26] (step=0064361) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.592643318333007, LR: 0.0003 +[2026-03-05 06:11:34] (step=0064362) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.59283897476032, LR: 0.0003 +[2026-03-05 06:11:41] (step=0064363) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.593034631187635, LR: 0.0003 +[2026-03-05 06:11:49] (step=0064364) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.593230287614949, LR: 0.0003 +[2026-03-05 06:11:57] (step=0064365) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.593425944042261, LR: 0.0003 +[2026-03-05 06:12:05] (step=0064366) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 12.593621600469575, LR: 0.0003 +[2026-03-05 06:12:13] (step=0064367) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.59381725689689, LR: 0.0003 +[2026-03-05 06:12:21] (step=0064368) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 12.594012913324203, LR: 0.0003 +[2026-03-05 06:12:29] (step=0064369) Train Loss: 0.4464, Train Steps/Sec: 0.12, Epoch: 12.594208569751517, LR: 0.0003 +[2026-03-05 06:12:37] (step=0064370) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.59440422617883, LR: 0.0003 +[2026-03-05 06:12:44] (step=0064371) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.594599882606143, LR: 0.0003 +[2026-03-05 06:12:52] (step=0064372) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.594795539033457, LR: 0.0003 +[2026-03-05 06:13:00] (step=0064373) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.594991195460771, LR: 0.0003 +[2026-03-05 06:13:08] (step=0064374) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.595186851888085, LR: 0.0003 +[2026-03-05 06:13:16] (step=0064375) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.595382508315398, LR: 0.0003 +[2026-03-05 06:13:24] (step=0064376) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.595578164742712, LR: 0.0003 +[2026-03-05 06:13:31] (step=0064377) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.595773821170026, LR: 0.0003 +[2026-03-05 06:13:39] (step=0064378) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.59596947759734, LR: 0.0003 +[2026-03-05 06:13:47] (step=0064379) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.596165134024652, LR: 0.0003 +[2026-03-05 06:13:55] (step=0064380) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.596360790451966, LR: 0.0003 +[2026-03-05 06:14:03] (step=0064381) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.59655644687928, LR: 0.0003 +[2026-03-05 06:14:11] (step=0064382) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.596752103306594, LR: 0.0003 +[2026-03-05 06:14:19] (step=0064383) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.596947759733908, LR: 0.0003 +[2026-03-05 06:14:26] (step=0064384) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.59714341616122, LR: 0.0003 +[2026-03-05 06:14:34] (step=0064385) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.597339072588534, LR: 0.0003 +[2026-03-05 06:14:42] (step=0064386) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.597534729015848, LR: 0.0003 +[2026-03-05 06:14:50] (step=0064387) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.597730385443162, LR: 0.0003 +[2026-03-05 06:14:58] (step=0064388) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.597926041870476, LR: 0.0003 +[2026-03-05 06:15:06] (step=0064389) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.598121698297788, LR: 0.0003 +[2026-03-05 06:15:13] (step=0064390) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.598317354725102, LR: 0.0003 +[2026-03-05 06:15:21] (step=0064391) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.598513011152416, LR: 0.0003 +[2026-03-05 06:15:29] (step=0064392) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.59870866757973, LR: 0.0003 +[2026-03-05 06:15:37] (step=0064393) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.598904324007044, LR: 0.0003 +[2026-03-05 06:15:45] (step=0064394) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.599099980434357, LR: 0.0003 +[2026-03-05 06:15:53] (step=0064395) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.59929563686167, LR: 0.0003 +[2026-03-05 06:16:01] (step=0064396) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.599491293288985, LR: 0.0003 +[2026-03-05 06:16:09] (step=0064397) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.599686949716299, LR: 0.0003 +[2026-03-05 06:16:16] (step=0064398) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.599882606143613, LR: 0.0003 +[2026-03-05 06:16:24] (step=0064399) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.600078262570925, LR: 0.0003 +[2026-03-05 06:16:32] (step=0064400) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.600273918998239, LR: 0.0003 +[2026-03-05 06:16:40] (step=0064401) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.600469575425553, LR: 0.0003 +[2026-03-05 06:16:48] (step=0064402) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.600665231852867, LR: 0.0003 +[2026-03-05 06:16:56] (step=0064403) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.60086088828018, LR: 0.0003 +[2026-03-05 06:17:03] (step=0064404) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.601056544707493, LR: 0.0003 +[2026-03-05 06:17:11] (step=0064405) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.601252201134807, LR: 0.0003 +[2026-03-05 06:17:19] (step=0064406) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.601447857562121, LR: 0.0003 +[2026-03-05 06:17:27] (step=0064407) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.601643513989435, LR: 0.0003 +[2026-03-05 06:17:35] (step=0064408) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.601839170416747, LR: 0.0003 +[2026-03-05 06:17:43] (step=0064409) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.602034826844061, LR: 0.0003 +[2026-03-05 06:17:50] (step=0064410) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.602230483271375, LR: 0.0003 +[2026-03-05 06:17:58] (step=0064411) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.60242613969869, LR: 0.0003 +[2026-03-05 06:18:06] (step=0064412) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.602621796126003, LR: 0.0003 +[2026-03-05 06:18:14] (step=0064413) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.602817452553316, LR: 0.0003 +[2026-03-05 06:18:22] (step=0064414) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.60301310898063, LR: 0.0003 +[2026-03-05 06:18:30] (step=0064415) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.603208765407944, LR: 0.0003 +[2026-03-05 06:18:38] (step=0064416) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.603404421835258, LR: 0.0003 +[2026-03-05 06:18:45] (step=0064417) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.603600078262572, LR: 0.0003 +[2026-03-05 06:18:53] (step=0064418) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.603795734689884, LR: 0.0003 +[2026-03-05 06:19:01] (step=0064419) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.603991391117198, LR: 0.0003 +[2026-03-05 06:19:09] (step=0064420) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.604187047544512, LR: 0.0003 +[2026-03-05 06:19:17] (step=0064421) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.604382703971826, LR: 0.0003 +[2026-03-05 06:19:25] (step=0064422) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.60457836039914, LR: 0.0003 +[2026-03-05 06:19:33] (step=0064423) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.604774016826452, LR: 0.0003 +[2026-03-05 06:19:40] (step=0064424) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.604969673253766, LR: 0.0003 +[2026-03-05 06:19:48] (step=0064425) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.60516532968108, LR: 0.0003 +[2026-03-05 06:19:56] (step=0064426) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.605360986108394, LR: 0.0003 +[2026-03-05 06:20:04] (step=0064427) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.605556642535708, LR: 0.0003 +[2026-03-05 06:20:12] (step=0064428) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.60575229896302, LR: 0.0003 +[2026-03-05 06:20:20] (step=0064429) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.605947955390334, LR: 0.0003 +[2026-03-05 06:20:27] (step=0064430) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.606143611817648, LR: 0.0003 +[2026-03-05 06:20:35] (step=0064431) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.606339268244962, LR: 0.0003 +[2026-03-05 06:20:43] (step=0064432) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.606534924672275, LR: 0.0003 +[2026-03-05 06:20:51] (step=0064433) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.606730581099589, LR: 0.0003 +[2026-03-05 06:20:59] (step=0064434) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.606926237526903, LR: 0.0003 +[2026-03-05 06:21:07] (step=0064435) Train Loss: 0.4606, Train Steps/Sec: 0.13, Epoch: 12.607121893954217, LR: 0.0003 +[2026-03-05 06:21:15] (step=0064436) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.60731755038153, LR: 0.0003 +[2026-03-05 06:21:23] (step=0064437) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.607513206808843, LR: 0.0003 +[2026-03-05 06:21:30] (step=0064438) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.607708863236157, LR: 0.0003 +[2026-03-05 06:21:38] (step=0064439) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.607904519663471, LR: 0.0003 +[2026-03-05 06:21:46] (step=0064440) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.608100176090785, LR: 0.0003 +[2026-03-05 06:21:54] (step=0064441) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.608295832518099, LR: 0.0003 +[2026-03-05 06:22:02] (step=0064442) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.608491488945411, LR: 0.0003 +[2026-03-05 06:22:10] (step=0064443) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.608687145372725, LR: 0.0003 +[2026-03-05 06:22:17] (step=0064444) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.60888280180004, LR: 0.0003 +[2026-03-05 06:22:25] (step=0064445) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.609078458227353, LR: 0.0003 +[2026-03-05 06:22:33] (step=0064446) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.609274114654667, LR: 0.0003 +[2026-03-05 06:22:41] (step=0064447) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.60946977108198, LR: 0.0003 +[2026-03-05 06:22:49] (step=0064448) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.609665427509293, LR: 0.0003 +[2026-03-05 06:22:57] (step=0064449) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.609861083936607, LR: 0.0003 +[2026-03-05 06:23:05] (step=0064450) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.610056740363921, LR: 0.0003 +[2026-03-05 06:23:12] (step=0064451) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.610252396791235, LR: 0.0003 +[2026-03-05 06:23:20] (step=0064452) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.610448053218548, LR: 0.0003 +[2026-03-05 06:23:28] (step=0064453) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.610643709645862, LR: 0.0003 +[2026-03-05 06:23:36] (step=0064454) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.610839366073176, LR: 0.0003 +[2026-03-05 06:23:44] (step=0064455) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.61103502250049, LR: 0.0003 +[2026-03-05 06:23:52] (step=0064456) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.611230678927802, LR: 0.0003 +[2026-03-05 06:23:59] (step=0064457) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.611426335355116, LR: 0.0003 +[2026-03-05 06:24:07] (step=0064458) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.61162199178243, LR: 0.0003 +[2026-03-05 06:24:15] (step=0064459) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.611817648209744, LR: 0.0003 +[2026-03-05 06:24:23] (step=0064460) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.612013304637058, LR: 0.0003 +[2026-03-05 06:24:31] (step=0064461) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.61220896106437, LR: 0.0003 +[2026-03-05 06:24:39] (step=0064462) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.612404617491684, LR: 0.0003 +[2026-03-05 06:24:46] (step=0064463) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.612600273918998, LR: 0.0003 +[2026-03-05 06:24:54] (step=0064464) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.612795930346312, LR: 0.0003 +[2026-03-05 06:25:02] (step=0064465) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.612991586773626, LR: 0.0003 +[2026-03-05 06:25:10] (step=0064466) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.613187243200938, LR: 0.0003 +[2026-03-05 06:25:18] (step=0064467) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.613382899628252, LR: 0.0003 +[2026-03-05 06:25:26] (step=0064468) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.613578556055566, LR: 0.0003 +[2026-03-05 06:25:34] (step=0064469) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.61377421248288, LR: 0.0003 +[2026-03-05 06:25:42] (step=0064470) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.613969868910194, LR: 0.0003 +[2026-03-05 06:25:49] (step=0064471) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.614165525337507, LR: 0.0003 +[2026-03-05 06:25:57] (step=0064472) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 12.61436118176482, LR: 0.0003 +[2026-03-05 06:26:05] (step=0064473) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.614556838192135, LR: 0.0003 +[2026-03-05 06:26:13] (step=0064474) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.614752494619449, LR: 0.0003 +[2026-03-05 06:26:21] (step=0064475) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.614948151046763, LR: 0.0003 +[2026-03-05 06:26:29] (step=0064476) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.615143807474075, LR: 0.0003 +[2026-03-05 06:26:37] (step=0064477) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.615339463901389, LR: 0.0003 +[2026-03-05 06:26:44] (step=0064478) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.615535120328703, LR: 0.0003 +[2026-03-05 06:26:52] (step=0064479) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.615730776756017, LR: 0.0003 +[2026-03-05 06:27:00] (step=0064480) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.615926433183331, LR: 0.0003 +[2026-03-05 06:27:08] (step=0064481) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.616122089610643, LR: 0.0003 +[2026-03-05 06:27:16] (step=0064482) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.616317746037957, LR: 0.0003 +[2026-03-05 06:27:24] (step=0064483) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.616513402465271, LR: 0.0003 +[2026-03-05 06:27:31] (step=0064484) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.616709058892585, LR: 0.0003 +[2026-03-05 06:27:39] (step=0064485) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.616904715319897, LR: 0.0003 +[2026-03-05 06:27:47] (step=0064486) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.617100371747211, LR: 0.0003 +[2026-03-05 06:27:55] (step=0064487) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.617296028174525, LR: 0.0003 +[2026-03-05 06:28:03] (step=0064488) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.61749168460184, LR: 0.0003 +[2026-03-05 06:28:11] (step=0064489) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.617687341029153, LR: 0.0003 +[2026-03-05 06:28:19] (step=0064490) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.617882997456466, LR: 0.0003 +[2026-03-05 06:28:26] (step=0064491) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.61807865388378, LR: 0.0003 +[2026-03-05 06:28:34] (step=0064492) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.618274310311094, LR: 0.0003 +[2026-03-05 06:28:42] (step=0064493) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.618469966738408, LR: 0.0003 +[2026-03-05 06:28:50] (step=0064494) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.618665623165722, LR: 0.0003 +[2026-03-05 06:28:58] (step=0064495) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.618861279593034, LR: 0.0003 +[2026-03-05 06:29:06] (step=0064496) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.619056936020348, LR: 0.0003 +[2026-03-05 06:29:14] (step=0064497) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.619252592447662, LR: 0.0003 +[2026-03-05 06:29:21] (step=0064498) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.619448248874976, LR: 0.0003 +[2026-03-05 06:29:29] (step=0064499) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 12.61964390530229, LR: 0.0003 +[2026-03-05 06:29:37] (step=0064500) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.619839561729602, LR: 0.0003 +[2026-03-05 06:29:37] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0064500/ +[2026-03-05 06:29:45] (step=0064501) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.620035218156916, LR: 0.0003 +[2026-03-05 06:29:53] (step=0064502) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.62023087458423, LR: 0.0003 +[2026-03-05 06:30:01] (step=0064503) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.620426531011544, LR: 0.0003 +[2026-03-05 06:30:09] (step=0064504) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.620622187438858, LR: 0.0003 +[2026-03-05 06:30:16] (step=0064505) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.62081784386617, LR: 0.0003 +[2026-03-05 06:30:24] (step=0064506) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.621013500293484, LR: 0.0003 +[2026-03-05 06:30:32] (step=0064507) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.621209156720798, LR: 0.0003 +[2026-03-05 06:30:40] (step=0064508) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 12.621404813148112, LR: 0.0003 +[2026-03-05 06:30:48] (step=0064509) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.621600469575425, LR: 0.0003 +[2026-03-05 06:30:56] (step=0064510) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.621796126002739, LR: 0.0003 +[2026-03-05 06:31:03] (step=0064511) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.621991782430053, LR: 0.0003 +[2026-03-05 06:31:11] (step=0064512) Train Loss: 0.4444, Train Steps/Sec: 0.12, Epoch: 12.622187438857367, LR: 0.0003 +[2026-03-05 06:31:19] (step=0064513) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 12.62238309528468, LR: 0.0003 +[2026-03-05 06:31:27] (step=0064514) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 12.622578751711993, LR: 0.0003 +[2026-03-05 06:31:35] (step=0064515) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.622774408139307, LR: 0.0003 +[2026-03-05 06:31:43] (step=0064516) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 12.622970064566621, LR: 0.0003 +[2026-03-05 06:31:51] (step=0064517) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.623165720993935, LR: 0.0003 +[2026-03-05 06:31:59] (step=0064518) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 12.623361377421249, LR: 0.0003 +[2026-03-05 06:32:06] (step=0064519) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.623557033848561, LR: 0.0003 +[2026-03-05 06:32:14] (step=0064520) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.623752690275875, LR: 0.0003 +[2026-03-05 06:32:22] (step=0064521) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.62394834670319, LR: 0.0003 +[2026-03-05 06:32:30] (step=0064522) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.624144003130503, LR: 0.0003 +[2026-03-05 06:32:38] (step=0064523) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 12.624339659557817, LR: 0.0003 +[2026-03-05 06:32:46] (step=0064524) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.62453531598513, LR: 0.0003 +[2026-03-05 06:32:54] (step=0064525) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.624730972412443, LR: 0.0003 +[2026-03-05 06:33:01] (step=0064526) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.624926628839757, LR: 0.0003 +[2026-03-05 06:33:09] (step=0064527) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.625122285267071, LR: 0.0003 +[2026-03-05 06:33:17] (step=0064528) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.625317941694385, LR: 0.0003 +[2026-03-05 06:33:25] (step=0064529) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.625513598121698, LR: 0.0003 +[2026-03-05 06:33:33] (step=0064530) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.625709254549012, LR: 0.0003 +[2026-03-05 06:33:41] (step=0064531) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.625904910976326, LR: 0.0003 +[2026-03-05 06:33:48] (step=0064532) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.62610056740364, LR: 0.0003 +[2026-03-05 06:33:56] (step=0064533) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.626296223830952, LR: 0.0003 +[2026-03-05 06:34:04] (step=0064534) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.626491880258266, LR: 0.0003 +[2026-03-05 06:34:12] (step=0064535) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.62668753668558, LR: 0.0003 +[2026-03-05 06:34:20] (step=0064536) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.626883193112894, LR: 0.0003 +[2026-03-05 06:34:28] (step=0064537) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 12.627078849540208, LR: 0.0003 +[2026-03-05 06:34:36] (step=0064538) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.62727450596752, LR: 0.0003 +[2026-03-05 06:34:44] (step=0064539) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 12.627470162394834, LR: 0.0003 +[2026-03-05 06:34:51] (step=0064540) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.627665818822148, LR: 0.0003 +[2026-03-05 06:34:59] (step=0064541) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.627861475249462, LR: 0.0003 +[2026-03-05 06:35:07] (step=0064542) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.628057131676776, LR: 0.0003 +[2026-03-05 06:35:15] (step=0064543) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.628252788104088, LR: 0.0003 +[2026-03-05 06:35:23] (step=0064544) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.628448444531402, LR: 0.0003 +[2026-03-05 06:35:31] (step=0064545) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.628644100958716, LR: 0.0003 +[2026-03-05 06:35:38] (step=0064546) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 12.62883975738603, LR: 0.0003 +[2026-03-05 06:35:46] (step=0064547) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.629035413813344, LR: 0.0003 +[2026-03-05 06:35:54] (step=0064548) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.629231070240657, LR: 0.0003 +[2026-03-05 06:36:02] (step=0064549) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.62942672666797, LR: 0.0003 +[2026-03-05 06:36:10] (step=0064550) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.629622383095285, LR: 0.0003 +[2026-03-05 06:36:18] (step=0064551) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 12.629818039522599, LR: 0.0003 +[2026-03-05 06:36:26] (step=0064552) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.630013695949913, LR: 0.0003 +[2026-03-05 06:36:33] (step=0064553) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 12.630209352377225, LR: 0.0003 +[2026-03-05 06:36:41] (step=0064554) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.630405008804539, LR: 0.0003 +[2026-03-05 06:36:49] (step=0064555) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.630600665231853, LR: 0.0003 +[2026-03-05 06:36:57] (step=0064556) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.630796321659167, LR: 0.0003 +[2026-03-05 06:37:05] (step=0064557) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.630991978086481, LR: 0.0003 +[2026-03-05 06:37:13] (step=0064558) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.631187634513793, LR: 0.0003 +[2026-03-05 06:37:21] (step=0064559) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.631383290941107, LR: 0.0003 +[2026-03-05 06:37:28] (step=0064560) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 12.631578947368421, LR: 0.0003 +[2026-03-05 06:37:36] (step=0064561) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.631774603795735, LR: 0.0003 +[2026-03-05 06:37:44] (step=0064562) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.631970260223047, LR: 0.0003 +[2026-03-05 06:37:52] (step=0064563) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.632165916650361, LR: 0.0003 +[2026-03-05 06:38:00] (step=0064564) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.632361573077675, LR: 0.0003 +[2026-03-05 06:38:08] (step=0064565) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.63255722950499, LR: 0.0003 +[2026-03-05 06:38:16] (step=0064566) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.632752885932303, LR: 0.0003 +[2026-03-05 06:38:23] (step=0064567) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.632948542359616, LR: 0.0003 +[2026-03-05 06:38:31] (step=0064568) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 12.63314419878693, LR: 0.0003 +[2026-03-05 06:38:39] (step=0064569) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.633339855214244, LR: 0.0003 +[2026-03-05 06:38:47] (step=0064570) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.633535511641558, LR: 0.0003 +[2026-03-05 06:38:55] (step=0064571) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.633731168068872, LR: 0.0003 +[2026-03-05 06:39:03] (step=0064572) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.633926824496184, LR: 0.0003 +[2026-03-05 06:39:11] (step=0064573) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.634122480923498, LR: 0.0003 +[2026-03-05 06:39:18] (step=0064574) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.634318137350812, LR: 0.0003 +[2026-03-05 06:39:26] (step=0064575) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 12.634513793778126, LR: 0.0003 +[2026-03-05 06:39:34] (step=0064576) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.63470945020544, LR: 0.0003 +[2026-03-05 06:39:42] (step=0064577) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.634905106632752, LR: 0.0003 +[2026-03-05 06:39:50] (step=0064578) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.635100763060066, LR: 0.0003 +[2026-03-05 06:39:58] (step=0064579) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 12.63529641948738, LR: 0.0003 +[2026-03-05 06:40:05] (step=0064580) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.635492075914694, LR: 0.0003 +[2026-03-05 06:40:13] (step=0064581) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.635687732342008, LR: 0.0003 +[2026-03-05 06:40:21] (step=0064582) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 12.63588338876932, LR: 0.0003 +[2026-03-05 06:40:29] (step=0064583) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.636079045196634, LR: 0.0003 +[2026-03-05 06:40:37] (step=0064584) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.636274701623949, LR: 0.0003 +[2026-03-05 06:40:45] (step=0064585) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.636470358051263, LR: 0.0003 +[2026-03-05 06:40:53] (step=0064586) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.636666014478575, LR: 0.0003 +[2026-03-05 06:41:01] (step=0064587) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.636861670905889, LR: 0.0003 +[2026-03-05 06:41:08] (step=0064588) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.637057327333203, LR: 0.0003 +[2026-03-05 06:41:16] (step=0064589) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.637252983760517, LR: 0.0003 +[2026-03-05 06:41:24] (step=0064590) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.63744864018783, LR: 0.0003 +[2026-03-05 06:41:32] (step=0064591) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.637644296615143, LR: 0.0003 +[2026-03-05 06:41:40] (step=0064592) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.637839953042457, LR: 0.0003 +[2026-03-05 06:41:48] (step=0064593) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.638035609469771, LR: 0.0003 +[2026-03-05 06:41:55] (step=0064594) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.638231265897085, LR: 0.0003 +[2026-03-05 06:42:03] (step=0064595) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.638426922324399, LR: 0.0003 +[2026-03-05 06:42:11] (step=0064596) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.638622578751711, LR: 0.0003 +[2026-03-05 06:42:19] (step=0064597) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 12.638818235179025, LR: 0.0003 +[2026-03-05 06:42:27] (step=0064598) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.63901389160634, LR: 0.0003 +[2026-03-05 06:42:35] (step=0064599) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.639209548033653, LR: 0.0003 +[2026-03-05 06:42:43] (step=0064600) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.639405204460967, LR: 0.0003 +[2026-03-05 06:42:50] (step=0064601) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.63960086088828, LR: 0.0003 +[2026-03-05 06:42:58] (step=0064602) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.639796517315594, LR: 0.0003 +[2026-03-05 06:43:06] (step=0064603) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.639992173742908, LR: 0.0003 +[2026-03-05 06:43:14] (step=0064604) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.640187830170222, LR: 0.0003 +[2026-03-05 06:43:22] (step=0064605) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.640383486597536, LR: 0.0003 +[2026-03-05 06:43:30] (step=0064606) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.640579143024848, LR: 0.0003 +[2026-03-05 06:43:37] (step=0064607) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.640774799452162, LR: 0.0003 +[2026-03-05 06:43:45] (step=0064608) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 12.640970455879476, LR: 0.0003 +[2026-03-05 06:43:53] (step=0064609) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.64116611230679, LR: 0.0003 +[2026-03-05 06:44:01] (step=0064610) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.641361768734104, LR: 0.0003 +[2026-03-05 06:44:09] (step=0064611) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.641557425161416, LR: 0.0003 +[2026-03-05 06:44:17] (step=0064612) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.64175308158873, LR: 0.0003 +[2026-03-05 06:44:25] (step=0064613) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.641948738016044, LR: 0.0003 +[2026-03-05 06:44:33] (step=0064614) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.642144394443358, LR: 0.0003 +[2026-03-05 06:44:40] (step=0064615) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 12.64234005087067, LR: 0.0003 +[2026-03-05 06:44:48] (step=0064616) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.642535707297984, LR: 0.0003 +[2026-03-05 06:44:56] (step=0064617) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.642731363725298, LR: 0.0003 +[2026-03-05 06:45:04] (step=0064618) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 12.642927020152612, LR: 0.0003 +[2026-03-05 06:45:12] (step=0064619) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.643122676579926, LR: 0.0003 +[2026-03-05 06:45:20] (step=0064620) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.643318333007239, LR: 0.0003 +[2026-03-05 06:45:27] (step=0064621) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 12.643513989434553, LR: 0.0003 +[2026-03-05 06:45:35] (step=0064622) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.643709645861867, LR: 0.0003 +[2026-03-05 06:45:43] (step=0064623) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.64390530228918, LR: 0.0003 +[2026-03-05 06:45:51] (step=0064624) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.644100958716495, LR: 0.0003 +[2026-03-05 06:45:59] (step=0064625) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.644296615143807, LR: 0.0003 +[2026-03-05 06:46:07] (step=0064626) Train Loss: 0.4246, Train Steps/Sec: 0.13, Epoch: 12.64449227157112, LR: 0.0003 +[2026-03-05 06:46:14] (step=0064627) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 12.644687927998435, LR: 0.0003 +[2026-03-05 06:46:22] (step=0064628) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.644883584425749, LR: 0.0003 +[2026-03-05 06:46:30] (step=0064629) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.645079240853063, LR: 0.0003 +[2026-03-05 06:46:38] (step=0064630) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.645274897280375, LR: 0.0003 +[2026-03-05 06:46:46] (step=0064631) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.645470553707689, LR: 0.0003 +[2026-03-05 06:46:54] (step=0064632) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.645666210135003, LR: 0.0003 +[2026-03-05 06:47:02] (step=0064633) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.645861866562317, LR: 0.0003 +[2026-03-05 06:47:09] (step=0064634) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.646057522989631, LR: 0.0003 +[2026-03-05 06:47:17] (step=0064635) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.646253179416943, LR: 0.0003 +[2026-03-05 06:47:25] (step=0064636) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.646448835844257, LR: 0.0003 +[2026-03-05 06:47:33] (step=0064637) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 12.646644492271571, LR: 0.0003 +[2026-03-05 06:47:41] (step=0064638) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.646840148698885, LR: 0.0003 +[2026-03-05 06:47:49] (step=0064639) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.647035805126198, LR: 0.0003 +[2026-03-05 06:47:56] (step=0064640) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.647231461553512, LR: 0.0003 +[2026-03-05 06:48:04] (step=0064641) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.647427117980826, LR: 0.0003 +[2026-03-05 06:48:12] (step=0064642) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.64762277440814, LR: 0.0003 +[2026-03-05 06:48:20] (step=0064643) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.647818430835454, LR: 0.0003 +[2026-03-05 06:48:28] (step=0064644) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 12.648014087262766, LR: 0.0003 +[2026-03-05 06:48:36] (step=0064645) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 12.64820974369008, LR: 0.0003 +[2026-03-05 06:48:44] (step=0064646) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.648405400117394, LR: 0.0003 +[2026-03-05 06:48:51] (step=0064647) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.648601056544708, LR: 0.0003 +[2026-03-05 06:48:59] (step=0064648) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 12.648796712972022, LR: 0.0003 +[2026-03-05 06:49:07] (step=0064649) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 12.648992369399334, LR: 0.0003 +[2026-03-05 06:49:15] (step=0064650) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.649188025826648, LR: 0.0003 +[2026-03-05 06:49:23] (step=0064651) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.649383682253962, LR: 0.0003 +[2026-03-05 06:49:31] (step=0064652) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.649579338681276, LR: 0.0003 +[2026-03-05 06:49:39] (step=0064653) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.64977499510859, LR: 0.0003 +[2026-03-05 06:49:46] (step=0064654) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.649970651535902, LR: 0.0003 +[2026-03-05 06:49:54] (step=0064655) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.650166307963216, LR: 0.0003 +[2026-03-05 06:50:02] (step=0064656) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.65036196439053, LR: 0.0003 +[2026-03-05 06:50:10] (step=0064657) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.650557620817844, LR: 0.0003 +[2026-03-05 06:50:18] (step=0064658) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.650753277245158, LR: 0.0003 +[2026-03-05 06:50:26] (step=0064659) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.65094893367247, LR: 0.0003 +[2026-03-05 06:50:34] (step=0064660) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.651144590099785, LR: 0.0003 +[2026-03-05 06:50:41] (step=0064661) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 12.651340246527099, LR: 0.0003 +[2026-03-05 06:50:49] (step=0064662) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.651535902954413, LR: 0.0003 +[2026-03-05 06:50:57] (step=0064663) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.651731559381727, LR: 0.0003 +[2026-03-05 06:51:05] (step=0064664) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.651927215809039, LR: 0.0003 +[2026-03-05 06:51:13] (step=0064665) Train Loss: 0.4261, Train Steps/Sec: 0.13, Epoch: 12.652122872236353, LR: 0.0003 +[2026-03-05 06:51:21] (step=0064666) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.652318528663667, LR: 0.0003 +[2026-03-05 06:51:29] (step=0064667) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.65251418509098, LR: 0.0003 +[2026-03-05 06:51:36] (step=0064668) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.652709841518293, LR: 0.0003 +[2026-03-05 06:51:44] (step=0064669) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.652905497945607, LR: 0.0003 +[2026-03-05 06:51:52] (step=0064670) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.653101154372921, LR: 0.0003 +[2026-03-05 06:52:00] (step=0064671) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.653296810800235, LR: 0.0003 +[2026-03-05 06:52:08] (step=0064672) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.653492467227549, LR: 0.0003 +[2026-03-05 06:52:16] (step=0064673) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.653688123654861, LR: 0.0003 +[2026-03-05 06:52:23] (step=0064674) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.653883780082175, LR: 0.0003 +[2026-03-05 06:52:31] (step=0064675) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.65407943650949, LR: 0.0003 +[2026-03-05 06:52:39] (step=0064676) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.654275092936803, LR: 0.0003 +[2026-03-05 06:52:47] (step=0064677) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.654470749364117, LR: 0.0003 +[2026-03-05 06:52:55] (step=0064678) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.65466640579143, LR: 0.0003 +[2026-03-05 06:53:03] (step=0064679) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.654862062218744, LR: 0.0003 +[2026-03-05 06:53:11] (step=0064680) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.655057718646058, LR: 0.0003 +[2026-03-05 06:53:18] (step=0064681) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.655253375073372, LR: 0.0003 +[2026-03-05 06:53:26] (step=0064682) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.655449031500686, LR: 0.0003 +[2026-03-05 06:53:34] (step=0064683) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.655644687927998, LR: 0.0003 +[2026-03-05 06:53:42] (step=0064684) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.655840344355312, LR: 0.0003 +[2026-03-05 06:53:50] (step=0064685) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.656036000782626, LR: 0.0003 +[2026-03-05 06:53:58] (step=0064686) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.65623165720994, LR: 0.0003 +[2026-03-05 06:54:06] (step=0064687) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.656427313637254, LR: 0.0003 +[2026-03-05 06:54:13] (step=0064688) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.656622970064566, LR: 0.0003 +[2026-03-05 06:54:21] (step=0064689) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.65681862649188, LR: 0.0003 +[2026-03-05 06:54:29] (step=0064690) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 12.657014282919194, LR: 0.0003 +[2026-03-05 06:54:37] (step=0064691) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.657209939346508, LR: 0.0003 +[2026-03-05 06:54:45] (step=0064692) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.65740559577382, LR: 0.0003 +[2026-03-05 06:54:53] (step=0064693) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.657601252201134, LR: 0.0003 +[2026-03-05 06:55:01] (step=0064694) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.657796908628448, LR: 0.0003 +[2026-03-05 06:55:08] (step=0064695) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.657992565055762, LR: 0.0003 +[2026-03-05 06:55:16] (step=0064696) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.658188221483076, LR: 0.0003 +[2026-03-05 06:55:24] (step=0064697) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.658383877910389, LR: 0.0003 +[2026-03-05 06:55:32] (step=0064698) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.658579534337703, LR: 0.0003 +[2026-03-05 06:55:40] (step=0064699) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 12.658775190765017, LR: 0.0003 +[2026-03-05 06:55:48] (step=0064700) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.65897084719233, LR: 0.0003 +[2026-03-05 06:55:55] (step=0064701) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.659166503619645, LR: 0.0003 +[2026-03-05 06:56:03] (step=0064702) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.659362160046957, LR: 0.0003 +[2026-03-05 06:56:11] (step=0064703) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.65955781647427, LR: 0.0003 +[2026-03-05 06:56:19] (step=0064704) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 12.659753472901585, LR: 0.0003 +[2026-03-05 06:56:27] (step=0064705) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.659949129328899, LR: 0.0003 +[2026-03-05 06:56:35] (step=0064706) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.660144785756213, LR: 0.0003 +[2026-03-05 06:56:43] (step=0064707) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.660340442183525, LR: 0.0003 +[2026-03-05 06:56:51] (step=0064708) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.660536098610839, LR: 0.0003 +[2026-03-05 06:56:58] (step=0064709) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.660731755038153, LR: 0.0003 +[2026-03-05 06:57:06] (step=0064710) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.660927411465467, LR: 0.0003 +[2026-03-05 06:57:14] (step=0064711) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.661123067892781, LR: 0.0003 +[2026-03-05 06:57:22] (step=0064712) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.661318724320093, LR: 0.0003 +[2026-03-05 06:57:30] (step=0064713) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.661514380747407, LR: 0.0003 +[2026-03-05 06:57:38] (step=0064714) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.661710037174721, LR: 0.0003 +[2026-03-05 06:57:45] (step=0064715) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.661905693602035, LR: 0.0003 +[2026-03-05 06:57:53] (step=0064716) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.66210135002935, LR: 0.0003 +[2026-03-05 06:58:01] (step=0064717) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.662297006456662, LR: 0.0003 +[2026-03-05 06:58:09] (step=0064718) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.662492662883976, LR: 0.0003 +[2026-03-05 06:58:17] (step=0064719) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 12.66268831931129, LR: 0.0003 +[2026-03-05 06:58:25] (step=0064720) Train Loss: 0.4228, Train Steps/Sec: 0.13, Epoch: 12.662883975738604, LR: 0.0003 +[2026-03-05 06:58:33] (step=0064721) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.663079632165916, LR: 0.0003 +[2026-03-05 06:58:40] (step=0064722) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.66327528859323, LR: 0.0003 +[2026-03-05 06:58:48] (step=0064723) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.663470945020544, LR: 0.0003 +[2026-03-05 06:58:56] (step=0064724) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.663666601447858, LR: 0.0003 +[2026-03-05 06:59:04] (step=0064725) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.663862257875172, LR: 0.0003 +[2026-03-05 06:59:12] (step=0064726) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.664057914302484, LR: 0.0003 +[2026-03-05 06:59:20] (step=0064727) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.664253570729798, LR: 0.0003 +[2026-03-05 06:59:27] (step=0064728) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.664449227157112, LR: 0.0003 +[2026-03-05 06:59:35] (step=0064729) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.664644883584426, LR: 0.0003 +[2026-03-05 06:59:43] (step=0064730) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.66484054001174, LR: 0.0003 +[2026-03-05 06:59:51] (step=0064731) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 12.665036196439052, LR: 0.0003 +[2026-03-05 06:59:59] (step=0064732) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 12.665231852866366, LR: 0.0003 +[2026-03-05 07:00:07] (step=0064733) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.66542750929368, LR: 0.0003 +[2026-03-05 07:00:15] (step=0064734) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.665623165720994, LR: 0.0003 +[2026-03-05 07:00:23] (step=0064735) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.665818822148308, LR: 0.0003 +[2026-03-05 07:00:30] (step=0064736) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.66601447857562, LR: 0.0003 +[2026-03-05 07:00:38] (step=0064737) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 12.666210135002935, LR: 0.0003 +[2026-03-05 07:00:46] (step=0064738) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 12.666405791430249, LR: 0.0003 +[2026-03-05 07:00:54] (step=0064739) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.666601447857563, LR: 0.0003 +[2026-03-05 07:01:02] (step=0064740) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.666797104284877, LR: 0.0003 +[2026-03-05 07:01:10] (step=0064741) Train Loss: 0.4257, Train Steps/Sec: 0.13, Epoch: 12.666992760712189, LR: 0.0003 +[2026-03-05 07:01:17] (step=0064742) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.667188417139503, LR: 0.0003 +[2026-03-05 07:01:25] (step=0064743) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.667384073566817, LR: 0.0003 +[2026-03-05 07:01:33] (step=0064744) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.66757972999413, LR: 0.0003 +[2026-03-05 07:01:41] (step=0064745) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.667775386421443, LR: 0.0003 +[2026-03-05 07:01:49] (step=0064746) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.667971042848757, LR: 0.0003 +[2026-03-05 07:01:57] (step=0064747) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 12.668166699276071, LR: 0.0003 +[2026-03-05 07:02:05] (step=0064748) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.668362355703385, LR: 0.0003 +[2026-03-05 07:02:12] (step=0064749) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.668558012130699, LR: 0.0003 +[2026-03-05 07:02:20] (step=0064750) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.668753668558011, LR: 0.0003 +[2026-03-05 07:02:28] (step=0064751) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.668949324985325, LR: 0.0003 +[2026-03-05 07:02:36] (step=0064752) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.66914498141264, LR: 0.0003 +[2026-03-05 07:02:44] (step=0064753) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.669340637839953, LR: 0.0003 +[2026-03-05 07:02:52] (step=0064754) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.669536294267267, LR: 0.0003 +[2026-03-05 07:02:59] (step=0064755) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.66973195069458, LR: 0.0003 +[2026-03-05 07:03:07] (step=0064756) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.669927607121894, LR: 0.0003 +[2026-03-05 07:03:15] (step=0064757) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.670123263549208, LR: 0.0003 +[2026-03-05 07:03:23] (step=0064758) Train Loss: 0.4473, Train Steps/Sec: 0.12, Epoch: 12.670318919976522, LR: 0.0003 +[2026-03-05 07:03:31] (step=0064759) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.670514576403836, LR: 0.0003 +[2026-03-05 07:03:39] (step=0064760) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.670710232831148, LR: 0.0003 +[2026-03-05 07:03:47] (step=0064761) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.670905889258462, LR: 0.0003 +[2026-03-05 07:03:55] (step=0064762) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.671101545685776, LR: 0.0003 +[2026-03-05 07:04:03] (step=0064763) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.67129720211309, LR: 0.0003 +[2026-03-05 07:04:10] (step=0064764) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.671492858540404, LR: 0.0003 +[2026-03-05 07:04:18] (step=0064765) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.671688514967716, LR: 0.0003 +[2026-03-05 07:04:26] (step=0064766) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 12.67188417139503, LR: 0.0003 +[2026-03-05 07:04:34] (step=0064767) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 12.672079827822344, LR: 0.0003 +[2026-03-05 07:04:42] (step=0064768) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 12.672275484249658, LR: 0.0003 +[2026-03-05 07:04:50] (step=0064769) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.672471140676972, LR: 0.0003 +[2026-03-05 07:04:58] (step=0064770) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.672666797104284, LR: 0.0003 +[2026-03-05 07:05:05] (step=0064771) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.672862453531598, LR: 0.0003 +[2026-03-05 07:05:13] (step=0064772) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.673058109958912, LR: 0.0003 +[2026-03-05 07:05:21] (step=0064773) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.673253766386226, LR: 0.0003 +[2026-03-05 07:05:29] (step=0064774) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.673449422813539, LR: 0.0003 +[2026-03-05 07:05:37] (step=0064775) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.673645079240853, LR: 0.0003 +[2026-03-05 07:05:45] (step=0064776) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.673840735668167, LR: 0.0003 +[2026-03-05 07:05:52] (step=0064777) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 12.67403639209548, LR: 0.0003 +[2026-03-05 07:06:00] (step=0064778) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.674232048522795, LR: 0.0003 +[2026-03-05 07:06:08] (step=0064779) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.674427704950107, LR: 0.0003 +[2026-03-05 07:06:16] (step=0064780) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.67462336137742, LR: 0.0003 +[2026-03-05 07:06:24] (step=0064781) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.674819017804735, LR: 0.0003 +[2026-03-05 07:06:32] (step=0064782) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.675014674232049, LR: 0.0003 +[2026-03-05 07:06:39] (step=0064783) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 12.675210330659363, LR: 0.0003 +[2026-03-05 07:06:47] (step=0064784) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 12.675405987086675, LR: 0.0003 +[2026-03-05 07:06:55] (step=0064785) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 12.675601643513989, LR: 0.0003 +[2026-03-05 07:07:03] (step=0064786) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.675797299941303, LR: 0.0003 +[2026-03-05 07:07:11] (step=0064787) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.675992956368617, LR: 0.0003 +[2026-03-05 07:07:19] (step=0064788) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.676188612795931, LR: 0.0003 +[2026-03-05 07:07:27] (step=0064789) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.676384269223243, LR: 0.0003 +[2026-03-05 07:07:35] (step=0064790) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.676579925650557, LR: 0.0003 +[2026-03-05 07:07:42] (step=0064791) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.676775582077871, LR: 0.0003 +[2026-03-05 07:07:50] (step=0064792) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.676971238505185, LR: 0.0003 +[2026-03-05 07:07:58] (step=0064793) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.6771668949325, LR: 0.0003 +[2026-03-05 07:08:06] (step=0064794) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.677362551359812, LR: 0.0003 +[2026-03-05 07:08:14] (step=0064795) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.677558207787126, LR: 0.0003 +[2026-03-05 07:08:22] (step=0064796) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.67775386421444, LR: 0.0003 +[2026-03-05 07:08:30] (step=0064797) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.677949520641754, LR: 0.0003 +[2026-03-05 07:08:37] (step=0064798) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.678145177069066, LR: 0.0003 +[2026-03-05 07:08:45] (step=0064799) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.67834083349638, LR: 0.0003 +[2026-03-05 07:08:53] (step=0064800) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.678536489923694, LR: 0.0003 +[2026-03-05 07:09:01] (step=0064801) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.678732146351008, LR: 0.0003 +[2026-03-05 07:09:09] (step=0064802) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.678927802778322, LR: 0.0003 +[2026-03-05 07:09:17] (step=0064803) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.679123459205634, LR: 0.0003 +[2026-03-05 07:09:24] (step=0064804) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.679319115632948, LR: 0.0003 +[2026-03-05 07:09:32] (step=0064805) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.679514772060262, LR: 0.0003 +[2026-03-05 07:09:40] (step=0064806) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.679710428487576, LR: 0.0003 +[2026-03-05 07:09:48] (step=0064807) Train Loss: 0.4320, Train Steps/Sec: 0.12, Epoch: 12.67990608491489, LR: 0.0003 +[2026-03-05 07:09:56] (step=0064808) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.680101741342202, LR: 0.0003 +[2026-03-05 07:10:04] (step=0064809) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 12.680297397769516, LR: 0.0003 +[2026-03-05 07:10:12] (step=0064810) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.68049305419683, LR: 0.0003 +[2026-03-05 07:10:20] (step=0064811) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.680688710624144, LR: 0.0003 +[2026-03-05 07:10:27] (step=0064812) Train Loss: 0.4198, Train Steps/Sec: 0.13, Epoch: 12.680884367051458, LR: 0.0003 +[2026-03-05 07:10:35] (step=0064813) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.68108002347877, LR: 0.0003 +[2026-03-05 07:10:43] (step=0064814) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 12.681275679906085, LR: 0.0003 +[2026-03-05 07:10:51] (step=0064815) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.681471336333399, LR: 0.0003 +[2026-03-05 07:10:59] (step=0064816) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.681666992760713, LR: 0.0003 +[2026-03-05 07:11:07] (step=0064817) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.681862649188027, LR: 0.0003 +[2026-03-05 07:11:14] (step=0064818) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.682058305615339, LR: 0.0003 +[2026-03-05 07:11:22] (step=0064819) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.682253962042653, LR: 0.0003 +[2026-03-05 07:11:30] (step=0064820) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.682449618469967, LR: 0.0003 +[2026-03-05 07:11:38] (step=0064821) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.68264527489728, LR: 0.0003 +[2026-03-05 07:11:46] (step=0064822) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.682840931324595, LR: 0.0003 +[2026-03-05 07:11:54] (step=0064823) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.683036587751907, LR: 0.0003 +[2026-03-05 07:12:02] (step=0064824) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.683232244179221, LR: 0.0003 +[2026-03-05 07:12:09] (step=0064825) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.683427900606535, LR: 0.0003 +[2026-03-05 07:12:17] (step=0064826) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.68362355703385, LR: 0.0003 +[2026-03-05 07:12:25] (step=0064827) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.683819213461161, LR: 0.0003 +[2026-03-05 07:12:33] (step=0064828) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.684014869888475, LR: 0.0003 +[2026-03-05 07:12:41] (step=0064829) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.68421052631579, LR: 0.0003 +[2026-03-05 07:12:49] (step=0064830) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 12.684406182743103, LR: 0.0003 +[2026-03-05 07:12:56] (step=0064831) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.684601839170417, LR: 0.0003 +[2026-03-05 07:13:04] (step=0064832) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.68479749559773, LR: 0.0003 +[2026-03-05 07:13:12] (step=0064833) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.684993152025044, LR: 0.0003 +[2026-03-05 07:13:20] (step=0064834) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.685188808452358, LR: 0.0003 +[2026-03-05 07:13:28] (step=0064835) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.685384464879672, LR: 0.0003 +[2026-03-05 07:13:36] (step=0064836) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.685580121306986, LR: 0.0003 +[2026-03-05 07:13:44] (step=0064837) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.685775777734298, LR: 0.0003 +[2026-03-05 07:13:52] (step=0064838) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 12.685971434161612, LR: 0.0003 +[2026-03-05 07:13:59] (step=0064839) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.686167090588926, LR: 0.0003 +[2026-03-05 07:14:07] (step=0064840) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.68636274701624, LR: 0.0003 +[2026-03-05 07:14:15] (step=0064841) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.686558403443554, LR: 0.0003 +[2026-03-05 07:14:23] (step=0064842) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.686754059870866, LR: 0.0003 +[2026-03-05 07:14:31] (step=0064843) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.68694971629818, LR: 0.0003 +[2026-03-05 07:14:39] (step=0064844) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.687145372725494, LR: 0.0003 +[2026-03-05 07:14:47] (step=0064845) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.687341029152808, LR: 0.0003 +[2026-03-05 07:14:54] (step=0064846) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.687536685580122, LR: 0.0003 +[2026-03-05 07:15:02] (step=0064847) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.687732342007434, LR: 0.0003 +[2026-03-05 07:15:10] (step=0064848) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.687927998434748, LR: 0.0003 +[2026-03-05 07:15:18] (step=0064849) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.688123654862062, LR: 0.0003 +[2026-03-05 07:15:26] (step=0064850) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.688319311289376, LR: 0.0003 +[2026-03-05 07:15:34] (step=0064851) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.688514967716689, LR: 0.0003 +[2026-03-05 07:15:42] (step=0064852) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.688710624144003, LR: 0.0003 +[2026-03-05 07:15:49] (step=0064853) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.688906280571317, LR: 0.0003 +[2026-03-05 07:15:57] (step=0064854) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.68910193699863, LR: 0.0003 +[2026-03-05 07:16:05] (step=0064855) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.689297593425945, LR: 0.0003 +[2026-03-05 07:16:13] (step=0064856) Train Loss: 0.4348, Train Steps/Sec: 0.12, Epoch: 12.689493249853257, LR: 0.0003 +[2026-03-05 07:16:21] (step=0064857) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.689688906280571, LR: 0.0003 +[2026-03-05 07:16:29] (step=0064858) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.689884562707885, LR: 0.0003 +[2026-03-05 07:16:37] (step=0064859) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.690080219135199, LR: 0.0003 +[2026-03-05 07:16:44] (step=0064860) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.690275875562513, LR: 0.0003 +[2026-03-05 07:16:52] (step=0064861) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.690471531989825, LR: 0.0003 +[2026-03-05 07:17:00] (step=0064862) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 12.69066718841714, LR: 0.0003 +[2026-03-05 07:17:08] (step=0064863) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.690862844844453, LR: 0.0003 +[2026-03-05 07:17:16] (step=0064864) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.691058501271767, LR: 0.0003 +[2026-03-05 07:17:24] (step=0064865) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.691254157699081, LR: 0.0003 +[2026-03-05 07:17:32] (step=0064866) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.691449814126393, LR: 0.0003 +[2026-03-05 07:17:39] (step=0064867) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 12.691645470553707, LR: 0.0003 +[2026-03-05 07:17:47] (step=0064868) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.691841126981021, LR: 0.0003 +[2026-03-05 07:17:55] (step=0064869) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.692036783408335, LR: 0.0003 +[2026-03-05 07:18:03] (step=0064870) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.69223243983565, LR: 0.0003 +[2026-03-05 07:18:11] (step=0064871) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.692428096262962, LR: 0.0003 +[2026-03-05 07:18:19] (step=0064872) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.692623752690276, LR: 0.0003 +[2026-03-05 07:18:26] (step=0064873) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.69281940911759, LR: 0.0003 +[2026-03-05 07:18:34] (step=0064874) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.693015065544904, LR: 0.0003 +[2026-03-05 07:18:42] (step=0064875) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.693210721972218, LR: 0.0003 +[2026-03-05 07:18:50] (step=0064876) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.69340637839953, LR: 0.0003 +[2026-03-05 07:18:58] (step=0064877) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.693602034826844, LR: 0.0003 +[2026-03-05 07:19:06] (step=0064878) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.693797691254158, LR: 0.0003 +[2026-03-05 07:19:14] (step=0064879) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.693993347681472, LR: 0.0003 +[2026-03-05 07:19:21] (step=0064880) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.694189004108784, LR: 0.0003 +[2026-03-05 07:19:29] (step=0064881) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.694384660536098, LR: 0.0003 +[2026-03-05 07:19:37] (step=0064882) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.694580316963412, LR: 0.0003 +[2026-03-05 07:19:45] (step=0064883) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 12.694775973390726, LR: 0.0003 +[2026-03-05 07:19:53] (step=0064884) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.69497162981804, LR: 0.0003 +[2026-03-05 07:20:01] (step=0064885) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.695167286245352, LR: 0.0003 +[2026-03-05 07:20:09] (step=0064886) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.695362942672666, LR: 0.0003 +[2026-03-05 07:20:16] (step=0064887) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 12.69555859909998, LR: 0.0003 +[2026-03-05 07:20:24] (step=0064888) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.695754255527294, LR: 0.0003 +[2026-03-05 07:20:32] (step=0064889) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.695949911954608, LR: 0.0003 +[2026-03-05 07:20:40] (step=0064890) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 12.69614556838192, LR: 0.0003 +[2026-03-05 07:20:48] (step=0064891) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.696341224809235, LR: 0.0003 +[2026-03-05 07:20:56] (step=0064892) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.696536881236549, LR: 0.0003 +[2026-03-05 07:21:03] (step=0064893) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.696732537663863, LR: 0.0003 +[2026-03-05 07:21:11] (step=0064894) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.696928194091177, LR: 0.0003 +[2026-03-05 07:21:19] (step=0064895) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 12.697123850518489, LR: 0.0003 +[2026-03-05 07:21:27] (step=0064896) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.697319506945803, LR: 0.0003 +[2026-03-05 07:21:35] (step=0064897) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.697515163373117, LR: 0.0003 +[2026-03-05 07:21:43] (step=0064898) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.697710819800431, LR: 0.0003 +[2026-03-05 07:21:50] (step=0064899) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.697906476227745, LR: 0.0003 +[2026-03-05 07:21:58] (step=0064900) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.698102132655057, LR: 0.0003 +[2026-03-05 07:22:06] (step=0064901) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 12.698297789082371, LR: 0.0003 +[2026-03-05 07:22:14] (step=0064902) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.698493445509685, LR: 0.0003 +[2026-03-05 07:22:22] (step=0064903) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.698689101937, LR: 0.0003 +[2026-03-05 07:22:30] (step=0064904) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.698884758364311, LR: 0.0003 +[2026-03-05 07:22:38] (step=0064905) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.699080414791625, LR: 0.0003 +[2026-03-05 07:22:45] (step=0064906) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.69927607121894, LR: 0.0003 +[2026-03-05 07:22:53] (step=0064907) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.699471727646253, LR: 0.0003 +[2026-03-05 07:23:01] (step=0064908) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.699667384073567, LR: 0.0003 +[2026-03-05 07:23:09] (step=0064909) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.69986304050088, LR: 0.0003 +[2026-03-05 07:23:17] (step=0064910) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.700058696928194, LR: 0.0003 +[2026-03-05 07:23:25] (step=0064911) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.700254353355508, LR: 0.0003 +[2026-03-05 07:23:33] (step=0064912) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.700450009782822, LR: 0.0003 +[2026-03-05 07:23:40] (step=0064913) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.700645666210136, LR: 0.0003 +[2026-03-05 07:23:48] (step=0064914) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.700841322637448, LR: 0.0003 +[2026-03-05 07:23:56] (step=0064915) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.701036979064762, LR: 0.0003 +[2026-03-05 07:24:04] (step=0064916) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.701232635492076, LR: 0.0003 +[2026-03-05 07:24:12] (step=0064917) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.70142829191939, LR: 0.0003 +[2026-03-05 07:24:20] (step=0064918) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 12.701623948346704, LR: 0.0003 +[2026-03-05 07:24:27] (step=0064919) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.701819604774016, LR: 0.0003 +[2026-03-05 07:24:35] (step=0064920) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.70201526120133, LR: 0.0003 +[2026-03-05 07:24:43] (step=0064921) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.702210917628644, LR: 0.0003 +[2026-03-05 07:24:51] (step=0064922) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.702406574055958, LR: 0.0003 +[2026-03-05 07:24:59] (step=0064923) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 12.702602230483272, LR: 0.0003 +[2026-03-05 07:25:07] (step=0064924) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 12.702797886910584, LR: 0.0003 +[2026-03-05 07:25:15] (step=0064925) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 12.702993543337898, LR: 0.0003 +[2026-03-05 07:25:22] (step=0064926) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.703189199765212, LR: 0.0003 +[2026-03-05 07:25:30] (step=0064927) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 12.703384856192526, LR: 0.0003 +[2026-03-05 07:25:38] (step=0064928) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.70358051261984, LR: 0.0003 +[2026-03-05 07:25:46] (step=0064929) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.703776169047153, LR: 0.0003 +[2026-03-05 07:25:54] (step=0064930) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.703971825474467, LR: 0.0003 +[2026-03-05 07:26:02] (step=0064931) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.70416748190178, LR: 0.0003 +[2026-03-05 07:26:10] (step=0064932) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.704363138329095, LR: 0.0003 +[2026-03-05 07:26:17] (step=0064933) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.704558794756407, LR: 0.0003 +[2026-03-05 07:26:25] (step=0064934) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 12.704754451183721, LR: 0.0003 +[2026-03-05 07:26:33] (step=0064935) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 12.704950107611035, LR: 0.0003 +[2026-03-05 07:26:41] (step=0064936) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.705145764038349, LR: 0.0003 +[2026-03-05 07:26:49] (step=0064937) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.705341420465663, LR: 0.0003 +[2026-03-05 07:26:57] (step=0064938) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.705537076892975, LR: 0.0003 +[2026-03-05 07:27:05] (step=0064939) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.70573273332029, LR: 0.0003 +[2026-03-05 07:27:12] (step=0064940) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.705928389747603, LR: 0.0003 +[2026-03-05 07:27:20] (step=0064941) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.706124046174917, LR: 0.0003 +[2026-03-05 07:27:28] (step=0064942) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.706319702602231, LR: 0.0003 +[2026-03-05 07:27:36] (step=0064943) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.706515359029543, LR: 0.0003 +[2026-03-05 07:27:44] (step=0064944) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.706711015456857, LR: 0.0003 +[2026-03-05 07:27:52] (step=0064945) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.706906671884171, LR: 0.0003 +[2026-03-05 07:27:59] (step=0064946) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.707102328311485, LR: 0.0003 +[2026-03-05 07:28:07] (step=0064947) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.7072979847388, LR: 0.0003 +[2026-03-05 07:28:15] (step=0064948) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 12.707493641166112, LR: 0.0003 +[2026-03-05 07:28:23] (step=0064949) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.707689297593426, LR: 0.0003 +[2026-03-05 07:28:31] (step=0064950) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.70788495402074, LR: 0.0003 +[2026-03-05 07:28:39] (step=0064951) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.708080610448054, LR: 0.0003 +[2026-03-05 07:28:46] (step=0064952) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.708276266875368, LR: 0.0003 +[2026-03-05 07:28:55] (step=0064953) Train Loss: 0.4463, Train Steps/Sec: 0.12, Epoch: 12.70847192330268, LR: 0.0003 +[2026-03-05 07:29:02] (step=0064954) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.708667579729994, LR: 0.0003 +[2026-03-05 07:29:10] (step=0064955) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.708863236157308, LR: 0.0003 +[2026-03-05 07:29:18] (step=0064956) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.709058892584622, LR: 0.0003 +[2026-03-05 07:29:26] (step=0064957) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.709254549011934, LR: 0.0003 +[2026-03-05 07:29:34] (step=0064958) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.709450205439248, LR: 0.0003 +[2026-03-05 07:29:42] (step=0064959) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.709645861866562, LR: 0.0003 +[2026-03-05 07:29:49] (step=0064960) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.709841518293876, LR: 0.0003 +[2026-03-05 07:29:57] (step=0064961) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.71003717472119, LR: 0.0003 +[2026-03-05 07:30:05] (step=0064962) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.710232831148502, LR: 0.0003 +[2026-03-05 07:30:13] (step=0064963) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.710428487575816, LR: 0.0003 +[2026-03-05 07:30:21] (step=0064964) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.71062414400313, LR: 0.0003 +[2026-03-05 07:30:29] (step=0064965) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.710819800430444, LR: 0.0003 +[2026-03-05 07:30:37] (step=0064966) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.711015456857758, LR: 0.0003 +[2026-03-05 07:30:44] (step=0064967) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 12.71121111328507, LR: 0.0003 +[2026-03-05 07:30:52] (step=0064968) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.711406769712385, LR: 0.0003 +[2026-03-05 07:31:00] (step=0064969) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.711602426139699, LR: 0.0003 +[2026-03-05 07:31:08] (step=0064970) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.711798082567013, LR: 0.0003 +[2026-03-05 07:31:16] (step=0064971) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.711993738994327, LR: 0.0003 +[2026-03-05 07:31:24] (step=0064972) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.712189395421639, LR: 0.0003 +[2026-03-05 07:31:32] (step=0064973) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.712385051848953, LR: 0.0003 +[2026-03-05 07:31:39] (step=0064974) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.712580708276267, LR: 0.0003 +[2026-03-05 07:31:47] (step=0064975) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.712776364703581, LR: 0.0003 +[2026-03-05 07:31:55] (step=0064976) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 12.712972021130895, LR: 0.0003 +[2026-03-05 07:32:03] (step=0064977) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 12.713167677558207, LR: 0.0003 +[2026-03-05 07:32:11] (step=0064978) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.713363333985521, LR: 0.0003 +[2026-03-05 07:32:19] (step=0064979) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.713558990412835, LR: 0.0003 +[2026-03-05 07:32:26] (step=0064980) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.71375464684015, LR: 0.0003 +[2026-03-05 07:32:34] (step=0064981) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.713950303267461, LR: 0.0003 +[2026-03-05 07:32:42] (step=0064982) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.714145959694775, LR: 0.0003 +[2026-03-05 07:32:50] (step=0064983) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.71434161612209, LR: 0.0003 +[2026-03-05 07:32:58] (step=0064984) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.714537272549403, LR: 0.0003 +[2026-03-05 07:33:06] (step=0064985) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.714732928976717, LR: 0.0003 +[2026-03-05 07:33:14] (step=0064986) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.71492858540403, LR: 0.0003 +[2026-03-05 07:33:22] (step=0064987) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.715124241831344, LR: 0.0003 +[2026-03-05 07:33:29] (step=0064988) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.715319898258658, LR: 0.0003 +[2026-03-05 07:33:37] (step=0064989) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.715515554685972, LR: 0.0003 +[2026-03-05 07:33:45] (step=0064990) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.715711211113286, LR: 0.0003 +[2026-03-05 07:33:53] (step=0064991) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 12.715906867540598, LR: 0.0003 +[2026-03-05 07:34:01] (step=0064992) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.716102523967912, LR: 0.0003 +[2026-03-05 07:34:09] (step=0064993) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.716298180395226, LR: 0.0003 +[2026-03-05 07:34:17] (step=0064994) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.71649383682254, LR: 0.0003 +[2026-03-05 07:34:24] (step=0064995) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.716689493249854, LR: 0.0003 +[2026-03-05 07:34:32] (step=0064996) Train Loss: 0.4361, Train Steps/Sec: 0.12, Epoch: 12.716885149677166, LR: 0.0003 +[2026-03-05 07:34:40] (step=0064997) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.71708080610448, LR: 0.0003 +[2026-03-05 07:34:48] (step=0064998) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.717276462531794, LR: 0.0003 +[2026-03-05 07:34:56] (step=0064999) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.717472118959108, LR: 0.0003 +[2026-03-05 07:35:04] (step=0065000) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 12.717667775386422, LR: 0.0003 +[2026-03-05 07:35:04] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0065000/ +[2026-03-05 07:35:12] (step=0065001) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.717863431813734, LR: 0.0003 +[2026-03-05 07:35:20] (step=0065002) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.718059088241048, LR: 0.0003 +[2026-03-05 07:35:27] (step=0065003) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.718254744668362, LR: 0.0003 +[2026-03-05 07:35:35] (step=0065004) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 12.718450401095676, LR: 0.0003 +[2026-03-05 07:35:43] (step=0065005) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.71864605752299, LR: 0.0003 +[2026-03-05 07:35:51] (step=0065006) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.718841713950303, LR: 0.0003 +[2026-03-05 07:35:59] (step=0065007) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 12.719037370377617, LR: 0.0003 +[2026-03-05 07:36:07] (step=0065008) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.71923302680493, LR: 0.0003 +[2026-03-05 07:36:14] (step=0065009) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.719428683232245, LR: 0.0003 +[2026-03-05 07:36:22] (step=0065010) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.719624339659557, LR: 0.0003 +[2026-03-05 07:36:30] (step=0065011) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.719819996086871, LR: 0.0003 +[2026-03-05 07:36:38] (step=0065012) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.720015652514185, LR: 0.0003 +[2026-03-05 07:36:46] (step=0065013) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.720211308941499, LR: 0.0003 +[2026-03-05 07:36:54] (step=0065014) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.720406965368813, LR: 0.0003 +[2026-03-05 07:37:02] (step=0065015) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.720602621796125, LR: 0.0003 +[2026-03-05 07:37:10] (step=0065016) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.72079827822344, LR: 0.0003 +[2026-03-05 07:37:17] (step=0065017) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 12.720993934650753, LR: 0.0003 +[2026-03-05 07:37:25] (step=0065018) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.721189591078067, LR: 0.0003 +[2026-03-05 07:37:33] (step=0065019) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.721385247505381, LR: 0.0003 +[2026-03-05 07:37:41] (step=0065020) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.721580903932693, LR: 0.0003 +[2026-03-05 07:37:49] (step=0065021) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.721776560360007, LR: 0.0003 +[2026-03-05 07:37:57] (step=0065022) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.721972216787321, LR: 0.0003 +[2026-03-05 07:38:05] (step=0065023) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.722167873214635, LR: 0.0003 +[2026-03-05 07:38:12] (step=0065024) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.72236352964195, LR: 0.0003 +[2026-03-05 07:38:20] (step=0065025) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.722559186069262, LR: 0.0003 +[2026-03-05 07:38:28] (step=0065026) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.722754842496576, LR: 0.0003 +[2026-03-05 07:38:36] (step=0065027) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.72295049892389, LR: 0.0003 +[2026-03-05 07:38:44] (step=0065028) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.723146155351204, LR: 0.0003 +[2026-03-05 07:38:52] (step=0065029) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.723341811778518, LR: 0.0003 +[2026-03-05 07:38:59] (step=0065030) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.72353746820583, LR: 0.0003 +[2026-03-05 07:39:07] (step=0065031) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.723733124633144, LR: 0.0003 +[2026-03-05 07:39:15] (step=0065032) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.723928781060458, LR: 0.0003 +[2026-03-05 07:39:23] (step=0065033) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 12.724124437487772, LR: 0.0003 +[2026-03-05 07:39:31] (step=0065034) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.724320093915084, LR: 0.0003 +[2026-03-05 07:39:39] (step=0065035) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 12.724515750342398, LR: 0.0003 +[2026-03-05 07:39:47] (step=0065036) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.724711406769712, LR: 0.0003 +[2026-03-05 07:39:55] (step=0065037) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.724907063197026, LR: 0.0003 +[2026-03-05 07:40:02] (step=0065038) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.72510271962434, LR: 0.0003 +[2026-03-05 07:40:10] (step=0065039) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.725298376051652, LR: 0.0003 +[2026-03-05 07:40:18] (step=0065040) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.725494032478966, LR: 0.0003 +[2026-03-05 07:40:26] (step=0065041) Train Loss: 0.4420, Train Steps/Sec: 0.12, Epoch: 12.72568968890628, LR: 0.0003 +[2026-03-05 07:40:34] (step=0065042) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.725885345333595, LR: 0.0003 +[2026-03-05 07:40:42] (step=0065043) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.726081001760909, LR: 0.0003 +[2026-03-05 07:40:50] (step=0065044) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.72627665818822, LR: 0.0003 +[2026-03-05 07:40:57] (step=0065045) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.726472314615535, LR: 0.0003 +[2026-03-05 07:41:05] (step=0065046) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.726667971042849, LR: 0.0003 +[2026-03-05 07:41:13] (step=0065047) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.726863627470163, LR: 0.0003 +[2026-03-05 07:41:21] (step=0065048) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.727059283897477, LR: 0.0003 +[2026-03-05 07:41:29] (step=0065049) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.727254940324789, LR: 0.0003 +[2026-03-05 07:41:37] (step=0065050) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.727450596752103, LR: 0.0003 +[2026-03-05 07:41:45] (step=0065051) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.727646253179417, LR: 0.0003 +[2026-03-05 07:41:52] (step=0065052) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.727841909606731, LR: 0.0003 +[2026-03-05 07:42:00] (step=0065053) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.728037566034045, LR: 0.0003 +[2026-03-05 07:42:08] (step=0065054) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 12.728233222461357, LR: 0.0003 +[2026-03-05 07:42:16] (step=0065055) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 12.728428878888671, LR: 0.0003 +[2026-03-05 07:42:24] (step=0065056) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.728624535315985, LR: 0.0003 +[2026-03-05 07:42:32] (step=0065057) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.7288201917433, LR: 0.0003 +[2026-03-05 07:42:40] (step=0065058) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.729015848170613, LR: 0.0003 +[2026-03-05 07:42:47] (step=0065059) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.729211504597925, LR: 0.0003 +[2026-03-05 07:42:55] (step=0065060) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.72940716102524, LR: 0.0003 +[2026-03-05 07:43:03] (step=0065061) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.729602817452554, LR: 0.0003 +[2026-03-05 07:43:11] (step=0065062) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.729798473879868, LR: 0.0003 +[2026-03-05 07:43:19] (step=0065063) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.72999413030718, LR: 0.0003 +[2026-03-05 07:43:27] (step=0065064) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.730189786734494, LR: 0.0003 +[2026-03-05 07:43:34] (step=0065065) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.730385443161808, LR: 0.0003 +[2026-03-05 07:43:42] (step=0065066) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.730581099589122, LR: 0.0003 +[2026-03-05 07:43:50] (step=0065067) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.730776756016436, LR: 0.0003 +[2026-03-05 07:43:58] (step=0065068) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.730972412443748, LR: 0.0003 +[2026-03-05 07:44:06] (step=0065069) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.731168068871062, LR: 0.0003 +[2026-03-05 07:44:14] (step=0065070) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.731363725298376, LR: 0.0003 +[2026-03-05 07:44:22] (step=0065071) Train Loss: 0.4634, Train Steps/Sec: 0.13, Epoch: 12.73155938172569, LR: 0.0003 +[2026-03-05 07:44:29] (step=0065072) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.731755038153004, LR: 0.0003 +[2026-03-05 07:44:37] (step=0065073) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.731950694580316, LR: 0.0003 +[2026-03-05 07:44:45] (step=0065074) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 12.73214635100763, LR: 0.0003 +[2026-03-05 07:44:53] (step=0065075) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.732342007434944, LR: 0.0003 +[2026-03-05 07:45:01] (step=0065076) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.732537663862258, LR: 0.0003 +[2026-03-05 07:45:09] (step=0065077) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.732733320289572, LR: 0.0003 +[2026-03-05 07:45:16] (step=0065078) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 12.732928976716885, LR: 0.0003 +[2026-03-05 07:45:24] (step=0065079) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.733124633144199, LR: 0.0003 +[2026-03-05 07:45:32] (step=0065080) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.733320289571513, LR: 0.0003 +[2026-03-05 07:45:40] (step=0065081) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.733515945998827, LR: 0.0003 +[2026-03-05 07:45:48] (step=0065082) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.73371160242614, LR: 0.0003 +[2026-03-05 07:45:56] (step=0065083) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.733907258853453, LR: 0.0003 +[2026-03-05 07:46:04] (step=0065084) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.734102915280767, LR: 0.0003 +[2026-03-05 07:46:12] (step=0065085) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.73429857170808, LR: 0.0003 +[2026-03-05 07:46:19] (step=0065086) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.734494228135395, LR: 0.0003 +[2026-03-05 07:46:27] (step=0065087) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.734689884562707, LR: 0.0003 +[2026-03-05 07:46:35] (step=0065088) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.734885540990021, LR: 0.0003 +[2026-03-05 07:46:43] (step=0065089) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.735081197417335, LR: 0.0003 +[2026-03-05 07:46:51] (step=0065090) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.735276853844649, LR: 0.0003 +[2026-03-05 07:46:59] (step=0065091) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.735472510271963, LR: 0.0003 +[2026-03-05 07:47:06] (step=0065092) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.735668166699275, LR: 0.0003 +[2026-03-05 07:47:14] (step=0065093) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.73586382312659, LR: 0.0003 +[2026-03-05 07:47:22] (step=0065094) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 12.736059479553903, LR: 0.0003 +[2026-03-05 07:47:30] (step=0065095) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.736255135981217, LR: 0.0003 +[2026-03-05 07:47:38] (step=0065096) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.736450792408531, LR: 0.0003 +[2026-03-05 07:47:46] (step=0065097) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.736646448835844, LR: 0.0003 +[2026-03-05 07:47:54] (step=0065098) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.736842105263158, LR: 0.0003 +[2026-03-05 07:48:01] (step=0065099) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.737037761690472, LR: 0.0003 +[2026-03-05 07:48:09] (step=0065100) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.737233418117786, LR: 0.0003 +[2026-03-05 07:48:17] (step=0065101) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.7374290745451, LR: 0.0003 +[2026-03-05 07:48:25] (step=0065102) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.737624730972412, LR: 0.0003 +[2026-03-05 07:48:33] (step=0065103) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.737820387399726, LR: 0.0003 +[2026-03-05 07:48:41] (step=0065104) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.73801604382704, LR: 0.0003 +[2026-03-05 07:48:49] (step=0065105) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.738211700254354, LR: 0.0003 +[2026-03-05 07:48:56] (step=0065106) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.738407356681668, LR: 0.0003 +[2026-03-05 07:49:04] (step=0065107) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.73860301310898, LR: 0.0003 +[2026-03-05 07:49:12] (step=0065108) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.738798669536294, LR: 0.0003 +[2026-03-05 07:49:20] (step=0065109) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.738994325963608, LR: 0.0003 +[2026-03-05 07:49:28] (step=0065110) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.739189982390922, LR: 0.0003 +[2026-03-05 07:49:36] (step=0065111) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.739385638818236, LR: 0.0003 +[2026-03-05 07:49:43] (step=0065112) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.739581295245548, LR: 0.0003 +[2026-03-05 07:49:51] (step=0065113) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.739776951672862, LR: 0.0003 +[2026-03-05 07:49:59] (step=0065114) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.739972608100176, LR: 0.0003 +[2026-03-05 07:50:07] (step=0065115) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.74016826452749, LR: 0.0003 +[2026-03-05 07:50:15] (step=0065116) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 12.740363920954803, LR: 0.0003 +[2026-03-05 07:50:23] (step=0065117) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.740559577382117, LR: 0.0003 +[2026-03-05 07:50:31] (step=0065118) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.74075523380943, LR: 0.0003 +[2026-03-05 07:50:38] (step=0065119) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.740950890236745, LR: 0.0003 +[2026-03-05 07:50:46] (step=0065120) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.741146546664059, LR: 0.0003 +[2026-03-05 07:50:54] (step=0065121) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.74134220309137, LR: 0.0003 +[2026-03-05 07:51:02] (step=0065122) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.741537859518685, LR: 0.0003 +[2026-03-05 07:51:10] (step=0065123) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.741733515945999, LR: 0.0003 +[2026-03-05 07:51:18] (step=0065124) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.741929172373313, LR: 0.0003 +[2026-03-05 07:51:26] (step=0065125) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.742124828800627, LR: 0.0003 +[2026-03-05 07:51:34] (step=0065126) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.742320485227939, LR: 0.0003 +[2026-03-05 07:51:41] (step=0065127) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.742516141655253, LR: 0.0003 +[2026-03-05 07:51:49] (step=0065128) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.742711798082567, LR: 0.0003 +[2026-03-05 07:51:57] (step=0065129) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.742907454509881, LR: 0.0003 +[2026-03-05 07:52:05] (step=0065130) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.743103110937195, LR: 0.0003 +[2026-03-05 07:52:13] (step=0065131) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.743298767364507, LR: 0.0003 +[2026-03-05 07:52:21] (step=0065132) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.743494423791821, LR: 0.0003 +[2026-03-05 07:52:28] (step=0065133) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.743690080219135, LR: 0.0003 +[2026-03-05 07:52:36] (step=0065134) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.74388573664645, LR: 0.0003 +[2026-03-05 07:52:44] (step=0065135) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 12.744081393073763, LR: 0.0003 +[2026-03-05 07:52:52] (step=0065136) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.744277049501076, LR: 0.0003 +[2026-03-05 07:53:00] (step=0065137) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.74447270592839, LR: 0.0003 +[2026-03-05 07:53:08] (step=0065138) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.744668362355704, LR: 0.0003 +[2026-03-05 07:53:16] (step=0065139) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.744864018783018, LR: 0.0003 +[2026-03-05 07:53:23] (step=0065140) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.74505967521033, LR: 0.0003 +[2026-03-05 07:53:31] (step=0065141) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.745255331637644, LR: 0.0003 +[2026-03-05 07:53:39] (step=0065142) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.745450988064958, LR: 0.0003 +[2026-03-05 07:53:47] (step=0065143) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.745646644492272, LR: 0.0003 +[2026-03-05 07:53:55] (step=0065144) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.745842300919586, LR: 0.0003 +[2026-03-05 07:54:03] (step=0065145) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.746037957346898, LR: 0.0003 +[2026-03-05 07:54:10] (step=0065146) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.746233613774212, LR: 0.0003 +[2026-03-05 07:54:18] (step=0065147) Train Loss: 0.4331, Train Steps/Sec: 0.12, Epoch: 12.746429270201526, LR: 0.0003 +[2026-03-05 07:54:26] (step=0065148) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.74662492662884, LR: 0.0003 +[2026-03-05 07:54:34] (step=0065149) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.746820583056154, LR: 0.0003 +[2026-03-05 07:54:42] (step=0065150) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.747016239483466, LR: 0.0003 +[2026-03-05 07:54:50] (step=0065151) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.74721189591078, LR: 0.0003 +[2026-03-05 07:54:58] (step=0065152) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.747407552338094, LR: 0.0003 +[2026-03-05 07:55:06] (step=0065153) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.747603208765408, LR: 0.0003 +[2026-03-05 07:55:13] (step=0065154) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 12.747798865192722, LR: 0.0003 +[2026-03-05 07:55:21] (step=0065155) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.747994521620035, LR: 0.0003 +[2026-03-05 07:55:29] (step=0065156) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.748190178047349, LR: 0.0003 +[2026-03-05 07:55:37] (step=0065157) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.748385834474663, LR: 0.0003 +[2026-03-05 07:55:45] (step=0065158) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.748581490901977, LR: 0.0003 +[2026-03-05 07:55:53] (step=0065159) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.74877714732929, LR: 0.0003 +[2026-03-05 07:56:00] (step=0065160) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.748972803756603, LR: 0.0003 +[2026-03-05 07:56:08] (step=0065161) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.749168460183917, LR: 0.0003 +[2026-03-05 07:56:16] (step=0065162) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.74936411661123, LR: 0.0003 +[2026-03-05 07:56:24] (step=0065163) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.749559773038545, LR: 0.0003 +[2026-03-05 07:56:32] (step=0065164) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.749755429465859, LR: 0.0003 +[2026-03-05 07:56:40] (step=0065165) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.749951085893171, LR: 0.0003 +[2026-03-05 07:56:48] (step=0065166) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 12.750146742320485, LR: 0.0003 +[2026-03-05 07:56:55] (step=0065167) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.750342398747799, LR: 0.0003 +[2026-03-05 07:57:03] (step=0065168) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.750538055175113, LR: 0.0003 +[2026-03-05 07:57:11] (step=0065169) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.750733711602425, LR: 0.0003 +[2026-03-05 07:57:19] (step=0065170) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.75092936802974, LR: 0.0003 +[2026-03-05 07:57:27] (step=0065171) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 12.751125024457053, LR: 0.0003 +[2026-03-05 07:57:35] (step=0065172) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.751320680884367, LR: 0.0003 +[2026-03-05 07:57:43] (step=0065173) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.751516337311681, LR: 0.0003 +[2026-03-05 07:57:50] (step=0065174) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.751711993738994, LR: 0.0003 +[2026-03-05 07:57:58] (step=0065175) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.751907650166308, LR: 0.0003 +[2026-03-05 07:58:06] (step=0065176) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 12.752103306593622, LR: 0.0003 +[2026-03-05 07:58:14] (step=0065177) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.752298963020936, LR: 0.0003 +[2026-03-05 07:58:22] (step=0065178) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.75249461944825, LR: 0.0003 +[2026-03-05 07:58:30] (step=0065179) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.752690275875562, LR: 0.0003 +[2026-03-05 07:58:38] (step=0065180) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.752885932302876, LR: 0.0003 +[2026-03-05 07:58:45] (step=0065181) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.75308158873019, LR: 0.0003 +[2026-03-05 07:58:53] (step=0065182) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 12.753277245157504, LR: 0.0003 +[2026-03-05 07:59:01] (step=0065183) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.753472901584818, LR: 0.0003 +[2026-03-05 07:59:09] (step=0065184) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.75366855801213, LR: 0.0003 +[2026-03-05 07:59:17] (step=0065185) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.753864214439444, LR: 0.0003 +[2026-03-05 07:59:25] (step=0065186) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 12.754059870866758, LR: 0.0003 +[2026-03-05 07:59:32] (step=0065187) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.754255527294072, LR: 0.0003 +[2026-03-05 07:59:40] (step=0065188) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.754451183721386, LR: 0.0003 +[2026-03-05 07:59:48] (step=0065189) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 12.754646840148698, LR: 0.0003 +[2026-03-05 07:59:56] (step=0065190) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.754842496576012, LR: 0.0003 +[2026-03-05 08:00:04] (step=0065191) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.755038153003326, LR: 0.0003 +[2026-03-05 08:00:12] (step=0065192) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.75523380943064, LR: 0.0003 +[2026-03-05 08:00:20] (step=0065193) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.755429465857953, LR: 0.0003 +[2026-03-05 08:00:27] (step=0065194) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.755625122285267, LR: 0.0003 +[2026-03-05 08:00:35] (step=0065195) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.75582077871258, LR: 0.0003 +[2026-03-05 08:00:43] (step=0065196) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.756016435139895, LR: 0.0003 +[2026-03-05 08:00:51] (step=0065197) Train Loss: 0.4327, Train Steps/Sec: 0.12, Epoch: 12.756212091567209, LR: 0.0003 +[2026-03-05 08:00:59] (step=0065198) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.75640774799452, LR: 0.0003 +[2026-03-05 08:01:07] (step=0065199) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.756603404421835, LR: 0.0003 +[2026-03-05 08:01:15] (step=0065200) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.756799060849149, LR: 0.0003 +[2026-03-05 08:01:23] (step=0065201) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.756994717276463, LR: 0.0003 +[2026-03-05 08:01:30] (step=0065202) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.757190373703777, LR: 0.0003 +[2026-03-05 08:01:38] (step=0065203) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.757386030131089, LR: 0.0003 +[2026-03-05 08:01:46] (step=0065204) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.757581686558403, LR: 0.0003 +[2026-03-05 08:01:54] (step=0065205) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.757777342985717, LR: 0.0003 +[2026-03-05 08:02:02] (step=0065206) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.757972999413031, LR: 0.0003 +[2026-03-05 08:02:10] (step=0065207) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.758168655840345, LR: 0.0003 +[2026-03-05 08:02:18] (step=0065208) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 12.758364312267657, LR: 0.0003 +[2026-03-05 08:02:25] (step=0065209) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.758559968694971, LR: 0.0003 +[2026-03-05 08:02:33] (step=0065210) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.758755625122285, LR: 0.0003 +[2026-03-05 08:02:41] (step=0065211) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 12.7589512815496, LR: 0.0003 +[2026-03-05 08:02:49] (step=0065212) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.759146937976913, LR: 0.0003 +[2026-03-05 08:02:57] (step=0065213) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.759342594404226, LR: 0.0003 +[2026-03-05 08:03:05] (step=0065214) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.75953825083154, LR: 0.0003 +[2026-03-05 08:03:12] (step=0065215) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 12.759733907258854, LR: 0.0003 +[2026-03-05 08:03:20] (step=0065216) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.759929563686168, LR: 0.0003 +[2026-03-05 08:03:28] (step=0065217) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 12.760125220113482, LR: 0.0003 +[2026-03-05 08:03:36] (step=0065218) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.760320876540794, LR: 0.0003 +[2026-03-05 08:03:44] (step=0065219) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.760516532968108, LR: 0.0003 +[2026-03-05 08:03:52] (step=0065220) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.760712189395422, LR: 0.0003 +[2026-03-05 08:03:59] (step=0065221) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.760907845822736, LR: 0.0003 +[2026-03-05 08:04:07] (step=0065222) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.761103502250048, LR: 0.0003 +[2026-03-05 08:04:15] (step=0065223) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 12.761299158677362, LR: 0.0003 +[2026-03-05 08:04:23] (step=0065224) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.761494815104676, LR: 0.0003 +[2026-03-05 08:04:31] (step=0065225) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.76169047153199, LR: 0.0003 +[2026-03-05 08:04:39] (step=0065226) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.761886127959304, LR: 0.0003 +[2026-03-05 08:04:47] (step=0065227) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.762081784386616, LR: 0.0003 +[2026-03-05 08:04:54] (step=0065228) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.76227744081393, LR: 0.0003 +[2026-03-05 08:05:02] (step=0065229) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.762473097241244, LR: 0.0003 +[2026-03-05 08:05:10] (step=0065230) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.762668753668558, LR: 0.0003 +[2026-03-05 08:05:18] (step=0065231) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.762864410095872, LR: 0.0003 +[2026-03-05 08:05:26] (step=0065232) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.763060066523185, LR: 0.0003 +[2026-03-05 08:05:34] (step=0065233) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.763255722950499, LR: 0.0003 +[2026-03-05 08:05:42] (step=0065234) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.763451379377813, LR: 0.0003 +[2026-03-05 08:05:49] (step=0065235) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.763647035805127, LR: 0.0003 +[2026-03-05 08:05:57] (step=0065236) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.76384269223244, LR: 0.0003 +[2026-03-05 08:06:05] (step=0065237) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.764038348659753, LR: 0.0003 +[2026-03-05 08:06:13] (step=0065238) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.764234005087067, LR: 0.0003 +[2026-03-05 08:06:21] (step=0065239) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.76442966151438, LR: 0.0003 +[2026-03-05 08:06:29] (step=0065240) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.764625317941695, LR: 0.0003 +[2026-03-05 08:06:37] (step=0065241) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 12.764820974369009, LR: 0.0003 +[2026-03-05 08:06:44] (step=0065242) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.765016630796321, LR: 0.0003 +[2026-03-05 08:06:52] (step=0065243) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 12.765212287223635, LR: 0.0003 +[2026-03-05 08:07:00] (step=0065244) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.765407943650949, LR: 0.0003 +[2026-03-05 08:07:08] (step=0065245) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.765603600078263, LR: 0.0003 +[2026-03-05 08:07:16] (step=0065246) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.765799256505575, LR: 0.0003 +[2026-03-05 08:07:24] (step=0065247) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.76599491293289, LR: 0.0003 +[2026-03-05 08:07:32] (step=0065248) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.766190569360203, LR: 0.0003 +[2026-03-05 08:07:40] (step=0065249) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.766386225787517, LR: 0.0003 +[2026-03-05 08:07:47] (step=0065250) Train Loss: 0.4266, Train Steps/Sec: 0.13, Epoch: 12.766581882214831, LR: 0.0003 +[2026-03-05 08:07:55] (step=0065251) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.766777538642144, LR: 0.0003 +[2026-03-05 08:08:03] (step=0065252) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.766973195069458, LR: 0.0003 +[2026-03-05 08:08:11] (step=0065253) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.767168851496772, LR: 0.0003 +[2026-03-05 08:08:19] (step=0065254) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.767364507924086, LR: 0.0003 +[2026-03-05 08:08:27] (step=0065255) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.7675601643514, LR: 0.0003 +[2026-03-05 08:08:34] (step=0065256) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.767755820778712, LR: 0.0003 +[2026-03-05 08:08:42] (step=0065257) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.767951477206026, LR: 0.0003 +[2026-03-05 08:08:50] (step=0065258) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.76814713363334, LR: 0.0003 +[2026-03-05 08:08:58] (step=0065259) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.768342790060654, LR: 0.0003 +[2026-03-05 08:09:06] (step=0065260) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 12.768538446487968, LR: 0.0003 +[2026-03-05 08:09:14] (step=0065261) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.76873410291528, LR: 0.0003 +[2026-03-05 08:09:22] (step=0065262) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.768929759342594, LR: 0.0003 +[2026-03-05 08:09:29] (step=0065263) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.769125415769908, LR: 0.0003 +[2026-03-05 08:09:37] (step=0065264) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.769321072197222, LR: 0.0003 +[2026-03-05 08:09:45] (step=0065265) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.769516728624536, LR: 0.0003 +[2026-03-05 08:09:53] (step=0065266) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.769712385051848, LR: 0.0003 +[2026-03-05 08:10:01] (step=0065267) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.769908041479162, LR: 0.0003 +[2026-03-05 08:10:09] (step=0065268) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.770103697906476, LR: 0.0003 +[2026-03-05 08:10:16] (step=0065269) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.77029935433379, LR: 0.0003 +[2026-03-05 08:10:24] (step=0065270) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.770495010761104, LR: 0.0003 +[2026-03-05 08:10:32] (step=0065271) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.770690667188417, LR: 0.0003 +[2026-03-05 08:10:40] (step=0065272) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.77088632361573, LR: 0.0003 +[2026-03-05 08:10:48] (step=0065273) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.771081980043045, LR: 0.0003 +[2026-03-05 08:10:56] (step=0065274) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.771277636470359, LR: 0.0003 +[2026-03-05 08:11:04] (step=0065275) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.77147329289767, LR: 0.0003 +[2026-03-05 08:11:11] (step=0065276) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.771668949324985, LR: 0.0003 +[2026-03-05 08:11:19] (step=0065277) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.771864605752299, LR: 0.0003 +[2026-03-05 08:11:27] (step=0065278) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.772060262179613, LR: 0.0003 +[2026-03-05 08:11:35] (step=0065279) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.772255918606927, LR: 0.0003 +[2026-03-05 08:11:43] (step=0065280) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.77245157503424, LR: 0.0003 +[2026-03-05 08:11:51] (step=0065281) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.772647231461553, LR: 0.0003 +[2026-03-05 08:11:58] (step=0065282) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.772842887888867, LR: 0.0003 +[2026-03-05 08:12:06] (step=0065283) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.773038544316181, LR: 0.0003 +[2026-03-05 08:12:14] (step=0065284) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.773234200743495, LR: 0.0003 +[2026-03-05 08:12:22] (step=0065285) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.773429857170807, LR: 0.0003 +[2026-03-05 08:12:30] (step=0065286) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.773625513598121, LR: 0.0003 +[2026-03-05 08:12:38] (step=0065287) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.773821170025435, LR: 0.0003 +[2026-03-05 08:12:46] (step=0065288) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 12.77401682645275, LR: 0.0003 +[2026-03-05 08:12:53] (step=0065289) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.774212482880063, LR: 0.0003 +[2026-03-05 08:13:01] (step=0065290) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.774408139307376, LR: 0.0003 +[2026-03-05 08:13:09] (step=0065291) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.77460379573469, LR: 0.0003 +[2026-03-05 08:13:17] (step=0065292) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.774799452162004, LR: 0.0003 +[2026-03-05 08:13:25] (step=0065293) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.774995108589318, LR: 0.0003 +[2026-03-05 08:13:33] (step=0065294) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.775190765016632, LR: 0.0003 +[2026-03-05 08:13:40] (step=0065295) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.775386421443944, LR: 0.0003 +[2026-03-05 08:13:48] (step=0065296) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.775582077871258, LR: 0.0003 +[2026-03-05 08:13:56] (step=0065297) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.775777734298572, LR: 0.0003 +[2026-03-05 08:14:04] (step=0065298) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.775973390725886, LR: 0.0003 +[2026-03-05 08:14:12] (step=0065299) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 12.776169047153198, LR: 0.0003 +[2026-03-05 08:14:20] (step=0065300) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.776364703580512, LR: 0.0003 +[2026-03-05 08:14:28] (step=0065301) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.776560360007826, LR: 0.0003 +[2026-03-05 08:14:36] (step=0065302) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.77675601643514, LR: 0.0003 +[2026-03-05 08:14:43] (step=0065303) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 12.776951672862454, LR: 0.0003 +[2026-03-05 08:14:51] (step=0065304) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.777147329289766, LR: 0.0003 +[2026-03-05 08:14:59] (step=0065305) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.77734298571708, LR: 0.0003 +[2026-03-05 08:15:07] (step=0065306) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.777538642144394, LR: 0.0003 +[2026-03-05 08:15:15] (step=0065307) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.777734298571708, LR: 0.0003 +[2026-03-05 08:15:23] (step=0065308) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.777929954999022, LR: 0.0003 +[2026-03-05 08:15:30] (step=0065309) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.778125611426335, LR: 0.0003 +[2026-03-05 08:15:38] (step=0065310) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.778321267853649, LR: 0.0003 +[2026-03-05 08:15:46] (step=0065311) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.778516924280963, LR: 0.0003 +[2026-03-05 08:15:54] (step=0065312) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 12.778712580708277, LR: 0.0003 +[2026-03-05 08:16:02] (step=0065313) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 12.77890823713559, LR: 0.0003 +[2026-03-05 08:16:10] (step=0065314) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.779103893562903, LR: 0.0003 +[2026-03-05 08:16:18] (step=0065315) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.779299549990217, LR: 0.0003 +[2026-03-05 08:16:25] (step=0065316) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.779495206417531, LR: 0.0003 +[2026-03-05 08:16:33] (step=0065317) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.779690862844845, LR: 0.0003 +[2026-03-05 08:16:41] (step=0065318) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.779886519272159, LR: 0.0003 +[2026-03-05 08:16:49] (step=0065319) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.780082175699471, LR: 0.0003 +[2026-03-05 08:16:57] (step=0065320) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 12.780277832126785, LR: 0.0003 +[2026-03-05 08:17:05] (step=0065321) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.7804734885541, LR: 0.0003 +[2026-03-05 08:17:13] (step=0065322) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.780669144981413, LR: 0.0003 +[2026-03-05 08:17:21] (step=0065323) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 12.780864801408727, LR: 0.0003 +[2026-03-05 08:17:28] (step=0065324) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.78106045783604, LR: 0.0003 +[2026-03-05 08:17:36] (step=0065325) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 12.781256114263353, LR: 0.0003 +[2026-03-05 08:17:44] (step=0065326) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.781451770690667, LR: 0.0003 +[2026-03-05 08:17:52] (step=0065327) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 12.781647427117981, LR: 0.0003 +[2026-03-05 08:18:00] (step=0065328) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.781843083545294, LR: 0.0003 +[2026-03-05 08:18:08] (step=0065329) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.782038739972608, LR: 0.0003 +[2026-03-05 08:18:15] (step=0065330) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.782234396399922, LR: 0.0003 +[2026-03-05 08:18:23] (step=0065331) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 12.782430052827236, LR: 0.0003 +[2026-03-05 08:18:31] (step=0065332) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.78262570925455, LR: 0.0003 +[2026-03-05 08:18:39] (step=0065333) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.782821365681862, LR: 0.0003 +[2026-03-05 08:18:47] (step=0065334) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.783017022109176, LR: 0.0003 +[2026-03-05 08:18:55] (step=0065335) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.78321267853649, LR: 0.0003 +[2026-03-05 08:19:03] (step=0065336) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.783408334963804, LR: 0.0003 +[2026-03-05 08:19:10] (step=0065337) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.783603991391118, LR: 0.0003 +[2026-03-05 08:19:18] (step=0065338) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 12.78379964781843, LR: 0.0003 +[2026-03-05 08:19:26] (step=0065339) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.783995304245744, LR: 0.0003 +[2026-03-05 08:19:34] (step=0065340) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.784190960673058, LR: 0.0003 +[2026-03-05 08:19:42] (step=0065341) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.784386617100372, LR: 0.0003 +[2026-03-05 08:19:50] (step=0065342) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 12.784582273527686, LR: 0.0003 +[2026-03-05 08:19:57] (step=0065343) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.784777929954998, LR: 0.0003 +[2026-03-05 08:20:05] (step=0065344) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.784973586382312, LR: 0.0003 +[2026-03-05 08:20:13] (step=0065345) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.785169242809626, LR: 0.0003 +[2026-03-05 08:20:21] (step=0065346) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.78536489923694, LR: 0.0003 +[2026-03-05 08:20:29] (step=0065347) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.785560555664254, LR: 0.0003 +[2026-03-05 08:20:37] (step=0065348) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.785756212091567, LR: 0.0003 +[2026-03-05 08:20:45] (step=0065349) Train Loss: 0.4521, Train Steps/Sec: 0.12, Epoch: 12.78595186851888, LR: 0.0003 +[2026-03-05 08:20:53] (step=0065350) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.786147524946195, LR: 0.0003 +[2026-03-05 08:21:00] (step=0065351) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.786343181373509, LR: 0.0003 +[2026-03-05 08:21:08] (step=0065352) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.786538837800821, LR: 0.0003 +[2026-03-05 08:21:16] (step=0065353) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.786734494228135, LR: 0.0003 +[2026-03-05 08:21:24] (step=0065354) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.786930150655449, LR: 0.0003 +[2026-03-05 08:21:32] (step=0065355) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.787125807082763, LR: 0.0003 +[2026-03-05 08:21:40] (step=0065356) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.787321463510077, LR: 0.0003 +[2026-03-05 08:21:47] (step=0065357) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.78751711993739, LR: 0.0003 +[2026-03-05 08:21:55] (step=0065358) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.787712776364703, LR: 0.0003 +[2026-03-05 08:22:03] (step=0065359) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.787908432792017, LR: 0.0003 +[2026-03-05 08:22:11] (step=0065360) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.788104089219331, LR: 0.0003 +[2026-03-05 08:22:19] (step=0065361) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.788299745646645, LR: 0.0003 +[2026-03-05 08:22:27] (step=0065362) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.788495402073957, LR: 0.0003 +[2026-03-05 08:22:35] (step=0065363) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 12.788691058501271, LR: 0.0003 +[2026-03-05 08:22:42] (step=0065364) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.788886714928585, LR: 0.0003 +[2026-03-05 08:22:50] (step=0065365) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.7890823713559, LR: 0.0003 +[2026-03-05 08:22:58] (step=0065366) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.789278027783213, LR: 0.0003 +[2026-03-05 08:23:06] (step=0065367) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.789473684210526, LR: 0.0003 +[2026-03-05 08:23:14] (step=0065368) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.78966934063784, LR: 0.0003 +[2026-03-05 08:23:22] (step=0065369) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.789864997065154, LR: 0.0003 +[2026-03-05 08:23:29] (step=0065370) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.790060653492468, LR: 0.0003 +[2026-03-05 08:23:37] (step=0065371) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.790256309919782, LR: 0.0003 +[2026-03-05 08:23:45] (step=0065372) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.790451966347094, LR: 0.0003 +[2026-03-05 08:23:53] (step=0065373) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.790647622774408, LR: 0.0003 +[2026-03-05 08:24:01] (step=0065374) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.790843279201722, LR: 0.0003 +[2026-03-05 08:24:09] (step=0065375) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.791038935629036, LR: 0.0003 +[2026-03-05 08:24:17] (step=0065376) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 12.79123459205635, LR: 0.0003 +[2026-03-05 08:24:25] (step=0065377) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 12.791430248483662, LR: 0.0003 +[2026-03-05 08:24:32] (step=0065378) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.791625904910976, LR: 0.0003 +[2026-03-05 08:24:40] (step=0065379) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.79182156133829, LR: 0.0003 +[2026-03-05 08:24:48] (step=0065380) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.792017217765604, LR: 0.0003 +[2026-03-05 08:24:56] (step=0065381) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.792212874192916, LR: 0.0003 +[2026-03-05 08:25:04] (step=0065382) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.79240853062023, LR: 0.0003 +[2026-03-05 08:25:12] (step=0065383) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.792604187047544, LR: 0.0003 +[2026-03-05 08:25:19] (step=0065384) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.792799843474858, LR: 0.0003 +[2026-03-05 08:25:27] (step=0065385) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.792995499902172, LR: 0.0003 +[2026-03-05 08:25:35] (step=0065386) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.793191156329485, LR: 0.0003 +[2026-03-05 08:25:43] (step=0065387) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.793386812756799, LR: 0.0003 +[2026-03-05 08:25:51] (step=0065388) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.793582469184113, LR: 0.0003 +[2026-03-05 08:25:59] (step=0065389) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.793778125611427, LR: 0.0003 +[2026-03-05 08:26:07] (step=0065390) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.79397378203874, LR: 0.0003 +[2026-03-05 08:26:14] (step=0065391) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 12.794169438466053, LR: 0.0003 +[2026-03-05 08:26:22] (step=0065392) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.794365094893367, LR: 0.0003 +[2026-03-05 08:26:30] (step=0065393) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.794560751320681, LR: 0.0003 +[2026-03-05 08:26:38] (step=0065394) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 12.794756407747995, LR: 0.0003 +[2026-03-05 08:26:46] (step=0065395) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.794952064175309, LR: 0.0003 +[2026-03-05 08:26:54] (step=0065396) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 12.795147720602621, LR: 0.0003 +[2026-03-05 08:27:02] (step=0065397) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.795343377029935, LR: 0.0003 +[2026-03-05 08:27:09] (step=0065398) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 12.79553903345725, LR: 0.0003 +[2026-03-05 08:27:17] (step=0065399) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.795734689884563, LR: 0.0003 +[2026-03-05 08:27:25] (step=0065400) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.795930346311877, LR: 0.0003 +[2026-03-05 08:27:33] (step=0065401) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.79612600273919, LR: 0.0003 +[2026-03-05 08:27:41] (step=0065402) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.796321659166503, LR: 0.0003 +[2026-03-05 08:27:49] (step=0065403) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.796517315593817, LR: 0.0003 +[2026-03-05 08:27:57] (step=0065404) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 12.796712972021131, LR: 0.0003 +[2026-03-05 08:28:04] (step=0065405) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.796908628448444, LR: 0.0003 +[2026-03-05 08:28:12] (step=0065406) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.797104284875758, LR: 0.0003 +[2026-03-05 08:28:20] (step=0065407) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.797299941303072, LR: 0.0003 +[2026-03-05 08:28:28] (step=0065408) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.797495597730386, LR: 0.0003 +[2026-03-05 08:28:36] (step=0065409) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.7976912541577, LR: 0.0003 +[2026-03-05 08:28:44] (step=0065410) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.797886910585012, LR: 0.0003 +[2026-03-05 08:28:51] (step=0065411) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.798082567012326, LR: 0.0003 +[2026-03-05 08:28:59] (step=0065412) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.79827822343964, LR: 0.0003 +[2026-03-05 08:29:07] (step=0065413) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 12.798473879866954, LR: 0.0003 +[2026-03-05 08:29:15] (step=0065414) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 12.798669536294268, LR: 0.0003 +[2026-03-05 08:29:23] (step=0065415) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.79886519272158, LR: 0.0003 +[2026-03-05 08:29:31] (step=0065416) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.799060849148894, LR: 0.0003 +[2026-03-05 08:29:39] (step=0065417) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.799256505576208, LR: 0.0003 +[2026-03-05 08:29:46] (step=0065418) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.799452162003522, LR: 0.0003 +[2026-03-05 08:29:54] (step=0065419) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.799647818430836, LR: 0.0003 +[2026-03-05 08:30:02] (step=0065420) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 12.799843474858148, LR: 0.0003 +[2026-03-05 08:30:10] (step=0065421) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.800039131285462, LR: 0.0003 +[2026-03-05 08:30:18] (step=0065422) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.800234787712776, LR: 0.0003 +[2026-03-05 08:30:26] (step=0065423) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.80043044414009, LR: 0.0003 +[2026-03-05 08:30:34] (step=0065424) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.800626100567404, LR: 0.0003 +[2026-03-05 08:30:41] (step=0065425) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.800821756994717, LR: 0.0003 +[2026-03-05 08:30:49] (step=0065426) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.80101741342203, LR: 0.0003 +[2026-03-05 08:30:57] (step=0065427) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.801213069849345, LR: 0.0003 +[2026-03-05 08:31:05] (step=0065428) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 12.801408726276659, LR: 0.0003 +[2026-03-05 08:31:13] (step=0065429) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.801604382703971, LR: 0.0003 +[2026-03-05 08:31:21] (step=0065430) Train Loss: 0.4260, Train Steps/Sec: 0.13, Epoch: 12.801800039131285, LR: 0.0003 +[2026-03-05 08:31:29] (step=0065431) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 12.801995695558599, LR: 0.0003 +[2026-03-05 08:31:36] (step=0065432) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.802191351985913, LR: 0.0003 +[2026-03-05 08:31:44] (step=0065433) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.802387008413227, LR: 0.0003 +[2026-03-05 08:31:52] (step=0065434) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.80258266484054, LR: 0.0003 +[2026-03-05 08:32:00] (step=0065435) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.802778321267853, LR: 0.0003 +[2026-03-05 08:32:08] (step=0065436) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 12.802973977695167, LR: 0.0003 +[2026-03-05 08:32:16] (step=0065437) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.803169634122481, LR: 0.0003 +[2026-03-05 08:32:23] (step=0065438) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.803365290549795, LR: 0.0003 +[2026-03-05 08:32:31] (step=0065439) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.803560946977107, LR: 0.0003 +[2026-03-05 08:32:39] (step=0065440) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.803756603404421, LR: 0.0003 +[2026-03-05 08:32:47] (step=0065441) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.803952259831735, LR: 0.0003 +[2026-03-05 08:32:55] (step=0065442) Train Loss: 0.4422, Train Steps/Sec: 0.12, Epoch: 12.80414791625905, LR: 0.0003 +[2026-03-05 08:33:03] (step=0065443) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.804343572686363, LR: 0.0003 +[2026-03-05 08:33:11] (step=0065444) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.804539229113676, LR: 0.0003 +[2026-03-05 08:33:18] (step=0065445) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.80473488554099, LR: 0.0003 +[2026-03-05 08:33:26] (step=0065446) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 12.804930541968304, LR: 0.0003 +[2026-03-05 08:33:34] (step=0065447) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.805126198395618, LR: 0.0003 +[2026-03-05 08:33:42] (step=0065448) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 12.805321854822932, LR: 0.0003 +[2026-03-05 08:33:50] (step=0065449) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.805517511250244, LR: 0.0003 +[2026-03-05 08:33:58] (step=0065450) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 12.805713167677558, LR: 0.0003 +[2026-03-05 08:34:06] (step=0065451) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 12.805908824104872, LR: 0.0003 +[2026-03-05 08:34:13] (step=0065452) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.806104480532186, LR: 0.0003 +[2026-03-05 08:34:21] (step=0065453) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.8063001369595, LR: 0.0003 +[2026-03-05 08:34:29] (step=0065454) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.806495793386812, LR: 0.0003 +[2026-03-05 08:34:37] (step=0065455) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.806691449814126, LR: 0.0003 +[2026-03-05 08:34:45] (step=0065456) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.80688710624144, LR: 0.0003 +[2026-03-05 08:34:53] (step=0065457) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.807082762668754, LR: 0.0003 +[2026-03-05 08:35:01] (step=0065458) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.807278419096066, LR: 0.0003 +[2026-03-05 08:35:08] (step=0065459) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.80747407552338, LR: 0.0003 +[2026-03-05 08:35:16] (step=0065460) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.807669731950694, LR: 0.0003 +[2026-03-05 08:35:24] (step=0065461) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.807865388378008, LR: 0.0003 +[2026-03-05 08:35:32] (step=0065462) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.808061044805322, LR: 0.0003 +[2026-03-05 08:35:40] (step=0065463) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.808256701232635, LR: 0.0003 +[2026-03-05 08:35:48] (step=0065464) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.808452357659949, LR: 0.0003 +[2026-03-05 08:35:55] (step=0065465) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.808648014087263, LR: 0.0003 +[2026-03-05 08:36:03] (step=0065466) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.808843670514577, LR: 0.0003 +[2026-03-05 08:36:11] (step=0065467) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.80903932694189, LR: 0.0003 +[2026-03-05 08:36:19] (step=0065468) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.809234983369203, LR: 0.0003 +[2026-03-05 08:36:27] (step=0065469) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.809430639796517, LR: 0.0003 +[2026-03-05 08:36:35] (step=0065470) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 12.809626296223831, LR: 0.0003 +[2026-03-05 08:36:43] (step=0065471) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.809821952651145, LR: 0.0003 +[2026-03-05 08:36:50] (step=0065472) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.810017609078459, LR: 0.0003 +[2026-03-05 08:36:58] (step=0065473) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.810213265505771, LR: 0.0003 +[2026-03-05 08:37:06] (step=0065474) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.810408921933085, LR: 0.0003 +[2026-03-05 08:37:14] (step=0065475) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 12.8106045783604, LR: 0.0003 +[2026-03-05 08:37:22] (step=0065476) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 12.810800234787713, LR: 0.0003 +[2026-03-05 08:37:30] (step=0065477) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.810995891215027, LR: 0.0003 +[2026-03-05 08:37:38] (step=0065478) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.81119154764234, LR: 0.0003 +[2026-03-05 08:37:45] (step=0065479) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.811387204069653, LR: 0.0003 +[2026-03-05 08:37:53] (step=0065480) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.811582860496967, LR: 0.0003 +[2026-03-05 08:38:01] (step=0065481) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.811778516924281, LR: 0.0003 +[2026-03-05 08:38:09] (step=0065482) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.811974173351594, LR: 0.0003 +[2026-03-05 08:38:17] (step=0065483) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.812169829778908, LR: 0.0003 +[2026-03-05 08:38:25] (step=0065484) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.812365486206222, LR: 0.0003 +[2026-03-05 08:38:32] (step=0065485) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.812561142633536, LR: 0.0003 +[2026-03-05 08:38:40] (step=0065486) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 12.81275679906085, LR: 0.0003 +[2026-03-05 08:38:48] (step=0065487) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.812952455488162, LR: 0.0003 +[2026-03-05 08:38:56] (step=0065488) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.813148111915476, LR: 0.0003 +[2026-03-05 08:39:04] (step=0065489) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.81334376834279, LR: 0.0003 +[2026-03-05 08:39:12] (step=0065490) Train Loss: 0.4452, Train Steps/Sec: 0.12, Epoch: 12.813539424770104, LR: 0.0003 +[2026-03-05 08:39:20] (step=0065491) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 12.813735081197418, LR: 0.0003 +[2026-03-05 08:39:28] (step=0065492) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.81393073762473, LR: 0.0003 +[2026-03-05 08:39:35] (step=0065493) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.814126394052044, LR: 0.0003 +[2026-03-05 08:39:43] (step=0065494) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 12.814322050479358, LR: 0.0003 +[2026-03-05 08:39:51] (step=0065495) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 12.814517706906672, LR: 0.0003 +[2026-03-05 08:39:59] (step=0065496) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.814713363333986, LR: 0.0003 +[2026-03-05 08:40:07] (step=0065497) Train Loss: 0.4222, Train Steps/Sec: 0.13, Epoch: 12.814909019761298, LR: 0.0003 +[2026-03-05 08:40:15] (step=0065498) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 12.815104676188612, LR: 0.0003 +[2026-03-05 08:40:23] (step=0065499) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.815300332615926, LR: 0.0003 +[2026-03-05 08:40:30] (step=0065500) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.81549598904324, LR: 0.0003 +[2026-03-05 08:40:30] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0065500/ +[2026-03-05 08:40:38] (step=0065501) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.815691645470555, LR: 0.0003 +[2026-03-05 08:40:46] (step=0065502) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.815887301897867, LR: 0.0003 +[2026-03-05 08:40:54] (step=0065503) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.81608295832518, LR: 0.0003 +[2026-03-05 08:41:02] (step=0065504) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.816278614752495, LR: 0.0003 +[2026-03-05 08:41:10] (step=0065505) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.816474271179809, LR: 0.0003 +[2026-03-05 08:41:17] (step=0065506) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.816669927607123, LR: 0.0003 +[2026-03-05 08:41:25] (step=0065507) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.816865584034435, LR: 0.0003 +[2026-03-05 08:41:33] (step=0065508) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.817061240461749, LR: 0.0003 +[2026-03-05 08:41:41] (step=0065509) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 12.817256896889063, LR: 0.0003 +[2026-03-05 08:41:49] (step=0065510) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.817452553316377, LR: 0.0003 +[2026-03-05 08:41:57] (step=0065511) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.81764820974369, LR: 0.0003 +[2026-03-05 08:42:05] (step=0065512) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.817843866171003, LR: 0.0003 +[2026-03-05 08:42:12] (step=0065513) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.818039522598317, LR: 0.0003 +[2026-03-05 08:42:20] (step=0065514) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.818235179025631, LR: 0.0003 +[2026-03-05 08:42:28] (step=0065515) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 12.818430835452945, LR: 0.0003 +[2026-03-05 08:42:36] (step=0065516) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.818626491880257, LR: 0.0003 +[2026-03-05 08:42:44] (step=0065517) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.818822148307571, LR: 0.0003 +[2026-03-05 08:42:52] (step=0065518) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.819017804734886, LR: 0.0003 +[2026-03-05 08:42:59] (step=0065519) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.8192134611622, LR: 0.0003 +[2026-03-05 08:43:07] (step=0065520) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.819409117589514, LR: 0.0003 +[2026-03-05 08:43:15] (step=0065521) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.819604774016826, LR: 0.0003 +[2026-03-05 08:43:23] (step=0065522) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.81980043044414, LR: 0.0003 +[2026-03-05 08:43:31] (step=0065523) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.819996086871454, LR: 0.0003 +[2026-03-05 08:43:39] (step=0065524) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.820191743298768, LR: 0.0003 +[2026-03-05 08:43:47] (step=0065525) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 12.820387399726082, LR: 0.0003 +[2026-03-05 08:43:55] (step=0065526) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.820583056153394, LR: 0.0003 +[2026-03-05 08:44:02] (step=0065527) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.820778712580708, LR: 0.0003 +[2026-03-05 08:44:10] (step=0065528) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.820974369008022, LR: 0.0003 +[2026-03-05 08:44:18] (step=0065529) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 12.821170025435336, LR: 0.0003 +[2026-03-05 08:44:26] (step=0065530) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.82136568186265, LR: 0.0003 +[2026-03-05 08:44:34] (step=0065531) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.821561338289962, LR: 0.0003 +[2026-03-05 08:44:42] (step=0065532) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.821756994717276, LR: 0.0003 +[2026-03-05 08:44:50] (step=0065533) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.82195265114459, LR: 0.0003 +[2026-03-05 08:44:57] (step=0065534) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.822148307571904, LR: 0.0003 +[2026-03-05 08:45:05] (step=0065535) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.822343963999216, LR: 0.0003 +[2026-03-05 08:45:13] (step=0065536) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.82253962042653, LR: 0.0003 +[2026-03-05 08:45:21] (step=0065537) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.822735276853845, LR: 0.0003 +[2026-03-05 08:45:29] (step=0065538) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.822930933281159, LR: 0.0003 +[2026-03-05 08:45:37] (step=0065539) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.823126589708473, LR: 0.0003 +[2026-03-05 08:45:45] (step=0065540) Train Loss: 0.4235, Train Steps/Sec: 0.12, Epoch: 12.823322246135785, LR: 0.0003 +[2026-03-05 08:45:53] (step=0065541) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.823517902563099, LR: 0.0003 +[2026-03-05 08:46:00] (step=0065542) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.823713558990413, LR: 0.0003 +[2026-03-05 08:46:08] (step=0065543) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.823909215417727, LR: 0.0003 +[2026-03-05 08:46:16] (step=0065544) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.82410487184504, LR: 0.0003 +[2026-03-05 08:46:24] (step=0065545) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 12.824300528272353, LR: 0.0003 +[2026-03-05 08:46:32] (step=0065546) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.824496184699667, LR: 0.0003 +[2026-03-05 08:46:40] (step=0065547) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.824691841126981, LR: 0.0003 +[2026-03-05 08:46:47] (step=0065548) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 12.824887497554295, LR: 0.0003 +[2026-03-05 08:46:55] (step=0065549) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.825083153981609, LR: 0.0003 +[2026-03-05 08:47:03] (step=0065550) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.825278810408921, LR: 0.0003 +[2026-03-05 08:47:11] (step=0065551) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.825474466836235, LR: 0.0003 +[2026-03-05 08:47:19] (step=0065552) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.82567012326355, LR: 0.0003 +[2026-03-05 08:47:27] (step=0065553) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.825865779690863, LR: 0.0003 +[2026-03-05 08:47:35] (step=0065554) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.826061436118177, LR: 0.0003 +[2026-03-05 08:47:42] (step=0065555) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.82625709254549, LR: 0.0003 +[2026-03-05 08:47:50] (step=0065556) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.826452748972804, LR: 0.0003 +[2026-03-05 08:47:58] (step=0065557) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.826648405400118, LR: 0.0003 +[2026-03-05 08:48:06] (step=0065558) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.826844061827432, LR: 0.0003 +[2026-03-05 08:48:14] (step=0065559) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.827039718254746, LR: 0.0003 +[2026-03-05 08:48:22] (step=0065560) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 12.827235374682058, LR: 0.0003 +[2026-03-05 08:48:29] (step=0065561) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.827431031109372, LR: 0.0003 +[2026-03-05 08:48:37] (step=0065562) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.827626687536686, LR: 0.0003 +[2026-03-05 08:48:45] (step=0065563) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.827822343964, LR: 0.0003 +[2026-03-05 08:48:53] (step=0065564) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.828018000391312, LR: 0.0003 +[2026-03-05 08:49:01] (step=0065565) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.828213656818626, LR: 0.0003 +[2026-03-05 08:49:09] (step=0065566) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 12.82840931324594, LR: 0.0003 +[2026-03-05 08:49:17] (step=0065567) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.828604969673254, LR: 0.0003 +[2026-03-05 08:49:24] (step=0065568) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.828800626100568, LR: 0.0003 +[2026-03-05 08:49:32] (step=0065569) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.82899628252788, LR: 0.0003 +[2026-03-05 08:49:40] (step=0065570) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.829191938955194, LR: 0.0003 +[2026-03-05 08:49:48] (step=0065571) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.829387595382508, LR: 0.0003 +[2026-03-05 08:49:56] (step=0065572) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.829583251809822, LR: 0.0003 +[2026-03-05 08:50:04] (step=0065573) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.829778908237136, LR: 0.0003 +[2026-03-05 08:50:12] (step=0065574) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.829974564664449, LR: 0.0003 +[2026-03-05 08:50:20] (step=0065575) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.830170221091763, LR: 0.0003 +[2026-03-05 08:50:27] (step=0065576) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.830365877519077, LR: 0.0003 +[2026-03-05 08:50:35] (step=0065577) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 12.83056153394639, LR: 0.0003 +[2026-03-05 08:50:43] (step=0065578) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.830757190373705, LR: 0.0003 +[2026-03-05 08:50:51] (step=0065579) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.830952846801017, LR: 0.0003 +[2026-03-05 08:50:59] (step=0065580) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.83114850322833, LR: 0.0003 +[2026-03-05 08:51:07] (step=0065581) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.831344159655645, LR: 0.0003 +[2026-03-05 08:51:15] (step=0065582) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.831539816082959, LR: 0.0003 +[2026-03-05 08:51:22] (step=0065583) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.831735472510273, LR: 0.0003 +[2026-03-05 08:51:30] (step=0065584) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.831931128937585, LR: 0.0003 +[2026-03-05 08:51:38] (step=0065585) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.832126785364899, LR: 0.0003 +[2026-03-05 08:51:46] (step=0065586) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.832322441792213, LR: 0.0003 +[2026-03-05 08:51:54] (step=0065587) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.832518098219527, LR: 0.0003 +[2026-03-05 08:52:02] (step=0065588) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.83271375464684, LR: 0.0003 +[2026-03-05 08:52:09] (step=0065589) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.832909411074153, LR: 0.0003 +[2026-03-05 08:52:17] (step=0065590) Train Loss: 0.4532, Train Steps/Sec: 0.12, Epoch: 12.833105067501467, LR: 0.0003 +[2026-03-05 08:52:25] (step=0065591) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.833300723928781, LR: 0.0003 +[2026-03-05 08:52:33] (step=0065592) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.833496380356095, LR: 0.0003 +[2026-03-05 08:52:41] (step=0065593) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.833692036783408, LR: 0.0003 +[2026-03-05 08:52:49] (step=0065594) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.833887693210722, LR: 0.0003 +[2026-03-05 08:52:57] (step=0065595) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.834083349638036, LR: 0.0003 +[2026-03-05 08:53:05] (step=0065596) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.83427900606535, LR: 0.0003 +[2026-03-05 08:53:12] (step=0065597) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.834474662492664, LR: 0.0003 +[2026-03-05 08:53:20] (step=0065598) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.834670318919976, LR: 0.0003 +[2026-03-05 08:53:28] (step=0065599) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.83486597534729, LR: 0.0003 +[2026-03-05 08:53:36] (step=0065600) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.835061631774604, LR: 0.0003 +[2026-03-05 08:53:44] (step=0065601) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.835257288201918, LR: 0.0003 +[2026-03-05 08:53:52] (step=0065602) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.835452944629232, LR: 0.0003 +[2026-03-05 08:53:59] (step=0065603) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 12.835648601056544, LR: 0.0003 +[2026-03-05 08:54:07] (step=0065604) Train Loss: 0.4233, Train Steps/Sec: 0.13, Epoch: 12.835844257483858, LR: 0.0003 +[2026-03-05 08:54:15] (step=0065605) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.836039913911172, LR: 0.0003 +[2026-03-05 08:54:23] (step=0065606) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.836235570338486, LR: 0.0003 +[2026-03-05 08:54:31] (step=0065607) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.8364312267658, LR: 0.0003 +[2026-03-05 08:54:39] (step=0065608) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.836626883193112, LR: 0.0003 +[2026-03-05 08:54:46] (step=0065609) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.836822539620426, LR: 0.0003 +[2026-03-05 08:54:54] (step=0065610) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.83701819604774, LR: 0.0003 +[2026-03-05 08:55:02] (step=0065611) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.837213852475054, LR: 0.0003 +[2026-03-05 08:55:10] (step=0065612) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.837409508902368, LR: 0.0003 +[2026-03-05 08:55:18] (step=0065613) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.83760516532968, LR: 0.0003 +[2026-03-05 08:55:26] (step=0065614) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.837800821756995, LR: 0.0003 +[2026-03-05 08:55:34] (step=0065615) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 12.837996478184309, LR: 0.0003 +[2026-03-05 08:55:41] (step=0065616) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.838192134611623, LR: 0.0003 +[2026-03-05 08:55:49] (step=0065617) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.838387791038935, LR: 0.0003 +[2026-03-05 08:55:57] (step=0065618) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.838583447466249, LR: 0.0003 +[2026-03-05 08:56:05] (step=0065619) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.838779103893563, LR: 0.0003 +[2026-03-05 08:56:13] (step=0065620) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.838974760320877, LR: 0.0003 +[2026-03-05 08:56:21] (step=0065621) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.83917041674819, LR: 0.0003 +[2026-03-05 08:56:29] (step=0065622) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 12.839366073175503, LR: 0.0003 +[2026-03-05 08:56:36] (step=0065623) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.839561729602817, LR: 0.0003 +[2026-03-05 08:56:44] (step=0065624) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.839757386030131, LR: 0.0003 +[2026-03-05 08:56:52] (step=0065625) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.839953042457445, LR: 0.0003 +[2026-03-05 08:57:00] (step=0065626) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.840148698884759, LR: 0.0003 +[2026-03-05 08:57:08] (step=0065627) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.840344355312071, LR: 0.0003 +[2026-03-05 08:57:16] (step=0065628) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.840540011739385, LR: 0.0003 +[2026-03-05 08:57:24] (step=0065629) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.8407356681667, LR: 0.0003 +[2026-03-05 08:57:31] (step=0065630) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 12.840931324594013, LR: 0.0003 +[2026-03-05 08:57:39] (step=0065631) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.841126981021327, LR: 0.0003 +[2026-03-05 08:57:47] (step=0065632) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.84132263744864, LR: 0.0003 +[2026-03-05 08:57:55] (step=0065633) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.841518293875954, LR: 0.0003 +[2026-03-05 08:58:03] (step=0065634) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.841713950303268, LR: 0.0003 +[2026-03-05 08:58:11] (step=0065635) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 12.841909606730582, LR: 0.0003 +[2026-03-05 08:58:18] (step=0065636) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 12.842105263157896, LR: 0.0003 +[2026-03-05 08:58:26] (step=0065637) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.842300919585208, LR: 0.0003 +[2026-03-05 08:58:34] (step=0065638) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.842496576012522, LR: 0.0003 +[2026-03-05 08:58:42] (step=0065639) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.842692232439836, LR: 0.0003 +[2026-03-05 08:58:50] (step=0065640) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.84288788886715, LR: 0.0003 +[2026-03-05 08:58:58] (step=0065641) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.843083545294462, LR: 0.0003 +[2026-03-05 08:59:06] (step=0065642) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.843279201721776, LR: 0.0003 +[2026-03-05 08:59:14] (step=0065643) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.84347485814909, LR: 0.0003 +[2026-03-05 08:59:21] (step=0065644) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.843670514576404, LR: 0.0003 +[2026-03-05 08:59:29] (step=0065645) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.843866171003718, LR: 0.0003 +[2026-03-05 08:59:37] (step=0065646) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.84406182743103, LR: 0.0003 +[2026-03-05 08:59:45] (step=0065647) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.844257483858344, LR: 0.0003 +[2026-03-05 08:59:53] (step=0065648) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 12.844453140285658, LR: 0.0003 +[2026-03-05 09:00:01] (step=0065649) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.844648796712972, LR: 0.0003 +[2026-03-05 09:00:09] (step=0065650) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.844844453140286, LR: 0.0003 +[2026-03-05 09:00:16] (step=0065651) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.845040109567599, LR: 0.0003 +[2026-03-05 09:00:24] (step=0065652) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 12.845235765994913, LR: 0.0003 +[2026-03-05 09:00:32] (step=0065653) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.845431422422227, LR: 0.0003 +[2026-03-05 09:00:40] (step=0065654) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.84562707884954, LR: 0.0003 +[2026-03-05 09:00:48] (step=0065655) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.845822735276855, LR: 0.0003 +[2026-03-05 09:00:56] (step=0065656) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 12.846018391704167, LR: 0.0003 +[2026-03-05 09:01:03] (step=0065657) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.84621404813148, LR: 0.0003 +[2026-03-05 09:01:11] (step=0065658) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.846409704558795, LR: 0.0003 +[2026-03-05 09:01:19] (step=0065659) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 12.846605360986109, LR: 0.0003 +[2026-03-05 09:01:27] (step=0065660) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 12.846801017413423, LR: 0.0003 +[2026-03-05 09:01:35] (step=0065661) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.846996673840735, LR: 0.0003 +[2026-03-05 09:01:43] (step=0065662) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.847192330268049, LR: 0.0003 +[2026-03-05 09:01:50] (step=0065663) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.847387986695363, LR: 0.0003 +[2026-03-05 09:01:58] (step=0065664) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.847583643122677, LR: 0.0003 +[2026-03-05 09:02:06] (step=0065665) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.847779299549991, LR: 0.0003 +[2026-03-05 09:02:14] (step=0065666) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.847974955977303, LR: 0.0003 +[2026-03-05 09:02:22] (step=0065667) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.848170612404617, LR: 0.0003 +[2026-03-05 09:02:30] (step=0065668) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.848366268831931, LR: 0.0003 +[2026-03-05 09:02:38] (step=0065669) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.848561925259245, LR: 0.0003 +[2026-03-05 09:02:46] (step=0065670) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.848757581686558, LR: 0.0003 +[2026-03-05 09:02:53] (step=0065671) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.848953238113872, LR: 0.0003 +[2026-03-05 09:03:01] (step=0065672) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.849148894541186, LR: 0.0003 +[2026-03-05 09:03:09] (step=0065673) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.8493445509685, LR: 0.0003 +[2026-03-05 09:03:17] (step=0065674) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.849540207395814, LR: 0.0003 +[2026-03-05 09:03:25] (step=0065675) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.849735863823126, LR: 0.0003 +[2026-03-05 09:03:33] (step=0065676) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.84993152025044, LR: 0.0003 +[2026-03-05 09:03:40] (step=0065677) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.850127176677754, LR: 0.0003 +[2026-03-05 09:03:48] (step=0065678) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.850322833105068, LR: 0.0003 +[2026-03-05 09:03:56] (step=0065679) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.850518489532382, LR: 0.0003 +[2026-03-05 09:04:04] (step=0065680) Train Loss: 0.4703, Train Steps/Sec: 0.13, Epoch: 12.850714145959694, LR: 0.0003 +[2026-03-05 09:04:12] (step=0065681) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.850909802387008, LR: 0.0003 +[2026-03-05 09:04:20] (step=0065682) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.851105458814322, LR: 0.0003 +[2026-03-05 09:04:27] (step=0065683) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.851301115241636, LR: 0.0003 +[2026-03-05 09:04:35] (step=0065684) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.85149677166895, LR: 0.0003 +[2026-03-05 09:04:43] (step=0065685) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.851692428096262, LR: 0.0003 +[2026-03-05 09:04:51] (step=0065686) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.851888084523576, LR: 0.0003 +[2026-03-05 09:04:59] (step=0065687) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.85208374095089, LR: 0.0003 +[2026-03-05 09:05:07] (step=0065688) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.852279397378204, LR: 0.0003 +[2026-03-05 09:05:15] (step=0065689) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.852475053805518, LR: 0.0003 +[2026-03-05 09:05:22] (step=0065690) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.85267071023283, LR: 0.0003 +[2026-03-05 09:05:30] (step=0065691) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.852866366660145, LR: 0.0003 +[2026-03-05 09:05:38] (step=0065692) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.853062023087459, LR: 0.0003 +[2026-03-05 09:05:46] (step=0065693) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.853257679514773, LR: 0.0003 +[2026-03-05 09:05:54] (step=0065694) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.853453335942085, LR: 0.0003 +[2026-03-05 09:06:02] (step=0065695) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 12.853648992369399, LR: 0.0003 +[2026-03-05 09:06:09] (step=0065696) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.853844648796713, LR: 0.0003 +[2026-03-05 09:06:17] (step=0065697) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.854040305224027, LR: 0.0003 +[2026-03-05 09:06:25] (step=0065698) Train Loss: 0.4257, Train Steps/Sec: 0.13, Epoch: 12.85423596165134, LR: 0.0003 +[2026-03-05 09:06:33] (step=0065699) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.854431618078653, LR: 0.0003 +[2026-03-05 09:06:41] (step=0065700) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.854627274505967, LR: 0.0003 +[2026-03-05 09:06:49] (step=0065701) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.854822930933281, LR: 0.0003 +[2026-03-05 09:06:57] (step=0065702) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.855018587360595, LR: 0.0003 +[2026-03-05 09:07:04] (step=0065703) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.85521424378791, LR: 0.0003 +[2026-03-05 09:07:12] (step=0065704) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.855409900215221, LR: 0.0003 +[2026-03-05 09:07:20] (step=0065705) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.855605556642535, LR: 0.0003 +[2026-03-05 09:07:28] (step=0065706) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.85580121306985, LR: 0.0003 +[2026-03-05 09:07:36] (step=0065707) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.855996869497163, LR: 0.0003 +[2026-03-05 09:07:44] (step=0065708) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.856192525924477, LR: 0.0003 +[2026-03-05 09:07:52] (step=0065709) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.85638818235179, LR: 0.0003 +[2026-03-05 09:07:59] (step=0065710) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.856583838779104, LR: 0.0003 +[2026-03-05 09:08:07] (step=0065711) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 12.856779495206418, LR: 0.0003 +[2026-03-05 09:08:15] (step=0065712) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.856975151633732, LR: 0.0003 +[2026-03-05 09:08:23] (step=0065713) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.857170808061046, LR: 0.0003 +[2026-03-05 09:08:31] (step=0065714) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.857366464488358, LR: 0.0003 +[2026-03-05 09:08:39] (step=0065715) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.857562120915672, LR: 0.0003 +[2026-03-05 09:08:46] (step=0065716) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 12.857757777342986, LR: 0.0003 +[2026-03-05 09:08:54] (step=0065717) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.8579534337703, LR: 0.0003 +[2026-03-05 09:09:02] (step=0065718) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 12.858149090197614, LR: 0.0003 +[2026-03-05 09:09:10] (step=0065719) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.858344746624926, LR: 0.0003 +[2026-03-05 09:09:18] (step=0065720) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.85854040305224, LR: 0.0003 +[2026-03-05 09:09:26] (step=0065721) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 12.858736059479554, LR: 0.0003 +[2026-03-05 09:09:34] (step=0065722) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.858931715906868, LR: 0.0003 +[2026-03-05 09:09:42] (step=0065723) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.85912737233418, LR: 0.0003 +[2026-03-05 09:09:49] (step=0065724) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.859323028761494, LR: 0.0003 +[2026-03-05 09:09:57] (step=0065725) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.859518685188808, LR: 0.0003 +[2026-03-05 09:10:05] (step=0065726) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.859714341616122, LR: 0.0003 +[2026-03-05 09:10:13] (step=0065727) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.859909998043436, LR: 0.0003 +[2026-03-05 09:10:21] (step=0065728) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.860105654470749, LR: 0.0003 +[2026-03-05 09:10:29] (step=0065729) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.860301310898063, LR: 0.0003 +[2026-03-05 09:10:37] (step=0065730) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.860496967325377, LR: 0.0003 +[2026-03-05 09:10:44] (step=0065731) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.86069262375269, LR: 0.0003 +[2026-03-05 09:10:52] (step=0065732) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 12.860888280180005, LR: 0.0003 +[2026-03-05 09:11:00] (step=0065733) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.861083936607317, LR: 0.0003 +[2026-03-05 09:11:08] (step=0065734) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.86127959303463, LR: 0.0003 +[2026-03-05 09:11:16] (step=0065735) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 12.861475249461945, LR: 0.0003 +[2026-03-05 09:11:24] (step=0065736) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 12.861670905889259, LR: 0.0003 +[2026-03-05 09:11:32] (step=0065737) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.861866562316573, LR: 0.0003 +[2026-03-05 09:11:39] (step=0065738) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 12.862062218743885, LR: 0.0003 +[2026-03-05 09:11:47] (step=0065739) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.8622578751712, LR: 0.0003 +[2026-03-05 09:11:55] (step=0065740) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.862453531598513, LR: 0.0003 +[2026-03-05 09:12:03] (step=0065741) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.862649188025827, LR: 0.0003 +[2026-03-05 09:12:11] (step=0065742) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 12.862844844453141, LR: 0.0003 +[2026-03-05 09:12:19] (step=0065743) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.863040500880453, LR: 0.0003 +[2026-03-05 09:12:26] (step=0065744) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 12.863236157307767, LR: 0.0003 +[2026-03-05 09:12:34] (step=0065745) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.863431813735081, LR: 0.0003 +[2026-03-05 09:12:42] (step=0065746) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.863627470162395, LR: 0.0003 +[2026-03-05 09:12:50] (step=0065747) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.863823126589708, LR: 0.0003 +[2026-03-05 09:12:58] (step=0065748) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 12.864018783017022, LR: 0.0003 +[2026-03-05 09:13:06] (step=0065749) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.864214439444336, LR: 0.0003 +[2026-03-05 09:13:13] (step=0065750) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 12.86441009587165, LR: 0.0003 +[2026-03-05 09:13:21] (step=0065751) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.864605752298964, LR: 0.0003 +[2026-03-05 09:13:29] (step=0065752) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 12.864801408726276, LR: 0.0003 +[2026-03-05 09:13:37] (step=0065753) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.86499706515359, LR: 0.0003 +[2026-03-05 09:13:45] (step=0065754) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.865192721580904, LR: 0.0003 +[2026-03-05 09:13:53] (step=0065755) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.865388378008218, LR: 0.0003 +[2026-03-05 09:14:01] (step=0065756) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.865584034435532, LR: 0.0003 +[2026-03-05 09:14:08] (step=0065757) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.865779690862844, LR: 0.0003 +[2026-03-05 09:14:16] (step=0065758) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.865975347290158, LR: 0.0003 +[2026-03-05 09:14:24] (step=0065759) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.866171003717472, LR: 0.0003 +[2026-03-05 09:14:32] (step=0065760) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.866366660144786, LR: 0.0003 +[2026-03-05 09:14:40] (step=0065761) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.8665623165721, LR: 0.0003 +[2026-03-05 09:14:48] (step=0065762) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.866757972999412, LR: 0.0003 +[2026-03-05 09:14:55] (step=0065763) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.866953629426726, LR: 0.0003 +[2026-03-05 09:15:03] (step=0065764) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.86714928585404, LR: 0.0003 +[2026-03-05 09:15:11] (step=0065765) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.867344942281354, LR: 0.0003 +[2026-03-05 09:15:19] (step=0065766) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.867540598708668, LR: 0.0003 +[2026-03-05 09:15:27] (step=0065767) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.86773625513598, LR: 0.0003 +[2026-03-05 09:15:35] (step=0065768) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 12.867931911563295, LR: 0.0003 +[2026-03-05 09:15:42] (step=0065769) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.868127567990609, LR: 0.0003 +[2026-03-05 09:15:50] (step=0065770) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.868323224417923, LR: 0.0003 +[2026-03-05 09:15:58] (step=0065771) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.868518880845237, LR: 0.0003 +[2026-03-05 09:16:06] (step=0065772) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.868714537272549, LR: 0.0003 +[2026-03-05 09:16:14] (step=0065773) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.868910193699863, LR: 0.0003 +[2026-03-05 09:16:22] (step=0065774) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.869105850127177, LR: 0.0003 +[2026-03-05 09:16:30] (step=0065775) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.869301506554491, LR: 0.0003 +[2026-03-05 09:16:38] (step=0065776) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.869497162981803, LR: 0.0003 +[2026-03-05 09:16:45] (step=0065777) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.869692819409117, LR: 0.0003 +[2026-03-05 09:16:53] (step=0065778) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.869888475836431, LR: 0.0003 +[2026-03-05 09:17:01] (step=0065779) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.870084132263745, LR: 0.0003 +[2026-03-05 09:17:09] (step=0065780) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.87027978869106, LR: 0.0003 +[2026-03-05 09:17:17] (step=0065781) Train Loss: 0.4453, Train Steps/Sec: 0.12, Epoch: 12.870475445118371, LR: 0.0003 +[2026-03-05 09:17:25] (step=0065782) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.870671101545685, LR: 0.0003 +[2026-03-05 09:17:33] (step=0065783) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.870866757973, LR: 0.0003 +[2026-03-05 09:17:41] (step=0065784) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.871062414400313, LR: 0.0003 +[2026-03-05 09:17:48] (step=0065785) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 12.871258070827627, LR: 0.0003 +[2026-03-05 09:17:56] (step=0065786) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.87145372725494, LR: 0.0003 +[2026-03-05 09:18:04] (step=0065787) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.871649383682254, LR: 0.0003 +[2026-03-05 09:18:12] (step=0065788) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.871845040109568, LR: 0.0003 +[2026-03-05 09:18:20] (step=0065789) Train Loss: 0.4226, Train Steps/Sec: 0.13, Epoch: 12.872040696536882, LR: 0.0003 +[2026-03-05 09:18:28] (step=0065790) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.872236352964196, LR: 0.0003 +[2026-03-05 09:18:36] (step=0065791) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.872432009391508, LR: 0.0003 +[2026-03-05 09:18:43] (step=0065792) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.872627665818822, LR: 0.0003 +[2026-03-05 09:18:51] (step=0065793) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.872823322246136, LR: 0.0003 +[2026-03-05 09:18:59] (step=0065794) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.87301897867345, LR: 0.0003 +[2026-03-05 09:19:07] (step=0065795) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.873214635100764, LR: 0.0003 +[2026-03-05 09:19:15] (step=0065796) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.873410291528076, LR: 0.0003 +[2026-03-05 09:19:23] (step=0065797) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.87360594795539, LR: 0.0003 +[2026-03-05 09:19:31] (step=0065798) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.873801604382704, LR: 0.0003 +[2026-03-05 09:19:38] (step=0065799) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.873997260810018, LR: 0.0003 +[2026-03-05 09:19:46] (step=0065800) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.87419291723733, LR: 0.0003 +[2026-03-05 09:19:54] (step=0065801) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.874388573664644, LR: 0.0003 +[2026-03-05 09:20:02] (step=0065802) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.874584230091958, LR: 0.0003 +[2026-03-05 09:20:10] (step=0065803) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 12.874779886519272, LR: 0.0003 +[2026-03-05 09:20:18] (step=0065804) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.874975542946586, LR: 0.0003 +[2026-03-05 09:20:25] (step=0065805) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.875171199373899, LR: 0.0003 +[2026-03-05 09:20:33] (step=0065806) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.875366855801213, LR: 0.0003 +[2026-03-05 09:20:41] (step=0065807) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.875562512228527, LR: 0.0003 +[2026-03-05 09:20:49] (step=0065808) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.87575816865584, LR: 0.0003 +[2026-03-05 09:20:57] (step=0065809) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.875953825083155, LR: 0.0003 +[2026-03-05 09:21:05] (step=0065810) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.876149481510467, LR: 0.0003 +[2026-03-05 09:21:12] (step=0065811) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.876345137937781, LR: 0.0003 +[2026-03-05 09:21:20] (step=0065812) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.876540794365095, LR: 0.0003 +[2026-03-05 09:21:28] (step=0065813) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.876736450792409, LR: 0.0003 +[2026-03-05 09:21:36] (step=0065814) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 12.876932107219723, LR: 0.0003 +[2026-03-05 09:21:44] (step=0065815) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 12.877127763647035, LR: 0.0003 +[2026-03-05 09:21:52] (step=0065816) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.87732342007435, LR: 0.0003 +[2026-03-05 09:22:00] (step=0065817) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.877519076501663, LR: 0.0003 +[2026-03-05 09:22:07] (step=0065818) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.877714732928977, LR: 0.0003 +[2026-03-05 09:22:15] (step=0065819) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.877910389356291, LR: 0.0003 +[2026-03-05 09:22:23] (step=0065820) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.878106045783603, LR: 0.0003 +[2026-03-05 09:22:31] (step=0065821) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.878301702210917, LR: 0.0003 +[2026-03-05 09:22:39] (step=0065822) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.878497358638231, LR: 0.0003 +[2026-03-05 09:22:47] (step=0065823) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 12.878693015065545, LR: 0.0003 +[2026-03-05 09:22:55] (step=0065824) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.878888671492858, LR: 0.0003 +[2026-03-05 09:23:03] (step=0065825) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 12.879084327920172, LR: 0.0003 +[2026-03-05 09:23:10] (step=0065826) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.879279984347486, LR: 0.0003 +[2026-03-05 09:23:18] (step=0065827) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.8794756407748, LR: 0.0003 +[2026-03-05 09:23:26] (step=0065828) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.879671297202114, LR: 0.0003 +[2026-03-05 09:23:34] (step=0065829) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.879866953629426, LR: 0.0003 +[2026-03-05 09:23:42] (step=0065830) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.88006261005674, LR: 0.0003 +[2026-03-05 09:23:50] (step=0065831) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.880258266484054, LR: 0.0003 +[2026-03-05 09:23:58] (step=0065832) Train Loss: 0.4432, Train Steps/Sec: 0.12, Epoch: 12.880453922911368, LR: 0.0003 +[2026-03-05 09:24:05] (step=0065833) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.880649579338682, LR: 0.0003 +[2026-03-05 09:24:13] (step=0065834) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.880845235765994, LR: 0.0003 +[2026-03-05 09:24:21] (step=0065835) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.881040892193308, LR: 0.0003 +[2026-03-05 09:24:29] (step=0065836) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.881236548620622, LR: 0.0003 +[2026-03-05 09:24:37] (step=0065837) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.881432205047936, LR: 0.0003 +[2026-03-05 09:24:45] (step=0065838) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.88162786147525, LR: 0.0003 +[2026-03-05 09:24:53] (step=0065839) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.881823517902562, LR: 0.0003 +[2026-03-05 09:25:00] (step=0065840) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.882019174329876, LR: 0.0003 +[2026-03-05 09:25:08] (step=0065841) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.88221483075719, LR: 0.0003 +[2026-03-05 09:25:16] (step=0065842) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.882410487184504, LR: 0.0003 +[2026-03-05 09:25:24] (step=0065843) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.882606143611818, LR: 0.0003 +[2026-03-05 09:25:32] (step=0065844) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.88280180003913, LR: 0.0003 +[2026-03-05 09:25:40] (step=0065845) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.882997456466445, LR: 0.0003 +[2026-03-05 09:25:47] (step=0065846) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.883193112893759, LR: 0.0003 +[2026-03-05 09:25:55] (step=0065847) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 12.883388769321073, LR: 0.0003 +[2026-03-05 09:26:03] (step=0065848) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.883584425748387, LR: 0.0003 +[2026-03-05 09:26:11] (step=0065849) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.883780082175699, LR: 0.0003 +[2026-03-05 09:26:19] (step=0065850) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 12.883975738603013, LR: 0.0003 +[2026-03-05 09:26:27] (step=0065851) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.884171395030327, LR: 0.0003 +[2026-03-05 09:26:35] (step=0065852) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.884367051457641, LR: 0.0003 +[2026-03-05 09:26:42] (step=0065853) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 12.884562707884953, LR: 0.0003 +[2026-03-05 09:26:50] (step=0065854) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 12.884758364312267, LR: 0.0003 +[2026-03-05 09:26:58] (step=0065855) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 12.884954020739581, LR: 0.0003 +[2026-03-05 09:27:06] (step=0065856) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.885149677166895, LR: 0.0003 +[2026-03-05 09:27:14] (step=0065857) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.88534533359421, LR: 0.0003 +[2026-03-05 09:27:22] (step=0065858) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.885540990021521, LR: 0.0003 +[2026-03-05 09:27:30] (step=0065859) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.885736646448835, LR: 0.0003 +[2026-03-05 09:27:37] (step=0065860) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.88593230287615, LR: 0.0003 +[2026-03-05 09:27:45] (step=0065861) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.886127959303463, LR: 0.0003 +[2026-03-05 09:27:53] (step=0065862) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.886323615730777, LR: 0.0003 +[2026-03-05 09:28:01] (step=0065863) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.88651927215809, LR: 0.0003 +[2026-03-05 09:28:09] (step=0065864) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.886714928585404, LR: 0.0003 +[2026-03-05 09:28:17] (step=0065865) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.886910585012718, LR: 0.0003 +[2026-03-05 09:28:24] (step=0065866) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.887106241440032, LR: 0.0003 +[2026-03-05 09:28:32] (step=0065867) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 12.887301897867346, LR: 0.0003 +[2026-03-05 09:28:40] (step=0065868) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.887497554294658, LR: 0.0003 +[2026-03-05 09:28:48] (step=0065869) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.887693210721972, LR: 0.0003 +[2026-03-05 09:28:56] (step=0065870) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.887888867149286, LR: 0.0003 +[2026-03-05 09:29:04] (step=0065871) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 12.8880845235766, LR: 0.0003 +[2026-03-05 09:29:12] (step=0065872) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.888280180003914, LR: 0.0003 +[2026-03-05 09:29:20] (step=0065873) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 12.888475836431226, LR: 0.0003 +[2026-03-05 09:29:27] (step=0065874) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.88867149285854, LR: 0.0003 +[2026-03-05 09:29:35] (step=0065875) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.888867149285854, LR: 0.0003 +[2026-03-05 09:29:43] (step=0065876) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.889062805713168, LR: 0.0003 +[2026-03-05 09:29:51] (step=0065877) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.88925846214048, LR: 0.0003 +[2026-03-05 09:29:59] (step=0065878) Train Loss: 0.4378, Train Steps/Sec: 0.12, Epoch: 12.889454118567794, LR: 0.0003 +[2026-03-05 09:30:07] (step=0065879) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 12.889649774995108, LR: 0.0003 +[2026-03-05 09:30:15] (step=0065880) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.889845431422422, LR: 0.0003 +[2026-03-05 09:30:22] (step=0065881) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.890041087849736, LR: 0.0003 +[2026-03-05 09:30:30] (step=0065882) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.890236744277049, LR: 0.0003 +[2026-03-05 09:30:38] (step=0065883) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 12.890432400704363, LR: 0.0003 +[2026-03-05 09:30:46] (step=0065884) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 12.890628057131677, LR: 0.0003 +[2026-03-05 09:30:54] (step=0065885) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.89082371355899, LR: 0.0003 +[2026-03-05 09:31:02] (step=0065886) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.891019369986305, LR: 0.0003 +[2026-03-05 09:31:10] (step=0065887) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.891215026413617, LR: 0.0003 +[2026-03-05 09:31:17] (step=0065888) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 12.891410682840931, LR: 0.0003 +[2026-03-05 09:31:25] (step=0065889) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.891606339268245, LR: 0.0003 +[2026-03-05 09:31:33] (step=0065890) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.891801995695559, LR: 0.0003 +[2026-03-05 09:31:41] (step=0065891) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.891997652122873, LR: 0.0003 +[2026-03-05 09:31:49] (step=0065892) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.892193308550185, LR: 0.0003 +[2026-03-05 09:31:57] (step=0065893) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.8923889649775, LR: 0.0003 +[2026-03-05 09:32:05] (step=0065894) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 12.892584621404813, LR: 0.0003 +[2026-03-05 09:32:12] (step=0065895) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.892780277832127, LR: 0.0003 +[2026-03-05 09:32:20] (step=0065896) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.892975934259441, LR: 0.0003 +[2026-03-05 09:32:28] (step=0065897) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.893171590686753, LR: 0.0003 +[2026-03-05 09:32:36] (step=0065898) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.893367247114067, LR: 0.0003 +[2026-03-05 09:32:44] (step=0065899) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.893562903541381, LR: 0.0003 +[2026-03-05 09:32:52] (step=0065900) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 12.893758559968695, LR: 0.0003 +[2026-03-05 09:32:59] (step=0065901) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 12.89395421639601, LR: 0.0003 +[2026-03-05 09:33:07] (step=0065902) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.894149872823322, LR: 0.0003 +[2026-03-05 09:33:15] (step=0065903) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.894345529250636, LR: 0.0003 +[2026-03-05 09:33:23] (step=0065904) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.89454118567795, LR: 0.0003 +[2026-03-05 09:33:31] (step=0065905) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.894736842105264, LR: 0.0003 +[2026-03-05 09:33:39] (step=0065906) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.894932498532576, LR: 0.0003 +[2026-03-05 09:33:46] (step=0065907) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 12.89512815495989, LR: 0.0003 +[2026-03-05 09:33:54] (step=0065908) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.895323811387204, LR: 0.0003 +[2026-03-05 09:34:02] (step=0065909) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.895519467814518, LR: 0.0003 +[2026-03-05 09:34:10] (step=0065910) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.895715124241832, LR: 0.0003 +[2026-03-05 09:34:18] (step=0065911) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 12.895910780669144, LR: 0.0003 +[2026-03-05 09:34:26] (step=0065912) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.896106437096458, LR: 0.0003 +[2026-03-05 09:34:34] (step=0065913) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.896302093523772, LR: 0.0003 +[2026-03-05 09:34:41] (step=0065914) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.896497749951086, LR: 0.0003 +[2026-03-05 09:34:49] (step=0065915) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.8966934063784, LR: 0.0003 +[2026-03-05 09:34:57] (step=0065916) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.896889062805712, LR: 0.0003 +[2026-03-05 09:35:05] (step=0065917) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.897084719233026, LR: 0.0003 +[2026-03-05 09:35:13] (step=0065918) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.89728037566034, LR: 0.0003 +[2026-03-05 09:35:21] (step=0065919) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.897476032087654, LR: 0.0003 +[2026-03-05 09:35:29] (step=0065920) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.897671688514968, LR: 0.0003 +[2026-03-05 09:35:36] (step=0065921) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.89786734494228, LR: 0.0003 +[2026-03-05 09:35:44] (step=0065922) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.898063001369595, LR: 0.0003 +[2026-03-05 09:35:52] (step=0065923) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.898258657796909, LR: 0.0003 +[2026-03-05 09:36:00] (step=0065924) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 12.898454314224223, LR: 0.0003 +[2026-03-05 09:36:08] (step=0065925) Train Loss: 0.4409, Train Steps/Sec: 0.12, Epoch: 12.898649970651537, LR: 0.0003 +[2026-03-05 09:36:16] (step=0065926) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.898845627078849, LR: 0.0003 +[2026-03-05 09:36:24] (step=0065927) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.899041283506163, LR: 0.0003 +[2026-03-05 09:36:32] (step=0065928) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.899236939933477, LR: 0.0003 +[2026-03-05 09:36:39] (step=0065929) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.899432596360791, LR: 0.0003 +[2026-03-05 09:36:47] (step=0065930) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.899628252788103, LR: 0.0003 +[2026-03-05 09:36:55] (step=0065931) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.899823909215417, LR: 0.0003 +[2026-03-05 09:37:03] (step=0065932) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.900019565642731, LR: 0.0003 +[2026-03-05 09:37:11] (step=0065933) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.900215222070045, LR: 0.0003 +[2026-03-05 09:37:19] (step=0065934) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 12.90041087849736, LR: 0.0003 +[2026-03-05 09:37:27] (step=0065935) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 12.900606534924671, LR: 0.0003 +[2026-03-05 09:37:34] (step=0065936) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.900802191351985, LR: 0.0003 +[2026-03-05 09:37:42] (step=0065937) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.9009978477793, LR: 0.0003 +[2026-03-05 09:37:50] (step=0065938) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.901193504206613, LR: 0.0003 +[2026-03-05 09:37:58] (step=0065939) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.901389160633927, LR: 0.0003 +[2026-03-05 09:38:06] (step=0065940) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.90158481706124, LR: 0.0003 +[2026-03-05 09:38:14] (step=0065941) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 12.901780473488554, LR: 0.0003 +[2026-03-05 09:38:21] (step=0065942) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.901976129915868, LR: 0.0003 +[2026-03-05 09:38:29] (step=0065943) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.902171786343182, LR: 0.0003 +[2026-03-05 09:38:37] (step=0065944) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.902367442770496, LR: 0.0003 +[2026-03-05 09:38:45] (step=0065945) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 12.902563099197808, LR: 0.0003 +[2026-03-05 09:38:53] (step=0065946) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 12.902758755625122, LR: 0.0003 +[2026-03-05 09:39:01] (step=0065947) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.902954412052436, LR: 0.0003 +[2026-03-05 09:39:09] (step=0065948) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.90315006847975, LR: 0.0003 +[2026-03-05 09:39:16] (step=0065949) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.903345724907064, LR: 0.0003 +[2026-03-05 09:39:24] (step=0065950) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.903541381334376, LR: 0.0003 +[2026-03-05 09:39:32] (step=0065951) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.90373703776169, LR: 0.0003 +[2026-03-05 09:39:40] (step=0065952) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.903932694189004, LR: 0.0003 +[2026-03-05 09:39:48] (step=0065953) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.904128350616318, LR: 0.0003 +[2026-03-05 09:39:56] (step=0065954) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.904324007043632, LR: 0.0003 +[2026-03-05 09:40:03] (step=0065955) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.904519663470944, LR: 0.0003 +[2026-03-05 09:40:11] (step=0065956) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.904715319898258, LR: 0.0003 +[2026-03-05 09:40:19] (step=0065957) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.904910976325572, LR: 0.0003 +[2026-03-05 09:40:27] (step=0065958) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.905106632752886, LR: 0.0003 +[2026-03-05 09:40:35] (step=0065959) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.905302289180199, LR: 0.0003 +[2026-03-05 09:40:43] (step=0065960) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.905497945607513, LR: 0.0003 +[2026-03-05 09:40:51] (step=0065961) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.905693602034827, LR: 0.0003 +[2026-03-05 09:40:58] (step=0065962) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 12.90588925846214, LR: 0.0003 +[2026-03-05 09:41:06] (step=0065963) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.906084914889455, LR: 0.0003 +[2026-03-05 09:41:14] (step=0065964) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.906280571316767, LR: 0.0003 +[2026-03-05 09:41:22] (step=0065965) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 12.906476227744081, LR: 0.0003 +[2026-03-05 09:41:30] (step=0065966) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.906671884171395, LR: 0.0003 +[2026-03-05 09:41:38] (step=0065967) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.906867540598709, LR: 0.0003 +[2026-03-05 09:41:46] (step=0065968) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 12.907063197026023, LR: 0.0003 +[2026-03-05 09:41:53] (step=0065969) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 12.907258853453335, LR: 0.0003 +[2026-03-05 09:42:01] (step=0065970) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.90745450988065, LR: 0.0003 +[2026-03-05 09:42:09] (step=0065971) Train Loss: 0.4342, Train Steps/Sec: 0.12, Epoch: 12.907650166307963, LR: 0.0003 +[2026-03-05 09:42:17] (step=0065972) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.907845822735277, LR: 0.0003 +[2026-03-05 09:42:25] (step=0065973) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 12.908041479162591, LR: 0.0003 +[2026-03-05 09:42:33] (step=0065974) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.908237135589903, LR: 0.0003 +[2026-03-05 09:42:41] (step=0065975) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.908432792017217, LR: 0.0003 +[2026-03-05 09:42:49] (step=0065976) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 12.908628448444532, LR: 0.0003 +[2026-03-05 09:42:56] (step=0065977) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.908824104871846, LR: 0.0003 +[2026-03-05 09:43:04] (step=0065978) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.90901976129916, LR: 0.0003 +[2026-03-05 09:43:12] (step=0065979) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 12.909215417726472, LR: 0.0003 +[2026-03-05 09:43:20] (step=0065980) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.909411074153786, LR: 0.0003 +[2026-03-05 09:43:28] (step=0065981) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.9096067305811, LR: 0.0003 +[2026-03-05 09:43:36] (step=0065982) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 12.909802387008414, LR: 0.0003 +[2026-03-05 09:43:44] (step=0065983) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 12.909998043435726, LR: 0.0003 +[2026-03-05 09:43:51] (step=0065984) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.91019369986304, LR: 0.0003 +[2026-03-05 09:43:59] (step=0065985) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 12.910389356290354, LR: 0.0003 +[2026-03-05 09:44:07] (step=0065986) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.910585012717668, LR: 0.0003 +[2026-03-05 09:44:15] (step=0065987) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.910780669144982, LR: 0.0003 +[2026-03-05 09:44:23] (step=0065988) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.910976325572294, LR: 0.0003 +[2026-03-05 09:44:31] (step=0065989) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.911171981999608, LR: 0.0003 +[2026-03-05 09:44:39] (step=0065990) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.911367638426922, LR: 0.0003 +[2026-03-05 09:44:46] (step=0065991) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.911563294854236, LR: 0.0003 +[2026-03-05 09:44:54] (step=0065992) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.91175895128155, LR: 0.0003 +[2026-03-05 09:45:02] (step=0065993) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 12.911954607708862, LR: 0.0003 +[2026-03-05 09:45:10] (step=0065994) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 12.912150264136177, LR: 0.0003 +[2026-03-05 09:45:18] (step=0065995) Train Loss: 0.4605, Train Steps/Sec: 0.13, Epoch: 12.91234592056349, LR: 0.0003 +[2026-03-05 09:45:26] (step=0065996) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.912541576990805, LR: 0.0003 +[2026-03-05 09:45:33] (step=0065997) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.912737233418119, LR: 0.0003 +[2026-03-05 09:45:41] (step=0065998) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 12.91293288984543, LR: 0.0003 +[2026-03-05 09:45:49] (step=0065999) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 12.913128546272745, LR: 0.0003 +[2026-03-05 09:45:57] (step=0066000) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.913324202700059, LR: 0.0003 +[2026-03-05 09:45:57] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0066000/ +[2026-03-05 09:46:05] (step=0066001) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.913519859127373, LR: 0.0003 +[2026-03-05 09:46:13] (step=0066002) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 12.913715515554687, LR: 0.0003 +[2026-03-05 09:46:21] (step=0066003) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.913911171981999, LR: 0.0003 +[2026-03-05 09:46:28] (step=0066004) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 12.914106828409313, LR: 0.0003 +[2026-03-05 09:46:36] (step=0066005) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.914302484836627, LR: 0.0003 +[2026-03-05 09:46:44] (step=0066006) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.914498141263941, LR: 0.0003 +[2026-03-05 09:46:52] (step=0066007) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.914693797691255, LR: 0.0003 +[2026-03-05 09:47:00] (step=0066008) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.914889454118567, LR: 0.0003 +[2026-03-05 09:47:08] (step=0066009) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.915085110545881, LR: 0.0003 +[2026-03-05 09:47:15] (step=0066010) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.915280766973195, LR: 0.0003 +[2026-03-05 09:47:23] (step=0066011) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.91547642340051, LR: 0.0003 +[2026-03-05 09:47:31] (step=0066012) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.915672079827822, LR: 0.0003 +[2026-03-05 09:47:39] (step=0066013) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.915867736255136, LR: 0.0003 +[2026-03-05 09:47:47] (step=0066014) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.91606339268245, LR: 0.0003 +[2026-03-05 09:47:55] (step=0066015) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 12.916259049109764, LR: 0.0003 +[2026-03-05 09:48:03] (step=0066016) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.916454705537078, LR: 0.0003 +[2026-03-05 09:48:11] (step=0066017) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 12.91665036196439, LR: 0.0003 +[2026-03-05 09:48:18] (step=0066018) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.916846018391704, LR: 0.0003 +[2026-03-05 09:48:26] (step=0066019) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.917041674819018, LR: 0.0003 +[2026-03-05 09:48:34] (step=0066020) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.917237331246332, LR: 0.0003 +[2026-03-05 09:48:42] (step=0066021) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.917432987673646, LR: 0.0003 +[2026-03-05 09:48:50] (step=0066022) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.917628644100958, LR: 0.0003 +[2026-03-05 09:48:58] (step=0066023) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.917824300528272, LR: 0.0003 +[2026-03-05 09:49:06] (step=0066024) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 12.918019956955586, LR: 0.0003 +[2026-03-05 09:49:13] (step=0066025) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.9182156133829, LR: 0.0003 +[2026-03-05 09:49:21] (step=0066026) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.918411269810214, LR: 0.0003 +[2026-03-05 09:49:29] (step=0066027) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.918606926237526, LR: 0.0003 +[2026-03-05 09:49:37] (step=0066028) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.91880258266484, LR: 0.0003 +[2026-03-05 09:49:45] (step=0066029) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 12.918998239092154, LR: 0.0003 +[2026-03-05 09:49:53] (step=0066030) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.919193895519468, LR: 0.0003 +[2026-03-05 09:50:01] (step=0066031) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.919389551946782, LR: 0.0003 +[2026-03-05 09:50:08] (step=0066032) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.919585208374095, LR: 0.0003 +[2026-03-05 09:50:16] (step=0066033) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.919780864801409, LR: 0.0003 +[2026-03-05 09:50:24] (step=0066034) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.919976521228723, LR: 0.0003 +[2026-03-05 09:50:32] (step=0066035) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.920172177656037, LR: 0.0003 +[2026-03-05 09:50:40] (step=0066036) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.920367834083349, LR: 0.0003 +[2026-03-05 09:50:48] (step=0066037) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.920563490510663, LR: 0.0003 +[2026-03-05 09:50:55] (step=0066038) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 12.920759146937977, LR: 0.0003 +[2026-03-05 09:51:03] (step=0066039) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.92095480336529, LR: 0.0003 +[2026-03-05 09:51:11] (step=0066040) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.921150459792605, LR: 0.0003 +[2026-03-05 09:51:19] (step=0066041) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.921346116219917, LR: 0.0003 +[2026-03-05 09:51:27] (step=0066042) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.921541772647231, LR: 0.0003 +[2026-03-05 09:51:35] (step=0066043) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.921737429074545, LR: 0.0003 +[2026-03-05 09:51:42] (step=0066044) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.921933085501859, LR: 0.0003 +[2026-03-05 09:51:50] (step=0066045) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.922128741929173, LR: 0.0003 +[2026-03-05 09:51:58] (step=0066046) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.922324398356485, LR: 0.0003 +[2026-03-05 09:52:06] (step=0066047) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 12.9225200547838, LR: 0.0003 +[2026-03-05 09:52:14] (step=0066048) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.922715711211113, LR: 0.0003 +[2026-03-05 09:52:22] (step=0066049) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 12.922911367638427, LR: 0.0003 +[2026-03-05 09:52:30] (step=0066050) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.923107024065741, LR: 0.0003 +[2026-03-05 09:52:37] (step=0066051) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.923302680493054, LR: 0.0003 +[2026-03-05 09:52:45] (step=0066052) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.923498336920368, LR: 0.0003 +[2026-03-05 09:52:53] (step=0066053) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.923693993347682, LR: 0.0003 +[2026-03-05 09:53:01] (step=0066054) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.923889649774996, LR: 0.0003 +[2026-03-05 09:53:09] (step=0066055) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.92408530620231, LR: 0.0003 +[2026-03-05 09:53:16] (step=0066056) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.924280962629622, LR: 0.0003 +[2026-03-05 09:53:24] (step=0066057) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 12.924476619056936, LR: 0.0003 +[2026-03-05 09:53:32] (step=0066058) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.92467227548425, LR: 0.0003 +[2026-03-05 09:53:40] (step=0066059) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.924867931911564, LR: 0.0003 +[2026-03-05 09:53:48] (step=0066060) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 12.925063588338878, LR: 0.0003 +[2026-03-05 09:53:56] (step=0066061) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 12.92525924476619, LR: 0.0003 +[2026-03-05 09:54:03] (step=0066062) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.925454901193504, LR: 0.0003 +[2026-03-05 09:54:11] (step=0066063) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.925650557620818, LR: 0.0003 +[2026-03-05 09:54:19] (step=0066064) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.925846214048132, LR: 0.0003 +[2026-03-05 09:54:27] (step=0066065) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.926041870475444, LR: 0.0003 +[2026-03-05 09:54:35] (step=0066066) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.926237526902758, LR: 0.0003 +[2026-03-05 09:54:43] (step=0066067) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 12.926433183330072, LR: 0.0003 +[2026-03-05 09:54:51] (step=0066068) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 12.926628839757386, LR: 0.0003 +[2026-03-05 09:54:58] (step=0066069) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.9268244961847, LR: 0.0003 +[2026-03-05 09:55:06] (step=0066070) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 12.927020152612013, LR: 0.0003 +[2026-03-05 09:55:14] (step=0066071) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.927215809039327, LR: 0.0003 +[2026-03-05 09:55:22] (step=0066072) Train Loss: 0.4217, Train Steps/Sec: 0.13, Epoch: 12.92741146546664, LR: 0.0003 +[2026-03-05 09:55:30] (step=0066073) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.927607121893955, LR: 0.0003 +[2026-03-05 09:55:38] (step=0066074) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.927802778321269, LR: 0.0003 +[2026-03-05 09:55:46] (step=0066075) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 12.92799843474858, LR: 0.0003 +[2026-03-05 09:55:53] (step=0066076) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 12.928194091175895, LR: 0.0003 +[2026-03-05 09:56:01] (step=0066077) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.928389747603209, LR: 0.0003 +[2026-03-05 09:56:09] (step=0066078) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.928585404030523, LR: 0.0003 +[2026-03-05 09:56:17] (step=0066079) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 12.928781060457837, LR: 0.0003 +[2026-03-05 09:56:25] (step=0066080) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.928976716885149, LR: 0.0003 +[2026-03-05 09:56:33] (step=0066081) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.929172373312463, LR: 0.0003 +[2026-03-05 09:56:41] (step=0066082) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.929368029739777, LR: 0.0003 +[2026-03-05 09:56:48] (step=0066083) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.929563686167091, LR: 0.0003 +[2026-03-05 09:56:56] (step=0066084) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 12.929759342594405, LR: 0.0003 +[2026-03-05 09:57:04] (step=0066085) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.929954999021717, LR: 0.0003 +[2026-03-05 09:57:12] (step=0066086) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 12.930150655449031, LR: 0.0003 +[2026-03-05 09:57:20] (step=0066087) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.930346311876345, LR: 0.0003 +[2026-03-05 09:57:28] (step=0066088) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.93054196830366, LR: 0.0003 +[2026-03-05 09:57:35] (step=0066089) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.930737624730972, LR: 0.0003 +[2026-03-05 09:57:43] (step=0066090) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.930933281158286, LR: 0.0003 +[2026-03-05 09:57:51] (step=0066091) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.9311289375856, LR: 0.0003 +[2026-03-05 09:57:59] (step=0066092) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.931324594012914, LR: 0.0003 +[2026-03-05 09:58:07] (step=0066093) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 12.931520250440228, LR: 0.0003 +[2026-03-05 09:58:15] (step=0066094) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.93171590686754, LR: 0.0003 +[2026-03-05 09:58:23] (step=0066095) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 12.931911563294854, LR: 0.0003 +[2026-03-05 09:58:30] (step=0066096) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.932107219722168, LR: 0.0003 +[2026-03-05 09:58:38] (step=0066097) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 12.932302876149482, LR: 0.0003 +[2026-03-05 09:58:46] (step=0066098) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.932498532576796, LR: 0.0003 +[2026-03-05 09:58:54] (step=0066099) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 12.932694189004108, LR: 0.0003 +[2026-03-05 09:59:02] (step=0066100) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.932889845431422, LR: 0.0003 +[2026-03-05 09:59:10] (step=0066101) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 12.933085501858736, LR: 0.0003 +[2026-03-05 09:59:17] (step=0066102) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 12.93328115828605, LR: 0.0003 +[2026-03-05 09:59:25] (step=0066103) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.933476814713364, LR: 0.0003 +[2026-03-05 09:59:33] (step=0066104) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 12.933672471140676, LR: 0.0003 +[2026-03-05 09:59:41] (step=0066105) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.93386812756799, LR: 0.0003 +[2026-03-05 09:59:49] (step=0066106) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.934063783995304, LR: 0.0003 +[2026-03-05 09:59:57] (step=0066107) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.934259440422618, LR: 0.0003 +[2026-03-05 10:00:05] (step=0066108) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 12.934455096849932, LR: 0.0003 +[2026-03-05 10:00:12] (step=0066109) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.934650753277245, LR: 0.0003 +[2026-03-05 10:00:20] (step=0066110) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.934846409704559, LR: 0.0003 +[2026-03-05 10:00:28] (step=0066111) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.935042066131873, LR: 0.0003 +[2026-03-05 10:00:36] (step=0066112) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.935237722559187, LR: 0.0003 +[2026-03-05 10:00:44] (step=0066113) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.9354333789865, LR: 0.0003 +[2026-03-05 10:00:52] (step=0066114) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.935629035413813, LR: 0.0003 +[2026-03-05 10:01:00] (step=0066115) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 12.935824691841127, LR: 0.0003 +[2026-03-05 10:01:07] (step=0066116) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.93602034826844, LR: 0.0003 +[2026-03-05 10:01:15] (step=0066117) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 12.936216004695755, LR: 0.0003 +[2026-03-05 10:01:23] (step=0066118) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 12.936411661123067, LR: 0.0003 +[2026-03-05 10:01:31] (step=0066119) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.936607317550381, LR: 0.0003 +[2026-03-05 10:01:39] (step=0066120) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.936802973977695, LR: 0.0003 +[2026-03-05 10:01:47] (step=0066121) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.936998630405009, LR: 0.0003 +[2026-03-05 10:01:55] (step=0066122) Train Loss: 0.4351, Train Steps/Sec: 0.12, Epoch: 12.937194286832323, LR: 0.0003 +[2026-03-05 10:02:03] (step=0066123) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.937389943259635, LR: 0.0003 +[2026-03-05 10:02:10] (step=0066124) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.93758559968695, LR: 0.0003 +[2026-03-05 10:02:18] (step=0066125) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 12.937781256114263, LR: 0.0003 +[2026-03-05 10:02:26] (step=0066126) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.937976912541577, LR: 0.0003 +[2026-03-05 10:02:34] (step=0066127) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.938172568968891, LR: 0.0003 +[2026-03-05 10:02:42] (step=0066128) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.938368225396204, LR: 0.0003 +[2026-03-05 10:02:50] (step=0066129) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.938563881823518, LR: 0.0003 +[2026-03-05 10:02:57] (step=0066130) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.938759538250832, LR: 0.0003 +[2026-03-05 10:03:05] (step=0066131) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 12.938955194678146, LR: 0.0003 +[2026-03-05 10:03:13] (step=0066132) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.93915085110546, LR: 0.0003 +[2026-03-05 10:03:21] (step=0066133) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.939346507532772, LR: 0.0003 +[2026-03-05 10:03:29] (step=0066134) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.939542163960086, LR: 0.0003 +[2026-03-05 10:03:37] (step=0066135) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 12.9397378203874, LR: 0.0003 +[2026-03-05 10:03:45] (step=0066136) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 12.939933476814714, LR: 0.0003 +[2026-03-05 10:03:52] (step=0066137) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 12.940129133242028, LR: 0.0003 +[2026-03-05 10:04:00] (step=0066138) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.94032478966934, LR: 0.0003 +[2026-03-05 10:04:08] (step=0066139) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.940520446096654, LR: 0.0003 +[2026-03-05 10:04:16] (step=0066140) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.940716102523968, LR: 0.0003 +[2026-03-05 10:04:24] (step=0066141) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.940911758951282, LR: 0.0003 +[2026-03-05 10:04:32] (step=0066142) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 12.941107415378594, LR: 0.0003 +[2026-03-05 10:04:39] (step=0066143) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.941303071805908, LR: 0.0003 +[2026-03-05 10:04:47] (step=0066144) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.941498728233222, LR: 0.0003 +[2026-03-05 10:04:55] (step=0066145) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.941694384660536, LR: 0.0003 +[2026-03-05 10:05:03] (step=0066146) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.94189004108785, LR: 0.0003 +[2026-03-05 10:05:11] (step=0066147) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 12.942085697515163, LR: 0.0003 +[2026-03-05 10:05:19] (step=0066148) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.942281353942477, LR: 0.0003 +[2026-03-05 10:05:27] (step=0066149) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.94247701036979, LR: 0.0003 +[2026-03-05 10:05:34] (step=0066150) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 12.942672666797105, LR: 0.0003 +[2026-03-05 10:05:42] (step=0066151) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.942868323224419, LR: 0.0003 +[2026-03-05 10:05:50] (step=0066152) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 12.94306397965173, LR: 0.0003 +[2026-03-05 10:05:58] (step=0066153) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.943259636079045, LR: 0.0003 +[2026-03-05 10:06:06] (step=0066154) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.943455292506359, LR: 0.0003 +[2026-03-05 10:06:14] (step=0066155) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.943650948933673, LR: 0.0003 +[2026-03-05 10:06:21] (step=0066156) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 12.943846605360987, LR: 0.0003 +[2026-03-05 10:06:29] (step=0066157) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.944042261788299, LR: 0.0003 +[2026-03-05 10:06:37] (step=0066158) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.944237918215613, LR: 0.0003 +[2026-03-05 10:06:45] (step=0066159) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 12.944433574642927, LR: 0.0003 +[2026-03-05 10:06:53] (step=0066160) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.944629231070241, LR: 0.0003 +[2026-03-05 10:07:01] (step=0066161) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 12.944824887497555, LR: 0.0003 +[2026-03-05 10:07:09] (step=0066162) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 12.945020543924867, LR: 0.0003 +[2026-03-05 10:07:17] (step=0066163) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 12.945216200352181, LR: 0.0003 +[2026-03-05 10:07:24] (step=0066164) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.945411856779495, LR: 0.0003 +[2026-03-05 10:07:32] (step=0066165) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.94560751320681, LR: 0.0003 +[2026-03-05 10:07:40] (step=0066166) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 12.945803169634123, LR: 0.0003 +[2026-03-05 10:07:48] (step=0066167) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 12.945998826061436, LR: 0.0003 +[2026-03-05 10:07:56] (step=0066168) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 12.94619448248875, LR: 0.0003 +[2026-03-05 10:08:04] (step=0066169) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.946390138916064, LR: 0.0003 +[2026-03-05 10:08:11] (step=0066170) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.946585795343378, LR: 0.0003 +[2026-03-05 10:08:19] (step=0066171) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.94678145177069, LR: 0.0003 +[2026-03-05 10:08:27] (step=0066172) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 12.946977108198004, LR: 0.0003 +[2026-03-05 10:08:35] (step=0066173) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 12.947172764625318, LR: 0.0003 +[2026-03-05 10:08:43] (step=0066174) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 12.947368421052632, LR: 0.0003 +[2026-03-05 10:08:51] (step=0066175) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 12.947564077479946, LR: 0.0003 +[2026-03-05 10:08:59] (step=0066176) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 12.947759733907258, LR: 0.0003 +[2026-03-05 10:09:06] (step=0066177) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 12.947955390334572, LR: 0.0003 +[2026-03-05 10:09:14] (step=0066178) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.948151046761886, LR: 0.0003 +[2026-03-05 10:09:22] (step=0066179) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 12.9483467031892, LR: 0.0003 +[2026-03-05 10:09:30] (step=0066180) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.948542359616514, LR: 0.0003 +[2026-03-05 10:09:38] (step=0066181) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.948738016043826, LR: 0.0003 +[2026-03-05 10:09:46] (step=0066182) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.94893367247114, LR: 0.0003 +[2026-03-05 10:09:54] (step=0066183) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.949129328898454, LR: 0.0003 +[2026-03-05 10:10:01] (step=0066184) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.949324985325768, LR: 0.0003 +[2026-03-05 10:10:09] (step=0066185) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 12.949520641753082, LR: 0.0003 +[2026-03-05 10:10:17] (step=0066186) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.949716298180395, LR: 0.0003 +[2026-03-05 10:10:25] (step=0066187) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 12.949911954607709, LR: 0.0003 +[2026-03-05 10:10:33] (step=0066188) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 12.950107611035023, LR: 0.0003 +[2026-03-05 10:10:41] (step=0066189) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 12.950303267462337, LR: 0.0003 +[2026-03-05 10:10:48] (step=0066190) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 12.95049892388965, LR: 0.0003 +[2026-03-05 10:10:56] (step=0066191) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 12.950694580316963, LR: 0.0003 +[2026-03-05 10:11:04] (step=0066192) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.950890236744277, LR: 0.0003 +[2026-03-05 10:11:12] (step=0066193) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.95108589317159, LR: 0.0003 +[2026-03-05 10:11:20] (step=0066194) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.951281549598905, LR: 0.0003 +[2026-03-05 10:11:28] (step=0066195) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 12.951477206026217, LR: 0.0003 +[2026-03-05 10:11:36] (step=0066196) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 12.951672862453531, LR: 0.0003 +[2026-03-05 10:11:43] (step=0066197) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.951868518880845, LR: 0.0003 +[2026-03-05 10:11:51] (step=0066198) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.95206417530816, LR: 0.0003 +[2026-03-05 10:11:59] (step=0066199) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 12.952259831735473, LR: 0.0003 +[2026-03-05 10:12:07] (step=0066200) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 12.952455488162785, LR: 0.0003 +[2026-03-05 10:12:15] (step=0066201) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.9526511445901, LR: 0.0003 +[2026-03-05 10:12:23] (step=0066202) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.952846801017413, LR: 0.0003 +[2026-03-05 10:12:31] (step=0066203) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 12.953042457444727, LR: 0.0003 +[2026-03-05 10:12:38] (step=0066204) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.953238113872041, LR: 0.0003 +[2026-03-05 10:12:46] (step=0066205) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.953433770299354, LR: 0.0003 +[2026-03-05 10:12:54] (step=0066206) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 12.953629426726668, LR: 0.0003 +[2026-03-05 10:13:02] (step=0066207) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.953825083153982, LR: 0.0003 +[2026-03-05 10:13:10] (step=0066208) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 12.954020739581296, LR: 0.0003 +[2026-03-05 10:13:18] (step=0066209) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.95421639600861, LR: 0.0003 +[2026-03-05 10:13:25] (step=0066210) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 12.954412052435922, LR: 0.0003 +[2026-03-05 10:13:33] (step=0066211) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 12.954607708863236, LR: 0.0003 +[2026-03-05 10:13:41] (step=0066212) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 12.95480336529055, LR: 0.0003 +[2026-03-05 10:13:49] (step=0066213) Train Loss: 0.4398, Train Steps/Sec: 0.12, Epoch: 12.954999021717864, LR: 0.0003 +[2026-03-05 10:13:57] (step=0066214) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.955194678145178, LR: 0.0003 +[2026-03-05 10:14:05] (step=0066215) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.95539033457249, LR: 0.0003 +[2026-03-05 10:14:13] (step=0066216) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.955585990999804, LR: 0.0003 +[2026-03-05 10:14:21] (step=0066217) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.955781647427118, LR: 0.0003 +[2026-03-05 10:14:28] (step=0066218) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.955977303854432, LR: 0.0003 +[2026-03-05 10:14:36] (step=0066219) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.956172960281746, LR: 0.0003 +[2026-03-05 10:14:44] (step=0066220) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.956368616709058, LR: 0.0003 +[2026-03-05 10:14:52] (step=0066221) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.956564273136372, LR: 0.0003 +[2026-03-05 10:15:00] (step=0066222) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.956759929563686, LR: 0.0003 +[2026-03-05 10:15:08] (step=0066223) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.956955585991, LR: 0.0003 +[2026-03-05 10:15:16] (step=0066224) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 12.957151242418313, LR: 0.0003 +[2026-03-05 10:15:23] (step=0066225) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 12.957346898845627, LR: 0.0003 +[2026-03-05 10:15:31] (step=0066226) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.95754255527294, LR: 0.0003 +[2026-03-05 10:15:39] (step=0066227) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.957738211700255, LR: 0.0003 +[2026-03-05 10:15:47] (step=0066228) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 12.957933868127569, LR: 0.0003 +[2026-03-05 10:15:55] (step=0066229) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.95812952455488, LR: 0.0003 +[2026-03-05 10:16:03] (step=0066230) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 12.958325180982195, LR: 0.0003 +[2026-03-05 10:16:11] (step=0066231) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.958520837409509, LR: 0.0003 +[2026-03-05 10:16:18] (step=0066232) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 12.958716493836823, LR: 0.0003 +[2026-03-05 10:16:26] (step=0066233) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.958912150264137, LR: 0.0003 +[2026-03-05 10:16:34] (step=0066234) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.95910780669145, LR: 0.0003 +[2026-03-05 10:16:42] (step=0066235) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.959303463118763, LR: 0.0003 +[2026-03-05 10:16:50] (step=0066236) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 12.959499119546077, LR: 0.0003 +[2026-03-05 10:16:58] (step=0066237) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 12.959694775973391, LR: 0.0003 +[2026-03-05 10:17:06] (step=0066238) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 12.959890432400705, LR: 0.0003 +[2026-03-05 10:17:13] (step=0066239) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 12.960086088828017, LR: 0.0003 +[2026-03-05 10:17:21] (step=0066240) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 12.960281745255331, LR: 0.0003 +[2026-03-05 10:17:29] (step=0066241) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.960477401682645, LR: 0.0003 +[2026-03-05 10:17:37] (step=0066242) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 12.96067305810996, LR: 0.0003 +[2026-03-05 10:17:45] (step=0066243) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 12.960868714537273, LR: 0.0003 +[2026-03-05 10:17:53] (step=0066244) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.961064370964586, LR: 0.0003 +[2026-03-05 10:18:00] (step=0066245) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.9612600273919, LR: 0.0003 +[2026-03-05 10:18:08] (step=0066246) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.961455683819214, LR: 0.0003 +[2026-03-05 10:18:16] (step=0066247) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 12.961651340246528, LR: 0.0003 +[2026-03-05 10:18:24] (step=0066248) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.96184699667384, LR: 0.0003 +[2026-03-05 10:18:32] (step=0066249) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.962042653101154, LR: 0.0003 +[2026-03-05 10:18:40] (step=0066250) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 12.962238309528468, LR: 0.0003 +[2026-03-05 10:18:48] (step=0066251) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 12.962433965955782, LR: 0.0003 +[2026-03-05 10:18:55] (step=0066252) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.962629622383096, LR: 0.0003 +[2026-03-05 10:19:03] (step=0066253) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 12.962825278810408, LR: 0.0003 +[2026-03-05 10:19:11] (step=0066254) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 12.963020935237722, LR: 0.0003 +[2026-03-05 10:19:19] (step=0066255) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 12.963216591665036, LR: 0.0003 +[2026-03-05 10:19:27] (step=0066256) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 12.96341224809235, LR: 0.0003 +[2026-03-05 10:19:35] (step=0066257) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 12.963607904519664, LR: 0.0003 +[2026-03-05 10:19:43] (step=0066258) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.963803560946976, LR: 0.0003 +[2026-03-05 10:19:50] (step=0066259) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.96399921737429, LR: 0.0003 +[2026-03-05 10:19:58] (step=0066260) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.964194873801604, LR: 0.0003 +[2026-03-05 10:20:06] (step=0066261) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 12.964390530228918, LR: 0.0003 +[2026-03-05 10:20:14] (step=0066262) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 12.964586186656232, LR: 0.0003 +[2026-03-05 10:20:22] (step=0066263) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 12.964781843083545, LR: 0.0003 +[2026-03-05 10:20:30] (step=0066264) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.964977499510859, LR: 0.0003 +[2026-03-05 10:20:38] (step=0066265) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.965173155938173, LR: 0.0003 +[2026-03-05 10:20:45] (step=0066266) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.965368812365487, LR: 0.0003 +[2026-03-05 10:20:53] (step=0066267) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 12.9655644687928, LR: 0.0003 +[2026-03-05 10:21:01] (step=0066268) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 12.965760125220113, LR: 0.0003 +[2026-03-05 10:21:09] (step=0066269) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 12.965955781647427, LR: 0.0003 +[2026-03-05 10:21:17] (step=0066270) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 12.966151438074741, LR: 0.0003 +[2026-03-05 10:21:25] (step=0066271) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.966347094502055, LR: 0.0003 +[2026-03-05 10:21:33] (step=0066272) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 12.966542750929367, LR: 0.0003 +[2026-03-05 10:21:40] (step=0066273) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 12.966738407356681, LR: 0.0003 +[2026-03-05 10:21:48] (step=0066274) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.966934063783995, LR: 0.0003 +[2026-03-05 10:21:56] (step=0066275) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.96712972021131, LR: 0.0003 +[2026-03-05 10:22:04] (step=0066276) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 12.967325376638623, LR: 0.0003 +[2026-03-05 10:22:12] (step=0066277) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.967521033065935, LR: 0.0003 +[2026-03-05 10:22:20] (step=0066278) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 12.96771668949325, LR: 0.0003 +[2026-03-05 10:22:28] (step=0066279) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 12.967912345920563, LR: 0.0003 +[2026-03-05 10:22:35] (step=0066280) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 12.968108002347877, LR: 0.0003 +[2026-03-05 10:22:43] (step=0066281) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 12.968303658775191, LR: 0.0003 +[2026-03-05 10:22:51] (step=0066282) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 12.968499315202504, LR: 0.0003 +[2026-03-05 10:22:59] (step=0066283) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 12.968694971629818, LR: 0.0003 +[2026-03-05 10:23:07] (step=0066284) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 12.968890628057132, LR: 0.0003 +[2026-03-05 10:23:15] (step=0066285) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.969086284484446, LR: 0.0003 +[2026-03-05 10:23:23] (step=0066286) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 12.96928194091176, LR: 0.0003 +[2026-03-05 10:23:30] (step=0066287) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.969477597339072, LR: 0.0003 +[2026-03-05 10:23:38] (step=0066288) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 12.969673253766386, LR: 0.0003 +[2026-03-05 10:23:46] (step=0066289) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.9698689101937, LR: 0.0003 +[2026-03-05 10:23:54] (step=0066290) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 12.970064566621014, LR: 0.0003 +[2026-03-05 10:24:02] (step=0066291) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.970260223048328, LR: 0.0003 +[2026-03-05 10:24:10] (step=0066292) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.97045587947564, LR: 0.0003 +[2026-03-05 10:24:17] (step=0066293) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.970651535902954, LR: 0.0003 +[2026-03-05 10:24:25] (step=0066294) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.970847192330268, LR: 0.0003 +[2026-03-05 10:24:33] (step=0066295) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 12.971042848757582, LR: 0.0003 +[2026-03-05 10:24:41] (step=0066296) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 12.971238505184896, LR: 0.0003 +[2026-03-05 10:24:49] (step=0066297) Train Loss: 0.4631, Train Steps/Sec: 0.13, Epoch: 12.971434161612208, LR: 0.0003 +[2026-03-05 10:24:57] (step=0066298) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 12.971629818039522, LR: 0.0003 +[2026-03-05 10:25:04] (step=0066299) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 12.971825474466836, LR: 0.0003 +[2026-03-05 10:25:12] (step=0066300) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 12.97202113089415, LR: 0.0003 +[2026-03-05 10:25:20] (step=0066301) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 12.972216787321463, LR: 0.0003 +[2026-03-05 10:25:28] (step=0066302) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.972412443748777, LR: 0.0003 +[2026-03-05 10:25:36] (step=0066303) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 12.97260810017609, LR: 0.0003 +[2026-03-05 10:25:44] (step=0066304) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 12.972803756603405, LR: 0.0003 +[2026-03-05 10:25:52] (step=0066305) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.972999413030719, LR: 0.0003 +[2026-03-05 10:25:59] (step=0066306) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.973195069458031, LR: 0.0003 +[2026-03-05 10:26:07] (step=0066307) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.973390725885345, LR: 0.0003 +[2026-03-05 10:26:15] (step=0066308) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.973586382312659, LR: 0.0003 +[2026-03-05 10:26:23] (step=0066309) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.973782038739973, LR: 0.0003 +[2026-03-05 10:26:31] (step=0066310) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.973977695167287, LR: 0.0003 +[2026-03-05 10:26:39] (step=0066311) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 12.9741733515946, LR: 0.0003 +[2026-03-05 10:26:47] (step=0066312) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 12.974369008021913, LR: 0.0003 +[2026-03-05 10:26:54] (step=0066313) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 12.974564664449227, LR: 0.0003 +[2026-03-05 10:27:02] (step=0066314) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 12.974760320876541, LR: 0.0003 +[2026-03-05 10:27:10] (step=0066315) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.974955977303855, LR: 0.0003 +[2026-03-05 10:27:18] (step=0066316) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 12.975151633731167, LR: 0.0003 +[2026-03-05 10:27:26] (step=0066317) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 12.975347290158481, LR: 0.0003 +[2026-03-05 10:27:34] (step=0066318) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 12.975542946585795, LR: 0.0003 +[2026-03-05 10:27:42] (step=0066319) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 12.97573860301311, LR: 0.0003 +[2026-03-05 10:27:50] (step=0066320) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 12.975934259440423, LR: 0.0003 +[2026-03-05 10:27:57] (step=0066321) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.976129915867736, LR: 0.0003 +[2026-03-05 10:28:05] (step=0066322) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 12.97632557229505, LR: 0.0003 +[2026-03-05 10:28:13] (step=0066323) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 12.976521228722364, LR: 0.0003 +[2026-03-05 10:28:21] (step=0066324) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 12.976716885149678, LR: 0.0003 +[2026-03-05 10:28:29] (step=0066325) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 12.97691254157699, LR: 0.0003 +[2026-03-05 10:28:37] (step=0066326) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 12.977108198004304, LR: 0.0003 +[2026-03-05 10:28:44] (step=0066327) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 12.977303854431618, LR: 0.0003 +[2026-03-05 10:28:52] (step=0066328) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.977499510858932, LR: 0.0003 +[2026-03-05 10:29:00] (step=0066329) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.977695167286246, LR: 0.0003 +[2026-03-05 10:29:08] (step=0066330) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 12.977890823713558, LR: 0.0003 +[2026-03-05 10:29:16] (step=0066331) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.978086480140872, LR: 0.0003 +[2026-03-05 10:29:24] (step=0066332) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 12.978282136568186, LR: 0.0003 +[2026-03-05 10:29:32] (step=0066333) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 12.9784777929955, LR: 0.0003 +[2026-03-05 10:29:39] (step=0066334) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.978673449422814, LR: 0.0003 +[2026-03-05 10:29:47] (step=0066335) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 12.978869105850126, LR: 0.0003 +[2026-03-05 10:29:55] (step=0066336) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 12.97906476227744, LR: 0.0003 +[2026-03-05 10:30:03] (step=0066337) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 12.979260418704754, LR: 0.0003 +[2026-03-05 10:30:11] (step=0066338) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.979456075132068, LR: 0.0003 +[2026-03-05 10:30:19] (step=0066339) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.979651731559382, LR: 0.0003 +[2026-03-05 10:30:27] (step=0066340) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.979847387986695, LR: 0.0003 +[2026-03-05 10:30:34] (step=0066341) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 12.980043044414009, LR: 0.0003 +[2026-03-05 10:30:42] (step=0066342) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 12.980238700841323, LR: 0.0003 +[2026-03-05 10:30:50] (step=0066343) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.980434357268637, LR: 0.0003 +[2026-03-05 10:30:58] (step=0066344) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 12.98063001369595, LR: 0.0003 +[2026-03-05 10:31:06] (step=0066345) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 12.980825670123263, LR: 0.0003 +[2026-03-05 10:31:14] (step=0066346) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 12.981021326550577, LR: 0.0003 +[2026-03-05 10:31:21] (step=0066347) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 12.981216982977891, LR: 0.0003 +[2026-03-05 10:31:29] (step=0066348) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.981412639405205, LR: 0.0003 +[2026-03-05 10:31:37] (step=0066349) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 12.981608295832519, LR: 0.0003 +[2026-03-05 10:31:45] (step=0066350) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 12.981803952259831, LR: 0.0003 +[2026-03-05 10:31:53] (step=0066351) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 12.981999608687145, LR: 0.0003 +[2026-03-05 10:32:01] (step=0066352) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 12.98219526511446, LR: 0.0003 +[2026-03-05 10:32:09] (step=0066353) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.982390921541773, LR: 0.0003 +[2026-03-05 10:32:16] (step=0066354) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 12.982586577969085, LR: 0.0003 +[2026-03-05 10:32:24] (step=0066355) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 12.9827822343964, LR: 0.0003 +[2026-03-05 10:32:32] (step=0066356) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 12.982977890823713, LR: 0.0003 +[2026-03-05 10:32:40] (step=0066357) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.983173547251027, LR: 0.0003 +[2026-03-05 10:32:48] (step=0066358) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.983369203678341, LR: 0.0003 +[2026-03-05 10:32:56] (step=0066359) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 12.983564860105654, LR: 0.0003 +[2026-03-05 10:33:04] (step=0066360) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 12.983760516532968, LR: 0.0003 +[2026-03-05 10:33:11] (step=0066361) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 12.983956172960282, LR: 0.0003 +[2026-03-05 10:33:19] (step=0066362) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 12.984151829387596, LR: 0.0003 +[2026-03-05 10:33:27] (step=0066363) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.98434748581491, LR: 0.0003 +[2026-03-05 10:33:35] (step=0066364) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 12.984543142242222, LR: 0.0003 +[2026-03-05 10:33:43] (step=0066365) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 12.984738798669536, LR: 0.0003 +[2026-03-05 10:33:51] (step=0066366) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 12.98493445509685, LR: 0.0003 +[2026-03-05 10:33:59] (step=0066367) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 12.985130111524164, LR: 0.0003 +[2026-03-05 10:34:06] (step=0066368) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.985325767951478, LR: 0.0003 +[2026-03-05 10:34:14] (step=0066369) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 12.98552142437879, LR: 0.0003 +[2026-03-05 10:34:22] (step=0066370) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.985717080806104, LR: 0.0003 +[2026-03-05 10:34:30] (step=0066371) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.985912737233418, LR: 0.0003 +[2026-03-05 10:34:38] (step=0066372) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 12.986108393660732, LR: 0.0003 +[2026-03-05 10:34:46] (step=0066373) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.986304050088046, LR: 0.0003 +[2026-03-05 10:34:54] (step=0066374) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.986499706515358, LR: 0.0003 +[2026-03-05 10:35:01] (step=0066375) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 12.986695362942672, LR: 0.0003 +[2026-03-05 10:35:09] (step=0066376) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 12.986891019369986, LR: 0.0003 +[2026-03-05 10:35:17] (step=0066377) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.9870866757973, LR: 0.0003 +[2026-03-05 10:35:25] (step=0066378) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 12.987282332224613, LR: 0.0003 +[2026-03-05 10:35:33] (step=0066379) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 12.987477988651927, LR: 0.0003 +[2026-03-05 10:35:41] (step=0066380) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 12.98767364507924, LR: 0.0003 +[2026-03-05 10:35:49] (step=0066381) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 12.987869301506555, LR: 0.0003 +[2026-03-05 10:35:56] (step=0066382) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 12.988064957933869, LR: 0.0003 +[2026-03-05 10:36:04] (step=0066383) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 12.988260614361181, LR: 0.0003 +[2026-03-05 10:36:12] (step=0066384) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 12.988456270788495, LR: 0.0003 +[2026-03-05 10:36:20] (step=0066385) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 12.988651927215809, LR: 0.0003 +[2026-03-05 10:36:28] (step=0066386) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 12.988847583643123, LR: 0.0003 +[2026-03-05 10:36:36] (step=0066387) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 12.989043240070437, LR: 0.0003 +[2026-03-05 10:36:43] (step=0066388) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 12.98923889649775, LR: 0.0003 +[2026-03-05 10:36:51] (step=0066389) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 12.989434552925063, LR: 0.0003 +[2026-03-05 10:36:59] (step=0066390) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 12.989630209352377, LR: 0.0003 +[2026-03-05 10:37:07] (step=0066391) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.989825865779691, LR: 0.0003 +[2026-03-05 10:37:15] (step=0066392) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 12.990021522207005, LR: 0.0003 +[2026-03-05 10:37:23] (step=0066393) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 12.990217178634317, LR: 0.0003 +[2026-03-05 10:37:31] (step=0066394) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 12.990412835061631, LR: 0.0003 +[2026-03-05 10:37:38] (step=0066395) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 12.990608491488945, LR: 0.0003 +[2026-03-05 10:37:46] (step=0066396) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 12.99080414791626, LR: 0.0003 +[2026-03-05 10:37:54] (step=0066397) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 12.990999804343573, LR: 0.0003 +[2026-03-05 10:38:02] (step=0066398) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 12.991195460770886, LR: 0.0003 +[2026-03-05 10:38:10] (step=0066399) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 12.9913911171982, LR: 0.0003 +[2026-03-05 10:38:18] (step=0066400) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 12.991586773625514, LR: 0.0003 +[2026-03-05 10:38:26] (step=0066401) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 12.991782430052828, LR: 0.0003 +[2026-03-05 10:38:34] (step=0066402) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 12.991978086480142, LR: 0.0003 +[2026-03-05 10:38:41] (step=0066403) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.992173742907454, LR: 0.0003 +[2026-03-05 10:38:49] (step=0066404) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 12.992369399334768, LR: 0.0003 +[2026-03-05 10:38:57] (step=0066405) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 12.992565055762082, LR: 0.0003 +[2026-03-05 10:39:05] (step=0066406) Train Loss: 0.4242, Train Steps/Sec: 0.13, Epoch: 12.992760712189396, LR: 0.0003 +[2026-03-05 10:39:13] (step=0066407) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 12.992956368616708, LR: 0.0003 +[2026-03-05 10:39:21] (step=0066408) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 12.993152025044022, LR: 0.0003 +[2026-03-05 10:39:28] (step=0066409) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 12.993347681471336, LR: 0.0003 +[2026-03-05 10:39:36] (step=0066410) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 12.99354333789865, LR: 0.0003 +[2026-03-05 10:39:44] (step=0066411) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 12.993738994325964, LR: 0.0003 +[2026-03-05 10:39:52] (step=0066412) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.993934650753276, LR: 0.0003 +[2026-03-05 10:40:00] (step=0066413) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 12.99413030718059, LR: 0.0003 +[2026-03-05 10:40:08] (step=0066414) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 12.994325963607904, LR: 0.0003 +[2026-03-05 10:40:16] (step=0066415) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 12.994521620035218, LR: 0.0003 +[2026-03-05 10:40:24] (step=0066416) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 12.994717276462532, LR: 0.0003 +[2026-03-05 10:40:31] (step=0066417) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 12.994912932889845, LR: 0.0003 +[2026-03-05 10:40:39] (step=0066418) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.995108589317159, LR: 0.0003 +[2026-03-05 10:40:47] (step=0066419) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 12.995304245744473, LR: 0.0003 +[2026-03-05 10:40:55] (step=0066420) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 12.995499902171787, LR: 0.0003 +[2026-03-05 10:41:03] (step=0066421) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 12.9956955585991, LR: 0.0003 +[2026-03-05 10:41:11] (step=0066422) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 12.995891215026413, LR: 0.0003 +[2026-03-05 10:41:18] (step=0066423) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 12.996086871453727, LR: 0.0003 +[2026-03-05 10:41:26] (step=0066424) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 12.996282527881041, LR: 0.0003 +[2026-03-05 10:41:34] (step=0066425) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 12.996478184308355, LR: 0.0003 +[2026-03-05 10:41:42] (step=0066426) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 12.996673840735669, LR: 0.0003 +[2026-03-05 10:41:50] (step=0066427) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 12.996869497162981, LR: 0.0003 +[2026-03-05 10:41:58] (step=0066428) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 12.997065153590295, LR: 0.0003 +[2026-03-05 10:42:05] (step=0066429) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 12.99726081001761, LR: 0.0003 +[2026-03-05 10:42:13] (step=0066430) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 12.997456466444923, LR: 0.0003 +[2026-03-05 10:42:21] (step=0066431) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 12.997652122872235, LR: 0.0003 +[2026-03-05 10:42:29] (step=0066432) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 12.99784777929955, LR: 0.0003 +[2026-03-05 10:42:37] (step=0066433) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 12.998043435726863, LR: 0.0003 +[2026-03-05 10:42:45] (step=0066434) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 12.998239092154177, LR: 0.0003 +[2026-03-05 10:42:53] (step=0066435) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 12.998434748581492, LR: 0.0003 +[2026-03-05 10:43:00] (step=0066436) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 12.998630405008804, LR: 0.0003 +[2026-03-05 10:43:08] (step=0066437) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 12.998826061436118, LR: 0.0003 +[2026-03-05 10:43:16] (step=0066438) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 12.999021717863432, LR: 0.0003 +[2026-03-05 10:43:24] (step=0066439) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 12.999217374290746, LR: 0.0003 +[2026-03-05 10:43:32] (step=0066440) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 12.99941303071806, LR: 0.0003 +[2026-03-05 10:43:40] (step=0066441) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 12.999608687145372, LR: 0.0003 +[2026-03-05 10:43:48] (step=0066442) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 12.999804343572686, LR: 0.0003 +[2026-03-05 10:43:56] (step=0066443) Train Loss: 0.4446, Train Steps/Sec: 0.12, Epoch: 13.0, LR: 0.0003 +[2026-03-05 10:43:56] Beginning epoch 13... +[2026-03-05 10:44:05] (step=0066444) Train Loss: 0.4448, Train Steps/Sec: 0.10, Epoch: 13.000195656427314, LR: 0.0003 +[2026-03-05 10:44:13] (step=0066445) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 13.000391312854628, LR: 0.0003 +[2026-03-05 10:44:21] (step=0066446) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.00058696928194, LR: 0.0003 +[2026-03-05 10:44:29] (step=0066447) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.000782625709254, LR: 0.0003 +[2026-03-05 10:44:37] (step=0066448) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.000978282136568, LR: 0.0003 +[2026-03-05 10:44:45] (step=0066449) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.001173938563882, LR: 0.0003 +[2026-03-05 10:44:53] (step=0066450) Train Loss: 0.4414, Train Steps/Sec: 0.12, Epoch: 13.001369594991196, LR: 0.0003 +[2026-03-05 10:45:00] (step=0066451) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.001565251418508, LR: 0.0003 +[2026-03-05 10:45:08] (step=0066452) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.001760907845823, LR: 0.0003 +[2026-03-05 10:45:16] (step=0066453) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.001956564273137, LR: 0.0003 +[2026-03-05 10:45:24] (step=0066454) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.00215222070045, LR: 0.0003 +[2026-03-05 10:45:32] (step=0066455) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 13.002347877127765, LR: 0.0003 +[2026-03-05 10:45:40] (step=0066456) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.002543533555077, LR: 0.0003 +[2026-03-05 10:45:47] (step=0066457) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.00273918998239, LR: 0.0003 +[2026-03-05 10:45:55] (step=0066458) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.002934846409705, LR: 0.0003 +[2026-03-05 10:46:03] (step=0066459) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.003130502837019, LR: 0.0003 +[2026-03-05 10:46:11] (step=0066460) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.003326159264331, LR: 0.0003 +[2026-03-05 10:46:19] (step=0066461) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.003521815691645, LR: 0.0003 +[2026-03-05 10:46:27] (step=0066462) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 13.003717472118959, LR: 0.0003 +[2026-03-05 10:46:35] (step=0066463) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 13.003913128546273, LR: 0.0003 +[2026-03-05 10:46:43] (step=0066464) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.004108784973587, LR: 0.0003 +[2026-03-05 10:46:50] (step=0066465) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.0043044414009, LR: 0.0003 +[2026-03-05 10:46:58] (step=0066466) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.004500097828213, LR: 0.0003 +[2026-03-05 10:47:06] (step=0066467) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.004695754255527, LR: 0.0003 +[2026-03-05 10:47:14] (step=0066468) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.004891410682841, LR: 0.0003 +[2026-03-05 10:47:22] (step=0066469) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.005087067110155, LR: 0.0003 +[2026-03-05 10:47:30] (step=0066470) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.005282723537468, LR: 0.0003 +[2026-03-05 10:47:38] (step=0066471) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.005478379964782, LR: 0.0003 +[2026-03-05 10:47:45] (step=0066472) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.005674036392096, LR: 0.0003 +[2026-03-05 10:47:53] (step=0066473) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.00586969281941, LR: 0.0003 +[2026-03-05 10:48:01] (step=0066474) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.006065349246724, LR: 0.0003 +[2026-03-05 10:48:09] (step=0066475) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.006261005674036, LR: 0.0003 +[2026-03-05 10:48:17] (step=0066476) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.00645666210135, LR: 0.0003 +[2026-03-05 10:48:25] (step=0066477) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.006652318528664, LR: 0.0003 +[2026-03-05 10:48:32] (step=0066478) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.006847974955978, LR: 0.0003 +[2026-03-05 10:48:40] (step=0066479) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.007043631383292, LR: 0.0003 +[2026-03-05 10:48:48] (step=0066480) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 13.007239287810604, LR: 0.0003 +[2026-03-05 10:48:56] (step=0066481) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.007434944237918, LR: 0.0003 +[2026-03-05 10:49:04] (step=0066482) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.007630600665232, LR: 0.0003 +[2026-03-05 10:49:12] (step=0066483) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.007826257092546, LR: 0.0003 +[2026-03-05 10:49:20] (step=0066484) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.008021913519858, LR: 0.0003 +[2026-03-05 10:49:27] (step=0066485) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.008217569947172, LR: 0.0003 +[2026-03-05 10:49:35] (step=0066486) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.008413226374486, LR: 0.0003 +[2026-03-05 10:49:43] (step=0066487) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.0086088828018, LR: 0.0003 +[2026-03-05 10:49:51] (step=0066488) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.008804539229114, LR: 0.0003 +[2026-03-05 10:49:59] (step=0066489) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.009000195656427, LR: 0.0003 +[2026-03-05 10:50:07] (step=0066490) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.00919585208374, LR: 0.0003 +[2026-03-05 10:50:14] (step=0066491) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.009391508511055, LR: 0.0003 +[2026-03-05 10:50:22] (step=0066492) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.009587164938369, LR: 0.0003 +[2026-03-05 10:50:30] (step=0066493) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 13.009782821365683, LR: 0.0003 +[2026-03-05 10:50:38] (step=0066494) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 13.009978477792995, LR: 0.0003 +[2026-03-05 10:50:46] (step=0066495) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.010174134220309, LR: 0.0003 +[2026-03-05 10:50:54] (step=0066496) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.010369790647623, LR: 0.0003 +[2026-03-05 10:51:01] (step=0066497) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.010565447074937, LR: 0.0003 +[2026-03-05 10:51:09] (step=0066498) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.01076110350225, LR: 0.0003 +[2026-03-05 10:51:17] (step=0066499) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.010956759929563, LR: 0.0003 +[2026-03-05 10:51:25] (step=0066500) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.011152416356877, LR: 0.0003 +[2026-03-05 10:51:25] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0066500/ +[2026-03-05 10:51:33] (step=0066501) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.011348072784191, LR: 0.0003 +[2026-03-05 10:51:41] (step=0066502) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.011543729211505, LR: 0.0003 +[2026-03-05 10:51:49] (step=0066503) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.011739385638819, LR: 0.0003 +[2026-03-05 10:51:57] (step=0066504) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.011935042066131, LR: 0.0003 +[2026-03-05 10:52:04] (step=0066505) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.012130698493445, LR: 0.0003 +[2026-03-05 10:52:12] (step=0066506) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.01232635492076, LR: 0.0003 +[2026-03-05 10:52:20] (step=0066507) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 13.012522011348073, LR: 0.0003 +[2026-03-05 10:52:28] (step=0066508) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.012717667775387, LR: 0.0003 +[2026-03-05 10:52:36] (step=0066509) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.0129133242027, LR: 0.0003 +[2026-03-05 10:52:44] (step=0066510) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.013108980630014, LR: 0.0003 +[2026-03-05 10:52:52] (step=0066511) Train Loss: 0.4399, Train Steps/Sec: 0.12, Epoch: 13.013304637057328, LR: 0.0003 +[2026-03-05 10:53:00] (step=0066512) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.013500293484642, LR: 0.0003 +[2026-03-05 10:53:07] (step=0066513) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.013695949911954, LR: 0.0003 +[2026-03-05 10:53:15] (step=0066514) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.013891606339268, LR: 0.0003 +[2026-03-05 10:53:23] (step=0066515) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.014087262766582, LR: 0.0003 +[2026-03-05 10:53:31] (step=0066516) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.014282919193896, LR: 0.0003 +[2026-03-05 10:53:39] (step=0066517) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.01447857562121, LR: 0.0003 +[2026-03-05 10:53:47] (step=0066518) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.014674232048522, LR: 0.0003 +[2026-03-05 10:53:55] (step=0066519) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 13.014869888475836, LR: 0.0003 +[2026-03-05 10:54:02] (step=0066520) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.01506554490315, LR: 0.0003 +[2026-03-05 10:54:10] (step=0066521) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 13.015261201330464, LR: 0.0003 +[2026-03-05 10:54:18] (step=0066522) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.015456857757778, LR: 0.0003 +[2026-03-05 10:54:26] (step=0066523) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.01565251418509, LR: 0.0003 +[2026-03-05 10:54:34] (step=0066524) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.015848170612404, LR: 0.0003 +[2026-03-05 10:54:42] (step=0066525) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.016043827039718, LR: 0.0003 +[2026-03-05 10:54:50] (step=0066526) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.016239483467032, LR: 0.0003 +[2026-03-05 10:54:57] (step=0066527) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.016435139894346, LR: 0.0003 +[2026-03-05 10:55:05] (step=0066528) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.016630796321659, LR: 0.0003 +[2026-03-05 10:55:13] (step=0066529) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.016826452748973, LR: 0.0003 +[2026-03-05 10:55:21] (step=0066530) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 13.017022109176287, LR: 0.0003 +[2026-03-05 10:55:29] (step=0066531) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.0172177656036, LR: 0.0003 +[2026-03-05 10:55:37] (step=0066532) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.017413422030915, LR: 0.0003 +[2026-03-05 10:55:44] (step=0066533) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.017609078458227, LR: 0.0003 +[2026-03-05 10:55:52] (step=0066534) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.01780473488554, LR: 0.0003 +[2026-03-05 10:56:00] (step=0066535) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.018000391312855, LR: 0.0003 +[2026-03-05 10:56:08] (step=0066536) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.018196047740169, LR: 0.0003 +[2026-03-05 10:56:16] (step=0066537) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.018391704167481, LR: 0.0003 +[2026-03-05 10:56:24] (step=0066538) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.018587360594795, LR: 0.0003 +[2026-03-05 10:56:31] (step=0066539) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.018783017022109, LR: 0.0003 +[2026-03-05 10:56:39] (step=0066540) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.018978673449423, LR: 0.0003 +[2026-03-05 10:56:47] (step=0066541) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.019174329876737, LR: 0.0003 +[2026-03-05 10:56:55] (step=0066542) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 13.01936998630405, LR: 0.0003 +[2026-03-05 10:57:03] (step=0066543) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.019565642731363, LR: 0.0003 +[2026-03-05 10:57:11] (step=0066544) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.019761299158677, LR: 0.0003 +[2026-03-05 10:57:19] (step=0066545) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.019956955585991, LR: 0.0003 +[2026-03-05 10:57:26] (step=0066546) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.020152612013305, LR: 0.0003 +[2026-03-05 10:57:34] (step=0066547) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 13.020348268440618, LR: 0.0003 +[2026-03-05 10:57:42] (step=0066548) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.020543924867932, LR: 0.0003 +[2026-03-05 10:57:50] (step=0066549) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.020739581295246, LR: 0.0003 +[2026-03-05 10:57:58] (step=0066550) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.02093523772256, LR: 0.0003 +[2026-03-05 10:58:06] (step=0066551) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.021130894149874, LR: 0.0003 +[2026-03-05 10:58:14] (step=0066552) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 13.021326550577186, LR: 0.0003 +[2026-03-05 10:58:21] (step=0066553) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.0215222070045, LR: 0.0003 +[2026-03-05 10:58:29] (step=0066554) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.021717863431814, LR: 0.0003 +[2026-03-05 10:58:37] (step=0066555) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.021913519859128, LR: 0.0003 +[2026-03-05 10:58:45] (step=0066556) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.022109176286442, LR: 0.0003 +[2026-03-05 10:58:53] (step=0066557) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 13.022304832713754, LR: 0.0003 +[2026-03-05 10:59:01] (step=0066558) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.022500489141068, LR: 0.0003 +[2026-03-05 10:59:08] (step=0066559) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.022696145568382, LR: 0.0003 +[2026-03-05 10:59:16] (step=0066560) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.022891801995696, LR: 0.0003 +[2026-03-05 10:59:24] (step=0066561) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.02308745842301, LR: 0.0003 +[2026-03-05 10:59:32] (step=0066562) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.023283114850322, LR: 0.0003 +[2026-03-05 10:59:40] (step=0066563) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.023478771277636, LR: 0.0003 +[2026-03-05 10:59:48] (step=0066564) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.02367442770495, LR: 0.0003 +[2026-03-05 10:59:56] (step=0066565) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.023870084132264, LR: 0.0003 +[2026-03-05 11:00:04] (step=0066566) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 13.024065740559577, LR: 0.0003 +[2026-03-05 11:00:11] (step=0066567) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.02426139698689, LR: 0.0003 +[2026-03-05 11:00:19] (step=0066568) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.024457053414205, LR: 0.0003 +[2026-03-05 11:00:27] (step=0066569) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.024652709841519, LR: 0.0003 +[2026-03-05 11:00:35] (step=0066570) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.024848366268833, LR: 0.0003 +[2026-03-05 11:00:43] (step=0066571) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.025044022696145, LR: 0.0003 +[2026-03-05 11:00:51] (step=0066572) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.025239679123459, LR: 0.0003 +[2026-03-05 11:00:58] (step=0066573) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 13.025435335550773, LR: 0.0003 +[2026-03-05 11:01:06] (step=0066574) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.025630991978087, LR: 0.0003 +[2026-03-05 11:01:14] (step=0066575) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.0258266484054, LR: 0.0003 +[2026-03-05 11:01:22] (step=0066576) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.026022304832713, LR: 0.0003 +[2026-03-05 11:01:30] (step=0066577) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.026217961260027, LR: 0.0003 +[2026-03-05 11:01:38] (step=0066578) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.026413617687341, LR: 0.0003 +[2026-03-05 11:01:46] (step=0066579) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.026609274114655, LR: 0.0003 +[2026-03-05 11:01:53] (step=0066580) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.026804930541969, LR: 0.0003 +[2026-03-05 11:02:01] (step=0066581) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.027000586969281, LR: 0.0003 +[2026-03-05 11:02:09] (step=0066582) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.027196243396595, LR: 0.0003 +[2026-03-05 11:02:17] (step=0066583) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.02739189982391, LR: 0.0003 +[2026-03-05 11:02:25] (step=0066584) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 13.027587556251223, LR: 0.0003 +[2026-03-05 11:02:33] (step=0066585) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.027783212678537, LR: 0.0003 +[2026-03-05 11:02:40] (step=0066586) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.02797886910585, LR: 0.0003 +[2026-03-05 11:02:48] (step=0066587) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.028174525533164, LR: 0.0003 +[2026-03-05 11:02:56] (step=0066588) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.028370181960478, LR: 0.0003 +[2026-03-05 11:03:04] (step=0066589) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.028565838387792, LR: 0.0003 +[2026-03-05 11:03:12] (step=0066590) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.028761494815104, LR: 0.0003 +[2026-03-05 11:03:20] (step=0066591) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.028957151242418, LR: 0.0003 +[2026-03-05 11:03:27] (step=0066592) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.029152807669732, LR: 0.0003 +[2026-03-05 11:03:35] (step=0066593) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.029348464097046, LR: 0.0003 +[2026-03-05 11:03:43] (step=0066594) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.02954412052436, LR: 0.0003 +[2026-03-05 11:03:51] (step=0066595) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.029739776951672, LR: 0.0003 +[2026-03-05 11:03:59] (step=0066596) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 13.029935433378986, LR: 0.0003 +[2026-03-05 11:04:07] (step=0066597) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 13.0301310898063, LR: 0.0003 +[2026-03-05 11:04:15] (step=0066598) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.030326746233614, LR: 0.0003 +[2026-03-05 11:04:22] (step=0066599) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.030522402660928, LR: 0.0003 +[2026-03-05 11:04:30] (step=0066600) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.03071805908824, LR: 0.0003 +[2026-03-05 11:04:38] (step=0066601) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.030913715515554, LR: 0.0003 +[2026-03-05 11:04:46] (step=0066602) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 13.031109371942868, LR: 0.0003 +[2026-03-05 11:04:54] (step=0066603) Train Loss: 0.4450, Train Steps/Sec: 0.12, Epoch: 13.031305028370182, LR: 0.0003 +[2026-03-05 11:05:02] (step=0066604) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.031500684797496, LR: 0.0003 +[2026-03-05 11:05:10] (step=0066605) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 13.031696341224809, LR: 0.0003 +[2026-03-05 11:05:18] (step=0066606) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 13.031891997652123, LR: 0.0003 +[2026-03-05 11:05:25] (step=0066607) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.032087654079437, LR: 0.0003 +[2026-03-05 11:05:33] (step=0066608) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 13.03228331050675, LR: 0.0003 +[2026-03-05 11:05:41] (step=0066609) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 13.032478966934065, LR: 0.0003 +[2026-03-05 11:05:49] (step=0066610) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.032674623361377, LR: 0.0003 +[2026-03-05 11:05:57] (step=0066611) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.03287027978869, LR: 0.0003 +[2026-03-05 11:06:05] (step=0066612) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.033065936216005, LR: 0.0003 +[2026-03-05 11:06:13] (step=0066613) Train Loss: 0.4328, Train Steps/Sec: 0.12, Epoch: 13.033261592643319, LR: 0.0003 +[2026-03-05 11:06:20] (step=0066614) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.033457249070633, LR: 0.0003 +[2026-03-05 11:06:28] (step=0066615) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.033652905497945, LR: 0.0003 +[2026-03-05 11:06:36] (step=0066616) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.033848561925259, LR: 0.0003 +[2026-03-05 11:06:44] (step=0066617) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.034044218352573, LR: 0.0003 +[2026-03-05 11:06:52] (step=0066618) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.034239874779887, LR: 0.0003 +[2026-03-05 11:07:00] (step=0066619) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.0344355312072, LR: 0.0003 +[2026-03-05 11:07:08] (step=0066620) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.034631187634513, LR: 0.0003 +[2026-03-05 11:07:16] (step=0066621) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.034826844061827, LR: 0.0003 +[2026-03-05 11:07:23] (step=0066622) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.035022500489141, LR: 0.0003 +[2026-03-05 11:07:31] (step=0066623) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.035218156916455, LR: 0.0003 +[2026-03-05 11:07:39] (step=0066624) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.035413813343768, LR: 0.0003 +[2026-03-05 11:07:47] (step=0066625) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 13.035609469771082, LR: 0.0003 +[2026-03-05 11:07:55] (step=0066626) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.035805126198396, LR: 0.0003 +[2026-03-05 11:08:03] (step=0066627) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.03600078262571, LR: 0.0003 +[2026-03-05 11:08:11] (step=0066628) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.036196439053024, LR: 0.0003 +[2026-03-05 11:08:18] (step=0066629) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.036392095480336, LR: 0.0003 +[2026-03-05 11:08:26] (step=0066630) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.03658775190765, LR: 0.0003 +[2026-03-05 11:08:34] (step=0066631) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.036783408334964, LR: 0.0003 +[2026-03-05 11:08:42] (step=0066632) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.036979064762278, LR: 0.0003 +[2026-03-05 11:08:50] (step=0066633) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.037174721189592, LR: 0.0003 +[2026-03-05 11:08:58] (step=0066634) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.037370377616904, LR: 0.0003 +[2026-03-05 11:09:05] (step=0066635) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.037566034044218, LR: 0.0003 +[2026-03-05 11:09:13] (step=0066636) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.037761690471532, LR: 0.0003 +[2026-03-05 11:09:21] (step=0066637) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.037957346898846, LR: 0.0003 +[2026-03-05 11:09:29] (step=0066638) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.03815300332616, LR: 0.0003 +[2026-03-05 11:09:37] (step=0066639) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.038348659753472, LR: 0.0003 +[2026-03-05 11:09:45] (step=0066640) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.038544316180786, LR: 0.0003 +[2026-03-05 11:09:53] (step=0066641) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.0387399726081, LR: 0.0003 +[2026-03-05 11:10:00] (step=0066642) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.038935629035414, LR: 0.0003 +[2026-03-05 11:10:08] (step=0066643) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.039131285462727, LR: 0.0003 +[2026-03-05 11:10:16] (step=0066644) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.03932694189004, LR: 0.0003 +[2026-03-05 11:10:24] (step=0066645) Train Loss: 0.4207, Train Steps/Sec: 0.13, Epoch: 13.039522598317355, LR: 0.0003 +[2026-03-05 11:10:32] (step=0066646) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.039718254744669, LR: 0.0003 +[2026-03-05 11:10:40] (step=0066647) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.039913911171983, LR: 0.0003 +[2026-03-05 11:10:48] (step=0066648) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.040109567599295, LR: 0.0003 +[2026-03-05 11:10:55] (step=0066649) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.040305224026609, LR: 0.0003 +[2026-03-05 11:11:03] (step=0066650) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.040500880453923, LR: 0.0003 +[2026-03-05 11:11:11] (step=0066651) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.040696536881237, LR: 0.0003 +[2026-03-05 11:11:19] (step=0066652) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.04089219330855, LR: 0.0003 +[2026-03-05 11:11:27] (step=0066653) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.041087849735863, LR: 0.0003 +[2026-03-05 11:11:35] (step=0066654) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.041283506163177, LR: 0.0003 +[2026-03-05 11:11:43] (step=0066655) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.041479162590491, LR: 0.0003 +[2026-03-05 11:11:50] (step=0066656) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.041674819017805, LR: 0.0003 +[2026-03-05 11:11:58] (step=0066657) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.04187047544512, LR: 0.0003 +[2026-03-05 11:12:06] (step=0066658) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.042066131872431, LR: 0.0003 +[2026-03-05 11:12:14] (step=0066659) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.042261788299745, LR: 0.0003 +[2026-03-05 11:12:22] (step=0066660) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.04245744472706, LR: 0.0003 +[2026-03-05 11:12:30] (step=0066661) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.042653101154373, LR: 0.0003 +[2026-03-05 11:12:38] (step=0066662) Train Loss: 0.4291, Train Steps/Sec: 0.12, Epoch: 13.042848757581687, LR: 0.0003 +[2026-03-05 11:12:46] (step=0066663) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.043044414009, LR: 0.0003 +[2026-03-05 11:12:53] (step=0066664) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.043240070436314, LR: 0.0003 +[2026-03-05 11:13:01] (step=0066665) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.043435726863628, LR: 0.0003 +[2026-03-05 11:13:09] (step=0066666) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.043631383290942, LR: 0.0003 +[2026-03-05 11:13:17] (step=0066667) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.043827039718254, LR: 0.0003 +[2026-03-05 11:13:25] (step=0066668) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.044022696145568, LR: 0.0003 +[2026-03-05 11:13:33] (step=0066669) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.044218352572882, LR: 0.0003 +[2026-03-05 11:13:40] (step=0066670) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.044414009000196, LR: 0.0003 +[2026-03-05 11:13:48] (step=0066671) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.04460966542751, LR: 0.0003 +[2026-03-05 11:13:56] (step=0066672) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.044805321854822, LR: 0.0003 +[2026-03-05 11:14:04] (step=0066673) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.045000978282136, LR: 0.0003 +[2026-03-05 11:14:12] (step=0066674) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.04519663470945, LR: 0.0003 +[2026-03-05 11:14:20] (step=0066675) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 13.045392291136764, LR: 0.0003 +[2026-03-05 11:14:28] (step=0066676) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.045587947564078, LR: 0.0003 +[2026-03-05 11:14:35] (step=0066677) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 13.04578360399139, LR: 0.0003 +[2026-03-05 11:14:43] (step=0066678) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.045979260418704, LR: 0.0003 +[2026-03-05 11:14:51] (step=0066679) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.046174916846018, LR: 0.0003 +[2026-03-05 11:14:59] (step=0066680) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.046370573273332, LR: 0.0003 +[2026-03-05 11:15:07] (step=0066681) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.046566229700646, LR: 0.0003 +[2026-03-05 11:15:15] (step=0066682) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.046761886127959, LR: 0.0003 +[2026-03-05 11:15:22] (step=0066683) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 13.046957542555273, LR: 0.0003 +[2026-03-05 11:15:30] (step=0066684) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.047153198982587, LR: 0.0003 +[2026-03-05 11:15:38] (step=0066685) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 13.0473488554099, LR: 0.0003 +[2026-03-05 11:15:46] (step=0066686) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.047544511837215, LR: 0.0003 +[2026-03-05 11:15:54] (step=0066687) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.047740168264527, LR: 0.0003 +[2026-03-05 11:16:02] (step=0066688) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.04793582469184, LR: 0.0003 +[2026-03-05 11:16:09] (step=0066689) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.048131481119155, LR: 0.0003 +[2026-03-05 11:16:17] (step=0066690) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.048327137546469, LR: 0.0003 +[2026-03-05 11:16:25] (step=0066691) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 13.048522793973783, LR: 0.0003 +[2026-03-05 11:16:33] (step=0066692) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.048718450401095, LR: 0.0003 +[2026-03-05 11:16:41] (step=0066693) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.04891410682841, LR: 0.0003 +[2026-03-05 11:16:49] (step=0066694) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.049109763255723, LR: 0.0003 +[2026-03-05 11:16:57] (step=0066695) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.049305419683037, LR: 0.0003 +[2026-03-05 11:17:04] (step=0066696) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.04950107611035, LR: 0.0003 +[2026-03-05 11:17:12] (step=0066697) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.049696732537663, LR: 0.0003 +[2026-03-05 11:17:20] (step=0066698) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.049892388964977, LR: 0.0003 +[2026-03-05 11:17:28] (step=0066699) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.050088045392291, LR: 0.0003 +[2026-03-05 11:17:36] (step=0066700) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.050283701819605, LR: 0.0003 +[2026-03-05 11:17:44] (step=0066701) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.050479358246918, LR: 0.0003 +[2026-03-05 11:17:52] (step=0066702) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.050675014674232, LR: 0.0003 +[2026-03-05 11:17:59] (step=0066703) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.050870671101546, LR: 0.0003 +[2026-03-05 11:18:07] (step=0066704) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.05106632752886, LR: 0.0003 +[2026-03-05 11:18:15] (step=0066705) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.051261983956174, LR: 0.0003 +[2026-03-05 11:18:23] (step=0066706) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.051457640383486, LR: 0.0003 +[2026-03-05 11:18:31] (step=0066707) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.0516532968108, LR: 0.0003 +[2026-03-05 11:18:39] (step=0066708) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.051848953238114, LR: 0.0003 +[2026-03-05 11:18:46] (step=0066709) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 13.052044609665428, LR: 0.0003 +[2026-03-05 11:18:55] (step=0066710) Train Loss: 0.4405, Train Steps/Sec: 0.12, Epoch: 13.052240266092742, LR: 0.0003 +[2026-03-05 11:19:02] (step=0066711) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.052435922520054, LR: 0.0003 +[2026-03-05 11:19:10] (step=0066712) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.052631578947368, LR: 0.0003 +[2026-03-05 11:19:18] (step=0066713) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.052827235374682, LR: 0.0003 +[2026-03-05 11:19:26] (step=0066714) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.053022891801996, LR: 0.0003 +[2026-03-05 11:19:34] (step=0066715) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.05321854822931, LR: 0.0003 +[2026-03-05 11:19:42] (step=0066716) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.053414204656622, LR: 0.0003 +[2026-03-05 11:19:49] (step=0066717) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.053609861083936, LR: 0.0003 +[2026-03-05 11:19:57] (step=0066718) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.05380551751125, LR: 0.0003 +[2026-03-05 11:20:05] (step=0066719) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.054001173938564, LR: 0.0003 +[2026-03-05 11:20:13] (step=0066720) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 13.054196830365877, LR: 0.0003 +[2026-03-05 11:20:21] (step=0066721) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 13.05439248679319, LR: 0.0003 +[2026-03-05 11:20:29] (step=0066722) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 13.054588143220505, LR: 0.0003 +[2026-03-05 11:20:37] (step=0066723) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.054783799647819, LR: 0.0003 +[2026-03-05 11:20:44] (step=0066724) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.054979456075133, LR: 0.0003 +[2026-03-05 11:20:52] (step=0066725) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.055175112502445, LR: 0.0003 +[2026-03-05 11:21:00] (step=0066726) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.055370768929759, LR: 0.0003 +[2026-03-05 11:21:08] (step=0066727) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.055566425357073, LR: 0.0003 +[2026-03-05 11:21:16] (step=0066728) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.055762081784387, LR: 0.0003 +[2026-03-05 11:21:24] (step=0066729) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 13.055957738211701, LR: 0.0003 +[2026-03-05 11:21:31] (step=0066730) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.056153394639013, LR: 0.0003 +[2026-03-05 11:21:39] (step=0066731) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.056349051066327, LR: 0.0003 +[2026-03-05 11:21:47] (step=0066732) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.056544707493641, LR: 0.0003 +[2026-03-05 11:21:55] (step=0066733) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.056740363920955, LR: 0.0003 +[2026-03-05 11:22:03] (step=0066734) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.05693602034827, LR: 0.0003 +[2026-03-05 11:22:11] (step=0066735) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.057131676775581, LR: 0.0003 +[2026-03-05 11:22:19] (step=0066736) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 13.057327333202895, LR: 0.0003 +[2026-03-05 11:22:26] (step=0066737) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.05752298963021, LR: 0.0003 +[2026-03-05 11:22:34] (step=0066738) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.057718646057523, LR: 0.0003 +[2026-03-05 11:22:42] (step=0066739) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.057914302484837, LR: 0.0003 +[2026-03-05 11:22:50] (step=0066740) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 13.05810995891215, LR: 0.0003 +[2026-03-05 11:22:58] (step=0066741) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.058305615339464, LR: 0.0003 +[2026-03-05 11:23:06] (step=0066742) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.058501271766778, LR: 0.0003 +[2026-03-05 11:23:14] (step=0066743) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.058696928194092, LR: 0.0003 +[2026-03-05 11:23:21] (step=0066744) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.058892584621406, LR: 0.0003 +[2026-03-05 11:23:29] (step=0066745) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.059088241048718, LR: 0.0003 +[2026-03-05 11:23:37] (step=0066746) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.059283897476032, LR: 0.0003 +[2026-03-05 11:23:45] (step=0066747) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.059479553903346, LR: 0.0003 +[2026-03-05 11:23:53] (step=0066748) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.05967521033066, LR: 0.0003 +[2026-03-05 11:24:01] (step=0066749) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.059870866757972, LR: 0.0003 +[2026-03-05 11:24:09] (step=0066750) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.060066523185286, LR: 0.0003 +[2026-03-05 11:24:16] (step=0066751) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.0602621796126, LR: 0.0003 +[2026-03-05 11:24:24] (step=0066752) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 13.060457836039914, LR: 0.0003 +[2026-03-05 11:24:32] (step=0066753) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.060653492467228, LR: 0.0003 +[2026-03-05 11:24:40] (step=0066754) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 13.06084914889454, LR: 0.0003 +[2026-03-05 11:24:48] (step=0066755) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.061044805321854, LR: 0.0003 +[2026-03-05 11:24:56] (step=0066756) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.061240461749168, LR: 0.0003 +[2026-03-05 11:25:03] (step=0066757) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.061436118176482, LR: 0.0003 +[2026-03-05 11:25:11] (step=0066758) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.061631774603796, LR: 0.0003 +[2026-03-05 11:25:19] (step=0066759) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.061827431031109, LR: 0.0003 +[2026-03-05 11:25:27] (step=0066760) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.062023087458423, LR: 0.0003 +[2026-03-05 11:25:35] (step=0066761) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.062218743885737, LR: 0.0003 +[2026-03-05 11:25:43] (step=0066762) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.06241440031305, LR: 0.0003 +[2026-03-05 11:25:51] (step=0066763) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 13.062610056740365, LR: 0.0003 +[2026-03-05 11:25:58] (step=0066764) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.062805713167677, LR: 0.0003 +[2026-03-05 11:26:06] (step=0066765) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 13.063001369594991, LR: 0.0003 +[2026-03-05 11:26:14] (step=0066766) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.063197026022305, LR: 0.0003 +[2026-03-05 11:26:22] (step=0066767) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.063392682449619, LR: 0.0003 +[2026-03-05 11:26:30] (step=0066768) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.063588338876933, LR: 0.0003 +[2026-03-05 11:26:38] (step=0066769) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.063783995304245, LR: 0.0003 +[2026-03-05 11:26:46] (step=0066770) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 13.06397965173156, LR: 0.0003 +[2026-03-05 11:26:53] (step=0066771) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.064175308158873, LR: 0.0003 +[2026-03-05 11:27:01] (step=0066772) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.064370964586187, LR: 0.0003 +[2026-03-05 11:27:09] (step=0066773) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.0645666210135, LR: 0.0003 +[2026-03-05 11:27:17] (step=0066774) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.064762277440813, LR: 0.0003 +[2026-03-05 11:27:25] (step=0066775) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.064957933868127, LR: 0.0003 +[2026-03-05 11:27:33] (step=0066776) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.065153590295441, LR: 0.0003 +[2026-03-05 11:27:41] (step=0066777) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.065349246722755, LR: 0.0003 +[2026-03-05 11:27:48] (step=0066778) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.065544903150068, LR: 0.0003 +[2026-03-05 11:27:56] (step=0066779) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.065740559577382, LR: 0.0003 +[2026-03-05 11:28:04] (step=0066780) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.065936216004696, LR: 0.0003 +[2026-03-05 11:28:12] (step=0066781) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.06613187243201, LR: 0.0003 +[2026-03-05 11:28:20] (step=0066782) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.066327528859324, LR: 0.0003 +[2026-03-05 11:28:28] (step=0066783) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.066523185286636, LR: 0.0003 +[2026-03-05 11:28:35] (step=0066784) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.06671884171395, LR: 0.0003 +[2026-03-05 11:28:43] (step=0066785) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.066914498141264, LR: 0.0003 +[2026-03-05 11:28:51] (step=0066786) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 13.067110154568578, LR: 0.0003 +[2026-03-05 11:28:59] (step=0066787) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.067305810995892, LR: 0.0003 +[2026-03-05 11:29:07] (step=0066788) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.067501467423204, LR: 0.0003 +[2026-03-05 11:29:15] (step=0066789) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.067697123850518, LR: 0.0003 +[2026-03-05 11:29:23] (step=0066790) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.067892780277832, LR: 0.0003 +[2026-03-05 11:29:30] (step=0066791) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.068088436705146, LR: 0.0003 +[2026-03-05 11:29:38] (step=0066792) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.06828409313246, LR: 0.0003 +[2026-03-05 11:29:46] (step=0066793) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.068479749559772, LR: 0.0003 +[2026-03-05 11:29:54] (step=0066794) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 13.068675405987086, LR: 0.0003 +[2026-03-05 11:30:02] (step=0066795) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.0688710624144, LR: 0.0003 +[2026-03-05 11:30:10] (step=0066796) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.069066718841714, LR: 0.0003 +[2026-03-05 11:30:18] (step=0066797) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.069262375269028, LR: 0.0003 +[2026-03-05 11:30:25] (step=0066798) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.06945803169634, LR: 0.0003 +[2026-03-05 11:30:33] (step=0066799) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.069653688123655, LR: 0.0003 +[2026-03-05 11:30:41] (step=0066800) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 13.069849344550969, LR: 0.0003 +[2026-03-05 11:30:49] (step=0066801) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.070045000978283, LR: 0.0003 +[2026-03-05 11:30:57] (step=0066802) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.070240657405595, LR: 0.0003 +[2026-03-05 11:31:05] (step=0066803) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.070436313832909, LR: 0.0003 +[2026-03-05 11:31:13] (step=0066804) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.070631970260223, LR: 0.0003 +[2026-03-05 11:31:20] (step=0066805) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.070827626687537, LR: 0.0003 +[2026-03-05 11:31:28] (step=0066806) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.071023283114851, LR: 0.0003 +[2026-03-05 11:31:36] (step=0066807) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 13.071218939542163, LR: 0.0003 +[2026-03-05 11:31:44] (step=0066808) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.071414595969477, LR: 0.0003 +[2026-03-05 11:31:52] (step=0066809) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.071610252396791, LR: 0.0003 +[2026-03-05 11:32:00] (step=0066810) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.071805908824105, LR: 0.0003 +[2026-03-05 11:32:08] (step=0066811) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 13.07200156525142, LR: 0.0003 +[2026-03-05 11:32:15] (step=0066812) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.072197221678731, LR: 0.0003 +[2026-03-05 11:32:23] (step=0066813) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.072392878106045, LR: 0.0003 +[2026-03-05 11:32:31] (step=0066814) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.07258853453336, LR: 0.0003 +[2026-03-05 11:32:39] (step=0066815) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.072784190960673, LR: 0.0003 +[2026-03-05 11:32:47] (step=0066816) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.072979847387987, LR: 0.0003 +[2026-03-05 11:32:55] (step=0066817) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.0731755038153, LR: 0.0003 +[2026-03-05 11:33:02] (step=0066818) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 13.073371160242614, LR: 0.0003 +[2026-03-05 11:33:10] (step=0066819) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.073566816669928, LR: 0.0003 +[2026-03-05 11:33:18] (step=0066820) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.073762473097242, LR: 0.0003 +[2026-03-05 11:33:26] (step=0066821) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.073958129524556, LR: 0.0003 +[2026-03-05 11:33:34] (step=0066822) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.074153785951868, LR: 0.0003 +[2026-03-05 11:33:42] (step=0066823) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.074349442379182, LR: 0.0003 +[2026-03-05 11:33:50] (step=0066824) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.074545098806496, LR: 0.0003 +[2026-03-05 11:33:57] (step=0066825) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 13.07474075523381, LR: 0.0003 +[2026-03-05 11:34:05] (step=0066826) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.074936411661122, LR: 0.0003 +[2026-03-05 11:34:13] (step=0066827) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.075132068088436, LR: 0.0003 +[2026-03-05 11:34:21] (step=0066828) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.07532772451575, LR: 0.0003 +[2026-03-05 11:34:29] (step=0066829) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.075523380943064, LR: 0.0003 +[2026-03-05 11:34:37] (step=0066830) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.075719037370378, LR: 0.0003 +[2026-03-05 11:34:44] (step=0066831) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.07591469379769, LR: 0.0003 +[2026-03-05 11:34:52] (step=0066832) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.076110350225004, LR: 0.0003 +[2026-03-05 11:35:00] (step=0066833) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.076306006652318, LR: 0.0003 +[2026-03-05 11:35:08] (step=0066834) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.076501663079632, LR: 0.0003 +[2026-03-05 11:35:16] (step=0066835) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 13.076697319506946, LR: 0.0003 +[2026-03-05 11:35:24] (step=0066836) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.076892975934259, LR: 0.0003 +[2026-03-05 11:35:32] (step=0066837) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.077088632361573, LR: 0.0003 +[2026-03-05 11:35:39] (step=0066838) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 13.077284288788887, LR: 0.0003 +[2026-03-05 11:35:47] (step=0066839) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.0774799452162, LR: 0.0003 +[2026-03-05 11:35:55] (step=0066840) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.077675601643515, LR: 0.0003 +[2026-03-05 11:36:03] (step=0066841) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.077871258070827, LR: 0.0003 +[2026-03-05 11:36:11] (step=0066842) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.078066914498141, LR: 0.0003 +[2026-03-05 11:36:19] (step=0066843) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.078262570925455, LR: 0.0003 +[2026-03-05 11:36:27] (step=0066844) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.078458227352769, LR: 0.0003 +[2026-03-05 11:36:35] (step=0066845) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.078653883780083, LR: 0.0003 +[2026-03-05 11:36:42] (step=0066846) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 13.078849540207395, LR: 0.0003 +[2026-03-05 11:36:50] (step=0066847) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.07904519663471, LR: 0.0003 +[2026-03-05 11:36:58] (step=0066848) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.079240853062023, LR: 0.0003 +[2026-03-05 11:37:06] (step=0066849) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.079436509489337, LR: 0.0003 +[2026-03-05 11:37:14] (step=0066850) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.079632165916651, LR: 0.0003 +[2026-03-05 11:37:22] (step=0066851) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 13.079827822343963, LR: 0.0003 +[2026-03-05 11:37:29] (step=0066852) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.080023478771277, LR: 0.0003 +[2026-03-05 11:37:37] (step=0066853) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.080219135198591, LR: 0.0003 +[2026-03-05 11:37:45] (step=0066854) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.080414791625905, LR: 0.0003 +[2026-03-05 11:37:53] (step=0066855) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.080610448053218, LR: 0.0003 +[2026-03-05 11:38:01] (step=0066856) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.080806104480532, LR: 0.0003 +[2026-03-05 11:38:09] (step=0066857) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.081001760907846, LR: 0.0003 +[2026-03-05 11:38:16] (step=0066858) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.08119741733516, LR: 0.0003 +[2026-03-05 11:38:24] (step=0066859) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.081393073762474, LR: 0.0003 +[2026-03-05 11:38:32] (step=0066860) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.081588730189786, LR: 0.0003 +[2026-03-05 11:38:40] (step=0066861) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.0817843866171, LR: 0.0003 +[2026-03-05 11:38:48] (step=0066862) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.081980043044414, LR: 0.0003 +[2026-03-05 11:38:56] (step=0066863) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.082175699471728, LR: 0.0003 +[2026-03-05 11:39:04] (step=0066864) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.082371355899042, LR: 0.0003 +[2026-03-05 11:39:12] (step=0066865) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.082567012326354, LR: 0.0003 +[2026-03-05 11:39:19] (step=0066866) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.082762668753668, LR: 0.0003 +[2026-03-05 11:39:27] (step=0066867) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.082958325180982, LR: 0.0003 +[2026-03-05 11:39:35] (step=0066868) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.083153981608296, LR: 0.0003 +[2026-03-05 11:39:43] (step=0066869) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.08334963803561, LR: 0.0003 +[2026-03-05 11:39:51] (step=0066870) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.083545294462922, LR: 0.0003 +[2026-03-05 11:39:59] (step=0066871) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.083740950890236, LR: 0.0003 +[2026-03-05 11:40:06] (step=0066872) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.08393660731755, LR: 0.0003 +[2026-03-05 11:40:14] (step=0066873) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.084132263744864, LR: 0.0003 +[2026-03-05 11:40:22] (step=0066874) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.084327920172178, LR: 0.0003 +[2026-03-05 11:40:30] (step=0066875) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.08452357659949, LR: 0.0003 +[2026-03-05 11:40:38] (step=0066876) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.084719233026805, LR: 0.0003 +[2026-03-05 11:40:46] (step=0066877) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.084914889454119, LR: 0.0003 +[2026-03-05 11:40:54] (step=0066878) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.085110545881433, LR: 0.0003 +[2026-03-05 11:41:01] (step=0066879) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.085306202308745, LR: 0.0003 +[2026-03-05 11:41:09] (step=0066880) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.085501858736059, LR: 0.0003 +[2026-03-05 11:41:17] (step=0066881) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.085697515163373, LR: 0.0003 +[2026-03-05 11:41:25] (step=0066882) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.085893171590687, LR: 0.0003 +[2026-03-05 11:41:33] (step=0066883) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.086088828018001, LR: 0.0003 +[2026-03-05 11:41:41] (step=0066884) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.086284484445313, LR: 0.0003 +[2026-03-05 11:41:48] (step=0066885) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.086480140872627, LR: 0.0003 +[2026-03-05 11:41:56] (step=0066886) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.086675797299941, LR: 0.0003 +[2026-03-05 11:42:04] (step=0066887) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.086871453727255, LR: 0.0003 +[2026-03-05 11:42:12] (step=0066888) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.08706711015457, LR: 0.0003 +[2026-03-05 11:42:20] (step=0066889) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.087262766581881, LR: 0.0003 +[2026-03-05 11:42:28] (step=0066890) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.087458423009195, LR: 0.0003 +[2026-03-05 11:42:36] (step=0066891) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.08765407943651, LR: 0.0003 +[2026-03-05 11:42:44] (step=0066892) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.087849735863823, LR: 0.0003 +[2026-03-05 11:42:51] (step=0066893) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.088045392291138, LR: 0.0003 +[2026-03-05 11:42:59] (step=0066894) Train Loss: 0.4608, Train Steps/Sec: 0.13, Epoch: 13.08824104871845, LR: 0.0003 +[2026-03-05 11:43:07] (step=0066895) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.088436705145764, LR: 0.0003 +[2026-03-05 11:43:15] (step=0066896) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.088632361573078, LR: 0.0003 +[2026-03-05 11:43:23] (step=0066897) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 13.088828018000392, LR: 0.0003 +[2026-03-05 11:43:31] (step=0066898) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.089023674427706, LR: 0.0003 +[2026-03-05 11:43:39] (step=0066899) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.089219330855018, LR: 0.0003 +[2026-03-05 11:43:46] (step=0066900) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.089414987282332, LR: 0.0003 +[2026-03-05 11:43:54] (step=0066901) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.089610643709646, LR: 0.0003 +[2026-03-05 11:44:02] (step=0066902) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.08980630013696, LR: 0.0003 +[2026-03-05 11:44:10] (step=0066903) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.090001956564274, LR: 0.0003 +[2026-03-05 11:44:18] (step=0066904) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.090197612991586, LR: 0.0003 +[2026-03-05 11:44:26] (step=0066905) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.0903932694189, LR: 0.0003 +[2026-03-05 11:44:34] (step=0066906) Train Loss: 0.4216, Train Steps/Sec: 0.13, Epoch: 13.090588925846214, LR: 0.0003 +[2026-03-05 11:44:41] (step=0066907) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.090784582273528, LR: 0.0003 +[2026-03-05 11:44:49] (step=0066908) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 13.09098023870084, LR: 0.0003 +[2026-03-05 11:44:57] (step=0066909) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.091175895128154, LR: 0.0003 +[2026-03-05 11:45:05] (step=0066910) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.091371551555468, LR: 0.0003 +[2026-03-05 11:45:13] (step=0066911) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.091567207982783, LR: 0.0003 +[2026-03-05 11:45:21] (step=0066912) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.091762864410097, LR: 0.0003 +[2026-03-05 11:45:28] (step=0066913) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.091958520837409, LR: 0.0003 +[2026-03-05 11:45:36] (step=0066914) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.092154177264723, LR: 0.0003 +[2026-03-05 11:45:44] (step=0066915) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.092349833692037, LR: 0.0003 +[2026-03-05 11:45:52] (step=0066916) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.09254549011935, LR: 0.0003 +[2026-03-05 11:46:00] (step=0066917) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.092741146546665, LR: 0.0003 +[2026-03-05 11:46:08] (step=0066918) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.092936802973977, LR: 0.0003 +[2026-03-05 11:46:15] (step=0066919) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 13.093132459401291, LR: 0.0003 +[2026-03-05 11:46:23] (step=0066920) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.093328115828605, LR: 0.0003 +[2026-03-05 11:46:31] (step=0066921) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.093523772255919, LR: 0.0003 +[2026-03-05 11:46:39] (step=0066922) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.093719428683233, LR: 0.0003 +[2026-03-05 11:46:47] (step=0066923) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.093915085110545, LR: 0.0003 +[2026-03-05 11:46:55] (step=0066924) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.09411074153786, LR: 0.0003 +[2026-03-05 11:47:02] (step=0066925) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.094306397965173, LR: 0.0003 +[2026-03-05 11:47:10] (step=0066926) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 13.094502054392487, LR: 0.0003 +[2026-03-05 11:47:18] (step=0066927) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.094697710819801, LR: 0.0003 +[2026-03-05 11:47:26] (step=0066928) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.094893367247114, LR: 0.0003 +[2026-03-05 11:47:34] (step=0066929) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.095089023674428, LR: 0.0003 +[2026-03-05 11:47:42] (step=0066930) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.095284680101742, LR: 0.0003 +[2026-03-05 11:47:50] (step=0066931) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.095480336529056, LR: 0.0003 +[2026-03-05 11:47:57] (step=0066932) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.095675992956368, LR: 0.0003 +[2026-03-05 11:48:05] (step=0066933) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.095871649383682, LR: 0.0003 +[2026-03-05 11:48:13] (step=0066934) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.096067305810996, LR: 0.0003 +[2026-03-05 11:48:21] (step=0066935) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.09626296223831, LR: 0.0003 +[2026-03-05 11:48:29] (step=0066936) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.096458618665624, LR: 0.0003 +[2026-03-05 11:48:37] (step=0066937) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.096654275092936, LR: 0.0003 +[2026-03-05 11:48:44] (step=0066938) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.09684993152025, LR: 0.0003 +[2026-03-05 11:48:52] (step=0066939) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.097045587947564, LR: 0.0003 +[2026-03-05 11:49:00] (step=0066940) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.097241244374878, LR: 0.0003 +[2026-03-05 11:49:08] (step=0066941) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.097436900802192, LR: 0.0003 +[2026-03-05 11:49:16] (step=0066942) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.097632557229504, LR: 0.0003 +[2026-03-05 11:49:24] (step=0066943) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.097828213656818, LR: 0.0003 +[2026-03-05 11:49:32] (step=0066944) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.098023870084132, LR: 0.0003 +[2026-03-05 11:49:39] (step=0066945) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.098219526511446, LR: 0.0003 +[2026-03-05 11:49:47] (step=0066946) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.09841518293876, LR: 0.0003 +[2026-03-05 11:49:55] (step=0066947) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.098610839366073, LR: 0.0003 +[2026-03-05 11:50:03] (step=0066948) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.098806495793387, LR: 0.0003 +[2026-03-05 11:50:11] (step=0066949) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.0990021522207, LR: 0.0003 +[2026-03-05 11:50:19] (step=0066950) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.099197808648015, LR: 0.0003 +[2026-03-05 11:50:26] (step=0066951) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.099393465075329, LR: 0.0003 +[2026-03-05 11:50:34] (step=0066952) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.09958912150264, LR: 0.0003 +[2026-03-05 11:50:42] (step=0066953) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.099784777929955, LR: 0.0003 +[2026-03-05 11:50:50] (step=0066954) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.099980434357269, LR: 0.0003 +[2026-03-05 11:50:58] (step=0066955) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.100176090784583, LR: 0.0003 +[2026-03-05 11:51:06] (step=0066956) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.100371747211897, LR: 0.0003 +[2026-03-05 11:51:14] (step=0066957) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.100567403639209, LR: 0.0003 +[2026-03-05 11:51:21] (step=0066958) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.100763060066523, LR: 0.0003 +[2026-03-05 11:51:29] (step=0066959) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.100958716493837, LR: 0.0003 +[2026-03-05 11:51:37] (step=0066960) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.101154372921151, LR: 0.0003 +[2026-03-05 11:51:45] (step=0066961) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.101350029348463, LR: 0.0003 +[2026-03-05 11:51:53] (step=0066962) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.101545685775777, LR: 0.0003 +[2026-03-05 11:52:01] (step=0066963) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.101741342203091, LR: 0.0003 +[2026-03-05 11:52:09] (step=0066964) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.101936998630405, LR: 0.0003 +[2026-03-05 11:52:16] (step=0066965) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.10213265505772, LR: 0.0003 +[2026-03-05 11:52:24] (step=0066966) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.102328311485032, LR: 0.0003 +[2026-03-05 11:52:32] (step=0066967) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.102523967912346, LR: 0.0003 +[2026-03-05 11:52:40] (step=0066968) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.10271962433966, LR: 0.0003 +[2026-03-05 11:52:48] (step=0066969) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 13.102915280766974, LR: 0.0003 +[2026-03-05 11:52:56] (step=0066970) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.103110937194288, LR: 0.0003 +[2026-03-05 11:53:03] (step=0066971) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.1033065936216, LR: 0.0003 +[2026-03-05 11:53:11] (step=0066972) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.103502250048914, LR: 0.0003 +[2026-03-05 11:53:19] (step=0066973) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.103697906476228, LR: 0.0003 +[2026-03-05 11:53:27] (step=0066974) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.103893562903542, LR: 0.0003 +[2026-03-05 11:53:35] (step=0066975) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.104089219330856, LR: 0.0003 +[2026-03-05 11:53:43] (step=0066976) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.104284875758168, LR: 0.0003 +[2026-03-05 11:53:51] (step=0066977) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.104480532185482, LR: 0.0003 +[2026-03-05 11:53:58] (step=0066978) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.104676188612796, LR: 0.0003 +[2026-03-05 11:54:06] (step=0066979) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.10487184504011, LR: 0.0003 +[2026-03-05 11:54:14] (step=0066980) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.105067501467424, LR: 0.0003 +[2026-03-05 11:54:22] (step=0066981) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.105263157894736, LR: 0.0003 +[2026-03-05 11:54:30] (step=0066982) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.10545881432205, LR: 0.0003 +[2026-03-05 11:54:38] (step=0066983) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.105654470749364, LR: 0.0003 +[2026-03-05 11:54:45] (step=0066984) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.105850127176678, LR: 0.0003 +[2026-03-05 11:54:53] (step=0066985) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.10604578360399, LR: 0.0003 +[2026-03-05 11:55:01] (step=0066986) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.106241440031305, LR: 0.0003 +[2026-03-05 11:55:09] (step=0066987) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.106437096458619, LR: 0.0003 +[2026-03-05 11:55:17] (step=0066988) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 13.106632752885933, LR: 0.0003 +[2026-03-05 11:55:25] (step=0066989) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.106828409313247, LR: 0.0003 +[2026-03-05 11:55:33] (step=0066990) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.107024065740559, LR: 0.0003 +[2026-03-05 11:55:40] (step=0066991) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.107219722167873, LR: 0.0003 +[2026-03-05 11:55:48] (step=0066992) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.107415378595187, LR: 0.0003 +[2026-03-05 11:55:56] (step=0066993) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 13.1076110350225, LR: 0.0003 +[2026-03-05 11:56:04] (step=0066994) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.107806691449815, LR: 0.0003 +[2026-03-05 11:56:12] (step=0066995) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.108002347877127, LR: 0.0003 +[2026-03-05 11:56:20] (step=0066996) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.108198004304441, LR: 0.0003 +[2026-03-05 11:56:28] (step=0066997) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.108393660731755, LR: 0.0003 +[2026-03-05 11:56:35] (step=0066998) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.108589317159069, LR: 0.0003 +[2026-03-05 11:56:43] (step=0066999) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.108784973586383, LR: 0.0003 +[2026-03-05 11:56:51] (step=0067000) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.108980630013695, LR: 0.0003 +[2026-03-05 11:56:51] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0067000/ +[2026-03-05 11:56:59] (step=0067001) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.10917628644101, LR: 0.0003 +[2026-03-05 11:57:07] (step=0067002) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.109371942868323, LR: 0.0003 +[2026-03-05 11:57:15] (step=0067003) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.109567599295637, LR: 0.0003 +[2026-03-05 11:57:23] (step=0067004) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.109763255722951, LR: 0.0003 +[2026-03-05 11:57:30] (step=0067005) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.109958912150264, LR: 0.0003 +[2026-03-05 11:57:38] (step=0067006) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.110154568577578, LR: 0.0003 +[2026-03-05 11:57:46] (step=0067007) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 13.110350225004892, LR: 0.0003 +[2026-03-05 11:57:54] (step=0067008) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 13.110545881432206, LR: 0.0003 +[2026-03-05 11:58:02] (step=0067009) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.11074153785952, LR: 0.0003 +[2026-03-05 11:58:10] (step=0067010) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.110937194286832, LR: 0.0003 +[2026-03-05 11:58:18] (step=0067011) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.111132850714146, LR: 0.0003 +[2026-03-05 11:58:26] (step=0067012) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.11132850714146, LR: 0.0003 +[2026-03-05 11:58:33] (step=0067013) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.111524163568774, LR: 0.0003 +[2026-03-05 11:58:41] (step=0067014) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.111719819996086, LR: 0.0003 +[2026-03-05 11:58:49] (step=0067015) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.1119154764234, LR: 0.0003 +[2026-03-05 11:58:57] (step=0067016) Train Loss: 0.4219, Train Steps/Sec: 0.13, Epoch: 13.112111132850714, LR: 0.0003 +[2026-03-05 11:59:05] (step=0067017) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.112306789278028, LR: 0.0003 +[2026-03-05 11:59:13] (step=0067018) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.112502445705342, LR: 0.0003 +[2026-03-05 11:59:20] (step=0067019) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.112698102132654, LR: 0.0003 +[2026-03-05 11:59:28] (step=0067020) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.112893758559968, LR: 0.0003 +[2026-03-05 11:59:36] (step=0067021) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 13.113089414987282, LR: 0.0003 +[2026-03-05 11:59:44] (step=0067022) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.113285071414596, LR: 0.0003 +[2026-03-05 11:59:52] (step=0067023) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.11348072784191, LR: 0.0003 +[2026-03-05 12:00:00] (step=0067024) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.113676384269223, LR: 0.0003 +[2026-03-05 12:00:07] (step=0067025) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.113872040696537, LR: 0.0003 +[2026-03-05 12:00:15] (step=0067026) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.11406769712385, LR: 0.0003 +[2026-03-05 12:00:23] (step=0067027) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 13.114263353551165, LR: 0.0003 +[2026-03-05 12:00:31] (step=0067028) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.114459009978479, LR: 0.0003 +[2026-03-05 12:00:39] (step=0067029) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.11465466640579, LR: 0.0003 +[2026-03-05 12:00:47] (step=0067030) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 13.114850322833105, LR: 0.0003 +[2026-03-05 12:00:55] (step=0067031) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.115045979260419, LR: 0.0003 +[2026-03-05 12:01:02] (step=0067032) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.115241635687733, LR: 0.0003 +[2026-03-05 12:01:10] (step=0067033) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.115437292115047, LR: 0.0003 +[2026-03-05 12:01:18] (step=0067034) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.115632948542359, LR: 0.0003 +[2026-03-05 12:01:26] (step=0067035) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.115828604969673, LR: 0.0003 +[2026-03-05 12:01:34] (step=0067036) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.116024261396987, LR: 0.0003 +[2026-03-05 12:01:42] (step=0067037) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.116219917824301, LR: 0.0003 +[2026-03-05 12:01:50] (step=0067038) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.116415574251613, LR: 0.0003 +[2026-03-05 12:01:57] (step=0067039) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.116611230678927, LR: 0.0003 +[2026-03-05 12:02:05] (step=0067040) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.116806887106241, LR: 0.0003 +[2026-03-05 12:02:13] (step=0067041) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.117002543533555, LR: 0.0003 +[2026-03-05 12:02:21] (step=0067042) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.11719819996087, LR: 0.0003 +[2026-03-05 12:02:29] (step=0067043) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.117393856388182, LR: 0.0003 +[2026-03-05 12:02:37] (step=0067044) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.117589512815496, LR: 0.0003 +[2026-03-05 12:02:44] (step=0067045) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.11778516924281, LR: 0.0003 +[2026-03-05 12:02:52] (step=0067046) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 13.117980825670124, LR: 0.0003 +[2026-03-05 12:03:00] (step=0067047) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 13.118176482097438, LR: 0.0003 +[2026-03-05 12:03:08] (step=0067048) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 13.11837213852475, LR: 0.0003 +[2026-03-05 12:03:16] (step=0067049) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.118567794952064, LR: 0.0003 +[2026-03-05 12:03:24] (step=0067050) Train Loss: 0.4686, Train Steps/Sec: 0.13, Epoch: 13.118763451379378, LR: 0.0003 +[2026-03-05 12:03:31] (step=0067051) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.118959107806692, LR: 0.0003 +[2026-03-05 12:03:39] (step=0067052) Train Loss: 0.4619, Train Steps/Sec: 0.13, Epoch: 13.119154764234006, LR: 0.0003 +[2026-03-05 12:03:47] (step=0067053) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.119350420661318, LR: 0.0003 +[2026-03-05 12:03:55] (step=0067054) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.119546077088632, LR: 0.0003 +[2026-03-05 12:04:03] (step=0067055) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.119741733515946, LR: 0.0003 +[2026-03-05 12:04:11] (step=0067056) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.11993738994326, LR: 0.0003 +[2026-03-05 12:04:19] (step=0067057) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.120133046370574, LR: 0.0003 +[2026-03-05 12:04:27] (step=0067058) Train Loss: 0.4411, Train Steps/Sec: 0.12, Epoch: 13.120328702797886, LR: 0.0003 +[2026-03-05 12:04:34] (step=0067059) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.1205243592252, LR: 0.0003 +[2026-03-05 12:04:42] (step=0067060) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.120720015652514, LR: 0.0003 +[2026-03-05 12:04:50] (step=0067061) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.120915672079828, LR: 0.0003 +[2026-03-05 12:04:58] (step=0067062) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.121111328507142, LR: 0.0003 +[2026-03-05 12:05:06] (step=0067063) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.121306984934455, LR: 0.0003 +[2026-03-05 12:05:14] (step=0067064) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.121502641361769, LR: 0.0003 +[2026-03-05 12:05:21] (step=0067065) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.121698297789083, LR: 0.0003 +[2026-03-05 12:05:29] (step=0067066) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.121893954216397, LR: 0.0003 +[2026-03-05 12:05:37] (step=0067067) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.122089610643709, LR: 0.0003 +[2026-03-05 12:05:45] (step=0067068) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.122285267071023, LR: 0.0003 +[2026-03-05 12:05:53] (step=0067069) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.122480923498337, LR: 0.0003 +[2026-03-05 12:06:01] (step=0067070) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.12267657992565, LR: 0.0003 +[2026-03-05 12:06:09] (step=0067071) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.122872236352965, LR: 0.0003 +[2026-03-05 12:06:16] (step=0067072) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.123067892780277, LR: 0.0003 +[2026-03-05 12:06:24] (step=0067073) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.123263549207591, LR: 0.0003 +[2026-03-05 12:06:32] (step=0067074) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.123459205634905, LR: 0.0003 +[2026-03-05 12:06:40] (step=0067075) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.123654862062219, LR: 0.0003 +[2026-03-05 12:06:48] (step=0067076) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.123850518489533, LR: 0.0003 +[2026-03-05 12:06:56] (step=0067077) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.124046174916845, LR: 0.0003 +[2026-03-05 12:07:03] (step=0067078) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.12424183134416, LR: 0.0003 +[2026-03-05 12:07:11] (step=0067079) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.124437487771473, LR: 0.0003 +[2026-03-05 12:07:19] (step=0067080) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.124633144198787, LR: 0.0003 +[2026-03-05 12:07:27] (step=0067081) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.124828800626101, LR: 0.0003 +[2026-03-05 12:07:35] (step=0067082) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 13.125024457053414, LR: 0.0003 +[2026-03-05 12:07:43] (step=0067083) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.125220113480728, LR: 0.0003 +[2026-03-05 12:07:51] (step=0067084) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.125415769908042, LR: 0.0003 +[2026-03-05 12:07:59] (step=0067085) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.125611426335356, LR: 0.0003 +[2026-03-05 12:08:06] (step=0067086) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 13.12580708276267, LR: 0.0003 +[2026-03-05 12:08:14] (step=0067087) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.126002739189982, LR: 0.0003 +[2026-03-05 12:08:22] (step=0067088) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.126198395617296, LR: 0.0003 +[2026-03-05 12:08:30] (step=0067089) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.12639405204461, LR: 0.0003 +[2026-03-05 12:08:38] (step=0067090) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.126589708471924, LR: 0.0003 +[2026-03-05 12:08:46] (step=0067091) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 13.126785364899236, LR: 0.0003 +[2026-03-05 12:08:53] (step=0067092) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.12698102132655, LR: 0.0003 +[2026-03-05 12:09:01] (step=0067093) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 13.127176677753864, LR: 0.0003 +[2026-03-05 12:09:09] (step=0067094) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.127372334181178, LR: 0.0003 +[2026-03-05 12:09:17] (step=0067095) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.127567990608492, LR: 0.0003 +[2026-03-05 12:09:25] (step=0067096) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.127763647035804, LR: 0.0003 +[2026-03-05 12:09:33] (step=0067097) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.127959303463118, LR: 0.0003 +[2026-03-05 12:09:41] (step=0067098) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.128154959890432, LR: 0.0003 +[2026-03-05 12:09:48] (step=0067099) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.128350616317746, LR: 0.0003 +[2026-03-05 12:09:56] (step=0067100) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.12854627274506, LR: 0.0003 +[2026-03-05 12:10:04] (step=0067101) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.128741929172373, LR: 0.0003 +[2026-03-05 12:10:12] (step=0067102) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.128937585599687, LR: 0.0003 +[2026-03-05 12:10:20] (step=0067103) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.129133242027, LR: 0.0003 +[2026-03-05 12:10:28] (step=0067104) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.129328898454315, LR: 0.0003 +[2026-03-05 12:10:35] (step=0067105) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.129524554881629, LR: 0.0003 +[2026-03-05 12:10:43] (step=0067106) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.12972021130894, LR: 0.0003 +[2026-03-05 12:10:51] (step=0067107) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.129915867736255, LR: 0.0003 +[2026-03-05 12:10:59] (step=0067108) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.130111524163569, LR: 0.0003 +[2026-03-05 12:11:07] (step=0067109) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.130307180590883, LR: 0.0003 +[2026-03-05 12:11:15] (step=0067110) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.130502837018197, LR: 0.0003 +[2026-03-05 12:11:23] (step=0067111) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.130698493445509, LR: 0.0003 +[2026-03-05 12:11:30] (step=0067112) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.130894149872823, LR: 0.0003 +[2026-03-05 12:11:38] (step=0067113) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.131089806300137, LR: 0.0003 +[2026-03-05 12:11:46] (step=0067114) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.131285462727451, LR: 0.0003 +[2026-03-05 12:11:54] (step=0067115) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.131481119154763, LR: 0.0003 +[2026-03-05 12:12:02] (step=0067116) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.131676775582077, LR: 0.0003 +[2026-03-05 12:12:10] (step=0067117) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.131872432009391, LR: 0.0003 +[2026-03-05 12:12:18] (step=0067118) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.132068088436705, LR: 0.0003 +[2026-03-05 12:12:25] (step=0067119) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.13226374486402, LR: 0.0003 +[2026-03-05 12:12:33] (step=0067120) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.132459401291332, LR: 0.0003 +[2026-03-05 12:12:41] (step=0067121) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.132655057718646, LR: 0.0003 +[2026-03-05 12:12:49] (step=0067122) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.13285071414596, LR: 0.0003 +[2026-03-05 12:12:57] (step=0067123) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 13.133046370573274, LR: 0.0003 +[2026-03-05 12:13:05] (step=0067124) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.133242027000588, LR: 0.0003 +[2026-03-05 12:13:12] (step=0067125) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.1334376834279, LR: 0.0003 +[2026-03-05 12:13:20] (step=0067126) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.133633339855214, LR: 0.0003 +[2026-03-05 12:13:28] (step=0067127) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.133828996282528, LR: 0.0003 +[2026-03-05 12:13:36] (step=0067128) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.134024652709842, LR: 0.0003 +[2026-03-05 12:13:44] (step=0067129) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.134220309137156, LR: 0.0003 +[2026-03-05 12:13:52] (step=0067130) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.134415965564468, LR: 0.0003 +[2026-03-05 12:14:00] (step=0067131) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.134611621991782, LR: 0.0003 +[2026-03-05 12:14:07] (step=0067132) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.134807278419096, LR: 0.0003 +[2026-03-05 12:14:15] (step=0067133) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.13500293484641, LR: 0.0003 +[2026-03-05 12:14:23] (step=0067134) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.135198591273724, LR: 0.0003 +[2026-03-05 12:14:31] (step=0067135) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.135394247701036, LR: 0.0003 +[2026-03-05 12:14:39] (step=0067136) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.13558990412835, LR: 0.0003 +[2026-03-05 12:14:47] (step=0067137) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.135785560555664, LR: 0.0003 +[2026-03-05 12:14:55] (step=0067138) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.135981216982978, LR: 0.0003 +[2026-03-05 12:15:02] (step=0067139) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.136176873410292, LR: 0.0003 +[2026-03-05 12:15:10] (step=0067140) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.136372529837605, LR: 0.0003 +[2026-03-05 12:15:18] (step=0067141) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.136568186264919, LR: 0.0003 +[2026-03-05 12:15:26] (step=0067142) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.136763842692233, LR: 0.0003 +[2026-03-05 12:15:34] (step=0067143) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.136959499119547, LR: 0.0003 +[2026-03-05 12:15:42] (step=0067144) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.137155155546859, LR: 0.0003 +[2026-03-05 12:15:49] (step=0067145) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.137350811974173, LR: 0.0003 +[2026-03-05 12:15:57] (step=0067146) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.137546468401487, LR: 0.0003 +[2026-03-05 12:16:05] (step=0067147) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.1377421248288, LR: 0.0003 +[2026-03-05 12:16:13] (step=0067148) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.137937781256115, LR: 0.0003 +[2026-03-05 12:16:21] (step=0067149) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 13.138133437683427, LR: 0.0003 +[2026-03-05 12:16:29] (step=0067150) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.138329094110741, LR: 0.0003 +[2026-03-05 12:16:36] (step=0067151) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.138524750538055, LR: 0.0003 +[2026-03-05 12:16:44] (step=0067152) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.13872040696537, LR: 0.0003 +[2026-03-05 12:16:52] (step=0067153) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.138916063392683, LR: 0.0003 +[2026-03-05 12:17:00] (step=0067154) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.139111719819995, LR: 0.0003 +[2026-03-05 12:17:08] (step=0067155) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.13930737624731, LR: 0.0003 +[2026-03-05 12:17:16] (step=0067156) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.139503032674623, LR: 0.0003 +[2026-03-05 12:17:24] (step=0067157) Train Loss: 0.4610, Train Steps/Sec: 0.13, Epoch: 13.139698689101937, LR: 0.0003 +[2026-03-05 12:17:31] (step=0067158) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.139894345529251, LR: 0.0003 +[2026-03-05 12:17:39] (step=0067159) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.140090001956564, LR: 0.0003 +[2026-03-05 12:17:47] (step=0067160) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.140285658383878, LR: 0.0003 +[2026-03-05 12:17:55] (step=0067161) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 13.140481314811192, LR: 0.0003 +[2026-03-05 12:18:03] (step=0067162) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 13.140676971238506, LR: 0.0003 +[2026-03-05 12:18:11] (step=0067163) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.14087262766582, LR: 0.0003 +[2026-03-05 12:18:18] (step=0067164) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.141068284093132, LR: 0.0003 +[2026-03-05 12:18:26] (step=0067165) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.141263940520446, LR: 0.0003 +[2026-03-05 12:18:34] (step=0067166) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.14145959694776, LR: 0.0003 +[2026-03-05 12:18:42] (step=0067167) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.141655253375074, LR: 0.0003 +[2026-03-05 12:18:50] (step=0067168) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.141850909802386, LR: 0.0003 +[2026-03-05 12:18:58] (step=0067169) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.1420465662297, LR: 0.0003 +[2026-03-05 12:19:05] (step=0067170) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.142242222657014, LR: 0.0003 +[2026-03-05 12:19:13] (step=0067171) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.142437879084328, LR: 0.0003 +[2026-03-05 12:19:21] (step=0067172) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.142633535511642, LR: 0.0003 +[2026-03-05 12:19:29] (step=0067173) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.142829191938954, LR: 0.0003 +[2026-03-05 12:19:37] (step=0067174) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.143024848366268, LR: 0.0003 +[2026-03-05 12:19:45] (step=0067175) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.143220504793582, LR: 0.0003 +[2026-03-05 12:19:52] (step=0067176) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 13.143416161220896, LR: 0.0003 +[2026-03-05 12:20:00] (step=0067177) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.14361181764821, LR: 0.0003 +[2026-03-05 12:20:08] (step=0067178) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.143807474075523, LR: 0.0003 +[2026-03-05 12:20:16] (step=0067179) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.144003130502837, LR: 0.0003 +[2026-03-05 12:20:24] (step=0067180) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 13.14419878693015, LR: 0.0003 +[2026-03-05 12:20:32] (step=0067181) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.144394443357465, LR: 0.0003 +[2026-03-05 12:20:40] (step=0067182) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.144590099784779, LR: 0.0003 +[2026-03-05 12:20:47] (step=0067183) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.14478575621209, LR: 0.0003 +[2026-03-05 12:20:55] (step=0067184) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 13.144981412639405, LR: 0.0003 +[2026-03-05 12:21:03] (step=0067185) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.145177069066719, LR: 0.0003 +[2026-03-05 12:21:11] (step=0067186) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.145372725494033, LR: 0.0003 +[2026-03-05 12:21:19] (step=0067187) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.145568381921347, LR: 0.0003 +[2026-03-05 12:21:27] (step=0067188) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 13.14576403834866, LR: 0.0003 +[2026-03-05 12:21:35] (step=0067189) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.145959694775973, LR: 0.0003 +[2026-03-05 12:21:42] (step=0067190) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 13.146155351203287, LR: 0.0003 +[2026-03-05 12:21:50] (step=0067191) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.146351007630601, LR: 0.0003 +[2026-03-05 12:21:58] (step=0067192) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.146546664057915, LR: 0.0003 +[2026-03-05 12:22:06] (step=0067193) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.146742320485227, LR: 0.0003 +[2026-03-05 12:22:14] (step=0067194) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.146937976912541, LR: 0.0003 +[2026-03-05 12:22:22] (step=0067195) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.147133633339855, LR: 0.0003 +[2026-03-05 12:22:29] (step=0067196) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.14732928976717, LR: 0.0003 +[2026-03-05 12:22:37] (step=0067197) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.147524946194482, LR: 0.0003 +[2026-03-05 12:22:45] (step=0067198) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.147720602621796, LR: 0.0003 +[2026-03-05 12:22:53] (step=0067199) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.14791625904911, LR: 0.0003 +[2026-03-05 12:23:01] (step=0067200) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.148111915476424, LR: 0.0003 +[2026-03-05 12:23:09] (step=0067201) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.148307571903738, LR: 0.0003 +[2026-03-05 12:23:17] (step=0067202) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.14850322833105, LR: 0.0003 +[2026-03-05 12:23:24] (step=0067203) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.148698884758364, LR: 0.0003 +[2026-03-05 12:23:32] (step=0067204) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.148894541185678, LR: 0.0003 +[2026-03-05 12:23:40] (step=0067205) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.149090197612992, LR: 0.0003 +[2026-03-05 12:23:48] (step=0067206) Train Loss: 0.4607, Train Steps/Sec: 0.13, Epoch: 13.149285854040306, LR: 0.0003 +[2026-03-05 12:23:56] (step=0067207) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.149481510467618, LR: 0.0003 +[2026-03-05 12:24:04] (step=0067208) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.149677166894932, LR: 0.0003 +[2026-03-05 12:24:11] (step=0067209) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 13.149872823322246, LR: 0.0003 +[2026-03-05 12:24:19] (step=0067210) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.15006847974956, LR: 0.0003 +[2026-03-05 12:24:27] (step=0067211) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.150264136176874, LR: 0.0003 +[2026-03-05 12:24:35] (step=0067212) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.150459792604186, LR: 0.0003 +[2026-03-05 12:24:43] (step=0067213) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.1506554490315, LR: 0.0003 +[2026-03-05 12:24:51] (step=0067214) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.150851105458814, LR: 0.0003 +[2026-03-05 12:24:59] (step=0067215) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.151046761886128, LR: 0.0003 +[2026-03-05 12:25:06] (step=0067216) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.151242418313442, LR: 0.0003 +[2026-03-05 12:25:14] (step=0067217) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.151438074740755, LR: 0.0003 +[2026-03-05 12:25:22] (step=0067218) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.151633731168069, LR: 0.0003 +[2026-03-05 12:25:30] (step=0067219) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 13.151829387595383, LR: 0.0003 +[2026-03-05 12:25:38] (step=0067220) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.152025044022697, LR: 0.0003 +[2026-03-05 12:25:46] (step=0067221) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.152220700450009, LR: 0.0003 +[2026-03-05 12:25:53] (step=0067222) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.152416356877323, LR: 0.0003 +[2026-03-05 12:26:01] (step=0067223) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.152612013304637, LR: 0.0003 +[2026-03-05 12:26:09] (step=0067224) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 13.152807669731951, LR: 0.0003 +[2026-03-05 12:26:17] (step=0067225) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.153003326159265, LR: 0.0003 +[2026-03-05 12:26:25] (step=0067226) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.153198982586577, LR: 0.0003 +[2026-03-05 12:26:33] (step=0067227) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.153394639013891, LR: 0.0003 +[2026-03-05 12:26:40] (step=0067228) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.153590295441205, LR: 0.0003 +[2026-03-05 12:26:48] (step=0067229) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 13.15378595186852, LR: 0.0003 +[2026-03-05 12:26:56] (step=0067230) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.153981608295833, LR: 0.0003 +[2026-03-05 12:27:04] (step=0067231) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 13.154177264723145, LR: 0.0003 +[2026-03-05 12:27:12] (step=0067232) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.15437292115046, LR: 0.0003 +[2026-03-05 12:27:20] (step=0067233) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.154568577577773, LR: 0.0003 +[2026-03-05 12:27:28] (step=0067234) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.154764234005087, LR: 0.0003 +[2026-03-05 12:27:36] (step=0067235) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.154959890432401, LR: 0.0003 +[2026-03-05 12:27:43] (step=0067236) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.155155546859714, LR: 0.0003 +[2026-03-05 12:27:51] (step=0067237) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.155351203287028, LR: 0.0003 +[2026-03-05 12:27:59] (step=0067238) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.155546859714342, LR: 0.0003 +[2026-03-05 12:28:07] (step=0067239) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 13.155742516141656, LR: 0.0003 +[2026-03-05 12:28:15] (step=0067240) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.15593817256897, LR: 0.0003 +[2026-03-05 12:28:23] (step=0067241) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.156133828996282, LR: 0.0003 +[2026-03-05 12:28:30] (step=0067242) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.156329485423596, LR: 0.0003 +[2026-03-05 12:28:38] (step=0067243) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.15652514185091, LR: 0.0003 +[2026-03-05 12:28:46] (step=0067244) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.156720798278224, LR: 0.0003 +[2026-03-05 12:28:54] (step=0067245) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.156916454705538, LR: 0.0003 +[2026-03-05 12:29:02] (step=0067246) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 13.15711211113285, LR: 0.0003 +[2026-03-05 12:29:10] (step=0067247) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.157307767560164, LR: 0.0003 +[2026-03-05 12:29:17] (step=0067248) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.157503423987478, LR: 0.0003 +[2026-03-05 12:29:25] (step=0067249) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.157699080414792, LR: 0.0003 +[2026-03-05 12:29:33] (step=0067250) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.157894736842104, LR: 0.0003 +[2026-03-05 12:29:41] (step=0067251) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.158090393269418, LR: 0.0003 +[2026-03-05 12:29:49] (step=0067252) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 13.158286049696732, LR: 0.0003 +[2026-03-05 12:29:57] (step=0067253) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.158481706124046, LR: 0.0003 +[2026-03-05 12:30:05] (step=0067254) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.15867736255136, LR: 0.0003 +[2026-03-05 12:30:12] (step=0067255) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.158873018978673, LR: 0.0003 +[2026-03-05 12:30:20] (step=0067256) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.159068675405987, LR: 0.0003 +[2026-03-05 12:30:28] (step=0067257) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.1592643318333, LR: 0.0003 +[2026-03-05 12:30:36] (step=0067258) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.159459988260615, LR: 0.0003 +[2026-03-05 12:30:44] (step=0067259) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.159655644687929, LR: 0.0003 +[2026-03-05 12:30:52] (step=0067260) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.159851301115241, LR: 0.0003 +[2026-03-05 12:31:00] (step=0067261) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.160046957542555, LR: 0.0003 +[2026-03-05 12:31:07] (step=0067262) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.160242613969869, LR: 0.0003 +[2026-03-05 12:31:15] (step=0067263) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.160438270397183, LR: 0.0003 +[2026-03-05 12:31:23] (step=0067264) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.160633926824497, LR: 0.0003 +[2026-03-05 12:31:31] (step=0067265) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.16082958325181, LR: 0.0003 +[2026-03-05 12:31:39] (step=0067266) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.161025239679123, LR: 0.0003 +[2026-03-05 12:31:47] (step=0067267) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.161220896106437, LR: 0.0003 +[2026-03-05 12:31:55] (step=0067268) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.161416552533751, LR: 0.0003 +[2026-03-05 12:32:02] (step=0067269) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.161612208961065, LR: 0.0003 +[2026-03-05 12:32:10] (step=0067270) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.161807865388377, LR: 0.0003 +[2026-03-05 12:32:18] (step=0067271) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.162003521815691, LR: 0.0003 +[2026-03-05 12:32:26] (step=0067272) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.162199178243005, LR: 0.0003 +[2026-03-05 12:32:34] (step=0067273) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.16239483467032, LR: 0.0003 +[2026-03-05 12:32:42] (step=0067274) Train Loss: 0.4257, Train Steps/Sec: 0.13, Epoch: 13.162590491097632, LR: 0.0003 +[2026-03-05 12:32:49] (step=0067275) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.162786147524946, LR: 0.0003 +[2026-03-05 12:32:57] (step=0067276) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.16298180395226, LR: 0.0003 +[2026-03-05 12:33:05] (step=0067277) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.163177460379574, LR: 0.0003 +[2026-03-05 12:33:13] (step=0067278) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.163373116806888, LR: 0.0003 +[2026-03-05 12:33:21] (step=0067279) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 13.1635687732342, LR: 0.0003 +[2026-03-05 12:33:29] (step=0067280) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.163764429661514, LR: 0.0003 +[2026-03-05 12:33:37] (step=0067281) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.163960086088828, LR: 0.0003 +[2026-03-05 12:33:45] (step=0067282) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.164155742516142, LR: 0.0003 +[2026-03-05 12:33:52] (step=0067283) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.164351398943456, LR: 0.0003 +[2026-03-05 12:34:00] (step=0067284) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.164547055370768, LR: 0.0003 +[2026-03-05 12:34:08] (step=0067285) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.164742711798082, LR: 0.0003 +[2026-03-05 12:34:16] (step=0067286) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.164938368225396, LR: 0.0003 +[2026-03-05 12:34:24] (step=0067287) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 13.16513402465271, LR: 0.0003 +[2026-03-05 12:34:32] (step=0067288) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.165329681080024, LR: 0.0003 +[2026-03-05 12:34:39] (step=0067289) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.165525337507336, LR: 0.0003 +[2026-03-05 12:34:47] (step=0067290) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.16572099393465, LR: 0.0003 +[2026-03-05 12:34:55] (step=0067291) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.165916650361964, LR: 0.0003 +[2026-03-05 12:35:03] (step=0067292) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.166112306789278, LR: 0.0003 +[2026-03-05 12:35:11] (step=0067293) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.166307963216592, LR: 0.0003 +[2026-03-05 12:35:19] (step=0067294) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.166503619643905, LR: 0.0003 +[2026-03-05 12:35:26] (step=0067295) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.166699276071219, LR: 0.0003 +[2026-03-05 12:35:34] (step=0067296) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.166894932498533, LR: 0.0003 +[2026-03-05 12:35:42] (step=0067297) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.167090588925847, LR: 0.0003 +[2026-03-05 12:35:50] (step=0067298) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.16728624535316, LR: 0.0003 +[2026-03-05 12:35:58] (step=0067299) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.167481901780473, LR: 0.0003 +[2026-03-05 12:36:06] (step=0067300) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.167677558207787, LR: 0.0003 +[2026-03-05 12:36:13] (step=0067301) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.167873214635101, LR: 0.0003 +[2026-03-05 12:36:21] (step=0067302) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.168068871062415, LR: 0.0003 +[2026-03-05 12:36:29] (step=0067303) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.168264527489727, LR: 0.0003 +[2026-03-05 12:36:37] (step=0067304) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.168460183917041, LR: 0.0003 +[2026-03-05 12:36:45] (step=0067305) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.168655840344355, LR: 0.0003 +[2026-03-05 12:36:53] (step=0067306) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.16885149677167, LR: 0.0003 +[2026-03-05 12:37:01] (step=0067307) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.169047153198983, LR: 0.0003 +[2026-03-05 12:37:08] (step=0067308) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.169242809626295, LR: 0.0003 +[2026-03-05 12:37:16] (step=0067309) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.16943846605361, LR: 0.0003 +[2026-03-05 12:37:24] (step=0067310) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.169634122480923, LR: 0.0003 +[2026-03-05 12:37:32] (step=0067311) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.169829778908237, LR: 0.0003 +[2026-03-05 12:37:40] (step=0067312) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.170025435335551, LR: 0.0003 +[2026-03-05 12:37:48] (step=0067313) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.170221091762864, LR: 0.0003 +[2026-03-05 12:37:55] (step=0067314) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.170416748190178, LR: 0.0003 +[2026-03-05 12:38:03] (step=0067315) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.170612404617492, LR: 0.0003 +[2026-03-05 12:38:11] (step=0067316) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.170808061044806, LR: 0.0003 +[2026-03-05 12:38:19] (step=0067317) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.17100371747212, LR: 0.0003 +[2026-03-05 12:38:27] (step=0067318) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.171199373899432, LR: 0.0003 +[2026-03-05 12:38:35] (step=0067319) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.171395030326746, LR: 0.0003 +[2026-03-05 12:38:43] (step=0067320) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.17159068675406, LR: 0.0003 +[2026-03-05 12:38:50] (step=0067321) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 13.171786343181374, LR: 0.0003 +[2026-03-05 12:38:58] (step=0067322) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 13.171981999608688, LR: 0.0003 +[2026-03-05 12:39:06] (step=0067323) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.172177656036, LR: 0.0003 +[2026-03-05 12:39:14] (step=0067324) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.172373312463314, LR: 0.0003 +[2026-03-05 12:39:22] (step=0067325) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.172568968890628, LR: 0.0003 +[2026-03-05 12:39:30] (step=0067326) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 13.172764625317942, LR: 0.0003 +[2026-03-05 12:39:37] (step=0067327) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.172960281745254, LR: 0.0003 +[2026-03-05 12:39:45] (step=0067328) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.173155938172568, LR: 0.0003 +[2026-03-05 12:39:53] (step=0067329) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.173351594599882, LR: 0.0003 +[2026-03-05 12:40:01] (step=0067330) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 13.173547251027196, LR: 0.0003 +[2026-03-05 12:40:09] (step=0067331) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.17374290745451, LR: 0.0003 +[2026-03-05 12:40:17] (step=0067332) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.173938563881823, LR: 0.0003 +[2026-03-05 12:40:25] (step=0067333) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.174134220309137, LR: 0.0003 +[2026-03-05 12:40:32] (step=0067334) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.17432987673645, LR: 0.0003 +[2026-03-05 12:40:40] (step=0067335) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.174525533163765, LR: 0.0003 +[2026-03-05 12:40:48] (step=0067336) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.174721189591079, LR: 0.0003 +[2026-03-05 12:40:56] (step=0067337) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.174916846018391, LR: 0.0003 +[2026-03-05 12:41:04] (step=0067338) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.175112502445705, LR: 0.0003 +[2026-03-05 12:41:12] (step=0067339) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 13.175308158873019, LR: 0.0003 +[2026-03-05 12:41:20] (step=0067340) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.175503815300333, LR: 0.0003 +[2026-03-05 12:41:28] (step=0067341) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 13.175699471727647, LR: 0.0003 +[2026-03-05 12:41:35] (step=0067342) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.17589512815496, LR: 0.0003 +[2026-03-05 12:41:43] (step=0067343) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.176090784582273, LR: 0.0003 +[2026-03-05 12:41:51] (step=0067344) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.176286441009587, LR: 0.0003 +[2026-03-05 12:41:59] (step=0067345) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 13.176482097436901, LR: 0.0003 +[2026-03-05 12:42:07] (step=0067346) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.176677753864215, LR: 0.0003 +[2026-03-05 12:42:15] (step=0067347) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.176873410291527, LR: 0.0003 +[2026-03-05 12:42:22] (step=0067348) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.177069066718841, LR: 0.0003 +[2026-03-05 12:42:30] (step=0067349) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.177264723146155, LR: 0.0003 +[2026-03-05 12:42:38] (step=0067350) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.17746037957347, LR: 0.0003 +[2026-03-05 12:42:46] (step=0067351) Train Loss: 0.4601, Train Steps/Sec: 0.13, Epoch: 13.177656036000784, LR: 0.0003 +[2026-03-05 12:42:54] (step=0067352) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.177851692428096, LR: 0.0003 +[2026-03-05 12:43:02] (step=0067353) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.17804734885541, LR: 0.0003 +[2026-03-05 12:43:10] (step=0067354) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.178243005282724, LR: 0.0003 +[2026-03-05 12:43:17] (step=0067355) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.178438661710038, LR: 0.0003 +[2026-03-05 12:43:25] (step=0067356) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.17863431813735, LR: 0.0003 +[2026-03-05 12:43:33] (step=0067357) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.178829974564664, LR: 0.0003 +[2026-03-05 12:43:41] (step=0067358) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.179025630991978, LR: 0.0003 +[2026-03-05 12:43:49] (step=0067359) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.179221287419292, LR: 0.0003 +[2026-03-05 12:43:57] (step=0067360) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.179416943846606, LR: 0.0003 +[2026-03-05 12:44:05] (step=0067361) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.179612600273918, LR: 0.0003 +[2026-03-05 12:44:12] (step=0067362) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.179808256701232, LR: 0.0003 +[2026-03-05 12:44:20] (step=0067363) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 13.180003913128546, LR: 0.0003 +[2026-03-05 12:44:28] (step=0067364) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.18019956955586, LR: 0.0003 +[2026-03-05 12:44:36] (step=0067365) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.180395225983174, LR: 0.0003 +[2026-03-05 12:44:44] (step=0067366) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.180590882410486, LR: 0.0003 +[2026-03-05 12:44:52] (step=0067367) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.1807865388378, LR: 0.0003 +[2026-03-05 12:44:59] (step=0067368) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 13.180982195265114, LR: 0.0003 +[2026-03-05 12:45:07] (step=0067369) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.181177851692429, LR: 0.0003 +[2026-03-05 12:45:15] (step=0067370) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.181373508119743, LR: 0.0003 +[2026-03-05 12:45:23] (step=0067371) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.181569164547055, LR: 0.0003 +[2026-03-05 12:45:31] (step=0067372) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.181764820974369, LR: 0.0003 +[2026-03-05 12:45:39] (step=0067373) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.181960477401683, LR: 0.0003 +[2026-03-05 12:45:46] (step=0067374) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.182156133828997, LR: 0.0003 +[2026-03-05 12:45:54] (step=0067375) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.18235179025631, LR: 0.0003 +[2026-03-05 12:46:02] (step=0067376) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.182547446683623, LR: 0.0003 +[2026-03-05 12:46:10] (step=0067377) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.182743103110937, LR: 0.0003 +[2026-03-05 12:46:18] (step=0067378) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.182938759538251, LR: 0.0003 +[2026-03-05 12:46:26] (step=0067379) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.183134415965565, LR: 0.0003 +[2026-03-05 12:46:34] (step=0067380) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.183330072392877, LR: 0.0003 +[2026-03-05 12:46:41] (step=0067381) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.183525728820191, LR: 0.0003 +[2026-03-05 12:46:49] (step=0067382) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.183721385247505, LR: 0.0003 +[2026-03-05 12:46:57] (step=0067383) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.18391704167482, LR: 0.0003 +[2026-03-05 12:47:05] (step=0067384) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.184112698102133, LR: 0.0003 +[2026-03-05 12:47:13] (step=0067385) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.184308354529445, LR: 0.0003 +[2026-03-05 12:47:21] (step=0067386) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.18450401095676, LR: 0.0003 +[2026-03-05 12:47:29] (step=0067387) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.184699667384074, LR: 0.0003 +[2026-03-05 12:47:36] (step=0067388) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.184895323811388, LR: 0.0003 +[2026-03-05 12:47:44] (step=0067389) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 13.185090980238702, LR: 0.0003 +[2026-03-05 12:47:52] (step=0067390) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.185286636666014, LR: 0.0003 +[2026-03-05 12:48:00] (step=0067391) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.185482293093328, LR: 0.0003 +[2026-03-05 12:48:08] (step=0067392) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.185677949520642, LR: 0.0003 +[2026-03-05 12:48:16] (step=0067393) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.185873605947956, LR: 0.0003 +[2026-03-05 12:48:23] (step=0067394) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.18606926237527, LR: 0.0003 +[2026-03-05 12:48:31] (step=0067395) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.186264918802582, LR: 0.0003 +[2026-03-05 12:48:39] (step=0067396) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.186460575229896, LR: 0.0003 +[2026-03-05 12:48:47] (step=0067397) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.18665623165721, LR: 0.0003 +[2026-03-05 12:48:55] (step=0067398) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.186851888084524, LR: 0.0003 +[2026-03-05 12:49:03] (step=0067399) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.187047544511838, LR: 0.0003 +[2026-03-05 12:49:10] (step=0067400) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.18724320093915, LR: 0.0003 +[2026-03-05 12:49:18] (step=0067401) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.187438857366464, LR: 0.0003 +[2026-03-05 12:49:26] (step=0067402) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.187634513793778, LR: 0.0003 +[2026-03-05 12:49:34] (step=0067403) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.187830170221092, LR: 0.0003 +[2026-03-05 12:49:42] (step=0067404) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.188025826648406, LR: 0.0003 +[2026-03-05 12:49:50] (step=0067405) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.188221483075719, LR: 0.0003 +[2026-03-05 12:49:58] (step=0067406) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.188417139503033, LR: 0.0003 +[2026-03-05 12:50:05] (step=0067407) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.188612795930347, LR: 0.0003 +[2026-03-05 12:50:13] (step=0067408) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.18880845235766, LR: 0.0003 +[2026-03-05 12:50:21] (step=0067409) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.189004108784973, LR: 0.0003 +[2026-03-05 12:50:29] (step=0067410) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.189199765212287, LR: 0.0003 +[2026-03-05 12:50:37] (step=0067411) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.1893954216396, LR: 0.0003 +[2026-03-05 12:50:45] (step=0067412) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.189591078066915, LR: 0.0003 +[2026-03-05 12:50:53] (step=0067413) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.189786734494229, LR: 0.0003 +[2026-03-05 12:51:00] (step=0067414) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.189982390921541, LR: 0.0003 +[2026-03-05 12:51:08] (step=0067415) Train Loss: 0.4199, Train Steps/Sec: 0.13, Epoch: 13.190178047348855, LR: 0.0003 +[2026-03-05 12:51:16] (step=0067416) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.190373703776169, LR: 0.0003 +[2026-03-05 12:51:24] (step=0067417) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.190569360203483, LR: 0.0003 +[2026-03-05 12:51:32] (step=0067418) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.190765016630797, LR: 0.0003 +[2026-03-05 12:51:40] (step=0067419) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.19096067305811, LR: 0.0003 +[2026-03-05 12:51:47] (step=0067420) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 13.191156329485423, LR: 0.0003 +[2026-03-05 12:51:55] (step=0067421) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.191351985912737, LR: 0.0003 +[2026-03-05 12:52:03] (step=0067422) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.191547642340051, LR: 0.0003 +[2026-03-05 12:52:11] (step=0067423) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 13.191743298767365, LR: 0.0003 +[2026-03-05 12:52:19] (step=0067424) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 13.191938955194678, LR: 0.0003 +[2026-03-05 12:52:27] (step=0067425) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.192134611621992, LR: 0.0003 +[2026-03-05 12:52:34] (step=0067426) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.192330268049306, LR: 0.0003 +[2026-03-05 12:52:42] (step=0067427) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.19252592447662, LR: 0.0003 +[2026-03-05 12:52:50] (step=0067428) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.192721580903934, LR: 0.0003 +[2026-03-05 12:52:58] (step=0067429) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.192917237331246, LR: 0.0003 +[2026-03-05 12:53:06] (step=0067430) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 13.19311289375856, LR: 0.0003 +[2026-03-05 12:53:14] (step=0067431) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.193308550185874, LR: 0.0003 +[2026-03-05 12:53:22] (step=0067432) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.193504206613188, LR: 0.0003 +[2026-03-05 12:53:29] (step=0067433) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.1936998630405, LR: 0.0003 +[2026-03-05 12:53:37] (step=0067434) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.193895519467814, LR: 0.0003 +[2026-03-05 12:53:45] (step=0067435) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.194091175895128, LR: 0.0003 +[2026-03-05 12:53:53] (step=0067436) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.194286832322442, LR: 0.0003 +[2026-03-05 12:54:01] (step=0067437) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.194482488749756, LR: 0.0003 +[2026-03-05 12:54:09] (step=0067438) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.194678145177068, LR: 0.0003 +[2026-03-05 12:54:16] (step=0067439) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.194873801604382, LR: 0.0003 +[2026-03-05 12:54:24] (step=0067440) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.195069458031696, LR: 0.0003 +[2026-03-05 12:54:32] (step=0067441) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.19526511445901, LR: 0.0003 +[2026-03-05 12:54:40] (step=0067442) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.195460770886324, LR: 0.0003 +[2026-03-05 12:54:48] (step=0067443) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 13.195656427313637, LR: 0.0003 +[2026-03-05 12:54:56] (step=0067444) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.19585208374095, LR: 0.0003 +[2026-03-05 12:55:03] (step=0067445) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.196047740168265, LR: 0.0003 +[2026-03-05 12:55:11] (step=0067446) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.196243396595579, LR: 0.0003 +[2026-03-05 12:55:19] (step=0067447) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.196439053022893, LR: 0.0003 +[2026-03-05 12:55:27] (step=0067448) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.196634709450205, LR: 0.0003 +[2026-03-05 12:55:35] (step=0067449) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.196830365877519, LR: 0.0003 +[2026-03-05 12:55:43] (step=0067450) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.197026022304833, LR: 0.0003 +[2026-03-05 12:55:51] (step=0067451) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.197221678732147, LR: 0.0003 +[2026-03-05 12:55:59] (step=0067452) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.19741733515946, LR: 0.0003 +[2026-03-05 12:56:06] (step=0067453) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.197612991586773, LR: 0.0003 +[2026-03-05 12:56:14] (step=0067454) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.197808648014087, LR: 0.0003 +[2026-03-05 12:56:22] (step=0067455) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.198004304441401, LR: 0.0003 +[2026-03-05 12:56:30] (step=0067456) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.198199960868715, LR: 0.0003 +[2026-03-05 12:56:38] (step=0067457) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.198395617296029, LR: 0.0003 +[2026-03-05 12:56:46] (step=0067458) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 13.198591273723341, LR: 0.0003 +[2026-03-05 12:56:53] (step=0067459) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.198786930150655, LR: 0.0003 +[2026-03-05 12:57:01] (step=0067460) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 13.19898258657797, LR: 0.0003 +[2026-03-05 12:57:09] (step=0067461) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.199178243005283, LR: 0.0003 +[2026-03-05 12:57:17] (step=0067462) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.199373899432596, LR: 0.0003 +[2026-03-05 12:57:25] (step=0067463) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.19956955585991, LR: 0.0003 +[2026-03-05 12:57:33] (step=0067464) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.199765212287224, LR: 0.0003 +[2026-03-05 12:57:40] (step=0067465) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.199960868714538, LR: 0.0003 +[2026-03-05 12:57:48] (step=0067466) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.200156525141852, LR: 0.0003 +[2026-03-05 12:57:56] (step=0067467) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.200352181569164, LR: 0.0003 +[2026-03-05 12:58:04] (step=0067468) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.200547837996478, LR: 0.0003 +[2026-03-05 12:58:12] (step=0067469) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.200743494423792, LR: 0.0003 +[2026-03-05 12:58:20] (step=0067470) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.200939150851106, LR: 0.0003 +[2026-03-05 12:58:27] (step=0067471) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.20113480727842, LR: 0.0003 +[2026-03-05 12:58:35] (step=0067472) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.201330463705732, LR: 0.0003 +[2026-03-05 12:58:43] (step=0067473) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.201526120133046, LR: 0.0003 +[2026-03-05 12:58:51] (step=0067474) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.20172177656036, LR: 0.0003 +[2026-03-05 12:58:59] (step=0067475) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.201917432987674, LR: 0.0003 +[2026-03-05 12:59:07] (step=0067476) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.202113089414988, LR: 0.0003 +[2026-03-05 12:59:14] (step=0067477) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.2023087458423, LR: 0.0003 +[2026-03-05 12:59:22] (step=0067478) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.202504402269614, LR: 0.0003 +[2026-03-05 12:59:30] (step=0067479) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.202700058696928, LR: 0.0003 +[2026-03-05 12:59:38] (step=0067480) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.202895715124242, LR: 0.0003 +[2026-03-05 12:59:46] (step=0067481) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.203091371551556, LR: 0.0003 +[2026-03-05 12:59:54] (step=0067482) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.203287027978869, LR: 0.0003 +[2026-03-05 13:00:02] (step=0067483) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.203482684406183, LR: 0.0003 +[2026-03-05 13:00:09] (step=0067484) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.203678340833497, LR: 0.0003 +[2026-03-05 13:00:17] (step=0067485) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.20387399726081, LR: 0.0003 +[2026-03-05 13:00:25] (step=0067486) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.204069653688123, LR: 0.0003 +[2026-03-05 13:00:33] (step=0067487) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.204265310115437, LR: 0.0003 +[2026-03-05 13:00:41] (step=0067488) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.20446096654275, LR: 0.0003 +[2026-03-05 13:00:49] (step=0067489) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 13.204656622970065, LR: 0.0003 +[2026-03-05 13:00:56] (step=0067490) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.204852279397379, LR: 0.0003 +[2026-03-05 13:01:04] (step=0067491) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.205047935824691, LR: 0.0003 +[2026-03-05 13:01:12] (step=0067492) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.205243592252005, LR: 0.0003 +[2026-03-05 13:01:20] (step=0067493) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.205439248679319, LR: 0.0003 +[2026-03-05 13:01:28] (step=0067494) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.205634905106633, LR: 0.0003 +[2026-03-05 13:01:36] (step=0067495) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.205830561533947, LR: 0.0003 +[2026-03-05 13:01:43] (step=0067496) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.20602621796126, LR: 0.0003 +[2026-03-05 13:01:51] (step=0067497) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.206221874388573, LR: 0.0003 +[2026-03-05 13:01:59] (step=0067498) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.206417530815887, LR: 0.0003 +[2026-03-05 13:02:07] (step=0067499) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 13.206613187243201, LR: 0.0003 +[2026-03-05 13:02:15] (step=0067500) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.206808843670515, LR: 0.0003 +[2026-03-05 13:02:15] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0067500/ +[2026-03-05 13:02:23] (step=0067501) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.207004500097828, LR: 0.0003 +[2026-03-05 13:02:31] (step=0067502) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.207200156525142, LR: 0.0003 +[2026-03-05 13:02:38] (step=0067503) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.207395812952456, LR: 0.0003 +[2026-03-05 13:02:46] (step=0067504) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.20759146937977, LR: 0.0003 +[2026-03-05 13:02:54] (step=0067505) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.207787125807084, LR: 0.0003 +[2026-03-05 13:03:02] (step=0067506) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.207982782234396, LR: 0.0003 +[2026-03-05 13:03:10] (step=0067507) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.20817843866171, LR: 0.0003 +[2026-03-05 13:03:18] (step=0067508) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.208374095089024, LR: 0.0003 +[2026-03-05 13:03:26] (step=0067509) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.208569751516338, LR: 0.0003 +[2026-03-05 13:03:33] (step=0067510) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 13.20876540794365, LR: 0.0003 +[2026-03-05 13:03:41] (step=0067511) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.208961064370964, LR: 0.0003 +[2026-03-05 13:03:49] (step=0067512) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.209156720798278, LR: 0.0003 +[2026-03-05 13:03:57] (step=0067513) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.209352377225592, LR: 0.0003 +[2026-03-05 13:04:05] (step=0067514) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 13.209548033652906, LR: 0.0003 +[2026-03-05 13:04:13] (step=0067515) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.209743690080218, LR: 0.0003 +[2026-03-05 13:04:20] (step=0067516) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.209939346507532, LR: 0.0003 +[2026-03-05 13:04:28] (step=0067517) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.210135002934846, LR: 0.0003 +[2026-03-05 13:04:36] (step=0067518) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.21033065936216, LR: 0.0003 +[2026-03-05 13:04:44] (step=0067519) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.210526315789474, LR: 0.0003 +[2026-03-05 13:04:52] (step=0067520) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.210721972216787, LR: 0.0003 +[2026-03-05 13:05:00] (step=0067521) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.2109176286441, LR: 0.0003 +[2026-03-05 13:05:08] (step=0067522) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.211113285071415, LR: 0.0003 +[2026-03-05 13:05:15] (step=0067523) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.211308941498729, LR: 0.0003 +[2026-03-05 13:05:23] (step=0067524) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.211504597926043, LR: 0.0003 +[2026-03-05 13:05:31] (step=0067525) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.211700254353355, LR: 0.0003 +[2026-03-05 13:05:39] (step=0067526) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.211895910780669, LR: 0.0003 +[2026-03-05 13:05:47] (step=0067527) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.212091567207983, LR: 0.0003 +[2026-03-05 13:05:55] (step=0067528) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.212287223635297, LR: 0.0003 +[2026-03-05 13:06:02] (step=0067529) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.21248288006261, LR: 0.0003 +[2026-03-05 13:06:10] (step=0067530) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.212678536489923, LR: 0.0003 +[2026-03-05 13:06:18] (step=0067531) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.212874192917237, LR: 0.0003 +[2026-03-05 13:06:26] (step=0067532) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.213069849344551, LR: 0.0003 +[2026-03-05 13:06:34] (step=0067533) Train Loss: 0.4618, Train Steps/Sec: 0.13, Epoch: 13.213265505771865, LR: 0.0003 +[2026-03-05 13:06:42] (step=0067534) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.213461162199179, LR: 0.0003 +[2026-03-05 13:06:50] (step=0067535) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.213656818626491, LR: 0.0003 +[2026-03-05 13:06:57] (step=0067536) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.213852475053805, LR: 0.0003 +[2026-03-05 13:07:05] (step=0067537) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.21404813148112, LR: 0.0003 +[2026-03-05 13:07:13] (step=0067538) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.214243787908433, LR: 0.0003 +[2026-03-05 13:07:21] (step=0067539) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.214439444335746, LR: 0.0003 +[2026-03-05 13:07:29] (step=0067540) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.21463510076306, LR: 0.0003 +[2026-03-05 13:07:37] (step=0067541) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.214830757190374, LR: 0.0003 +[2026-03-05 13:07:45] (step=0067542) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.215026413617688, LR: 0.0003 +[2026-03-05 13:07:52] (step=0067543) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.215222070045002, LR: 0.0003 +[2026-03-05 13:08:00] (step=0067544) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.215417726472314, LR: 0.0003 +[2026-03-05 13:08:08] (step=0067545) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 13.215613382899628, LR: 0.0003 +[2026-03-05 13:08:16] (step=0067546) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.215809039326942, LR: 0.0003 +[2026-03-05 13:08:24] (step=0067547) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.216004695754256, LR: 0.0003 +[2026-03-05 13:08:32] (step=0067548) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.21620035218157, LR: 0.0003 +[2026-03-05 13:08:39] (step=0067549) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.216396008608882, LR: 0.0003 +[2026-03-05 13:08:47] (step=0067550) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.216591665036196, LR: 0.0003 +[2026-03-05 13:08:55] (step=0067551) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.21678732146351, LR: 0.0003 +[2026-03-05 13:09:03] (step=0067552) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.216982977890824, LR: 0.0003 +[2026-03-05 13:09:11] (step=0067553) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.217178634318138, LR: 0.0003 +[2026-03-05 13:09:19] (step=0067554) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.21737429074545, LR: 0.0003 +[2026-03-05 13:09:26] (step=0067555) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.217569947172764, LR: 0.0003 +[2026-03-05 13:09:34] (step=0067556) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.217765603600078, LR: 0.0003 +[2026-03-05 13:09:42] (step=0067557) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.217961260027392, LR: 0.0003 +[2026-03-05 13:09:50] (step=0067558) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.218156916454706, LR: 0.0003 +[2026-03-05 13:09:58] (step=0067559) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.218352572882019, LR: 0.0003 +[2026-03-05 13:10:06] (step=0067560) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.218548229309333, LR: 0.0003 +[2026-03-05 13:10:14] (step=0067561) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.218743885736647, LR: 0.0003 +[2026-03-05 13:10:21] (step=0067562) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.21893954216396, LR: 0.0003 +[2026-03-05 13:10:29] (step=0067563) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.219135198591273, LR: 0.0003 +[2026-03-05 13:10:37] (step=0067564) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.219330855018587, LR: 0.0003 +[2026-03-05 13:10:45] (step=0067565) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.2195265114459, LR: 0.0003 +[2026-03-05 13:10:53] (step=0067566) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 13.219722167873215, LR: 0.0003 +[2026-03-05 13:11:01] (step=0067567) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.219917824300529, LR: 0.0003 +[2026-03-05 13:11:09] (step=0067568) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.220113480727841, LR: 0.0003 +[2026-03-05 13:11:16] (step=0067569) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.220309137155155, LR: 0.0003 +[2026-03-05 13:11:24] (step=0067570) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.220504793582469, LR: 0.0003 +[2026-03-05 13:11:32] (step=0067571) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.220700450009783, LR: 0.0003 +[2026-03-05 13:11:40] (step=0067572) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.220896106437097, LR: 0.0003 +[2026-03-05 13:11:48] (step=0067573) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.22109176286441, LR: 0.0003 +[2026-03-05 13:11:56] (step=0067574) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.221287419291723, LR: 0.0003 +[2026-03-05 13:12:03] (step=0067575) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.221483075719037, LR: 0.0003 +[2026-03-05 13:12:11] (step=0067576) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.221678732146351, LR: 0.0003 +[2026-03-05 13:12:19] (step=0067577) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 13.221874388573665, LR: 0.0003 +[2026-03-05 13:12:27] (step=0067578) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.222070045000978, LR: 0.0003 +[2026-03-05 13:12:35] (step=0067579) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.222265701428292, LR: 0.0003 +[2026-03-05 13:12:43] (step=0067580) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.222461357855606, LR: 0.0003 +[2026-03-05 13:12:51] (step=0067581) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.22265701428292, LR: 0.0003 +[2026-03-05 13:12:58] (step=0067582) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.222852670710234, LR: 0.0003 +[2026-03-05 13:13:06] (step=0067583) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 13.223048327137546, LR: 0.0003 +[2026-03-05 13:13:14] (step=0067584) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.22324398356486, LR: 0.0003 +[2026-03-05 13:13:22] (step=0067585) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.223439639992174, LR: 0.0003 +[2026-03-05 13:13:30] (step=0067586) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.223635296419488, LR: 0.0003 +[2026-03-05 13:13:38] (step=0067587) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.223830952846802, LR: 0.0003 +[2026-03-05 13:13:46] (step=0067588) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.224026609274114, LR: 0.0003 +[2026-03-05 13:13:53] (step=0067589) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.224222265701428, LR: 0.0003 +[2026-03-05 13:14:01] (step=0067590) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.224417922128742, LR: 0.0003 +[2026-03-05 13:14:09] (step=0067591) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.224613578556056, LR: 0.0003 +[2026-03-05 13:14:17] (step=0067592) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.224809234983368, LR: 0.0003 +[2026-03-05 13:14:25] (step=0067593) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.225004891410682, LR: 0.0003 +[2026-03-05 13:14:33] (step=0067594) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 13.225200547837996, LR: 0.0003 +[2026-03-05 13:14:40] (step=0067595) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.22539620426531, LR: 0.0003 +[2026-03-05 13:14:48] (step=0067596) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.225591860692624, LR: 0.0003 +[2026-03-05 13:14:56] (step=0067597) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.225787517119937, LR: 0.0003 +[2026-03-05 13:15:04] (step=0067598) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 13.22598317354725, LR: 0.0003 +[2026-03-05 13:15:12] (step=0067599) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.226178829974565, LR: 0.0003 +[2026-03-05 13:15:20] (step=0067600) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.226374486401879, LR: 0.0003 +[2026-03-05 13:15:27] (step=0067601) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.226570142829193, LR: 0.0003 +[2026-03-05 13:15:35] (step=0067602) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.226765799256505, LR: 0.0003 +[2026-03-05 13:15:43] (step=0067603) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.226961455683819, LR: 0.0003 +[2026-03-05 13:15:51] (step=0067604) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.227157112111133, LR: 0.0003 +[2026-03-05 13:15:59] (step=0067605) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.227352768538447, LR: 0.0003 +[2026-03-05 13:16:07] (step=0067606) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.22754842496576, LR: 0.0003 +[2026-03-05 13:16:15] (step=0067607) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.227744081393073, LR: 0.0003 +[2026-03-05 13:16:22] (step=0067608) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.227939737820387, LR: 0.0003 +[2026-03-05 13:16:30] (step=0067609) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.228135394247701, LR: 0.0003 +[2026-03-05 13:16:38] (step=0067610) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.228331050675015, LR: 0.0003 +[2026-03-05 13:16:46] (step=0067611) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.22852670710233, LR: 0.0003 +[2026-03-05 13:16:54] (step=0067612) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.228722363529641, LR: 0.0003 +[2026-03-05 13:17:02] (step=0067613) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.228918019956955, LR: 0.0003 +[2026-03-05 13:17:09] (step=0067614) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.22911367638427, LR: 0.0003 +[2026-03-05 13:17:17] (step=0067615) Train Loss: 0.4639, Train Steps/Sec: 0.13, Epoch: 13.229309332811583, LR: 0.0003 +[2026-03-05 13:17:25] (step=0067616) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.229504989238896, LR: 0.0003 +[2026-03-05 13:17:33] (step=0067617) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.22970064566621, LR: 0.0003 +[2026-03-05 13:17:41] (step=0067618) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.229896302093524, LR: 0.0003 +[2026-03-05 13:17:49] (step=0067619) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.230091958520838, LR: 0.0003 +[2026-03-05 13:17:56] (step=0067620) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 13.230287614948152, LR: 0.0003 +[2026-03-05 13:18:04] (step=0067621) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.230483271375464, LR: 0.0003 +[2026-03-05 13:18:12] (step=0067622) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.230678927802778, LR: 0.0003 +[2026-03-05 13:18:20] (step=0067623) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.230874584230092, LR: 0.0003 +[2026-03-05 13:18:28] (step=0067624) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.231070240657406, LR: 0.0003 +[2026-03-05 13:18:36] (step=0067625) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.23126589708472, LR: 0.0003 +[2026-03-05 13:18:44] (step=0067626) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 13.231461553512032, LR: 0.0003 +[2026-03-05 13:18:51] (step=0067627) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.231657209939346, LR: 0.0003 +[2026-03-05 13:18:59] (step=0067628) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.23185286636666, LR: 0.0003 +[2026-03-05 13:19:07] (step=0067629) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.232048522793974, LR: 0.0003 +[2026-03-05 13:19:15] (step=0067630) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.232244179221288, LR: 0.0003 +[2026-03-05 13:19:23] (step=0067631) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 13.2324398356486, LR: 0.0003 +[2026-03-05 13:19:31] (step=0067632) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.232635492075914, LR: 0.0003 +[2026-03-05 13:19:39] (step=0067633) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.232831148503228, LR: 0.0003 +[2026-03-05 13:19:46] (step=0067634) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.233026804930542, LR: 0.0003 +[2026-03-05 13:19:54] (step=0067635) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.233222461357856, LR: 0.0003 +[2026-03-05 13:20:02] (step=0067636) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.233418117785169, LR: 0.0003 +[2026-03-05 13:20:10] (step=0067637) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.233613774212483, LR: 0.0003 +[2026-03-05 13:20:18] (step=0067638) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.233809430639797, LR: 0.0003 +[2026-03-05 13:20:26] (step=0067639) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.23400508706711, LR: 0.0003 +[2026-03-05 13:20:34] (step=0067640) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.234200743494425, LR: 0.0003 +[2026-03-05 13:20:41] (step=0067641) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.234396399921737, LR: 0.0003 +[2026-03-05 13:20:49] (step=0067642) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.234592056349051, LR: 0.0003 +[2026-03-05 13:20:57] (step=0067643) Train Loss: 0.4231, Train Steps/Sec: 0.13, Epoch: 13.234787712776365, LR: 0.0003 +[2026-03-05 13:21:05] (step=0067644) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.234983369203679, LR: 0.0003 +[2026-03-05 13:21:13] (step=0067645) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.235179025630991, LR: 0.0003 +[2026-03-05 13:21:21] (step=0067646) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.235374682058305, LR: 0.0003 +[2026-03-05 13:21:28] (step=0067647) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.23557033848562, LR: 0.0003 +[2026-03-05 13:21:36] (step=0067648) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.235765994912933, LR: 0.0003 +[2026-03-05 13:21:44] (step=0067649) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 13.235961651340247, LR: 0.0003 +[2026-03-05 13:21:52] (step=0067650) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.23615730776756, LR: 0.0003 +[2026-03-05 13:22:00] (step=0067651) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 13.236352964194873, LR: 0.0003 +[2026-03-05 13:22:08] (step=0067652) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.236548620622187, LR: 0.0003 +[2026-03-05 13:22:16] (step=0067653) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.236744277049501, LR: 0.0003 +[2026-03-05 13:22:23] (step=0067654) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.236939933476815, LR: 0.0003 +[2026-03-05 13:22:31] (step=0067655) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.237135589904128, LR: 0.0003 +[2026-03-05 13:22:39] (step=0067656) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.237331246331442, LR: 0.0003 +[2026-03-05 13:22:47] (step=0067657) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.237526902758756, LR: 0.0003 +[2026-03-05 13:22:55] (step=0067658) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.23772255918607, LR: 0.0003 +[2026-03-05 13:23:03] (step=0067659) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.237918215613384, LR: 0.0003 +[2026-03-05 13:23:10] (step=0067660) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 13.238113872040696, LR: 0.0003 +[2026-03-05 13:23:18] (step=0067661) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.23830952846801, LR: 0.0003 +[2026-03-05 13:23:26] (step=0067662) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 13.238505184895324, LR: 0.0003 +[2026-03-05 13:23:34] (step=0067663) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.238700841322638, LR: 0.0003 +[2026-03-05 13:23:42] (step=0067664) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.238896497749952, LR: 0.0003 +[2026-03-05 13:23:50] (step=0067665) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.239092154177264, LR: 0.0003 +[2026-03-05 13:23:58] (step=0067666) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.239287810604578, LR: 0.0003 +[2026-03-05 13:24:05] (step=0067667) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.239483467031892, LR: 0.0003 +[2026-03-05 13:24:13] (step=0067668) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.239679123459206, LR: 0.0003 +[2026-03-05 13:24:21] (step=0067669) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.239874779886518, LR: 0.0003 +[2026-03-05 13:24:29] (step=0067670) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.240070436313832, LR: 0.0003 +[2026-03-05 13:24:37] (step=0067671) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.240266092741146, LR: 0.0003 +[2026-03-05 13:24:45] (step=0067672) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.24046174916846, LR: 0.0003 +[2026-03-05 13:24:52] (step=0067673) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.240657405595774, LR: 0.0003 +[2026-03-05 13:25:00] (step=0067674) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.240853062023087, LR: 0.0003 +[2026-03-05 13:25:08] (step=0067675) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 13.2410487184504, LR: 0.0003 +[2026-03-05 13:25:16] (step=0067676) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.241244374877715, LR: 0.0003 +[2026-03-05 13:25:24] (step=0067677) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.241440031305029, LR: 0.0003 +[2026-03-05 13:25:32] (step=0067678) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.241635687732343, LR: 0.0003 +[2026-03-05 13:25:39] (step=0067679) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.241831344159655, LR: 0.0003 +[2026-03-05 13:25:47] (step=0067680) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 13.242027000586969, LR: 0.0003 +[2026-03-05 13:25:55] (step=0067681) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.242222657014283, LR: 0.0003 +[2026-03-05 13:26:03] (step=0067682) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.242418313441597, LR: 0.0003 +[2026-03-05 13:26:11] (step=0067683) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.242613969868911, LR: 0.0003 +[2026-03-05 13:26:19] (step=0067684) Train Loss: 0.4229, Train Steps/Sec: 0.13, Epoch: 13.242809626296223, LR: 0.0003 +[2026-03-05 13:26:27] (step=0067685) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.243005282723537, LR: 0.0003 +[2026-03-05 13:26:35] (step=0067686) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.243200939150851, LR: 0.0003 +[2026-03-05 13:26:42] (step=0067687) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.243396595578165, LR: 0.0003 +[2026-03-05 13:26:50] (step=0067688) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.24359225200548, LR: 0.0003 +[2026-03-05 13:26:58] (step=0067689) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.243787908432791, LR: 0.0003 +[2026-03-05 13:27:06] (step=0067690) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.243983564860105, LR: 0.0003 +[2026-03-05 13:27:14] (step=0067691) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.24417922128742, LR: 0.0003 +[2026-03-05 13:27:22] (step=0067692) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.244374877714733, LR: 0.0003 +[2026-03-05 13:27:29] (step=0067693) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.244570534142047, LR: 0.0003 +[2026-03-05 13:27:37] (step=0067694) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.24476619056936, LR: 0.0003 +[2026-03-05 13:27:45] (step=0067695) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.244961846996674, LR: 0.0003 +[2026-03-05 13:27:53] (step=0067696) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.245157503423988, LR: 0.0003 +[2026-03-05 13:28:01] (step=0067697) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.245353159851302, LR: 0.0003 +[2026-03-05 13:28:09] (step=0067698) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.245548816278614, LR: 0.0003 +[2026-03-05 13:28:17] (step=0067699) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.245744472705928, LR: 0.0003 +[2026-03-05 13:28:24] (step=0067700) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.245940129133242, LR: 0.0003 +[2026-03-05 13:28:32] (step=0067701) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.246135785560556, LR: 0.0003 +[2026-03-05 13:28:40] (step=0067702) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.24633144198787, LR: 0.0003 +[2026-03-05 13:28:48] (step=0067703) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 13.246527098415182, LR: 0.0003 +[2026-03-05 13:28:56] (step=0067704) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.246722754842496, LR: 0.0003 +[2026-03-05 13:29:04] (step=0067705) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.24691841126981, LR: 0.0003 +[2026-03-05 13:29:12] (step=0067706) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.247114067697124, LR: 0.0003 +[2026-03-05 13:29:19] (step=0067707) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.247309724124438, LR: 0.0003 +[2026-03-05 13:29:27] (step=0067708) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.24750538055175, LR: 0.0003 +[2026-03-05 13:29:35] (step=0067709) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.247701036979064, LR: 0.0003 +[2026-03-05 13:29:43] (step=0067710) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.247896693406378, LR: 0.0003 +[2026-03-05 13:29:51] (step=0067711) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.248092349833692, LR: 0.0003 +[2026-03-05 13:29:59] (step=0067712) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.248288006261006, LR: 0.0003 +[2026-03-05 13:30:06] (step=0067713) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.248483662688319, LR: 0.0003 +[2026-03-05 13:30:14] (step=0067714) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.248679319115633, LR: 0.0003 +[2026-03-05 13:30:22] (step=0067715) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.248874975542947, LR: 0.0003 +[2026-03-05 13:30:30] (step=0067716) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.24907063197026, LR: 0.0003 +[2026-03-05 13:30:38] (step=0067717) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.249266288397575, LR: 0.0003 +[2026-03-05 13:30:46] (step=0067718) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 13.249461944824887, LR: 0.0003 +[2026-03-05 13:30:54] (step=0067719) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.249657601252201, LR: 0.0003 +[2026-03-05 13:31:01] (step=0067720) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.249853257679515, LR: 0.0003 +[2026-03-05 13:31:09] (step=0067721) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.250048914106829, LR: 0.0003 +[2026-03-05 13:31:17] (step=0067722) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.250244570534141, LR: 0.0003 +[2026-03-05 13:31:25] (step=0067723) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.250440226961455, LR: 0.0003 +[2026-03-05 13:31:33] (step=0067724) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.25063588338877, LR: 0.0003 +[2026-03-05 13:31:41] (step=0067725) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.250831539816083, LR: 0.0003 +[2026-03-05 13:31:48] (step=0067726) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 13.251027196243397, LR: 0.0003 +[2026-03-05 13:31:56] (step=0067727) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.25122285267071, LR: 0.0003 +[2026-03-05 13:32:04] (step=0067728) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.251418509098023, LR: 0.0003 +[2026-03-05 13:32:12] (step=0067729) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.251614165525337, LR: 0.0003 +[2026-03-05 13:32:20] (step=0067730) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.251809821952651, LR: 0.0003 +[2026-03-05 13:32:28] (step=0067731) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.252005478379965, LR: 0.0003 +[2026-03-05 13:32:36] (step=0067732) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.252201134807278, LR: 0.0003 +[2026-03-05 13:32:43] (step=0067733) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.252396791234592, LR: 0.0003 +[2026-03-05 13:32:51] (step=0067734) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.252592447661906, LR: 0.0003 +[2026-03-05 13:32:59] (step=0067735) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.25278810408922, LR: 0.0003 +[2026-03-05 13:33:07] (step=0067736) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.252983760516534, LR: 0.0003 +[2026-03-05 13:33:15] (step=0067737) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.253179416943846, LR: 0.0003 +[2026-03-05 13:33:23] (step=0067738) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 13.25337507337116, LR: 0.0003 +[2026-03-05 13:33:31] (step=0067739) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.253570729798474, LR: 0.0003 +[2026-03-05 13:33:38] (step=0067740) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.253766386225788, LR: 0.0003 +[2026-03-05 13:33:46] (step=0067741) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.253962042653102, LR: 0.0003 +[2026-03-05 13:33:54] (step=0067742) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.254157699080414, LR: 0.0003 +[2026-03-05 13:34:02] (step=0067743) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.254353355507728, LR: 0.0003 +[2026-03-05 13:34:10] (step=0067744) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.254549011935042, LR: 0.0003 +[2026-03-05 13:34:18] (step=0067745) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.254744668362356, LR: 0.0003 +[2026-03-05 13:34:25] (step=0067746) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.25494032478967, LR: 0.0003 +[2026-03-05 13:34:33] (step=0067747) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.255135981216982, LR: 0.0003 +[2026-03-05 13:34:41] (step=0067748) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 13.255331637644296, LR: 0.0003 +[2026-03-05 13:34:49] (step=0067749) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.25552729407161, LR: 0.0003 +[2026-03-05 13:34:57] (step=0067750) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.255722950498924, LR: 0.0003 +[2026-03-05 13:35:05] (step=0067751) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.255918606926237, LR: 0.0003 +[2026-03-05 13:35:13] (step=0067752) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.25611426335355, LR: 0.0003 +[2026-03-05 13:35:20] (step=0067753) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.256309919780865, LR: 0.0003 +[2026-03-05 13:35:28] (step=0067754) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.256505576208179, LR: 0.0003 +[2026-03-05 13:35:36] (step=0067755) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.256701232635493, LR: 0.0003 +[2026-03-05 13:35:44] (step=0067756) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 13.256896889062805, LR: 0.0003 +[2026-03-05 13:35:52] (step=0067757) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.257092545490119, LR: 0.0003 +[2026-03-05 13:36:00] (step=0067758) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.257288201917433, LR: 0.0003 +[2026-03-05 13:36:08] (step=0067759) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 13.257483858344747, LR: 0.0003 +[2026-03-05 13:36:15] (step=0067760) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.257679514772061, LR: 0.0003 +[2026-03-05 13:36:23] (step=0067761) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.257875171199373, LR: 0.0003 +[2026-03-05 13:36:31] (step=0067762) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.258070827626687, LR: 0.0003 +[2026-03-05 13:36:39] (step=0067763) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.258266484054001, LR: 0.0003 +[2026-03-05 13:36:47] (step=0067764) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 13.258462140481315, LR: 0.0003 +[2026-03-05 13:36:55] (step=0067765) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.25865779690863, LR: 0.0003 +[2026-03-05 13:37:02] (step=0067766) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.258853453335941, LR: 0.0003 +[2026-03-05 13:37:10] (step=0067767) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.259049109763255, LR: 0.0003 +[2026-03-05 13:37:18] (step=0067768) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.25924476619057, LR: 0.0003 +[2026-03-05 13:37:26] (step=0067769) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.259440422617883, LR: 0.0003 +[2026-03-05 13:37:34] (step=0067770) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.259636079045197, LR: 0.0003 +[2026-03-05 13:37:42] (step=0067771) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 13.25983173547251, LR: 0.0003 +[2026-03-05 13:37:50] (step=0067772) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.260027391899824, LR: 0.0003 +[2026-03-05 13:37:57] (step=0067773) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.260223048327138, LR: 0.0003 +[2026-03-05 13:38:05] (step=0067774) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.260418704754452, LR: 0.0003 +[2026-03-05 13:38:13] (step=0067775) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.260614361181764, LR: 0.0003 +[2026-03-05 13:38:21] (step=0067776) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.260810017609078, LR: 0.0003 +[2026-03-05 13:38:29] (step=0067777) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.261005674036392, LR: 0.0003 +[2026-03-05 13:38:37] (step=0067778) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.261201330463706, LR: 0.0003 +[2026-03-05 13:38:44] (step=0067779) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 13.26139698689102, LR: 0.0003 +[2026-03-05 13:38:52] (step=0067780) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.261592643318332, LR: 0.0003 +[2026-03-05 13:39:00] (step=0067781) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.261788299745646, LR: 0.0003 +[2026-03-05 13:39:08] (step=0067782) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.26198395617296, LR: 0.0003 +[2026-03-05 13:39:16] (step=0067783) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.262179612600274, LR: 0.0003 +[2026-03-05 13:39:24] (step=0067784) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.262375269027588, LR: 0.0003 +[2026-03-05 13:39:32] (step=0067785) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 13.2625709254549, LR: 0.0003 +[2026-03-05 13:39:40] (step=0067786) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.262766581882214, LR: 0.0003 +[2026-03-05 13:39:47] (step=0067787) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.262962238309528, LR: 0.0003 +[2026-03-05 13:39:55] (step=0067788) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.263157894736842, LR: 0.0003 +[2026-03-05 13:40:03] (step=0067789) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.263353551164156, LR: 0.0003 +[2026-03-05 13:40:11] (step=0067790) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 13.263549207591469, LR: 0.0003 +[2026-03-05 13:40:19] (step=0067791) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.263744864018783, LR: 0.0003 +[2026-03-05 13:40:27] (step=0067792) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.263940520446097, LR: 0.0003 +[2026-03-05 13:40:35] (step=0067793) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 13.26413617687341, LR: 0.0003 +[2026-03-05 13:40:42] (step=0067794) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 13.264331833300725, LR: 0.0003 +[2026-03-05 13:40:50] (step=0067795) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.264527489728037, LR: 0.0003 +[2026-03-05 13:40:58] (step=0067796) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.264723146155351, LR: 0.0003 +[2026-03-05 13:41:06] (step=0067797) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.264918802582665, LR: 0.0003 +[2026-03-05 13:41:14] (step=0067798) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.265114459009979, LR: 0.0003 +[2026-03-05 13:41:22] (step=0067799) Train Loss: 0.4253, Train Steps/Sec: 0.12, Epoch: 13.265310115437293, LR: 0.0003 +[2026-03-05 13:41:30] (step=0067800) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.265505771864605, LR: 0.0003 +[2026-03-05 13:41:38] (step=0067801) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.26570142829192, LR: 0.0003 +[2026-03-05 13:41:45] (step=0067802) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.265897084719233, LR: 0.0003 +[2026-03-05 13:41:53] (step=0067803) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.266092741146547, LR: 0.0003 +[2026-03-05 13:42:01] (step=0067804) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.26628839757386, LR: 0.0003 +[2026-03-05 13:42:09] (step=0067805) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.266484054001173, LR: 0.0003 +[2026-03-05 13:42:17] (step=0067806) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.266679710428487, LR: 0.0003 +[2026-03-05 13:42:25] (step=0067807) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.266875366855801, LR: 0.0003 +[2026-03-05 13:42:32] (step=0067808) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.267071023283115, LR: 0.0003 +[2026-03-05 13:42:40] (step=0067809) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.267266679710428, LR: 0.0003 +[2026-03-05 13:42:48] (step=0067810) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.267462336137742, LR: 0.0003 +[2026-03-05 13:42:56] (step=0067811) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.267657992565056, LR: 0.0003 +[2026-03-05 13:43:04] (step=0067812) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.26785364899237, LR: 0.0003 +[2026-03-05 13:43:12] (step=0067813) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.268049305419684, LR: 0.0003 +[2026-03-05 13:43:20] (step=0067814) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.268244961846996, LR: 0.0003 +[2026-03-05 13:43:27] (step=0067815) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.26844061827431, LR: 0.0003 +[2026-03-05 13:43:35] (step=0067816) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.268636274701624, LR: 0.0003 +[2026-03-05 13:43:43] (step=0067817) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.268831931128938, LR: 0.0003 +[2026-03-05 13:43:51] (step=0067818) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.269027587556252, LR: 0.0003 +[2026-03-05 13:43:59] (step=0067819) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.269223243983564, LR: 0.0003 +[2026-03-05 13:44:07] (step=0067820) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.269418900410878, LR: 0.0003 +[2026-03-05 13:44:14] (step=0067821) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 13.269614556838192, LR: 0.0003 +[2026-03-05 13:44:22] (step=0067822) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.269810213265506, LR: 0.0003 +[2026-03-05 13:44:30] (step=0067823) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.27000586969282, LR: 0.0003 +[2026-03-05 13:44:38] (step=0067824) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.270201526120132, LR: 0.0003 +[2026-03-05 13:44:46] (step=0067825) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.270397182547446, LR: 0.0003 +[2026-03-05 13:44:54] (step=0067826) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.27059283897476, LR: 0.0003 +[2026-03-05 13:45:02] (step=0067827) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.270788495402075, LR: 0.0003 +[2026-03-05 13:45:09] (step=0067828) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.270984151829387, LR: 0.0003 +[2026-03-05 13:45:17] (step=0067829) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.2711798082567, LR: 0.0003 +[2026-03-05 13:45:25] (step=0067830) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.271375464684015, LR: 0.0003 +[2026-03-05 13:45:33] (step=0067831) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.271571121111329, LR: 0.0003 +[2026-03-05 13:45:41] (step=0067832) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.271766777538643, LR: 0.0003 +[2026-03-05 13:45:49] (step=0067833) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.271962433965955, LR: 0.0003 +[2026-03-05 13:45:57] (step=0067834) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.272158090393269, LR: 0.0003 +[2026-03-05 13:46:05] (step=0067835) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.272353746820583, LR: 0.0003 +[2026-03-05 13:46:12] (step=0067836) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 13.272549403247897, LR: 0.0003 +[2026-03-05 13:46:20] (step=0067837) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.272745059675211, LR: 0.0003 +[2026-03-05 13:46:28] (step=0067838) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.272940716102523, LR: 0.0003 +[2026-03-05 13:46:36] (step=0067839) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.273136372529837, LR: 0.0003 +[2026-03-05 13:46:44] (step=0067840) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.273332028957151, LR: 0.0003 +[2026-03-05 13:46:52] (step=0067841) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.273527685384465, LR: 0.0003 +[2026-03-05 13:47:00] (step=0067842) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.27372334181178, LR: 0.0003 +[2026-03-05 13:47:07] (step=0067843) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.273918998239091, LR: 0.0003 +[2026-03-05 13:47:15] (step=0067844) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.274114654666405, LR: 0.0003 +[2026-03-05 13:47:23] (step=0067845) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.27431031109372, LR: 0.0003 +[2026-03-05 13:47:31] (step=0067846) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.274505967521034, LR: 0.0003 +[2026-03-05 13:47:39] (step=0067847) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.274701623948348, LR: 0.0003 +[2026-03-05 13:47:47] (step=0067848) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.27489728037566, LR: 0.0003 +[2026-03-05 13:47:55] (step=0067849) Train Loss: 0.4583, Train Steps/Sec: 0.12, Epoch: 13.275092936802974, LR: 0.0003 +[2026-03-05 13:48:02] (step=0067850) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.275288593230288, LR: 0.0003 +[2026-03-05 13:48:10] (step=0067851) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.275484249657602, LR: 0.0003 +[2026-03-05 13:48:18] (step=0067852) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 13.275679906084916, LR: 0.0003 +[2026-03-05 13:48:26] (step=0067853) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 13.275875562512228, LR: 0.0003 +[2026-03-05 13:48:34] (step=0067854) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.276071218939542, LR: 0.0003 +[2026-03-05 13:48:42] (step=0067855) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.276266875366856, LR: 0.0003 +[2026-03-05 13:48:50] (step=0067856) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.27646253179417, LR: 0.0003 +[2026-03-05 13:48:57] (step=0067857) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.276658188221482, LR: 0.0003 +[2026-03-05 13:49:05] (step=0067858) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.276853844648796, LR: 0.0003 +[2026-03-05 13:49:13] (step=0067859) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.27704950107611, LR: 0.0003 +[2026-03-05 13:49:21] (step=0067860) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.277245157503424, LR: 0.0003 +[2026-03-05 13:49:29] (step=0067861) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.277440813930738, LR: 0.0003 +[2026-03-05 13:49:37] (step=0067862) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.27763647035805, LR: 0.0003 +[2026-03-05 13:49:45] (step=0067863) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.277832126785365, LR: 0.0003 +[2026-03-05 13:49:52] (step=0067864) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.278027783212679, LR: 0.0003 +[2026-03-05 13:50:00] (step=0067865) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 13.278223439639993, LR: 0.0003 +[2026-03-05 13:50:08] (step=0067866) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.278419096067307, LR: 0.0003 +[2026-03-05 13:50:16] (step=0067867) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.278614752494619, LR: 0.0003 +[2026-03-05 13:50:24] (step=0067868) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.278810408921933, LR: 0.0003 +[2026-03-05 13:50:32] (step=0067869) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.279006065349247, LR: 0.0003 +[2026-03-05 13:50:40] (step=0067870) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.27920172177656, LR: 0.0003 +[2026-03-05 13:50:47] (step=0067871) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.279397378203875, LR: 0.0003 +[2026-03-05 13:50:55] (step=0067872) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.279593034631187, LR: 0.0003 +[2026-03-05 13:51:03] (step=0067873) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.279788691058501, LR: 0.0003 +[2026-03-05 13:51:11] (step=0067874) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.279984347485815, LR: 0.0003 +[2026-03-05 13:51:19] (step=0067875) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.280180003913129, LR: 0.0003 +[2026-03-05 13:51:27] (step=0067876) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.280375660340443, LR: 0.0003 +[2026-03-05 13:51:35] (step=0067877) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.280571316767755, LR: 0.0003 +[2026-03-05 13:51:42] (step=0067878) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.28076697319507, LR: 0.0003 +[2026-03-05 13:51:50] (step=0067879) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 13.280962629622383, LR: 0.0003 +[2026-03-05 13:51:58] (step=0067880) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.281158286049697, LR: 0.0003 +[2026-03-05 13:52:06] (step=0067881) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.28135394247701, LR: 0.0003 +[2026-03-05 13:52:14] (step=0067882) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.281549598904324, LR: 0.0003 +[2026-03-05 13:52:22] (step=0067883) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.281745255331638, LR: 0.0003 +[2026-03-05 13:52:30] (step=0067884) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.281940911758952, LR: 0.0003 +[2026-03-05 13:52:38] (step=0067885) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 13.282136568186266, LR: 0.0003 +[2026-03-05 13:52:45] (step=0067886) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.282332224613578, LR: 0.0003 +[2026-03-05 13:52:53] (step=0067887) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.282527881040892, LR: 0.0003 +[2026-03-05 13:53:01] (step=0067888) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.282723537468206, LR: 0.0003 +[2026-03-05 13:53:09] (step=0067889) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.28291919389552, LR: 0.0003 +[2026-03-05 13:53:17] (step=0067890) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.283114850322834, LR: 0.0003 +[2026-03-05 13:53:25] (step=0067891) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.283310506750146, LR: 0.0003 +[2026-03-05 13:53:33] (step=0067892) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.28350616317746, LR: 0.0003 +[2026-03-05 13:53:40] (step=0067893) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.283701819604774, LR: 0.0003 +[2026-03-05 13:53:48] (step=0067894) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.283897476032088, LR: 0.0003 +[2026-03-05 13:53:56] (step=0067895) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.284093132459402, LR: 0.0003 +[2026-03-05 13:54:04] (step=0067896) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.284288788886714, LR: 0.0003 +[2026-03-05 13:54:12] (step=0067897) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.284484445314028, LR: 0.0003 +[2026-03-05 13:54:20] (step=0067898) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.284680101741342, LR: 0.0003 +[2026-03-05 13:54:28] (step=0067899) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.284875758168656, LR: 0.0003 +[2026-03-05 13:54:35] (step=0067900) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.28507141459597, LR: 0.0003 +[2026-03-05 13:54:44] (step=0067901) Train Loss: 0.4328, Train Steps/Sec: 0.12, Epoch: 13.285267071023283, LR: 0.0003 +[2026-03-05 13:54:51] (step=0067902) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.285462727450597, LR: 0.0003 +[2026-03-05 13:54:59] (step=0067903) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.28565838387791, LR: 0.0003 +[2026-03-05 13:55:07] (step=0067904) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.285854040305225, LR: 0.0003 +[2026-03-05 13:55:15] (step=0067905) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.286049696732539, LR: 0.0003 +[2026-03-05 13:55:23] (step=0067906) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 13.28624535315985, LR: 0.0003 +[2026-03-05 13:55:31] (step=0067907) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 13.286441009587165, LR: 0.0003 +[2026-03-05 13:55:39] (step=0067908) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.286636666014479, LR: 0.0003 +[2026-03-05 13:55:46] (step=0067909) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.286832322441793, LR: 0.0003 +[2026-03-05 13:55:54] (step=0067910) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.287027978869105, LR: 0.0003 +[2026-03-05 13:56:02] (step=0067911) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.287223635296419, LR: 0.0003 +[2026-03-05 13:56:10] (step=0067912) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 13.287419291723733, LR: 0.0003 +[2026-03-05 13:56:18] (step=0067913) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.287614948151047, LR: 0.0003 +[2026-03-05 13:56:26] (step=0067914) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 13.287810604578361, LR: 0.0003 +[2026-03-05 13:56:33] (step=0067915) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.288006261005673, LR: 0.0003 +[2026-03-05 13:56:41] (step=0067916) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.288201917432987, LR: 0.0003 +[2026-03-05 13:56:49] (step=0067917) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.288397573860301, LR: 0.0003 +[2026-03-05 13:56:57] (step=0067918) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.288593230287615, LR: 0.0003 +[2026-03-05 13:57:05] (step=0067919) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.28878888671493, LR: 0.0003 +[2026-03-05 13:57:13] (step=0067920) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.288984543142242, LR: 0.0003 +[2026-03-05 13:57:21] (step=0067921) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.289180199569556, LR: 0.0003 +[2026-03-05 13:57:29] (step=0067922) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.28937585599687, LR: 0.0003 +[2026-03-05 13:57:36] (step=0067923) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.289571512424184, LR: 0.0003 +[2026-03-05 13:57:44] (step=0067924) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 13.289767168851498, LR: 0.0003 +[2026-03-05 13:57:52] (step=0067925) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.28996282527881, LR: 0.0003 +[2026-03-05 13:58:00] (step=0067926) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.290158481706124, LR: 0.0003 +[2026-03-05 13:58:08] (step=0067927) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.290354138133438, LR: 0.0003 +[2026-03-05 13:58:16] (step=0067928) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.290549794560752, LR: 0.0003 +[2026-03-05 13:58:24] (step=0067929) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.290745450988066, LR: 0.0003 +[2026-03-05 13:58:32] (step=0067930) Train Loss: 0.4437, Train Steps/Sec: 0.12, Epoch: 13.290941107415378, LR: 0.0003 +[2026-03-05 13:58:39] (step=0067931) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.291136763842692, LR: 0.0003 +[2026-03-05 13:58:47] (step=0067932) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.291332420270006, LR: 0.0003 +[2026-03-05 13:58:55] (step=0067933) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.29152807669732, LR: 0.0003 +[2026-03-05 13:59:03] (step=0067934) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.291723733124632, LR: 0.0003 +[2026-03-05 13:59:11] (step=0067935) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.291919389551946, LR: 0.0003 +[2026-03-05 13:59:19] (step=0067936) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.29211504597926, LR: 0.0003 +[2026-03-05 13:59:27] (step=0067937) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.292310702406574, LR: 0.0003 +[2026-03-05 13:59:34] (step=0067938) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 13.292506358833888, LR: 0.0003 +[2026-03-05 13:59:42] (step=0067939) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.2927020152612, LR: 0.0003 +[2026-03-05 13:59:50] (step=0067940) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.292897671688515, LR: 0.0003 +[2026-03-05 13:59:58] (step=0067941) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.293093328115829, LR: 0.0003 +[2026-03-05 14:00:06] (step=0067942) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 13.293288984543143, LR: 0.0003 +[2026-03-05 14:00:14] (step=0067943) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 13.293484640970457, LR: 0.0003 +[2026-03-05 14:00:22] (step=0067944) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.293680297397769, LR: 0.0003 +[2026-03-05 14:00:30] (step=0067945) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.293875953825083, LR: 0.0003 +[2026-03-05 14:00:37] (step=0067946) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.294071610252397, LR: 0.0003 +[2026-03-05 14:00:45] (step=0067947) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.29426726667971, LR: 0.0003 +[2026-03-05 14:00:53] (step=0067948) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.294462923107025, LR: 0.0003 +[2026-03-05 14:01:01] (step=0067949) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.294658579534337, LR: 0.0003 +[2026-03-05 14:01:09] (step=0067950) Train Loss: 0.4615, Train Steps/Sec: 0.13, Epoch: 13.294854235961651, LR: 0.0003 +[2026-03-05 14:01:17] (step=0067951) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.295049892388965, LR: 0.0003 +[2026-03-05 14:01:25] (step=0067952) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.295245548816279, LR: 0.0003 +[2026-03-05 14:01:33] (step=0067953) Train Loss: 0.4395, Train Steps/Sec: 0.12, Epoch: 13.295441205243593, LR: 0.0003 +[2026-03-05 14:01:40] (step=0067954) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.295636861670905, LR: 0.0003 +[2026-03-05 14:01:48] (step=0067955) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.29583251809822, LR: 0.0003 +[2026-03-05 14:01:56] (step=0067956) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.296028174525533, LR: 0.0003 +[2026-03-05 14:02:04] (step=0067957) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.296223830952847, LR: 0.0003 +[2026-03-05 14:02:12] (step=0067958) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.29641948738016, LR: 0.0003 +[2026-03-05 14:02:20] (step=0067959) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.296615143807474, LR: 0.0003 +[2026-03-05 14:02:28] (step=0067960) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 13.296810800234788, LR: 0.0003 +[2026-03-05 14:02:35] (step=0067961) Train Loss: 0.4262, Train Steps/Sec: 0.13, Epoch: 13.297006456662102, LR: 0.0003 +[2026-03-05 14:02:43] (step=0067962) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.297202113089416, LR: 0.0003 +[2026-03-05 14:02:51] (step=0067963) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.297397769516728, LR: 0.0003 +[2026-03-05 14:02:59] (step=0067964) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.297593425944042, LR: 0.0003 +[2026-03-05 14:03:07] (step=0067965) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.297789082371356, LR: 0.0003 +[2026-03-05 14:03:15] (step=0067966) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 13.29798473879867, LR: 0.0003 +[2026-03-05 14:03:23] (step=0067967) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.298180395225984, LR: 0.0003 +[2026-03-05 14:03:30] (step=0067968) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.298376051653296, LR: 0.0003 +[2026-03-05 14:03:38] (step=0067969) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.29857170808061, LR: 0.0003 +[2026-03-05 14:03:46] (step=0067970) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.298767364507924, LR: 0.0003 +[2026-03-05 14:03:54] (step=0067971) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.298963020935238, LR: 0.0003 +[2026-03-05 14:04:02] (step=0067972) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.299158677362552, LR: 0.0003 +[2026-03-05 14:04:10] (step=0067973) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.299354333789864, LR: 0.0003 +[2026-03-05 14:04:18] (step=0067974) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.299549990217178, LR: 0.0003 +[2026-03-05 14:04:25] (step=0067975) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.299745646644492, LR: 0.0003 +[2026-03-05 14:04:33] (step=0067976) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.299941303071806, LR: 0.0003 +[2026-03-05 14:04:41] (step=0067977) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.30013695949912, LR: 0.0003 +[2026-03-05 14:04:49] (step=0067978) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.300332615926433, LR: 0.0003 +[2026-03-05 14:04:57] (step=0067979) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.300528272353747, LR: 0.0003 +[2026-03-05 14:05:05] (step=0067980) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.30072392878106, LR: 0.0003 +[2026-03-05 14:05:13] (step=0067981) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.300919585208375, LR: 0.0003 +[2026-03-05 14:05:21] (step=0067982) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.301115241635689, LR: 0.0003 +[2026-03-05 14:05:28] (step=0067983) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.301310898063, LR: 0.0003 +[2026-03-05 14:05:36] (step=0067984) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.301506554490315, LR: 0.0003 +[2026-03-05 14:05:44] (step=0067985) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.301702210917629, LR: 0.0003 +[2026-03-05 14:05:52] (step=0067986) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.301897867344943, LR: 0.0003 +[2026-03-05 14:06:00] (step=0067987) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.302093523772255, LR: 0.0003 +[2026-03-05 14:06:08] (step=0067988) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.302289180199569, LR: 0.0003 +[2026-03-05 14:06:16] (step=0067989) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 13.302484836626883, LR: 0.0003 +[2026-03-05 14:06:23] (step=0067990) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.302680493054197, LR: 0.0003 +[2026-03-05 14:06:31] (step=0067991) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 13.302876149481511, LR: 0.0003 +[2026-03-05 14:06:39] (step=0067992) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.303071805908823, LR: 0.0003 +[2026-03-05 14:06:47] (step=0067993) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 13.303267462336137, LR: 0.0003 +[2026-03-05 14:06:55] (step=0067994) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.303463118763451, LR: 0.0003 +[2026-03-05 14:07:03] (step=0067995) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.303658775190765, LR: 0.0003 +[2026-03-05 14:07:10] (step=0067996) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.30385443161808, LR: 0.0003 +[2026-03-05 14:07:18] (step=0067997) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.304050088045392, LR: 0.0003 +[2026-03-05 14:07:26] (step=0067998) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.304245744472706, LR: 0.0003 +[2026-03-05 14:07:34] (step=0067999) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.30444140090002, LR: 0.0003 +[2026-03-05 14:07:42] (step=0068000) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.304637057327334, LR: 0.0003 +[2026-03-05 14:07:42] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0068000/ +[2026-03-05 14:07:50] (step=0068001) Train Loss: 0.4501, Train Steps/Sec: 0.12, Epoch: 13.304832713754648, LR: 0.0003 +[2026-03-05 14:07:58] (step=0068002) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.30502837018196, LR: 0.0003 +[2026-03-05 14:08:06] (step=0068003) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.305224026609274, LR: 0.0003 +[2026-03-05 14:08:14] (step=0068004) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 13.305419683036588, LR: 0.0003 +[2026-03-05 14:08:21] (step=0068005) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.305615339463902, LR: 0.0003 +[2026-03-05 14:08:29] (step=0068006) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.305810995891216, LR: 0.0003 +[2026-03-05 14:08:37] (step=0068007) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.306006652318528, LR: 0.0003 +[2026-03-05 14:08:45] (step=0068008) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.306202308745842, LR: 0.0003 +[2026-03-05 14:08:53] (step=0068009) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.306397965173156, LR: 0.0003 +[2026-03-05 14:09:01] (step=0068010) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.30659362160047, LR: 0.0003 +[2026-03-05 14:09:09] (step=0068011) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.306789278027782, LR: 0.0003 +[2026-03-05 14:09:16] (step=0068012) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.306984934455096, LR: 0.0003 +[2026-03-05 14:09:24] (step=0068013) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.30718059088241, LR: 0.0003 +[2026-03-05 14:09:32] (step=0068014) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.307376247309724, LR: 0.0003 +[2026-03-05 14:09:40] (step=0068015) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.307571903737038, LR: 0.0003 +[2026-03-05 14:09:48] (step=0068016) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.30776756016435, LR: 0.0003 +[2026-03-05 14:09:56] (step=0068017) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.307963216591665, LR: 0.0003 +[2026-03-05 14:10:04] (step=0068018) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.308158873018979, LR: 0.0003 +[2026-03-05 14:10:11] (step=0068019) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.308354529446293, LR: 0.0003 +[2026-03-05 14:10:19] (step=0068020) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.308550185873607, LR: 0.0003 +[2026-03-05 14:10:27] (step=0068021) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.308745842300919, LR: 0.0003 +[2026-03-05 14:10:35] (step=0068022) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.308941498728233, LR: 0.0003 +[2026-03-05 14:10:43] (step=0068023) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 13.309137155155547, LR: 0.0003 +[2026-03-05 14:10:51] (step=0068024) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.30933281158286, LR: 0.0003 +[2026-03-05 14:10:59] (step=0068025) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.309528468010175, LR: 0.0003 +[2026-03-05 14:11:06] (step=0068026) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.309724124437487, LR: 0.0003 +[2026-03-05 14:11:14] (step=0068027) Train Loss: 0.4565, Train Steps/Sec: 0.12, Epoch: 13.309919780864801, LR: 0.0003 +[2026-03-05 14:11:22] (step=0068028) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.310115437292115, LR: 0.0003 +[2026-03-05 14:11:30] (step=0068029) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.310311093719429, LR: 0.0003 +[2026-03-05 14:11:38] (step=0068030) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.310506750146743, LR: 0.0003 +[2026-03-05 14:11:46] (step=0068031) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.310702406574055, LR: 0.0003 +[2026-03-05 14:11:54] (step=0068032) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.31089806300137, LR: 0.0003 +[2026-03-05 14:12:02] (step=0068033) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.311093719428683, LR: 0.0003 +[2026-03-05 14:12:09] (step=0068034) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.311289375855997, LR: 0.0003 +[2026-03-05 14:12:17] (step=0068035) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.311485032283311, LR: 0.0003 +[2026-03-05 14:12:25] (step=0068036) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.311680688710624, LR: 0.0003 +[2026-03-05 14:12:33] (step=0068037) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.311876345137938, LR: 0.0003 +[2026-03-05 14:12:41] (step=0068038) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.312072001565252, LR: 0.0003 +[2026-03-05 14:12:49] (step=0068039) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.312267657992566, LR: 0.0003 +[2026-03-05 14:12:57] (step=0068040) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.312463314419878, LR: 0.0003 +[2026-03-05 14:13:04] (step=0068041) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.312658970847192, LR: 0.0003 +[2026-03-05 14:13:12] (step=0068042) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.312854627274506, LR: 0.0003 +[2026-03-05 14:13:20] (step=0068043) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.31305028370182, LR: 0.0003 +[2026-03-05 14:13:28] (step=0068044) Train Loss: 0.4347, Train Steps/Sec: 0.12, Epoch: 13.313245940129134, LR: 0.0003 +[2026-03-05 14:13:36] (step=0068045) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.313441596556446, LR: 0.0003 +[2026-03-05 14:13:44] (step=0068046) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.31363725298376, LR: 0.0003 +[2026-03-05 14:13:52] (step=0068047) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.313832909411074, LR: 0.0003 +[2026-03-05 14:14:00] (step=0068048) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.314028565838388, LR: 0.0003 +[2026-03-05 14:14:08] (step=0068049) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.314224222265702, LR: 0.0003 +[2026-03-05 14:14:15] (step=0068050) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.314419878693014, LR: 0.0003 +[2026-03-05 14:14:23] (step=0068051) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.314615535120328, LR: 0.0003 +[2026-03-05 14:14:31] (step=0068052) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 13.314811191547642, LR: 0.0003 +[2026-03-05 14:14:39] (step=0068053) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.315006847974956, LR: 0.0003 +[2026-03-05 14:14:47] (step=0068054) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 13.31520250440227, LR: 0.0003 +[2026-03-05 14:14:55] (step=0068055) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.315398160829583, LR: 0.0003 +[2026-03-05 14:15:03] (step=0068056) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.315593817256897, LR: 0.0003 +[2026-03-05 14:15:10] (step=0068057) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.31578947368421, LR: 0.0003 +[2026-03-05 14:15:18] (step=0068058) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.315985130111525, LR: 0.0003 +[2026-03-05 14:15:26] (step=0068059) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.316180786538839, LR: 0.0003 +[2026-03-05 14:15:34] (step=0068060) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.31637644296615, LR: 0.0003 +[2026-03-05 14:15:42] (step=0068061) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.316572099393465, LR: 0.0003 +[2026-03-05 14:15:50] (step=0068062) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 13.316767755820779, LR: 0.0003 +[2026-03-05 14:15:58] (step=0068063) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.316963412248093, LR: 0.0003 +[2026-03-05 14:16:06] (step=0068064) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.317159068675405, LR: 0.0003 +[2026-03-05 14:16:13] (step=0068065) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.31735472510272, LR: 0.0003 +[2026-03-05 14:16:21] (step=0068066) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.317550381530033, LR: 0.0003 +[2026-03-05 14:16:29] (step=0068067) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.317746037957347, LR: 0.0003 +[2026-03-05 14:16:37] (step=0068068) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.317941694384661, LR: 0.0003 +[2026-03-05 14:16:45] (step=0068069) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.318137350811973, LR: 0.0003 +[2026-03-05 14:16:53] (step=0068070) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 13.318333007239287, LR: 0.0003 +[2026-03-05 14:17:01] (step=0068071) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 13.318528663666601, LR: 0.0003 +[2026-03-05 14:17:08] (step=0068072) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.318724320093915, LR: 0.0003 +[2026-03-05 14:17:16] (step=0068073) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.31891997652123, LR: 0.0003 +[2026-03-05 14:17:24] (step=0068074) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 13.319115632948542, LR: 0.0003 +[2026-03-05 14:17:32] (step=0068075) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.319311289375856, LR: 0.0003 +[2026-03-05 14:17:40] (step=0068076) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.31950694580317, LR: 0.0003 +[2026-03-05 14:17:48] (step=0068077) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.319702602230484, LR: 0.0003 +[2026-03-05 14:17:56] (step=0068078) Train Loss: 0.4354, Train Steps/Sec: 0.12, Epoch: 13.319898258657798, LR: 0.0003 +[2026-03-05 14:18:04] (step=0068079) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.32009391508511, LR: 0.0003 +[2026-03-05 14:18:11] (step=0068080) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.320289571512424, LR: 0.0003 +[2026-03-05 14:18:19] (step=0068081) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.320485227939738, LR: 0.0003 +[2026-03-05 14:18:27] (step=0068082) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 13.320680884367052, LR: 0.0003 +[2026-03-05 14:18:35] (step=0068083) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.320876540794366, LR: 0.0003 +[2026-03-05 14:18:43] (step=0068084) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.321072197221678, LR: 0.0003 +[2026-03-05 14:18:51] (step=0068085) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.321267853648992, LR: 0.0003 +[2026-03-05 14:18:59] (step=0068086) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.321463510076306, LR: 0.0003 +[2026-03-05 14:19:06] (step=0068087) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.32165916650362, LR: 0.0003 +[2026-03-05 14:19:14] (step=0068088) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.321854822930934, LR: 0.0003 +[2026-03-05 14:19:22] (step=0068089) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.322050479358246, LR: 0.0003 +[2026-03-05 14:19:30] (step=0068090) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.32224613578556, LR: 0.0003 +[2026-03-05 14:19:38] (step=0068091) Train Loss: 0.4383, Train Steps/Sec: 0.12, Epoch: 13.322441792212874, LR: 0.0003 +[2026-03-05 14:19:46] (step=0068092) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.322637448640188, LR: 0.0003 +[2026-03-05 14:19:54] (step=0068093) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.3228331050675, LR: 0.0003 +[2026-03-05 14:20:02] (step=0068094) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 13.323028761494815, LR: 0.0003 +[2026-03-05 14:20:09] (step=0068095) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.323224417922129, LR: 0.0003 +[2026-03-05 14:20:17] (step=0068096) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.323420074349443, LR: 0.0003 +[2026-03-05 14:20:25] (step=0068097) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.323615730776757, LR: 0.0003 +[2026-03-05 14:20:33] (step=0068098) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.323811387204069, LR: 0.0003 +[2026-03-05 14:20:41] (step=0068099) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.324007043631383, LR: 0.0003 +[2026-03-05 14:20:49] (step=0068100) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 13.324202700058697, LR: 0.0003 +[2026-03-05 14:20:57] (step=0068101) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 13.324398356486011, LR: 0.0003 +[2026-03-05 14:21:04] (step=0068102) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.324594012913325, LR: 0.0003 +[2026-03-05 14:21:12] (step=0068103) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.324789669340637, LR: 0.0003 +[2026-03-05 14:21:20] (step=0068104) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.324985325767951, LR: 0.0003 +[2026-03-05 14:21:28] (step=0068105) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 13.325180982195265, LR: 0.0003 +[2026-03-05 14:21:36] (step=0068106) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 13.32537663862258, LR: 0.0003 +[2026-03-05 14:21:44] (step=0068107) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.325572295049893, LR: 0.0003 +[2026-03-05 14:21:52] (step=0068108) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.325767951477205, LR: 0.0003 +[2026-03-05 14:21:59] (step=0068109) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.32596360790452, LR: 0.0003 +[2026-03-05 14:22:07] (step=0068110) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.326159264331833, LR: 0.0003 +[2026-03-05 14:22:15] (step=0068111) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.326354920759147, LR: 0.0003 +[2026-03-05 14:22:23] (step=0068112) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 13.326550577186461, LR: 0.0003 +[2026-03-05 14:22:31] (step=0068113) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.326746233613774, LR: 0.0003 +[2026-03-05 14:22:39] (step=0068114) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.326941890041088, LR: 0.0003 +[2026-03-05 14:22:47] (step=0068115) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 13.327137546468402, LR: 0.0003 +[2026-03-05 14:22:54] (step=0068116) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.327333202895716, LR: 0.0003 +[2026-03-05 14:23:02] (step=0068117) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.327528859323028, LR: 0.0003 +[2026-03-05 14:23:10] (step=0068118) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 13.327724515750342, LR: 0.0003 +[2026-03-05 14:23:18] (step=0068119) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.327920172177656, LR: 0.0003 +[2026-03-05 14:23:26] (step=0068120) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 13.32811582860497, LR: 0.0003 +[2026-03-05 14:23:34] (step=0068121) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.328311485032284, LR: 0.0003 +[2026-03-05 14:23:42] (step=0068122) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.328507141459596, LR: 0.0003 +[2026-03-05 14:23:50] (step=0068123) Train Loss: 0.4379, Train Steps/Sec: 0.12, Epoch: 13.32870279788691, LR: 0.0003 +[2026-03-05 14:23:57] (step=0068124) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.328898454314224, LR: 0.0003 +[2026-03-05 14:24:05] (step=0068125) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.329094110741538, LR: 0.0003 +[2026-03-05 14:24:13] (step=0068126) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 13.329289767168852, LR: 0.0003 +[2026-03-05 14:24:21] (step=0068127) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 13.329485423596164, LR: 0.0003 +[2026-03-05 14:24:29] (step=0068128) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.329681080023478, LR: 0.0003 +[2026-03-05 14:24:37] (step=0068129) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.329876736450792, LR: 0.0003 +[2026-03-05 14:24:45] (step=0068130) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.330072392878106, LR: 0.0003 +[2026-03-05 14:24:52] (step=0068131) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.33026804930542, LR: 0.0003 +[2026-03-05 14:25:00] (step=0068132) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.330463705732733, LR: 0.0003 +[2026-03-05 14:25:08] (step=0068133) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.330659362160047, LR: 0.0003 +[2026-03-05 14:25:16] (step=0068134) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.33085501858736, LR: 0.0003 +[2026-03-05 14:25:24] (step=0068135) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.331050675014675, LR: 0.0003 +[2026-03-05 14:25:32] (step=0068136) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.331246331441989, LR: 0.0003 +[2026-03-05 14:25:40] (step=0068137) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.331441987869301, LR: 0.0003 +[2026-03-05 14:25:48] (step=0068138) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.331637644296615, LR: 0.0003 +[2026-03-05 14:25:55] (step=0068139) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.331833300723929, LR: 0.0003 +[2026-03-05 14:26:03] (step=0068140) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.332028957151243, LR: 0.0003 +[2026-03-05 14:26:11] (step=0068141) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.332224613578557, LR: 0.0003 +[2026-03-05 14:26:19] (step=0068142) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 13.33242027000587, LR: 0.0003 +[2026-03-05 14:26:27] (step=0068143) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.332615926433183, LR: 0.0003 +[2026-03-05 14:26:35] (step=0068144) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.332811582860497, LR: 0.0003 +[2026-03-05 14:26:43] (step=0068145) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.333007239287811, LR: 0.0003 +[2026-03-05 14:26:50] (step=0068146) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.333202895715123, LR: 0.0003 +[2026-03-05 14:26:58] (step=0068147) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.333398552142437, LR: 0.0003 +[2026-03-05 14:27:06] (step=0068148) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.333594208569751, LR: 0.0003 +[2026-03-05 14:27:14] (step=0068149) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.333789864997065, LR: 0.0003 +[2026-03-05 14:27:22] (step=0068150) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.33398552142438, LR: 0.0003 +[2026-03-05 14:27:30] (step=0068151) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.334181177851692, LR: 0.0003 +[2026-03-05 14:27:38] (step=0068152) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.334376834279006, LR: 0.0003 +[2026-03-05 14:27:45] (step=0068153) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.33457249070632, LR: 0.0003 +[2026-03-05 14:27:53] (step=0068154) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.334768147133634, LR: 0.0003 +[2026-03-05 14:28:01] (step=0068155) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.334963803560948, LR: 0.0003 +[2026-03-05 14:28:09] (step=0068156) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.33515945998826, LR: 0.0003 +[2026-03-05 14:28:17] (step=0068157) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.335355116415574, LR: 0.0003 +[2026-03-05 14:28:25] (step=0068158) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.335550772842888, LR: 0.0003 +[2026-03-05 14:28:33] (step=0068159) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.335746429270202, LR: 0.0003 +[2026-03-05 14:28:40] (step=0068160) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.335942085697516, LR: 0.0003 +[2026-03-05 14:28:48] (step=0068161) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.336137742124828, LR: 0.0003 +[2026-03-05 14:28:56] (step=0068162) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.336333398552142, LR: 0.0003 +[2026-03-05 14:29:04] (step=0068163) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.336529054979456, LR: 0.0003 +[2026-03-05 14:29:12] (step=0068164) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.33672471140677, LR: 0.0003 +[2026-03-05 14:29:20] (step=0068165) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.336920367834084, LR: 0.0003 +[2026-03-05 14:29:28] (step=0068166) Train Loss: 0.4611, Train Steps/Sec: 0.13, Epoch: 13.337116024261396, LR: 0.0003 +[2026-03-05 14:29:35] (step=0068167) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.33731168068871, LR: 0.0003 +[2026-03-05 14:29:43] (step=0068168) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.337507337116024, LR: 0.0003 +[2026-03-05 14:29:51] (step=0068169) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.337702993543338, LR: 0.0003 +[2026-03-05 14:29:59] (step=0068170) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 13.33789864997065, LR: 0.0003 +[2026-03-05 14:30:07] (step=0068171) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.338094306397965, LR: 0.0003 +[2026-03-05 14:30:15] (step=0068172) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.338289962825279, LR: 0.0003 +[2026-03-05 14:30:23] (step=0068173) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.338485619252593, LR: 0.0003 +[2026-03-05 14:30:31] (step=0068174) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.338681275679907, LR: 0.0003 +[2026-03-05 14:30:38] (step=0068175) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 13.338876932107219, LR: 0.0003 +[2026-03-05 14:30:46] (step=0068176) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.339072588534533, LR: 0.0003 +[2026-03-05 14:30:54] (step=0068177) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.339268244961847, LR: 0.0003 +[2026-03-05 14:31:02] (step=0068178) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.339463901389161, LR: 0.0003 +[2026-03-05 14:31:10] (step=0068179) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.339659557816475, LR: 0.0003 +[2026-03-05 14:31:18] (step=0068180) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.339855214243787, LR: 0.0003 +[2026-03-05 14:31:26] (step=0068181) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.340050870671101, LR: 0.0003 +[2026-03-05 14:31:34] (step=0068182) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.340246527098415, LR: 0.0003 +[2026-03-05 14:31:41] (step=0068183) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.34044218352573, LR: 0.0003 +[2026-03-05 14:31:49] (step=0068184) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.340637839953043, LR: 0.0003 +[2026-03-05 14:31:57] (step=0068185) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.340833496380355, LR: 0.0003 +[2026-03-05 14:32:05] (step=0068186) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.34102915280767, LR: 0.0003 +[2026-03-05 14:32:13] (step=0068187) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.341224809234983, LR: 0.0003 +[2026-03-05 14:32:21] (step=0068188) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.341420465662297, LR: 0.0003 +[2026-03-05 14:32:29] (step=0068189) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.341616122089611, LR: 0.0003 +[2026-03-05 14:32:37] (step=0068190) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.341811778516924, LR: 0.0003 +[2026-03-05 14:32:45] (step=0068191) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.342007434944238, LR: 0.0003 +[2026-03-05 14:32:52] (step=0068192) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.342203091371552, LR: 0.0003 +[2026-03-05 14:33:00] (step=0068193) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.342398747798866, LR: 0.0003 +[2026-03-05 14:33:08] (step=0068194) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.34259440422618, LR: 0.0003 +[2026-03-05 14:33:16] (step=0068195) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.342790060653492, LR: 0.0003 +[2026-03-05 14:33:24] (step=0068196) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 13.342985717080806, LR: 0.0003 +[2026-03-05 14:33:32] (step=0068197) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.34318137350812, LR: 0.0003 +[2026-03-05 14:33:40] (step=0068198) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.343377029935434, LR: 0.0003 +[2026-03-05 14:33:47] (step=0068199) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.343572686362746, LR: 0.0003 +[2026-03-05 14:33:55] (step=0068200) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.34376834279006, LR: 0.0003 +[2026-03-05 14:34:03] (step=0068201) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.343963999217374, LR: 0.0003 +[2026-03-05 14:34:11] (step=0068202) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.344159655644688, LR: 0.0003 +[2026-03-05 14:34:19] (step=0068203) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.344355312072002, LR: 0.0003 +[2026-03-05 14:34:27] (step=0068204) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.344550968499314, LR: 0.0003 +[2026-03-05 14:34:35] (step=0068205) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.344746624926628, LR: 0.0003 +[2026-03-05 14:34:42] (step=0068206) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.344942281353942, LR: 0.0003 +[2026-03-05 14:34:50] (step=0068207) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.345137937781256, LR: 0.0003 +[2026-03-05 14:34:58] (step=0068208) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 13.34533359420857, LR: 0.0003 +[2026-03-05 14:35:06] (step=0068209) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.345529250635883, LR: 0.0003 +[2026-03-05 14:35:14] (step=0068210) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 13.345724907063197, LR: 0.0003 +[2026-03-05 14:35:22] (step=0068211) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.34592056349051, LR: 0.0003 +[2026-03-05 14:35:30] (step=0068212) Train Loss: 0.4250, Train Steps/Sec: 0.13, Epoch: 13.346116219917825, LR: 0.0003 +[2026-03-05 14:35:38] (step=0068213) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.346311876345139, LR: 0.0003 +[2026-03-05 14:35:45] (step=0068214) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.346507532772451, LR: 0.0003 +[2026-03-05 14:35:53] (step=0068215) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 13.346703189199765, LR: 0.0003 +[2026-03-05 14:36:01] (step=0068216) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 13.346898845627079, LR: 0.0003 +[2026-03-05 14:36:09] (step=0068217) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.347094502054393, LR: 0.0003 +[2026-03-05 14:36:17] (step=0068218) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.347290158481707, LR: 0.0003 +[2026-03-05 14:36:25] (step=0068219) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.34748581490902, LR: 0.0003 +[2026-03-05 14:36:33] (step=0068220) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 13.347681471336333, LR: 0.0003 +[2026-03-05 14:36:40] (step=0068221) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.347877127763647, LR: 0.0003 +[2026-03-05 14:36:48] (step=0068222) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.348072784190961, LR: 0.0003 +[2026-03-05 14:36:56] (step=0068223) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.348268440618273, LR: 0.0003 +[2026-03-05 14:37:04] (step=0068224) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.348464097045587, LR: 0.0003 +[2026-03-05 14:37:12] (step=0068225) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.348659753472901, LR: 0.0003 +[2026-03-05 14:37:20] (step=0068226) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.348855409900215, LR: 0.0003 +[2026-03-05 14:37:28] (step=0068227) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.34905106632753, LR: 0.0003 +[2026-03-05 14:37:36] (step=0068228) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.349246722754842, LR: 0.0003 +[2026-03-05 14:37:43] (step=0068229) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.349442379182156, LR: 0.0003 +[2026-03-05 14:37:51] (step=0068230) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 13.34963803560947, LR: 0.0003 +[2026-03-05 14:37:59] (step=0068231) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.349833692036784, LR: 0.0003 +[2026-03-05 14:38:07] (step=0068232) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.350029348464098, LR: 0.0003 +[2026-03-05 14:38:15] (step=0068233) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 13.35022500489141, LR: 0.0003 +[2026-03-05 14:38:23] (step=0068234) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.350420661318724, LR: 0.0003 +[2026-03-05 14:38:31] (step=0068235) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.350616317746038, LR: 0.0003 +[2026-03-05 14:38:39] (step=0068236) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.350811974173352, LR: 0.0003 +[2026-03-05 14:38:46] (step=0068237) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.351007630600666, LR: 0.0003 +[2026-03-05 14:38:54] (step=0068238) Train Loss: 0.4635, Train Steps/Sec: 0.13, Epoch: 13.351203287027978, LR: 0.0003 +[2026-03-05 14:39:02] (step=0068239) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.351398943455292, LR: 0.0003 +[2026-03-05 14:39:10] (step=0068240) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.351594599882606, LR: 0.0003 +[2026-03-05 14:39:18] (step=0068241) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 13.35179025630992, LR: 0.0003 +[2026-03-05 14:39:26] (step=0068242) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.351985912737234, LR: 0.0003 +[2026-03-05 14:39:34] (step=0068243) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.352181569164546, LR: 0.0003 +[2026-03-05 14:39:41] (step=0068244) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.35237722559186, LR: 0.0003 +[2026-03-05 14:39:49] (step=0068245) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.352572882019174, LR: 0.0003 +[2026-03-05 14:39:57] (step=0068246) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.352768538446488, LR: 0.0003 +[2026-03-05 14:40:05] (step=0068247) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.352964194873802, LR: 0.0003 +[2026-03-05 14:40:13] (step=0068248) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.353159851301115, LR: 0.0003 +[2026-03-05 14:40:21] (step=0068249) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.353355507728429, LR: 0.0003 +[2026-03-05 14:40:29] (step=0068250) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.353551164155743, LR: 0.0003 +[2026-03-05 14:40:36] (step=0068251) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.353746820583057, LR: 0.0003 +[2026-03-05 14:40:44] (step=0068252) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.353942477010369, LR: 0.0003 +[2026-03-05 14:40:52] (step=0068253) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.354138133437683, LR: 0.0003 +[2026-03-05 14:41:00] (step=0068254) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.354333789864997, LR: 0.0003 +[2026-03-05 14:41:08] (step=0068255) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.354529446292311, LR: 0.0003 +[2026-03-05 14:41:16] (step=0068256) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.354725102719625, LR: 0.0003 +[2026-03-05 14:41:24] (step=0068257) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.354920759146937, LR: 0.0003 +[2026-03-05 14:41:32] (step=0068258) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.355116415574251, LR: 0.0003 +[2026-03-05 14:41:39] (step=0068259) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.355312072001565, LR: 0.0003 +[2026-03-05 14:41:47] (step=0068260) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.35550772842888, LR: 0.0003 +[2026-03-05 14:41:55] (step=0068261) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.355703384856193, LR: 0.0003 +[2026-03-05 14:42:03] (step=0068262) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.355899041283505, LR: 0.0003 +[2026-03-05 14:42:11] (step=0068263) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.35609469771082, LR: 0.0003 +[2026-03-05 14:42:19] (step=0068264) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.356290354138133, LR: 0.0003 +[2026-03-05 14:42:27] (step=0068265) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.356486010565447, LR: 0.0003 +[2026-03-05 14:42:34] (step=0068266) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.356681666992761, LR: 0.0003 +[2026-03-05 14:42:42] (step=0068267) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 13.356877323420074, LR: 0.0003 +[2026-03-05 14:42:50] (step=0068268) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.357072979847388, LR: 0.0003 +[2026-03-05 14:42:58] (step=0068269) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.357268636274702, LR: 0.0003 +[2026-03-05 14:43:06] (step=0068270) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.357464292702016, LR: 0.0003 +[2026-03-05 14:43:14] (step=0068271) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.35765994912933, LR: 0.0003 +[2026-03-05 14:43:22] (step=0068272) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.357855605556642, LR: 0.0003 +[2026-03-05 14:43:29] (step=0068273) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.358051261983956, LR: 0.0003 +[2026-03-05 14:43:37] (step=0068274) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.35824691841127, LR: 0.0003 +[2026-03-05 14:43:45] (step=0068275) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.358442574838584, LR: 0.0003 +[2026-03-05 14:43:53] (step=0068276) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 13.358638231265896, LR: 0.0003 +[2026-03-05 14:44:01] (step=0068277) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 13.35883388769321, LR: 0.0003 +[2026-03-05 14:44:09] (step=0068278) Train Loss: 0.4505, Train Steps/Sec: 0.12, Epoch: 13.359029544120524, LR: 0.0003 +[2026-03-05 14:44:17] (step=0068279) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.359225200547838, LR: 0.0003 +[2026-03-05 14:44:25] (step=0068280) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.359420856975152, LR: 0.0003 +[2026-03-05 14:44:32] (step=0068281) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.359616513402464, LR: 0.0003 +[2026-03-05 14:44:40] (step=0068282) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.359812169829778, LR: 0.0003 +[2026-03-05 14:44:48] (step=0068283) Train Loss: 0.4484, Train Steps/Sec: 0.12, Epoch: 13.360007826257092, LR: 0.0003 +[2026-03-05 14:44:56] (step=0068284) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.360203482684406, LR: 0.0003 +[2026-03-05 14:45:04] (step=0068285) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.36039913911172, LR: 0.0003 +[2026-03-05 14:45:12] (step=0068286) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.360594795539033, LR: 0.0003 +[2026-03-05 14:45:20] (step=0068287) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.360790451966347, LR: 0.0003 +[2026-03-05 14:45:28] (step=0068288) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.36098610839366, LR: 0.0003 +[2026-03-05 14:45:35] (step=0068289) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.361181764820975, LR: 0.0003 +[2026-03-05 14:45:43] (step=0068290) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.361377421248289, LR: 0.0003 +[2026-03-05 14:45:51] (step=0068291) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.361573077675601, LR: 0.0003 +[2026-03-05 14:45:59] (step=0068292) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 13.361768734102915, LR: 0.0003 +[2026-03-05 14:46:07] (step=0068293) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.361964390530229, LR: 0.0003 +[2026-03-05 14:46:15] (step=0068294) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.362160046957543, LR: 0.0003 +[2026-03-05 14:46:23] (step=0068295) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.362355703384857, LR: 0.0003 +[2026-03-05 14:46:30] (step=0068296) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.36255135981217, LR: 0.0003 +[2026-03-05 14:46:38] (step=0068297) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.362747016239483, LR: 0.0003 +[2026-03-05 14:46:46] (step=0068298) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.362942672666797, LR: 0.0003 +[2026-03-05 14:46:54] (step=0068299) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.363138329094111, LR: 0.0003 +[2026-03-05 14:47:02] (step=0068300) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.363333985521425, LR: 0.0003 +[2026-03-05 14:47:10] (step=0068301) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.363529641948737, LR: 0.0003 +[2026-03-05 14:47:18] (step=0068302) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.363725298376051, LR: 0.0003 +[2026-03-05 14:47:25] (step=0068303) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.363920954803366, LR: 0.0003 +[2026-03-05 14:47:33] (step=0068304) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.36411661123068, LR: 0.0003 +[2026-03-05 14:47:41] (step=0068305) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.364312267657992, LR: 0.0003 +[2026-03-05 14:47:49] (step=0068306) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.364507924085306, LR: 0.0003 +[2026-03-05 14:47:57] (step=0068307) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.36470358051262, LR: 0.0003 +[2026-03-05 14:48:05] (step=0068308) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.364899236939934, LR: 0.0003 +[2026-03-05 14:48:13] (step=0068309) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.365094893367248, LR: 0.0003 +[2026-03-05 14:48:20] (step=0068310) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 13.36529054979456, LR: 0.0003 +[2026-03-05 14:48:28] (step=0068311) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.365486206221874, LR: 0.0003 +[2026-03-05 14:48:36] (step=0068312) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.365681862649188, LR: 0.0003 +[2026-03-05 14:48:44] (step=0068313) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.365877519076502, LR: 0.0003 +[2026-03-05 14:48:52] (step=0068314) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.366073175503816, LR: 0.0003 +[2026-03-05 14:49:00] (step=0068315) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.366268831931128, LR: 0.0003 +[2026-03-05 14:49:08] (step=0068316) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.366464488358442, LR: 0.0003 +[2026-03-05 14:49:15] (step=0068317) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.366660144785756, LR: 0.0003 +[2026-03-05 14:49:23] (step=0068318) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.36685580121307, LR: 0.0003 +[2026-03-05 14:49:31] (step=0068319) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.367051457640384, LR: 0.0003 +[2026-03-05 14:49:39] (step=0068320) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.367247114067697, LR: 0.0003 +[2026-03-05 14:49:47] (step=0068321) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.36744277049501, LR: 0.0003 +[2026-03-05 14:49:55] (step=0068322) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.367638426922325, LR: 0.0003 +[2026-03-05 14:50:03] (step=0068323) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.367834083349639, LR: 0.0003 +[2026-03-05 14:50:11] (step=0068324) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.368029739776953, LR: 0.0003 +[2026-03-05 14:50:18] (step=0068325) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.368225396204265, LR: 0.0003 +[2026-03-05 14:50:26] (step=0068326) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.368421052631579, LR: 0.0003 +[2026-03-05 14:50:34] (step=0068327) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.368616709058893, LR: 0.0003 +[2026-03-05 14:50:42] (step=0068328) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.368812365486207, LR: 0.0003 +[2026-03-05 14:50:50] (step=0068329) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.369008021913519, LR: 0.0003 +[2026-03-05 14:50:58] (step=0068330) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.369203678340833, LR: 0.0003 +[2026-03-05 14:51:06] (step=0068331) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.369399334768147, LR: 0.0003 +[2026-03-05 14:51:14] (step=0068332) Train Loss: 0.4513, Train Steps/Sec: 0.12, Epoch: 13.369594991195461, LR: 0.0003 +[2026-03-05 14:51:22] (step=0068333) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.369790647622775, LR: 0.0003 +[2026-03-05 14:51:29] (step=0068334) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.369986304050087, LR: 0.0003 +[2026-03-05 14:51:37] (step=0068335) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.370181960477401, LR: 0.0003 +[2026-03-05 14:51:45] (step=0068336) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.370377616904715, LR: 0.0003 +[2026-03-05 14:51:53] (step=0068337) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.37057327333203, LR: 0.0003 +[2026-03-05 14:52:01] (step=0068338) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.370768929759343, LR: 0.0003 +[2026-03-05 14:52:09] (step=0068339) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.370964586186656, LR: 0.0003 +[2026-03-05 14:52:17] (step=0068340) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 13.37116024261397, LR: 0.0003 +[2026-03-05 14:52:24] (step=0068341) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.371355899041284, LR: 0.0003 +[2026-03-05 14:52:32] (step=0068342) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.371551555468598, LR: 0.0003 +[2026-03-05 14:52:40] (step=0068343) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.371747211895912, LR: 0.0003 +[2026-03-05 14:52:48] (step=0068344) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.371942868323224, LR: 0.0003 +[2026-03-05 14:52:56] (step=0068345) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.372138524750538, LR: 0.0003 +[2026-03-05 14:53:04] (step=0068346) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.372334181177852, LR: 0.0003 +[2026-03-05 14:53:12] (step=0068347) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.372529837605166, LR: 0.0003 +[2026-03-05 14:53:19] (step=0068348) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.37272549403248, LR: 0.0003 +[2026-03-05 14:53:27] (step=0068349) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.372921150459792, LR: 0.0003 +[2026-03-05 14:53:35] (step=0068350) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.373116806887106, LR: 0.0003 +[2026-03-05 14:53:43] (step=0068351) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.37331246331442, LR: 0.0003 +[2026-03-05 14:53:51] (step=0068352) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.373508119741734, LR: 0.0003 +[2026-03-05 14:53:59] (step=0068353) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.373703776169048, LR: 0.0003 +[2026-03-05 14:54:07] (step=0068354) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.37389943259636, LR: 0.0003 +[2026-03-05 14:54:14] (step=0068355) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.374095089023674, LR: 0.0003 +[2026-03-05 14:54:22] (step=0068356) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.374290745450988, LR: 0.0003 +[2026-03-05 14:54:30] (step=0068357) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.374486401878302, LR: 0.0003 +[2026-03-05 14:54:38] (step=0068358) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.374682058305615, LR: 0.0003 +[2026-03-05 14:54:46] (step=0068359) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.374877714732929, LR: 0.0003 +[2026-03-05 14:54:54] (step=0068360) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.375073371160243, LR: 0.0003 +[2026-03-05 14:55:02] (step=0068361) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.375269027587557, LR: 0.0003 +[2026-03-05 14:55:10] (step=0068362) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.37546468401487, LR: 0.0003 +[2026-03-05 14:55:17] (step=0068363) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.375660340442183, LR: 0.0003 +[2026-03-05 14:55:25] (step=0068364) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.375855996869497, LR: 0.0003 +[2026-03-05 14:55:33] (step=0068365) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.37605165329681, LR: 0.0003 +[2026-03-05 14:55:41] (step=0068366) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 13.376247309724125, LR: 0.0003 +[2026-03-05 14:55:49] (step=0068367) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 13.376442966151439, LR: 0.0003 +[2026-03-05 14:55:57] (step=0068368) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.376638622578751, LR: 0.0003 +[2026-03-05 14:56:05] (step=0068369) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.376834279006065, LR: 0.0003 +[2026-03-05 14:56:12] (step=0068370) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.377029935433379, LR: 0.0003 +[2026-03-05 14:56:20] (step=0068371) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.377225591860693, LR: 0.0003 +[2026-03-05 14:56:28] (step=0068372) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.377421248288007, LR: 0.0003 +[2026-03-05 14:56:36] (step=0068373) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.37761690471532, LR: 0.0003 +[2026-03-05 14:56:44] (step=0068374) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.377812561142633, LR: 0.0003 +[2026-03-05 14:56:52] (step=0068375) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.378008217569947, LR: 0.0003 +[2026-03-05 14:57:00] (step=0068376) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.378203873997261, LR: 0.0003 +[2026-03-05 14:57:08] (step=0068377) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.378399530424575, LR: 0.0003 +[2026-03-05 14:57:15] (step=0068378) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.378595186851888, LR: 0.0003 +[2026-03-05 14:57:23] (step=0068379) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.378790843279202, LR: 0.0003 +[2026-03-05 14:57:31] (step=0068380) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 13.378986499706516, LR: 0.0003 +[2026-03-05 14:57:39] (step=0068381) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.37918215613383, LR: 0.0003 +[2026-03-05 14:57:47] (step=0068382) Train Loss: 0.4283, Train Steps/Sec: 0.12, Epoch: 13.379377812561142, LR: 0.0003 +[2026-03-05 14:57:55] (step=0068383) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.379573468988456, LR: 0.0003 +[2026-03-05 14:58:03] (step=0068384) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 13.37976912541577, LR: 0.0003 +[2026-03-05 14:58:11] (step=0068385) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 13.379964781843084, LR: 0.0003 +[2026-03-05 14:58:19] (step=0068386) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 13.380160438270398, LR: 0.0003 +[2026-03-05 14:58:26] (step=0068387) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.38035609469771, LR: 0.0003 +[2026-03-05 14:58:34] (step=0068388) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.380551751125024, LR: 0.0003 +[2026-03-05 14:58:42] (step=0068389) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.380747407552338, LR: 0.0003 +[2026-03-05 14:58:50] (step=0068390) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.380943063979652, LR: 0.0003 +[2026-03-05 14:58:58] (step=0068391) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.381138720406966, LR: 0.0003 +[2026-03-05 14:59:06] (step=0068392) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.381334376834278, LR: 0.0003 +[2026-03-05 14:59:14] (step=0068393) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.381530033261592, LR: 0.0003 +[2026-03-05 14:59:21] (step=0068394) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.381725689688906, LR: 0.0003 +[2026-03-05 14:59:29] (step=0068395) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 13.38192134611622, LR: 0.0003 +[2026-03-05 14:59:37] (step=0068396) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.382117002543534, LR: 0.0003 +[2026-03-05 14:59:45] (step=0068397) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.382312658970847, LR: 0.0003 +[2026-03-05 14:59:53] (step=0068398) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.38250831539816, LR: 0.0003 +[2026-03-05 15:00:01] (step=0068399) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.382703971825475, LR: 0.0003 +[2026-03-05 15:00:09] (step=0068400) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.382899628252789, LR: 0.0003 +[2026-03-05 15:00:16] (step=0068401) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.383095284680103, LR: 0.0003 +[2026-03-05 15:00:24] (step=0068402) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.383290941107415, LR: 0.0003 +[2026-03-05 15:00:32] (step=0068403) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.383486597534729, LR: 0.0003 +[2026-03-05 15:00:40] (step=0068404) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.383682253962043, LR: 0.0003 +[2026-03-05 15:00:48] (step=0068405) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.383877910389357, LR: 0.0003 +[2026-03-05 15:00:56] (step=0068406) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.384073566816669, LR: 0.0003 +[2026-03-05 15:01:04] (step=0068407) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.384269223243983, LR: 0.0003 +[2026-03-05 15:01:12] (step=0068408) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.384464879671297, LR: 0.0003 +[2026-03-05 15:01:19] (step=0068409) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.384660536098611, LR: 0.0003 +[2026-03-05 15:01:27] (step=0068410) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.384856192525925, LR: 0.0003 +[2026-03-05 15:01:35] (step=0068411) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.385051848953237, LR: 0.0003 +[2026-03-05 15:01:43] (step=0068412) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.385247505380551, LR: 0.0003 +[2026-03-05 15:01:51] (step=0068413) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.385443161807865, LR: 0.0003 +[2026-03-05 15:01:59] (step=0068414) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.38563881823518, LR: 0.0003 +[2026-03-05 15:02:07] (step=0068415) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.385834474662493, LR: 0.0003 +[2026-03-05 15:02:14] (step=0068416) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.386030131089806, LR: 0.0003 +[2026-03-05 15:02:22] (step=0068417) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.38622578751712, LR: 0.0003 +[2026-03-05 15:02:30] (step=0068418) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.386421443944434, LR: 0.0003 +[2026-03-05 15:02:38] (step=0068419) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.386617100371748, LR: 0.0003 +[2026-03-05 15:02:46] (step=0068420) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.386812756799062, LR: 0.0003 +[2026-03-05 15:02:54] (step=0068421) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.387008413226374, LR: 0.0003 +[2026-03-05 15:03:02] (step=0068422) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.387204069653688, LR: 0.0003 +[2026-03-05 15:03:09] (step=0068423) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.387399726081002, LR: 0.0003 +[2026-03-05 15:03:17] (step=0068424) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.387595382508316, LR: 0.0003 +[2026-03-05 15:03:25] (step=0068425) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.38779103893563, LR: 0.0003 +[2026-03-05 15:03:33] (step=0068426) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.387986695362942, LR: 0.0003 +[2026-03-05 15:03:41] (step=0068427) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.388182351790256, LR: 0.0003 +[2026-03-05 15:03:49] (step=0068428) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.38837800821757, LR: 0.0003 +[2026-03-05 15:03:57] (step=0068429) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.388573664644884, LR: 0.0003 +[2026-03-05 15:04:05] (step=0068430) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.388769321072198, LR: 0.0003 +[2026-03-05 15:04:12] (step=0068431) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.38896497749951, LR: 0.0003 +[2026-03-05 15:04:20] (step=0068432) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.389160633926824, LR: 0.0003 +[2026-03-05 15:04:28] (step=0068433) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.389356290354138, LR: 0.0003 +[2026-03-05 15:04:36] (step=0068434) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.389551946781452, LR: 0.0003 +[2026-03-05 15:04:44] (step=0068435) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.389747603208765, LR: 0.0003 +[2026-03-05 15:04:52] (step=0068436) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.389943259636079, LR: 0.0003 +[2026-03-05 15:05:00] (step=0068437) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.390138916063393, LR: 0.0003 +[2026-03-05 15:05:08] (step=0068438) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.390334572490707, LR: 0.0003 +[2026-03-05 15:05:15] (step=0068439) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.39053022891802, LR: 0.0003 +[2026-03-05 15:05:23] (step=0068440) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.390725885345333, LR: 0.0003 +[2026-03-05 15:05:31] (step=0068441) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.390921541772647, LR: 0.0003 +[2026-03-05 15:05:39] (step=0068442) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.39111719819996, LR: 0.0003 +[2026-03-05 15:05:47] (step=0068443) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.391312854627275, LR: 0.0003 +[2026-03-05 15:05:55] (step=0068444) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.391508511054589, LR: 0.0003 +[2026-03-05 15:06:03] (step=0068445) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.391704167481901, LR: 0.0003 +[2026-03-05 15:06:11] (step=0068446) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.391899823909215, LR: 0.0003 +[2026-03-05 15:06:18] (step=0068447) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.392095480336529, LR: 0.0003 +[2026-03-05 15:06:26] (step=0068448) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.392291136763843, LR: 0.0003 +[2026-03-05 15:06:34] (step=0068449) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.392486793191157, LR: 0.0003 +[2026-03-05 15:06:42] (step=0068450) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.39268244961847, LR: 0.0003 +[2026-03-05 15:06:50] (step=0068451) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.392878106045783, LR: 0.0003 +[2026-03-05 15:06:58] (step=0068452) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.393073762473097, LR: 0.0003 +[2026-03-05 15:07:06] (step=0068453) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.393269418900411, LR: 0.0003 +[2026-03-05 15:07:13] (step=0068454) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.393465075327725, LR: 0.0003 +[2026-03-05 15:07:21] (step=0068455) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.393660731755038, LR: 0.0003 +[2026-03-05 15:07:29] (step=0068456) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.393856388182352, LR: 0.0003 +[2026-03-05 15:07:37] (step=0068457) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 13.394052044609666, LR: 0.0003 +[2026-03-05 15:07:45] (step=0068458) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.39424770103698, LR: 0.0003 +[2026-03-05 15:07:53] (step=0068459) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.394443357464292, LR: 0.0003 +[2026-03-05 15:08:01] (step=0068460) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.394639013891606, LR: 0.0003 +[2026-03-05 15:08:08] (step=0068461) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.39483467031892, LR: 0.0003 +[2026-03-05 15:08:16] (step=0068462) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.395030326746234, LR: 0.0003 +[2026-03-05 15:08:24] (step=0068463) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.395225983173548, LR: 0.0003 +[2026-03-05 15:08:32] (step=0068464) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.39542163960086, LR: 0.0003 +[2026-03-05 15:08:40] (step=0068465) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.395617296028174, LR: 0.0003 +[2026-03-05 15:08:48] (step=0068466) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.395812952455488, LR: 0.0003 +[2026-03-05 15:08:56] (step=0068467) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.396008608882802, LR: 0.0003 +[2026-03-05 15:09:03] (step=0068468) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.396204265310116, LR: 0.0003 +[2026-03-05 15:09:11] (step=0068469) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.396399921737428, LR: 0.0003 +[2026-03-05 15:09:19] (step=0068470) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.396595578164742, LR: 0.0003 +[2026-03-05 15:09:27] (step=0068471) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 13.396791234592056, LR: 0.0003 +[2026-03-05 15:09:35] (step=0068472) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.39698689101937, LR: 0.0003 +[2026-03-05 15:09:43] (step=0068473) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.397182547446684, LR: 0.0003 +[2026-03-05 15:09:51] (step=0068474) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.397378203873997, LR: 0.0003 +[2026-03-05 15:09:59] (step=0068475) Train Loss: 0.4373, Train Steps/Sec: 0.12, Epoch: 13.39757386030131, LR: 0.0003 +[2026-03-05 15:10:06] (step=0068476) Train Loss: 0.4242, Train Steps/Sec: 0.13, Epoch: 13.397769516728625, LR: 0.0003 +[2026-03-05 15:10:14] (step=0068477) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.397965173155939, LR: 0.0003 +[2026-03-05 15:10:22] (step=0068478) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.398160829583253, LR: 0.0003 +[2026-03-05 15:10:30] (step=0068479) Train Loss: 0.4346, Train Steps/Sec: 0.12, Epoch: 13.398356486010565, LR: 0.0003 +[2026-03-05 15:10:38] (step=0068480) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.398552142437879, LR: 0.0003 +[2026-03-05 15:10:46] (step=0068481) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.398747798865193, LR: 0.0003 +[2026-03-05 15:10:54] (step=0068482) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.398943455292507, LR: 0.0003 +[2026-03-05 15:11:02] (step=0068483) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.39913911171982, LR: 0.0003 +[2026-03-05 15:11:09] (step=0068484) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.399334768147133, LR: 0.0003 +[2026-03-05 15:11:17] (step=0068485) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.399530424574447, LR: 0.0003 +[2026-03-05 15:11:25] (step=0068486) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.399726081001761, LR: 0.0003 +[2026-03-05 15:11:33] (step=0068487) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.399921737429075, LR: 0.0003 +[2026-03-05 15:11:41] (step=0068488) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.400117393856387, LR: 0.0003 +[2026-03-05 15:11:49] (step=0068489) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.400313050283701, LR: 0.0003 +[2026-03-05 15:11:57] (step=0068490) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.400508706711015, LR: 0.0003 +[2026-03-05 15:12:04] (step=0068491) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.40070436313833, LR: 0.0003 +[2026-03-05 15:12:12] (step=0068492) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.400900019565643, LR: 0.0003 +[2026-03-05 15:12:20] (step=0068493) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.401095675992956, LR: 0.0003 +[2026-03-05 15:12:28] (step=0068494) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.40129133242027, LR: 0.0003 +[2026-03-05 15:12:36] (step=0068495) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.401486988847584, LR: 0.0003 +[2026-03-05 15:12:44] (step=0068496) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.401682645274898, LR: 0.0003 +[2026-03-05 15:12:52] (step=0068497) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.401878301702212, LR: 0.0003 +[2026-03-05 15:12:59] (step=0068498) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.402073958129524, LR: 0.0003 +[2026-03-05 15:13:07] (step=0068499) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.402269614556838, LR: 0.0003 +[2026-03-05 15:13:15] (step=0068500) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 13.402465270984152, LR: 0.0003 +[2026-03-05 15:13:15] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0068500/ +[2026-03-05 15:13:23] (step=0068501) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.402660927411466, LR: 0.0003 +[2026-03-05 15:13:31] (step=0068502) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 13.40285658383878, LR: 0.0003 +[2026-03-05 15:13:39] (step=0068503) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.403052240266092, LR: 0.0003 +[2026-03-05 15:13:47] (step=0068504) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.403247896693406, LR: 0.0003 +[2026-03-05 15:13:54] (step=0068505) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.40344355312072, LR: 0.0003 +[2026-03-05 15:14:02] (step=0068506) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.403639209548034, LR: 0.0003 +[2026-03-05 15:14:10] (step=0068507) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.403834865975348, LR: 0.0003 +[2026-03-05 15:14:18] (step=0068508) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.40403052240266, LR: 0.0003 +[2026-03-05 15:14:26] (step=0068509) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.404226178829974, LR: 0.0003 +[2026-03-05 15:14:34] (step=0068510) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.404421835257288, LR: 0.0003 +[2026-03-05 15:14:42] (step=0068511) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.404617491684602, LR: 0.0003 +[2026-03-05 15:14:49] (step=0068512) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.404813148111915, LR: 0.0003 +[2026-03-05 15:14:57] (step=0068513) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.405008804539229, LR: 0.0003 +[2026-03-05 15:15:05] (step=0068514) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.405204460966543, LR: 0.0003 +[2026-03-05 15:15:13] (step=0068515) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.405400117393857, LR: 0.0003 +[2026-03-05 15:15:21] (step=0068516) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.40559577382117, LR: 0.0003 +[2026-03-05 15:15:29] (step=0068517) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.405791430248483, LR: 0.0003 +[2026-03-05 15:15:37] (step=0068518) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.405987086675797, LR: 0.0003 +[2026-03-05 15:15:44] (step=0068519) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 13.40618274310311, LR: 0.0003 +[2026-03-05 15:15:52] (step=0068520) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 13.406378399530425, LR: 0.0003 +[2026-03-05 15:16:00] (step=0068521) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.406574055957739, LR: 0.0003 +[2026-03-05 15:16:08] (step=0068522) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.406769712385051, LR: 0.0003 +[2026-03-05 15:16:16] (step=0068523) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.406965368812365, LR: 0.0003 +[2026-03-05 15:16:24] (step=0068524) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.40716102523968, LR: 0.0003 +[2026-03-05 15:16:32] (step=0068525) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.407356681666993, LR: 0.0003 +[2026-03-05 15:16:40] (step=0068526) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.407552338094307, LR: 0.0003 +[2026-03-05 15:16:47] (step=0068527) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.40774799452162, LR: 0.0003 +[2026-03-05 15:16:55] (step=0068528) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.407943650948933, LR: 0.0003 +[2026-03-05 15:17:03] (step=0068529) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.408139307376247, LR: 0.0003 +[2026-03-05 15:17:11] (step=0068530) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.408334963803561, LR: 0.0003 +[2026-03-05 15:17:19] (step=0068531) Train Loss: 0.4395, Train Steps/Sec: 0.12, Epoch: 13.408530620230875, LR: 0.0003 +[2026-03-05 15:17:27] (step=0068532) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.408726276658188, LR: 0.0003 +[2026-03-05 15:17:35] (step=0068533) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.408921933085502, LR: 0.0003 +[2026-03-05 15:17:42] (step=0068534) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.409117589512816, LR: 0.0003 +[2026-03-05 15:17:50] (step=0068535) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.40931324594013, LR: 0.0003 +[2026-03-05 15:17:58] (step=0068536) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.409508902367444, LR: 0.0003 +[2026-03-05 15:18:06] (step=0068537) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.409704558794756, LR: 0.0003 +[2026-03-05 15:18:14] (step=0068538) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.40990021522207, LR: 0.0003 +[2026-03-05 15:18:22] (step=0068539) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.410095871649384, LR: 0.0003 +[2026-03-05 15:18:30] (step=0068540) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.410291528076698, LR: 0.0003 +[2026-03-05 15:18:37] (step=0068541) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.41048718450401, LR: 0.0003 +[2026-03-05 15:18:45] (step=0068542) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 13.410682840931324, LR: 0.0003 +[2026-03-05 15:18:53] (step=0068543) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 13.410878497358638, LR: 0.0003 +[2026-03-05 15:19:01] (step=0068544) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.411074153785952, LR: 0.0003 +[2026-03-05 15:19:09] (step=0068545) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.411269810213266, LR: 0.0003 +[2026-03-05 15:19:17] (step=0068546) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.411465466640578, LR: 0.0003 +[2026-03-05 15:19:25] (step=0068547) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 13.411661123067892, LR: 0.0003 +[2026-03-05 15:19:32] (step=0068548) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.411856779495206, LR: 0.0003 +[2026-03-05 15:19:40] (step=0068549) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.41205243592252, LR: 0.0003 +[2026-03-05 15:19:48] (step=0068550) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.412248092349834, LR: 0.0003 +[2026-03-05 15:19:56] (step=0068551) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.412443748777147, LR: 0.0003 +[2026-03-05 15:20:04] (step=0068552) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.41263940520446, LR: 0.0003 +[2026-03-05 15:20:12] (step=0068553) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.412835061631775, LR: 0.0003 +[2026-03-05 15:20:20] (step=0068554) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.413030718059089, LR: 0.0003 +[2026-03-05 15:20:27] (step=0068555) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.413226374486403, LR: 0.0003 +[2026-03-05 15:20:35] (step=0068556) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.413422030913715, LR: 0.0003 +[2026-03-05 15:20:43] (step=0068557) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.413617687341029, LR: 0.0003 +[2026-03-05 15:20:51] (step=0068558) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.413813343768343, LR: 0.0003 +[2026-03-05 15:20:59] (step=0068559) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.414009000195657, LR: 0.0003 +[2026-03-05 15:21:07] (step=0068560) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.414204656622971, LR: 0.0003 +[2026-03-05 15:21:15] (step=0068561) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 13.414400313050283, LR: 0.0003 +[2026-03-05 15:21:22] (step=0068562) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.414595969477597, LR: 0.0003 +[2026-03-05 15:21:30] (step=0068563) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.414791625904911, LR: 0.0003 +[2026-03-05 15:21:38] (step=0068564) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.414987282332225, LR: 0.0003 +[2026-03-05 15:21:46] (step=0068565) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.415182938759537, LR: 0.0003 +[2026-03-05 15:21:54] (step=0068566) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.415378595186851, LR: 0.0003 +[2026-03-05 15:22:02] (step=0068567) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.415574251614165, LR: 0.0003 +[2026-03-05 15:22:10] (step=0068568) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.41576990804148, LR: 0.0003 +[2026-03-05 15:22:17] (step=0068569) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.415965564468793, LR: 0.0003 +[2026-03-05 15:22:25] (step=0068570) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.416161220896106, LR: 0.0003 +[2026-03-05 15:22:33] (step=0068571) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.41635687732342, LR: 0.0003 +[2026-03-05 15:22:41] (step=0068572) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.416552533750734, LR: 0.0003 +[2026-03-05 15:22:49] (step=0068573) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.416748190178048, LR: 0.0003 +[2026-03-05 15:22:57] (step=0068574) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.416943846605362, LR: 0.0003 +[2026-03-05 15:23:05] (step=0068575) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 13.417139503032674, LR: 0.0003 +[2026-03-05 15:23:12] (step=0068576) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.417335159459988, LR: 0.0003 +[2026-03-05 15:23:20] (step=0068577) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.417530815887302, LR: 0.0003 +[2026-03-05 15:23:28] (step=0068578) Train Loss: 0.4524, Train Steps/Sec: 0.12, Epoch: 13.417726472314616, LR: 0.0003 +[2026-03-05 15:23:36] (step=0068579) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.41792212874193, LR: 0.0003 +[2026-03-05 15:23:44] (step=0068580) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.418117785169242, LR: 0.0003 +[2026-03-05 15:23:52] (step=0068581) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.418313441596556, LR: 0.0003 +[2026-03-05 15:24:00] (step=0068582) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.41850909802387, LR: 0.0003 +[2026-03-05 15:24:08] (step=0068583) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.418704754451184, LR: 0.0003 +[2026-03-05 15:24:15] (step=0068584) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.418900410878498, LR: 0.0003 +[2026-03-05 15:24:23] (step=0068585) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.41909606730581, LR: 0.0003 +[2026-03-05 15:24:31] (step=0068586) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.419291723733124, LR: 0.0003 +[2026-03-05 15:24:39] (step=0068587) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.419487380160438, LR: 0.0003 +[2026-03-05 15:24:47] (step=0068588) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.419683036587752, LR: 0.0003 +[2026-03-05 15:24:55] (step=0068589) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.419878693015066, LR: 0.0003 +[2026-03-05 15:25:03] (step=0068590) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 13.420074349442379, LR: 0.0003 +[2026-03-05 15:25:10] (step=0068591) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.420270005869693, LR: 0.0003 +[2026-03-05 15:25:18] (step=0068592) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.420465662297007, LR: 0.0003 +[2026-03-05 15:25:26] (step=0068593) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 13.42066131872432, LR: 0.0003 +[2026-03-05 15:25:34] (step=0068594) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.420856975151633, LR: 0.0003 +[2026-03-05 15:25:42] (step=0068595) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.421052631578947, LR: 0.0003 +[2026-03-05 15:25:50] (step=0068596) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.421248288006261, LR: 0.0003 +[2026-03-05 15:25:58] (step=0068597) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.421443944433575, LR: 0.0003 +[2026-03-05 15:26:05] (step=0068598) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.421639600860889, LR: 0.0003 +[2026-03-05 15:26:13] (step=0068599) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.421835257288201, LR: 0.0003 +[2026-03-05 15:26:21] (step=0068600) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.422030913715515, LR: 0.0003 +[2026-03-05 15:26:29] (step=0068601) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.42222657014283, LR: 0.0003 +[2026-03-05 15:26:37] (step=0068602) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.422422226570143, LR: 0.0003 +[2026-03-05 15:26:45] (step=0068603) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.422617882997457, LR: 0.0003 +[2026-03-05 15:26:52] (step=0068604) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.42281353942477, LR: 0.0003 +[2026-03-05 15:27:00] (step=0068605) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.423009195852083, LR: 0.0003 +[2026-03-05 15:27:08] (step=0068606) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.423204852279397, LR: 0.0003 +[2026-03-05 15:27:16] (step=0068607) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.423400508706711, LR: 0.0003 +[2026-03-05 15:27:24] (step=0068608) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.423596165134025, LR: 0.0003 +[2026-03-05 15:27:32] (step=0068609) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.423791821561338, LR: 0.0003 +[2026-03-05 15:27:40] (step=0068610) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.423987477988652, LR: 0.0003 +[2026-03-05 15:27:47] (step=0068611) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.424183134415966, LR: 0.0003 +[2026-03-05 15:27:55] (step=0068612) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.42437879084328, LR: 0.0003 +[2026-03-05 15:28:03] (step=0068613) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.424574447270594, LR: 0.0003 +[2026-03-05 15:28:11] (step=0068614) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.424770103697906, LR: 0.0003 +[2026-03-05 15:28:19] (step=0068615) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.42496576012522, LR: 0.0003 +[2026-03-05 15:28:27] (step=0068616) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.425161416552534, LR: 0.0003 +[2026-03-05 15:28:35] (step=0068617) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.425357072979848, LR: 0.0003 +[2026-03-05 15:28:42] (step=0068618) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.42555272940716, LR: 0.0003 +[2026-03-05 15:28:50] (step=0068619) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.425748385834474, LR: 0.0003 +[2026-03-05 15:28:58] (step=0068620) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.425944042261788, LR: 0.0003 +[2026-03-05 15:29:06] (step=0068621) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.426139698689102, LR: 0.0003 +[2026-03-05 15:29:14] (step=0068622) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.426335355116416, LR: 0.0003 +[2026-03-05 15:29:22] (step=0068623) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.426531011543728, LR: 0.0003 +[2026-03-05 15:29:30] (step=0068624) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.426726667971042, LR: 0.0003 +[2026-03-05 15:29:38] (step=0068625) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.426922324398356, LR: 0.0003 +[2026-03-05 15:29:45] (step=0068626) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.42711798082567, LR: 0.0003 +[2026-03-05 15:29:53] (step=0068627) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.427313637252984, LR: 0.0003 +[2026-03-05 15:30:01] (step=0068628) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.427509293680297, LR: 0.0003 +[2026-03-05 15:30:09] (step=0068629) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.42770495010761, LR: 0.0003 +[2026-03-05 15:30:17] (step=0068630) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.427900606534925, LR: 0.0003 +[2026-03-05 15:30:25] (step=0068631) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.428096262962239, LR: 0.0003 +[2026-03-05 15:30:33] (step=0068632) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.428291919389553, LR: 0.0003 +[2026-03-05 15:30:40] (step=0068633) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.428487575816865, LR: 0.0003 +[2026-03-05 15:30:48] (step=0068634) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.428683232244179, LR: 0.0003 +[2026-03-05 15:30:56] (step=0068635) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 13.428878888671493, LR: 0.0003 +[2026-03-05 15:31:04] (step=0068636) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.429074545098807, LR: 0.0003 +[2026-03-05 15:31:12] (step=0068637) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.429270201526121, LR: 0.0003 +[2026-03-05 15:31:20] (step=0068638) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.429465857953433, LR: 0.0003 +[2026-03-05 15:31:28] (step=0068639) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.429661514380747, LR: 0.0003 +[2026-03-05 15:31:35] (step=0068640) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.429857170808061, LR: 0.0003 +[2026-03-05 15:31:43] (step=0068641) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 13.430052827235375, LR: 0.0003 +[2026-03-05 15:31:51] (step=0068642) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.43024848366269, LR: 0.0003 +[2026-03-05 15:31:59] (step=0068643) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.430444140090001, LR: 0.0003 +[2026-03-05 15:32:07] (step=0068644) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.430639796517315, LR: 0.0003 +[2026-03-05 15:32:15] (step=0068645) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.43083545294463, LR: 0.0003 +[2026-03-05 15:32:23] (step=0068646) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.431031109371943, LR: 0.0003 +[2026-03-05 15:32:30] (step=0068647) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.431226765799256, LR: 0.0003 +[2026-03-05 15:32:38] (step=0068648) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.43142242222657, LR: 0.0003 +[2026-03-05 15:32:46] (step=0068649) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 13.431618078653884, LR: 0.0003 +[2026-03-05 15:32:54] (step=0068650) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.431813735081198, LR: 0.0003 +[2026-03-05 15:33:02] (step=0068651) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.432009391508512, LR: 0.0003 +[2026-03-05 15:33:10] (step=0068652) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.432205047935824, LR: 0.0003 +[2026-03-05 15:33:18] (step=0068653) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.432400704363138, LR: 0.0003 +[2026-03-05 15:33:25] (step=0068654) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.432596360790452, LR: 0.0003 +[2026-03-05 15:33:33] (step=0068655) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.432792017217766, LR: 0.0003 +[2026-03-05 15:33:41] (step=0068656) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.43298767364508, LR: 0.0003 +[2026-03-05 15:33:49] (step=0068657) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.433183330072392, LR: 0.0003 +[2026-03-05 15:33:57] (step=0068658) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.433378986499706, LR: 0.0003 +[2026-03-05 15:34:05] (step=0068659) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.43357464292702, LR: 0.0003 +[2026-03-05 15:34:13] (step=0068660) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.433770299354334, LR: 0.0003 +[2026-03-05 15:34:20] (step=0068661) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.433965955781648, LR: 0.0003 +[2026-03-05 15:34:28] (step=0068662) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 13.43416161220896, LR: 0.0003 +[2026-03-05 15:34:36] (step=0068663) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.434357268636274, LR: 0.0003 +[2026-03-05 15:34:44] (step=0068664) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.434552925063588, LR: 0.0003 +[2026-03-05 15:34:52] (step=0068665) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.434748581490902, LR: 0.0003 +[2026-03-05 15:35:00] (step=0068666) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.434944237918216, LR: 0.0003 +[2026-03-05 15:35:07] (step=0068667) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.435139894345529, LR: 0.0003 +[2026-03-05 15:35:15] (step=0068668) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.435335550772843, LR: 0.0003 +[2026-03-05 15:35:23] (step=0068669) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.435531207200157, LR: 0.0003 +[2026-03-05 15:35:31] (step=0068670) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.43572686362747, LR: 0.0003 +[2026-03-05 15:35:39] (step=0068671) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.435922520054783, LR: 0.0003 +[2026-03-05 15:35:47] (step=0068672) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.436118176482097, LR: 0.0003 +[2026-03-05 15:35:55] (step=0068673) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.436313832909411, LR: 0.0003 +[2026-03-05 15:36:03] (step=0068674) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.436509489336725, LR: 0.0003 +[2026-03-05 15:36:10] (step=0068675) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.436705145764039, LR: 0.0003 +[2026-03-05 15:36:18] (step=0068676) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.436900802191351, LR: 0.0003 +[2026-03-05 15:36:26] (step=0068677) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.437096458618665, LR: 0.0003 +[2026-03-05 15:36:34] (step=0068678) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.43729211504598, LR: 0.0003 +[2026-03-05 15:36:42] (step=0068679) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.437487771473293, LR: 0.0003 +[2026-03-05 15:36:50] (step=0068680) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.437683427900607, LR: 0.0003 +[2026-03-05 15:36:58] (step=0068681) Train Loss: 0.4490, Train Steps/Sec: 0.12, Epoch: 13.43787908432792, LR: 0.0003 +[2026-03-05 15:37:06] (step=0068682) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.438074740755233, LR: 0.0003 +[2026-03-05 15:37:13] (step=0068683) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.438270397182547, LR: 0.0003 +[2026-03-05 15:37:21] (step=0068684) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.438466053609861, LR: 0.0003 +[2026-03-05 15:37:29] (step=0068685) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.438661710037175, LR: 0.0003 +[2026-03-05 15:37:37] (step=0068686) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.438857366464488, LR: 0.0003 +[2026-03-05 15:37:45] (step=0068687) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.439053022891802, LR: 0.0003 +[2026-03-05 15:37:53] (step=0068688) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.439248679319116, LR: 0.0003 +[2026-03-05 15:38:01] (step=0068689) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.43944433574643, LR: 0.0003 +[2026-03-05 15:38:09] (step=0068690) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.439639992173744, LR: 0.0003 +[2026-03-05 15:38:16] (step=0068691) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.439835648601056, LR: 0.0003 +[2026-03-05 15:38:24] (step=0068692) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.44003130502837, LR: 0.0003 +[2026-03-05 15:38:32] (step=0068693) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 13.440226961455684, LR: 0.0003 +[2026-03-05 15:38:40] (step=0068694) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 13.440422617882998, LR: 0.0003 +[2026-03-05 15:38:48] (step=0068695) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.440618274310312, LR: 0.0003 +[2026-03-05 15:38:56] (step=0068696) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.440813930737624, LR: 0.0003 +[2026-03-05 15:39:04] (step=0068697) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.441009587164938, LR: 0.0003 +[2026-03-05 15:39:11] (step=0068698) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.441205243592252, LR: 0.0003 +[2026-03-05 15:39:19] (step=0068699) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.441400900019566, LR: 0.0003 +[2026-03-05 15:39:27] (step=0068700) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.441596556446878, LR: 0.0003 +[2026-03-05 15:39:35] (step=0068701) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.441792212874192, LR: 0.0003 +[2026-03-05 15:39:43] (step=0068702) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.441987869301506, LR: 0.0003 +[2026-03-05 15:39:51] (step=0068703) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.44218352572882, LR: 0.0003 +[2026-03-05 15:39:59] (step=0068704) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.442379182156134, LR: 0.0003 +[2026-03-05 15:40:06] (step=0068705) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.442574838583447, LR: 0.0003 +[2026-03-05 15:40:14] (step=0068706) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.44277049501076, LR: 0.0003 +[2026-03-05 15:40:22] (step=0068707) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.442966151438075, LR: 0.0003 +[2026-03-05 15:40:30] (step=0068708) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.443161807865389, LR: 0.0003 +[2026-03-05 15:40:38] (step=0068709) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.443357464292703, LR: 0.0003 +[2026-03-05 15:40:46] (step=0068710) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.443553120720015, LR: 0.0003 +[2026-03-05 15:40:53] (step=0068711) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.443748777147329, LR: 0.0003 +[2026-03-05 15:41:01] (step=0068712) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 13.443944433574643, LR: 0.0003 +[2026-03-05 15:41:09] (step=0068713) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.444140090001957, LR: 0.0003 +[2026-03-05 15:41:17] (step=0068714) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.444335746429271, LR: 0.0003 +[2026-03-05 15:41:25] (step=0068715) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.444531402856583, LR: 0.0003 +[2026-03-05 15:41:33] (step=0068716) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.444727059283897, LR: 0.0003 +[2026-03-05 15:41:40] (step=0068717) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.444922715711211, LR: 0.0003 +[2026-03-05 15:41:48] (step=0068718) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.445118372138525, LR: 0.0003 +[2026-03-05 15:41:56] (step=0068719) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.44531402856584, LR: 0.0003 +[2026-03-05 15:42:04] (step=0068720) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.445509684993151, LR: 0.0003 +[2026-03-05 15:42:12] (step=0068721) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.445705341420465, LR: 0.0003 +[2026-03-05 15:42:20] (step=0068722) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 13.44590099784778, LR: 0.0003 +[2026-03-05 15:42:28] (step=0068723) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.446096654275093, LR: 0.0003 +[2026-03-05 15:42:35] (step=0068724) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.446292310702406, LR: 0.0003 +[2026-03-05 15:42:43] (step=0068725) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.44648796712972, LR: 0.0003 +[2026-03-05 15:42:51] (step=0068726) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.446683623557034, LR: 0.0003 +[2026-03-05 15:42:59] (step=0068727) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 13.446879279984348, LR: 0.0003 +[2026-03-05 15:43:07] (step=0068728) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.447074936411662, LR: 0.0003 +[2026-03-05 15:43:15] (step=0068729) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.447270592838974, LR: 0.0003 +[2026-03-05 15:43:23] (step=0068730) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.447466249266288, LR: 0.0003 +[2026-03-05 15:43:30] (step=0068731) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.447661905693602, LR: 0.0003 +[2026-03-05 15:43:38] (step=0068732) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.447857562120916, LR: 0.0003 +[2026-03-05 15:43:46] (step=0068733) Train Loss: 0.4370, Train Steps/Sec: 0.12, Epoch: 13.44805321854823, LR: 0.0003 +[2026-03-05 15:43:54] (step=0068734) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.448248874975542, LR: 0.0003 +[2026-03-05 15:44:02] (step=0068735) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 13.448444531402856, LR: 0.0003 +[2026-03-05 15:44:10] (step=0068736) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.44864018783017, LR: 0.0003 +[2026-03-05 15:44:18] (step=0068737) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.448835844257484, LR: 0.0003 +[2026-03-05 15:44:26] (step=0068738) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.449031500684798, LR: 0.0003 +[2026-03-05 15:44:33] (step=0068739) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.44922715711211, LR: 0.0003 +[2026-03-05 15:44:41] (step=0068740) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.449422813539424, LR: 0.0003 +[2026-03-05 15:44:49] (step=0068741) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.449618469966738, LR: 0.0003 +[2026-03-05 15:44:57] (step=0068742) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.449814126394052, LR: 0.0003 +[2026-03-05 15:45:05] (step=0068743) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.450009782821366, LR: 0.0003 +[2026-03-05 15:45:13] (step=0068744) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.450205439248679, LR: 0.0003 +[2026-03-05 15:45:21] (step=0068745) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.450401095675993, LR: 0.0003 +[2026-03-05 15:45:28] (step=0068746) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.450596752103307, LR: 0.0003 +[2026-03-05 15:45:36] (step=0068747) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.45079240853062, LR: 0.0003 +[2026-03-05 15:45:44] (step=0068748) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 13.450988064957935, LR: 0.0003 +[2026-03-05 15:45:52] (step=0068749) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.451183721385247, LR: 0.0003 +[2026-03-05 15:46:00] (step=0068750) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.451379377812561, LR: 0.0003 +[2026-03-05 15:46:08] (step=0068751) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.451575034239875, LR: 0.0003 +[2026-03-05 15:46:16] (step=0068752) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.451770690667189, LR: 0.0003 +[2026-03-05 15:46:23] (step=0068753) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.451966347094501, LR: 0.0003 +[2026-03-05 15:46:31] (step=0068754) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.452162003521815, LR: 0.0003 +[2026-03-05 15:46:39] (step=0068755) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.45235765994913, LR: 0.0003 +[2026-03-05 15:46:47] (step=0068756) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.452553316376443, LR: 0.0003 +[2026-03-05 15:46:55] (step=0068757) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.452748972803757, LR: 0.0003 +[2026-03-05 15:47:03] (step=0068758) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 13.45294462923107, LR: 0.0003 +[2026-03-05 15:47:10] (step=0068759) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 13.453140285658383, LR: 0.0003 +[2026-03-05 15:47:18] (step=0068760) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.453335942085697, LR: 0.0003 +[2026-03-05 15:47:26] (step=0068761) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.453531598513012, LR: 0.0003 +[2026-03-05 15:47:34] (step=0068762) Train Loss: 0.4242, Train Steps/Sec: 0.13, Epoch: 13.453727254940326, LR: 0.0003 +[2026-03-05 15:47:42] (step=0068763) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 13.453922911367638, LR: 0.0003 +[2026-03-05 15:47:50] (step=0068764) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.454118567794952, LR: 0.0003 +[2026-03-05 15:47:58] (step=0068765) Train Loss: 0.4357, Train Steps/Sec: 0.12, Epoch: 13.454314224222266, LR: 0.0003 +[2026-03-05 15:48:06] (step=0068766) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.45450988064958, LR: 0.0003 +[2026-03-05 15:48:13] (step=0068767) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.454705537076894, LR: 0.0003 +[2026-03-05 15:48:21] (step=0068768) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.454901193504206, LR: 0.0003 +[2026-03-05 15:48:29] (step=0068769) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.45509684993152, LR: 0.0003 +[2026-03-05 15:48:37] (step=0068770) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.455292506358834, LR: 0.0003 +[2026-03-05 15:48:45] (step=0068771) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.455488162786148, LR: 0.0003 +[2026-03-05 15:48:53] (step=0068772) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.455683819213462, LR: 0.0003 +[2026-03-05 15:49:01] (step=0068773) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.455879475640774, LR: 0.0003 +[2026-03-05 15:49:08] (step=0068774) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.456075132068088, LR: 0.0003 +[2026-03-05 15:49:16] (step=0068775) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.456270788495402, LR: 0.0003 +[2026-03-05 15:49:24] (step=0068776) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.456466444922716, LR: 0.0003 +[2026-03-05 15:49:32] (step=0068777) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.456662101350028, LR: 0.0003 +[2026-03-05 15:49:40] (step=0068778) Train Loss: 0.4494, Train Steps/Sec: 0.12, Epoch: 13.456857757777342, LR: 0.0003 +[2026-03-05 15:49:48] (step=0068779) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.457053414204657, LR: 0.0003 +[2026-03-05 15:49:56] (step=0068780) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.45724907063197, LR: 0.0003 +[2026-03-05 15:50:04] (step=0068781) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.457444727059285, LR: 0.0003 +[2026-03-05 15:50:12] (step=0068782) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.457640383486597, LR: 0.0003 +[2026-03-05 15:50:19] (step=0068783) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.45783603991391, LR: 0.0003 +[2026-03-05 15:50:27] (step=0068784) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.458031696341225, LR: 0.0003 +[2026-03-05 15:50:35] (step=0068785) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.458227352768539, LR: 0.0003 +[2026-03-05 15:50:43] (step=0068786) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.458423009195853, LR: 0.0003 +[2026-03-05 15:50:51] (step=0068787) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.458618665623165, LR: 0.0003 +[2026-03-05 15:50:59] (step=0068788) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.458814322050479, LR: 0.0003 +[2026-03-05 15:51:07] (step=0068789) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.459009978477793, LR: 0.0003 +[2026-03-05 15:51:14] (step=0068790) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.459205634905107, LR: 0.0003 +[2026-03-05 15:51:22] (step=0068791) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.459401291332421, LR: 0.0003 +[2026-03-05 15:51:30] (step=0068792) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.459596947759733, LR: 0.0003 +[2026-03-05 15:51:38] (step=0068793) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.459792604187047, LR: 0.0003 +[2026-03-05 15:51:46] (step=0068794) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.459988260614361, LR: 0.0003 +[2026-03-05 15:51:54] (step=0068795) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.460183917041675, LR: 0.0003 +[2026-03-05 15:52:02] (step=0068796) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.46037957346899, LR: 0.0003 +[2026-03-05 15:52:09] (step=0068797) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.460575229896302, LR: 0.0003 +[2026-03-05 15:52:17] (step=0068798) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.460770886323616, LR: 0.0003 +[2026-03-05 15:52:25] (step=0068799) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.46096654275093, LR: 0.0003 +[2026-03-05 15:52:33] (step=0068800) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 13.461162199178244, LR: 0.0003 +[2026-03-05 15:52:41] (step=0068801) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.461357855605556, LR: 0.0003 +[2026-03-05 15:52:49] (step=0068802) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.46155351203287, LR: 0.0003 +[2026-03-05 15:52:57] (step=0068803) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 13.461749168460184, LR: 0.0003 +[2026-03-05 15:53:04] (step=0068804) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.461944824887498, LR: 0.0003 +[2026-03-05 15:53:12] (step=0068805) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.462140481314812, LR: 0.0003 +[2026-03-05 15:53:20] (step=0068806) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 13.462336137742124, LR: 0.0003 +[2026-03-05 15:53:28] (step=0068807) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.462531794169438, LR: 0.0003 +[2026-03-05 15:53:36] (step=0068808) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.462727450596752, LR: 0.0003 +[2026-03-05 15:53:44] (step=0068809) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.462923107024066, LR: 0.0003 +[2026-03-05 15:53:52] (step=0068810) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.46311876345138, LR: 0.0003 +[2026-03-05 15:54:00] (step=0068811) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.463314419878692, LR: 0.0003 +[2026-03-05 15:54:07] (step=0068812) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.463510076306006, LR: 0.0003 +[2026-03-05 15:54:15] (step=0068813) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.46370573273332, LR: 0.0003 +[2026-03-05 15:54:23] (step=0068814) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.463901389160634, LR: 0.0003 +[2026-03-05 15:54:31] (step=0068815) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.464097045587948, LR: 0.0003 +[2026-03-05 15:54:39] (step=0068816) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.46429270201526, LR: 0.0003 +[2026-03-05 15:54:47] (step=0068817) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 13.464488358442575, LR: 0.0003 +[2026-03-05 15:54:55] (step=0068818) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.464684014869889, LR: 0.0003 +[2026-03-05 15:55:03] (step=0068819) Train Loss: 0.4281, Train Steps/Sec: 0.12, Epoch: 13.464879671297203, LR: 0.0003 +[2026-03-05 15:55:10] (step=0068820) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 13.465075327724517, LR: 0.0003 +[2026-03-05 15:55:18] (step=0068821) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.465270984151829, LR: 0.0003 +[2026-03-05 15:55:26] (step=0068822) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 13.465466640579143, LR: 0.0003 +[2026-03-05 15:55:34] (step=0068823) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.465662297006457, LR: 0.0003 +[2026-03-05 15:55:42] (step=0068824) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.46585795343377, LR: 0.0003 +[2026-03-05 15:55:50] (step=0068825) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.466053609861085, LR: 0.0003 +[2026-03-05 15:55:58] (step=0068826) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.466249266288397, LR: 0.0003 +[2026-03-05 15:56:06] (step=0068827) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.466444922715711, LR: 0.0003 +[2026-03-05 15:56:13] (step=0068828) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.466640579143025, LR: 0.0003 +[2026-03-05 15:56:21] (step=0068829) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.466836235570339, LR: 0.0003 +[2026-03-05 15:56:29] (step=0068830) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.467031891997651, LR: 0.0003 +[2026-03-05 15:56:37] (step=0068831) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.467227548424965, LR: 0.0003 +[2026-03-05 15:56:45] (step=0068832) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.46742320485228, LR: 0.0003 +[2026-03-05 15:56:53] (step=0068833) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 13.467618861279593, LR: 0.0003 +[2026-03-05 15:57:01] (step=0068834) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.467814517706907, LR: 0.0003 +[2026-03-05 15:57:09] (step=0068835) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.46801017413422, LR: 0.0003 +[2026-03-05 15:57:16] (step=0068836) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.468205830561534, LR: 0.0003 +[2026-03-05 15:57:24] (step=0068837) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.468401486988848, LR: 0.0003 +[2026-03-05 15:57:32] (step=0068838) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.468597143416162, LR: 0.0003 +[2026-03-05 15:57:40] (step=0068839) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.468792799843476, LR: 0.0003 +[2026-03-05 15:57:48] (step=0068840) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.468988456270788, LR: 0.0003 +[2026-03-05 15:57:56] (step=0068841) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.469184112698102, LR: 0.0003 +[2026-03-05 15:58:04] (step=0068842) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.469379769125416, LR: 0.0003 +[2026-03-05 15:58:11] (step=0068843) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 13.46957542555273, LR: 0.0003 +[2026-03-05 15:58:19] (step=0068844) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.469771081980044, LR: 0.0003 +[2026-03-05 15:58:27] (step=0068845) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.469966738407356, LR: 0.0003 +[2026-03-05 15:58:35] (step=0068846) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.47016239483467, LR: 0.0003 +[2026-03-05 15:58:43] (step=0068847) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.470358051261984, LR: 0.0003 +[2026-03-05 15:58:51] (step=0068848) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.470553707689298, LR: 0.0003 +[2026-03-05 15:58:59] (step=0068849) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.470749364116612, LR: 0.0003 +[2026-03-05 15:59:06] (step=0068850) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.470945020543924, LR: 0.0003 +[2026-03-05 15:59:14] (step=0068851) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.471140676971238, LR: 0.0003 +[2026-03-05 15:59:22] (step=0068852) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.471336333398552, LR: 0.0003 +[2026-03-05 15:59:30] (step=0068853) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 13.471531989825866, LR: 0.0003 +[2026-03-05 15:59:38] (step=0068854) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.471727646253179, LR: 0.0003 +[2026-03-05 15:59:46] (step=0068855) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.471923302680493, LR: 0.0003 +[2026-03-05 15:59:54] (step=0068856) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.472118959107807, LR: 0.0003 +[2026-03-05 16:00:02] (step=0068857) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.47231461553512, LR: 0.0003 +[2026-03-05 16:00:09] (step=0068858) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.472510271962435, LR: 0.0003 +[2026-03-05 16:00:17] (step=0068859) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.472705928389747, LR: 0.0003 +[2026-03-05 16:00:25] (step=0068860) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.47290158481706, LR: 0.0003 +[2026-03-05 16:00:33] (step=0068861) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.473097241244375, LR: 0.0003 +[2026-03-05 16:00:41] (step=0068862) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.473292897671689, LR: 0.0003 +[2026-03-05 16:00:49] (step=0068863) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.473488554099003, LR: 0.0003 +[2026-03-05 16:00:57] (step=0068864) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.473684210526315, LR: 0.0003 +[2026-03-05 16:01:04] (step=0068865) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.473879866953629, LR: 0.0003 +[2026-03-05 16:01:12] (step=0068866) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.474075523380943, LR: 0.0003 +[2026-03-05 16:01:20] (step=0068867) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.474271179808257, LR: 0.0003 +[2026-03-05 16:01:28] (step=0068868) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.474466836235571, LR: 0.0003 +[2026-03-05 16:01:36] (step=0068869) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.474662492662883, LR: 0.0003 +[2026-03-05 16:01:44] (step=0068870) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.474858149090197, LR: 0.0003 +[2026-03-05 16:01:52] (step=0068871) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.475053805517511, LR: 0.0003 +[2026-03-05 16:02:00] (step=0068872) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.475249461944825, LR: 0.0003 +[2026-03-05 16:02:08] (step=0068873) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.47544511837214, LR: 0.0003 +[2026-03-05 16:02:15] (step=0068874) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.475640774799452, LR: 0.0003 +[2026-03-05 16:02:23] (step=0068875) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.475836431226766, LR: 0.0003 +[2026-03-05 16:02:31] (step=0068876) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.47603208765408, LR: 0.0003 +[2026-03-05 16:02:39] (step=0068877) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.476227744081394, LR: 0.0003 +[2026-03-05 16:02:47] (step=0068878) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.476423400508708, LR: 0.0003 +[2026-03-05 16:02:55] (step=0068879) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.47661905693602, LR: 0.0003 +[2026-03-05 16:03:03] (step=0068880) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.476814713363334, LR: 0.0003 +[2026-03-05 16:03:11] (step=0068881) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.477010369790648, LR: 0.0003 +[2026-03-05 16:03:18] (step=0068882) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.477206026217962, LR: 0.0003 +[2026-03-05 16:03:26] (step=0068883) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.477401682645274, LR: 0.0003 +[2026-03-05 16:03:34] (step=0068884) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.477597339072588, LR: 0.0003 +[2026-03-05 16:03:42] (step=0068885) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.477792995499902, LR: 0.0003 +[2026-03-05 16:03:50] (step=0068886) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.477988651927216, LR: 0.0003 +[2026-03-05 16:03:58] (step=0068887) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.47818430835453, LR: 0.0003 +[2026-03-05 16:04:06] (step=0068888) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.478379964781842, LR: 0.0003 +[2026-03-05 16:04:13] (step=0068889) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.478575621209156, LR: 0.0003 +[2026-03-05 16:04:21] (step=0068890) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.47877127763647, LR: 0.0003 +[2026-03-05 16:04:29] (step=0068891) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.478966934063784, LR: 0.0003 +[2026-03-05 16:04:37] (step=0068892) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.479162590491098, LR: 0.0003 +[2026-03-05 16:04:45] (step=0068893) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.47935824691841, LR: 0.0003 +[2026-03-05 16:04:53] (step=0068894) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.479553903345725, LR: 0.0003 +[2026-03-05 16:05:01] (step=0068895) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.479749559773039, LR: 0.0003 +[2026-03-05 16:05:09] (step=0068896) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.479945216200353, LR: 0.0003 +[2026-03-05 16:05:16] (step=0068897) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.480140872627667, LR: 0.0003 +[2026-03-05 16:05:24] (step=0068898) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.480336529054979, LR: 0.0003 +[2026-03-05 16:05:32] (step=0068899) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 13.480532185482293, LR: 0.0003 +[2026-03-05 16:05:40] (step=0068900) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.480727841909607, LR: 0.0003 +[2026-03-05 16:05:48] (step=0068901) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.48092349833692, LR: 0.0003 +[2026-03-05 16:05:56] (step=0068902) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.481119154764235, LR: 0.0003 +[2026-03-05 16:06:03] (step=0068903) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.481314811191547, LR: 0.0003 +[2026-03-05 16:06:11] (step=0068904) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 13.481510467618861, LR: 0.0003 +[2026-03-05 16:06:19] (step=0068905) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.481706124046175, LR: 0.0003 +[2026-03-05 16:06:27] (step=0068906) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.481901780473489, LR: 0.0003 +[2026-03-05 16:06:35] (step=0068907) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 13.482097436900801, LR: 0.0003 +[2026-03-05 16:06:43] (step=0068908) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.482293093328115, LR: 0.0003 +[2026-03-05 16:06:51] (step=0068909) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.48248874975543, LR: 0.0003 +[2026-03-05 16:06:58] (step=0068910) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.482684406182743, LR: 0.0003 +[2026-03-05 16:07:06] (step=0068911) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 13.482880062610057, LR: 0.0003 +[2026-03-05 16:07:14] (step=0068912) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.48307571903737, LR: 0.0003 +[2026-03-05 16:07:22] (step=0068913) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.483271375464684, LR: 0.0003 +[2026-03-05 16:07:30] (step=0068914) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.483467031891998, LR: 0.0003 +[2026-03-05 16:07:38] (step=0068915) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 13.483662688319312, LR: 0.0003 +[2026-03-05 16:07:45] (step=0068916) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.483858344746626, LR: 0.0003 +[2026-03-05 16:07:53] (step=0068917) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.484054001173938, LR: 0.0003 +[2026-03-05 16:08:01] (step=0068918) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.484249657601252, LR: 0.0003 +[2026-03-05 16:08:09] (step=0068919) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.484445314028566, LR: 0.0003 +[2026-03-05 16:08:17] (step=0068920) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.48464097045588, LR: 0.0003 +[2026-03-05 16:08:25] (step=0068921) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.484836626883194, LR: 0.0003 +[2026-03-05 16:08:33] (step=0068922) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.485032283310506, LR: 0.0003 +[2026-03-05 16:08:41] (step=0068923) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.48522793973782, LR: 0.0003 +[2026-03-05 16:08:48] (step=0068924) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.485423596165134, LR: 0.0003 +[2026-03-05 16:08:56] (step=0068925) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.485619252592448, LR: 0.0003 +[2026-03-05 16:09:04] (step=0068926) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.485814909019762, LR: 0.0003 +[2026-03-05 16:09:12] (step=0068927) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.486010565447074, LR: 0.0003 +[2026-03-05 16:09:20] (step=0068928) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 13.486206221874388, LR: 0.0003 +[2026-03-05 16:09:28] (step=0068929) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.486401878301702, LR: 0.0003 +[2026-03-05 16:09:36] (step=0068930) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 13.486597534729016, LR: 0.0003 +[2026-03-05 16:09:43] (step=0068931) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.48679319115633, LR: 0.0003 +[2026-03-05 16:09:51] (step=0068932) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.486988847583643, LR: 0.0003 +[2026-03-05 16:09:59] (step=0068933) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.487184504010957, LR: 0.0003 +[2026-03-05 16:10:07] (step=0068934) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.48738016043827, LR: 0.0003 +[2026-03-05 16:10:15] (step=0068935) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.487575816865585, LR: 0.0003 +[2026-03-05 16:10:23] (step=0068936) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.487771473292897, LR: 0.0003 +[2026-03-05 16:10:31] (step=0068937) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.48796712972021, LR: 0.0003 +[2026-03-05 16:10:38] (step=0068938) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.488162786147525, LR: 0.0003 +[2026-03-05 16:10:46] (step=0068939) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.488358442574839, LR: 0.0003 +[2026-03-05 16:10:54] (step=0068940) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.488554099002153, LR: 0.0003 +[2026-03-05 16:11:02] (step=0068941) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.488749755429465, LR: 0.0003 +[2026-03-05 16:11:10] (step=0068942) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.488945411856779, LR: 0.0003 +[2026-03-05 16:11:18] (step=0068943) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.489141068284093, LR: 0.0003 +[2026-03-05 16:11:26] (step=0068944) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.489336724711407, LR: 0.0003 +[2026-03-05 16:11:33] (step=0068945) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.489532381138721, LR: 0.0003 +[2026-03-05 16:11:41] (step=0068946) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.489728037566033, LR: 0.0003 +[2026-03-05 16:11:49] (step=0068947) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.489923693993347, LR: 0.0003 +[2026-03-05 16:11:57] (step=0068948) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.490119350420661, LR: 0.0003 +[2026-03-05 16:12:05] (step=0068949) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.490315006847975, LR: 0.0003 +[2026-03-05 16:12:13] (step=0068950) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.49051066327529, LR: 0.0003 +[2026-03-05 16:12:21] (step=0068951) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.490706319702602, LR: 0.0003 +[2026-03-05 16:12:28] (step=0068952) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.490901976129916, LR: 0.0003 +[2026-03-05 16:12:36] (step=0068953) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.49109763255723, LR: 0.0003 +[2026-03-05 16:12:44] (step=0068954) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.491293288984544, LR: 0.0003 +[2026-03-05 16:12:52] (step=0068955) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.491488945411858, LR: 0.0003 +[2026-03-05 16:13:00] (step=0068956) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.49168460183917, LR: 0.0003 +[2026-03-05 16:13:08] (step=0068957) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 13.491880258266484, LR: 0.0003 +[2026-03-05 16:13:16] (step=0068958) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.492075914693798, LR: 0.0003 +[2026-03-05 16:13:23] (step=0068959) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.492271571121112, LR: 0.0003 +[2026-03-05 16:13:31] (step=0068960) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.492467227548424, LR: 0.0003 +[2026-03-05 16:13:39] (step=0068961) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.492662883975738, LR: 0.0003 +[2026-03-05 16:13:47] (step=0068962) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 13.492858540403052, LR: 0.0003 +[2026-03-05 16:13:55] (step=0068963) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.493054196830366, LR: 0.0003 +[2026-03-05 16:14:03] (step=0068964) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.49324985325768, LR: 0.0003 +[2026-03-05 16:14:11] (step=0068965) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.493445509684992, LR: 0.0003 +[2026-03-05 16:14:18] (step=0068966) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.493641166112306, LR: 0.0003 +[2026-03-05 16:14:26] (step=0068967) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.49383682253962, LR: 0.0003 +[2026-03-05 16:14:34] (step=0068968) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.494032478966934, LR: 0.0003 +[2026-03-05 16:14:42] (step=0068969) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.494228135394248, LR: 0.0003 +[2026-03-05 16:14:50] (step=0068970) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.49442379182156, LR: 0.0003 +[2026-03-05 16:14:58] (step=0068971) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.494619448248875, LR: 0.0003 +[2026-03-05 16:15:06] (step=0068972) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.494815104676189, LR: 0.0003 +[2026-03-05 16:15:14] (step=0068973) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.495010761103503, LR: 0.0003 +[2026-03-05 16:15:22] (step=0068974) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.495206417530817, LR: 0.0003 +[2026-03-05 16:15:30] (step=0068975) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.495402073958129, LR: 0.0003 +[2026-03-05 16:15:37] (step=0068976) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.495597730385443, LR: 0.0003 +[2026-03-05 16:15:45] (step=0068977) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.495793386812757, LR: 0.0003 +[2026-03-05 16:15:53] (step=0068978) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.49598904324007, LR: 0.0003 +[2026-03-05 16:16:01] (step=0068979) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.496184699667385, LR: 0.0003 +[2026-03-05 16:16:09] (step=0068980) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.496380356094697, LR: 0.0003 +[2026-03-05 16:16:17] (step=0068981) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.496576012522011, LR: 0.0003 +[2026-03-05 16:16:25] (step=0068982) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.496771668949325, LR: 0.0003 +[2026-03-05 16:16:32] (step=0068983) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.49696732537664, LR: 0.0003 +[2026-03-05 16:16:40] (step=0068984) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 13.497162981803953, LR: 0.0003 +[2026-03-05 16:16:48] (step=0068985) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 13.497358638231265, LR: 0.0003 +[2026-03-05 16:16:56] (step=0068986) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.49755429465858, LR: 0.0003 +[2026-03-05 16:17:04] (step=0068987) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 13.497749951085893, LR: 0.0003 +[2026-03-05 16:17:12] (step=0068988) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.497945607513207, LR: 0.0003 +[2026-03-05 16:17:20] (step=0068989) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.49814126394052, LR: 0.0003 +[2026-03-05 16:17:28] (step=0068990) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.498336920367834, LR: 0.0003 +[2026-03-05 16:17:35] (step=0068991) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.498532576795148, LR: 0.0003 +[2026-03-05 16:17:43] (step=0068992) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.498728233222462, LR: 0.0003 +[2026-03-05 16:17:51] (step=0068993) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 13.498923889649776, LR: 0.0003 +[2026-03-05 16:17:59] (step=0068994) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.499119546077088, LR: 0.0003 +[2026-03-05 16:18:07] (step=0068995) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.499315202504402, LR: 0.0003 +[2026-03-05 16:18:15] (step=0068996) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.499510858931716, LR: 0.0003 +[2026-03-05 16:18:23] (step=0068997) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.49970651535903, LR: 0.0003 +[2026-03-05 16:18:30] (step=0068998) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 13.499902171786344, LR: 0.0003 +[2026-03-05 16:18:38] (step=0068999) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.500097828213656, LR: 0.0003 +[2026-03-05 16:18:46] (step=0069000) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.50029348464097, LR: 0.0003 +[2026-03-05 16:18:46] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0069000/ +[2026-03-05 16:18:54] (step=0069001) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.500489141068284, LR: 0.0003 +[2026-03-05 16:19:02] (step=0069002) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.500684797495598, LR: 0.0003 +[2026-03-05 16:19:10] (step=0069003) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.500880453922912, LR: 0.0003 +[2026-03-05 16:19:18] (step=0069004) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.501076110350224, LR: 0.0003 +[2026-03-05 16:19:26] (step=0069005) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.501271766777538, LR: 0.0003 +[2026-03-05 16:19:33] (step=0069006) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.501467423204852, LR: 0.0003 +[2026-03-05 16:19:41] (step=0069007) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.501663079632166, LR: 0.0003 +[2026-03-05 16:19:49] (step=0069008) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.50185873605948, LR: 0.0003 +[2026-03-05 16:19:57] (step=0069009) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.502054392486793, LR: 0.0003 +[2026-03-05 16:20:05] (step=0069010) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.502250048914107, LR: 0.0003 +[2026-03-05 16:20:13] (step=0069011) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.50244570534142, LR: 0.0003 +[2026-03-05 16:20:21] (step=0069012) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.502641361768735, LR: 0.0003 +[2026-03-05 16:20:28] (step=0069013) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.502837018196047, LR: 0.0003 +[2026-03-05 16:20:36] (step=0069014) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.50303267462336, LR: 0.0003 +[2026-03-05 16:20:44] (step=0069015) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.503228331050675, LR: 0.0003 +[2026-03-05 16:20:52] (step=0069016) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.503423987477989, LR: 0.0003 +[2026-03-05 16:21:00] (step=0069017) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.503619643905303, LR: 0.0003 +[2026-03-05 16:21:08] (step=0069018) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.503815300332615, LR: 0.0003 +[2026-03-05 16:21:16] (step=0069019) Train Loss: 0.4455, Train Steps/Sec: 0.12, Epoch: 13.50401095675993, LR: 0.0003 +[2026-03-05 16:21:24] (step=0069020) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 13.504206613187243, LR: 0.0003 +[2026-03-05 16:21:32] (step=0069021) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.504402269614557, LR: 0.0003 +[2026-03-05 16:21:40] (step=0069022) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.504597926041871, LR: 0.0003 +[2026-03-05 16:21:47] (step=0069023) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.504793582469183, LR: 0.0003 +[2026-03-05 16:21:55] (step=0069024) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.504989238896497, LR: 0.0003 +[2026-03-05 16:22:03] (step=0069025) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.505184895323811, LR: 0.0003 +[2026-03-05 16:22:11] (step=0069026) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.505380551751125, LR: 0.0003 +[2026-03-05 16:22:19] (step=0069027) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.50557620817844, LR: 0.0003 +[2026-03-05 16:22:27] (step=0069028) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.505771864605752, LR: 0.0003 +[2026-03-05 16:22:35] (step=0069029) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.505967521033066, LR: 0.0003 +[2026-03-05 16:22:42] (step=0069030) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.50616317746038, LR: 0.0003 +[2026-03-05 16:22:50] (step=0069031) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.506358833887694, LR: 0.0003 +[2026-03-05 16:22:58] (step=0069032) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.506554490315008, LR: 0.0003 +[2026-03-05 16:23:06] (step=0069033) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.50675014674232, LR: 0.0003 +[2026-03-05 16:23:14] (step=0069034) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.506945803169634, LR: 0.0003 +[2026-03-05 16:23:22] (step=0069035) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.507141459596948, LR: 0.0003 +[2026-03-05 16:23:30] (step=0069036) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.507337116024262, LR: 0.0003 +[2026-03-05 16:23:38] (step=0069037) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.507532772451576, LR: 0.0003 +[2026-03-05 16:23:45] (step=0069038) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.507728428878888, LR: 0.0003 +[2026-03-05 16:23:53] (step=0069039) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.507924085306202, LR: 0.0003 +[2026-03-05 16:24:01] (step=0069040) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.508119741733516, LR: 0.0003 +[2026-03-05 16:24:09] (step=0069041) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.50831539816083, LR: 0.0003 +[2026-03-05 16:24:17] (step=0069042) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.508511054588142, LR: 0.0003 +[2026-03-05 16:24:25] (step=0069043) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.508706711015456, LR: 0.0003 +[2026-03-05 16:24:33] (step=0069044) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.50890236744277, LR: 0.0003 +[2026-03-05 16:24:40] (step=0069045) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.509098023870084, LR: 0.0003 +[2026-03-05 16:24:48] (step=0069046) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.509293680297398, LR: 0.0003 +[2026-03-05 16:24:56] (step=0069047) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.50948933672471, LR: 0.0003 +[2026-03-05 16:25:04] (step=0069048) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.509684993152025, LR: 0.0003 +[2026-03-05 16:25:12] (step=0069049) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 13.509880649579339, LR: 0.0003 +[2026-03-05 16:25:20] (step=0069050) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 13.510076306006653, LR: 0.0003 +[2026-03-05 16:25:28] (step=0069051) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.510271962433967, LR: 0.0003 +[2026-03-05 16:25:36] (step=0069052) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 13.510467618861279, LR: 0.0003 +[2026-03-05 16:25:43] (step=0069053) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.510663275288593, LR: 0.0003 +[2026-03-05 16:25:51] (step=0069054) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.510858931715907, LR: 0.0003 +[2026-03-05 16:25:59] (step=0069055) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.511054588143221, LR: 0.0003 +[2026-03-05 16:26:07] (step=0069056) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.511250244570535, LR: 0.0003 +[2026-03-05 16:26:15] (step=0069057) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.511445900997847, LR: 0.0003 +[2026-03-05 16:26:23] (step=0069058) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.511641557425161, LR: 0.0003 +[2026-03-05 16:26:31] (step=0069059) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.511837213852475, LR: 0.0003 +[2026-03-05 16:26:38] (step=0069060) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.51203287027979, LR: 0.0003 +[2026-03-05 16:26:46] (step=0069061) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.512228526707103, LR: 0.0003 +[2026-03-05 16:26:54] (step=0069062) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.512424183134415, LR: 0.0003 +[2026-03-05 16:27:02] (step=0069063) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 13.51261983956173, LR: 0.0003 +[2026-03-05 16:27:10] (step=0069064) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.512815495989043, LR: 0.0003 +[2026-03-05 16:27:18] (step=0069065) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.513011152416357, LR: 0.0003 +[2026-03-05 16:27:26] (step=0069066) Train Loss: 0.4411, Train Steps/Sec: 0.12, Epoch: 13.51320680884367, LR: 0.0003 +[2026-03-05 16:27:34] (step=0069067) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.513402465270984, LR: 0.0003 +[2026-03-05 16:27:42] (step=0069068) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.513598121698298, LR: 0.0003 +[2026-03-05 16:27:50] (step=0069069) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.513793778125612, LR: 0.0003 +[2026-03-05 16:27:57] (step=0069070) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.513989434552926, LR: 0.0003 +[2026-03-05 16:28:05] (step=0069071) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.514185090980238, LR: 0.0003 +[2026-03-05 16:28:13] (step=0069072) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.514380747407552, LR: 0.0003 +[2026-03-05 16:28:21] (step=0069073) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 13.514576403834866, LR: 0.0003 +[2026-03-05 16:28:29] (step=0069074) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.51477206026218, LR: 0.0003 +[2026-03-05 16:28:37] (step=0069075) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.514967716689494, LR: 0.0003 +[2026-03-05 16:28:45] (step=0069076) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 13.515163373116806, LR: 0.0003 +[2026-03-05 16:28:52] (step=0069077) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.51535902954412, LR: 0.0003 +[2026-03-05 16:29:00] (step=0069078) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.515554685971434, LR: 0.0003 +[2026-03-05 16:29:08] (step=0069079) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.515750342398748, LR: 0.0003 +[2026-03-05 16:29:16] (step=0069080) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.515945998826062, LR: 0.0003 +[2026-03-05 16:29:24] (step=0069081) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 13.516141655253374, LR: 0.0003 +[2026-03-05 16:29:32] (step=0069082) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.516337311680688, LR: 0.0003 +[2026-03-05 16:29:40] (step=0069083) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.516532968108002, LR: 0.0003 +[2026-03-05 16:29:47] (step=0069084) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.516728624535316, LR: 0.0003 +[2026-03-05 16:29:55] (step=0069085) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.51692428096263, LR: 0.0003 +[2026-03-05 16:30:03] (step=0069086) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.517119937389943, LR: 0.0003 +[2026-03-05 16:30:11] (step=0069087) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.517315593817257, LR: 0.0003 +[2026-03-05 16:30:19] (step=0069088) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.51751125024457, LR: 0.0003 +[2026-03-05 16:30:27] (step=0069089) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.517706906671885, LR: 0.0003 +[2026-03-05 16:30:35] (step=0069090) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.517902563099199, LR: 0.0003 +[2026-03-05 16:30:42] (step=0069091) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.518098219526511, LR: 0.0003 +[2026-03-05 16:30:50] (step=0069092) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.518293875953825, LR: 0.0003 +[2026-03-05 16:30:58] (step=0069093) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.518489532381139, LR: 0.0003 +[2026-03-05 16:31:06] (step=0069094) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.518685188808453, LR: 0.0003 +[2026-03-05 16:31:14] (step=0069095) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.518880845235765, LR: 0.0003 +[2026-03-05 16:31:22] (step=0069096) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.51907650166308, LR: 0.0003 +[2026-03-05 16:31:30] (step=0069097) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.519272158090393, LR: 0.0003 +[2026-03-05 16:31:38] (step=0069098) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.519467814517707, LR: 0.0003 +[2026-03-05 16:31:45] (step=0069099) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.519663470945021, LR: 0.0003 +[2026-03-05 16:31:53] (step=0069100) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.519859127372333, LR: 0.0003 +[2026-03-05 16:32:01] (step=0069101) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.520054783799647, LR: 0.0003 +[2026-03-05 16:32:09] (step=0069102) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.520250440226961, LR: 0.0003 +[2026-03-05 16:32:17] (step=0069103) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.520446096654275, LR: 0.0003 +[2026-03-05 16:32:25] (step=0069104) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.52064175308159, LR: 0.0003 +[2026-03-05 16:32:33] (step=0069105) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.520837409508902, LR: 0.0003 +[2026-03-05 16:32:40] (step=0069106) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.521033065936216, LR: 0.0003 +[2026-03-05 16:32:48] (step=0069107) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 13.52122872236353, LR: 0.0003 +[2026-03-05 16:32:56] (step=0069108) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.521424378790844, LR: 0.0003 +[2026-03-05 16:33:04] (step=0069109) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.521620035218158, LR: 0.0003 +[2026-03-05 16:33:12] (step=0069110) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.52181569164547, LR: 0.0003 +[2026-03-05 16:33:20] (step=0069111) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.522011348072784, LR: 0.0003 +[2026-03-05 16:33:28] (step=0069112) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.522207004500098, LR: 0.0003 +[2026-03-05 16:33:35] (step=0069113) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.522402660927412, LR: 0.0003 +[2026-03-05 16:33:44] (step=0069114) Train Loss: 0.4444, Train Steps/Sec: 0.12, Epoch: 13.522598317354726, LR: 0.0003 +[2026-03-05 16:33:52] (step=0069115) Train Loss: 0.4379, Train Steps/Sec: 0.12, Epoch: 13.522793973782038, LR: 0.0003 +[2026-03-05 16:33:59] (step=0069116) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 13.522989630209352, LR: 0.0003 +[2026-03-05 16:34:07] (step=0069117) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.523185286636666, LR: 0.0003 +[2026-03-05 16:34:15] (step=0069118) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.52338094306398, LR: 0.0003 +[2026-03-05 16:34:23] (step=0069119) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.523576599491292, LR: 0.0003 +[2026-03-05 16:34:31] (step=0069120) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 13.523772255918606, LR: 0.0003 +[2026-03-05 16:34:39] (step=0069121) Train Loss: 0.4701, Train Steps/Sec: 0.13, Epoch: 13.52396791234592, LR: 0.0003 +[2026-03-05 16:34:47] (step=0069122) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.524163568773234, LR: 0.0003 +[2026-03-05 16:34:54] (step=0069123) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.524359225200548, LR: 0.0003 +[2026-03-05 16:35:02] (step=0069124) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.52455488162786, LR: 0.0003 +[2026-03-05 16:35:10] (step=0069125) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.524750538055175, LR: 0.0003 +[2026-03-05 16:35:18] (step=0069126) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.524946194482489, LR: 0.0003 +[2026-03-05 16:35:26] (step=0069127) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.525141850909803, LR: 0.0003 +[2026-03-05 16:35:34] (step=0069128) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.525337507337117, LR: 0.0003 +[2026-03-05 16:35:42] (step=0069129) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.525533163764429, LR: 0.0003 +[2026-03-05 16:35:50] (step=0069130) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.525728820191743, LR: 0.0003 +[2026-03-05 16:35:57] (step=0069131) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.525924476619057, LR: 0.0003 +[2026-03-05 16:36:05] (step=0069132) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.526120133046371, LR: 0.0003 +[2026-03-05 16:36:13] (step=0069133) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.526315789473685, LR: 0.0003 +[2026-03-05 16:36:21] (step=0069134) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 13.526511445900997, LR: 0.0003 +[2026-03-05 16:36:29] (step=0069135) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.526707102328311, LR: 0.0003 +[2026-03-05 16:36:37] (step=0069136) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.526902758755625, LR: 0.0003 +[2026-03-05 16:36:45] (step=0069137) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 13.52709841518294, LR: 0.0003 +[2026-03-05 16:36:52] (step=0069138) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.527294071610253, LR: 0.0003 +[2026-03-05 16:37:00] (step=0069139) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.527489728037565, LR: 0.0003 +[2026-03-05 16:37:08] (step=0069140) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 13.52768538446488, LR: 0.0003 +[2026-03-05 16:37:16] (step=0069141) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.527881040892193, LR: 0.0003 +[2026-03-05 16:37:24] (step=0069142) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.528076697319507, LR: 0.0003 +[2026-03-05 16:37:32] (step=0069143) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.528272353746821, LR: 0.0003 +[2026-03-05 16:37:40] (step=0069144) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.528468010174134, LR: 0.0003 +[2026-03-05 16:37:47] (step=0069145) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.528663666601448, LR: 0.0003 +[2026-03-05 16:37:55] (step=0069146) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 13.528859323028762, LR: 0.0003 +[2026-03-05 16:38:03] (step=0069147) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.529054979456076, LR: 0.0003 +[2026-03-05 16:38:11] (step=0069148) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.529250635883388, LR: 0.0003 +[2026-03-05 16:38:19] (step=0069149) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.529446292310702, LR: 0.0003 +[2026-03-05 16:38:27] (step=0069150) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.529641948738016, LR: 0.0003 +[2026-03-05 16:38:35] (step=0069151) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.52983760516533, LR: 0.0003 +[2026-03-05 16:38:42] (step=0069152) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.530033261592644, LR: 0.0003 +[2026-03-05 16:38:50] (step=0069153) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.530228918019956, LR: 0.0003 +[2026-03-05 16:38:58] (step=0069154) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.53042457444727, LR: 0.0003 +[2026-03-05 16:39:06] (step=0069155) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.530620230874584, LR: 0.0003 +[2026-03-05 16:39:14] (step=0069156) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 13.530815887301898, LR: 0.0003 +[2026-03-05 16:39:22] (step=0069157) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.531011543729212, LR: 0.0003 +[2026-03-05 16:39:30] (step=0069158) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.531207200156524, LR: 0.0003 +[2026-03-05 16:39:38] (step=0069159) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.531402856583838, LR: 0.0003 +[2026-03-05 16:39:45] (step=0069160) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.531598513011152, LR: 0.0003 +[2026-03-05 16:39:53] (step=0069161) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.531794169438466, LR: 0.0003 +[2026-03-05 16:40:01] (step=0069162) Train Loss: 0.4357, Train Steps/Sec: 0.12, Epoch: 13.53198982586578, LR: 0.0003 +[2026-03-05 16:40:09] (step=0069163) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 13.532185482293093, LR: 0.0003 +[2026-03-05 16:40:17] (step=0069164) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.532381138720407, LR: 0.0003 +[2026-03-05 16:40:25] (step=0069165) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.53257679514772, LR: 0.0003 +[2026-03-05 16:40:33] (step=0069166) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.532772451575035, LR: 0.0003 +[2026-03-05 16:40:41] (step=0069167) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.532968108002349, LR: 0.0003 +[2026-03-05 16:40:49] (step=0069168) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.533163764429661, LR: 0.0003 +[2026-03-05 16:40:56] (step=0069169) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.533359420856975, LR: 0.0003 +[2026-03-05 16:41:04] (step=0069170) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 13.533555077284289, LR: 0.0003 +[2026-03-05 16:41:12] (step=0069171) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 13.533750733711603, LR: 0.0003 +[2026-03-05 16:41:20] (step=0069172) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.533946390138915, LR: 0.0003 +[2026-03-05 16:41:28] (step=0069173) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.53414204656623, LR: 0.0003 +[2026-03-05 16:41:36] (step=0069174) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.534337702993543, LR: 0.0003 +[2026-03-05 16:41:44] (step=0069175) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.534533359420857, LR: 0.0003 +[2026-03-05 16:41:51] (step=0069176) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.534729015848171, LR: 0.0003 +[2026-03-05 16:41:59] (step=0069177) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.534924672275483, LR: 0.0003 +[2026-03-05 16:42:07] (step=0069178) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.535120328702797, LR: 0.0003 +[2026-03-05 16:42:15] (step=0069179) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 13.535315985130111, LR: 0.0003 +[2026-03-05 16:42:23] (step=0069180) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.535511641557425, LR: 0.0003 +[2026-03-05 16:42:31] (step=0069181) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.53570729798474, LR: 0.0003 +[2026-03-05 16:42:39] (step=0069182) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.535902954412052, LR: 0.0003 +[2026-03-05 16:42:47] (step=0069183) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.536098610839366, LR: 0.0003 +[2026-03-05 16:42:54] (step=0069184) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.53629426726668, LR: 0.0003 +[2026-03-05 16:43:02] (step=0069185) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.536489923693994, LR: 0.0003 +[2026-03-05 16:43:10] (step=0069186) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.536685580121308, LR: 0.0003 +[2026-03-05 16:43:18] (step=0069187) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.53688123654862, LR: 0.0003 +[2026-03-05 16:43:26] (step=0069188) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.537076892975934, LR: 0.0003 +[2026-03-05 16:43:34] (step=0069189) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.537272549403248, LR: 0.0003 +[2026-03-05 16:43:42] (step=0069190) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.537468205830562, LR: 0.0003 +[2026-03-05 16:43:50] (step=0069191) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.537663862257876, LR: 0.0003 +[2026-03-05 16:43:57] (step=0069192) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.537859518685188, LR: 0.0003 +[2026-03-05 16:44:05] (step=0069193) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.538055175112502, LR: 0.0003 +[2026-03-05 16:44:13] (step=0069194) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.538250831539816, LR: 0.0003 +[2026-03-05 16:44:21] (step=0069195) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.53844648796713, LR: 0.0003 +[2026-03-05 16:44:29] (step=0069196) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.538642144394444, LR: 0.0003 +[2026-03-05 16:44:37] (step=0069197) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.538837800821756, LR: 0.0003 +[2026-03-05 16:44:45] (step=0069198) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 13.53903345724907, LR: 0.0003 +[2026-03-05 16:44:52] (step=0069199) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 13.539229113676384, LR: 0.0003 +[2026-03-05 16:45:00] (step=0069200) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.539424770103698, LR: 0.0003 +[2026-03-05 16:45:08] (step=0069201) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.53962042653101, LR: 0.0003 +[2026-03-05 16:45:16] (step=0069202) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.539816082958325, LR: 0.0003 +[2026-03-05 16:45:24] (step=0069203) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.540011739385639, LR: 0.0003 +[2026-03-05 16:45:32] (step=0069204) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.540207395812953, LR: 0.0003 +[2026-03-05 16:45:40] (step=0069205) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.540403052240267, LR: 0.0003 +[2026-03-05 16:45:48] (step=0069206) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.540598708667579, LR: 0.0003 +[2026-03-05 16:45:55] (step=0069207) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.540794365094893, LR: 0.0003 +[2026-03-05 16:46:03] (step=0069208) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.540990021522207, LR: 0.0003 +[2026-03-05 16:46:11] (step=0069209) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.541185677949521, LR: 0.0003 +[2026-03-05 16:46:19] (step=0069210) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.541381334376835, LR: 0.0003 +[2026-03-05 16:46:27] (step=0069211) Train Loss: 0.4499, Train Steps/Sec: 0.12, Epoch: 13.541576990804147, LR: 0.0003 +[2026-03-05 16:46:35] (step=0069212) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.541772647231461, LR: 0.0003 +[2026-03-05 16:46:43] (step=0069213) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 13.541968303658775, LR: 0.0003 +[2026-03-05 16:46:51] (step=0069214) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 13.54216396008609, LR: 0.0003 +[2026-03-05 16:46:59] (step=0069215) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.542359616513403, LR: 0.0003 +[2026-03-05 16:47:07] (step=0069216) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.542555272940715, LR: 0.0003 +[2026-03-05 16:47:14] (step=0069217) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 13.54275092936803, LR: 0.0003 +[2026-03-05 16:47:22] (step=0069218) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.542946585795343, LR: 0.0003 +[2026-03-05 16:47:30] (step=0069219) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.543142242222658, LR: 0.0003 +[2026-03-05 16:47:38] (step=0069220) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.543337898649972, LR: 0.0003 +[2026-03-05 16:47:46] (step=0069221) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.543533555077284, LR: 0.0003 +[2026-03-05 16:47:54] (step=0069222) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.543729211504598, LR: 0.0003 +[2026-03-05 16:48:02] (step=0069223) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 13.543924867931912, LR: 0.0003 +[2026-03-05 16:48:09] (step=0069224) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.544120524359226, LR: 0.0003 +[2026-03-05 16:48:17] (step=0069225) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.544316180786538, LR: 0.0003 +[2026-03-05 16:48:25] (step=0069226) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 13.544511837213852, LR: 0.0003 +[2026-03-05 16:48:33] (step=0069227) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 13.544707493641166, LR: 0.0003 +[2026-03-05 16:48:41] (step=0069228) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.54490315006848, LR: 0.0003 +[2026-03-05 16:48:49] (step=0069229) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 13.545098806495794, LR: 0.0003 +[2026-03-05 16:48:57] (step=0069230) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.545294462923106, LR: 0.0003 +[2026-03-05 16:49:05] (step=0069231) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.54549011935042, LR: 0.0003 +[2026-03-05 16:49:12] (step=0069232) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.545685775777734, LR: 0.0003 +[2026-03-05 16:49:20] (step=0069233) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.545881432205048, LR: 0.0003 +[2026-03-05 16:49:28] (step=0069234) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.546077088632362, LR: 0.0003 +[2026-03-05 16:49:36] (step=0069235) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 13.546272745059674, LR: 0.0003 +[2026-03-05 16:49:44] (step=0069236) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.546468401486988, LR: 0.0003 +[2026-03-05 16:49:52] (step=0069237) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.546664057914303, LR: 0.0003 +[2026-03-05 16:50:00] (step=0069238) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.546859714341617, LR: 0.0003 +[2026-03-05 16:50:07] (step=0069239) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.54705537076893, LR: 0.0003 +[2026-03-05 16:50:15] (step=0069240) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.547251027196243, LR: 0.0003 +[2026-03-05 16:50:23] (step=0069241) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 13.547446683623557, LR: 0.0003 +[2026-03-05 16:50:31] (step=0069242) Train Loss: 0.4600, Train Steps/Sec: 0.13, Epoch: 13.54764234005087, LR: 0.0003 +[2026-03-05 16:50:39] (step=0069243) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.547837996478185, LR: 0.0003 +[2026-03-05 16:50:47] (step=0069244) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.548033652905499, LR: 0.0003 +[2026-03-05 16:50:55] (step=0069245) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.548229309332811, LR: 0.0003 +[2026-03-05 16:51:02] (step=0069246) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.548424965760125, LR: 0.0003 +[2026-03-05 16:51:10] (step=0069247) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.548620622187439, LR: 0.0003 +[2026-03-05 16:51:18] (step=0069248) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.548816278614753, LR: 0.0003 +[2026-03-05 16:51:26] (step=0069249) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.549011935042065, LR: 0.0003 +[2026-03-05 16:51:34] (step=0069250) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 13.54920759146938, LR: 0.0003 +[2026-03-05 16:51:42] (step=0069251) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.549403247896693, LR: 0.0003 +[2026-03-05 16:51:50] (step=0069252) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.549598904324007, LR: 0.0003 +[2026-03-05 16:51:58] (step=0069253) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.549794560751321, LR: 0.0003 +[2026-03-05 16:52:05] (step=0069254) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.549990217178634, LR: 0.0003 +[2026-03-05 16:52:13] (step=0069255) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.550185873605948, LR: 0.0003 +[2026-03-05 16:52:21] (step=0069256) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 13.550381530033262, LR: 0.0003 +[2026-03-05 16:52:29] (step=0069257) Train Loss: 0.4331, Train Steps/Sec: 0.12, Epoch: 13.550577186460576, LR: 0.0003 +[2026-03-05 16:52:37] (step=0069258) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.55077284288789, LR: 0.0003 +[2026-03-05 16:52:45] (step=0069259) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.550968499315202, LR: 0.0003 +[2026-03-05 16:52:53] (step=0069260) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.551164155742516, LR: 0.0003 +[2026-03-05 16:53:01] (step=0069261) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.55135981216983, LR: 0.0003 +[2026-03-05 16:53:09] (step=0069262) Train Loss: 0.4640, Train Steps/Sec: 0.12, Epoch: 13.551555468597144, LR: 0.0003 +[2026-03-05 16:53:17] (step=0069263) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.551751125024458, LR: 0.0003 +[2026-03-05 16:53:24] (step=0069264) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 13.55194678145177, LR: 0.0003 +[2026-03-05 16:53:32] (step=0069265) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.552142437879084, LR: 0.0003 +[2026-03-05 16:53:40] (step=0069266) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.552338094306398, LR: 0.0003 +[2026-03-05 16:53:48] (step=0069267) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.552533750733712, LR: 0.0003 +[2026-03-05 16:53:56] (step=0069268) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.552729407161026, LR: 0.0003 +[2026-03-05 16:54:04] (step=0069269) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.552925063588338, LR: 0.0003 +[2026-03-05 16:54:12] (step=0069270) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.553120720015652, LR: 0.0003 +[2026-03-05 16:54:19] (step=0069271) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.553316376442966, LR: 0.0003 +[2026-03-05 16:54:27] (step=0069272) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.55351203287028, LR: 0.0003 +[2026-03-05 16:54:35] (step=0069273) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.553707689297594, LR: 0.0003 +[2026-03-05 16:54:43] (step=0069274) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.553903345724907, LR: 0.0003 +[2026-03-05 16:54:51] (step=0069275) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.55409900215222, LR: 0.0003 +[2026-03-05 16:54:59] (step=0069276) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.554294658579535, LR: 0.0003 +[2026-03-05 16:55:07] (step=0069277) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.554490315006849, LR: 0.0003 +[2026-03-05 16:55:14] (step=0069278) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.55468597143416, LR: 0.0003 +[2026-03-05 16:55:22] (step=0069279) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.554881627861475, LR: 0.0003 +[2026-03-05 16:55:30] (step=0069280) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.555077284288789, LR: 0.0003 +[2026-03-05 16:55:38] (step=0069281) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.555272940716103, LR: 0.0003 +[2026-03-05 16:55:46] (step=0069282) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.555468597143417, LR: 0.0003 +[2026-03-05 16:55:54] (step=0069283) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.555664253570729, LR: 0.0003 +[2026-03-05 16:56:02] (step=0069284) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.555859909998043, LR: 0.0003 +[2026-03-05 16:56:10] (step=0069285) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.556055566425357, LR: 0.0003 +[2026-03-05 16:56:17] (step=0069286) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.556251222852671, LR: 0.0003 +[2026-03-05 16:56:25] (step=0069287) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.556446879279985, LR: 0.0003 +[2026-03-05 16:56:33] (step=0069288) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.556642535707297, LR: 0.0003 +[2026-03-05 16:56:41] (step=0069289) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.556838192134611, LR: 0.0003 +[2026-03-05 16:56:49] (step=0069290) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.557033848561925, LR: 0.0003 +[2026-03-05 16:56:57] (step=0069291) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 13.55722950498924, LR: 0.0003 +[2026-03-05 16:57:05] (step=0069292) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.557425161416553, LR: 0.0003 +[2026-03-05 16:57:12] (step=0069293) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.557620817843866, LR: 0.0003 +[2026-03-05 16:57:20] (step=0069294) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.55781647427118, LR: 0.0003 +[2026-03-05 16:57:28] (step=0069295) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.558012130698494, LR: 0.0003 +[2026-03-05 16:57:36] (step=0069296) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.558207787125808, LR: 0.0003 +[2026-03-05 16:57:44] (step=0069297) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.558403443553122, LR: 0.0003 +[2026-03-05 16:57:52] (step=0069298) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 13.558599099980434, LR: 0.0003 +[2026-03-05 16:58:00] (step=0069299) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 13.558794756407748, LR: 0.0003 +[2026-03-05 16:58:07] (step=0069300) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 13.558990412835062, LR: 0.0003 +[2026-03-05 16:58:15] (step=0069301) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.559186069262376, LR: 0.0003 +[2026-03-05 16:58:23] (step=0069302) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.559381725689688, LR: 0.0003 +[2026-03-05 16:58:31] (step=0069303) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.559577382117002, LR: 0.0003 +[2026-03-05 16:58:39] (step=0069304) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.559773038544316, LR: 0.0003 +[2026-03-05 16:58:47] (step=0069305) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.55996869497163, LR: 0.0003 +[2026-03-05 16:58:55] (step=0069306) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 13.560164351398944, LR: 0.0003 +[2026-03-05 16:59:03] (step=0069307) Train Loss: 0.4470, Train Steps/Sec: 0.12, Epoch: 13.560360007826256, LR: 0.0003 +[2026-03-05 16:59:11] (step=0069308) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.56055566425357, LR: 0.0003 +[2026-03-05 16:59:19] (step=0069309) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.560751320680884, LR: 0.0003 +[2026-03-05 16:59:26] (step=0069310) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 13.560946977108198, LR: 0.0003 +[2026-03-05 16:59:34] (step=0069311) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.561142633535512, LR: 0.0003 +[2026-03-05 16:59:42] (step=0069312) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.561338289962825, LR: 0.0003 +[2026-03-05 16:59:50] (step=0069313) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.561533946390139, LR: 0.0003 +[2026-03-05 16:59:58] (step=0069314) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.561729602817453, LR: 0.0003 +[2026-03-05 17:00:06] (step=0069315) Train Loss: 0.4644, Train Steps/Sec: 0.13, Epoch: 13.561925259244767, LR: 0.0003 +[2026-03-05 17:00:14] (step=0069316) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.56212091567208, LR: 0.0003 +[2026-03-05 17:00:21] (step=0069317) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.562316572099393, LR: 0.0003 +[2026-03-05 17:00:29] (step=0069318) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.562512228526707, LR: 0.0003 +[2026-03-05 17:00:37] (step=0069319) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.56270788495402, LR: 0.0003 +[2026-03-05 17:00:45] (step=0069320) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 13.562903541381335, LR: 0.0003 +[2026-03-05 17:00:53] (step=0069321) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.563099197808649, LR: 0.0003 +[2026-03-05 17:01:01] (step=0069322) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.563294854235961, LR: 0.0003 +[2026-03-05 17:01:09] (step=0069323) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.563490510663275, LR: 0.0003 +[2026-03-05 17:01:16] (step=0069324) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.563686167090589, LR: 0.0003 +[2026-03-05 17:01:24] (step=0069325) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.563881823517903, LR: 0.0003 +[2026-03-05 17:01:32] (step=0069326) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.564077479945217, LR: 0.0003 +[2026-03-05 17:01:40] (step=0069327) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.56427313637253, LR: 0.0003 +[2026-03-05 17:01:48] (step=0069328) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.564468792799843, LR: 0.0003 +[2026-03-05 17:01:56] (step=0069329) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.564664449227157, LR: 0.0003 +[2026-03-05 17:02:04] (step=0069330) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.564860105654471, LR: 0.0003 +[2026-03-05 17:02:11] (step=0069331) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.565055762081784, LR: 0.0003 +[2026-03-05 17:02:19] (step=0069332) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.565251418509098, LR: 0.0003 +[2026-03-05 17:02:27] (step=0069333) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.565447074936412, LR: 0.0003 +[2026-03-05 17:02:35] (step=0069334) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.565642731363726, LR: 0.0003 +[2026-03-05 17:02:43] (step=0069335) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.56583838779104, LR: 0.0003 +[2026-03-05 17:02:51] (step=0069336) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.566034044218352, LR: 0.0003 +[2026-03-05 17:02:59] (step=0069337) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 13.566229700645666, LR: 0.0003 +[2026-03-05 17:03:06] (step=0069338) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.56642535707298, LR: 0.0003 +[2026-03-05 17:03:14] (step=0069339) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.566621013500294, LR: 0.0003 +[2026-03-05 17:03:22] (step=0069340) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.566816669927608, LR: 0.0003 +[2026-03-05 17:03:30] (step=0069341) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.56701232635492, LR: 0.0003 +[2026-03-05 17:03:38] (step=0069342) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.567207982782234, LR: 0.0003 +[2026-03-05 17:03:46] (step=0069343) Train Loss: 0.4226, Train Steps/Sec: 0.13, Epoch: 13.567403639209548, LR: 0.0003 +[2026-03-05 17:03:54] (step=0069344) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 13.567599295636862, LR: 0.0003 +[2026-03-05 17:04:02] (step=0069345) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.567794952064176, LR: 0.0003 +[2026-03-05 17:04:09] (step=0069346) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.567990608491488, LR: 0.0003 +[2026-03-05 17:04:17] (step=0069347) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.568186264918802, LR: 0.0003 +[2026-03-05 17:04:25] (step=0069348) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.568381921346116, LR: 0.0003 +[2026-03-05 17:04:33] (step=0069349) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.56857757777343, LR: 0.0003 +[2026-03-05 17:04:41] (step=0069350) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.568773234200744, LR: 0.0003 +[2026-03-05 17:04:49] (step=0069351) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.568968890628057, LR: 0.0003 +[2026-03-05 17:04:57] (step=0069352) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.56916454705537, LR: 0.0003 +[2026-03-05 17:05:04] (step=0069353) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.569360203482685, LR: 0.0003 +[2026-03-05 17:05:12] (step=0069354) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.569555859909999, LR: 0.0003 +[2026-03-05 17:05:20] (step=0069355) Train Loss: 0.4493, Train Steps/Sec: 0.12, Epoch: 13.56975151633731, LR: 0.0003 +[2026-03-05 17:05:28] (step=0069356) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.569947172764625, LR: 0.0003 +[2026-03-05 17:05:36] (step=0069357) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 13.570142829191939, LR: 0.0003 +[2026-03-05 17:05:44] (step=0069358) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.570338485619253, LR: 0.0003 +[2026-03-05 17:05:52] (step=0069359) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.570534142046567, LR: 0.0003 +[2026-03-05 17:06:00] (step=0069360) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.570729798473879, LR: 0.0003 +[2026-03-05 17:06:08] (step=0069361) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.570925454901193, LR: 0.0003 +[2026-03-05 17:06:16] (step=0069362) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.571121111328507, LR: 0.0003 +[2026-03-05 17:06:23] (step=0069363) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 13.571316767755821, LR: 0.0003 +[2026-03-05 17:06:31] (step=0069364) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.571512424183135, LR: 0.0003 +[2026-03-05 17:06:39] (step=0069365) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.571708080610447, LR: 0.0003 +[2026-03-05 17:06:47] (step=0069366) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.571903737037761, LR: 0.0003 +[2026-03-05 17:06:55] (step=0069367) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.572099393465075, LR: 0.0003 +[2026-03-05 17:07:03] (step=0069368) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.57229504989239, LR: 0.0003 +[2026-03-05 17:07:11] (step=0069369) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.572490706319703, LR: 0.0003 +[2026-03-05 17:07:19] (step=0069370) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.572686362747016, LR: 0.0003 +[2026-03-05 17:07:26] (step=0069371) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.57288201917433, LR: 0.0003 +[2026-03-05 17:07:34] (step=0069372) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.573077675601644, LR: 0.0003 +[2026-03-05 17:07:42] (step=0069373) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 13.573273332028958, LR: 0.0003 +[2026-03-05 17:07:50] (step=0069374) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.573468988456272, LR: 0.0003 +[2026-03-05 17:07:58] (step=0069375) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.573664644883584, LR: 0.0003 +[2026-03-05 17:08:06] (step=0069376) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.573860301310898, LR: 0.0003 +[2026-03-05 17:08:14] (step=0069377) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.574055957738212, LR: 0.0003 +[2026-03-05 17:08:21] (step=0069378) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.574251614165526, LR: 0.0003 +[2026-03-05 17:08:29] (step=0069379) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 13.57444727059284, LR: 0.0003 +[2026-03-05 17:08:37] (step=0069380) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.574642927020152, LR: 0.0003 +[2026-03-05 17:08:45] (step=0069381) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.574838583447466, LR: 0.0003 +[2026-03-05 17:08:53] (step=0069382) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.57503423987478, LR: 0.0003 +[2026-03-05 17:09:01] (step=0069383) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.575229896302094, LR: 0.0003 +[2026-03-05 17:09:09] (step=0069384) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 13.575425552729406, LR: 0.0003 +[2026-03-05 17:09:17] (step=0069385) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.57562120915672, LR: 0.0003 +[2026-03-05 17:09:24] (step=0069386) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.575816865584034, LR: 0.0003 +[2026-03-05 17:09:32] (step=0069387) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.576012522011348, LR: 0.0003 +[2026-03-05 17:09:40] (step=0069388) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.576208178438662, LR: 0.0003 +[2026-03-05 17:09:48] (step=0069389) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.576403834865975, LR: 0.0003 +[2026-03-05 17:09:56] (step=0069390) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.576599491293289, LR: 0.0003 +[2026-03-05 17:10:04] (step=0069391) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.576795147720603, LR: 0.0003 +[2026-03-05 17:10:12] (step=0069392) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.576990804147917, LR: 0.0003 +[2026-03-05 17:10:20] (step=0069393) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.57718646057523, LR: 0.0003 +[2026-03-05 17:10:27] (step=0069394) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.577382117002543, LR: 0.0003 +[2026-03-05 17:10:35] (step=0069395) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.577577773429857, LR: 0.0003 +[2026-03-05 17:10:43] (step=0069396) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.57777342985717, LR: 0.0003 +[2026-03-05 17:10:51] (step=0069397) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 13.577969086284485, LR: 0.0003 +[2026-03-05 17:10:59] (step=0069398) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.578164742711799, LR: 0.0003 +[2026-03-05 17:11:07] (step=0069399) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.578360399139111, LR: 0.0003 +[2026-03-05 17:11:15] (step=0069400) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.578556055566425, LR: 0.0003 +[2026-03-05 17:11:22] (step=0069401) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.578751711993739, LR: 0.0003 +[2026-03-05 17:11:30] (step=0069402) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.578947368421053, LR: 0.0003 +[2026-03-05 17:11:38] (step=0069403) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.579143024848367, LR: 0.0003 +[2026-03-05 17:11:46] (step=0069404) Train Loss: 0.4504, Train Steps/Sec: 0.12, Epoch: 13.57933868127568, LR: 0.0003 +[2026-03-05 17:11:54] (step=0069405) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.579534337702993, LR: 0.0003 +[2026-03-05 17:12:02] (step=0069406) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.579729994130307, LR: 0.0003 +[2026-03-05 17:12:10] (step=0069407) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.579925650557621, LR: 0.0003 +[2026-03-05 17:12:18] (step=0069408) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.580121306984934, LR: 0.0003 +[2026-03-05 17:12:26] (step=0069409) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.580316963412248, LR: 0.0003 +[2026-03-05 17:12:34] (step=0069410) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.580512619839562, LR: 0.0003 +[2026-03-05 17:12:41] (step=0069411) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.580708276266876, LR: 0.0003 +[2026-03-05 17:12:49] (step=0069412) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.58090393269419, LR: 0.0003 +[2026-03-05 17:12:57] (step=0069413) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.581099589121502, LR: 0.0003 +[2026-03-05 17:13:05] (step=0069414) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.581295245548816, LR: 0.0003 +[2026-03-05 17:13:13] (step=0069415) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.58149090197613, LR: 0.0003 +[2026-03-05 17:13:21] (step=0069416) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.581686558403444, LR: 0.0003 +[2026-03-05 17:13:29] (step=0069417) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.581882214830758, LR: 0.0003 +[2026-03-05 17:13:36] (step=0069418) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.58207787125807, LR: 0.0003 +[2026-03-05 17:13:44] (step=0069419) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.582273527685384, LR: 0.0003 +[2026-03-05 17:13:52] (step=0069420) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.582469184112698, LR: 0.0003 +[2026-03-05 17:14:00] (step=0069421) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.582664840540012, LR: 0.0003 +[2026-03-05 17:14:08] (step=0069422) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.582860496967326, LR: 0.0003 +[2026-03-05 17:14:16] (step=0069423) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.583056153394638, LR: 0.0003 +[2026-03-05 17:14:24] (step=0069424) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.583251809821952, LR: 0.0003 +[2026-03-05 17:14:32] (step=0069425) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.583447466249266, LR: 0.0003 +[2026-03-05 17:14:39] (step=0069426) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.58364312267658, LR: 0.0003 +[2026-03-05 17:14:47] (step=0069427) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.583838779103894, LR: 0.0003 +[2026-03-05 17:14:55] (step=0069428) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.584034435531207, LR: 0.0003 +[2026-03-05 17:15:03] (step=0069429) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.58423009195852, LR: 0.0003 +[2026-03-05 17:15:11] (step=0069430) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.584425748385835, LR: 0.0003 +[2026-03-05 17:15:19] (step=0069431) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.584621404813149, LR: 0.0003 +[2026-03-05 17:15:27] (step=0069432) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.584817061240463, LR: 0.0003 +[2026-03-05 17:15:34] (step=0069433) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.585012717667775, LR: 0.0003 +[2026-03-05 17:15:42] (step=0069434) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 13.585208374095089, LR: 0.0003 +[2026-03-05 17:15:50] (step=0069435) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.585404030522403, LR: 0.0003 +[2026-03-05 17:15:58] (step=0069436) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.585599686949717, LR: 0.0003 +[2026-03-05 17:16:06] (step=0069437) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 13.585795343377029, LR: 0.0003 +[2026-03-05 17:16:14] (step=0069438) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 13.585990999804343, LR: 0.0003 +[2026-03-05 17:16:22] (step=0069439) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.586186656231657, LR: 0.0003 +[2026-03-05 17:16:30] (step=0069440) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.586382312658971, LR: 0.0003 +[2026-03-05 17:16:37] (step=0069441) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.586577969086285, LR: 0.0003 +[2026-03-05 17:16:45] (step=0069442) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 13.586773625513597, LR: 0.0003 +[2026-03-05 17:16:53] (step=0069443) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.586969281940911, LR: 0.0003 +[2026-03-05 17:17:01] (step=0069444) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 13.587164938368225, LR: 0.0003 +[2026-03-05 17:17:09] (step=0069445) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.58736059479554, LR: 0.0003 +[2026-03-05 17:17:17] (step=0069446) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.587556251222853, LR: 0.0003 +[2026-03-05 17:17:25] (step=0069447) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.587751907650166, LR: 0.0003 +[2026-03-05 17:17:32] (step=0069448) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.58794756407748, LR: 0.0003 +[2026-03-05 17:17:40] (step=0069449) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.588143220504794, LR: 0.0003 +[2026-03-05 17:17:48] (step=0069450) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.588338876932108, LR: 0.0003 +[2026-03-05 17:17:56] (step=0069451) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 13.588534533359422, LR: 0.0003 +[2026-03-05 17:18:04] (step=0069452) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.588730189786734, LR: 0.0003 +[2026-03-05 17:18:12] (step=0069453) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.588925846214048, LR: 0.0003 +[2026-03-05 17:18:20] (step=0069454) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.589121502641362, LR: 0.0003 +[2026-03-05 17:18:28] (step=0069455) Train Loss: 0.4389, Train Steps/Sec: 0.12, Epoch: 13.589317159068676, LR: 0.0003 +[2026-03-05 17:18:36] (step=0069456) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.58951281549599, LR: 0.0003 +[2026-03-05 17:18:44] (step=0069457) Train Loss: 0.4216, Train Steps/Sec: 0.13, Epoch: 13.589708471923302, LR: 0.0003 +[2026-03-05 17:18:51] (step=0069458) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.589904128350616, LR: 0.0003 +[2026-03-05 17:18:59] (step=0069459) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.59009978477793, LR: 0.0003 +[2026-03-05 17:19:07] (step=0069460) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.590295441205244, LR: 0.0003 +[2026-03-05 17:19:15] (step=0069461) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.590491097632556, LR: 0.0003 +[2026-03-05 17:19:23] (step=0069462) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.59068675405987, LR: 0.0003 +[2026-03-05 17:19:31] (step=0069463) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.590882410487184, LR: 0.0003 +[2026-03-05 17:19:39] (step=0069464) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.591078066914498, LR: 0.0003 +[2026-03-05 17:19:46] (step=0069465) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.591273723341812, LR: 0.0003 +[2026-03-05 17:19:54] (step=0069466) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.591469379769125, LR: 0.0003 +[2026-03-05 17:20:02] (step=0069467) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.591665036196439, LR: 0.0003 +[2026-03-05 17:20:10] (step=0069468) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.591860692623753, LR: 0.0003 +[2026-03-05 17:20:18] (step=0069469) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.592056349051067, LR: 0.0003 +[2026-03-05 17:20:26] (step=0069470) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.59225200547838, LR: 0.0003 +[2026-03-05 17:20:34] (step=0069471) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.592447661905693, LR: 0.0003 +[2026-03-05 17:20:42] (step=0069472) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.592643318333007, LR: 0.0003 +[2026-03-05 17:20:49] (step=0069473) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.59283897476032, LR: 0.0003 +[2026-03-05 17:20:57] (step=0069474) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.593034631187635, LR: 0.0003 +[2026-03-05 17:21:05] (step=0069475) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.593230287614949, LR: 0.0003 +[2026-03-05 17:21:13] (step=0069476) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.593425944042261, LR: 0.0003 +[2026-03-05 17:21:21] (step=0069477) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.593621600469575, LR: 0.0003 +[2026-03-05 17:21:29] (step=0069478) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.59381725689689, LR: 0.0003 +[2026-03-05 17:21:37] (step=0069479) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.594012913324203, LR: 0.0003 +[2026-03-05 17:21:44] (step=0069480) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.594208569751517, LR: 0.0003 +[2026-03-05 17:21:52] (step=0069481) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.59440422617883, LR: 0.0003 +[2026-03-05 17:22:00] (step=0069482) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.594599882606143, LR: 0.0003 +[2026-03-05 17:22:08] (step=0069483) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.594795539033457, LR: 0.0003 +[2026-03-05 17:22:16] (step=0069484) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.594991195460771, LR: 0.0003 +[2026-03-05 17:22:24] (step=0069485) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.595186851888085, LR: 0.0003 +[2026-03-05 17:22:32] (step=0069486) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 13.595382508315398, LR: 0.0003 +[2026-03-05 17:22:39] (step=0069487) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.595578164742712, LR: 0.0003 +[2026-03-05 17:22:47] (step=0069488) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.595773821170026, LR: 0.0003 +[2026-03-05 17:22:55] (step=0069489) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.59596947759734, LR: 0.0003 +[2026-03-05 17:23:03] (step=0069490) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.596165134024652, LR: 0.0003 +[2026-03-05 17:23:11] (step=0069491) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.596360790451966, LR: 0.0003 +[2026-03-05 17:23:19] (step=0069492) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.59655644687928, LR: 0.0003 +[2026-03-05 17:23:27] (step=0069493) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.596752103306594, LR: 0.0003 +[2026-03-05 17:23:35] (step=0069494) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.596947759733908, LR: 0.0003 +[2026-03-05 17:23:42] (step=0069495) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.59714341616122, LR: 0.0003 +[2026-03-05 17:23:50] (step=0069496) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.597339072588534, LR: 0.0003 +[2026-03-05 17:23:58] (step=0069497) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.597534729015848, LR: 0.0003 +[2026-03-05 17:24:06] (step=0069498) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.597730385443162, LR: 0.0003 +[2026-03-05 17:24:14] (step=0069499) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.597926041870476, LR: 0.0003 +[2026-03-05 17:24:22] (step=0069500) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.598121698297788, LR: 0.0003 +[2026-03-05 17:24:22] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0069500/ +[2026-03-05 17:24:30] (step=0069501) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.598317354725102, LR: 0.0003 +[2026-03-05 17:24:37] (step=0069502) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 13.598513011152416, LR: 0.0003 +[2026-03-05 17:24:45] (step=0069503) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.59870866757973, LR: 0.0003 +[2026-03-05 17:24:53] (step=0069504) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.598904324007044, LR: 0.0003 +[2026-03-05 17:25:01] (step=0069505) Train Loss: 0.4556, Train Steps/Sec: 0.12, Epoch: 13.599099980434357, LR: 0.0003 +[2026-03-05 17:25:09] (step=0069506) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.59929563686167, LR: 0.0003 +[2026-03-05 17:25:17] (step=0069507) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.599491293288985, LR: 0.0003 +[2026-03-05 17:25:25] (step=0069508) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.599686949716299, LR: 0.0003 +[2026-03-05 17:25:33] (step=0069509) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.599882606143613, LR: 0.0003 +[2026-03-05 17:25:41] (step=0069510) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.600078262570925, LR: 0.0003 +[2026-03-05 17:25:49] (step=0069511) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.600273918998239, LR: 0.0003 +[2026-03-05 17:25:56] (step=0069512) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.600469575425553, LR: 0.0003 +[2026-03-05 17:26:04] (step=0069513) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.600665231852867, LR: 0.0003 +[2026-03-05 17:26:12] (step=0069514) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.60086088828018, LR: 0.0003 +[2026-03-05 17:26:20] (step=0069515) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 13.601056544707493, LR: 0.0003 +[2026-03-05 17:26:28] (step=0069516) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 13.601252201134807, LR: 0.0003 +[2026-03-05 17:26:36] (step=0069517) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.601447857562121, LR: 0.0003 +[2026-03-05 17:26:44] (step=0069518) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.601643513989435, LR: 0.0003 +[2026-03-05 17:26:52] (step=0069519) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.601839170416747, LR: 0.0003 +[2026-03-05 17:26:59] (step=0069520) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 13.602034826844061, LR: 0.0003 +[2026-03-05 17:27:07] (step=0069521) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.602230483271375, LR: 0.0003 +[2026-03-05 17:27:15] (step=0069522) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 13.60242613969869, LR: 0.0003 +[2026-03-05 17:27:23] (step=0069523) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.602621796126003, LR: 0.0003 +[2026-03-05 17:27:31] (step=0069524) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.602817452553316, LR: 0.0003 +[2026-03-05 17:27:39] (step=0069525) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.60301310898063, LR: 0.0003 +[2026-03-05 17:27:47] (step=0069526) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.603208765407944, LR: 0.0003 +[2026-03-05 17:27:54] (step=0069527) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.603404421835258, LR: 0.0003 +[2026-03-05 17:28:02] (step=0069528) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 13.603600078262572, LR: 0.0003 +[2026-03-05 17:28:10] (step=0069529) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.603795734689884, LR: 0.0003 +[2026-03-05 17:28:18] (step=0069530) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.603991391117198, LR: 0.0003 +[2026-03-05 17:28:26] (step=0069531) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 13.604187047544512, LR: 0.0003 +[2026-03-05 17:28:34] (step=0069532) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.604382703971826, LR: 0.0003 +[2026-03-05 17:28:42] (step=0069533) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.60457836039914, LR: 0.0003 +[2026-03-05 17:28:49] (step=0069534) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.604774016826452, LR: 0.0003 +[2026-03-05 17:28:57] (step=0069535) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.604969673253766, LR: 0.0003 +[2026-03-05 17:29:05] (step=0069536) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.60516532968108, LR: 0.0003 +[2026-03-05 17:29:13] (step=0069537) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.605360986108394, LR: 0.0003 +[2026-03-05 17:29:21] (step=0069538) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.605556642535708, LR: 0.0003 +[2026-03-05 17:29:29] (step=0069539) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 13.60575229896302, LR: 0.0003 +[2026-03-05 17:29:37] (step=0069540) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.605947955390334, LR: 0.0003 +[2026-03-05 17:29:44] (step=0069541) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.606143611817648, LR: 0.0003 +[2026-03-05 17:29:52] (step=0069542) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.606339268244962, LR: 0.0003 +[2026-03-05 17:30:00] (step=0069543) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.606534924672275, LR: 0.0003 +[2026-03-05 17:30:08] (step=0069544) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 13.606730581099589, LR: 0.0003 +[2026-03-05 17:30:16] (step=0069545) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.606926237526903, LR: 0.0003 +[2026-03-05 17:30:24] (step=0069546) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.607121893954217, LR: 0.0003 +[2026-03-05 17:30:32] (step=0069547) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.60731755038153, LR: 0.0003 +[2026-03-05 17:30:39] (step=0069548) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 13.607513206808843, LR: 0.0003 +[2026-03-05 17:30:47] (step=0069549) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.607708863236157, LR: 0.0003 +[2026-03-05 17:30:55] (step=0069550) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.607904519663471, LR: 0.0003 +[2026-03-05 17:31:03] (step=0069551) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.608100176090785, LR: 0.0003 +[2026-03-05 17:31:11] (step=0069552) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.608295832518099, LR: 0.0003 +[2026-03-05 17:31:19] (step=0069553) Train Loss: 0.4450, Train Steps/Sec: 0.12, Epoch: 13.608491488945411, LR: 0.0003 +[2026-03-05 17:31:27] (step=0069554) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.608687145372725, LR: 0.0003 +[2026-03-05 17:31:35] (step=0069555) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.60888280180004, LR: 0.0003 +[2026-03-05 17:31:43] (step=0069556) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.609078458227353, LR: 0.0003 +[2026-03-05 17:31:50] (step=0069557) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 13.609274114654667, LR: 0.0003 +[2026-03-05 17:31:58] (step=0069558) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 13.60946977108198, LR: 0.0003 +[2026-03-05 17:32:06] (step=0069559) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.609665427509293, LR: 0.0003 +[2026-03-05 17:32:14] (step=0069560) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.609861083936607, LR: 0.0003 +[2026-03-05 17:32:22] (step=0069561) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.610056740363921, LR: 0.0003 +[2026-03-05 17:32:30] (step=0069562) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.610252396791235, LR: 0.0003 +[2026-03-05 17:32:38] (step=0069563) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 13.610448053218548, LR: 0.0003 +[2026-03-05 17:32:45] (step=0069564) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.610643709645862, LR: 0.0003 +[2026-03-05 17:32:53] (step=0069565) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 13.610839366073176, LR: 0.0003 +[2026-03-05 17:33:01] (step=0069566) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.61103502250049, LR: 0.0003 +[2026-03-05 17:33:09] (step=0069567) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.611230678927802, LR: 0.0003 +[2026-03-05 17:33:17] (step=0069568) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.611426335355116, LR: 0.0003 +[2026-03-05 17:33:25] (step=0069569) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.61162199178243, LR: 0.0003 +[2026-03-05 17:33:33] (step=0069570) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.611817648209744, LR: 0.0003 +[2026-03-05 17:33:40] (step=0069571) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.612013304637058, LR: 0.0003 +[2026-03-05 17:33:48] (step=0069572) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.61220896106437, LR: 0.0003 +[2026-03-05 17:33:56] (step=0069573) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.612404617491684, LR: 0.0003 +[2026-03-05 17:34:04] (step=0069574) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.612600273918998, LR: 0.0003 +[2026-03-05 17:34:12] (step=0069575) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.612795930346312, LR: 0.0003 +[2026-03-05 17:34:20] (step=0069576) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.612991586773626, LR: 0.0003 +[2026-03-05 17:34:28] (step=0069577) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.613187243200938, LR: 0.0003 +[2026-03-05 17:34:35] (step=0069578) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 13.613382899628252, LR: 0.0003 +[2026-03-05 17:34:43] (step=0069579) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.613578556055566, LR: 0.0003 +[2026-03-05 17:34:51] (step=0069580) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.61377421248288, LR: 0.0003 +[2026-03-05 17:34:59] (step=0069581) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 13.613969868910194, LR: 0.0003 +[2026-03-05 17:35:07] (step=0069582) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.614165525337507, LR: 0.0003 +[2026-03-05 17:35:15] (step=0069583) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.61436118176482, LR: 0.0003 +[2026-03-05 17:35:23] (step=0069584) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.614556838192135, LR: 0.0003 +[2026-03-05 17:35:31] (step=0069585) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.614752494619449, LR: 0.0003 +[2026-03-05 17:35:38] (step=0069586) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.614948151046763, LR: 0.0003 +[2026-03-05 17:35:46] (step=0069587) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 13.615143807474075, LR: 0.0003 +[2026-03-05 17:35:54] (step=0069588) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.615339463901389, LR: 0.0003 +[2026-03-05 17:36:02] (step=0069589) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.615535120328703, LR: 0.0003 +[2026-03-05 17:36:10] (step=0069590) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.615730776756017, LR: 0.0003 +[2026-03-05 17:36:18] (step=0069591) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.615926433183331, LR: 0.0003 +[2026-03-05 17:36:26] (step=0069592) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.616122089610643, LR: 0.0003 +[2026-03-05 17:36:33] (step=0069593) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.616317746037957, LR: 0.0003 +[2026-03-05 17:36:41] (step=0069594) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.616513402465271, LR: 0.0003 +[2026-03-05 17:36:49] (step=0069595) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.616709058892585, LR: 0.0003 +[2026-03-05 17:36:57] (step=0069596) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.616904715319897, LR: 0.0003 +[2026-03-05 17:37:05] (step=0069597) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.617100371747211, LR: 0.0003 +[2026-03-05 17:37:13] (step=0069598) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.617296028174525, LR: 0.0003 +[2026-03-05 17:37:21] (step=0069599) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.61749168460184, LR: 0.0003 +[2026-03-05 17:37:29] (step=0069600) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.617687341029153, LR: 0.0003 +[2026-03-05 17:37:36] (step=0069601) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 13.617882997456466, LR: 0.0003 +[2026-03-05 17:37:44] (step=0069602) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.61807865388378, LR: 0.0003 +[2026-03-05 17:37:52] (step=0069603) Train Loss: 0.4448, Train Steps/Sec: 0.12, Epoch: 13.618274310311094, LR: 0.0003 +[2026-03-05 17:38:00] (step=0069604) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.618469966738408, LR: 0.0003 +[2026-03-05 17:38:08] (step=0069605) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.618665623165722, LR: 0.0003 +[2026-03-05 17:38:16] (step=0069606) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.618861279593034, LR: 0.0003 +[2026-03-05 17:38:24] (step=0069607) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 13.619056936020348, LR: 0.0003 +[2026-03-05 17:38:32] (step=0069608) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.619252592447662, LR: 0.0003 +[2026-03-05 17:38:40] (step=0069609) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.619448248874976, LR: 0.0003 +[2026-03-05 17:38:47] (step=0069610) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.61964390530229, LR: 0.0003 +[2026-03-05 17:38:55] (step=0069611) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.619839561729602, LR: 0.0003 +[2026-03-05 17:39:03] (step=0069612) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.620035218156916, LR: 0.0003 +[2026-03-05 17:39:11] (step=0069613) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.62023087458423, LR: 0.0003 +[2026-03-05 17:39:19] (step=0069614) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.620426531011544, LR: 0.0003 +[2026-03-05 17:39:27] (step=0069615) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.620622187438858, LR: 0.0003 +[2026-03-05 17:39:34] (step=0069616) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.62081784386617, LR: 0.0003 +[2026-03-05 17:39:42] (step=0069617) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.621013500293484, LR: 0.0003 +[2026-03-05 17:39:50] (step=0069618) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.621209156720798, LR: 0.0003 +[2026-03-05 17:39:58] (step=0069619) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.621404813148112, LR: 0.0003 +[2026-03-05 17:40:06] (step=0069620) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 13.621600469575425, LR: 0.0003 +[2026-03-05 17:40:14] (step=0069621) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 13.621796126002739, LR: 0.0003 +[2026-03-05 17:40:22] (step=0069622) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 13.621991782430053, LR: 0.0003 +[2026-03-05 17:40:29] (step=0069623) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.622187438857367, LR: 0.0003 +[2026-03-05 17:40:37] (step=0069624) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.62238309528468, LR: 0.0003 +[2026-03-05 17:40:45] (step=0069625) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.622578751711993, LR: 0.0003 +[2026-03-05 17:40:53] (step=0069626) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.622774408139307, LR: 0.0003 +[2026-03-05 17:41:01] (step=0069627) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.622970064566621, LR: 0.0003 +[2026-03-05 17:41:09] (step=0069628) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.623165720993935, LR: 0.0003 +[2026-03-05 17:41:16] (step=0069629) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.623361377421249, LR: 0.0003 +[2026-03-05 17:41:24] (step=0069630) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.623557033848561, LR: 0.0003 +[2026-03-05 17:41:32] (step=0069631) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.623752690275875, LR: 0.0003 +[2026-03-05 17:41:40] (step=0069632) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.62394834670319, LR: 0.0003 +[2026-03-05 17:41:48] (step=0069633) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.624144003130503, LR: 0.0003 +[2026-03-05 17:41:56] (step=0069634) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.624339659557817, LR: 0.0003 +[2026-03-05 17:42:04] (step=0069635) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.62453531598513, LR: 0.0003 +[2026-03-05 17:42:11] (step=0069636) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.624730972412443, LR: 0.0003 +[2026-03-05 17:42:19] (step=0069637) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.624926628839757, LR: 0.0003 +[2026-03-05 17:42:27] (step=0069638) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.625122285267071, LR: 0.0003 +[2026-03-05 17:42:35] (step=0069639) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.625317941694385, LR: 0.0003 +[2026-03-05 17:42:43] (step=0069640) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.625513598121698, LR: 0.0003 +[2026-03-05 17:42:51] (step=0069641) Train Loss: 0.4234, Train Steps/Sec: 0.13, Epoch: 13.625709254549012, LR: 0.0003 +[2026-03-05 17:42:58] (step=0069642) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.625904910976326, LR: 0.0003 +[2026-03-05 17:43:06] (step=0069643) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.62610056740364, LR: 0.0003 +[2026-03-05 17:43:14] (step=0069644) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.626296223830952, LR: 0.0003 +[2026-03-05 17:43:22] (step=0069645) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.626491880258266, LR: 0.0003 +[2026-03-05 17:43:30] (step=0069646) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.62668753668558, LR: 0.0003 +[2026-03-05 17:43:38] (step=0069647) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.626883193112894, LR: 0.0003 +[2026-03-05 17:43:46] (step=0069648) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.627078849540208, LR: 0.0003 +[2026-03-05 17:43:53] (step=0069649) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 13.62727450596752, LR: 0.0003 +[2026-03-05 17:44:01] (step=0069650) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 13.627470162394834, LR: 0.0003 +[2026-03-05 17:44:09] (step=0069651) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 13.627665818822148, LR: 0.0003 +[2026-03-05 17:44:17] (step=0069652) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.627861475249462, LR: 0.0003 +[2026-03-05 17:44:25] (step=0069653) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.628057131676776, LR: 0.0003 +[2026-03-05 17:44:33] (step=0069654) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.628252788104088, LR: 0.0003 +[2026-03-05 17:44:41] (step=0069655) Train Loss: 0.4502, Train Steps/Sec: 0.12, Epoch: 13.628448444531402, LR: 0.0003 +[2026-03-05 17:44:49] (step=0069656) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.628644100958716, LR: 0.0003 +[2026-03-05 17:44:56] (step=0069657) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.62883975738603, LR: 0.0003 +[2026-03-05 17:45:04] (step=0069658) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.629035413813344, LR: 0.0003 +[2026-03-05 17:45:12] (step=0069659) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.629231070240657, LR: 0.0003 +[2026-03-05 17:45:20] (step=0069660) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.62942672666797, LR: 0.0003 +[2026-03-05 17:45:28] (step=0069661) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.629622383095285, LR: 0.0003 +[2026-03-05 17:45:36] (step=0069662) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.629818039522599, LR: 0.0003 +[2026-03-05 17:45:43] (step=0069663) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.630013695949913, LR: 0.0003 +[2026-03-05 17:45:51] (step=0069664) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.630209352377225, LR: 0.0003 +[2026-03-05 17:45:59] (step=0069665) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.630405008804539, LR: 0.0003 +[2026-03-05 17:46:07] (step=0069666) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.630600665231853, LR: 0.0003 +[2026-03-05 17:46:15] (step=0069667) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.630796321659167, LR: 0.0003 +[2026-03-05 17:46:23] (step=0069668) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.630991978086481, LR: 0.0003 +[2026-03-05 17:46:30] (step=0069669) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.631187634513793, LR: 0.0003 +[2026-03-05 17:46:38] (step=0069670) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.631383290941107, LR: 0.0003 +[2026-03-05 17:46:46] (step=0069671) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.631578947368421, LR: 0.0003 +[2026-03-05 17:46:54] (step=0069672) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.631774603795735, LR: 0.0003 +[2026-03-05 17:47:02] (step=0069673) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.631970260223047, LR: 0.0003 +[2026-03-05 17:47:10] (step=0069674) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.632165916650361, LR: 0.0003 +[2026-03-05 17:47:18] (step=0069675) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.632361573077675, LR: 0.0003 +[2026-03-05 17:47:25] (step=0069676) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.63255722950499, LR: 0.0003 +[2026-03-05 17:47:33] (step=0069677) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 13.632752885932303, LR: 0.0003 +[2026-03-05 17:47:41] (step=0069678) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.632948542359616, LR: 0.0003 +[2026-03-05 17:47:49] (step=0069679) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.63314419878693, LR: 0.0003 +[2026-03-05 17:47:57] (step=0069680) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 13.633339855214244, LR: 0.0003 +[2026-03-05 17:48:05] (step=0069681) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.633535511641558, LR: 0.0003 +[2026-03-05 17:48:12] (step=0069682) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.633731168068872, LR: 0.0003 +[2026-03-05 17:48:20] (step=0069683) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.633926824496184, LR: 0.0003 +[2026-03-05 17:48:28] (step=0069684) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.634122480923498, LR: 0.0003 +[2026-03-05 17:48:36] (step=0069685) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.634318137350812, LR: 0.0003 +[2026-03-05 17:48:44] (step=0069686) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.634513793778126, LR: 0.0003 +[2026-03-05 17:48:52] (step=0069687) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.63470945020544, LR: 0.0003 +[2026-03-05 17:48:59] (step=0069688) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 13.634905106632752, LR: 0.0003 +[2026-03-05 17:49:07] (step=0069689) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.635100763060066, LR: 0.0003 +[2026-03-05 17:49:15] (step=0069690) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.63529641948738, LR: 0.0003 +[2026-03-05 17:49:23] (step=0069691) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.635492075914694, LR: 0.0003 +[2026-03-05 17:49:31] (step=0069692) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.635687732342008, LR: 0.0003 +[2026-03-05 17:49:39] (step=0069693) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.63588338876932, LR: 0.0003 +[2026-03-05 17:49:47] (step=0069694) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.636079045196634, LR: 0.0003 +[2026-03-05 17:49:54] (step=0069695) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 13.636274701623949, LR: 0.0003 +[2026-03-05 17:50:02] (step=0069696) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.636470358051263, LR: 0.0003 +[2026-03-05 17:50:10] (step=0069697) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.636666014478575, LR: 0.0003 +[2026-03-05 17:50:18] (step=0069698) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.636861670905889, LR: 0.0003 +[2026-03-05 17:50:26] (step=0069699) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.637057327333203, LR: 0.0003 +[2026-03-05 17:50:34] (step=0069700) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.637252983760517, LR: 0.0003 +[2026-03-05 17:50:42] (step=0069701) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.63744864018783, LR: 0.0003 +[2026-03-05 17:50:49] (step=0069702) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.637644296615143, LR: 0.0003 +[2026-03-05 17:50:57] (step=0069703) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 13.637839953042457, LR: 0.0003 +[2026-03-05 17:51:05] (step=0069704) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.638035609469771, LR: 0.0003 +[2026-03-05 17:51:13] (step=0069705) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.638231265897085, LR: 0.0003 +[2026-03-05 17:51:21] (step=0069706) Train Loss: 0.4472, Train Steps/Sec: 0.12, Epoch: 13.638426922324399, LR: 0.0003 +[2026-03-05 17:51:29] (step=0069707) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.638622578751711, LR: 0.0003 +[2026-03-05 17:51:37] (step=0069708) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.638818235179025, LR: 0.0003 +[2026-03-05 17:51:45] (step=0069709) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.63901389160634, LR: 0.0003 +[2026-03-05 17:51:52] (step=0069710) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.639209548033653, LR: 0.0003 +[2026-03-05 17:52:00] (step=0069711) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.639405204460967, LR: 0.0003 +[2026-03-05 17:52:08] (step=0069712) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.63960086088828, LR: 0.0003 +[2026-03-05 17:52:16] (step=0069713) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.639796517315594, LR: 0.0003 +[2026-03-05 17:52:24] (step=0069714) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.639992173742908, LR: 0.0003 +[2026-03-05 17:52:32] (step=0069715) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.640187830170222, LR: 0.0003 +[2026-03-05 17:52:39] (step=0069716) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.640383486597536, LR: 0.0003 +[2026-03-05 17:52:47] (step=0069717) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.640579143024848, LR: 0.0003 +[2026-03-05 17:52:55] (step=0069718) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.640774799452162, LR: 0.0003 +[2026-03-05 17:53:03] (step=0069719) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.640970455879476, LR: 0.0003 +[2026-03-05 17:53:11] (step=0069720) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.64116611230679, LR: 0.0003 +[2026-03-05 17:53:19] (step=0069721) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 13.641361768734104, LR: 0.0003 +[2026-03-05 17:53:26] (step=0069722) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.641557425161416, LR: 0.0003 +[2026-03-05 17:53:34] (step=0069723) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.64175308158873, LR: 0.0003 +[2026-03-05 17:53:42] (step=0069724) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 13.641948738016044, LR: 0.0003 +[2026-03-05 17:53:50] (step=0069725) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.642144394443358, LR: 0.0003 +[2026-03-05 17:53:58] (step=0069726) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.64234005087067, LR: 0.0003 +[2026-03-05 17:54:06] (step=0069727) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.642535707297984, LR: 0.0003 +[2026-03-05 17:54:14] (step=0069728) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.642731363725298, LR: 0.0003 +[2026-03-05 17:54:21] (step=0069729) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.642927020152612, LR: 0.0003 +[2026-03-05 17:54:29] (step=0069730) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 13.643122676579926, LR: 0.0003 +[2026-03-05 17:54:37] (step=0069731) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.643318333007239, LR: 0.0003 +[2026-03-05 17:54:45] (step=0069732) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.643513989434553, LR: 0.0003 +[2026-03-05 17:54:53] (step=0069733) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.643709645861867, LR: 0.0003 +[2026-03-05 17:55:01] (step=0069734) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.64390530228918, LR: 0.0003 +[2026-03-05 17:55:08] (step=0069735) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.644100958716495, LR: 0.0003 +[2026-03-05 17:55:16] (step=0069736) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.644296615143807, LR: 0.0003 +[2026-03-05 17:55:24] (step=0069737) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.64449227157112, LR: 0.0003 +[2026-03-05 17:55:32] (step=0069738) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 13.644687927998435, LR: 0.0003 +[2026-03-05 17:55:40] (step=0069739) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.644883584425749, LR: 0.0003 +[2026-03-05 17:55:48] (step=0069740) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.645079240853063, LR: 0.0003 +[2026-03-05 17:55:56] (step=0069741) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.645274897280375, LR: 0.0003 +[2026-03-05 17:56:03] (step=0069742) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.645470553707689, LR: 0.0003 +[2026-03-05 17:56:11] (step=0069743) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.645666210135003, LR: 0.0003 +[2026-03-05 17:56:19] (step=0069744) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.645861866562317, LR: 0.0003 +[2026-03-05 17:56:27] (step=0069745) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.646057522989631, LR: 0.0003 +[2026-03-05 17:56:35] (step=0069746) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.646253179416943, LR: 0.0003 +[2026-03-05 17:56:43] (step=0069747) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 13.646448835844257, LR: 0.0003 +[2026-03-05 17:56:51] (step=0069748) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.646644492271571, LR: 0.0003 +[2026-03-05 17:56:58] (step=0069749) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 13.646840148698885, LR: 0.0003 +[2026-03-05 17:57:06] (step=0069750) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.647035805126198, LR: 0.0003 +[2026-03-05 17:57:14] (step=0069751) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.647231461553512, LR: 0.0003 +[2026-03-05 17:57:22] (step=0069752) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.647427117980826, LR: 0.0003 +[2026-03-05 17:57:30] (step=0069753) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.64762277440814, LR: 0.0003 +[2026-03-05 17:57:38] (step=0069754) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.647818430835454, LR: 0.0003 +[2026-03-05 17:57:45] (step=0069755) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.648014087262766, LR: 0.0003 +[2026-03-05 17:57:53] (step=0069756) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 13.64820974369008, LR: 0.0003 +[2026-03-05 17:58:01] (step=0069757) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.648405400117394, LR: 0.0003 +[2026-03-05 17:58:09] (step=0069758) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.648601056544708, LR: 0.0003 +[2026-03-05 17:58:17] (step=0069759) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.648796712972022, LR: 0.0003 +[2026-03-05 17:58:25] (step=0069760) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.648992369399334, LR: 0.0003 +[2026-03-05 17:58:33] (step=0069761) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.649188025826648, LR: 0.0003 +[2026-03-05 17:58:40] (step=0069762) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.649383682253962, LR: 0.0003 +[2026-03-05 17:58:48] (step=0069763) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.649579338681276, LR: 0.0003 +[2026-03-05 17:58:56] (step=0069764) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.64977499510859, LR: 0.0003 +[2026-03-05 17:59:04] (step=0069765) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.649970651535902, LR: 0.0003 +[2026-03-05 17:59:12] (step=0069766) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 13.650166307963216, LR: 0.0003 +[2026-03-05 17:59:20] (step=0069767) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.65036196439053, LR: 0.0003 +[2026-03-05 17:59:28] (step=0069768) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.650557620817844, LR: 0.0003 +[2026-03-05 17:59:35] (step=0069769) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.650753277245158, LR: 0.0003 +[2026-03-05 17:59:43] (step=0069770) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.65094893367247, LR: 0.0003 +[2026-03-05 17:59:51] (step=0069771) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 13.651144590099785, LR: 0.0003 +[2026-03-05 17:59:59] (step=0069772) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.651340246527099, LR: 0.0003 +[2026-03-05 18:00:07] (step=0069773) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.651535902954413, LR: 0.0003 +[2026-03-05 18:00:15] (step=0069774) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.651731559381727, LR: 0.0003 +[2026-03-05 18:00:22] (step=0069775) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.651927215809039, LR: 0.0003 +[2026-03-05 18:00:30] (step=0069776) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.652122872236353, LR: 0.0003 +[2026-03-05 18:00:38] (step=0069777) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.652318528663667, LR: 0.0003 +[2026-03-05 18:00:46] (step=0069778) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.65251418509098, LR: 0.0003 +[2026-03-05 18:00:54] (step=0069779) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.652709841518293, LR: 0.0003 +[2026-03-05 18:01:02] (step=0069780) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.652905497945607, LR: 0.0003 +[2026-03-05 18:01:09] (step=0069781) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.653101154372921, LR: 0.0003 +[2026-03-05 18:01:17] (step=0069782) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.653296810800235, LR: 0.0003 +[2026-03-05 18:01:25] (step=0069783) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.653492467227549, LR: 0.0003 +[2026-03-05 18:01:33] (step=0069784) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 13.653688123654861, LR: 0.0003 +[2026-03-05 18:01:41] (step=0069785) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.653883780082175, LR: 0.0003 +[2026-03-05 18:01:49] (step=0069786) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.65407943650949, LR: 0.0003 +[2026-03-05 18:01:56] (step=0069787) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.654275092936803, LR: 0.0003 +[2026-03-05 18:02:04] (step=0069788) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.654470749364117, LR: 0.0003 +[2026-03-05 18:02:12] (step=0069789) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.65466640579143, LR: 0.0003 +[2026-03-05 18:02:20] (step=0069790) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.654862062218744, LR: 0.0003 +[2026-03-05 18:02:28] (step=0069791) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.655057718646058, LR: 0.0003 +[2026-03-05 18:02:36] (step=0069792) Train Loss: 0.4650, Train Steps/Sec: 0.13, Epoch: 13.655253375073372, LR: 0.0003 +[2026-03-05 18:02:44] (step=0069793) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.655449031500686, LR: 0.0003 +[2026-03-05 18:02:52] (step=0069794) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.655644687927998, LR: 0.0003 +[2026-03-05 18:02:59] (step=0069795) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.655840344355312, LR: 0.0003 +[2026-03-05 18:03:07] (step=0069796) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.656036000782626, LR: 0.0003 +[2026-03-05 18:03:15] (step=0069797) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.65623165720994, LR: 0.0003 +[2026-03-05 18:03:23] (step=0069798) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.656427313637254, LR: 0.0003 +[2026-03-05 18:03:31] (step=0069799) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.656622970064566, LR: 0.0003 +[2026-03-05 18:03:39] (step=0069800) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.65681862649188, LR: 0.0003 +[2026-03-05 18:03:47] (step=0069801) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.657014282919194, LR: 0.0003 +[2026-03-05 18:03:54] (step=0069802) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.657209939346508, LR: 0.0003 +[2026-03-05 18:04:02] (step=0069803) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 13.65740559577382, LR: 0.0003 +[2026-03-05 18:04:10] (step=0069804) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.657601252201134, LR: 0.0003 +[2026-03-05 18:04:18] (step=0069805) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.657796908628448, LR: 0.0003 +[2026-03-05 18:04:26] (step=0069806) Train Loss: 0.4355, Train Steps/Sec: 0.12, Epoch: 13.657992565055762, LR: 0.0003 +[2026-03-05 18:04:34] (step=0069807) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.658188221483076, LR: 0.0003 +[2026-03-05 18:04:42] (step=0069808) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.658383877910389, LR: 0.0003 +[2026-03-05 18:04:50] (step=0069809) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.658579534337703, LR: 0.0003 +[2026-03-05 18:04:57] (step=0069810) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.658775190765017, LR: 0.0003 +[2026-03-05 18:05:05] (step=0069811) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.65897084719233, LR: 0.0003 +[2026-03-05 18:05:13] (step=0069812) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.659166503619645, LR: 0.0003 +[2026-03-05 18:05:21] (step=0069813) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.659362160046957, LR: 0.0003 +[2026-03-05 18:05:29] (step=0069814) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.65955781647427, LR: 0.0003 +[2026-03-05 18:05:37] (step=0069815) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.659753472901585, LR: 0.0003 +[2026-03-05 18:05:44] (step=0069816) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.659949129328899, LR: 0.0003 +[2026-03-05 18:05:52] (step=0069817) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.660144785756213, LR: 0.0003 +[2026-03-05 18:06:00] (step=0069818) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.660340442183525, LR: 0.0003 +[2026-03-05 18:06:08] (step=0069819) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.660536098610839, LR: 0.0003 +[2026-03-05 18:06:16] (step=0069820) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.660731755038153, LR: 0.0003 +[2026-03-05 18:06:24] (step=0069821) Train Loss: 0.4215, Train Steps/Sec: 0.13, Epoch: 13.660927411465467, LR: 0.0003 +[2026-03-05 18:06:32] (step=0069822) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.661123067892781, LR: 0.0003 +[2026-03-05 18:06:39] (step=0069823) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.661318724320093, LR: 0.0003 +[2026-03-05 18:06:47] (step=0069824) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 13.661514380747407, LR: 0.0003 +[2026-03-05 18:06:55] (step=0069825) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.661710037174721, LR: 0.0003 +[2026-03-05 18:07:03] (step=0069826) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.661905693602035, LR: 0.0003 +[2026-03-05 18:07:11] (step=0069827) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.66210135002935, LR: 0.0003 +[2026-03-05 18:07:19] (step=0069828) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 13.662297006456662, LR: 0.0003 +[2026-03-05 18:07:26] (step=0069829) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.662492662883976, LR: 0.0003 +[2026-03-05 18:07:34] (step=0069830) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.66268831931129, LR: 0.0003 +[2026-03-05 18:07:42] (step=0069831) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.662883975738604, LR: 0.0003 +[2026-03-05 18:07:50] (step=0069832) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.663079632165916, LR: 0.0003 +[2026-03-05 18:07:58] (step=0069833) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.66327528859323, LR: 0.0003 +[2026-03-05 18:08:06] (step=0069834) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.663470945020544, LR: 0.0003 +[2026-03-05 18:08:13] (step=0069835) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.663666601447858, LR: 0.0003 +[2026-03-05 18:08:21] (step=0069836) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.663862257875172, LR: 0.0003 +[2026-03-05 18:08:29] (step=0069837) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.664057914302484, LR: 0.0003 +[2026-03-05 18:08:37] (step=0069838) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.664253570729798, LR: 0.0003 +[2026-03-05 18:08:45] (step=0069839) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 13.664449227157112, LR: 0.0003 +[2026-03-05 18:08:53] (step=0069840) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.664644883584426, LR: 0.0003 +[2026-03-05 18:09:01] (step=0069841) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.66484054001174, LR: 0.0003 +[2026-03-05 18:09:09] (step=0069842) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.665036196439052, LR: 0.0003 +[2026-03-05 18:09:16] (step=0069843) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.665231852866366, LR: 0.0003 +[2026-03-05 18:09:24] (step=0069844) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 13.66542750929368, LR: 0.0003 +[2026-03-05 18:09:32] (step=0069845) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.665623165720994, LR: 0.0003 +[2026-03-05 18:09:40] (step=0069846) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.665818822148308, LR: 0.0003 +[2026-03-05 18:09:48] (step=0069847) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.66601447857562, LR: 0.0003 +[2026-03-05 18:09:56] (step=0069848) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.666210135002935, LR: 0.0003 +[2026-03-05 18:10:04] (step=0069849) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.666405791430249, LR: 0.0003 +[2026-03-05 18:10:11] (step=0069850) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.666601447857563, LR: 0.0003 +[2026-03-05 18:10:19] (step=0069851) Train Loss: 0.4488, Train Steps/Sec: 0.12, Epoch: 13.666797104284877, LR: 0.0003 +[2026-03-05 18:10:27] (step=0069852) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.666992760712189, LR: 0.0003 +[2026-03-05 18:10:35] (step=0069853) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.667188417139503, LR: 0.0003 +[2026-03-05 18:10:43] (step=0069854) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.667384073566817, LR: 0.0003 +[2026-03-05 18:10:51] (step=0069855) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.66757972999413, LR: 0.0003 +[2026-03-05 18:10:59] (step=0069856) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.667775386421443, LR: 0.0003 +[2026-03-05 18:11:07] (step=0069857) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.667971042848757, LR: 0.0003 +[2026-03-05 18:11:14] (step=0069858) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.668166699276071, LR: 0.0003 +[2026-03-05 18:11:22] (step=0069859) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.668362355703385, LR: 0.0003 +[2026-03-05 18:11:30] (step=0069860) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.668558012130699, LR: 0.0003 +[2026-03-05 18:11:38] (step=0069861) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.668753668558011, LR: 0.0003 +[2026-03-05 18:11:46] (step=0069862) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.668949324985325, LR: 0.0003 +[2026-03-05 18:11:54] (step=0069863) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.66914498141264, LR: 0.0003 +[2026-03-05 18:12:01] (step=0069864) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.669340637839953, LR: 0.0003 +[2026-03-05 18:12:09] (step=0069865) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.669536294267267, LR: 0.0003 +[2026-03-05 18:12:17] (step=0069866) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.66973195069458, LR: 0.0003 +[2026-03-05 18:12:25] (step=0069867) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 13.669927607121894, LR: 0.0003 +[2026-03-05 18:12:33] (step=0069868) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.670123263549208, LR: 0.0003 +[2026-03-05 18:12:41] (step=0069869) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.670318919976522, LR: 0.0003 +[2026-03-05 18:12:48] (step=0069870) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 13.670514576403836, LR: 0.0003 +[2026-03-05 18:12:56] (step=0069871) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.670710232831148, LR: 0.0003 +[2026-03-05 18:13:04] (step=0069872) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.670905889258462, LR: 0.0003 +[2026-03-05 18:13:12] (step=0069873) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.671101545685776, LR: 0.0003 +[2026-03-05 18:13:20] (step=0069874) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.67129720211309, LR: 0.0003 +[2026-03-05 18:13:28] (step=0069875) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.671492858540404, LR: 0.0003 +[2026-03-05 18:13:35] (step=0069876) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.671688514967716, LR: 0.0003 +[2026-03-05 18:13:43] (step=0069877) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.67188417139503, LR: 0.0003 +[2026-03-05 18:13:51] (step=0069878) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.672079827822344, LR: 0.0003 +[2026-03-05 18:13:59] (step=0069879) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.672275484249658, LR: 0.0003 +[2026-03-05 18:14:07] (step=0069880) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.672471140676972, LR: 0.0003 +[2026-03-05 18:14:15] (step=0069881) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.672666797104284, LR: 0.0003 +[2026-03-05 18:14:23] (step=0069882) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.672862453531598, LR: 0.0003 +[2026-03-05 18:14:30] (step=0069883) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.673058109958912, LR: 0.0003 +[2026-03-05 18:14:38] (step=0069884) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.673253766386226, LR: 0.0003 +[2026-03-05 18:14:46] (step=0069885) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.673449422813539, LR: 0.0003 +[2026-03-05 18:14:54] (step=0069886) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.673645079240853, LR: 0.0003 +[2026-03-05 18:15:02] (step=0069887) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.673840735668167, LR: 0.0003 +[2026-03-05 18:15:10] (step=0069888) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.67403639209548, LR: 0.0003 +[2026-03-05 18:15:17] (step=0069889) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.674232048522795, LR: 0.0003 +[2026-03-05 18:15:25] (step=0069890) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.674427704950107, LR: 0.0003 +[2026-03-05 18:15:33] (step=0069891) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.67462336137742, LR: 0.0003 +[2026-03-05 18:15:41] (step=0069892) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.674819017804735, LR: 0.0003 +[2026-03-05 18:15:49] (step=0069893) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.675014674232049, LR: 0.0003 +[2026-03-05 18:15:57] (step=0069894) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.675210330659363, LR: 0.0003 +[2026-03-05 18:16:05] (step=0069895) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.675405987086675, LR: 0.0003 +[2026-03-05 18:16:13] (step=0069896) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.675601643513989, LR: 0.0003 +[2026-03-05 18:16:20] (step=0069897) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.675797299941303, LR: 0.0003 +[2026-03-05 18:16:28] (step=0069898) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.675992956368617, LR: 0.0003 +[2026-03-05 18:16:36] (step=0069899) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.676188612795931, LR: 0.0003 +[2026-03-05 18:16:44] (step=0069900) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.676384269223243, LR: 0.0003 +[2026-03-05 18:16:52] (step=0069901) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.676579925650557, LR: 0.0003 +[2026-03-05 18:17:00] (step=0069902) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.676775582077871, LR: 0.0003 +[2026-03-05 18:17:08] (step=0069903) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.676971238505185, LR: 0.0003 +[2026-03-05 18:17:15] (step=0069904) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.6771668949325, LR: 0.0003 +[2026-03-05 18:17:23] (step=0069905) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.677362551359812, LR: 0.0003 +[2026-03-05 18:17:31] (step=0069906) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.677558207787126, LR: 0.0003 +[2026-03-05 18:17:39] (step=0069907) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.67775386421444, LR: 0.0003 +[2026-03-05 18:17:47] (step=0069908) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.677949520641754, LR: 0.0003 +[2026-03-05 18:17:55] (step=0069909) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.678145177069066, LR: 0.0003 +[2026-03-05 18:18:02] (step=0069910) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.67834083349638, LR: 0.0003 +[2026-03-05 18:18:10] (step=0069911) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.678536489923694, LR: 0.0003 +[2026-03-05 18:18:18] (step=0069912) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.678732146351008, LR: 0.0003 +[2026-03-05 18:18:26] (step=0069913) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.678927802778322, LR: 0.0003 +[2026-03-05 18:18:34] (step=0069914) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.679123459205634, LR: 0.0003 +[2026-03-05 18:18:42] (step=0069915) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.679319115632948, LR: 0.0003 +[2026-03-05 18:18:50] (step=0069916) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.679514772060262, LR: 0.0003 +[2026-03-05 18:18:57] (step=0069917) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.679710428487576, LR: 0.0003 +[2026-03-05 18:19:05] (step=0069918) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.67990608491489, LR: 0.0003 +[2026-03-05 18:19:13] (step=0069919) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.680101741342202, LR: 0.0003 +[2026-03-05 18:19:21] (step=0069920) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.680297397769516, LR: 0.0003 +[2026-03-05 18:19:29] (step=0069921) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.68049305419683, LR: 0.0003 +[2026-03-05 18:19:37] (step=0069922) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.680688710624144, LR: 0.0003 +[2026-03-05 18:19:44] (step=0069923) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.680884367051458, LR: 0.0003 +[2026-03-05 18:19:52] (step=0069924) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.68108002347877, LR: 0.0003 +[2026-03-05 18:20:00] (step=0069925) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 13.681275679906085, LR: 0.0003 +[2026-03-05 18:20:08] (step=0069926) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.681471336333399, LR: 0.0003 +[2026-03-05 18:20:16] (step=0069927) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.681666992760713, LR: 0.0003 +[2026-03-05 18:20:24] (step=0069928) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.681862649188027, LR: 0.0003 +[2026-03-05 18:20:31] (step=0069929) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 13.682058305615339, LR: 0.0003 +[2026-03-05 18:20:39] (step=0069930) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.682253962042653, LR: 0.0003 +[2026-03-05 18:20:47] (step=0069931) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.682449618469967, LR: 0.0003 +[2026-03-05 18:20:55] (step=0069932) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.68264527489728, LR: 0.0003 +[2026-03-05 18:21:03] (step=0069933) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.682840931324595, LR: 0.0003 +[2026-03-05 18:21:11] (step=0069934) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.683036587751907, LR: 0.0003 +[2026-03-05 18:21:18] (step=0069935) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.683232244179221, LR: 0.0003 +[2026-03-05 18:21:26] (step=0069936) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.683427900606535, LR: 0.0003 +[2026-03-05 18:21:34] (step=0069937) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.68362355703385, LR: 0.0003 +[2026-03-05 18:21:42] (step=0069938) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.683819213461161, LR: 0.0003 +[2026-03-05 18:21:50] (step=0069939) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.684014869888475, LR: 0.0003 +[2026-03-05 18:21:58] (step=0069940) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.68421052631579, LR: 0.0003 +[2026-03-05 18:22:06] (step=0069941) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.684406182743103, LR: 0.0003 +[2026-03-05 18:22:14] (step=0069942) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.684601839170417, LR: 0.0003 +[2026-03-05 18:22:21] (step=0069943) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.68479749559773, LR: 0.0003 +[2026-03-05 18:22:29] (step=0069944) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.684993152025044, LR: 0.0003 +[2026-03-05 18:22:37] (step=0069945) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.685188808452358, LR: 0.0003 +[2026-03-05 18:22:45] (step=0069946) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.685384464879672, LR: 0.0003 +[2026-03-05 18:22:53] (step=0069947) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.685580121306986, LR: 0.0003 +[2026-03-05 18:23:01] (step=0069948) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.685775777734298, LR: 0.0003 +[2026-03-05 18:23:09] (step=0069949) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.685971434161612, LR: 0.0003 +[2026-03-05 18:23:16] (step=0069950) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.686167090588926, LR: 0.0003 +[2026-03-05 18:23:24] (step=0069951) Train Loss: 0.4212, Train Steps/Sec: 0.13, Epoch: 13.68636274701624, LR: 0.0003 +[2026-03-05 18:23:32] (step=0069952) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.686558403443554, LR: 0.0003 +[2026-03-05 18:23:40] (step=0069953) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.686754059870866, LR: 0.0003 +[2026-03-05 18:23:48] (step=0069954) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 13.68694971629818, LR: 0.0003 +[2026-03-05 18:23:56] (step=0069955) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.687145372725494, LR: 0.0003 +[2026-03-05 18:24:03] (step=0069956) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.687341029152808, LR: 0.0003 +[2026-03-05 18:24:11] (step=0069957) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.687536685580122, LR: 0.0003 +[2026-03-05 18:24:19] (step=0069958) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.687732342007434, LR: 0.0003 +[2026-03-05 18:24:27] (step=0069959) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.687927998434748, LR: 0.0003 +[2026-03-05 18:24:35] (step=0069960) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.688123654862062, LR: 0.0003 +[2026-03-05 18:24:43] (step=0069961) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.688319311289376, LR: 0.0003 +[2026-03-05 18:24:50] (step=0069962) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.688514967716689, LR: 0.0003 +[2026-03-05 18:24:58] (step=0069963) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 13.688710624144003, LR: 0.0003 +[2026-03-05 18:25:06] (step=0069964) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.688906280571317, LR: 0.0003 +[2026-03-05 18:25:14] (step=0069965) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.68910193699863, LR: 0.0003 +[2026-03-05 18:25:22] (step=0069966) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.689297593425945, LR: 0.0003 +[2026-03-05 18:25:30] (step=0069967) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.689493249853257, LR: 0.0003 +[2026-03-05 18:25:37] (step=0069968) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.689688906280571, LR: 0.0003 +[2026-03-05 18:25:45] (step=0069969) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.689884562707885, LR: 0.0003 +[2026-03-05 18:25:53] (step=0069970) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.690080219135199, LR: 0.0003 +[2026-03-05 18:26:01] (step=0069971) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.690275875562513, LR: 0.0003 +[2026-03-05 18:26:09] (step=0069972) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.690471531989825, LR: 0.0003 +[2026-03-05 18:26:17] (step=0069973) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.69066718841714, LR: 0.0003 +[2026-03-05 18:26:25] (step=0069974) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.690862844844453, LR: 0.0003 +[2026-03-05 18:26:32] (step=0069975) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.691058501271767, LR: 0.0003 +[2026-03-05 18:26:40] (step=0069976) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 13.691254157699081, LR: 0.0003 +[2026-03-05 18:26:48] (step=0069977) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.691449814126393, LR: 0.0003 +[2026-03-05 18:26:56] (step=0069978) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.691645470553707, LR: 0.0003 +[2026-03-05 18:27:04] (step=0069979) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.691841126981021, LR: 0.0003 +[2026-03-05 18:27:12] (step=0069980) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.692036783408335, LR: 0.0003 +[2026-03-05 18:27:19] (step=0069981) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.69223243983565, LR: 0.0003 +[2026-03-05 18:27:27] (step=0069982) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.692428096262962, LR: 0.0003 +[2026-03-05 18:27:35] (step=0069983) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.692623752690276, LR: 0.0003 +[2026-03-05 18:27:43] (step=0069984) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.69281940911759, LR: 0.0003 +[2026-03-05 18:27:51] (step=0069985) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.693015065544904, LR: 0.0003 +[2026-03-05 18:27:59] (step=0069986) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.693210721972218, LR: 0.0003 +[2026-03-05 18:28:06] (step=0069987) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 13.69340637839953, LR: 0.0003 +[2026-03-05 18:28:14] (step=0069988) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 13.693602034826844, LR: 0.0003 +[2026-03-05 18:28:22] (step=0069989) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.693797691254158, LR: 0.0003 +[2026-03-05 18:28:30] (step=0069990) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.693993347681472, LR: 0.0003 +[2026-03-05 18:28:38] (step=0069991) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.694189004108784, LR: 0.0003 +[2026-03-05 18:28:46] (step=0069992) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.694384660536098, LR: 0.0003 +[2026-03-05 18:28:54] (step=0069993) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 13.694580316963412, LR: 0.0003 +[2026-03-05 18:29:01] (step=0069994) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.694775973390726, LR: 0.0003 +[2026-03-05 18:29:09] (step=0069995) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 13.69497162981804, LR: 0.0003 +[2026-03-05 18:29:17] (step=0069996) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.695167286245352, LR: 0.0003 +[2026-03-05 18:29:25] (step=0069997) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.695362942672666, LR: 0.0003 +[2026-03-05 18:29:33] (step=0069998) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.69555859909998, LR: 0.0003 +[2026-03-05 18:29:41] (step=0069999) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.695754255527294, LR: 0.0003 +[2026-03-05 18:29:49] (step=0070000) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.695949911954608, LR: 0.0003 +[2026-03-05 18:29:49] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0070000/ +[2026-03-05 18:29:57] (step=0070001) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.69614556838192, LR: 0.0003 +[2026-03-05 18:30:04] (step=0070002) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.696341224809235, LR: 0.0003 +[2026-03-05 18:30:12] (step=0070003) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 13.696536881236549, LR: 0.0003 +[2026-03-05 18:30:20] (step=0070004) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.696732537663863, LR: 0.0003 +[2026-03-05 18:30:28] (step=0070005) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.696928194091177, LR: 0.0003 +[2026-03-05 18:30:36] (step=0070006) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.697123850518489, LR: 0.0003 +[2026-03-05 18:30:44] (step=0070007) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 13.697319506945803, LR: 0.0003 +[2026-03-05 18:30:51] (step=0070008) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.697515163373117, LR: 0.0003 +[2026-03-05 18:30:59] (step=0070009) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.697710819800431, LR: 0.0003 +[2026-03-05 18:31:07] (step=0070010) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.697906476227745, LR: 0.0003 +[2026-03-05 18:31:15] (step=0070011) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.698102132655057, LR: 0.0003 +[2026-03-05 18:31:23] (step=0070012) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 13.698297789082371, LR: 0.0003 +[2026-03-05 18:31:31] (step=0070013) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.698493445509685, LR: 0.0003 +[2026-03-05 18:31:38] (step=0070014) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.698689101937, LR: 0.0003 +[2026-03-05 18:31:46] (step=0070015) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.698884758364311, LR: 0.0003 +[2026-03-05 18:31:54] (step=0070016) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.699080414791625, LR: 0.0003 +[2026-03-05 18:32:02] (step=0070017) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.69927607121894, LR: 0.0003 +[2026-03-05 18:32:10] (step=0070018) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.699471727646253, LR: 0.0003 +[2026-03-05 18:32:18] (step=0070019) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.699667384073567, LR: 0.0003 +[2026-03-05 18:32:25] (step=0070020) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 13.69986304050088, LR: 0.0003 +[2026-03-05 18:32:33] (step=0070021) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.700058696928194, LR: 0.0003 +[2026-03-05 18:32:41] (step=0070022) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.700254353355508, LR: 0.0003 +[2026-03-05 18:32:49] (step=0070023) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.700450009782822, LR: 0.0003 +[2026-03-05 18:32:57] (step=0070024) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.700645666210136, LR: 0.0003 +[2026-03-05 18:33:05] (step=0070025) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.700841322637448, LR: 0.0003 +[2026-03-05 18:33:13] (step=0070026) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.701036979064762, LR: 0.0003 +[2026-03-05 18:33:20] (step=0070027) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.701232635492076, LR: 0.0003 +[2026-03-05 18:33:28] (step=0070028) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.70142829191939, LR: 0.0003 +[2026-03-05 18:33:36] (step=0070029) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.701623948346704, LR: 0.0003 +[2026-03-05 18:33:44] (step=0070030) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.701819604774016, LR: 0.0003 +[2026-03-05 18:33:52] (step=0070031) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.70201526120133, LR: 0.0003 +[2026-03-05 18:34:00] (step=0070032) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.702210917628644, LR: 0.0003 +[2026-03-05 18:34:07] (step=0070033) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.702406574055958, LR: 0.0003 +[2026-03-05 18:34:15] (step=0070034) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.702602230483272, LR: 0.0003 +[2026-03-05 18:34:23] (step=0070035) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.702797886910584, LR: 0.0003 +[2026-03-05 18:34:31] (step=0070036) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.702993543337898, LR: 0.0003 +[2026-03-05 18:34:39] (step=0070037) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.703189199765212, LR: 0.0003 +[2026-03-05 18:34:47] (step=0070038) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.703384856192526, LR: 0.0003 +[2026-03-05 18:34:55] (step=0070039) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.70358051261984, LR: 0.0003 +[2026-03-05 18:35:02] (step=0070040) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.703776169047153, LR: 0.0003 +[2026-03-05 18:35:10] (step=0070041) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.703971825474467, LR: 0.0003 +[2026-03-05 18:35:18] (step=0070042) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.70416748190178, LR: 0.0003 +[2026-03-05 18:35:26] (step=0070043) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.704363138329095, LR: 0.0003 +[2026-03-05 18:35:34] (step=0070044) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.704558794756407, LR: 0.0003 +[2026-03-05 18:35:42] (step=0070045) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.704754451183721, LR: 0.0003 +[2026-03-05 18:35:50] (step=0070046) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.704950107611035, LR: 0.0003 +[2026-03-05 18:35:57] (step=0070047) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.705145764038349, LR: 0.0003 +[2026-03-05 18:36:05] (step=0070048) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.705341420465663, LR: 0.0003 +[2026-03-05 18:36:13] (step=0070049) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.705537076892975, LR: 0.0003 +[2026-03-05 18:36:21] (step=0070050) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.70573273332029, LR: 0.0003 +[2026-03-05 18:36:29] (step=0070051) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.705928389747603, LR: 0.0003 +[2026-03-05 18:36:37] (step=0070052) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.706124046174917, LR: 0.0003 +[2026-03-05 18:36:45] (step=0070053) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.706319702602231, LR: 0.0003 +[2026-03-05 18:36:52] (step=0070054) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.706515359029543, LR: 0.0003 +[2026-03-05 18:37:00] (step=0070055) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.706711015456857, LR: 0.0003 +[2026-03-05 18:37:08] (step=0070056) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.706906671884171, LR: 0.0003 +[2026-03-05 18:37:16] (step=0070057) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.707102328311485, LR: 0.0003 +[2026-03-05 18:37:24] (step=0070058) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.7072979847388, LR: 0.0003 +[2026-03-05 18:37:32] (step=0070059) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.707493641166112, LR: 0.0003 +[2026-03-05 18:37:39] (step=0070060) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.707689297593426, LR: 0.0003 +[2026-03-05 18:37:47] (step=0070061) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.70788495402074, LR: 0.0003 +[2026-03-05 18:37:55] (step=0070062) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.708080610448054, LR: 0.0003 +[2026-03-05 18:38:03] (step=0070063) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.708276266875368, LR: 0.0003 +[2026-03-05 18:38:11] (step=0070064) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.70847192330268, LR: 0.0003 +[2026-03-05 18:38:19] (step=0070065) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.708667579729994, LR: 0.0003 +[2026-03-05 18:38:26] (step=0070066) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.708863236157308, LR: 0.0003 +[2026-03-05 18:38:34] (step=0070067) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.709058892584622, LR: 0.0003 +[2026-03-05 18:38:42] (step=0070068) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.709254549011934, LR: 0.0003 +[2026-03-05 18:38:50] (step=0070069) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.709450205439248, LR: 0.0003 +[2026-03-05 18:38:58] (step=0070070) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.709645861866562, LR: 0.0003 +[2026-03-05 18:39:06] (step=0070071) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.709841518293876, LR: 0.0003 +[2026-03-05 18:39:13] (step=0070072) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.71003717472119, LR: 0.0003 +[2026-03-05 18:39:21] (step=0070073) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.710232831148502, LR: 0.0003 +[2026-03-05 18:39:29] (step=0070074) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.710428487575816, LR: 0.0003 +[2026-03-05 18:39:37] (step=0070075) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.71062414400313, LR: 0.0003 +[2026-03-05 18:39:45] (step=0070076) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 13.710819800430444, LR: 0.0003 +[2026-03-05 18:39:53] (step=0070077) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.711015456857758, LR: 0.0003 +[2026-03-05 18:40:00] (step=0070078) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.71121111328507, LR: 0.0003 +[2026-03-05 18:40:08] (step=0070079) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.711406769712385, LR: 0.0003 +[2026-03-05 18:40:16] (step=0070080) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.711602426139699, LR: 0.0003 +[2026-03-05 18:40:24] (step=0070081) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.711798082567013, LR: 0.0003 +[2026-03-05 18:40:32] (step=0070082) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.711993738994327, LR: 0.0003 +[2026-03-05 18:40:40] (step=0070083) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.712189395421639, LR: 0.0003 +[2026-03-05 18:40:47] (step=0070084) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.712385051848953, LR: 0.0003 +[2026-03-05 18:40:55] (step=0070085) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.712580708276267, LR: 0.0003 +[2026-03-05 18:41:03] (step=0070086) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.712776364703581, LR: 0.0003 +[2026-03-05 18:41:11] (step=0070087) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.712972021130895, LR: 0.0003 +[2026-03-05 18:41:19] (step=0070088) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.713167677558207, LR: 0.0003 +[2026-03-05 18:41:27] (step=0070089) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.713363333985521, LR: 0.0003 +[2026-03-05 18:41:35] (step=0070090) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.713558990412835, LR: 0.0003 +[2026-03-05 18:41:42] (step=0070091) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.71375464684015, LR: 0.0003 +[2026-03-05 18:41:50] (step=0070092) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.713950303267461, LR: 0.0003 +[2026-03-05 18:41:58] (step=0070093) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.714145959694775, LR: 0.0003 +[2026-03-05 18:42:06] (step=0070094) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.71434161612209, LR: 0.0003 +[2026-03-05 18:42:14] (step=0070095) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.714537272549403, LR: 0.0003 +[2026-03-05 18:42:22] (step=0070096) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.714732928976717, LR: 0.0003 +[2026-03-05 18:42:29] (step=0070097) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.71492858540403, LR: 0.0003 +[2026-03-05 18:42:37] (step=0070098) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.715124241831344, LR: 0.0003 +[2026-03-05 18:42:45] (step=0070099) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.715319898258658, LR: 0.0003 +[2026-03-05 18:42:53] (step=0070100) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.715515554685972, LR: 0.0003 +[2026-03-05 18:43:01] (step=0070101) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.715711211113286, LR: 0.0003 +[2026-03-05 18:43:09] (step=0070102) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.715906867540598, LR: 0.0003 +[2026-03-05 18:43:17] (step=0070103) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.716102523967912, LR: 0.0003 +[2026-03-05 18:43:24] (step=0070104) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.716298180395226, LR: 0.0003 +[2026-03-05 18:43:32] (step=0070105) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.71649383682254, LR: 0.0003 +[2026-03-05 18:43:40] (step=0070106) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.716689493249854, LR: 0.0003 +[2026-03-05 18:43:48] (step=0070107) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.716885149677166, LR: 0.0003 +[2026-03-05 18:43:56] (step=0070108) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.71708080610448, LR: 0.0003 +[2026-03-05 18:44:04] (step=0070109) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.717276462531794, LR: 0.0003 +[2026-03-05 18:44:11] (step=0070110) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.717472118959108, LR: 0.0003 +[2026-03-05 18:44:19] (step=0070111) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.717667775386422, LR: 0.0003 +[2026-03-05 18:44:27] (step=0070112) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.717863431813734, LR: 0.0003 +[2026-03-05 18:44:35] (step=0070113) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.718059088241048, LR: 0.0003 +[2026-03-05 18:44:43] (step=0070114) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.718254744668362, LR: 0.0003 +[2026-03-05 18:44:51] (step=0070115) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.718450401095676, LR: 0.0003 +[2026-03-05 18:44:59] (step=0070116) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.71864605752299, LR: 0.0003 +[2026-03-05 18:45:06] (step=0070117) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.718841713950303, LR: 0.0003 +[2026-03-05 18:45:14] (step=0070118) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.719037370377617, LR: 0.0003 +[2026-03-05 18:45:22] (step=0070119) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.71923302680493, LR: 0.0003 +[2026-03-05 18:45:30] (step=0070120) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.719428683232245, LR: 0.0003 +[2026-03-05 18:45:38] (step=0070121) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.719624339659557, LR: 0.0003 +[2026-03-05 18:45:46] (step=0070122) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.719819996086871, LR: 0.0003 +[2026-03-05 18:45:53] (step=0070123) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.720015652514185, LR: 0.0003 +[2026-03-05 18:46:01] (step=0070124) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.720211308941499, LR: 0.0003 +[2026-03-05 18:46:09] (step=0070125) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.720406965368813, LR: 0.0003 +[2026-03-05 18:46:17] (step=0070126) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.720602621796125, LR: 0.0003 +[2026-03-05 18:46:25] (step=0070127) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.72079827822344, LR: 0.0003 +[2026-03-05 18:46:33] (step=0070128) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.720993934650753, LR: 0.0003 +[2026-03-05 18:46:40] (step=0070129) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.721189591078067, LR: 0.0003 +[2026-03-05 18:46:48] (step=0070130) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.721385247505381, LR: 0.0003 +[2026-03-05 18:46:56] (step=0070131) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.721580903932693, LR: 0.0003 +[2026-03-05 18:47:04] (step=0070132) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.721776560360007, LR: 0.0003 +[2026-03-05 18:47:12] (step=0070133) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 13.721972216787321, LR: 0.0003 +[2026-03-05 18:47:20] (step=0070134) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.722167873214635, LR: 0.0003 +[2026-03-05 18:47:27] (step=0070135) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.72236352964195, LR: 0.0003 +[2026-03-05 18:47:35] (step=0070136) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.722559186069262, LR: 0.0003 +[2026-03-05 18:47:43] (step=0070137) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 13.722754842496576, LR: 0.0003 +[2026-03-05 18:47:51] (step=0070138) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.72295049892389, LR: 0.0003 +[2026-03-05 18:47:59] (step=0070139) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.723146155351204, LR: 0.0003 +[2026-03-05 18:48:07] (step=0070140) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.723341811778518, LR: 0.0003 +[2026-03-05 18:48:15] (step=0070141) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.72353746820583, LR: 0.0003 +[2026-03-05 18:48:23] (step=0070142) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.723733124633144, LR: 0.0003 +[2026-03-05 18:48:30] (step=0070143) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.723928781060458, LR: 0.0003 +[2026-03-05 18:48:38] (step=0070144) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.724124437487772, LR: 0.0003 +[2026-03-05 18:48:46] (step=0070145) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.724320093915084, LR: 0.0003 +[2026-03-05 18:48:54] (step=0070146) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.724515750342398, LR: 0.0003 +[2026-03-05 18:49:02] (step=0070147) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.724711406769712, LR: 0.0003 +[2026-03-05 18:49:10] (step=0070148) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.724907063197026, LR: 0.0003 +[2026-03-05 18:49:18] (step=0070149) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.72510271962434, LR: 0.0003 +[2026-03-05 18:49:26] (step=0070150) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.725298376051652, LR: 0.0003 +[2026-03-05 18:49:33] (step=0070151) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.725494032478966, LR: 0.0003 +[2026-03-05 18:49:41] (step=0070152) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.72568968890628, LR: 0.0003 +[2026-03-05 18:49:49] (step=0070153) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 13.725885345333595, LR: 0.0003 +[2026-03-05 18:49:57] (step=0070154) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.726081001760909, LR: 0.0003 +[2026-03-05 18:50:05] (step=0070155) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.72627665818822, LR: 0.0003 +[2026-03-05 18:50:13] (step=0070156) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.726472314615535, LR: 0.0003 +[2026-03-05 18:50:20] (step=0070157) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.726667971042849, LR: 0.0003 +[2026-03-05 18:50:28] (step=0070158) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.726863627470163, LR: 0.0003 +[2026-03-05 18:50:36] (step=0070159) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.727059283897477, LR: 0.0003 +[2026-03-05 18:50:44] (step=0070160) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.727254940324789, LR: 0.0003 +[2026-03-05 18:50:52] (step=0070161) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.727450596752103, LR: 0.0003 +[2026-03-05 18:51:00] (step=0070162) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.727646253179417, LR: 0.0003 +[2026-03-05 18:51:07] (step=0070163) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 13.727841909606731, LR: 0.0003 +[2026-03-05 18:51:15] (step=0070164) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.728037566034045, LR: 0.0003 +[2026-03-05 18:51:23] (step=0070165) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.728233222461357, LR: 0.0003 +[2026-03-05 18:51:31] (step=0070166) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 13.728428878888671, LR: 0.0003 +[2026-03-05 18:51:39] (step=0070167) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.728624535315985, LR: 0.0003 +[2026-03-05 18:51:47] (step=0070168) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.7288201917433, LR: 0.0003 +[2026-03-05 18:51:54] (step=0070169) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.729015848170613, LR: 0.0003 +[2026-03-05 18:52:02] (step=0070170) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.729211504597925, LR: 0.0003 +[2026-03-05 18:52:10] (step=0070171) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 13.72940716102524, LR: 0.0003 +[2026-03-05 18:52:18] (step=0070172) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.729602817452554, LR: 0.0003 +[2026-03-05 18:52:26] (step=0070173) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.729798473879868, LR: 0.0003 +[2026-03-05 18:52:34] (step=0070174) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.72999413030718, LR: 0.0003 +[2026-03-05 18:52:42] (step=0070175) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.730189786734494, LR: 0.0003 +[2026-03-05 18:52:49] (step=0070176) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.730385443161808, LR: 0.0003 +[2026-03-05 18:52:57] (step=0070177) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.730581099589122, LR: 0.0003 +[2026-03-05 18:53:05] (step=0070178) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 13.730776756016436, LR: 0.0003 +[2026-03-05 18:53:13] (step=0070179) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.730972412443748, LR: 0.0003 +[2026-03-05 18:53:21] (step=0070180) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.731168068871062, LR: 0.0003 +[2026-03-05 18:53:29] (step=0070181) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.731363725298376, LR: 0.0003 +[2026-03-05 18:53:36] (step=0070182) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 13.73155938172569, LR: 0.0003 +[2026-03-05 18:53:44] (step=0070183) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.731755038153004, LR: 0.0003 +[2026-03-05 18:53:52] (step=0070184) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.731950694580316, LR: 0.0003 +[2026-03-05 18:54:00] (step=0070185) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.73214635100763, LR: 0.0003 +[2026-03-05 18:54:08] (step=0070186) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.732342007434944, LR: 0.0003 +[2026-03-05 18:54:16] (step=0070187) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.732537663862258, LR: 0.0003 +[2026-03-05 18:54:24] (step=0070188) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.732733320289572, LR: 0.0003 +[2026-03-05 18:54:31] (step=0070189) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.732928976716885, LR: 0.0003 +[2026-03-05 18:54:39] (step=0070190) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 13.733124633144199, LR: 0.0003 +[2026-03-05 18:54:47] (step=0070191) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 13.733320289571513, LR: 0.0003 +[2026-03-05 18:54:55] (step=0070192) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.733515945998827, LR: 0.0003 +[2026-03-05 18:55:03] (step=0070193) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.73371160242614, LR: 0.0003 +[2026-03-05 18:55:11] (step=0070194) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.733907258853453, LR: 0.0003 +[2026-03-05 18:55:18] (step=0070195) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 13.734102915280767, LR: 0.0003 +[2026-03-05 18:55:26] (step=0070196) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.73429857170808, LR: 0.0003 +[2026-03-05 18:55:34] (step=0070197) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.734494228135395, LR: 0.0003 +[2026-03-05 18:55:42] (step=0070198) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.734689884562707, LR: 0.0003 +[2026-03-05 18:55:50] (step=0070199) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.734885540990021, LR: 0.0003 +[2026-03-05 18:55:58] (step=0070200) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.735081197417335, LR: 0.0003 +[2026-03-05 18:56:06] (step=0070201) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.735276853844649, LR: 0.0003 +[2026-03-05 18:56:13] (step=0070202) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.735472510271963, LR: 0.0003 +[2026-03-05 18:56:21] (step=0070203) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.735668166699275, LR: 0.0003 +[2026-03-05 18:56:29] (step=0070204) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.73586382312659, LR: 0.0003 +[2026-03-05 18:56:37] (step=0070205) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.736059479553903, LR: 0.0003 +[2026-03-05 18:56:45] (step=0070206) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.736255135981217, LR: 0.0003 +[2026-03-05 18:56:53] (step=0070207) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.736450792408531, LR: 0.0003 +[2026-03-05 18:57:01] (step=0070208) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.736646448835844, LR: 0.0003 +[2026-03-05 18:57:08] (step=0070209) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.736842105263158, LR: 0.0003 +[2026-03-05 18:57:16] (step=0070210) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 13.737037761690472, LR: 0.0003 +[2026-03-05 18:57:24] (step=0070211) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.737233418117786, LR: 0.0003 +[2026-03-05 18:57:32] (step=0070212) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.7374290745451, LR: 0.0003 +[2026-03-05 18:57:40] (step=0070213) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.737624730972412, LR: 0.0003 +[2026-03-05 18:57:48] (step=0070214) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 13.737820387399726, LR: 0.0003 +[2026-03-05 18:57:56] (step=0070215) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.73801604382704, LR: 0.0003 +[2026-03-05 18:58:03] (step=0070216) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.738211700254354, LR: 0.0003 +[2026-03-05 18:58:11] (step=0070217) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.738407356681668, LR: 0.0003 +[2026-03-05 18:58:19] (step=0070218) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 13.73860301310898, LR: 0.0003 +[2026-03-05 18:58:27] (step=0070219) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.738798669536294, LR: 0.0003 +[2026-03-05 18:58:35] (step=0070220) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.738994325963608, LR: 0.0003 +[2026-03-05 18:58:43] (step=0070221) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.739189982390922, LR: 0.0003 +[2026-03-05 18:58:50] (step=0070222) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.739385638818236, LR: 0.0003 +[2026-03-05 18:58:58] (step=0070223) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 13.739581295245548, LR: 0.0003 +[2026-03-05 18:59:06] (step=0070224) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.739776951672862, LR: 0.0003 +[2026-03-05 18:59:14] (step=0070225) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.739972608100176, LR: 0.0003 +[2026-03-05 18:59:22] (step=0070226) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.74016826452749, LR: 0.0003 +[2026-03-05 18:59:30] (step=0070227) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.740363920954803, LR: 0.0003 +[2026-03-05 18:59:37] (step=0070228) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 13.740559577382117, LR: 0.0003 +[2026-03-05 18:59:45] (step=0070229) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.74075523380943, LR: 0.0003 +[2026-03-05 18:59:53] (step=0070230) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.740950890236745, LR: 0.0003 +[2026-03-05 19:00:01] (step=0070231) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.741146546664059, LR: 0.0003 +[2026-03-05 19:00:09] (step=0070232) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.74134220309137, LR: 0.0003 +[2026-03-05 19:00:17] (step=0070233) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.741537859518685, LR: 0.0003 +[2026-03-05 19:00:24] (step=0070234) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.741733515945999, LR: 0.0003 +[2026-03-05 19:00:32] (step=0070235) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.741929172373313, LR: 0.0003 +[2026-03-05 19:00:40] (step=0070236) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.742124828800627, LR: 0.0003 +[2026-03-05 19:00:48] (step=0070237) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.742320485227939, LR: 0.0003 +[2026-03-05 19:00:56] (step=0070238) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.742516141655253, LR: 0.0003 +[2026-03-05 19:01:04] (step=0070239) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.742711798082567, LR: 0.0003 +[2026-03-05 19:01:12] (step=0070240) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.742907454509881, LR: 0.0003 +[2026-03-05 19:01:20] (step=0070241) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.743103110937195, LR: 0.0003 +[2026-03-05 19:01:27] (step=0070242) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.743298767364507, LR: 0.0003 +[2026-03-05 19:01:35] (step=0070243) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.743494423791821, LR: 0.0003 +[2026-03-05 19:01:43] (step=0070244) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.743690080219135, LR: 0.0003 +[2026-03-05 19:01:51] (step=0070245) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 13.74388573664645, LR: 0.0003 +[2026-03-05 19:01:59] (step=0070246) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 13.744081393073763, LR: 0.0003 +[2026-03-05 19:02:07] (step=0070247) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.744277049501076, LR: 0.0003 +[2026-03-05 19:02:14] (step=0070248) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 13.74447270592839, LR: 0.0003 +[2026-03-05 19:02:22] (step=0070249) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.744668362355704, LR: 0.0003 +[2026-03-05 19:02:30] (step=0070250) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.744864018783018, LR: 0.0003 +[2026-03-05 19:02:38] (step=0070251) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.74505967521033, LR: 0.0003 +[2026-03-05 19:02:46] (step=0070252) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.745255331637644, LR: 0.0003 +[2026-03-05 19:02:54] (step=0070253) Train Loss: 0.4448, Train Steps/Sec: 0.12, Epoch: 13.745450988064958, LR: 0.0003 +[2026-03-05 19:03:02] (step=0070254) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.745646644492272, LR: 0.0003 +[2026-03-05 19:03:09] (step=0070255) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.745842300919586, LR: 0.0003 +[2026-03-05 19:03:17] (step=0070256) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.746037957346898, LR: 0.0003 +[2026-03-05 19:03:25] (step=0070257) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 13.746233613774212, LR: 0.0003 +[2026-03-05 19:03:33] (step=0070258) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.746429270201526, LR: 0.0003 +[2026-03-05 19:03:41] (step=0070259) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.74662492662884, LR: 0.0003 +[2026-03-05 19:03:49] (step=0070260) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.746820583056154, LR: 0.0003 +[2026-03-05 19:03:57] (step=0070261) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.747016239483466, LR: 0.0003 +[2026-03-05 19:04:04] (step=0070262) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.74721189591078, LR: 0.0003 +[2026-03-05 19:04:12] (step=0070263) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.747407552338094, LR: 0.0003 +[2026-03-05 19:04:20] (step=0070264) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.747603208765408, LR: 0.0003 +[2026-03-05 19:04:28] (step=0070265) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.747798865192722, LR: 0.0003 +[2026-03-05 19:04:36] (step=0070266) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.747994521620035, LR: 0.0003 +[2026-03-05 19:04:44] (step=0070267) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.748190178047349, LR: 0.0003 +[2026-03-05 19:04:51] (step=0070268) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.748385834474663, LR: 0.0003 +[2026-03-05 19:04:59] (step=0070269) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.748581490901977, LR: 0.0003 +[2026-03-05 19:05:07] (step=0070270) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 13.74877714732929, LR: 0.0003 +[2026-03-05 19:05:15] (step=0070271) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.748972803756603, LR: 0.0003 +[2026-03-05 19:05:23] (step=0070272) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.749168460183917, LR: 0.0003 +[2026-03-05 19:05:31] (step=0070273) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.74936411661123, LR: 0.0003 +[2026-03-05 19:05:39] (step=0070274) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.749559773038545, LR: 0.0003 +[2026-03-05 19:05:46] (step=0070275) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.749755429465859, LR: 0.0003 +[2026-03-05 19:05:54] (step=0070276) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.749951085893171, LR: 0.0003 +[2026-03-05 19:06:02] (step=0070277) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.750146742320485, LR: 0.0003 +[2026-03-05 19:06:10] (step=0070278) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 13.750342398747799, LR: 0.0003 +[2026-03-05 19:06:18] (step=0070279) Train Loss: 0.4246, Train Steps/Sec: 0.13, Epoch: 13.750538055175113, LR: 0.0003 +[2026-03-05 19:06:26] (step=0070280) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 13.750733711602425, LR: 0.0003 +[2026-03-05 19:06:33] (step=0070281) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.75092936802974, LR: 0.0003 +[2026-03-05 19:06:41] (step=0070282) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.751125024457053, LR: 0.0003 +[2026-03-05 19:06:49] (step=0070283) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 13.751320680884367, LR: 0.0003 +[2026-03-05 19:06:57] (step=0070284) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.751516337311681, LR: 0.0003 +[2026-03-05 19:07:05] (step=0070285) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.751711993738994, LR: 0.0003 +[2026-03-05 19:07:13] (step=0070286) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.751907650166308, LR: 0.0003 +[2026-03-05 19:07:21] (step=0070287) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.752103306593622, LR: 0.0003 +[2026-03-05 19:07:28] (step=0070288) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.752298963020936, LR: 0.0003 +[2026-03-05 19:07:36] (step=0070289) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.75249461944825, LR: 0.0003 +[2026-03-05 19:07:44] (step=0070290) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 13.752690275875562, LR: 0.0003 +[2026-03-05 19:07:52] (step=0070291) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 13.752885932302876, LR: 0.0003 +[2026-03-05 19:08:00] (step=0070292) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 13.75308158873019, LR: 0.0003 +[2026-03-05 19:08:08] (step=0070293) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.753277245157504, LR: 0.0003 +[2026-03-05 19:08:16] (step=0070294) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 13.753472901584818, LR: 0.0003 +[2026-03-05 19:08:23] (step=0070295) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.75366855801213, LR: 0.0003 +[2026-03-05 19:08:31] (step=0070296) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.753864214439444, LR: 0.0003 +[2026-03-05 19:08:39] (step=0070297) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.754059870866758, LR: 0.0003 +[2026-03-05 19:08:47] (step=0070298) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.754255527294072, LR: 0.0003 +[2026-03-05 19:08:55] (step=0070299) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.754451183721386, LR: 0.0003 +[2026-03-05 19:09:03] (step=0070300) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 13.754646840148698, LR: 0.0003 +[2026-03-05 19:09:10] (step=0070301) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.754842496576012, LR: 0.0003 +[2026-03-05 19:09:18] (step=0070302) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.755038153003326, LR: 0.0003 +[2026-03-05 19:09:26] (step=0070303) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.75523380943064, LR: 0.0003 +[2026-03-05 19:09:34] (step=0070304) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.755429465857953, LR: 0.0003 +[2026-03-05 19:09:42] (step=0070305) Train Loss: 0.4420, Train Steps/Sec: 0.12, Epoch: 13.755625122285267, LR: 0.0003 +[2026-03-05 19:09:50] (step=0070306) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.75582077871258, LR: 0.0003 +[2026-03-05 19:09:58] (step=0070307) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.756016435139895, LR: 0.0003 +[2026-03-05 19:10:05] (step=0070308) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.756212091567209, LR: 0.0003 +[2026-03-05 19:10:13] (step=0070309) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.75640774799452, LR: 0.0003 +[2026-03-05 19:10:21] (step=0070310) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.756603404421835, LR: 0.0003 +[2026-03-05 19:10:29] (step=0070311) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.756799060849149, LR: 0.0003 +[2026-03-05 19:10:37] (step=0070312) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.756994717276463, LR: 0.0003 +[2026-03-05 19:10:45] (step=0070313) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.757190373703777, LR: 0.0003 +[2026-03-05 19:10:53] (step=0070314) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 13.757386030131089, LR: 0.0003 +[2026-03-05 19:11:00] (step=0070315) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.757581686558403, LR: 0.0003 +[2026-03-05 19:11:08] (step=0070316) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.757777342985717, LR: 0.0003 +[2026-03-05 19:11:16] (step=0070317) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.757972999413031, LR: 0.0003 +[2026-03-05 19:11:24] (step=0070318) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.758168655840345, LR: 0.0003 +[2026-03-05 19:11:32] (step=0070319) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.758364312267657, LR: 0.0003 +[2026-03-05 19:11:40] (step=0070320) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.758559968694971, LR: 0.0003 +[2026-03-05 19:11:47] (step=0070321) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.758755625122285, LR: 0.0003 +[2026-03-05 19:11:55] (step=0070322) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.7589512815496, LR: 0.0003 +[2026-03-05 19:12:03] (step=0070323) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.759146937976913, LR: 0.0003 +[2026-03-05 19:12:11] (step=0070324) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.759342594404226, LR: 0.0003 +[2026-03-05 19:12:19] (step=0070325) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.75953825083154, LR: 0.0003 +[2026-03-05 19:12:27] (step=0070326) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.759733907258854, LR: 0.0003 +[2026-03-05 19:12:34] (step=0070327) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 13.759929563686168, LR: 0.0003 +[2026-03-05 19:12:42] (step=0070328) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.760125220113482, LR: 0.0003 +[2026-03-05 19:12:50] (step=0070329) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.760320876540794, LR: 0.0003 +[2026-03-05 19:12:58] (step=0070330) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.760516532968108, LR: 0.0003 +[2026-03-05 19:13:06] (step=0070331) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.760712189395422, LR: 0.0003 +[2026-03-05 19:13:14] (step=0070332) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.760907845822736, LR: 0.0003 +[2026-03-05 19:13:22] (step=0070333) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.761103502250048, LR: 0.0003 +[2026-03-05 19:13:29] (step=0070334) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.761299158677362, LR: 0.0003 +[2026-03-05 19:13:37] (step=0070335) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.761494815104676, LR: 0.0003 +[2026-03-05 19:13:45] (step=0070336) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.76169047153199, LR: 0.0003 +[2026-03-05 19:13:53] (step=0070337) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.761886127959304, LR: 0.0003 +[2026-03-05 19:14:01] (step=0070338) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.762081784386616, LR: 0.0003 +[2026-03-05 19:14:09] (step=0070339) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.76227744081393, LR: 0.0003 +[2026-03-05 19:14:16] (step=0070340) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.762473097241244, LR: 0.0003 +[2026-03-05 19:14:24] (step=0070341) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.762668753668558, LR: 0.0003 +[2026-03-05 19:14:32] (step=0070342) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.762864410095872, LR: 0.0003 +[2026-03-05 19:14:40] (step=0070343) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.763060066523185, LR: 0.0003 +[2026-03-05 19:14:48] (step=0070344) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 13.763255722950499, LR: 0.0003 +[2026-03-05 19:14:56] (step=0070345) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 13.763451379377813, LR: 0.0003 +[2026-03-05 19:15:04] (step=0070346) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.763647035805127, LR: 0.0003 +[2026-03-05 19:15:11] (step=0070347) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.76384269223244, LR: 0.0003 +[2026-03-05 19:15:19] (step=0070348) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.764038348659753, LR: 0.0003 +[2026-03-05 19:15:27] (step=0070349) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.764234005087067, LR: 0.0003 +[2026-03-05 19:15:35] (step=0070350) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.76442966151438, LR: 0.0003 +[2026-03-05 19:15:43] (step=0070351) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.764625317941695, LR: 0.0003 +[2026-03-05 19:15:51] (step=0070352) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 13.764820974369009, LR: 0.0003 +[2026-03-05 19:15:58] (step=0070353) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.765016630796321, LR: 0.0003 +[2026-03-05 19:16:06] (step=0070354) Train Loss: 0.4338, Train Steps/Sec: 0.12, Epoch: 13.765212287223635, LR: 0.0003 +[2026-03-05 19:16:14] (step=0070355) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.765407943650949, LR: 0.0003 +[2026-03-05 19:16:22] (step=0070356) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.765603600078263, LR: 0.0003 +[2026-03-05 19:16:30] (step=0070357) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.765799256505575, LR: 0.0003 +[2026-03-05 19:16:38] (step=0070358) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.76599491293289, LR: 0.0003 +[2026-03-05 19:16:46] (step=0070359) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.766190569360203, LR: 0.0003 +[2026-03-05 19:16:54] (step=0070360) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.766386225787517, LR: 0.0003 +[2026-03-05 19:17:01] (step=0070361) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.766581882214831, LR: 0.0003 +[2026-03-05 19:17:09] (step=0070362) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.766777538642144, LR: 0.0003 +[2026-03-05 19:17:17] (step=0070363) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.766973195069458, LR: 0.0003 +[2026-03-05 19:17:25] (step=0070364) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.767168851496772, LR: 0.0003 +[2026-03-05 19:17:33] (step=0070365) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.767364507924086, LR: 0.0003 +[2026-03-05 19:17:41] (step=0070366) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.7675601643514, LR: 0.0003 +[2026-03-05 19:17:48] (step=0070367) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.767755820778712, LR: 0.0003 +[2026-03-05 19:17:56] (step=0070368) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.767951477206026, LR: 0.0003 +[2026-03-05 19:18:04] (step=0070369) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.76814713363334, LR: 0.0003 +[2026-03-05 19:18:12] (step=0070370) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.768342790060654, LR: 0.0003 +[2026-03-05 19:18:20] (step=0070371) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.768538446487968, LR: 0.0003 +[2026-03-05 19:18:28] (step=0070372) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.76873410291528, LR: 0.0003 +[2026-03-05 19:18:36] (step=0070373) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.768929759342594, LR: 0.0003 +[2026-03-05 19:18:43] (step=0070374) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.769125415769908, LR: 0.0003 +[2026-03-05 19:18:51] (step=0070375) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.769321072197222, LR: 0.0003 +[2026-03-05 19:18:59] (step=0070376) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.769516728624536, LR: 0.0003 +[2026-03-05 19:19:07] (step=0070377) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.769712385051848, LR: 0.0003 +[2026-03-05 19:19:15] (step=0070378) Train Loss: 0.4212, Train Steps/Sec: 0.13, Epoch: 13.769908041479162, LR: 0.0003 +[2026-03-05 19:19:23] (step=0070379) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.770103697906476, LR: 0.0003 +[2026-03-05 19:19:31] (step=0070380) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.77029935433379, LR: 0.0003 +[2026-03-05 19:19:38] (step=0070381) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.770495010761104, LR: 0.0003 +[2026-03-05 19:19:46] (step=0070382) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.770690667188417, LR: 0.0003 +[2026-03-05 19:19:54] (step=0070383) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.77088632361573, LR: 0.0003 +[2026-03-05 19:20:02] (step=0070384) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.771081980043045, LR: 0.0003 +[2026-03-05 19:20:10] (step=0070385) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 13.771277636470359, LR: 0.0003 +[2026-03-05 19:20:18] (step=0070386) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.77147329289767, LR: 0.0003 +[2026-03-05 19:20:26] (step=0070387) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.771668949324985, LR: 0.0003 +[2026-03-05 19:20:33] (step=0070388) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.771864605752299, LR: 0.0003 +[2026-03-05 19:20:41] (step=0070389) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.772060262179613, LR: 0.0003 +[2026-03-05 19:20:49] (step=0070390) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.772255918606927, LR: 0.0003 +[2026-03-05 19:20:57] (step=0070391) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.77245157503424, LR: 0.0003 +[2026-03-05 19:21:05] (step=0070392) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.772647231461553, LR: 0.0003 +[2026-03-05 19:21:13] (step=0070393) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 13.772842887888867, LR: 0.0003 +[2026-03-05 19:21:20] (step=0070394) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.773038544316181, LR: 0.0003 +[2026-03-05 19:21:28] (step=0070395) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 13.773234200743495, LR: 0.0003 +[2026-03-05 19:21:36] (step=0070396) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.773429857170807, LR: 0.0003 +[2026-03-05 19:21:44] (step=0070397) Train Loss: 0.4244, Train Steps/Sec: 0.13, Epoch: 13.773625513598121, LR: 0.0003 +[2026-03-05 19:21:52] (step=0070398) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.773821170025435, LR: 0.0003 +[2026-03-05 19:22:00] (step=0070399) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.77401682645275, LR: 0.0003 +[2026-03-05 19:22:08] (step=0070400) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 13.774212482880063, LR: 0.0003 +[2026-03-05 19:22:15] (step=0070401) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.774408139307376, LR: 0.0003 +[2026-03-05 19:22:23] (step=0070402) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.77460379573469, LR: 0.0003 +[2026-03-05 19:22:31] (step=0070403) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.774799452162004, LR: 0.0003 +[2026-03-05 19:22:39] (step=0070404) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.774995108589318, LR: 0.0003 +[2026-03-05 19:22:47] (step=0070405) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.775190765016632, LR: 0.0003 +[2026-03-05 19:22:55] (step=0070406) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.775386421443944, LR: 0.0003 +[2026-03-05 19:23:03] (step=0070407) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.775582077871258, LR: 0.0003 +[2026-03-05 19:23:10] (step=0070408) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.775777734298572, LR: 0.0003 +[2026-03-05 19:23:18] (step=0070409) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.775973390725886, LR: 0.0003 +[2026-03-05 19:23:26] (step=0070410) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.776169047153198, LR: 0.0003 +[2026-03-05 19:23:34] (step=0070411) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.776364703580512, LR: 0.0003 +[2026-03-05 19:23:42] (step=0070412) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.776560360007826, LR: 0.0003 +[2026-03-05 19:23:50] (step=0070413) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.77675601643514, LR: 0.0003 +[2026-03-05 19:23:57] (step=0070414) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.776951672862454, LR: 0.0003 +[2026-03-05 19:24:05] (step=0070415) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.777147329289766, LR: 0.0003 +[2026-03-05 19:24:13] (step=0070416) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.77734298571708, LR: 0.0003 +[2026-03-05 19:24:21] (step=0070417) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 13.777538642144394, LR: 0.0003 +[2026-03-05 19:24:29] (step=0070418) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.777734298571708, LR: 0.0003 +[2026-03-05 19:24:37] (step=0070419) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.777929954999022, LR: 0.0003 +[2026-03-05 19:24:44] (step=0070420) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.778125611426335, LR: 0.0003 +[2026-03-05 19:24:52] (step=0070421) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.778321267853649, LR: 0.0003 +[2026-03-05 19:25:00] (step=0070422) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.778516924280963, LR: 0.0003 +[2026-03-05 19:25:08] (step=0070423) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.778712580708277, LR: 0.0003 +[2026-03-05 19:25:16] (step=0070424) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.77890823713559, LR: 0.0003 +[2026-03-05 19:25:24] (step=0070425) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.779103893562903, LR: 0.0003 +[2026-03-05 19:25:32] (step=0070426) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.779299549990217, LR: 0.0003 +[2026-03-05 19:25:39] (step=0070427) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.779495206417531, LR: 0.0003 +[2026-03-05 19:25:47] (step=0070428) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.779690862844845, LR: 0.0003 +[2026-03-05 19:25:55] (step=0070429) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.779886519272159, LR: 0.0003 +[2026-03-05 19:26:03] (step=0070430) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.780082175699471, LR: 0.0003 +[2026-03-05 19:26:11] (step=0070431) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.780277832126785, LR: 0.0003 +[2026-03-05 19:26:19] (step=0070432) Train Loss: 0.4603, Train Steps/Sec: 0.13, Epoch: 13.7804734885541, LR: 0.0003 +[2026-03-05 19:26:27] (step=0070433) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.780669144981413, LR: 0.0003 +[2026-03-05 19:26:34] (step=0070434) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.780864801408727, LR: 0.0003 +[2026-03-05 19:26:42] (step=0070435) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.78106045783604, LR: 0.0003 +[2026-03-05 19:26:50] (step=0070436) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.781256114263353, LR: 0.0003 +[2026-03-05 19:26:58] (step=0070437) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.781451770690667, LR: 0.0003 +[2026-03-05 19:27:06] (step=0070438) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.781647427117981, LR: 0.0003 +[2026-03-05 19:27:14] (step=0070439) Train Loss: 0.4248, Train Steps/Sec: 0.13, Epoch: 13.781843083545294, LR: 0.0003 +[2026-03-05 19:27:21] (step=0070440) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.782038739972608, LR: 0.0003 +[2026-03-05 19:27:29] (step=0070441) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.782234396399922, LR: 0.0003 +[2026-03-05 19:27:37] (step=0070442) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.782430052827236, LR: 0.0003 +[2026-03-05 19:27:45] (step=0070443) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.78262570925455, LR: 0.0003 +[2026-03-05 19:27:53] (step=0070444) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.782821365681862, LR: 0.0003 +[2026-03-05 19:28:01] (step=0070445) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.783017022109176, LR: 0.0003 +[2026-03-05 19:28:08] (step=0070446) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.78321267853649, LR: 0.0003 +[2026-03-05 19:28:16] (step=0070447) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.783408334963804, LR: 0.0003 +[2026-03-05 19:28:24] (step=0070448) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 13.783603991391118, LR: 0.0003 +[2026-03-05 19:28:32] (step=0070449) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.78379964781843, LR: 0.0003 +[2026-03-05 19:28:40] (step=0070450) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.783995304245744, LR: 0.0003 +[2026-03-05 19:28:48] (step=0070451) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.784190960673058, LR: 0.0003 +[2026-03-05 19:28:56] (step=0070452) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 13.784386617100372, LR: 0.0003 +[2026-03-05 19:29:04] (step=0070453) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 13.784582273527686, LR: 0.0003 +[2026-03-05 19:29:11] (step=0070454) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.784777929954998, LR: 0.0003 +[2026-03-05 19:29:19] (step=0070455) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.784973586382312, LR: 0.0003 +[2026-03-05 19:29:27] (step=0070456) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.785169242809626, LR: 0.0003 +[2026-03-05 19:29:35] (step=0070457) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.78536489923694, LR: 0.0003 +[2026-03-05 19:29:43] (step=0070458) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.785560555664254, LR: 0.0003 +[2026-03-05 19:29:51] (step=0070459) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.785756212091567, LR: 0.0003 +[2026-03-05 19:29:58] (step=0070460) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.78595186851888, LR: 0.0003 +[2026-03-05 19:30:06] (step=0070461) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.786147524946195, LR: 0.0003 +[2026-03-05 19:30:14] (step=0070462) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 13.786343181373509, LR: 0.0003 +[2026-03-05 19:30:22] (step=0070463) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.786538837800821, LR: 0.0003 +[2026-03-05 19:30:30] (step=0070464) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.786734494228135, LR: 0.0003 +[2026-03-05 19:30:38] (step=0070465) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 13.786930150655449, LR: 0.0003 +[2026-03-05 19:30:46] (step=0070466) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 13.787125807082763, LR: 0.0003 +[2026-03-05 19:30:53] (step=0070467) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 13.787321463510077, LR: 0.0003 +[2026-03-05 19:31:01] (step=0070468) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 13.78751711993739, LR: 0.0003 +[2026-03-05 19:31:09] (step=0070469) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.787712776364703, LR: 0.0003 +[2026-03-05 19:31:17] (step=0070470) Train Loss: 0.4257, Train Steps/Sec: 0.13, Epoch: 13.787908432792017, LR: 0.0003 +[2026-03-05 19:31:25] (step=0070471) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.788104089219331, LR: 0.0003 +[2026-03-05 19:31:33] (step=0070472) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.788299745646645, LR: 0.0003 +[2026-03-05 19:31:40] (step=0070473) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.788495402073957, LR: 0.0003 +[2026-03-05 19:31:48] (step=0070474) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.788691058501271, LR: 0.0003 +[2026-03-05 19:31:56] (step=0070475) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.788886714928585, LR: 0.0003 +[2026-03-05 19:32:04] (step=0070476) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.7890823713559, LR: 0.0003 +[2026-03-05 19:32:12] (step=0070477) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.789278027783213, LR: 0.0003 +[2026-03-05 19:32:20] (step=0070478) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.789473684210526, LR: 0.0003 +[2026-03-05 19:32:28] (step=0070479) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.78966934063784, LR: 0.0003 +[2026-03-05 19:32:35] (step=0070480) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.789864997065154, LR: 0.0003 +[2026-03-05 19:32:43] (step=0070481) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.790060653492468, LR: 0.0003 +[2026-03-05 19:32:51] (step=0070482) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.790256309919782, LR: 0.0003 +[2026-03-05 19:32:59] (step=0070483) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.790451966347094, LR: 0.0003 +[2026-03-05 19:33:07] (step=0070484) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.790647622774408, LR: 0.0003 +[2026-03-05 19:33:15] (step=0070485) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.790843279201722, LR: 0.0003 +[2026-03-05 19:33:23] (step=0070486) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.791038935629036, LR: 0.0003 +[2026-03-05 19:33:30] (step=0070487) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.79123459205635, LR: 0.0003 +[2026-03-05 19:33:38] (step=0070488) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.791430248483662, LR: 0.0003 +[2026-03-05 19:33:46] (step=0070489) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.791625904910976, LR: 0.0003 +[2026-03-05 19:33:54] (step=0070490) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.79182156133829, LR: 0.0003 +[2026-03-05 19:34:02] (step=0070491) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.792017217765604, LR: 0.0003 +[2026-03-05 19:34:10] (step=0070492) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.792212874192916, LR: 0.0003 +[2026-03-05 19:34:17] (step=0070493) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.79240853062023, LR: 0.0003 +[2026-03-05 19:34:25] (step=0070494) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.792604187047544, LR: 0.0003 +[2026-03-05 19:34:33] (step=0070495) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 13.792799843474858, LR: 0.0003 +[2026-03-05 19:34:41] (step=0070496) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.792995499902172, LR: 0.0003 +[2026-03-05 19:34:49] (step=0070497) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.793191156329485, LR: 0.0003 +[2026-03-05 19:34:57] (step=0070498) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.793386812756799, LR: 0.0003 +[2026-03-05 19:35:04] (step=0070499) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.793582469184113, LR: 0.0003 +[2026-03-05 19:35:12] (step=0070500) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 13.793778125611427, LR: 0.0003 +[2026-03-05 19:35:12] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0070500/ +[2026-03-05 19:35:20] (step=0070501) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 13.79397378203874, LR: 0.0003 +[2026-03-05 19:35:28] (step=0070502) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.794169438466053, LR: 0.0003 +[2026-03-05 19:35:36] (step=0070503) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.794365094893367, LR: 0.0003 +[2026-03-05 19:35:44] (step=0070504) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.794560751320681, LR: 0.0003 +[2026-03-05 19:35:52] (step=0070505) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.794756407747995, LR: 0.0003 +[2026-03-05 19:35:59] (step=0070506) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.794952064175309, LR: 0.0003 +[2026-03-05 19:36:07] (step=0070507) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.795147720602621, LR: 0.0003 +[2026-03-05 19:36:15] (step=0070508) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.795343377029935, LR: 0.0003 +[2026-03-05 19:36:23] (step=0070509) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 13.79553903345725, LR: 0.0003 +[2026-03-05 19:36:31] (step=0070510) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.795734689884563, LR: 0.0003 +[2026-03-05 19:36:39] (step=0070511) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 13.795930346311877, LR: 0.0003 +[2026-03-05 19:36:47] (step=0070512) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.79612600273919, LR: 0.0003 +[2026-03-05 19:36:54] (step=0070513) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.796321659166503, LR: 0.0003 +[2026-03-05 19:37:02] (step=0070514) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 13.796517315593817, LR: 0.0003 +[2026-03-05 19:37:10] (step=0070515) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.796712972021131, LR: 0.0003 +[2026-03-05 19:37:18] (step=0070516) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.796908628448444, LR: 0.0003 +[2026-03-05 19:37:26] (step=0070517) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.797104284875758, LR: 0.0003 +[2026-03-05 19:37:34] (step=0070518) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.797299941303072, LR: 0.0003 +[2026-03-05 19:37:41] (step=0070519) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.797495597730386, LR: 0.0003 +[2026-03-05 19:37:49] (step=0070520) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.7976912541577, LR: 0.0003 +[2026-03-05 19:37:57] (step=0070521) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.797886910585012, LR: 0.0003 +[2026-03-05 19:38:05] (step=0070522) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 13.798082567012326, LR: 0.0003 +[2026-03-05 19:38:13] (step=0070523) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.79827822343964, LR: 0.0003 +[2026-03-05 19:38:21] (step=0070524) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.798473879866954, LR: 0.0003 +[2026-03-05 19:38:29] (step=0070525) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.798669536294268, LR: 0.0003 +[2026-03-05 19:38:37] (step=0070526) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.79886519272158, LR: 0.0003 +[2026-03-05 19:38:44] (step=0070527) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 13.799060849148894, LR: 0.0003 +[2026-03-05 19:38:52] (step=0070528) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.799256505576208, LR: 0.0003 +[2026-03-05 19:39:00] (step=0070529) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.799452162003522, LR: 0.0003 +[2026-03-05 19:39:08] (step=0070530) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.799647818430836, LR: 0.0003 +[2026-03-05 19:39:16] (step=0070531) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.799843474858148, LR: 0.0003 +[2026-03-05 19:39:24] (step=0070532) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.800039131285462, LR: 0.0003 +[2026-03-05 19:39:31] (step=0070533) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.800234787712776, LR: 0.0003 +[2026-03-05 19:39:39] (step=0070534) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.80043044414009, LR: 0.0003 +[2026-03-05 19:39:47] (step=0070535) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.800626100567404, LR: 0.0003 +[2026-03-05 19:39:55] (step=0070536) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.800821756994717, LR: 0.0003 +[2026-03-05 19:40:03] (step=0070537) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.80101741342203, LR: 0.0003 +[2026-03-05 19:40:11] (step=0070538) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.801213069849345, LR: 0.0003 +[2026-03-05 19:40:18] (step=0070539) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.801408726276659, LR: 0.0003 +[2026-03-05 19:40:26] (step=0070540) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.801604382703971, LR: 0.0003 +[2026-03-05 19:40:34] (step=0070541) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.801800039131285, LR: 0.0003 +[2026-03-05 19:40:42] (step=0070542) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.801995695558599, LR: 0.0003 +[2026-03-05 19:40:50] (step=0070543) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.802191351985913, LR: 0.0003 +[2026-03-05 19:40:58] (step=0070544) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.802387008413227, LR: 0.0003 +[2026-03-05 19:41:05] (step=0070545) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.80258266484054, LR: 0.0003 +[2026-03-05 19:41:13] (step=0070546) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.802778321267853, LR: 0.0003 +[2026-03-05 19:41:21] (step=0070547) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.802973977695167, LR: 0.0003 +[2026-03-05 19:41:29] (step=0070548) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.803169634122481, LR: 0.0003 +[2026-03-05 19:41:37] (step=0070549) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.803365290549795, LR: 0.0003 +[2026-03-05 19:41:45] (step=0070550) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.803560946977107, LR: 0.0003 +[2026-03-05 19:41:52] (step=0070551) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.803756603404421, LR: 0.0003 +[2026-03-05 19:42:00] (step=0070552) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.803952259831735, LR: 0.0003 +[2026-03-05 19:42:08] (step=0070553) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.80414791625905, LR: 0.0003 +[2026-03-05 19:42:16] (step=0070554) Train Loss: 0.4334, Train Steps/Sec: 0.12, Epoch: 13.804343572686363, LR: 0.0003 +[2026-03-05 19:42:24] (step=0070555) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.804539229113676, LR: 0.0003 +[2026-03-05 19:42:32] (step=0070556) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 13.80473488554099, LR: 0.0003 +[2026-03-05 19:42:40] (step=0070557) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.804930541968304, LR: 0.0003 +[2026-03-05 19:42:48] (step=0070558) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.805126198395618, LR: 0.0003 +[2026-03-05 19:42:55] (step=0070559) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.805321854822932, LR: 0.0003 +[2026-03-05 19:43:03] (step=0070560) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.805517511250244, LR: 0.0003 +[2026-03-05 19:43:11] (step=0070561) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.805713167677558, LR: 0.0003 +[2026-03-05 19:43:19] (step=0070562) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.805908824104872, LR: 0.0003 +[2026-03-05 19:43:27] (step=0070563) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.806104480532186, LR: 0.0003 +[2026-03-05 19:43:35] (step=0070564) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.8063001369595, LR: 0.0003 +[2026-03-05 19:43:42] (step=0070565) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.806495793386812, LR: 0.0003 +[2026-03-05 19:43:50] (step=0070566) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.806691449814126, LR: 0.0003 +[2026-03-05 19:43:58] (step=0070567) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.80688710624144, LR: 0.0003 +[2026-03-05 19:44:06] (step=0070568) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.807082762668754, LR: 0.0003 +[2026-03-05 19:44:14] (step=0070569) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.807278419096066, LR: 0.0003 +[2026-03-05 19:44:22] (step=0070570) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.80747407552338, LR: 0.0003 +[2026-03-05 19:44:30] (step=0070571) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.807669731950694, LR: 0.0003 +[2026-03-05 19:44:38] (step=0070572) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.807865388378008, LR: 0.0003 +[2026-03-05 19:44:45] (step=0070573) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.808061044805322, LR: 0.0003 +[2026-03-05 19:44:53] (step=0070574) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.808256701232635, LR: 0.0003 +[2026-03-05 19:45:01] (step=0070575) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.808452357659949, LR: 0.0003 +[2026-03-05 19:45:09] (step=0070576) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.808648014087263, LR: 0.0003 +[2026-03-05 19:45:17] (step=0070577) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.808843670514577, LR: 0.0003 +[2026-03-05 19:45:25] (step=0070578) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.80903932694189, LR: 0.0003 +[2026-03-05 19:45:32] (step=0070579) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.809234983369203, LR: 0.0003 +[2026-03-05 19:45:40] (step=0070580) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.809430639796517, LR: 0.0003 +[2026-03-05 19:45:48] (step=0070581) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.809626296223831, LR: 0.0003 +[2026-03-05 19:45:56] (step=0070582) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.809821952651145, LR: 0.0003 +[2026-03-05 19:46:04] (step=0070583) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.810017609078459, LR: 0.0003 +[2026-03-05 19:46:12] (step=0070584) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.810213265505771, LR: 0.0003 +[2026-03-05 19:46:19] (step=0070585) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.810408921933085, LR: 0.0003 +[2026-03-05 19:46:27] (step=0070586) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.8106045783604, LR: 0.0003 +[2026-03-05 19:46:35] (step=0070587) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.810800234787713, LR: 0.0003 +[2026-03-05 19:46:43] (step=0070588) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.810995891215027, LR: 0.0003 +[2026-03-05 19:46:51] (step=0070589) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 13.81119154764234, LR: 0.0003 +[2026-03-05 19:46:59] (step=0070590) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.811387204069653, LR: 0.0003 +[2026-03-05 19:47:07] (step=0070591) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.811582860496967, LR: 0.0003 +[2026-03-05 19:47:14] (step=0070592) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.811778516924281, LR: 0.0003 +[2026-03-05 19:47:22] (step=0070593) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.811974173351594, LR: 0.0003 +[2026-03-05 19:47:30] (step=0070594) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.812169829778908, LR: 0.0003 +[2026-03-05 19:47:38] (step=0070595) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.812365486206222, LR: 0.0003 +[2026-03-05 19:47:46] (step=0070596) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.812561142633536, LR: 0.0003 +[2026-03-05 19:47:54] (step=0070597) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.81275679906085, LR: 0.0003 +[2026-03-05 19:48:02] (step=0070598) Train Loss: 0.4462, Train Steps/Sec: 0.12, Epoch: 13.812952455488162, LR: 0.0003 +[2026-03-05 19:48:09] (step=0070599) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.813148111915476, LR: 0.0003 +[2026-03-05 19:48:17] (step=0070600) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.81334376834279, LR: 0.0003 +[2026-03-05 19:48:25] (step=0070601) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.813539424770104, LR: 0.0003 +[2026-03-05 19:48:33] (step=0070602) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 13.813735081197418, LR: 0.0003 +[2026-03-05 19:48:41] (step=0070603) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.81393073762473, LR: 0.0003 +[2026-03-05 19:48:49] (step=0070604) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.814126394052044, LR: 0.0003 +[2026-03-05 19:48:57] (step=0070605) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.814322050479358, LR: 0.0003 +[2026-03-05 19:49:04] (step=0070606) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.814517706906672, LR: 0.0003 +[2026-03-05 19:49:12] (step=0070607) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.814713363333986, LR: 0.0003 +[2026-03-05 19:49:20] (step=0070608) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.814909019761298, LR: 0.0003 +[2026-03-05 19:49:28] (step=0070609) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.815104676188612, LR: 0.0003 +[2026-03-05 19:49:36] (step=0070610) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.815300332615926, LR: 0.0003 +[2026-03-05 19:49:44] (step=0070611) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.81549598904324, LR: 0.0003 +[2026-03-05 19:49:51] (step=0070612) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.815691645470555, LR: 0.0003 +[2026-03-05 19:49:59] (step=0070613) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 13.815887301897867, LR: 0.0003 +[2026-03-05 19:50:07] (step=0070614) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 13.81608295832518, LR: 0.0003 +[2026-03-05 19:50:15] (step=0070615) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.816278614752495, LR: 0.0003 +[2026-03-05 19:50:23] (step=0070616) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.816474271179809, LR: 0.0003 +[2026-03-05 19:50:31] (step=0070617) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.816669927607123, LR: 0.0003 +[2026-03-05 19:50:38] (step=0070618) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 13.816865584034435, LR: 0.0003 +[2026-03-05 19:50:46] (step=0070619) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.817061240461749, LR: 0.0003 +[2026-03-05 19:50:54] (step=0070620) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.817256896889063, LR: 0.0003 +[2026-03-05 19:51:02] (step=0070621) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.817452553316377, LR: 0.0003 +[2026-03-05 19:51:10] (step=0070622) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 13.81764820974369, LR: 0.0003 +[2026-03-05 19:51:18] (step=0070623) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.817843866171003, LR: 0.0003 +[2026-03-05 19:51:26] (step=0070624) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.818039522598317, LR: 0.0003 +[2026-03-05 19:51:34] (step=0070625) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.818235179025631, LR: 0.0003 +[2026-03-05 19:51:41] (step=0070626) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 13.818430835452945, LR: 0.0003 +[2026-03-05 19:51:49] (step=0070627) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.818626491880257, LR: 0.0003 +[2026-03-05 19:51:57] (step=0070628) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.818822148307571, LR: 0.0003 +[2026-03-05 19:52:05] (step=0070629) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.819017804734886, LR: 0.0003 +[2026-03-05 19:52:13] (step=0070630) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.8192134611622, LR: 0.0003 +[2026-03-05 19:52:21] (step=0070631) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.819409117589514, LR: 0.0003 +[2026-03-05 19:52:28] (step=0070632) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.819604774016826, LR: 0.0003 +[2026-03-05 19:52:36] (step=0070633) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.81980043044414, LR: 0.0003 +[2026-03-05 19:52:44] (step=0070634) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.819996086871454, LR: 0.0003 +[2026-03-05 19:52:52] (step=0070635) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.820191743298768, LR: 0.0003 +[2026-03-05 19:53:00] (step=0070636) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.820387399726082, LR: 0.0003 +[2026-03-05 19:53:08] (step=0070637) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.820583056153394, LR: 0.0003 +[2026-03-05 19:53:15] (step=0070638) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.820778712580708, LR: 0.0003 +[2026-03-05 19:53:23] (step=0070639) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.820974369008022, LR: 0.0003 +[2026-03-05 19:53:31] (step=0070640) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 13.821170025435336, LR: 0.0003 +[2026-03-05 19:53:39] (step=0070641) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.82136568186265, LR: 0.0003 +[2026-03-05 19:53:47] (step=0070642) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.821561338289962, LR: 0.0003 +[2026-03-05 19:53:55] (step=0070643) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.821756994717276, LR: 0.0003 +[2026-03-05 19:54:02] (step=0070644) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.82195265114459, LR: 0.0003 +[2026-03-05 19:54:10] (step=0070645) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.822148307571904, LR: 0.0003 +[2026-03-05 19:54:18] (step=0070646) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.822343963999216, LR: 0.0003 +[2026-03-05 19:54:26] (step=0070647) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.82253962042653, LR: 0.0003 +[2026-03-05 19:54:34] (step=0070648) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.822735276853845, LR: 0.0003 +[2026-03-05 19:54:42] (step=0070649) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.822930933281159, LR: 0.0003 +[2026-03-05 19:54:50] (step=0070650) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.823126589708473, LR: 0.0003 +[2026-03-05 19:54:58] (step=0070651) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.823322246135785, LR: 0.0003 +[2026-03-05 19:55:05] (step=0070652) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.823517902563099, LR: 0.0003 +[2026-03-05 19:55:13] (step=0070653) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.823713558990413, LR: 0.0003 +[2026-03-05 19:55:21] (step=0070654) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 13.823909215417727, LR: 0.0003 +[2026-03-05 19:55:29] (step=0070655) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.82410487184504, LR: 0.0003 +[2026-03-05 19:55:37] (step=0070656) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.824300528272353, LR: 0.0003 +[2026-03-05 19:55:45] (step=0070657) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.824496184699667, LR: 0.0003 +[2026-03-05 19:55:53] (step=0070658) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 13.824691841126981, LR: 0.0003 +[2026-03-05 19:56:00] (step=0070659) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.824887497554295, LR: 0.0003 +[2026-03-05 19:56:08] (step=0070660) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.825083153981609, LR: 0.0003 +[2026-03-05 19:56:16] (step=0070661) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.825278810408921, LR: 0.0003 +[2026-03-05 19:56:24] (step=0070662) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.825474466836235, LR: 0.0003 +[2026-03-05 19:56:32] (step=0070663) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.82567012326355, LR: 0.0003 +[2026-03-05 19:56:40] (step=0070664) Train Loss: 0.4577, Train Steps/Sec: 0.13, Epoch: 13.825865779690863, LR: 0.0003 +[2026-03-05 19:56:47] (step=0070665) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.826061436118177, LR: 0.0003 +[2026-03-05 19:56:55] (step=0070666) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.82625709254549, LR: 0.0003 +[2026-03-05 19:57:03] (step=0070667) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.826452748972804, LR: 0.0003 +[2026-03-05 19:57:11] (step=0070668) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.826648405400118, LR: 0.0003 +[2026-03-05 19:57:19] (step=0070669) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.826844061827432, LR: 0.0003 +[2026-03-05 19:57:27] (step=0070670) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.827039718254746, LR: 0.0003 +[2026-03-05 19:57:35] (step=0070671) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.827235374682058, LR: 0.0003 +[2026-03-05 19:57:43] (step=0070672) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.827431031109372, LR: 0.0003 +[2026-03-05 19:57:50] (step=0070673) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.827626687536686, LR: 0.0003 +[2026-03-05 19:57:58] (step=0070674) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.827822343964, LR: 0.0003 +[2026-03-05 19:58:06] (step=0070675) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.828018000391312, LR: 0.0003 +[2026-03-05 19:58:14] (step=0070676) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.828213656818626, LR: 0.0003 +[2026-03-05 19:58:22] (step=0070677) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.82840931324594, LR: 0.0003 +[2026-03-05 19:58:30] (step=0070678) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.828604969673254, LR: 0.0003 +[2026-03-05 19:58:37] (step=0070679) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.828800626100568, LR: 0.0003 +[2026-03-05 19:58:45] (step=0070680) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.82899628252788, LR: 0.0003 +[2026-03-05 19:58:53] (step=0070681) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.829191938955194, LR: 0.0003 +[2026-03-05 19:59:01] (step=0070682) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.829387595382508, LR: 0.0003 +[2026-03-05 19:59:09] (step=0070683) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.829583251809822, LR: 0.0003 +[2026-03-05 19:59:17] (step=0070684) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.829778908237136, LR: 0.0003 +[2026-03-05 19:59:24] (step=0070685) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.829974564664449, LR: 0.0003 +[2026-03-05 19:59:32] (step=0070686) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.830170221091763, LR: 0.0003 +[2026-03-05 19:59:40] (step=0070687) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.830365877519077, LR: 0.0003 +[2026-03-05 19:59:48] (step=0070688) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 13.83056153394639, LR: 0.0003 +[2026-03-05 19:59:56] (step=0070689) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.830757190373705, LR: 0.0003 +[2026-03-05 20:00:04] (step=0070690) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.830952846801017, LR: 0.0003 +[2026-03-05 20:00:11] (step=0070691) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.83114850322833, LR: 0.0003 +[2026-03-05 20:00:19] (step=0070692) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.831344159655645, LR: 0.0003 +[2026-03-05 20:00:27] (step=0070693) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.831539816082959, LR: 0.0003 +[2026-03-05 20:00:35] (step=0070694) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.831735472510273, LR: 0.0003 +[2026-03-05 20:00:43] (step=0070695) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.831931128937585, LR: 0.0003 +[2026-03-05 20:00:51] (step=0070696) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.832126785364899, LR: 0.0003 +[2026-03-05 20:00:59] (step=0070697) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.832322441792213, LR: 0.0003 +[2026-03-05 20:01:06] (step=0070698) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.832518098219527, LR: 0.0003 +[2026-03-05 20:01:14] (step=0070699) Train Loss: 0.4362, Train Steps/Sec: 0.12, Epoch: 13.83271375464684, LR: 0.0003 +[2026-03-05 20:01:22] (step=0070700) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.832909411074153, LR: 0.0003 +[2026-03-05 20:01:30] (step=0070701) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.833105067501467, LR: 0.0003 +[2026-03-05 20:01:38] (step=0070702) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.833300723928781, LR: 0.0003 +[2026-03-05 20:01:46] (step=0070703) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.833496380356095, LR: 0.0003 +[2026-03-05 20:01:54] (step=0070704) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.833692036783408, LR: 0.0003 +[2026-03-05 20:02:01] (step=0070705) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.833887693210722, LR: 0.0003 +[2026-03-05 20:02:09] (step=0070706) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.834083349638036, LR: 0.0003 +[2026-03-05 20:02:17] (step=0070707) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.83427900606535, LR: 0.0003 +[2026-03-05 20:02:25] (step=0070708) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.834474662492664, LR: 0.0003 +[2026-03-05 20:02:33] (step=0070709) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.834670318919976, LR: 0.0003 +[2026-03-05 20:02:41] (step=0070710) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.83486597534729, LR: 0.0003 +[2026-03-05 20:02:49] (step=0070711) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 13.835061631774604, LR: 0.0003 +[2026-03-05 20:02:56] (step=0070712) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.835257288201918, LR: 0.0003 +[2026-03-05 20:03:04] (step=0070713) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.835452944629232, LR: 0.0003 +[2026-03-05 20:03:12] (step=0070714) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.835648601056544, LR: 0.0003 +[2026-03-05 20:03:20] (step=0070715) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.835844257483858, LR: 0.0003 +[2026-03-05 20:03:28] (step=0070716) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 13.836039913911172, LR: 0.0003 +[2026-03-05 20:03:36] (step=0070717) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.836235570338486, LR: 0.0003 +[2026-03-05 20:03:44] (step=0070718) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.8364312267658, LR: 0.0003 +[2026-03-05 20:03:51] (step=0070719) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.836626883193112, LR: 0.0003 +[2026-03-05 20:03:59] (step=0070720) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.836822539620426, LR: 0.0003 +[2026-03-05 20:04:07] (step=0070721) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.83701819604774, LR: 0.0003 +[2026-03-05 20:04:15] (step=0070722) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 13.837213852475054, LR: 0.0003 +[2026-03-05 20:04:23] (step=0070723) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 13.837409508902368, LR: 0.0003 +[2026-03-05 20:04:31] (step=0070724) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.83760516532968, LR: 0.0003 +[2026-03-05 20:04:38] (step=0070725) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.837800821756995, LR: 0.0003 +[2026-03-05 20:04:46] (step=0070726) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 13.837996478184309, LR: 0.0003 +[2026-03-05 20:04:54] (step=0070727) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.838192134611623, LR: 0.0003 +[2026-03-05 20:05:02] (step=0070728) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 13.838387791038935, LR: 0.0003 +[2026-03-05 20:05:10] (step=0070729) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.838583447466249, LR: 0.0003 +[2026-03-05 20:05:18] (step=0070730) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.838779103893563, LR: 0.0003 +[2026-03-05 20:05:25] (step=0070731) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.838974760320877, LR: 0.0003 +[2026-03-05 20:05:33] (step=0070732) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.83917041674819, LR: 0.0003 +[2026-03-05 20:05:41] (step=0070733) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.839366073175503, LR: 0.0003 +[2026-03-05 20:05:49] (step=0070734) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.839561729602817, LR: 0.0003 +[2026-03-05 20:05:57] (step=0070735) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 13.839757386030131, LR: 0.0003 +[2026-03-05 20:06:05] (step=0070736) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.839953042457445, LR: 0.0003 +[2026-03-05 20:06:13] (step=0070737) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.840148698884759, LR: 0.0003 +[2026-03-05 20:06:20] (step=0070738) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.840344355312071, LR: 0.0003 +[2026-03-05 20:06:28] (step=0070739) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.840540011739385, LR: 0.0003 +[2026-03-05 20:06:36] (step=0070740) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.8407356681667, LR: 0.0003 +[2026-03-05 20:06:44] (step=0070741) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.840931324594013, LR: 0.0003 +[2026-03-05 20:06:52] (step=0070742) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.841126981021327, LR: 0.0003 +[2026-03-05 20:07:00] (step=0070743) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.84132263744864, LR: 0.0003 +[2026-03-05 20:07:08] (step=0070744) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.841518293875954, LR: 0.0003 +[2026-03-05 20:07:15] (step=0070745) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.841713950303268, LR: 0.0003 +[2026-03-05 20:07:23] (step=0070746) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.841909606730582, LR: 0.0003 +[2026-03-05 20:07:31] (step=0070747) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.842105263157896, LR: 0.0003 +[2026-03-05 20:07:39] (step=0070748) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.842300919585208, LR: 0.0003 +[2026-03-05 20:07:47] (step=0070749) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.842496576012522, LR: 0.0003 +[2026-03-05 20:07:55] (step=0070750) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.842692232439836, LR: 0.0003 +[2026-03-05 20:08:03] (step=0070751) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.84288788886715, LR: 0.0003 +[2026-03-05 20:08:10] (step=0070752) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 13.843083545294462, LR: 0.0003 +[2026-03-05 20:08:18] (step=0070753) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.843279201721776, LR: 0.0003 +[2026-03-05 20:08:26] (step=0070754) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.84347485814909, LR: 0.0003 +[2026-03-05 20:08:34] (step=0070755) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.843670514576404, LR: 0.0003 +[2026-03-05 20:08:42] (step=0070756) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.843866171003718, LR: 0.0003 +[2026-03-05 20:08:50] (step=0070757) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.84406182743103, LR: 0.0003 +[2026-03-05 20:08:58] (step=0070758) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.844257483858344, LR: 0.0003 +[2026-03-05 20:09:05] (step=0070759) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.844453140285658, LR: 0.0003 +[2026-03-05 20:09:13] (step=0070760) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 13.844648796712972, LR: 0.0003 +[2026-03-05 20:09:21] (step=0070761) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.844844453140286, LR: 0.0003 +[2026-03-05 20:09:29] (step=0070762) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.845040109567599, LR: 0.0003 +[2026-03-05 20:09:37] (step=0070763) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.845235765994913, LR: 0.0003 +[2026-03-05 20:09:45] (step=0070764) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.845431422422227, LR: 0.0003 +[2026-03-05 20:09:52] (step=0070765) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 13.84562707884954, LR: 0.0003 +[2026-03-05 20:10:00] (step=0070766) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.845822735276855, LR: 0.0003 +[2026-03-05 20:10:08] (step=0070767) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.846018391704167, LR: 0.0003 +[2026-03-05 20:10:16] (step=0070768) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.84621404813148, LR: 0.0003 +[2026-03-05 20:10:24] (step=0070769) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.846409704558795, LR: 0.0003 +[2026-03-05 20:10:32] (step=0070770) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.846605360986109, LR: 0.0003 +[2026-03-05 20:10:40] (step=0070771) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.846801017413423, LR: 0.0003 +[2026-03-05 20:10:48] (step=0070772) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.846996673840735, LR: 0.0003 +[2026-03-05 20:10:55] (step=0070773) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.847192330268049, LR: 0.0003 +[2026-03-05 20:11:03] (step=0070774) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.847387986695363, LR: 0.0003 +[2026-03-05 20:11:11] (step=0070775) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.847583643122677, LR: 0.0003 +[2026-03-05 20:11:19] (step=0070776) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.847779299549991, LR: 0.0003 +[2026-03-05 20:11:27] (step=0070777) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.847974955977303, LR: 0.0003 +[2026-03-05 20:11:35] (step=0070778) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.848170612404617, LR: 0.0003 +[2026-03-05 20:11:42] (step=0070779) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.848366268831931, LR: 0.0003 +[2026-03-05 20:11:50] (step=0070780) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.848561925259245, LR: 0.0003 +[2026-03-05 20:11:58] (step=0070781) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.848757581686558, LR: 0.0003 +[2026-03-05 20:12:06] (step=0070782) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.848953238113872, LR: 0.0003 +[2026-03-05 20:12:14] (step=0070783) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.849148894541186, LR: 0.0003 +[2026-03-05 20:12:22] (step=0070784) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.8493445509685, LR: 0.0003 +[2026-03-05 20:12:30] (step=0070785) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.849540207395814, LR: 0.0003 +[2026-03-05 20:12:37] (step=0070786) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.849735863823126, LR: 0.0003 +[2026-03-05 20:12:45] (step=0070787) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.84993152025044, LR: 0.0003 +[2026-03-05 20:12:53] (step=0070788) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 13.850127176677754, LR: 0.0003 +[2026-03-05 20:13:01] (step=0070789) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.850322833105068, LR: 0.0003 +[2026-03-05 20:13:09] (step=0070790) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.850518489532382, LR: 0.0003 +[2026-03-05 20:13:17] (step=0070791) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 13.850714145959694, LR: 0.0003 +[2026-03-05 20:13:24] (step=0070792) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.850909802387008, LR: 0.0003 +[2026-03-05 20:13:32] (step=0070793) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.851105458814322, LR: 0.0003 +[2026-03-05 20:13:40] (step=0070794) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 13.851301115241636, LR: 0.0003 +[2026-03-05 20:13:48] (step=0070795) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.85149677166895, LR: 0.0003 +[2026-03-05 20:13:56] (step=0070796) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.851692428096262, LR: 0.0003 +[2026-03-05 20:14:04] (step=0070797) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.851888084523576, LR: 0.0003 +[2026-03-05 20:14:12] (step=0070798) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.85208374095089, LR: 0.0003 +[2026-03-05 20:14:19] (step=0070799) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.852279397378204, LR: 0.0003 +[2026-03-05 20:14:27] (step=0070800) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.852475053805518, LR: 0.0003 +[2026-03-05 20:14:35] (step=0070801) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.85267071023283, LR: 0.0003 +[2026-03-05 20:14:43] (step=0070802) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.852866366660145, LR: 0.0003 +[2026-03-05 20:14:51] (step=0070803) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.853062023087459, LR: 0.0003 +[2026-03-05 20:14:59] (step=0070804) Train Loss: 0.4278, Train Steps/Sec: 0.13, Epoch: 13.853257679514773, LR: 0.0003 +[2026-03-05 20:15:06] (step=0070805) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.853453335942085, LR: 0.0003 +[2026-03-05 20:15:14] (step=0070806) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.853648992369399, LR: 0.0003 +[2026-03-05 20:15:22] (step=0070807) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 13.853844648796713, LR: 0.0003 +[2026-03-05 20:15:30] (step=0070808) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.854040305224027, LR: 0.0003 +[2026-03-05 20:15:38] (step=0070809) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.85423596165134, LR: 0.0003 +[2026-03-05 20:15:46] (step=0070810) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 13.854431618078653, LR: 0.0003 +[2026-03-05 20:15:53] (step=0070811) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.854627274505967, LR: 0.0003 +[2026-03-05 20:16:01] (step=0070812) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.854822930933281, LR: 0.0003 +[2026-03-05 20:16:09] (step=0070813) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.855018587360595, LR: 0.0003 +[2026-03-05 20:16:17] (step=0070814) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.85521424378791, LR: 0.0003 +[2026-03-05 20:16:25] (step=0070815) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 13.855409900215221, LR: 0.0003 +[2026-03-05 20:16:33] (step=0070816) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.855605556642535, LR: 0.0003 +[2026-03-05 20:16:41] (step=0070817) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 13.85580121306985, LR: 0.0003 +[2026-03-05 20:16:48] (step=0070818) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.855996869497163, LR: 0.0003 +[2026-03-05 20:16:56] (step=0070819) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.856192525924477, LR: 0.0003 +[2026-03-05 20:17:04] (step=0070820) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.85638818235179, LR: 0.0003 +[2026-03-05 20:17:12] (step=0070821) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.856583838779104, LR: 0.0003 +[2026-03-05 20:17:20] (step=0070822) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.856779495206418, LR: 0.0003 +[2026-03-05 20:17:28] (step=0070823) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 13.856975151633732, LR: 0.0003 +[2026-03-05 20:17:36] (step=0070824) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.857170808061046, LR: 0.0003 +[2026-03-05 20:17:43] (step=0070825) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.857366464488358, LR: 0.0003 +[2026-03-05 20:17:51] (step=0070826) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.857562120915672, LR: 0.0003 +[2026-03-05 20:17:59] (step=0070827) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.857757777342986, LR: 0.0003 +[2026-03-05 20:18:07] (step=0070828) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.8579534337703, LR: 0.0003 +[2026-03-05 20:18:15] (step=0070829) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.858149090197614, LR: 0.0003 +[2026-03-05 20:18:23] (step=0070830) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 13.858344746624926, LR: 0.0003 +[2026-03-05 20:18:30] (step=0070831) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.85854040305224, LR: 0.0003 +[2026-03-05 20:18:38] (step=0070832) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.858736059479554, LR: 0.0003 +[2026-03-05 20:18:46] (step=0070833) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.858931715906868, LR: 0.0003 +[2026-03-05 20:18:54] (step=0070834) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.85912737233418, LR: 0.0003 +[2026-03-05 20:19:02] (step=0070835) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.859323028761494, LR: 0.0003 +[2026-03-05 20:19:10] (step=0070836) Train Loss: 0.4637, Train Steps/Sec: 0.13, Epoch: 13.859518685188808, LR: 0.0003 +[2026-03-05 20:19:18] (step=0070837) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.859714341616122, LR: 0.0003 +[2026-03-05 20:19:25] (step=0070838) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.859909998043436, LR: 0.0003 +[2026-03-05 20:19:33] (step=0070839) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.860105654470749, LR: 0.0003 +[2026-03-05 20:19:41] (step=0070840) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.860301310898063, LR: 0.0003 +[2026-03-05 20:19:49] (step=0070841) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.860496967325377, LR: 0.0003 +[2026-03-05 20:19:57] (step=0070842) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 13.86069262375269, LR: 0.0003 +[2026-03-05 20:20:05] (step=0070843) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.860888280180005, LR: 0.0003 +[2026-03-05 20:20:12] (step=0070844) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 13.861083936607317, LR: 0.0003 +[2026-03-05 20:20:20] (step=0070845) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.86127959303463, LR: 0.0003 +[2026-03-05 20:20:28] (step=0070846) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 13.861475249461945, LR: 0.0003 +[2026-03-05 20:20:36] (step=0070847) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.861670905889259, LR: 0.0003 +[2026-03-05 20:20:44] (step=0070848) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.861866562316573, LR: 0.0003 +[2026-03-05 20:20:52] (step=0070849) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.862062218743885, LR: 0.0003 +[2026-03-05 20:20:59] (step=0070850) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.8622578751712, LR: 0.0003 +[2026-03-05 20:21:07] (step=0070851) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 13.862453531598513, LR: 0.0003 +[2026-03-05 20:21:15] (step=0070852) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.862649188025827, LR: 0.0003 +[2026-03-05 20:21:23] (step=0070853) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 13.862844844453141, LR: 0.0003 +[2026-03-05 20:21:31] (step=0070854) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.863040500880453, LR: 0.0003 +[2026-03-05 20:21:39] (step=0070855) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.863236157307767, LR: 0.0003 +[2026-03-05 20:21:47] (step=0070856) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.863431813735081, LR: 0.0003 +[2026-03-05 20:21:54] (step=0070857) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.863627470162395, LR: 0.0003 +[2026-03-05 20:22:02] (step=0070858) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.863823126589708, LR: 0.0003 +[2026-03-05 20:22:10] (step=0070859) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.864018783017022, LR: 0.0003 +[2026-03-05 20:22:18] (step=0070860) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.864214439444336, LR: 0.0003 +[2026-03-05 20:22:26] (step=0070861) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 13.86441009587165, LR: 0.0003 +[2026-03-05 20:22:34] (step=0070862) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.864605752298964, LR: 0.0003 +[2026-03-05 20:22:41] (step=0070863) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.864801408726276, LR: 0.0003 +[2026-03-05 20:22:49] (step=0070864) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.86499706515359, LR: 0.0003 +[2026-03-05 20:22:57] (step=0070865) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.865192721580904, LR: 0.0003 +[2026-03-05 20:23:05] (step=0070866) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.865388378008218, LR: 0.0003 +[2026-03-05 20:23:13] (step=0070867) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.865584034435532, LR: 0.0003 +[2026-03-05 20:23:21] (step=0070868) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.865779690862844, LR: 0.0003 +[2026-03-05 20:23:29] (step=0070869) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.865975347290158, LR: 0.0003 +[2026-03-05 20:23:37] (step=0070870) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 13.866171003717472, LR: 0.0003 +[2026-03-05 20:23:44] (step=0070871) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 13.866366660144786, LR: 0.0003 +[2026-03-05 20:23:52] (step=0070872) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.8665623165721, LR: 0.0003 +[2026-03-05 20:24:00] (step=0070873) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.866757972999412, LR: 0.0003 +[2026-03-05 20:24:08] (step=0070874) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.866953629426726, LR: 0.0003 +[2026-03-05 20:24:16] (step=0070875) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.86714928585404, LR: 0.0003 +[2026-03-05 20:24:24] (step=0070876) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.867344942281354, LR: 0.0003 +[2026-03-05 20:24:32] (step=0070877) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.867540598708668, LR: 0.0003 +[2026-03-05 20:24:39] (step=0070878) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 13.86773625513598, LR: 0.0003 +[2026-03-05 20:24:47] (step=0070879) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 13.867931911563295, LR: 0.0003 +[2026-03-05 20:24:55] (step=0070880) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.868127567990609, LR: 0.0003 +[2026-03-05 20:25:03] (step=0070881) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.868323224417923, LR: 0.0003 +[2026-03-05 20:25:11] (step=0070882) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.868518880845237, LR: 0.0003 +[2026-03-05 20:25:19] (step=0070883) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 13.868714537272549, LR: 0.0003 +[2026-03-05 20:25:27] (step=0070884) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 13.868910193699863, LR: 0.0003 +[2026-03-05 20:25:34] (step=0070885) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.869105850127177, LR: 0.0003 +[2026-03-05 20:25:42] (step=0070886) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.869301506554491, LR: 0.0003 +[2026-03-05 20:25:50] (step=0070887) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.869497162981803, LR: 0.0003 +[2026-03-05 20:25:58] (step=0070888) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 13.869692819409117, LR: 0.0003 +[2026-03-05 20:26:06] (step=0070889) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.869888475836431, LR: 0.0003 +[2026-03-05 20:26:14] (step=0070890) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 13.870084132263745, LR: 0.0003 +[2026-03-05 20:26:21] (step=0070891) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.87027978869106, LR: 0.0003 +[2026-03-05 20:26:29] (step=0070892) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.870475445118371, LR: 0.0003 +[2026-03-05 20:26:37] (step=0070893) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 13.870671101545685, LR: 0.0003 +[2026-03-05 20:26:45] (step=0070894) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.870866757973, LR: 0.0003 +[2026-03-05 20:26:53] (step=0070895) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.871062414400313, LR: 0.0003 +[2026-03-05 20:27:01] (step=0070896) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.871258070827627, LR: 0.0003 +[2026-03-05 20:27:08] (step=0070897) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.87145372725494, LR: 0.0003 +[2026-03-05 20:27:16] (step=0070898) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.871649383682254, LR: 0.0003 +[2026-03-05 20:27:24] (step=0070899) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.871845040109568, LR: 0.0003 +[2026-03-05 20:27:32] (step=0070900) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.872040696536882, LR: 0.0003 +[2026-03-05 20:27:40] (step=0070901) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 13.872236352964196, LR: 0.0003 +[2026-03-05 20:27:48] (step=0070902) Train Loss: 0.4425, Train Steps/Sec: 0.12, Epoch: 13.872432009391508, LR: 0.0003 +[2026-03-05 20:27:56] (step=0070903) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.872627665818822, LR: 0.0003 +[2026-03-05 20:28:04] (step=0070904) Train Loss: 0.4220, Train Steps/Sec: 0.13, Epoch: 13.872823322246136, LR: 0.0003 +[2026-03-05 20:28:11] (step=0070905) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.87301897867345, LR: 0.0003 +[2026-03-05 20:28:19] (step=0070906) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.873214635100764, LR: 0.0003 +[2026-03-05 20:28:27] (step=0070907) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.873410291528076, LR: 0.0003 +[2026-03-05 20:28:35] (step=0070908) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.87360594795539, LR: 0.0003 +[2026-03-05 20:28:43] (step=0070909) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 13.873801604382704, LR: 0.0003 +[2026-03-05 20:28:51] (step=0070910) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 13.873997260810018, LR: 0.0003 +[2026-03-05 20:28:59] (step=0070911) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.87419291723733, LR: 0.0003 +[2026-03-05 20:29:06] (step=0070912) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.874388573664644, LR: 0.0003 +[2026-03-05 20:29:14] (step=0070913) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.874584230091958, LR: 0.0003 +[2026-03-05 20:29:22] (step=0070914) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.874779886519272, LR: 0.0003 +[2026-03-05 20:29:30] (step=0070915) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.874975542946586, LR: 0.0003 +[2026-03-05 20:29:38] (step=0070916) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.875171199373899, LR: 0.0003 +[2026-03-05 20:29:46] (step=0070917) Train Loss: 0.4257, Train Steps/Sec: 0.13, Epoch: 13.875366855801213, LR: 0.0003 +[2026-03-05 20:29:54] (step=0070918) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.875562512228527, LR: 0.0003 +[2026-03-05 20:30:01] (step=0070919) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.87575816865584, LR: 0.0003 +[2026-03-05 20:30:09] (step=0070920) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.875953825083155, LR: 0.0003 +[2026-03-05 20:30:17] (step=0070921) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.876149481510467, LR: 0.0003 +[2026-03-05 20:30:25] (step=0070922) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.876345137937781, LR: 0.0003 +[2026-03-05 20:30:33] (step=0070923) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 13.876540794365095, LR: 0.0003 +[2026-03-05 20:30:41] (step=0070924) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 13.876736450792409, LR: 0.0003 +[2026-03-05 20:30:49] (step=0070925) Train Loss: 0.4664, Train Steps/Sec: 0.13, Epoch: 13.876932107219723, LR: 0.0003 +[2026-03-05 20:30:56] (step=0070926) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.877127763647035, LR: 0.0003 +[2026-03-05 20:31:04] (step=0070927) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.87732342007435, LR: 0.0003 +[2026-03-05 20:31:12] (step=0070928) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.877519076501663, LR: 0.0003 +[2026-03-05 20:31:20] (step=0070929) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.877714732928977, LR: 0.0003 +[2026-03-05 20:31:28] (step=0070930) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.877910389356291, LR: 0.0003 +[2026-03-05 20:31:36] (step=0070931) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 13.878106045783603, LR: 0.0003 +[2026-03-05 20:31:43] (step=0070932) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.878301702210917, LR: 0.0003 +[2026-03-05 20:31:51] (step=0070933) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.878497358638231, LR: 0.0003 +[2026-03-05 20:31:59] (step=0070934) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.878693015065545, LR: 0.0003 +[2026-03-05 20:32:07] (step=0070935) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.878888671492858, LR: 0.0003 +[2026-03-05 20:32:15] (step=0070936) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.879084327920172, LR: 0.0003 +[2026-03-05 20:32:23] (step=0070937) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 13.879279984347486, LR: 0.0003 +[2026-03-05 20:32:30] (step=0070938) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.8794756407748, LR: 0.0003 +[2026-03-05 20:32:38] (step=0070939) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 13.879671297202114, LR: 0.0003 +[2026-03-05 20:32:46] (step=0070940) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 13.879866953629426, LR: 0.0003 +[2026-03-05 20:32:54] (step=0070941) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.88006261005674, LR: 0.0003 +[2026-03-05 20:33:02] (step=0070942) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.880258266484054, LR: 0.0003 +[2026-03-05 20:33:10] (step=0070943) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.880453922911368, LR: 0.0003 +[2026-03-05 20:33:17] (step=0070944) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.880649579338682, LR: 0.0003 +[2026-03-05 20:33:25] (step=0070945) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 13.880845235765994, LR: 0.0003 +[2026-03-05 20:33:33] (step=0070946) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.881040892193308, LR: 0.0003 +[2026-03-05 20:33:41] (step=0070947) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.881236548620622, LR: 0.0003 +[2026-03-05 20:33:49] (step=0070948) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.881432205047936, LR: 0.0003 +[2026-03-05 20:33:57] (step=0070949) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 13.88162786147525, LR: 0.0003 +[2026-03-05 20:34:05] (step=0070950) Train Loss: 0.4579, Train Steps/Sec: 0.12, Epoch: 13.881823517902562, LR: 0.0003 +[2026-03-05 20:34:12] (step=0070951) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.882019174329876, LR: 0.0003 +[2026-03-05 20:34:20] (step=0070952) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 13.88221483075719, LR: 0.0003 +[2026-03-05 20:34:28] (step=0070953) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.882410487184504, LR: 0.0003 +[2026-03-05 20:34:36] (step=0070954) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.882606143611818, LR: 0.0003 +[2026-03-05 20:34:44] (step=0070955) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.88280180003913, LR: 0.0003 +[2026-03-05 20:34:52] (step=0070956) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 13.882997456466445, LR: 0.0003 +[2026-03-05 20:35:00] (step=0070957) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 13.883193112893759, LR: 0.0003 +[2026-03-05 20:35:07] (step=0070958) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.883388769321073, LR: 0.0003 +[2026-03-05 20:35:15] (step=0070959) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.883584425748387, LR: 0.0003 +[2026-03-05 20:35:23] (step=0070960) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 13.883780082175699, LR: 0.0003 +[2026-03-05 20:35:31] (step=0070961) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.883975738603013, LR: 0.0003 +[2026-03-05 20:35:39] (step=0070962) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 13.884171395030327, LR: 0.0003 +[2026-03-05 20:35:47] (step=0070963) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 13.884367051457641, LR: 0.0003 +[2026-03-05 20:35:54] (step=0070964) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.884562707884953, LR: 0.0003 +[2026-03-05 20:36:02] (step=0070965) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.884758364312267, LR: 0.0003 +[2026-03-05 20:36:10] (step=0070966) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 13.884954020739581, LR: 0.0003 +[2026-03-05 20:36:18] (step=0070967) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 13.885149677166895, LR: 0.0003 +[2026-03-05 20:36:26] (step=0070968) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.88534533359421, LR: 0.0003 +[2026-03-05 20:36:34] (step=0070969) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 13.885540990021521, LR: 0.0003 +[2026-03-05 20:36:42] (step=0070970) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.885736646448835, LR: 0.0003 +[2026-03-05 20:36:50] (step=0070971) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.88593230287615, LR: 0.0003 +[2026-03-05 20:36:57] (step=0070972) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.886127959303463, LR: 0.0003 +[2026-03-05 20:37:05] (step=0070973) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.886323615730777, LR: 0.0003 +[2026-03-05 20:37:13] (step=0070974) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.88651927215809, LR: 0.0003 +[2026-03-05 20:37:21] (step=0070975) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.886714928585404, LR: 0.0003 +[2026-03-05 20:37:29] (step=0070976) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 13.886910585012718, LR: 0.0003 +[2026-03-05 20:37:37] (step=0070977) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.887106241440032, LR: 0.0003 +[2026-03-05 20:37:44] (step=0070978) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.887301897867346, LR: 0.0003 +[2026-03-05 20:37:52] (step=0070979) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.887497554294658, LR: 0.0003 +[2026-03-05 20:38:00] (step=0070980) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.887693210721972, LR: 0.0003 +[2026-03-05 20:38:08] (step=0070981) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.887888867149286, LR: 0.0003 +[2026-03-05 20:38:16] (step=0070982) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.8880845235766, LR: 0.0003 +[2026-03-05 20:38:24] (step=0070983) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.888280180003914, LR: 0.0003 +[2026-03-05 20:38:32] (step=0070984) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.888475836431226, LR: 0.0003 +[2026-03-05 20:38:39] (step=0070985) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.88867149285854, LR: 0.0003 +[2026-03-05 20:38:47] (step=0070986) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.888867149285854, LR: 0.0003 +[2026-03-05 20:38:55] (step=0070987) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.889062805713168, LR: 0.0003 +[2026-03-05 20:39:03] (step=0070988) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.88925846214048, LR: 0.0003 +[2026-03-05 20:39:11] (step=0070989) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.889454118567794, LR: 0.0003 +[2026-03-05 20:39:19] (step=0070990) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.889649774995108, LR: 0.0003 +[2026-03-05 20:39:26] (step=0070991) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.889845431422422, LR: 0.0003 +[2026-03-05 20:39:34] (step=0070992) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.890041087849736, LR: 0.0003 +[2026-03-05 20:39:42] (step=0070993) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 13.890236744277049, LR: 0.0003 +[2026-03-05 20:39:50] (step=0070994) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.890432400704363, LR: 0.0003 +[2026-03-05 20:39:58] (step=0070995) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.890628057131677, LR: 0.0003 +[2026-03-05 20:40:06] (step=0070996) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.89082371355899, LR: 0.0003 +[2026-03-05 20:40:14] (step=0070997) Train Loss: 0.4431, Train Steps/Sec: 0.12, Epoch: 13.891019369986305, LR: 0.0003 +[2026-03-05 20:40:22] (step=0070998) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.891215026413617, LR: 0.0003 +[2026-03-05 20:40:29] (step=0070999) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.891410682840931, LR: 0.0003 +[2026-03-05 20:40:37] (step=0071000) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.891606339268245, LR: 0.0003 +[2026-03-05 20:40:37] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0071000/ +[2026-03-05 20:40:45] (step=0071001) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.891801995695559, LR: 0.0003 +[2026-03-05 20:40:53] (step=0071002) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 13.891997652122873, LR: 0.0003 +[2026-03-05 20:41:01] (step=0071003) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.892193308550185, LR: 0.0003 +[2026-03-05 20:41:09] (step=0071004) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.8923889649775, LR: 0.0003 +[2026-03-05 20:41:17] (step=0071005) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.892584621404813, LR: 0.0003 +[2026-03-05 20:41:24] (step=0071006) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 13.892780277832127, LR: 0.0003 +[2026-03-05 20:41:32] (step=0071007) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.892975934259441, LR: 0.0003 +[2026-03-05 20:41:40] (step=0071008) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.893171590686753, LR: 0.0003 +[2026-03-05 20:41:48] (step=0071009) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 13.893367247114067, LR: 0.0003 +[2026-03-05 20:41:56] (step=0071010) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.893562903541381, LR: 0.0003 +[2026-03-05 20:42:04] (step=0071011) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.893758559968695, LR: 0.0003 +[2026-03-05 20:42:12] (step=0071012) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 13.89395421639601, LR: 0.0003 +[2026-03-05 20:42:19] (step=0071013) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.894149872823322, LR: 0.0003 +[2026-03-05 20:42:27] (step=0071014) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.894345529250636, LR: 0.0003 +[2026-03-05 20:42:35] (step=0071015) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.89454118567795, LR: 0.0003 +[2026-03-05 20:42:43] (step=0071016) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.894736842105264, LR: 0.0003 +[2026-03-05 20:42:51] (step=0071017) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.894932498532576, LR: 0.0003 +[2026-03-05 20:42:59] (step=0071018) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.89512815495989, LR: 0.0003 +[2026-03-05 20:43:07] (step=0071019) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.895323811387204, LR: 0.0003 +[2026-03-05 20:43:14] (step=0071020) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.895519467814518, LR: 0.0003 +[2026-03-05 20:43:22] (step=0071021) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 13.895715124241832, LR: 0.0003 +[2026-03-05 20:43:30] (step=0071022) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.895910780669144, LR: 0.0003 +[2026-03-05 20:43:38] (step=0071023) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.896106437096458, LR: 0.0003 +[2026-03-05 20:43:46] (step=0071024) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 13.896302093523772, LR: 0.0003 +[2026-03-05 20:43:54] (step=0071025) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.896497749951086, LR: 0.0003 +[2026-03-05 20:44:01] (step=0071026) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 13.8966934063784, LR: 0.0003 +[2026-03-05 20:44:09] (step=0071027) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.896889062805712, LR: 0.0003 +[2026-03-05 20:44:17] (step=0071028) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.897084719233026, LR: 0.0003 +[2026-03-05 20:44:25] (step=0071029) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.89728037566034, LR: 0.0003 +[2026-03-05 20:44:33] (step=0071030) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.897476032087654, LR: 0.0003 +[2026-03-05 20:44:41] (step=0071031) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.897671688514968, LR: 0.0003 +[2026-03-05 20:44:48] (step=0071032) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.89786734494228, LR: 0.0003 +[2026-03-05 20:44:56] (step=0071033) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.898063001369595, LR: 0.0003 +[2026-03-05 20:45:04] (step=0071034) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.898258657796909, LR: 0.0003 +[2026-03-05 20:45:12] (step=0071035) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.898454314224223, LR: 0.0003 +[2026-03-05 20:45:20] (step=0071036) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.898649970651537, LR: 0.0003 +[2026-03-05 20:45:28] (step=0071037) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 13.898845627078849, LR: 0.0003 +[2026-03-05 20:45:36] (step=0071038) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.899041283506163, LR: 0.0003 +[2026-03-05 20:45:43] (step=0071039) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 13.899236939933477, LR: 0.0003 +[2026-03-05 20:45:51] (step=0071040) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.899432596360791, LR: 0.0003 +[2026-03-05 20:45:59] (step=0071041) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.899628252788103, LR: 0.0003 +[2026-03-05 20:46:07] (step=0071042) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 13.899823909215417, LR: 0.0003 +[2026-03-05 20:46:15] (step=0071043) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.900019565642731, LR: 0.0003 +[2026-03-05 20:46:23] (step=0071044) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 13.900215222070045, LR: 0.0003 +[2026-03-05 20:46:30] (step=0071045) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.90041087849736, LR: 0.0003 +[2026-03-05 20:46:38] (step=0071046) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.900606534924671, LR: 0.0003 +[2026-03-05 20:46:46] (step=0071047) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.900802191351985, LR: 0.0003 +[2026-03-05 20:46:54] (step=0071048) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.9009978477793, LR: 0.0003 +[2026-03-05 20:47:02] (step=0071049) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.901193504206613, LR: 0.0003 +[2026-03-05 20:47:10] (step=0071050) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.901389160633927, LR: 0.0003 +[2026-03-05 20:47:18] (step=0071051) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.90158481706124, LR: 0.0003 +[2026-03-05 20:47:25] (step=0071052) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 13.901780473488554, LR: 0.0003 +[2026-03-05 20:47:33] (step=0071053) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.901976129915868, LR: 0.0003 +[2026-03-05 20:47:41] (step=0071054) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.902171786343182, LR: 0.0003 +[2026-03-05 20:47:49] (step=0071055) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.902367442770496, LR: 0.0003 +[2026-03-05 20:47:57] (step=0071056) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.902563099197808, LR: 0.0003 +[2026-03-05 20:48:05] (step=0071057) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.902758755625122, LR: 0.0003 +[2026-03-05 20:48:12] (step=0071058) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.902954412052436, LR: 0.0003 +[2026-03-05 20:48:20] (step=0071059) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.90315006847975, LR: 0.0003 +[2026-03-05 20:48:28] (step=0071060) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.903345724907064, LR: 0.0003 +[2026-03-05 20:48:36] (step=0071061) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.903541381334376, LR: 0.0003 +[2026-03-05 20:48:44] (step=0071062) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.90373703776169, LR: 0.0003 +[2026-03-05 20:48:52] (step=0071063) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.903932694189004, LR: 0.0003 +[2026-03-05 20:48:59] (step=0071064) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.904128350616318, LR: 0.0003 +[2026-03-05 20:49:07] (step=0071065) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.904324007043632, LR: 0.0003 +[2026-03-05 20:49:15] (step=0071066) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 13.904519663470944, LR: 0.0003 +[2026-03-05 20:49:23] (step=0071067) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.904715319898258, LR: 0.0003 +[2026-03-05 20:49:31] (step=0071068) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 13.904910976325572, LR: 0.0003 +[2026-03-05 20:49:39] (step=0071069) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 13.905106632752886, LR: 0.0003 +[2026-03-05 20:49:46] (step=0071070) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.905302289180199, LR: 0.0003 +[2026-03-05 20:49:54] (step=0071071) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.905497945607513, LR: 0.0003 +[2026-03-05 20:50:02] (step=0071072) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.905693602034827, LR: 0.0003 +[2026-03-05 20:50:10] (step=0071073) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 13.90588925846214, LR: 0.0003 +[2026-03-05 20:50:18] (step=0071074) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.906084914889455, LR: 0.0003 +[2026-03-05 20:50:26] (step=0071075) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.906280571316767, LR: 0.0003 +[2026-03-05 20:50:34] (step=0071076) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 13.906476227744081, LR: 0.0003 +[2026-03-05 20:50:41] (step=0071077) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.906671884171395, LR: 0.0003 +[2026-03-05 20:50:49] (step=0071078) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 13.906867540598709, LR: 0.0003 +[2026-03-05 20:50:57] (step=0071079) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.907063197026023, LR: 0.0003 +[2026-03-05 20:51:05] (step=0071080) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.907258853453335, LR: 0.0003 +[2026-03-05 20:51:13] (step=0071081) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.90745450988065, LR: 0.0003 +[2026-03-05 20:51:21] (step=0071082) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 13.907650166307963, LR: 0.0003 +[2026-03-05 20:51:28] (step=0071083) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.907845822735277, LR: 0.0003 +[2026-03-05 20:51:36] (step=0071084) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.908041479162591, LR: 0.0003 +[2026-03-05 20:51:44] (step=0071085) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.908237135589903, LR: 0.0003 +[2026-03-05 20:51:52] (step=0071086) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.908432792017217, LR: 0.0003 +[2026-03-05 20:52:00] (step=0071087) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 13.908628448444532, LR: 0.0003 +[2026-03-05 20:52:08] (step=0071088) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 13.908824104871846, LR: 0.0003 +[2026-03-05 20:52:15] (step=0071089) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.90901976129916, LR: 0.0003 +[2026-03-05 20:52:23] (step=0071090) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.909215417726472, LR: 0.0003 +[2026-03-05 20:52:31] (step=0071091) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.909411074153786, LR: 0.0003 +[2026-03-05 20:52:39] (step=0071092) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 13.9096067305811, LR: 0.0003 +[2026-03-05 20:52:47] (step=0071093) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.909802387008414, LR: 0.0003 +[2026-03-05 20:52:55] (step=0071094) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.909998043435726, LR: 0.0003 +[2026-03-05 20:53:03] (step=0071095) Train Loss: 0.4436, Train Steps/Sec: 0.12, Epoch: 13.91019369986304, LR: 0.0003 +[2026-03-05 20:53:11] (step=0071096) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 13.910389356290354, LR: 0.0003 +[2026-03-05 20:53:18] (step=0071097) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.910585012717668, LR: 0.0003 +[2026-03-05 20:53:26] (step=0071098) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.910780669144982, LR: 0.0003 +[2026-03-05 20:53:34] (step=0071099) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.910976325572294, LR: 0.0003 +[2026-03-05 20:53:42] (step=0071100) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.911171981999608, LR: 0.0003 +[2026-03-05 20:53:50] (step=0071101) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.911367638426922, LR: 0.0003 +[2026-03-05 20:53:58] (step=0071102) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.911563294854236, LR: 0.0003 +[2026-03-05 20:54:06] (step=0071103) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.91175895128155, LR: 0.0003 +[2026-03-05 20:54:13] (step=0071104) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.911954607708862, LR: 0.0003 +[2026-03-05 20:54:21] (step=0071105) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.912150264136177, LR: 0.0003 +[2026-03-05 20:54:29] (step=0071106) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 13.91234592056349, LR: 0.0003 +[2026-03-05 20:54:37] (step=0071107) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 13.912541576990805, LR: 0.0003 +[2026-03-05 20:54:45] (step=0071108) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.912737233418119, LR: 0.0003 +[2026-03-05 20:54:53] (step=0071109) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.91293288984543, LR: 0.0003 +[2026-03-05 20:55:00] (step=0071110) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 13.913128546272745, LR: 0.0003 +[2026-03-05 20:55:08] (step=0071111) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 13.913324202700059, LR: 0.0003 +[2026-03-05 20:55:16] (step=0071112) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.913519859127373, LR: 0.0003 +[2026-03-05 20:55:24] (step=0071113) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.913715515554687, LR: 0.0003 +[2026-03-05 20:55:32] (step=0071114) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.913911171981999, LR: 0.0003 +[2026-03-05 20:55:40] (step=0071115) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.914106828409313, LR: 0.0003 +[2026-03-05 20:55:47] (step=0071116) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.914302484836627, LR: 0.0003 +[2026-03-05 20:55:55] (step=0071117) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 13.914498141263941, LR: 0.0003 +[2026-03-05 20:56:03] (step=0071118) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.914693797691255, LR: 0.0003 +[2026-03-05 20:56:11] (step=0071119) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.914889454118567, LR: 0.0003 +[2026-03-05 20:56:19] (step=0071120) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.915085110545881, LR: 0.0003 +[2026-03-05 20:56:27] (step=0071121) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.915280766973195, LR: 0.0003 +[2026-03-05 20:56:35] (step=0071122) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.91547642340051, LR: 0.0003 +[2026-03-05 20:56:43] (step=0071123) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.915672079827822, LR: 0.0003 +[2026-03-05 20:56:50] (step=0071124) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 13.915867736255136, LR: 0.0003 +[2026-03-05 20:56:58] (step=0071125) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.91606339268245, LR: 0.0003 +[2026-03-05 20:57:06] (step=0071126) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 13.916259049109764, LR: 0.0003 +[2026-03-05 20:57:14] (step=0071127) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.916454705537078, LR: 0.0003 +[2026-03-05 20:57:22] (step=0071128) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.91665036196439, LR: 0.0003 +[2026-03-05 20:57:30] (step=0071129) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.916846018391704, LR: 0.0003 +[2026-03-05 20:57:37] (step=0071130) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.917041674819018, LR: 0.0003 +[2026-03-05 20:57:45] (step=0071131) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.917237331246332, LR: 0.0003 +[2026-03-05 20:57:53] (step=0071132) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 13.917432987673646, LR: 0.0003 +[2026-03-05 20:58:01] (step=0071133) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 13.917628644100958, LR: 0.0003 +[2026-03-05 20:58:09] (step=0071134) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.917824300528272, LR: 0.0003 +[2026-03-05 20:58:17] (step=0071135) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 13.918019956955586, LR: 0.0003 +[2026-03-05 20:58:25] (step=0071136) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.9182156133829, LR: 0.0003 +[2026-03-05 20:58:32] (step=0071137) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.918411269810214, LR: 0.0003 +[2026-03-05 20:58:40] (step=0071138) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.918606926237526, LR: 0.0003 +[2026-03-05 20:58:48] (step=0071139) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.91880258266484, LR: 0.0003 +[2026-03-05 20:58:56] (step=0071140) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.918998239092154, LR: 0.0003 +[2026-03-05 20:59:04] (step=0071141) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.919193895519468, LR: 0.0003 +[2026-03-05 20:59:12] (step=0071142) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.919389551946782, LR: 0.0003 +[2026-03-05 20:59:19] (step=0071143) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.919585208374095, LR: 0.0003 +[2026-03-05 20:59:27] (step=0071144) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.919780864801409, LR: 0.0003 +[2026-03-05 20:59:35] (step=0071145) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 13.919976521228723, LR: 0.0003 +[2026-03-05 20:59:43] (step=0071146) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.920172177656037, LR: 0.0003 +[2026-03-05 20:59:51] (step=0071147) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.920367834083349, LR: 0.0003 +[2026-03-05 20:59:59] (step=0071148) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 13.920563490510663, LR: 0.0003 +[2026-03-05 21:00:07] (step=0071149) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.920759146937977, LR: 0.0003 +[2026-03-05 21:00:14] (step=0071150) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.92095480336529, LR: 0.0003 +[2026-03-05 21:00:22] (step=0071151) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 13.921150459792605, LR: 0.0003 +[2026-03-05 21:00:30] (step=0071152) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.921346116219917, LR: 0.0003 +[2026-03-05 21:00:38] (step=0071153) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.921541772647231, LR: 0.0003 +[2026-03-05 21:00:46] (step=0071154) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 13.921737429074545, LR: 0.0003 +[2026-03-05 21:00:54] (step=0071155) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.921933085501859, LR: 0.0003 +[2026-03-05 21:01:01] (step=0071156) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.922128741929173, LR: 0.0003 +[2026-03-05 21:01:09] (step=0071157) Train Loss: 0.4276, Train Steps/Sec: 0.13, Epoch: 13.922324398356485, LR: 0.0003 +[2026-03-05 21:01:17] (step=0071158) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.9225200547838, LR: 0.0003 +[2026-03-05 21:01:25] (step=0071159) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.922715711211113, LR: 0.0003 +[2026-03-05 21:01:33] (step=0071160) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.922911367638427, LR: 0.0003 +[2026-03-05 21:01:41] (step=0071161) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.923107024065741, LR: 0.0003 +[2026-03-05 21:01:48] (step=0071162) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.923302680493054, LR: 0.0003 +[2026-03-05 21:01:56] (step=0071163) Train Loss: 0.4289, Train Steps/Sec: 0.13, Epoch: 13.923498336920368, LR: 0.0003 +[2026-03-05 21:02:04] (step=0071164) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.923693993347682, LR: 0.0003 +[2026-03-05 21:02:12] (step=0071165) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 13.923889649774996, LR: 0.0003 +[2026-03-05 21:02:20] (step=0071166) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 13.92408530620231, LR: 0.0003 +[2026-03-05 21:02:28] (step=0071167) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 13.924280962629622, LR: 0.0003 +[2026-03-05 21:02:36] (step=0071168) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.924476619056936, LR: 0.0003 +[2026-03-05 21:02:43] (step=0071169) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.92467227548425, LR: 0.0003 +[2026-03-05 21:02:51] (step=0071170) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.924867931911564, LR: 0.0003 +[2026-03-05 21:02:59] (step=0071171) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.925063588338878, LR: 0.0003 +[2026-03-05 21:03:07] (step=0071172) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.92525924476619, LR: 0.0003 +[2026-03-05 21:03:15] (step=0071173) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.925454901193504, LR: 0.0003 +[2026-03-05 21:03:23] (step=0071174) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.925650557620818, LR: 0.0003 +[2026-03-05 21:03:31] (step=0071175) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.925846214048132, LR: 0.0003 +[2026-03-05 21:03:38] (step=0071176) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 13.926041870475444, LR: 0.0003 +[2026-03-05 21:03:46] (step=0071177) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.926237526902758, LR: 0.0003 +[2026-03-05 21:03:54] (step=0071178) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.926433183330072, LR: 0.0003 +[2026-03-05 21:04:02] (step=0071179) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.926628839757386, LR: 0.0003 +[2026-03-05 21:04:10] (step=0071180) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.9268244961847, LR: 0.0003 +[2026-03-05 21:04:18] (step=0071181) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.927020152612013, LR: 0.0003 +[2026-03-05 21:04:26] (step=0071182) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 13.927215809039327, LR: 0.0003 +[2026-03-05 21:04:33] (step=0071183) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.92741146546664, LR: 0.0003 +[2026-03-05 21:04:41] (step=0071184) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 13.927607121893955, LR: 0.0003 +[2026-03-05 21:04:49] (step=0071185) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 13.927802778321269, LR: 0.0003 +[2026-03-05 21:04:57] (step=0071186) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.92799843474858, LR: 0.0003 +[2026-03-05 21:05:05] (step=0071187) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.928194091175895, LR: 0.0003 +[2026-03-05 21:05:13] (step=0071188) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.928389747603209, LR: 0.0003 +[2026-03-05 21:05:21] (step=0071189) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 13.928585404030523, LR: 0.0003 +[2026-03-05 21:05:28] (step=0071190) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 13.928781060457837, LR: 0.0003 +[2026-03-05 21:05:36] (step=0071191) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 13.928976716885149, LR: 0.0003 +[2026-03-05 21:05:44] (step=0071192) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.929172373312463, LR: 0.0003 +[2026-03-05 21:05:52] (step=0071193) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.929368029739777, LR: 0.0003 +[2026-03-05 21:06:00] (step=0071194) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.929563686167091, LR: 0.0003 +[2026-03-05 21:06:08] (step=0071195) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.929759342594405, LR: 0.0003 +[2026-03-05 21:06:15] (step=0071196) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.929954999021717, LR: 0.0003 +[2026-03-05 21:06:23] (step=0071197) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 13.930150655449031, LR: 0.0003 +[2026-03-05 21:06:31] (step=0071198) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 13.930346311876345, LR: 0.0003 +[2026-03-05 21:06:39] (step=0071199) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 13.93054196830366, LR: 0.0003 +[2026-03-05 21:06:47] (step=0071200) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.930737624730972, LR: 0.0003 +[2026-03-05 21:06:55] (step=0071201) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.930933281158286, LR: 0.0003 +[2026-03-05 21:07:02] (step=0071202) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.9311289375856, LR: 0.0003 +[2026-03-05 21:07:10] (step=0071203) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 13.931324594012914, LR: 0.0003 +[2026-03-05 21:07:18] (step=0071204) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.931520250440228, LR: 0.0003 +[2026-03-05 21:07:26] (step=0071205) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.93171590686754, LR: 0.0003 +[2026-03-05 21:07:34] (step=0071206) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 13.931911563294854, LR: 0.0003 +[2026-03-05 21:07:42] (step=0071207) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.932107219722168, LR: 0.0003 +[2026-03-05 21:07:50] (step=0071208) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.932302876149482, LR: 0.0003 +[2026-03-05 21:07:57] (step=0071209) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.932498532576796, LR: 0.0003 +[2026-03-05 21:08:05] (step=0071210) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.932694189004108, LR: 0.0003 +[2026-03-05 21:08:13] (step=0071211) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.932889845431422, LR: 0.0003 +[2026-03-05 21:08:21] (step=0071212) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.933085501858736, LR: 0.0003 +[2026-03-05 21:08:29] (step=0071213) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.93328115828605, LR: 0.0003 +[2026-03-05 21:08:37] (step=0071214) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.933476814713364, LR: 0.0003 +[2026-03-05 21:08:45] (step=0071215) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 13.933672471140676, LR: 0.0003 +[2026-03-05 21:08:52] (step=0071216) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.93386812756799, LR: 0.0003 +[2026-03-05 21:09:00] (step=0071217) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.934063783995304, LR: 0.0003 +[2026-03-05 21:09:08] (step=0071218) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.934259440422618, LR: 0.0003 +[2026-03-05 21:09:16] (step=0071219) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.934455096849932, LR: 0.0003 +[2026-03-05 21:09:24] (step=0071220) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 13.934650753277245, LR: 0.0003 +[2026-03-05 21:09:32] (step=0071221) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.934846409704559, LR: 0.0003 +[2026-03-05 21:09:40] (step=0071222) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.935042066131873, LR: 0.0003 +[2026-03-05 21:09:47] (step=0071223) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 13.935237722559187, LR: 0.0003 +[2026-03-05 21:09:55] (step=0071224) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 13.9354333789865, LR: 0.0003 +[2026-03-05 21:10:03] (step=0071225) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.935629035413813, LR: 0.0003 +[2026-03-05 21:10:11] (step=0071226) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 13.935824691841127, LR: 0.0003 +[2026-03-05 21:10:19] (step=0071227) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 13.93602034826844, LR: 0.0003 +[2026-03-05 21:10:27] (step=0071228) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.936216004695755, LR: 0.0003 +[2026-03-05 21:10:34] (step=0071229) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.936411661123067, LR: 0.0003 +[2026-03-05 21:10:42] (step=0071230) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 13.936607317550381, LR: 0.0003 +[2026-03-05 21:10:50] (step=0071231) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 13.936802973977695, LR: 0.0003 +[2026-03-05 21:10:58] (step=0071232) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 13.936998630405009, LR: 0.0003 +[2026-03-05 21:11:06] (step=0071233) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.937194286832323, LR: 0.0003 +[2026-03-05 21:11:14] (step=0071234) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 13.937389943259635, LR: 0.0003 +[2026-03-05 21:11:22] (step=0071235) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 13.93758559968695, LR: 0.0003 +[2026-03-05 21:11:29] (step=0071236) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 13.937781256114263, LR: 0.0003 +[2026-03-05 21:11:37] (step=0071237) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 13.937976912541577, LR: 0.0003 +[2026-03-05 21:11:45] (step=0071238) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 13.938172568968891, LR: 0.0003 +[2026-03-05 21:11:53] (step=0071239) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.938368225396204, LR: 0.0003 +[2026-03-05 21:12:01] (step=0071240) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 13.938563881823518, LR: 0.0003 +[2026-03-05 21:12:09] (step=0071241) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 13.938759538250832, LR: 0.0003 +[2026-03-05 21:12:17] (step=0071242) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.938955194678146, LR: 0.0003 +[2026-03-05 21:12:24] (step=0071243) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 13.93915085110546, LR: 0.0003 +[2026-03-05 21:12:32] (step=0071244) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.939346507532772, LR: 0.0003 +[2026-03-05 21:12:40] (step=0071245) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 13.939542163960086, LR: 0.0003 +[2026-03-05 21:12:48] (step=0071246) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.9397378203874, LR: 0.0003 +[2026-03-05 21:12:56] (step=0071247) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 13.939933476814714, LR: 0.0003 +[2026-03-05 21:13:04] (step=0071248) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.940129133242028, LR: 0.0003 +[2026-03-05 21:13:11] (step=0071249) Train Loss: 0.4587, Train Steps/Sec: 0.13, Epoch: 13.94032478966934, LR: 0.0003 +[2026-03-05 21:13:19] (step=0071250) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.940520446096654, LR: 0.0003 +[2026-03-05 21:13:27] (step=0071251) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.940716102523968, LR: 0.0003 +[2026-03-05 21:13:35] (step=0071252) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.940911758951282, LR: 0.0003 +[2026-03-05 21:13:43] (step=0071253) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 13.941107415378594, LR: 0.0003 +[2026-03-05 21:13:51] (step=0071254) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.941303071805908, LR: 0.0003 +[2026-03-05 21:13:59] (step=0071255) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.941498728233222, LR: 0.0003 +[2026-03-05 21:14:06] (step=0071256) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.941694384660536, LR: 0.0003 +[2026-03-05 21:14:14] (step=0071257) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 13.94189004108785, LR: 0.0003 +[2026-03-05 21:14:22] (step=0071258) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.942085697515163, LR: 0.0003 +[2026-03-05 21:14:30] (step=0071259) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.942281353942477, LR: 0.0003 +[2026-03-05 21:14:38] (step=0071260) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 13.94247701036979, LR: 0.0003 +[2026-03-05 21:14:46] (step=0071261) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.942672666797105, LR: 0.0003 +[2026-03-05 21:14:54] (step=0071262) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 13.942868323224419, LR: 0.0003 +[2026-03-05 21:15:01] (step=0071263) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.94306397965173, LR: 0.0003 +[2026-03-05 21:15:09] (step=0071264) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.943259636079045, LR: 0.0003 +[2026-03-05 21:15:17] (step=0071265) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.943455292506359, LR: 0.0003 +[2026-03-05 21:15:25] (step=0071266) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.943650948933673, LR: 0.0003 +[2026-03-05 21:15:33] (step=0071267) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 13.943846605360987, LR: 0.0003 +[2026-03-05 21:15:41] (step=0071268) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.944042261788299, LR: 0.0003 +[2026-03-05 21:15:49] (step=0071269) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.944237918215613, LR: 0.0003 +[2026-03-05 21:15:56] (step=0071270) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 13.944433574642927, LR: 0.0003 +[2026-03-05 21:16:04] (step=0071271) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.944629231070241, LR: 0.0003 +[2026-03-05 21:16:12] (step=0071272) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 13.944824887497555, LR: 0.0003 +[2026-03-05 21:16:20] (step=0071273) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.945020543924867, LR: 0.0003 +[2026-03-05 21:16:28] (step=0071274) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.945216200352181, LR: 0.0003 +[2026-03-05 21:16:36] (step=0071275) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 13.945411856779495, LR: 0.0003 +[2026-03-05 21:16:44] (step=0071276) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.94560751320681, LR: 0.0003 +[2026-03-05 21:16:51] (step=0071277) Train Loss: 0.4244, Train Steps/Sec: 0.13, Epoch: 13.945803169634123, LR: 0.0003 +[2026-03-05 21:16:59] (step=0071278) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 13.945998826061436, LR: 0.0003 +[2026-03-05 21:17:07] (step=0071279) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 13.94619448248875, LR: 0.0003 +[2026-03-05 21:17:15] (step=0071280) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.946390138916064, LR: 0.0003 +[2026-03-05 21:17:23] (step=0071281) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 13.946585795343378, LR: 0.0003 +[2026-03-05 21:17:31] (step=0071282) Train Loss: 0.4581, Train Steps/Sec: 0.13, Epoch: 13.94678145177069, LR: 0.0003 +[2026-03-05 21:17:38] (step=0071283) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.946977108198004, LR: 0.0003 +[2026-03-05 21:17:46] (step=0071284) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.947172764625318, LR: 0.0003 +[2026-03-05 21:17:54] (step=0071285) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.947368421052632, LR: 0.0003 +[2026-03-05 21:18:02] (step=0071286) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 13.947564077479946, LR: 0.0003 +[2026-03-05 21:18:10] (step=0071287) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 13.947759733907258, LR: 0.0003 +[2026-03-05 21:18:18] (step=0071288) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 13.947955390334572, LR: 0.0003 +[2026-03-05 21:18:26] (step=0071289) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.948151046761886, LR: 0.0003 +[2026-03-05 21:18:33] (step=0071290) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.9483467031892, LR: 0.0003 +[2026-03-05 21:18:41] (step=0071291) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.948542359616514, LR: 0.0003 +[2026-03-05 21:18:49] (step=0071292) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 13.948738016043826, LR: 0.0003 +[2026-03-05 21:18:57] (step=0071293) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 13.94893367247114, LR: 0.0003 +[2026-03-05 21:19:05] (step=0071294) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.949129328898454, LR: 0.0003 +[2026-03-05 21:19:13] (step=0071295) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.949324985325768, LR: 0.0003 +[2026-03-05 21:19:20] (step=0071296) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 13.949520641753082, LR: 0.0003 +[2026-03-05 21:19:28] (step=0071297) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 13.949716298180395, LR: 0.0003 +[2026-03-05 21:19:36] (step=0071298) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.949911954607709, LR: 0.0003 +[2026-03-05 21:19:44] (step=0071299) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 13.950107611035023, LR: 0.0003 +[2026-03-05 21:19:52] (step=0071300) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.950303267462337, LR: 0.0003 +[2026-03-05 21:20:00] (step=0071301) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 13.95049892388965, LR: 0.0003 +[2026-03-05 21:20:08] (step=0071302) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.950694580316963, LR: 0.0003 +[2026-03-05 21:20:15] (step=0071303) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 13.950890236744277, LR: 0.0003 +[2026-03-05 21:20:23] (step=0071304) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.95108589317159, LR: 0.0003 +[2026-03-05 21:20:31] (step=0071305) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.951281549598905, LR: 0.0003 +[2026-03-05 21:20:39] (step=0071306) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 13.951477206026217, LR: 0.0003 +[2026-03-05 21:20:47] (step=0071307) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.951672862453531, LR: 0.0003 +[2026-03-05 21:20:55] (step=0071308) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 13.951868518880845, LR: 0.0003 +[2026-03-05 21:21:02] (step=0071309) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 13.95206417530816, LR: 0.0003 +[2026-03-05 21:21:10] (step=0071310) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.952259831735473, LR: 0.0003 +[2026-03-05 21:21:18] (step=0071311) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 13.952455488162785, LR: 0.0003 +[2026-03-05 21:21:26] (step=0071312) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.9526511445901, LR: 0.0003 +[2026-03-05 21:21:34] (step=0071313) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 13.952846801017413, LR: 0.0003 +[2026-03-05 21:21:42] (step=0071314) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.953042457444727, LR: 0.0003 +[2026-03-05 21:21:50] (step=0071315) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 13.953238113872041, LR: 0.0003 +[2026-03-05 21:21:57] (step=0071316) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.953433770299354, LR: 0.0003 +[2026-03-05 21:22:05] (step=0071317) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.953629426726668, LR: 0.0003 +[2026-03-05 21:22:13] (step=0071318) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 13.953825083153982, LR: 0.0003 +[2026-03-05 21:22:21] (step=0071319) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.954020739581296, LR: 0.0003 +[2026-03-05 21:22:29] (step=0071320) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.95421639600861, LR: 0.0003 +[2026-03-05 21:22:37] (step=0071321) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 13.954412052435922, LR: 0.0003 +[2026-03-05 21:22:45] (step=0071322) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.954607708863236, LR: 0.0003 +[2026-03-05 21:22:52] (step=0071323) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 13.95480336529055, LR: 0.0003 +[2026-03-05 21:23:00] (step=0071324) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.954999021717864, LR: 0.0003 +[2026-03-05 21:23:08] (step=0071325) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 13.955194678145178, LR: 0.0003 +[2026-03-05 21:23:16] (step=0071326) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 13.95539033457249, LR: 0.0003 +[2026-03-05 21:23:24] (step=0071327) Train Loss: 0.4197, Train Steps/Sec: 0.13, Epoch: 13.955585990999804, LR: 0.0003 +[2026-03-05 21:23:32] (step=0071328) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 13.955781647427118, LR: 0.0003 +[2026-03-05 21:23:39] (step=0071329) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.955977303854432, LR: 0.0003 +[2026-03-05 21:23:47] (step=0071330) Train Loss: 0.4245, Train Steps/Sec: 0.13, Epoch: 13.956172960281746, LR: 0.0003 +[2026-03-05 21:23:55] (step=0071331) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.956368616709058, LR: 0.0003 +[2026-03-05 21:24:03] (step=0071332) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.956564273136372, LR: 0.0003 +[2026-03-05 21:24:11] (step=0071333) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.956759929563686, LR: 0.0003 +[2026-03-05 21:24:19] (step=0071334) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.956955585991, LR: 0.0003 +[2026-03-05 21:24:26] (step=0071335) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.957151242418313, LR: 0.0003 +[2026-03-05 21:24:34] (step=0071336) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 13.957346898845627, LR: 0.0003 +[2026-03-05 21:24:42] (step=0071337) Train Loss: 0.4243, Train Steps/Sec: 0.13, Epoch: 13.95754255527294, LR: 0.0003 +[2026-03-05 21:24:50] (step=0071338) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.957738211700255, LR: 0.0003 +[2026-03-05 21:24:58] (step=0071339) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.957933868127569, LR: 0.0003 +[2026-03-05 21:25:06] (step=0071340) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.95812952455488, LR: 0.0003 +[2026-03-05 21:25:14] (step=0071341) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 13.958325180982195, LR: 0.0003 +[2026-03-05 21:25:21] (step=0071342) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 13.958520837409509, LR: 0.0003 +[2026-03-05 21:25:29] (step=0071343) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 13.958716493836823, LR: 0.0003 +[2026-03-05 21:25:37] (step=0071344) Train Loss: 0.4369, Train Steps/Sec: 0.12, Epoch: 13.958912150264137, LR: 0.0003 +[2026-03-05 21:25:45] (step=0071345) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.95910780669145, LR: 0.0003 +[2026-03-05 21:25:53] (step=0071346) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 13.959303463118763, LR: 0.0003 +[2026-03-05 21:26:01] (step=0071347) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 13.959499119546077, LR: 0.0003 +[2026-03-05 21:26:09] (step=0071348) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 13.959694775973391, LR: 0.0003 +[2026-03-05 21:26:17] (step=0071349) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 13.959890432400705, LR: 0.0003 +[2026-03-05 21:26:24] (step=0071350) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 13.960086088828017, LR: 0.0003 +[2026-03-05 21:26:32] (step=0071351) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 13.960281745255331, LR: 0.0003 +[2026-03-05 21:26:40] (step=0071352) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.960477401682645, LR: 0.0003 +[2026-03-05 21:26:48] (step=0071353) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 13.96067305810996, LR: 0.0003 +[2026-03-05 21:26:56] (step=0071354) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.960868714537273, LR: 0.0003 +[2026-03-05 21:27:04] (step=0071355) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 13.961064370964586, LR: 0.0003 +[2026-03-05 21:27:12] (step=0071356) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 13.9612600273919, LR: 0.0003 +[2026-03-05 21:27:19] (step=0071357) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.961455683819214, LR: 0.0003 +[2026-03-05 21:27:27] (step=0071358) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.961651340246528, LR: 0.0003 +[2026-03-05 21:27:35] (step=0071359) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 13.96184699667384, LR: 0.0003 +[2026-03-05 21:27:43] (step=0071360) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 13.962042653101154, LR: 0.0003 +[2026-03-05 21:27:51] (step=0071361) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.962238309528468, LR: 0.0003 +[2026-03-05 21:27:59] (step=0071362) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 13.962433965955782, LR: 0.0003 +[2026-03-05 21:28:06] (step=0071363) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 13.962629622383096, LR: 0.0003 +[2026-03-05 21:28:14] (step=0071364) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.962825278810408, LR: 0.0003 +[2026-03-05 21:28:22] (step=0071365) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 13.963020935237722, LR: 0.0003 +[2026-03-05 21:28:30] (step=0071366) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.963216591665036, LR: 0.0003 +[2026-03-05 21:28:38] (step=0071367) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 13.96341224809235, LR: 0.0003 +[2026-03-05 21:28:46] (step=0071368) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.963607904519664, LR: 0.0003 +[2026-03-05 21:28:53] (step=0071369) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.963803560946976, LR: 0.0003 +[2026-03-05 21:29:01] (step=0071370) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.96399921737429, LR: 0.0003 +[2026-03-05 21:29:09] (step=0071371) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.964194873801604, LR: 0.0003 +[2026-03-05 21:29:17] (step=0071372) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.964390530228918, LR: 0.0003 +[2026-03-05 21:29:25] (step=0071373) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.964586186656232, LR: 0.0003 +[2026-03-05 21:29:33] (step=0071374) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.964781843083545, LR: 0.0003 +[2026-03-05 21:29:41] (step=0071375) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.964977499510859, LR: 0.0003 +[2026-03-05 21:29:48] (step=0071376) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 13.965173155938173, LR: 0.0003 +[2026-03-05 21:29:56] (step=0071377) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.965368812365487, LR: 0.0003 +[2026-03-05 21:30:04] (step=0071378) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.9655644687928, LR: 0.0003 +[2026-03-05 21:30:12] (step=0071379) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.965760125220113, LR: 0.0003 +[2026-03-05 21:30:20] (step=0071380) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 13.965955781647427, LR: 0.0003 +[2026-03-05 21:30:28] (step=0071381) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 13.966151438074741, LR: 0.0003 +[2026-03-05 21:30:35] (step=0071382) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 13.966347094502055, LR: 0.0003 +[2026-03-05 21:30:43] (step=0071383) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.966542750929367, LR: 0.0003 +[2026-03-05 21:30:51] (step=0071384) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 13.966738407356681, LR: 0.0003 +[2026-03-05 21:30:59] (step=0071385) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.966934063783995, LR: 0.0003 +[2026-03-05 21:31:07] (step=0071386) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.96712972021131, LR: 0.0003 +[2026-03-05 21:31:15] (step=0071387) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.967325376638623, LR: 0.0003 +[2026-03-05 21:31:23] (step=0071388) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 13.967521033065935, LR: 0.0003 +[2026-03-05 21:31:30] (step=0071389) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 13.96771668949325, LR: 0.0003 +[2026-03-05 21:31:38] (step=0071390) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.967912345920563, LR: 0.0003 +[2026-03-05 21:31:46] (step=0071391) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.968108002347877, LR: 0.0003 +[2026-03-05 21:31:54] (step=0071392) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 13.968303658775191, LR: 0.0003 +[2026-03-05 21:32:02] (step=0071393) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 13.968499315202504, LR: 0.0003 +[2026-03-05 21:32:10] (step=0071394) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 13.968694971629818, LR: 0.0003 +[2026-03-05 21:32:18] (step=0071395) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 13.968890628057132, LR: 0.0003 +[2026-03-05 21:32:25] (step=0071396) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.969086284484446, LR: 0.0003 +[2026-03-05 21:32:33] (step=0071397) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 13.96928194091176, LR: 0.0003 +[2026-03-05 21:32:41] (step=0071398) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 13.969477597339072, LR: 0.0003 +[2026-03-05 21:32:49] (step=0071399) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.969673253766386, LR: 0.0003 +[2026-03-05 21:32:57] (step=0071400) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 13.9698689101937, LR: 0.0003 +[2026-03-05 21:33:05] (step=0071401) Train Loss: 0.4626, Train Steps/Sec: 0.13, Epoch: 13.970064566621014, LR: 0.0003 +[2026-03-05 21:33:12] (step=0071402) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 13.970260223048328, LR: 0.0003 +[2026-03-05 21:33:20] (step=0071403) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 13.97045587947564, LR: 0.0003 +[2026-03-05 21:33:28] (step=0071404) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 13.970651535902954, LR: 0.0003 +[2026-03-05 21:33:36] (step=0071405) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 13.970847192330268, LR: 0.0003 +[2026-03-05 21:33:44] (step=0071406) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.971042848757582, LR: 0.0003 +[2026-03-05 21:33:52] (step=0071407) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.971238505184896, LR: 0.0003 +[2026-03-05 21:34:00] (step=0071408) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 13.971434161612208, LR: 0.0003 +[2026-03-05 21:34:07] (step=0071409) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 13.971629818039522, LR: 0.0003 +[2026-03-05 21:34:15] (step=0071410) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.971825474466836, LR: 0.0003 +[2026-03-05 21:34:23] (step=0071411) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 13.97202113089415, LR: 0.0003 +[2026-03-05 21:34:31] (step=0071412) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 13.972216787321463, LR: 0.0003 +[2026-03-05 21:34:39] (step=0071413) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.972412443748777, LR: 0.0003 +[2026-03-05 21:34:47] (step=0071414) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 13.97260810017609, LR: 0.0003 +[2026-03-05 21:34:55] (step=0071415) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.972803756603405, LR: 0.0003 +[2026-03-05 21:35:02] (step=0071416) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.972999413030719, LR: 0.0003 +[2026-03-05 21:35:10] (step=0071417) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 13.973195069458031, LR: 0.0003 +[2026-03-05 21:35:18] (step=0071418) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 13.973390725885345, LR: 0.0003 +[2026-03-05 21:35:26] (step=0071419) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.973586382312659, LR: 0.0003 +[2026-03-05 21:35:34] (step=0071420) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.973782038739973, LR: 0.0003 +[2026-03-05 21:35:42] (step=0071421) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 13.973977695167287, LR: 0.0003 +[2026-03-05 21:35:49] (step=0071422) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.9741733515946, LR: 0.0003 +[2026-03-05 21:35:57] (step=0071423) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 13.974369008021913, LR: 0.0003 +[2026-03-05 21:36:05] (step=0071424) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.974564664449227, LR: 0.0003 +[2026-03-05 21:36:13] (step=0071425) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.974760320876541, LR: 0.0003 +[2026-03-05 21:36:21] (step=0071426) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 13.974955977303855, LR: 0.0003 +[2026-03-05 21:36:29] (step=0071427) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 13.975151633731167, LR: 0.0003 +[2026-03-05 21:36:37] (step=0071428) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 13.975347290158481, LR: 0.0003 +[2026-03-05 21:36:45] (step=0071429) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 13.975542946585795, LR: 0.0003 +[2026-03-05 21:36:52] (step=0071430) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.97573860301311, LR: 0.0003 +[2026-03-05 21:37:00] (step=0071431) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 13.975934259440423, LR: 0.0003 +[2026-03-05 21:37:08] (step=0071432) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 13.976129915867736, LR: 0.0003 +[2026-03-05 21:37:16] (step=0071433) Train Loss: 0.4310, Train Steps/Sec: 0.13, Epoch: 13.97632557229505, LR: 0.0003 +[2026-03-05 21:37:24] (step=0071434) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.976521228722364, LR: 0.0003 +[2026-03-05 21:37:32] (step=0071435) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 13.976716885149678, LR: 0.0003 +[2026-03-05 21:37:40] (step=0071436) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.97691254157699, LR: 0.0003 +[2026-03-05 21:37:47] (step=0071437) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 13.977108198004304, LR: 0.0003 +[2026-03-05 21:37:55] (step=0071438) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 13.977303854431618, LR: 0.0003 +[2026-03-05 21:38:03] (step=0071439) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.977499510858932, LR: 0.0003 +[2026-03-05 21:38:11] (step=0071440) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 13.977695167286246, LR: 0.0003 +[2026-03-05 21:38:19] (step=0071441) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.977890823713558, LR: 0.0003 +[2026-03-05 21:38:27] (step=0071442) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 13.978086480140872, LR: 0.0003 +[2026-03-05 21:38:35] (step=0071443) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 13.978282136568186, LR: 0.0003 +[2026-03-05 21:38:42] (step=0071444) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.9784777929955, LR: 0.0003 +[2026-03-05 21:38:50] (step=0071445) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 13.978673449422814, LR: 0.0003 +[2026-03-05 21:38:58] (step=0071446) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 13.978869105850126, LR: 0.0003 +[2026-03-05 21:39:06] (step=0071447) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 13.97906476227744, LR: 0.0003 +[2026-03-05 21:39:14] (step=0071448) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 13.979260418704754, LR: 0.0003 +[2026-03-05 21:39:22] (step=0071449) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 13.979456075132068, LR: 0.0003 +[2026-03-05 21:39:29] (step=0071450) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.979651731559382, LR: 0.0003 +[2026-03-05 21:39:37] (step=0071451) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.979847387986695, LR: 0.0003 +[2026-03-05 21:39:45] (step=0071452) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 13.980043044414009, LR: 0.0003 +[2026-03-05 21:39:53] (step=0071453) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 13.980238700841323, LR: 0.0003 +[2026-03-05 21:40:01] (step=0071454) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.980434357268637, LR: 0.0003 +[2026-03-05 21:40:09] (step=0071455) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 13.98063001369595, LR: 0.0003 +[2026-03-05 21:40:16] (step=0071456) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 13.980825670123263, LR: 0.0003 +[2026-03-05 21:40:24] (step=0071457) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.981021326550577, LR: 0.0003 +[2026-03-05 21:40:32] (step=0071458) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.981216982977891, LR: 0.0003 +[2026-03-05 21:40:40] (step=0071459) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 13.981412639405205, LR: 0.0003 +[2026-03-05 21:40:48] (step=0071460) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 13.981608295832519, LR: 0.0003 +[2026-03-05 21:40:56] (step=0071461) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 13.981803952259831, LR: 0.0003 +[2026-03-05 21:41:03] (step=0071462) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 13.981999608687145, LR: 0.0003 +[2026-03-05 21:41:11] (step=0071463) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 13.98219526511446, LR: 0.0003 +[2026-03-05 21:41:19] (step=0071464) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 13.982390921541773, LR: 0.0003 +[2026-03-05 21:41:27] (step=0071465) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 13.982586577969085, LR: 0.0003 +[2026-03-05 21:41:35] (step=0071466) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.9827822343964, LR: 0.0003 +[2026-03-05 21:41:43] (step=0071467) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 13.982977890823713, LR: 0.0003 +[2026-03-05 21:41:51] (step=0071468) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.983173547251027, LR: 0.0003 +[2026-03-05 21:41:58] (step=0071469) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 13.983369203678341, LR: 0.0003 +[2026-03-05 21:42:06] (step=0071470) Train Loss: 0.4246, Train Steps/Sec: 0.13, Epoch: 13.983564860105654, LR: 0.0003 +[2026-03-05 21:42:14] (step=0071471) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 13.983760516532968, LR: 0.0003 +[2026-03-05 21:42:22] (step=0071472) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.983956172960282, LR: 0.0003 +[2026-03-05 21:42:30] (step=0071473) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 13.984151829387596, LR: 0.0003 +[2026-03-05 21:42:38] (step=0071474) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 13.98434748581491, LR: 0.0003 +[2026-03-05 21:42:46] (step=0071475) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 13.984543142242222, LR: 0.0003 +[2026-03-05 21:42:53] (step=0071476) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 13.984738798669536, LR: 0.0003 +[2026-03-05 21:43:01] (step=0071477) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 13.98493445509685, LR: 0.0003 +[2026-03-05 21:43:09] (step=0071478) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 13.985130111524164, LR: 0.0003 +[2026-03-05 21:43:17] (step=0071479) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.985325767951478, LR: 0.0003 +[2026-03-05 21:43:25] (step=0071480) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 13.98552142437879, LR: 0.0003 +[2026-03-05 21:43:33] (step=0071481) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 13.985717080806104, LR: 0.0003 +[2026-03-05 21:43:40] (step=0071482) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 13.985912737233418, LR: 0.0003 +[2026-03-05 21:43:48] (step=0071483) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 13.986108393660732, LR: 0.0003 +[2026-03-05 21:43:56] (step=0071484) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 13.986304050088046, LR: 0.0003 +[2026-03-05 21:44:04] (step=0071485) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 13.986499706515358, LR: 0.0003 +[2026-03-05 21:44:12] (step=0071486) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 13.986695362942672, LR: 0.0003 +[2026-03-05 21:44:20] (step=0071487) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 13.986891019369986, LR: 0.0003 +[2026-03-05 21:44:27] (step=0071488) Train Loss: 0.4269, Train Steps/Sec: 0.13, Epoch: 13.9870866757973, LR: 0.0003 +[2026-03-05 21:44:35] (step=0071489) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 13.987282332224613, LR: 0.0003 +[2026-03-05 21:44:43] (step=0071490) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 13.987477988651927, LR: 0.0003 +[2026-03-05 21:44:51] (step=0071491) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.98767364507924, LR: 0.0003 +[2026-03-05 21:44:59] (step=0071492) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 13.987869301506555, LR: 0.0003 +[2026-03-05 21:45:07] (step=0071493) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 13.988064957933869, LR: 0.0003 +[2026-03-05 21:45:15] (step=0071494) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 13.988260614361181, LR: 0.0003 +[2026-03-05 21:45:23] (step=0071495) Train Loss: 0.4448, Train Steps/Sec: 0.12, Epoch: 13.988456270788495, LR: 0.0003 +[2026-03-05 21:45:30] (step=0071496) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 13.988651927215809, LR: 0.0003 +[2026-03-05 21:45:38] (step=0071497) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 13.988847583643123, LR: 0.0003 +[2026-03-05 21:45:46] (step=0071498) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 13.989043240070437, LR: 0.0003 +[2026-03-05 21:45:54] (step=0071499) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 13.98923889649775, LR: 0.0003 +[2026-03-05 21:46:02] (step=0071500) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 13.989434552925063, LR: 0.0003 +[2026-03-05 21:46:02] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0071500/ +[2026-03-05 21:46:10] (step=0071501) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 13.989630209352377, LR: 0.0003 +[2026-03-05 21:46:18] (step=0071502) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.989825865779691, LR: 0.0003 +[2026-03-05 21:46:25] (step=0071503) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 13.990021522207005, LR: 0.0003 +[2026-03-05 21:46:33] (step=0071504) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 13.990217178634317, LR: 0.0003 +[2026-03-05 21:46:41] (step=0071505) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 13.990412835061631, LR: 0.0003 +[2026-03-05 21:46:49] (step=0071506) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 13.990608491488945, LR: 0.0003 +[2026-03-05 21:46:57] (step=0071507) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 13.99080414791626, LR: 0.0003 +[2026-03-05 21:47:05] (step=0071508) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.990999804343573, LR: 0.0003 +[2026-03-05 21:47:13] (step=0071509) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.991195460770886, LR: 0.0003 +[2026-03-05 21:47:20] (step=0071510) Train Loss: 0.4268, Train Steps/Sec: 0.13, Epoch: 13.9913911171982, LR: 0.0003 +[2026-03-05 21:47:28] (step=0071511) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.991586773625514, LR: 0.0003 +[2026-03-05 21:47:36] (step=0071512) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 13.991782430052828, LR: 0.0003 +[2026-03-05 21:47:44] (step=0071513) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 13.991978086480142, LR: 0.0003 +[2026-03-05 21:47:52] (step=0071514) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 13.992173742907454, LR: 0.0003 +[2026-03-05 21:48:00] (step=0071515) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 13.992369399334768, LR: 0.0003 +[2026-03-05 21:48:07] (step=0071516) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 13.992565055762082, LR: 0.0003 +[2026-03-05 21:48:15] (step=0071517) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 13.992760712189396, LR: 0.0003 +[2026-03-05 21:48:23] (step=0071518) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.992956368616708, LR: 0.0003 +[2026-03-05 21:48:31] (step=0071519) Train Loss: 0.4216, Train Steps/Sec: 0.13, Epoch: 13.993152025044022, LR: 0.0003 +[2026-03-05 21:48:39] (step=0071520) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 13.993347681471336, LR: 0.0003 +[2026-03-05 21:48:47] (step=0071521) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 13.99354333789865, LR: 0.0003 +[2026-03-05 21:48:54] (step=0071522) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 13.993738994325964, LR: 0.0003 +[2026-03-05 21:49:02] (step=0071523) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 13.993934650753276, LR: 0.0003 +[2026-03-05 21:49:10] (step=0071524) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 13.99413030718059, LR: 0.0003 +[2026-03-05 21:49:18] (step=0071525) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 13.994325963607904, LR: 0.0003 +[2026-03-05 21:49:26] (step=0071526) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 13.994521620035218, LR: 0.0003 +[2026-03-05 21:49:34] (step=0071527) Train Loss: 0.4244, Train Steps/Sec: 0.13, Epoch: 13.994717276462532, LR: 0.0003 +[2026-03-05 21:49:42] (step=0071528) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 13.994912932889845, LR: 0.0003 +[2026-03-05 21:49:49] (step=0071529) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 13.995108589317159, LR: 0.0003 +[2026-03-05 21:49:57] (step=0071530) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 13.995304245744473, LR: 0.0003 +[2026-03-05 21:50:05] (step=0071531) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 13.995499902171787, LR: 0.0003 +[2026-03-05 21:50:13] (step=0071532) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 13.9956955585991, LR: 0.0003 +[2026-03-05 21:50:21] (step=0071533) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 13.995891215026413, LR: 0.0003 +[2026-03-05 21:50:29] (step=0071534) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 13.996086871453727, LR: 0.0003 +[2026-03-05 21:50:36] (step=0071535) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 13.996282527881041, LR: 0.0003 +[2026-03-05 21:50:44] (step=0071536) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 13.996478184308355, LR: 0.0003 +[2026-03-05 21:50:52] (step=0071537) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.996673840735669, LR: 0.0003 +[2026-03-05 21:51:00] (step=0071538) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 13.996869497162981, LR: 0.0003 +[2026-03-05 21:51:08] (step=0071539) Train Loss: 0.4597, Train Steps/Sec: 0.13, Epoch: 13.997065153590295, LR: 0.0003 +[2026-03-05 21:51:16] (step=0071540) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 13.99726081001761, LR: 0.0003 +[2026-03-05 21:51:23] (step=0071541) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 13.997456466444923, LR: 0.0003 +[2026-03-05 21:51:31] (step=0071542) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 13.997652122872235, LR: 0.0003 +[2026-03-05 21:51:39] (step=0071543) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 13.99784777929955, LR: 0.0003 +[2026-03-05 21:51:47] (step=0071544) Train Loss: 0.4405, Train Steps/Sec: 0.12, Epoch: 13.998043435726863, LR: 0.0003 +[2026-03-05 21:51:55] (step=0071545) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 13.998239092154177, LR: 0.0003 +[2026-03-05 21:52:03] (step=0071546) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 13.998434748581492, LR: 0.0003 +[2026-03-05 21:52:11] (step=0071547) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 13.998630405008804, LR: 0.0003 +[2026-03-05 21:52:18] (step=0071548) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 13.998826061436118, LR: 0.0003 +[2026-03-05 21:52:26] (step=0071549) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 13.999021717863432, LR: 0.0003 +[2026-03-05 21:52:34] (step=0071550) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 13.999217374290746, LR: 0.0003 +[2026-03-05 21:52:42] (step=0071551) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 13.99941303071806, LR: 0.0003 +[2026-03-05 21:52:50] (step=0071552) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 13.999608687145372, LR: 0.0003 +[2026-03-05 21:52:58] (step=0071553) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 13.999804343572686, LR: 0.0003 +[2026-03-05 21:53:06] (step=0071554) Train Loss: 0.4373, Train Steps/Sec: 0.12, Epoch: 14.0, LR: 0.0003 +[2026-03-05 21:53:06] Beginning epoch 14... +[2026-03-05 21:53:16] (step=0071555) Train Loss: 0.4385, Train Steps/Sec: 0.10, Epoch: 14.000195656427314, LR: 0.0003 +[2026-03-05 21:53:24] (step=0071556) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.000391312854628, LR: 0.0003 +[2026-03-05 21:53:32] (step=0071557) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.00058696928194, LR: 0.0003 +[2026-03-05 21:53:40] (step=0071558) Train Loss: 0.4584, Train Steps/Sec: 0.13, Epoch: 14.000782625709254, LR: 0.0003 +[2026-03-05 21:53:47] (step=0071559) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.000978282136568, LR: 0.0003 +[2026-03-05 21:53:55] (step=0071560) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.001173938563882, LR: 0.0003 +[2026-03-05 21:54:03] (step=0071561) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 14.001369594991196, LR: 0.0003 +[2026-03-05 21:54:11] (step=0071562) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.001565251418508, LR: 0.0003 +[2026-03-05 21:54:19] (step=0071563) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.001760907845823, LR: 0.0003 +[2026-03-05 21:54:27] (step=0071564) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.001956564273137, LR: 0.0003 +[2026-03-05 21:54:35] (step=0071565) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.00215222070045, LR: 0.0003 +[2026-03-05 21:54:42] (step=0071566) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.002347877127765, LR: 0.0003 +[2026-03-05 21:54:50] (step=0071567) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.002543533555077, LR: 0.0003 +[2026-03-05 21:54:58] (step=0071568) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.00273918998239, LR: 0.0003 +[2026-03-05 21:55:06] (step=0071569) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.002934846409705, LR: 0.0003 +[2026-03-05 21:55:14] (step=0071570) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 14.003130502837019, LR: 0.0003 +[2026-03-05 21:55:22] (step=0071571) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 14.003326159264331, LR: 0.0003 +[2026-03-05 21:55:30] (step=0071572) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 14.003521815691645, LR: 0.0003 +[2026-03-05 21:55:37] (step=0071573) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.003717472118959, LR: 0.0003 +[2026-03-05 21:55:45] (step=0071574) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.003913128546273, LR: 0.0003 +[2026-03-05 21:55:53] (step=0071575) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 14.004108784973587, LR: 0.0003 +[2026-03-05 21:56:01] (step=0071576) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 14.0043044414009, LR: 0.0003 +[2026-03-05 21:56:09] (step=0071577) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.004500097828213, LR: 0.0003 +[2026-03-05 21:56:17] (step=0071578) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.004695754255527, LR: 0.0003 +[2026-03-05 21:56:24] (step=0071579) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.004891410682841, LR: 0.0003 +[2026-03-05 21:56:32] (step=0071580) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.005087067110155, LR: 0.0003 +[2026-03-05 21:56:40] (step=0071581) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.005282723537468, LR: 0.0003 +[2026-03-05 21:56:48] (step=0071582) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.005478379964782, LR: 0.0003 +[2026-03-05 21:56:56] (step=0071583) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.005674036392096, LR: 0.0003 +[2026-03-05 21:57:04] (step=0071584) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.00586969281941, LR: 0.0003 +[2026-03-05 21:57:12] (step=0071585) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.006065349246724, LR: 0.0003 +[2026-03-05 21:57:19] (step=0071586) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.006261005674036, LR: 0.0003 +[2026-03-05 21:57:27] (step=0071587) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.00645666210135, LR: 0.0003 +[2026-03-05 21:57:35] (step=0071588) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.006652318528664, LR: 0.0003 +[2026-03-05 21:57:43] (step=0071589) Train Loss: 0.4456, Train Steps/Sec: 0.12, Epoch: 14.006847974955978, LR: 0.0003 +[2026-03-05 21:57:51] (step=0071590) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.007043631383292, LR: 0.0003 +[2026-03-05 21:57:59] (step=0071591) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.007239287810604, LR: 0.0003 +[2026-03-05 21:58:07] (step=0071592) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 14.007434944237918, LR: 0.0003 +[2026-03-05 21:58:15] (step=0071593) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 14.007630600665232, LR: 0.0003 +[2026-03-05 21:58:22] (step=0071594) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.007826257092546, LR: 0.0003 +[2026-03-05 21:58:30] (step=0071595) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.008021913519858, LR: 0.0003 +[2026-03-05 21:58:38] (step=0071596) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.008217569947172, LR: 0.0003 +[2026-03-05 21:58:46] (step=0071597) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 14.008413226374486, LR: 0.0003 +[2026-03-05 21:58:54] (step=0071598) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.0086088828018, LR: 0.0003 +[2026-03-05 21:59:02] (step=0071599) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.008804539229114, LR: 0.0003 +[2026-03-05 21:59:09] (step=0071600) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 14.009000195656427, LR: 0.0003 +[2026-03-05 21:59:17] (step=0071601) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.00919585208374, LR: 0.0003 +[2026-03-05 21:59:25] (step=0071602) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.009391508511055, LR: 0.0003 +[2026-03-05 21:59:33] (step=0071603) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.009587164938369, LR: 0.0003 +[2026-03-05 21:59:41] (step=0071604) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.009782821365683, LR: 0.0003 +[2026-03-05 21:59:49] (step=0071605) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 14.009978477792995, LR: 0.0003 +[2026-03-05 21:59:57] (step=0071606) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.010174134220309, LR: 0.0003 +[2026-03-05 22:00:04] (step=0071607) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.010369790647623, LR: 0.0003 +[2026-03-05 22:00:12] (step=0071608) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.010565447074937, LR: 0.0003 +[2026-03-05 22:00:20] (step=0071609) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.01076110350225, LR: 0.0003 +[2026-03-05 22:00:28] (step=0071610) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.010956759929563, LR: 0.0003 +[2026-03-05 22:00:36] (step=0071611) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.011152416356877, LR: 0.0003 +[2026-03-05 22:00:44] (step=0071612) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 14.011348072784191, LR: 0.0003 +[2026-03-05 22:00:51] (step=0071613) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.011543729211505, LR: 0.0003 +[2026-03-05 22:00:59] (step=0071614) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.011739385638819, LR: 0.0003 +[2026-03-05 22:01:07] (step=0071615) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.011935042066131, LR: 0.0003 +[2026-03-05 22:01:15] (step=0071616) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 14.012130698493445, LR: 0.0003 +[2026-03-05 22:01:23] (step=0071617) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 14.01232635492076, LR: 0.0003 +[2026-03-05 22:01:31] (step=0071618) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 14.012522011348073, LR: 0.0003 +[2026-03-05 22:01:38] (step=0071619) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.012717667775387, LR: 0.0003 +[2026-03-05 22:01:46] (step=0071620) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.0129133242027, LR: 0.0003 +[2026-03-05 22:01:54] (step=0071621) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.013108980630014, LR: 0.0003 +[2026-03-05 22:02:02] (step=0071622) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.013304637057328, LR: 0.0003 +[2026-03-05 22:02:10] (step=0071623) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.013500293484642, LR: 0.0003 +[2026-03-05 22:02:18] (step=0071624) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 14.013695949911954, LR: 0.0003 +[2026-03-05 22:02:26] (step=0071625) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 14.013891606339268, LR: 0.0003 +[2026-03-05 22:02:34] (step=0071626) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.014087262766582, LR: 0.0003 +[2026-03-05 22:02:41] (step=0071627) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.014282919193896, LR: 0.0003 +[2026-03-05 22:02:49] (step=0071628) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 14.01447857562121, LR: 0.0003 +[2026-03-05 22:02:57] (step=0071629) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.014674232048522, LR: 0.0003 +[2026-03-05 22:03:05] (step=0071630) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.014869888475836, LR: 0.0003 +[2026-03-05 22:03:13] (step=0071631) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.01506554490315, LR: 0.0003 +[2026-03-05 22:03:21] (step=0071632) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 14.015261201330464, LR: 0.0003 +[2026-03-05 22:03:28] (step=0071633) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.015456857757778, LR: 0.0003 +[2026-03-05 22:03:36] (step=0071634) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.01565251418509, LR: 0.0003 +[2026-03-05 22:03:44] (step=0071635) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.015848170612404, LR: 0.0003 +[2026-03-05 22:03:52] (step=0071636) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 14.016043827039718, LR: 0.0003 +[2026-03-05 22:04:00] (step=0071637) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.016239483467032, LR: 0.0003 +[2026-03-05 22:04:08] (step=0071638) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.016435139894346, LR: 0.0003 +[2026-03-05 22:04:15] (step=0071639) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 14.016630796321659, LR: 0.0003 +[2026-03-05 22:04:23] (step=0071640) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 14.016826452748973, LR: 0.0003 +[2026-03-05 22:04:31] (step=0071641) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.017022109176287, LR: 0.0003 +[2026-03-05 22:04:39] (step=0071642) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 14.0172177656036, LR: 0.0003 +[2026-03-05 22:04:47] (step=0071643) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.017413422030915, LR: 0.0003 +[2026-03-05 22:04:55] (step=0071644) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.017609078458227, LR: 0.0003 +[2026-03-05 22:05:03] (step=0071645) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 14.01780473488554, LR: 0.0003 +[2026-03-05 22:05:10] (step=0071646) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.018000391312855, LR: 0.0003 +[2026-03-05 22:05:18] (step=0071647) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.018196047740169, LR: 0.0003 +[2026-03-05 22:05:26] (step=0071648) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.018391704167481, LR: 0.0003 +[2026-03-05 22:05:34] (step=0071649) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.018587360594795, LR: 0.0003 +[2026-03-05 22:05:42] (step=0071650) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.018783017022109, LR: 0.0003 +[2026-03-05 22:05:50] (step=0071651) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.018978673449423, LR: 0.0003 +[2026-03-05 22:05:58] (step=0071652) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.019174329876737, LR: 0.0003 +[2026-03-05 22:06:05] (step=0071653) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 14.01936998630405, LR: 0.0003 +[2026-03-05 22:06:13] (step=0071654) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.019565642731363, LR: 0.0003 +[2026-03-05 22:06:21] (step=0071655) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.019761299158677, LR: 0.0003 +[2026-03-05 22:06:29] (step=0071656) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.019956955585991, LR: 0.0003 +[2026-03-05 22:06:37] (step=0071657) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 14.020152612013305, LR: 0.0003 +[2026-03-05 22:06:45] (step=0071658) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.020348268440618, LR: 0.0003 +[2026-03-05 22:06:52] (step=0071659) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.020543924867932, LR: 0.0003 +[2026-03-05 22:07:00] (step=0071660) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 14.020739581295246, LR: 0.0003 +[2026-03-05 22:07:08] (step=0071661) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.02093523772256, LR: 0.0003 +[2026-03-05 22:07:16] (step=0071662) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.021130894149874, LR: 0.0003 +[2026-03-05 22:07:24] (step=0071663) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 14.021326550577186, LR: 0.0003 +[2026-03-05 22:07:32] (step=0071664) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.0215222070045, LR: 0.0003 +[2026-03-05 22:07:39] (step=0071665) Train Loss: 0.4249, Train Steps/Sec: 0.13, Epoch: 14.021717863431814, LR: 0.0003 +[2026-03-05 22:07:47] (step=0071666) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 14.021913519859128, LR: 0.0003 +[2026-03-05 22:07:55] (step=0071667) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.022109176286442, LR: 0.0003 +[2026-03-05 22:08:03] (step=0071668) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 14.022304832713754, LR: 0.0003 +[2026-03-05 22:08:11] (step=0071669) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.022500489141068, LR: 0.0003 +[2026-03-05 22:08:19] (step=0071670) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.022696145568382, LR: 0.0003 +[2026-03-05 22:08:26] (step=0071671) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.022891801995696, LR: 0.0003 +[2026-03-05 22:08:34] (step=0071672) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 14.02308745842301, LR: 0.0003 +[2026-03-05 22:08:42] (step=0071673) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.023283114850322, LR: 0.0003 +[2026-03-05 22:08:50] (step=0071674) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 14.023478771277636, LR: 0.0003 +[2026-03-05 22:08:58] (step=0071675) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.02367442770495, LR: 0.0003 +[2026-03-05 22:09:06] (step=0071676) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 14.023870084132264, LR: 0.0003 +[2026-03-05 22:09:14] (step=0071677) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.024065740559577, LR: 0.0003 +[2026-03-05 22:09:22] (step=0071678) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.02426139698689, LR: 0.0003 +[2026-03-05 22:09:29] (step=0071679) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.024457053414205, LR: 0.0003 +[2026-03-05 22:09:37] (step=0071680) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.024652709841519, LR: 0.0003 +[2026-03-05 22:09:45] (step=0071681) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 14.024848366268833, LR: 0.0003 +[2026-03-05 22:09:53] (step=0071682) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.025044022696145, LR: 0.0003 +[2026-03-05 22:10:01] (step=0071683) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.025239679123459, LR: 0.0003 +[2026-03-05 22:10:09] (step=0071684) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.025435335550773, LR: 0.0003 +[2026-03-05 22:10:16] (step=0071685) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.025630991978087, LR: 0.0003 +[2026-03-05 22:10:24] (step=0071686) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.0258266484054, LR: 0.0003 +[2026-03-05 22:10:32] (step=0071687) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 14.026022304832713, LR: 0.0003 +[2026-03-05 22:10:40] (step=0071688) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.026217961260027, LR: 0.0003 +[2026-03-05 22:10:48] (step=0071689) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 14.026413617687341, LR: 0.0003 +[2026-03-05 22:10:56] (step=0071690) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.026609274114655, LR: 0.0003 +[2026-03-05 22:11:03] (step=0071691) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.026804930541969, LR: 0.0003 +[2026-03-05 22:11:11] (step=0071692) Train Loss: 0.4427, Train Steps/Sec: 0.12, Epoch: 14.027000586969281, LR: 0.0003 +[2026-03-05 22:11:19] (step=0071693) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 14.027196243396595, LR: 0.0003 +[2026-03-05 22:11:27] (step=0071694) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 14.02739189982391, LR: 0.0003 +[2026-03-05 22:11:35] (step=0071695) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 14.027587556251223, LR: 0.0003 +[2026-03-05 22:11:43] (step=0071696) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 14.027783212678537, LR: 0.0003 +[2026-03-05 22:11:51] (step=0071697) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.02797886910585, LR: 0.0003 +[2026-03-05 22:11:59] (step=0071698) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 14.028174525533164, LR: 0.0003 +[2026-03-05 22:12:06] (step=0071699) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.028370181960478, LR: 0.0003 +[2026-03-05 22:12:14] (step=0071700) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.028565838387792, LR: 0.0003 +[2026-03-05 22:12:22] (step=0071701) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.028761494815104, LR: 0.0003 +[2026-03-05 22:12:30] (step=0071702) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.028957151242418, LR: 0.0003 +[2026-03-05 22:12:38] (step=0071703) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.029152807669732, LR: 0.0003 +[2026-03-05 22:12:46] (step=0071704) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.029348464097046, LR: 0.0003 +[2026-03-05 22:12:53] (step=0071705) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.02954412052436, LR: 0.0003 +[2026-03-05 22:13:01] (step=0071706) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 14.029739776951672, LR: 0.0003 +[2026-03-05 22:13:09] (step=0071707) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.029935433378986, LR: 0.0003 +[2026-03-05 22:13:17] (step=0071708) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 14.0301310898063, LR: 0.0003 +[2026-03-05 22:13:25] (step=0071709) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 14.030326746233614, LR: 0.0003 +[2026-03-05 22:13:33] (step=0071710) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.030522402660928, LR: 0.0003 +[2026-03-05 22:13:40] (step=0071711) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.03071805908824, LR: 0.0003 +[2026-03-05 22:13:48] (step=0071712) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.030913715515554, LR: 0.0003 +[2026-03-05 22:13:56] (step=0071713) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.031109371942868, LR: 0.0003 +[2026-03-05 22:14:04] (step=0071714) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.031305028370182, LR: 0.0003 +[2026-03-05 22:14:12] (step=0071715) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.031500684797496, LR: 0.0003 +[2026-03-05 22:14:20] (step=0071716) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 14.031696341224809, LR: 0.0003 +[2026-03-05 22:14:28] (step=0071717) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.031891997652123, LR: 0.0003 +[2026-03-05 22:14:35] (step=0071718) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.032087654079437, LR: 0.0003 +[2026-03-05 22:14:43] (step=0071719) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 14.03228331050675, LR: 0.0003 +[2026-03-05 22:14:51] (step=0071720) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.032478966934065, LR: 0.0003 +[2026-03-05 22:14:59] (step=0071721) Train Loss: 0.4569, Train Steps/Sec: 0.13, Epoch: 14.032674623361377, LR: 0.0003 +[2026-03-05 22:15:07] (step=0071722) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.03287027978869, LR: 0.0003 +[2026-03-05 22:15:15] (step=0071723) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.033065936216005, LR: 0.0003 +[2026-03-05 22:15:22] (step=0071724) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.033261592643319, LR: 0.0003 +[2026-03-05 22:15:30] (step=0071725) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.033457249070633, LR: 0.0003 +[2026-03-05 22:15:38] (step=0071726) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 14.033652905497945, LR: 0.0003 +[2026-03-05 22:15:46] (step=0071727) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.033848561925259, LR: 0.0003 +[2026-03-05 22:15:54] (step=0071728) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.034044218352573, LR: 0.0003 +[2026-03-05 22:16:02] (step=0071729) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 14.034239874779887, LR: 0.0003 +[2026-03-05 22:16:10] (step=0071730) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.0344355312072, LR: 0.0003 +[2026-03-05 22:16:17] (step=0071731) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.034631187634513, LR: 0.0003 +[2026-03-05 22:16:25] (step=0071732) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 14.034826844061827, LR: 0.0003 +[2026-03-05 22:16:33] (step=0071733) Train Loss: 0.4613, Train Steps/Sec: 0.13, Epoch: 14.035022500489141, LR: 0.0003 +[2026-03-05 22:16:41] (step=0071734) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.035218156916455, LR: 0.0003 +[2026-03-05 22:16:49] (step=0071735) Train Loss: 0.4450, Train Steps/Sec: 0.12, Epoch: 14.035413813343768, LR: 0.0003 +[2026-03-05 22:16:57] (step=0071736) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.035609469771082, LR: 0.0003 +[2026-03-05 22:17:05] (step=0071737) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 14.035805126198396, LR: 0.0003 +[2026-03-05 22:17:13] (step=0071738) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 14.03600078262571, LR: 0.0003 +[2026-03-05 22:17:20] (step=0071739) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.036196439053024, LR: 0.0003 +[2026-03-05 22:17:28] (step=0071740) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.036392095480336, LR: 0.0003 +[2026-03-05 22:17:36] (step=0071741) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 14.03658775190765, LR: 0.0003 +[2026-03-05 22:17:44] (step=0071742) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.036783408334964, LR: 0.0003 +[2026-03-05 22:17:52] (step=0071743) Train Loss: 0.4622, Train Steps/Sec: 0.13, Epoch: 14.036979064762278, LR: 0.0003 +[2026-03-05 22:18:00] (step=0071744) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.037174721189592, LR: 0.0003 +[2026-03-05 22:18:08] (step=0071745) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.037370377616904, LR: 0.0003 +[2026-03-05 22:18:15] (step=0071746) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.037566034044218, LR: 0.0003 +[2026-03-05 22:18:23] (step=0071747) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 14.037761690471532, LR: 0.0003 +[2026-03-05 22:18:31] (step=0071748) Train Loss: 0.4649, Train Steps/Sec: 0.13, Epoch: 14.037957346898846, LR: 0.0003 +[2026-03-05 22:18:39] (step=0071749) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 14.03815300332616, LR: 0.0003 +[2026-03-05 22:18:47] (step=0071750) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.038348659753472, LR: 0.0003 +[2026-03-05 22:18:55] (step=0071751) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.038544316180786, LR: 0.0003 +[2026-03-05 22:19:03] (step=0071752) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 14.0387399726081, LR: 0.0003 +[2026-03-05 22:19:10] (step=0071753) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.038935629035414, LR: 0.0003 +[2026-03-05 22:19:18] (step=0071754) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 14.039131285462727, LR: 0.0003 +[2026-03-05 22:19:26] (step=0071755) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.03932694189004, LR: 0.0003 +[2026-03-05 22:19:34] (step=0071756) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.039522598317355, LR: 0.0003 +[2026-03-05 22:19:42] (step=0071757) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 14.039718254744669, LR: 0.0003 +[2026-03-05 22:19:50] (step=0071758) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.039913911171983, LR: 0.0003 +[2026-03-05 22:19:58] (step=0071759) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.040109567599295, LR: 0.0003 +[2026-03-05 22:20:05] (step=0071760) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.040305224026609, LR: 0.0003 +[2026-03-05 22:20:13] (step=0071761) Train Loss: 0.4629, Train Steps/Sec: 0.13, Epoch: 14.040500880453923, LR: 0.0003 +[2026-03-05 22:20:21] (step=0071762) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.040696536881237, LR: 0.0003 +[2026-03-05 22:20:29] (step=0071763) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 14.04089219330855, LR: 0.0003 +[2026-03-05 22:20:37] (step=0071764) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 14.041087849735863, LR: 0.0003 +[2026-03-05 22:20:45] (step=0071765) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 14.041283506163177, LR: 0.0003 +[2026-03-05 22:20:52] (step=0071766) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.041479162590491, LR: 0.0003 +[2026-03-05 22:21:00] (step=0071767) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.041674819017805, LR: 0.0003 +[2026-03-05 22:21:08] (step=0071768) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 14.04187047544512, LR: 0.0003 +[2026-03-05 22:21:16] (step=0071769) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.042066131872431, LR: 0.0003 +[2026-03-05 22:21:24] (step=0071770) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 14.042261788299745, LR: 0.0003 +[2026-03-05 22:21:32] (step=0071771) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.04245744472706, LR: 0.0003 +[2026-03-05 22:21:40] (step=0071772) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.042653101154373, LR: 0.0003 +[2026-03-05 22:21:47] (step=0071773) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 14.042848757581687, LR: 0.0003 +[2026-03-05 22:21:55] (step=0071774) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 14.043044414009, LR: 0.0003 +[2026-03-05 22:22:03] (step=0071775) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.043240070436314, LR: 0.0003 +[2026-03-05 22:22:11] (step=0071776) Train Loss: 0.4641, Train Steps/Sec: 0.13, Epoch: 14.043435726863628, LR: 0.0003 +[2026-03-05 22:22:19] (step=0071777) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.043631383290942, LR: 0.0003 +[2026-03-05 22:22:27] (step=0071778) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 14.043827039718254, LR: 0.0003 +[2026-03-05 22:22:34] (step=0071779) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.044022696145568, LR: 0.0003 +[2026-03-05 22:22:42] (step=0071780) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.044218352572882, LR: 0.0003 +[2026-03-05 22:22:50] (step=0071781) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.044414009000196, LR: 0.0003 +[2026-03-05 22:22:58] (step=0071782) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.04460966542751, LR: 0.0003 +[2026-03-05 22:23:06] (step=0071783) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.044805321854822, LR: 0.0003 +[2026-03-05 22:23:14] (step=0071784) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.045000978282136, LR: 0.0003 +[2026-03-05 22:23:21] (step=0071785) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.04519663470945, LR: 0.0003 +[2026-03-05 22:23:29] (step=0071786) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.045392291136764, LR: 0.0003 +[2026-03-05 22:23:37] (step=0071787) Train Loss: 0.4317, Train Steps/Sec: 0.12, Epoch: 14.045587947564078, LR: 0.0003 +[2026-03-05 22:23:45] (step=0071788) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.04578360399139, LR: 0.0003 +[2026-03-05 22:23:53] (step=0071789) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.045979260418704, LR: 0.0003 +[2026-03-05 22:24:01] (step=0071790) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.046174916846018, LR: 0.0003 +[2026-03-05 22:24:09] (step=0071791) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.046370573273332, LR: 0.0003 +[2026-03-05 22:24:17] (step=0071792) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.046566229700646, LR: 0.0003 +[2026-03-05 22:24:24] (step=0071793) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.046761886127959, LR: 0.0003 +[2026-03-05 22:24:32] (step=0071794) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.046957542555273, LR: 0.0003 +[2026-03-05 22:24:40] (step=0071795) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.047153198982587, LR: 0.0003 +[2026-03-05 22:24:48] (step=0071796) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 14.0473488554099, LR: 0.0003 +[2026-03-05 22:24:56] (step=0071797) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 14.047544511837215, LR: 0.0003 +[2026-03-05 22:25:04] (step=0071798) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.047740168264527, LR: 0.0003 +[2026-03-05 22:25:11] (step=0071799) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.04793582469184, LR: 0.0003 +[2026-03-05 22:25:19] (step=0071800) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 14.048131481119155, LR: 0.0003 +[2026-03-05 22:25:27] (step=0071801) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.048327137546469, LR: 0.0003 +[2026-03-05 22:25:35] (step=0071802) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.048522793973783, LR: 0.0003 +[2026-03-05 22:25:43] (step=0071803) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.048718450401095, LR: 0.0003 +[2026-03-05 22:25:51] (step=0071804) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.04891410682841, LR: 0.0003 +[2026-03-05 22:25:59] (step=0071805) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.049109763255723, LR: 0.0003 +[2026-03-05 22:26:06] (step=0071806) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.049305419683037, LR: 0.0003 +[2026-03-05 22:26:14] (step=0071807) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.04950107611035, LR: 0.0003 +[2026-03-05 22:26:22] (step=0071808) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.049696732537663, LR: 0.0003 +[2026-03-05 22:26:30] (step=0071809) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.049892388964977, LR: 0.0003 +[2026-03-05 22:26:38] (step=0071810) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.050088045392291, LR: 0.0003 +[2026-03-05 22:26:46] (step=0071811) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.050283701819605, LR: 0.0003 +[2026-03-05 22:26:53] (step=0071812) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.050479358246918, LR: 0.0003 +[2026-03-05 22:27:01] (step=0071813) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 14.050675014674232, LR: 0.0003 +[2026-03-05 22:27:09] (step=0071814) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.050870671101546, LR: 0.0003 +[2026-03-05 22:27:17] (step=0071815) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.05106632752886, LR: 0.0003 +[2026-03-05 22:27:25] (step=0071816) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 14.051261983956174, LR: 0.0003 +[2026-03-05 22:27:33] (step=0071817) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.051457640383486, LR: 0.0003 +[2026-03-05 22:27:41] (step=0071818) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.0516532968108, LR: 0.0003 +[2026-03-05 22:27:48] (step=0071819) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 14.051848953238114, LR: 0.0003 +[2026-03-05 22:27:56] (step=0071820) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 14.052044609665428, LR: 0.0003 +[2026-03-05 22:28:04] (step=0071821) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 14.052240266092742, LR: 0.0003 +[2026-03-05 22:28:12] (step=0071822) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.052435922520054, LR: 0.0003 +[2026-03-05 22:28:20] (step=0071823) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 14.052631578947368, LR: 0.0003 +[2026-03-05 22:28:28] (step=0071824) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.052827235374682, LR: 0.0003 +[2026-03-05 22:28:35] (step=0071825) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.053022891801996, LR: 0.0003 +[2026-03-05 22:28:43] (step=0071826) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.05321854822931, LR: 0.0003 +[2026-03-05 22:28:51] (step=0071827) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.053414204656622, LR: 0.0003 +[2026-03-05 22:28:59] (step=0071828) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.053609861083936, LR: 0.0003 +[2026-03-05 22:29:07] (step=0071829) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 14.05380551751125, LR: 0.0003 +[2026-03-05 22:29:15] (step=0071830) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.054001173938564, LR: 0.0003 +[2026-03-05 22:29:23] (step=0071831) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.054196830365877, LR: 0.0003 +[2026-03-05 22:29:30] (step=0071832) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.05439248679319, LR: 0.0003 +[2026-03-05 22:29:38] (step=0071833) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 14.054588143220505, LR: 0.0003 +[2026-03-05 22:29:46] (step=0071834) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 14.054783799647819, LR: 0.0003 +[2026-03-05 22:29:54] (step=0071835) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.054979456075133, LR: 0.0003 +[2026-03-05 22:30:02] (step=0071836) Train Loss: 0.4551, Train Steps/Sec: 0.12, Epoch: 14.055175112502445, LR: 0.0003 +[2026-03-05 22:30:10] (step=0071837) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.055370768929759, LR: 0.0003 +[2026-03-05 22:30:18] (step=0071838) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.055566425357073, LR: 0.0003 +[2026-03-05 22:30:26] (step=0071839) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.055762081784387, LR: 0.0003 +[2026-03-05 22:30:33] (step=0071840) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.055957738211701, LR: 0.0003 +[2026-03-05 22:30:41] (step=0071841) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.056153394639013, LR: 0.0003 +[2026-03-05 22:30:49] (step=0071842) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 14.056349051066327, LR: 0.0003 +[2026-03-05 22:30:57] (step=0071843) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.056544707493641, LR: 0.0003 +[2026-03-05 22:31:05] (step=0071844) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.056740363920955, LR: 0.0003 +[2026-03-05 22:31:13] (step=0071845) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.05693602034827, LR: 0.0003 +[2026-03-05 22:31:21] (step=0071846) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.057131676775581, LR: 0.0003 +[2026-03-05 22:31:28] (step=0071847) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 14.057327333202895, LR: 0.0003 +[2026-03-05 22:31:36] (step=0071848) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.05752298963021, LR: 0.0003 +[2026-03-05 22:31:44] (step=0071849) Train Loss: 0.4564, Train Steps/Sec: 0.13, Epoch: 14.057718646057523, LR: 0.0003 +[2026-03-05 22:31:52] (step=0071850) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.057914302484837, LR: 0.0003 +[2026-03-05 22:32:00] (step=0071851) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.05810995891215, LR: 0.0003 +[2026-03-05 22:32:08] (step=0071852) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.058305615339464, LR: 0.0003 +[2026-03-05 22:32:15] (step=0071853) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.058501271766778, LR: 0.0003 +[2026-03-05 22:32:23] (step=0071854) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 14.058696928194092, LR: 0.0003 +[2026-03-05 22:32:31] (step=0071855) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.058892584621406, LR: 0.0003 +[2026-03-05 22:32:39] (step=0071856) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.059088241048718, LR: 0.0003 +[2026-03-05 22:32:47] (step=0071857) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.059283897476032, LR: 0.0003 +[2026-03-05 22:32:55] (step=0071858) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.059479553903346, LR: 0.0003 +[2026-03-05 22:33:02] (step=0071859) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 14.05967521033066, LR: 0.0003 +[2026-03-05 22:33:10] (step=0071860) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.059870866757972, LR: 0.0003 +[2026-03-05 22:33:18] (step=0071861) Train Loss: 0.4206, Train Steps/Sec: 0.13, Epoch: 14.060066523185286, LR: 0.0003 +[2026-03-05 22:33:26] (step=0071862) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 14.0602621796126, LR: 0.0003 +[2026-03-05 22:33:34] (step=0071863) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.060457836039914, LR: 0.0003 +[2026-03-05 22:33:42] (step=0071864) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 14.060653492467228, LR: 0.0003 +[2026-03-05 22:33:49] (step=0071865) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.06084914889454, LR: 0.0003 +[2026-03-05 22:33:57] (step=0071866) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.061044805321854, LR: 0.0003 +[2026-03-05 22:34:05] (step=0071867) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.061240461749168, LR: 0.0003 +[2026-03-05 22:34:13] (step=0071868) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.061436118176482, LR: 0.0003 +[2026-03-05 22:34:21] (step=0071869) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.061631774603796, LR: 0.0003 +[2026-03-05 22:34:29] (step=0071870) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 14.061827431031109, LR: 0.0003 +[2026-03-05 22:34:37] (step=0071871) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.062023087458423, LR: 0.0003 +[2026-03-05 22:34:45] (step=0071872) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.062218743885737, LR: 0.0003 +[2026-03-05 22:34:52] (step=0071873) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.06241440031305, LR: 0.0003 +[2026-03-05 22:35:00] (step=0071874) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.062610056740365, LR: 0.0003 +[2026-03-05 22:35:08] (step=0071875) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.062805713167677, LR: 0.0003 +[2026-03-05 22:35:16] (step=0071876) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.063001369594991, LR: 0.0003 +[2026-03-05 22:35:24] (step=0071877) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.063197026022305, LR: 0.0003 +[2026-03-05 22:35:32] (step=0071878) Train Loss: 0.4295, Train Steps/Sec: 0.13, Epoch: 14.063392682449619, LR: 0.0003 +[2026-03-05 22:35:39] (step=0071879) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.063588338876933, LR: 0.0003 +[2026-03-05 22:35:47] (step=0071880) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.063783995304245, LR: 0.0003 +[2026-03-05 22:35:55] (step=0071881) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.06397965173156, LR: 0.0003 +[2026-03-05 22:36:03] (step=0071882) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.064175308158873, LR: 0.0003 +[2026-03-05 22:36:11] (step=0071883) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.064370964586187, LR: 0.0003 +[2026-03-05 22:36:19] (step=0071884) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.0645666210135, LR: 0.0003 +[2026-03-05 22:36:26] (step=0071885) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.064762277440813, LR: 0.0003 +[2026-03-05 22:36:34] (step=0071886) Train Loss: 0.4480, Train Steps/Sec: 0.12, Epoch: 14.064957933868127, LR: 0.0003 +[2026-03-05 22:36:42] (step=0071887) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.065153590295441, LR: 0.0003 +[2026-03-05 22:36:50] (step=0071888) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.065349246722755, LR: 0.0003 +[2026-03-05 22:36:58] (step=0071889) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 14.065544903150068, LR: 0.0003 +[2026-03-05 22:37:06] (step=0071890) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.065740559577382, LR: 0.0003 +[2026-03-05 22:37:14] (step=0071891) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.065936216004696, LR: 0.0003 +[2026-03-05 22:37:21] (step=0071892) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.06613187243201, LR: 0.0003 +[2026-03-05 22:37:29] (step=0071893) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.066327528859324, LR: 0.0003 +[2026-03-05 22:37:37] (step=0071894) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.066523185286636, LR: 0.0003 +[2026-03-05 22:37:45] (step=0071895) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.06671884171395, LR: 0.0003 +[2026-03-05 22:37:53] (step=0071896) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 14.066914498141264, LR: 0.0003 +[2026-03-05 22:38:01] (step=0071897) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 14.067110154568578, LR: 0.0003 +[2026-03-05 22:38:09] (step=0071898) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.067305810995892, LR: 0.0003 +[2026-03-05 22:38:16] (step=0071899) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.067501467423204, LR: 0.0003 +[2026-03-05 22:38:24] (step=0071900) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.067697123850518, LR: 0.0003 +[2026-03-05 22:38:32] (step=0071901) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.067892780277832, LR: 0.0003 +[2026-03-05 22:38:40] (step=0071902) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.068088436705146, LR: 0.0003 +[2026-03-05 22:38:48] (step=0071903) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.06828409313246, LR: 0.0003 +[2026-03-05 22:38:56] (step=0071904) Train Loss: 0.4574, Train Steps/Sec: 0.13, Epoch: 14.068479749559772, LR: 0.0003 +[2026-03-05 22:39:03] (step=0071905) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 14.068675405987086, LR: 0.0003 +[2026-03-05 22:39:11] (step=0071906) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.0688710624144, LR: 0.0003 +[2026-03-05 22:39:19] (step=0071907) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.069066718841714, LR: 0.0003 +[2026-03-05 22:39:27] (step=0071908) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.069262375269028, LR: 0.0003 +[2026-03-05 22:39:35] (step=0071909) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.06945803169634, LR: 0.0003 +[2026-03-05 22:39:43] (step=0071910) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.069653688123655, LR: 0.0003 +[2026-03-05 22:39:50] (step=0071911) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 14.069849344550969, LR: 0.0003 +[2026-03-05 22:39:58] (step=0071912) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 14.070045000978283, LR: 0.0003 +[2026-03-05 22:40:06] (step=0071913) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.070240657405595, LR: 0.0003 +[2026-03-05 22:40:14] (step=0071914) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.070436313832909, LR: 0.0003 +[2026-03-05 22:40:22] (step=0071915) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.070631970260223, LR: 0.0003 +[2026-03-05 22:40:30] (step=0071916) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.070827626687537, LR: 0.0003 +[2026-03-05 22:40:38] (step=0071917) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.071023283114851, LR: 0.0003 +[2026-03-05 22:40:45] (step=0071918) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.071218939542163, LR: 0.0003 +[2026-03-05 22:40:53] (step=0071919) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.071414595969477, LR: 0.0003 +[2026-03-05 22:41:01] (step=0071920) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.071610252396791, LR: 0.0003 +[2026-03-05 22:41:09] (step=0071921) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.071805908824105, LR: 0.0003 +[2026-03-05 22:41:17] (step=0071922) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.07200156525142, LR: 0.0003 +[2026-03-05 22:41:25] (step=0071923) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.072197221678731, LR: 0.0003 +[2026-03-05 22:41:33] (step=0071924) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 14.072392878106045, LR: 0.0003 +[2026-03-05 22:41:40] (step=0071925) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.07258853453336, LR: 0.0003 +[2026-03-05 22:41:48] (step=0071926) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 14.072784190960673, LR: 0.0003 +[2026-03-05 22:41:56] (step=0071927) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 14.072979847387987, LR: 0.0003 +[2026-03-05 22:42:04] (step=0071928) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.0731755038153, LR: 0.0003 +[2026-03-05 22:42:12] (step=0071929) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.073371160242614, LR: 0.0003 +[2026-03-05 22:42:20] (step=0071930) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 14.073566816669928, LR: 0.0003 +[2026-03-05 22:42:27] (step=0071931) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 14.073762473097242, LR: 0.0003 +[2026-03-05 22:42:35] (step=0071932) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.073958129524556, LR: 0.0003 +[2026-03-05 22:42:43] (step=0071933) Train Loss: 0.4366, Train Steps/Sec: 0.12, Epoch: 14.074153785951868, LR: 0.0003 +[2026-03-05 22:42:51] (step=0071934) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 14.074349442379182, LR: 0.0003 +[2026-03-05 22:42:59] (step=0071935) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.074545098806496, LR: 0.0003 +[2026-03-05 22:43:07] (step=0071936) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.07474075523381, LR: 0.0003 +[2026-03-05 22:43:15] (step=0071937) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.074936411661122, LR: 0.0003 +[2026-03-05 22:43:23] (step=0071938) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.075132068088436, LR: 0.0003 +[2026-03-05 22:43:30] (step=0071939) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.07532772451575, LR: 0.0003 +[2026-03-05 22:43:38] (step=0071940) Train Loss: 0.4632, Train Steps/Sec: 0.13, Epoch: 14.075523380943064, LR: 0.0003 +[2026-03-05 22:43:46] (step=0071941) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.075719037370378, LR: 0.0003 +[2026-03-05 22:43:54] (step=0071942) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.07591469379769, LR: 0.0003 +[2026-03-05 22:44:02] (step=0071943) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 14.076110350225004, LR: 0.0003 +[2026-03-05 22:44:10] (step=0071944) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.076306006652318, LR: 0.0003 +[2026-03-05 22:44:18] (step=0071945) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.076501663079632, LR: 0.0003 +[2026-03-05 22:44:25] (step=0071946) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 14.076697319506946, LR: 0.0003 +[2026-03-05 22:44:33] (step=0071947) Train Loss: 0.4264, Train Steps/Sec: 0.13, Epoch: 14.076892975934259, LR: 0.0003 +[2026-03-05 22:44:41] (step=0071948) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 14.077088632361573, LR: 0.0003 +[2026-03-05 22:44:49] (step=0071949) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.077284288788887, LR: 0.0003 +[2026-03-05 22:44:57] (step=0071950) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.0774799452162, LR: 0.0003 +[2026-03-05 22:45:05] (step=0071951) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 14.077675601643515, LR: 0.0003 +[2026-03-05 22:45:12] (step=0071952) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 14.077871258070827, LR: 0.0003 +[2026-03-05 22:45:20] (step=0071953) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.078066914498141, LR: 0.0003 +[2026-03-05 22:45:28] (step=0071954) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.078262570925455, LR: 0.0003 +[2026-03-05 22:45:36] (step=0071955) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 14.078458227352769, LR: 0.0003 +[2026-03-05 22:45:44] (step=0071956) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.078653883780083, LR: 0.0003 +[2026-03-05 22:45:52] (step=0071957) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 14.078849540207395, LR: 0.0003 +[2026-03-05 22:45:59] (step=0071958) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.07904519663471, LR: 0.0003 +[2026-03-05 22:46:07] (step=0071959) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.079240853062023, LR: 0.0003 +[2026-03-05 22:46:15] (step=0071960) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 14.079436509489337, LR: 0.0003 +[2026-03-05 22:46:23] (step=0071961) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 14.079632165916651, LR: 0.0003 +[2026-03-05 22:46:31] (step=0071962) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 14.079827822343963, LR: 0.0003 +[2026-03-05 22:46:39] (step=0071963) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.080023478771277, LR: 0.0003 +[2026-03-05 22:46:46] (step=0071964) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.080219135198591, LR: 0.0003 +[2026-03-05 22:46:54] (step=0071965) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.080414791625905, LR: 0.0003 +[2026-03-05 22:47:02] (step=0071966) Train Loss: 0.4392, Train Steps/Sec: 0.12, Epoch: 14.080610448053218, LR: 0.0003 +[2026-03-05 22:47:10] (step=0071967) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 14.080806104480532, LR: 0.0003 +[2026-03-05 22:47:18] (step=0071968) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.081001760907846, LR: 0.0003 +[2026-03-05 22:47:26] (step=0071969) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.08119741733516, LR: 0.0003 +[2026-03-05 22:47:34] (step=0071970) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 14.081393073762474, LR: 0.0003 +[2026-03-05 22:47:42] (step=0071971) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.081588730189786, LR: 0.0003 +[2026-03-05 22:47:49] (step=0071972) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 14.0817843866171, LR: 0.0003 +[2026-03-05 22:47:57] (step=0071973) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.081980043044414, LR: 0.0003 +[2026-03-05 22:48:05] (step=0071974) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.082175699471728, LR: 0.0003 +[2026-03-05 22:48:13] (step=0071975) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 14.082371355899042, LR: 0.0003 +[2026-03-05 22:48:21] (step=0071976) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 14.082567012326354, LR: 0.0003 +[2026-03-05 22:48:29] (step=0071977) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.082762668753668, LR: 0.0003 +[2026-03-05 22:48:37] (step=0071978) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.082958325180982, LR: 0.0003 +[2026-03-05 22:48:44] (step=0071979) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.083153981608296, LR: 0.0003 +[2026-03-05 22:48:52] (step=0071980) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.08334963803561, LR: 0.0003 +[2026-03-05 22:49:00] (step=0071981) Train Loss: 0.4239, Train Steps/Sec: 0.13, Epoch: 14.083545294462922, LR: 0.0003 +[2026-03-05 22:49:08] (step=0071982) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.083740950890236, LR: 0.0003 +[2026-03-05 22:49:16] (step=0071983) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.08393660731755, LR: 0.0003 +[2026-03-05 22:49:24] (step=0071984) Train Loss: 0.4267, Train Steps/Sec: 0.13, Epoch: 14.084132263744864, LR: 0.0003 +[2026-03-05 22:49:32] (step=0071985) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.084327920172178, LR: 0.0003 +[2026-03-05 22:49:39] (step=0071986) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 14.08452357659949, LR: 0.0003 +[2026-03-05 22:49:47] (step=0071987) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.084719233026805, LR: 0.0003 +[2026-03-05 22:49:55] (step=0071988) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 14.084914889454119, LR: 0.0003 +[2026-03-05 22:50:03] (step=0071989) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 14.085110545881433, LR: 0.0003 +[2026-03-05 22:50:11] (step=0071990) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 14.085306202308745, LR: 0.0003 +[2026-03-05 22:50:19] (step=0071991) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.085501858736059, LR: 0.0003 +[2026-03-05 22:50:26] (step=0071992) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.085697515163373, LR: 0.0003 +[2026-03-05 22:50:34] (step=0071993) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.085893171590687, LR: 0.0003 +[2026-03-05 22:50:42] (step=0071994) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.086088828018001, LR: 0.0003 +[2026-03-05 22:50:50] (step=0071995) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.086284484445313, LR: 0.0003 +[2026-03-05 22:50:58] (step=0071996) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.086480140872627, LR: 0.0003 +[2026-03-05 22:51:06] (step=0071997) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.086675797299941, LR: 0.0003 +[2026-03-05 22:51:13] (step=0071998) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 14.086871453727255, LR: 0.0003 +[2026-03-05 22:51:21] (step=0071999) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 14.08706711015457, LR: 0.0003 +[2026-03-05 22:51:29] (step=0072000) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.087262766581881, LR: 0.0003 +[2026-03-05 22:51:29] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0072000/ +[2026-03-05 22:51:37] (step=0072001) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 14.087458423009195, LR: 0.0003 +[2026-03-05 22:51:45] (step=0072002) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 14.08765407943651, LR: 0.0003 +[2026-03-05 22:51:53] (step=0072003) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.087849735863823, LR: 0.0003 +[2026-03-05 22:52:01] (step=0072004) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.088045392291138, LR: 0.0003 +[2026-03-05 22:52:08] (step=0072005) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.08824104871845, LR: 0.0003 +[2026-03-05 22:52:16] (step=0072006) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 14.088436705145764, LR: 0.0003 +[2026-03-05 22:52:24] (step=0072007) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.088632361573078, LR: 0.0003 +[2026-03-05 22:52:32] (step=0072008) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.088828018000392, LR: 0.0003 +[2026-03-05 22:52:40] (step=0072009) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.089023674427706, LR: 0.0003 +[2026-03-05 22:52:48] (step=0072010) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 14.089219330855018, LR: 0.0003 +[2026-03-05 22:52:56] (step=0072011) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 14.089414987282332, LR: 0.0003 +[2026-03-05 22:53:03] (step=0072012) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.089610643709646, LR: 0.0003 +[2026-03-05 22:53:11] (step=0072013) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.08980630013696, LR: 0.0003 +[2026-03-05 22:53:19] (step=0072014) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.090001956564274, LR: 0.0003 +[2026-03-05 22:53:27] (step=0072015) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 14.090197612991586, LR: 0.0003 +[2026-03-05 22:53:35] (step=0072016) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.0903932694189, LR: 0.0003 +[2026-03-05 22:53:43] (step=0072017) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.090588925846214, LR: 0.0003 +[2026-03-05 22:53:51] (step=0072018) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.090784582273528, LR: 0.0003 +[2026-03-05 22:53:58] (step=0072019) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 14.09098023870084, LR: 0.0003 +[2026-03-05 22:54:06] (step=0072020) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.091175895128154, LR: 0.0003 +[2026-03-05 22:54:14] (step=0072021) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 14.091371551555468, LR: 0.0003 +[2026-03-05 22:54:22] (step=0072022) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.091567207982783, LR: 0.0003 +[2026-03-05 22:54:30] (step=0072023) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.091762864410097, LR: 0.0003 +[2026-03-05 22:54:38] (step=0072024) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.091958520837409, LR: 0.0003 +[2026-03-05 22:54:45] (step=0072025) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 14.092154177264723, LR: 0.0003 +[2026-03-05 22:54:53] (step=0072026) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 14.092349833692037, LR: 0.0003 +[2026-03-05 22:55:01] (step=0072027) Train Loss: 0.4468, Train Steps/Sec: 0.12, Epoch: 14.09254549011935, LR: 0.0003 +[2026-03-05 22:55:09] (step=0072028) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 14.092741146546665, LR: 0.0003 +[2026-03-05 22:55:17] (step=0072029) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.092936802973977, LR: 0.0003 +[2026-03-05 22:55:25] (step=0072030) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.093132459401291, LR: 0.0003 +[2026-03-05 22:55:33] (step=0072031) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.093328115828605, LR: 0.0003 +[2026-03-05 22:55:41] (step=0072032) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.093523772255919, LR: 0.0003 +[2026-03-05 22:55:48] (step=0072033) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 14.093719428683233, LR: 0.0003 +[2026-03-05 22:55:56] (step=0072034) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.093915085110545, LR: 0.0003 +[2026-03-05 22:56:04] (step=0072035) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.09411074153786, LR: 0.0003 +[2026-03-05 22:56:12] (step=0072036) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.094306397965173, LR: 0.0003 +[2026-03-05 22:56:20] (step=0072037) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 14.094502054392487, LR: 0.0003 +[2026-03-05 22:56:28] (step=0072038) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 14.094697710819801, LR: 0.0003 +[2026-03-05 22:56:35] (step=0072039) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 14.094893367247114, LR: 0.0003 +[2026-03-05 22:56:43] (step=0072040) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.095089023674428, LR: 0.0003 +[2026-03-05 22:56:51] (step=0072041) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.095284680101742, LR: 0.0003 +[2026-03-05 22:56:59] (step=0072042) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.095480336529056, LR: 0.0003 +[2026-03-05 22:57:07] (step=0072043) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 14.095675992956368, LR: 0.0003 +[2026-03-05 22:57:15] (step=0072044) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 14.095871649383682, LR: 0.0003 +[2026-03-05 22:57:23] (step=0072045) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.096067305810996, LR: 0.0003 +[2026-03-05 22:57:30] (step=0072046) Train Loss: 0.4616, Train Steps/Sec: 0.13, Epoch: 14.09626296223831, LR: 0.0003 +[2026-03-05 22:57:38] (step=0072047) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.096458618665624, LR: 0.0003 +[2026-03-05 22:57:46] (step=0072048) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.096654275092936, LR: 0.0003 +[2026-03-05 22:57:54] (step=0072049) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.09684993152025, LR: 0.0003 +[2026-03-05 22:58:02] (step=0072050) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 14.097045587947564, LR: 0.0003 +[2026-03-05 22:58:10] (step=0072051) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 14.097241244374878, LR: 0.0003 +[2026-03-05 22:58:17] (step=0072052) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 14.097436900802192, LR: 0.0003 +[2026-03-05 22:58:25] (step=0072053) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.097632557229504, LR: 0.0003 +[2026-03-05 22:58:33] (step=0072054) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 14.097828213656818, LR: 0.0003 +[2026-03-05 22:58:41] (step=0072055) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.098023870084132, LR: 0.0003 +[2026-03-05 22:58:49] (step=0072056) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.098219526511446, LR: 0.0003 +[2026-03-05 22:58:57] (step=0072057) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.09841518293876, LR: 0.0003 +[2026-03-05 22:59:05] (step=0072058) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 14.098610839366073, LR: 0.0003 +[2026-03-05 22:59:13] (step=0072059) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.098806495793387, LR: 0.0003 +[2026-03-05 22:59:20] (step=0072060) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.0990021522207, LR: 0.0003 +[2026-03-05 22:59:28] (step=0072061) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.099197808648015, LR: 0.0003 +[2026-03-05 22:59:36] (step=0072062) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 14.099393465075329, LR: 0.0003 +[2026-03-05 22:59:44] (step=0072063) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.09958912150264, LR: 0.0003 +[2026-03-05 22:59:52] (step=0072064) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.099784777929955, LR: 0.0003 +[2026-03-05 23:00:00] (step=0072065) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 14.099980434357269, LR: 0.0003 +[2026-03-05 23:00:07] (step=0072066) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.100176090784583, LR: 0.0003 +[2026-03-05 23:00:15] (step=0072067) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.100371747211897, LR: 0.0003 +[2026-03-05 23:00:23] (step=0072068) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 14.100567403639209, LR: 0.0003 +[2026-03-05 23:00:31] (step=0072069) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 14.100763060066523, LR: 0.0003 +[2026-03-05 23:00:39] (step=0072070) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.100958716493837, LR: 0.0003 +[2026-03-05 23:00:47] (step=0072071) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 14.101154372921151, LR: 0.0003 +[2026-03-05 23:00:54] (step=0072072) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.101350029348463, LR: 0.0003 +[2026-03-05 23:01:02] (step=0072073) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.101545685775777, LR: 0.0003 +[2026-03-05 23:01:10] (step=0072074) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 14.101741342203091, LR: 0.0003 +[2026-03-05 23:01:18] (step=0072075) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.101936998630405, LR: 0.0003 +[2026-03-05 23:01:26] (step=0072076) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.10213265505772, LR: 0.0003 +[2026-03-05 23:01:34] (step=0072077) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.102328311485032, LR: 0.0003 +[2026-03-05 23:01:42] (step=0072078) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.102523967912346, LR: 0.0003 +[2026-03-05 23:01:50] (step=0072079) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 14.10271962433966, LR: 0.0003 +[2026-03-05 23:01:57] (step=0072080) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 14.102915280766974, LR: 0.0003 +[2026-03-05 23:02:05] (step=0072081) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.103110937194288, LR: 0.0003 +[2026-03-05 23:02:13] (step=0072082) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 14.1033065936216, LR: 0.0003 +[2026-03-05 23:02:21] (step=0072083) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.103502250048914, LR: 0.0003 +[2026-03-05 23:02:29] (step=0072084) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.103697906476228, LR: 0.0003 +[2026-03-05 23:02:37] (step=0072085) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 14.103893562903542, LR: 0.0003 +[2026-03-05 23:02:44] (step=0072086) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 14.104089219330856, LR: 0.0003 +[2026-03-05 23:02:52] (step=0072087) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.104284875758168, LR: 0.0003 +[2026-03-05 23:03:00] (step=0072088) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 14.104480532185482, LR: 0.0003 +[2026-03-05 23:03:08] (step=0072089) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.104676188612796, LR: 0.0003 +[2026-03-05 23:03:16] (step=0072090) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.10487184504011, LR: 0.0003 +[2026-03-05 23:03:24] (step=0072091) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 14.105067501467424, LR: 0.0003 +[2026-03-05 23:03:31] (step=0072092) Train Loss: 0.4325, Train Steps/Sec: 0.13, Epoch: 14.105263157894736, LR: 0.0003 +[2026-03-05 23:03:39] (step=0072093) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 14.10545881432205, LR: 0.0003 +[2026-03-05 23:03:47] (step=0072094) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.105654470749364, LR: 0.0003 +[2026-03-05 23:03:55] (step=0072095) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.105850127176678, LR: 0.0003 +[2026-03-05 23:04:03] (step=0072096) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 14.10604578360399, LR: 0.0003 +[2026-03-05 23:04:11] (step=0072097) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.106241440031305, LR: 0.0003 +[2026-03-05 23:04:18] (step=0072098) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 14.106437096458619, LR: 0.0003 +[2026-03-05 23:04:26] (step=0072099) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.106632752885933, LR: 0.0003 +[2026-03-05 23:04:34] (step=0072100) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.106828409313247, LR: 0.0003 +[2026-03-05 23:04:42] (step=0072101) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.107024065740559, LR: 0.0003 +[2026-03-05 23:04:50] (step=0072102) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.107219722167873, LR: 0.0003 +[2026-03-05 23:04:58] (step=0072103) Train Loss: 0.4258, Train Steps/Sec: 0.13, Epoch: 14.107415378595187, LR: 0.0003 +[2026-03-05 23:05:06] (step=0072104) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.1076110350225, LR: 0.0003 +[2026-03-05 23:05:13] (step=0072105) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 14.107806691449815, LR: 0.0003 +[2026-03-05 23:05:21] (step=0072106) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.108002347877127, LR: 0.0003 +[2026-03-05 23:05:29] (step=0072107) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.108198004304441, LR: 0.0003 +[2026-03-05 23:05:37] (step=0072108) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.108393660731755, LR: 0.0003 +[2026-03-05 23:05:45] (step=0072109) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.108589317159069, LR: 0.0003 +[2026-03-05 23:05:53] (step=0072110) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 14.108784973586383, LR: 0.0003 +[2026-03-05 23:06:00] (step=0072111) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 14.108980630013695, LR: 0.0003 +[2026-03-05 23:06:08] (step=0072112) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.10917628644101, LR: 0.0003 +[2026-03-05 23:06:16] (step=0072113) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.109371942868323, LR: 0.0003 +[2026-03-05 23:06:24] (step=0072114) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.109567599295637, LR: 0.0003 +[2026-03-05 23:06:32] (step=0072115) Train Loss: 0.4251, Train Steps/Sec: 0.13, Epoch: 14.109763255722951, LR: 0.0003 +[2026-03-05 23:06:40] (step=0072116) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 14.109958912150264, LR: 0.0003 +[2026-03-05 23:06:48] (step=0072117) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.110154568577578, LR: 0.0003 +[2026-03-05 23:06:56] (step=0072118) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 14.110350225004892, LR: 0.0003 +[2026-03-05 23:07:03] (step=0072119) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.110545881432206, LR: 0.0003 +[2026-03-05 23:07:11] (step=0072120) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 14.11074153785952, LR: 0.0003 +[2026-03-05 23:07:19] (step=0072121) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.110937194286832, LR: 0.0003 +[2026-03-05 23:07:27] (step=0072122) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.111132850714146, LR: 0.0003 +[2026-03-05 23:07:35] (step=0072123) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.11132850714146, LR: 0.0003 +[2026-03-05 23:07:43] (step=0072124) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.111524163568774, LR: 0.0003 +[2026-03-05 23:07:51] (step=0072125) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.111719819996086, LR: 0.0003 +[2026-03-05 23:07:58] (step=0072126) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.1119154764234, LR: 0.0003 +[2026-03-05 23:08:06] (step=0072127) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.112111132850714, LR: 0.0003 +[2026-03-05 23:08:14] (step=0072128) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.112306789278028, LR: 0.0003 +[2026-03-05 23:08:22] (step=0072129) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.112502445705342, LR: 0.0003 +[2026-03-05 23:08:30] (step=0072130) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 14.112698102132654, LR: 0.0003 +[2026-03-05 23:08:38] (step=0072131) Train Loss: 0.4228, Train Steps/Sec: 0.13, Epoch: 14.112893758559968, LR: 0.0003 +[2026-03-05 23:08:45] (step=0072132) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.113089414987282, LR: 0.0003 +[2026-03-05 23:08:53] (step=0072133) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.113285071414596, LR: 0.0003 +[2026-03-05 23:09:01] (step=0072134) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.11348072784191, LR: 0.0003 +[2026-03-05 23:09:09] (step=0072135) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.113676384269223, LR: 0.0003 +[2026-03-05 23:09:17] (step=0072136) Train Loss: 0.4208, Train Steps/Sec: 0.13, Epoch: 14.113872040696537, LR: 0.0003 +[2026-03-05 23:09:25] (step=0072137) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.11406769712385, LR: 0.0003 +[2026-03-05 23:09:33] (step=0072138) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.114263353551165, LR: 0.0003 +[2026-03-05 23:09:40] (step=0072139) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.114459009978479, LR: 0.0003 +[2026-03-05 23:09:48] (step=0072140) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 14.11465466640579, LR: 0.0003 +[2026-03-05 23:09:56] (step=0072141) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.114850322833105, LR: 0.0003 +[2026-03-05 23:10:04] (step=0072142) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 14.115045979260419, LR: 0.0003 +[2026-03-05 23:10:12] (step=0072143) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 14.115241635687733, LR: 0.0003 +[2026-03-05 23:10:20] (step=0072144) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.115437292115047, LR: 0.0003 +[2026-03-05 23:10:27] (step=0072145) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.115632948542359, LR: 0.0003 +[2026-03-05 23:10:35] (step=0072146) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.115828604969673, LR: 0.0003 +[2026-03-05 23:10:43] (step=0072147) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 14.116024261396987, LR: 0.0003 +[2026-03-05 23:10:51] (step=0072148) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.116219917824301, LR: 0.0003 +[2026-03-05 23:10:59] (step=0072149) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 14.116415574251613, LR: 0.0003 +[2026-03-05 23:11:07] (step=0072150) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.116611230678927, LR: 0.0003 +[2026-03-05 23:11:14] (step=0072151) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.116806887106241, LR: 0.0003 +[2026-03-05 23:11:22] (step=0072152) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.117002543533555, LR: 0.0003 +[2026-03-05 23:11:30] (step=0072153) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.11719819996087, LR: 0.0003 +[2026-03-05 23:11:38] (step=0072154) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.117393856388182, LR: 0.0003 +[2026-03-05 23:11:46] (step=0072155) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.117589512815496, LR: 0.0003 +[2026-03-05 23:11:54] (step=0072156) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.11778516924281, LR: 0.0003 +[2026-03-05 23:12:02] (step=0072157) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.117980825670124, LR: 0.0003 +[2026-03-05 23:12:09] (step=0072158) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 14.118176482097438, LR: 0.0003 +[2026-03-05 23:12:17] (step=0072159) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.11837213852475, LR: 0.0003 +[2026-03-05 23:12:25] (step=0072160) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.118567794952064, LR: 0.0003 +[2026-03-05 23:12:33] (step=0072161) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.118763451379378, LR: 0.0003 +[2026-03-05 23:12:41] (step=0072162) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.118959107806692, LR: 0.0003 +[2026-03-05 23:12:49] (step=0072163) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 14.119154764234006, LR: 0.0003 +[2026-03-05 23:12:57] (step=0072164) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.119350420661318, LR: 0.0003 +[2026-03-05 23:13:05] (step=0072165) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.119546077088632, LR: 0.0003 +[2026-03-05 23:13:12] (step=0072166) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.119741733515946, LR: 0.0003 +[2026-03-05 23:13:20] (step=0072167) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 14.11993738994326, LR: 0.0003 +[2026-03-05 23:13:28] (step=0072168) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.120133046370574, LR: 0.0003 +[2026-03-05 23:13:36] (step=0072169) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.120328702797886, LR: 0.0003 +[2026-03-05 23:13:44] (step=0072170) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.1205243592252, LR: 0.0003 +[2026-03-05 23:13:52] (step=0072171) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 14.120720015652514, LR: 0.0003 +[2026-03-05 23:13:59] (step=0072172) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.120915672079828, LR: 0.0003 +[2026-03-05 23:14:07] (step=0072173) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 14.121111328507142, LR: 0.0003 +[2026-03-05 23:14:15] (step=0072174) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 14.121306984934455, LR: 0.0003 +[2026-03-05 23:14:23] (step=0072175) Train Loss: 0.4442, Train Steps/Sec: 0.12, Epoch: 14.121502641361769, LR: 0.0003 +[2026-03-05 23:14:31] (step=0072176) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.121698297789083, LR: 0.0003 +[2026-03-05 23:14:39] (step=0072177) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.121893954216397, LR: 0.0003 +[2026-03-05 23:14:47] (step=0072178) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.122089610643709, LR: 0.0003 +[2026-03-05 23:14:55] (step=0072179) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.122285267071023, LR: 0.0003 +[2026-03-05 23:15:02] (step=0072180) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.122480923498337, LR: 0.0003 +[2026-03-05 23:15:10] (step=0072181) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.12267657992565, LR: 0.0003 +[2026-03-05 23:15:18] (step=0072182) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.122872236352965, LR: 0.0003 +[2026-03-05 23:15:26] (step=0072183) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.123067892780277, LR: 0.0003 +[2026-03-05 23:15:34] (step=0072184) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.123263549207591, LR: 0.0003 +[2026-03-05 23:15:42] (step=0072185) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.123459205634905, LR: 0.0003 +[2026-03-05 23:15:49] (step=0072186) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.123654862062219, LR: 0.0003 +[2026-03-05 23:15:57] (step=0072187) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.123850518489533, LR: 0.0003 +[2026-03-05 23:16:05] (step=0072188) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.124046174916845, LR: 0.0003 +[2026-03-05 23:16:13] (step=0072189) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 14.12424183134416, LR: 0.0003 +[2026-03-05 23:16:21] (step=0072190) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.124437487771473, LR: 0.0003 +[2026-03-05 23:16:29] (step=0072191) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.124633144198787, LR: 0.0003 +[2026-03-05 23:16:37] (step=0072192) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.124828800626101, LR: 0.0003 +[2026-03-05 23:16:44] (step=0072193) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.125024457053414, LR: 0.0003 +[2026-03-05 23:16:52] (step=0072194) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 14.125220113480728, LR: 0.0003 +[2026-03-05 23:17:00] (step=0072195) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.125415769908042, LR: 0.0003 +[2026-03-05 23:17:08] (step=0072196) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.125611426335356, LR: 0.0003 +[2026-03-05 23:17:16] (step=0072197) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.12580708276267, LR: 0.0003 +[2026-03-05 23:17:24] (step=0072198) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 14.126002739189982, LR: 0.0003 +[2026-03-05 23:17:31] (step=0072199) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.126198395617296, LR: 0.0003 +[2026-03-05 23:17:39] (step=0072200) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.12639405204461, LR: 0.0003 +[2026-03-05 23:17:47] (step=0072201) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 14.126589708471924, LR: 0.0003 +[2026-03-05 23:17:55] (step=0072202) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.126785364899236, LR: 0.0003 +[2026-03-05 23:18:03] (step=0072203) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.12698102132655, LR: 0.0003 +[2026-03-05 23:18:11] (step=0072204) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.127176677753864, LR: 0.0003 +[2026-03-05 23:18:18] (step=0072205) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.127372334181178, LR: 0.0003 +[2026-03-05 23:18:26] (step=0072206) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.127567990608492, LR: 0.0003 +[2026-03-05 23:18:34] (step=0072207) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 14.127763647035804, LR: 0.0003 +[2026-03-05 23:18:42] (step=0072208) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.127959303463118, LR: 0.0003 +[2026-03-05 23:18:50] (step=0072209) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 14.128154959890432, LR: 0.0003 +[2026-03-05 23:18:58] (step=0072210) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 14.128350616317746, LR: 0.0003 +[2026-03-05 23:19:06] (step=0072211) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 14.12854627274506, LR: 0.0003 +[2026-03-05 23:19:13] (step=0072212) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.128741929172373, LR: 0.0003 +[2026-03-05 23:19:21] (step=0072213) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.128937585599687, LR: 0.0003 +[2026-03-05 23:19:29] (step=0072214) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.129133242027, LR: 0.0003 +[2026-03-05 23:19:37] (step=0072215) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.129328898454315, LR: 0.0003 +[2026-03-05 23:19:45] (step=0072216) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.129524554881629, LR: 0.0003 +[2026-03-05 23:19:53] (step=0072217) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.12972021130894, LR: 0.0003 +[2026-03-05 23:20:01] (step=0072218) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.129915867736255, LR: 0.0003 +[2026-03-05 23:20:08] (step=0072219) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.130111524163569, LR: 0.0003 +[2026-03-05 23:20:16] (step=0072220) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.130307180590883, LR: 0.0003 +[2026-03-05 23:20:24] (step=0072221) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.130502837018197, LR: 0.0003 +[2026-03-05 23:20:32] (step=0072222) Train Loss: 0.4561, Train Steps/Sec: 0.12, Epoch: 14.130698493445509, LR: 0.0003 +[2026-03-05 23:20:40] (step=0072223) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.130894149872823, LR: 0.0003 +[2026-03-05 23:20:48] (step=0072224) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.131089806300137, LR: 0.0003 +[2026-03-05 23:20:56] (step=0072225) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.131285462727451, LR: 0.0003 +[2026-03-05 23:21:04] (step=0072226) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.131481119154763, LR: 0.0003 +[2026-03-05 23:21:11] (step=0072227) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.131676775582077, LR: 0.0003 +[2026-03-05 23:21:19] (step=0072228) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.131872432009391, LR: 0.0003 +[2026-03-05 23:21:27] (step=0072229) Train Loss: 0.4539, Train Steps/Sec: 0.13, Epoch: 14.132068088436705, LR: 0.0003 +[2026-03-05 23:21:35] (step=0072230) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.13226374486402, LR: 0.0003 +[2026-03-05 23:21:43] (step=0072231) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.132459401291332, LR: 0.0003 +[2026-03-05 23:21:51] (step=0072232) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.132655057718646, LR: 0.0003 +[2026-03-05 23:21:58] (step=0072233) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.13285071414596, LR: 0.0003 +[2026-03-05 23:22:06] (step=0072234) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.133046370573274, LR: 0.0003 +[2026-03-05 23:22:14] (step=0072235) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.133242027000588, LR: 0.0003 +[2026-03-05 23:22:22] (step=0072236) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.1334376834279, LR: 0.0003 +[2026-03-05 23:22:30] (step=0072237) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 14.133633339855214, LR: 0.0003 +[2026-03-05 23:22:38] (step=0072238) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.133828996282528, LR: 0.0003 +[2026-03-05 23:22:46] (step=0072239) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.134024652709842, LR: 0.0003 +[2026-03-05 23:22:53] (step=0072240) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.134220309137156, LR: 0.0003 +[2026-03-05 23:23:01] (step=0072241) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.134415965564468, LR: 0.0003 +[2026-03-05 23:23:09] (step=0072242) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.134611621991782, LR: 0.0003 +[2026-03-05 23:23:17] (step=0072243) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.134807278419096, LR: 0.0003 +[2026-03-05 23:23:25] (step=0072244) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.13500293484641, LR: 0.0003 +[2026-03-05 23:23:33] (step=0072245) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 14.135198591273724, LR: 0.0003 +[2026-03-05 23:23:40] (step=0072246) Train Loss: 0.4518, Train Steps/Sec: 0.13, Epoch: 14.135394247701036, LR: 0.0003 +[2026-03-05 23:23:48] (step=0072247) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.13558990412835, LR: 0.0003 +[2026-03-05 23:23:56] (step=0072248) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.135785560555664, LR: 0.0003 +[2026-03-05 23:24:04] (step=0072249) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.135981216982978, LR: 0.0003 +[2026-03-05 23:24:12] (step=0072250) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.136176873410292, LR: 0.0003 +[2026-03-05 23:24:20] (step=0072251) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.136372529837605, LR: 0.0003 +[2026-03-05 23:24:28] (step=0072252) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 14.136568186264919, LR: 0.0003 +[2026-03-05 23:24:35] (step=0072253) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 14.136763842692233, LR: 0.0003 +[2026-03-05 23:24:43] (step=0072254) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 14.136959499119547, LR: 0.0003 +[2026-03-05 23:24:51] (step=0072255) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 14.137155155546859, LR: 0.0003 +[2026-03-05 23:24:59] (step=0072256) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 14.137350811974173, LR: 0.0003 +[2026-03-05 23:25:07] (step=0072257) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.137546468401487, LR: 0.0003 +[2026-03-05 23:25:15] (step=0072258) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.1377421248288, LR: 0.0003 +[2026-03-05 23:25:22] (step=0072259) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.137937781256115, LR: 0.0003 +[2026-03-05 23:25:30] (step=0072260) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.138133437683427, LR: 0.0003 +[2026-03-05 23:25:38] (step=0072261) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.138329094110741, LR: 0.0003 +[2026-03-05 23:25:46] (step=0072262) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.138524750538055, LR: 0.0003 +[2026-03-05 23:25:54] (step=0072263) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.13872040696537, LR: 0.0003 +[2026-03-05 23:26:02] (step=0072264) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 14.138916063392683, LR: 0.0003 +[2026-03-05 23:26:10] (step=0072265) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.139111719819995, LR: 0.0003 +[2026-03-05 23:26:17] (step=0072266) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.13930737624731, LR: 0.0003 +[2026-03-05 23:26:25] (step=0072267) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.139503032674623, LR: 0.0003 +[2026-03-05 23:26:33] (step=0072268) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.139698689101937, LR: 0.0003 +[2026-03-05 23:26:41] (step=0072269) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 14.139894345529251, LR: 0.0003 +[2026-03-05 23:26:49] (step=0072270) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.140090001956564, LR: 0.0003 +[2026-03-05 23:26:57] (step=0072271) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 14.140285658383878, LR: 0.0003 +[2026-03-05 23:27:04] (step=0072272) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.140481314811192, LR: 0.0003 +[2026-03-05 23:27:12] (step=0072273) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.140676971238506, LR: 0.0003 +[2026-03-05 23:27:20] (step=0072274) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 14.14087262766582, LR: 0.0003 +[2026-03-05 23:27:28] (step=0072275) Train Loss: 0.4292, Train Steps/Sec: 0.12, Epoch: 14.141068284093132, LR: 0.0003 +[2026-03-05 23:27:36] (step=0072276) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.141263940520446, LR: 0.0003 +[2026-03-05 23:27:44] (step=0072277) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 14.14145959694776, LR: 0.0003 +[2026-03-05 23:27:52] (step=0072278) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 14.141655253375074, LR: 0.0003 +[2026-03-05 23:28:00] (step=0072279) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.141850909802386, LR: 0.0003 +[2026-03-05 23:28:07] (step=0072280) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.1420465662297, LR: 0.0003 +[2026-03-05 23:28:15] (step=0072281) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.142242222657014, LR: 0.0003 +[2026-03-05 23:28:23] (step=0072282) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.142437879084328, LR: 0.0003 +[2026-03-05 23:28:31] (step=0072283) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.142633535511642, LR: 0.0003 +[2026-03-05 23:28:39] (step=0072284) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.142829191938954, LR: 0.0003 +[2026-03-05 23:28:47] (step=0072285) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.143024848366268, LR: 0.0003 +[2026-03-05 23:28:54] (step=0072286) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 14.143220504793582, LR: 0.0003 +[2026-03-05 23:29:02] (step=0072287) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 14.143416161220896, LR: 0.0003 +[2026-03-05 23:29:10] (step=0072288) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.14361181764821, LR: 0.0003 +[2026-03-05 23:29:18] (step=0072289) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.143807474075523, LR: 0.0003 +[2026-03-05 23:29:26] (step=0072290) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.144003130502837, LR: 0.0003 +[2026-03-05 23:29:34] (step=0072291) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.14419878693015, LR: 0.0003 +[2026-03-05 23:29:42] (step=0072292) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.144394443357465, LR: 0.0003 +[2026-03-05 23:29:49] (step=0072293) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.144590099784779, LR: 0.0003 +[2026-03-05 23:29:57] (step=0072294) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.14478575621209, LR: 0.0003 +[2026-03-05 23:30:05] (step=0072295) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.144981412639405, LR: 0.0003 +[2026-03-05 23:30:13] (step=0072296) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.145177069066719, LR: 0.0003 +[2026-03-05 23:30:21] (step=0072297) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.145372725494033, LR: 0.0003 +[2026-03-05 23:30:29] (step=0072298) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.145568381921347, LR: 0.0003 +[2026-03-05 23:30:36] (step=0072299) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.14576403834866, LR: 0.0003 +[2026-03-05 23:30:44] (step=0072300) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 14.145959694775973, LR: 0.0003 +[2026-03-05 23:30:52] (step=0072301) Train Loss: 0.4262, Train Steps/Sec: 0.13, Epoch: 14.146155351203287, LR: 0.0003 +[2026-03-05 23:31:00] (step=0072302) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.146351007630601, LR: 0.0003 +[2026-03-05 23:31:08] (step=0072303) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.146546664057915, LR: 0.0003 +[2026-03-05 23:31:16] (step=0072304) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.146742320485227, LR: 0.0003 +[2026-03-05 23:31:24] (step=0072305) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.146937976912541, LR: 0.0003 +[2026-03-05 23:31:31] (step=0072306) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.147133633339855, LR: 0.0003 +[2026-03-05 23:31:39] (step=0072307) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.14732928976717, LR: 0.0003 +[2026-03-05 23:31:47] (step=0072308) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 14.147524946194482, LR: 0.0003 +[2026-03-05 23:31:55] (step=0072309) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.147720602621796, LR: 0.0003 +[2026-03-05 23:32:03] (step=0072310) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.14791625904911, LR: 0.0003 +[2026-03-05 23:32:11] (step=0072311) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.148111915476424, LR: 0.0003 +[2026-03-05 23:32:19] (step=0072312) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.148307571903738, LR: 0.0003 +[2026-03-05 23:32:26] (step=0072313) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.14850322833105, LR: 0.0003 +[2026-03-05 23:32:34] (step=0072314) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.148698884758364, LR: 0.0003 +[2026-03-05 23:32:42] (step=0072315) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.148894541185678, LR: 0.0003 +[2026-03-05 23:32:50] (step=0072316) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.149090197612992, LR: 0.0003 +[2026-03-05 23:32:58] (step=0072317) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.149285854040306, LR: 0.0003 +[2026-03-05 23:33:06] (step=0072318) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.149481510467618, LR: 0.0003 +[2026-03-05 23:33:14] (step=0072319) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.149677166894932, LR: 0.0003 +[2026-03-05 23:33:21] (step=0072320) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 14.149872823322246, LR: 0.0003 +[2026-03-05 23:33:29] (step=0072321) Train Loss: 0.4379, Train Steps/Sec: 0.12, Epoch: 14.15006847974956, LR: 0.0003 +[2026-03-05 23:33:37] (step=0072322) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.150264136176874, LR: 0.0003 +[2026-03-05 23:33:45] (step=0072323) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.150459792604186, LR: 0.0003 +[2026-03-05 23:33:53] (step=0072324) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.1506554490315, LR: 0.0003 +[2026-03-05 23:34:01] (step=0072325) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.150851105458814, LR: 0.0003 +[2026-03-05 23:34:09] (step=0072326) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 14.151046761886128, LR: 0.0003 +[2026-03-05 23:34:16] (step=0072327) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.151242418313442, LR: 0.0003 +[2026-03-05 23:34:24] (step=0072328) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.151438074740755, LR: 0.0003 +[2026-03-05 23:34:32] (step=0072329) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.151633731168069, LR: 0.0003 +[2026-03-05 23:34:40] (step=0072330) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.151829387595383, LR: 0.0003 +[2026-03-05 23:34:48] (step=0072331) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.152025044022697, LR: 0.0003 +[2026-03-05 23:34:56] (step=0072332) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.152220700450009, LR: 0.0003 +[2026-03-05 23:35:03] (step=0072333) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.152416356877323, LR: 0.0003 +[2026-03-05 23:35:11] (step=0072334) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.152612013304637, LR: 0.0003 +[2026-03-05 23:35:19] (step=0072335) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.152807669731951, LR: 0.0003 +[2026-03-05 23:35:27] (step=0072336) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.153003326159265, LR: 0.0003 +[2026-03-05 23:35:35] (step=0072337) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.153198982586577, LR: 0.0003 +[2026-03-05 23:35:43] (step=0072338) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 14.153394639013891, LR: 0.0003 +[2026-03-05 23:35:51] (step=0072339) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 14.153590295441205, LR: 0.0003 +[2026-03-05 23:35:58] (step=0072340) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.15378595186852, LR: 0.0003 +[2026-03-05 23:36:06] (step=0072341) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.153981608295833, LR: 0.0003 +[2026-03-05 23:36:14] (step=0072342) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 14.154177264723145, LR: 0.0003 +[2026-03-05 23:36:22] (step=0072343) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 14.15437292115046, LR: 0.0003 +[2026-03-05 23:36:30] (step=0072344) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.154568577577773, LR: 0.0003 +[2026-03-05 23:36:37] (step=0072345) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.154764234005087, LR: 0.0003 +[2026-03-05 23:36:45] (step=0072346) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 14.154959890432401, LR: 0.0003 +[2026-03-05 23:36:53] (step=0072347) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.155155546859714, LR: 0.0003 +[2026-03-05 23:37:01] (step=0072348) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 14.155351203287028, LR: 0.0003 +[2026-03-05 23:37:09] (step=0072349) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.155546859714342, LR: 0.0003 +[2026-03-05 23:37:17] (step=0072350) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 14.155742516141656, LR: 0.0003 +[2026-03-05 23:37:24] (step=0072351) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.15593817256897, LR: 0.0003 +[2026-03-05 23:37:32] (step=0072352) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 14.156133828996282, LR: 0.0003 +[2026-03-05 23:37:40] (step=0072353) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.156329485423596, LR: 0.0003 +[2026-03-05 23:37:48] (step=0072354) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.15652514185091, LR: 0.0003 +[2026-03-05 23:37:56] (step=0072355) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.156720798278224, LR: 0.0003 +[2026-03-05 23:38:04] (step=0072356) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.156916454705538, LR: 0.0003 +[2026-03-05 23:38:12] (step=0072357) Train Loss: 0.4273, Train Steps/Sec: 0.13, Epoch: 14.15711211113285, LR: 0.0003 +[2026-03-05 23:38:19] (step=0072358) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.157307767560164, LR: 0.0003 +[2026-03-05 23:38:27] (step=0072359) Train Loss: 0.4560, Train Steps/Sec: 0.13, Epoch: 14.157503423987478, LR: 0.0003 +[2026-03-05 23:38:35] (step=0072360) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 14.157699080414792, LR: 0.0003 +[2026-03-05 23:38:43] (step=0072361) Train Loss: 0.4240, Train Steps/Sec: 0.13, Epoch: 14.157894736842104, LR: 0.0003 +[2026-03-05 23:38:51] (step=0072362) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.158090393269418, LR: 0.0003 +[2026-03-05 23:38:59] (step=0072363) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.158286049696732, LR: 0.0003 +[2026-03-05 23:39:07] (step=0072364) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.158481706124046, LR: 0.0003 +[2026-03-05 23:39:14] (step=0072365) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 14.15867736255136, LR: 0.0003 +[2026-03-05 23:39:22] (step=0072366) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.158873018978673, LR: 0.0003 +[2026-03-05 23:39:30] (step=0072367) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.159068675405987, LR: 0.0003 +[2026-03-05 23:39:38] (step=0072368) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 14.1592643318333, LR: 0.0003 +[2026-03-05 23:39:46] (step=0072369) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.159459988260615, LR: 0.0003 +[2026-03-05 23:39:54] (step=0072370) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.159655644687929, LR: 0.0003 +[2026-03-05 23:40:02] (step=0072371) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.159851301115241, LR: 0.0003 +[2026-03-05 23:40:09] (step=0072372) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.160046957542555, LR: 0.0003 +[2026-03-05 23:40:17] (step=0072373) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.160242613969869, LR: 0.0003 +[2026-03-05 23:40:25] (step=0072374) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.160438270397183, LR: 0.0003 +[2026-03-05 23:40:33] (step=0072375) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.160633926824497, LR: 0.0003 +[2026-03-05 23:40:41] (step=0072376) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.16082958325181, LR: 0.0003 +[2026-03-05 23:40:49] (step=0072377) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.161025239679123, LR: 0.0003 +[2026-03-05 23:40:57] (step=0072378) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.161220896106437, LR: 0.0003 +[2026-03-05 23:41:04] (step=0072379) Train Loss: 0.4589, Train Steps/Sec: 0.13, Epoch: 14.161416552533751, LR: 0.0003 +[2026-03-05 23:41:12] (step=0072380) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.161612208961065, LR: 0.0003 +[2026-03-05 23:41:20] (step=0072381) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.161807865388377, LR: 0.0003 +[2026-03-05 23:41:28] (step=0072382) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.162003521815691, LR: 0.0003 +[2026-03-05 23:41:36] (step=0072383) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.162199178243005, LR: 0.0003 +[2026-03-05 23:41:44] (step=0072384) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.16239483467032, LR: 0.0003 +[2026-03-05 23:41:51] (step=0072385) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.162590491097632, LR: 0.0003 +[2026-03-05 23:41:59] (step=0072386) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.162786147524946, LR: 0.0003 +[2026-03-05 23:42:07] (step=0072387) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.16298180395226, LR: 0.0003 +[2026-03-05 23:42:15] (step=0072388) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 14.163177460379574, LR: 0.0003 +[2026-03-05 23:42:23] (step=0072389) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.163373116806888, LR: 0.0003 +[2026-03-05 23:42:31] (step=0072390) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 14.1635687732342, LR: 0.0003 +[2026-03-05 23:42:39] (step=0072391) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.163764429661514, LR: 0.0003 +[2026-03-05 23:42:46] (step=0072392) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.163960086088828, LR: 0.0003 +[2026-03-05 23:42:54] (step=0072393) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.164155742516142, LR: 0.0003 +[2026-03-05 23:43:02] (step=0072394) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 14.164351398943456, LR: 0.0003 +[2026-03-05 23:43:10] (step=0072395) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.164547055370768, LR: 0.0003 +[2026-03-05 23:43:18] (step=0072396) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 14.164742711798082, LR: 0.0003 +[2026-03-05 23:43:26] (step=0072397) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.164938368225396, LR: 0.0003 +[2026-03-05 23:43:33] (step=0072398) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.16513402465271, LR: 0.0003 +[2026-03-05 23:43:41] (step=0072399) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 14.165329681080024, LR: 0.0003 +[2026-03-05 23:43:49] (step=0072400) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.165525337507336, LR: 0.0003 +[2026-03-05 23:43:57] (step=0072401) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.16572099393465, LR: 0.0003 +[2026-03-05 23:44:05] (step=0072402) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.165916650361964, LR: 0.0003 +[2026-03-05 23:44:13] (step=0072403) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 14.166112306789278, LR: 0.0003 +[2026-03-05 23:44:20] (step=0072404) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.166307963216592, LR: 0.0003 +[2026-03-05 23:44:28] (step=0072405) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 14.166503619643905, LR: 0.0003 +[2026-03-05 23:44:36] (step=0072406) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 14.166699276071219, LR: 0.0003 +[2026-03-05 23:44:44] (step=0072407) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.166894932498533, LR: 0.0003 +[2026-03-05 23:44:52] (step=0072408) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.167090588925847, LR: 0.0003 +[2026-03-05 23:45:00] (step=0072409) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 14.16728624535316, LR: 0.0003 +[2026-03-05 23:45:08] (step=0072410) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.167481901780473, LR: 0.0003 +[2026-03-05 23:45:15] (step=0072411) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.167677558207787, LR: 0.0003 +[2026-03-05 23:45:23] (step=0072412) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.167873214635101, LR: 0.0003 +[2026-03-05 23:45:31] (step=0072413) Train Loss: 0.4638, Train Steps/Sec: 0.13, Epoch: 14.168068871062415, LR: 0.0003 +[2026-03-05 23:45:39] (step=0072414) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.168264527489727, LR: 0.0003 +[2026-03-05 23:45:47] (step=0072415) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 14.168460183917041, LR: 0.0003 +[2026-03-05 23:45:55] (step=0072416) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.168655840344355, LR: 0.0003 +[2026-03-05 23:46:02] (step=0072417) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.16885149677167, LR: 0.0003 +[2026-03-05 23:46:10] (step=0072418) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.169047153198983, LR: 0.0003 +[2026-03-05 23:46:18] (step=0072419) Train Loss: 0.4529, Train Steps/Sec: 0.12, Epoch: 14.169242809626295, LR: 0.0003 +[2026-03-05 23:46:26] (step=0072420) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 14.16943846605361, LR: 0.0003 +[2026-03-05 23:46:34] (step=0072421) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 14.169634122480923, LR: 0.0003 +[2026-03-05 23:46:42] (step=0072422) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.169829778908237, LR: 0.0003 +[2026-03-05 23:46:50] (step=0072423) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.170025435335551, LR: 0.0003 +[2026-03-05 23:46:58] (step=0072424) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.170221091762864, LR: 0.0003 +[2026-03-05 23:47:05] (step=0072425) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.170416748190178, LR: 0.0003 +[2026-03-05 23:47:13] (step=0072426) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 14.170612404617492, LR: 0.0003 +[2026-03-05 23:47:21] (step=0072427) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.170808061044806, LR: 0.0003 +[2026-03-05 23:47:29] (step=0072428) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.17100371747212, LR: 0.0003 +[2026-03-05 23:47:37] (step=0072429) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.171199373899432, LR: 0.0003 +[2026-03-05 23:47:45] (step=0072430) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 14.171395030326746, LR: 0.0003 +[2026-03-05 23:47:53] (step=0072431) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.17159068675406, LR: 0.0003 +[2026-03-05 23:48:00] (step=0072432) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.171786343181374, LR: 0.0003 +[2026-03-05 23:48:08] (step=0072433) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.171981999608688, LR: 0.0003 +[2026-03-05 23:48:16] (step=0072434) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.172177656036, LR: 0.0003 +[2026-03-05 23:48:24] (step=0072435) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 14.172373312463314, LR: 0.0003 +[2026-03-05 23:48:32] (step=0072436) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 14.172568968890628, LR: 0.0003 +[2026-03-05 23:48:40] (step=0072437) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 14.172764625317942, LR: 0.0003 +[2026-03-05 23:48:47] (step=0072438) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 14.172960281745254, LR: 0.0003 +[2026-03-05 23:48:55] (step=0072439) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.173155938172568, LR: 0.0003 +[2026-03-05 23:49:03] (step=0072440) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.173351594599882, LR: 0.0003 +[2026-03-05 23:49:11] (step=0072441) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 14.173547251027196, LR: 0.0003 +[2026-03-05 23:49:19] (step=0072442) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.17374290745451, LR: 0.0003 +[2026-03-05 23:49:27] (step=0072443) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.173938563881823, LR: 0.0003 +[2026-03-05 23:49:34] (step=0072444) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.174134220309137, LR: 0.0003 +[2026-03-05 23:49:42] (step=0072445) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 14.17432987673645, LR: 0.0003 +[2026-03-05 23:49:50] (step=0072446) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.174525533163765, LR: 0.0003 +[2026-03-05 23:49:58] (step=0072447) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.174721189591079, LR: 0.0003 +[2026-03-05 23:50:06] (step=0072448) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.174916846018391, LR: 0.0003 +[2026-03-05 23:50:14] (step=0072449) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 14.175112502445705, LR: 0.0003 +[2026-03-05 23:50:22] (step=0072450) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 14.175308158873019, LR: 0.0003 +[2026-03-05 23:50:29] (step=0072451) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 14.175503815300333, LR: 0.0003 +[2026-03-05 23:50:37] (step=0072452) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.175699471727647, LR: 0.0003 +[2026-03-05 23:50:45] (step=0072453) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 14.17589512815496, LR: 0.0003 +[2026-03-05 23:50:53] (step=0072454) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.176090784582273, LR: 0.0003 +[2026-03-05 23:51:01] (step=0072455) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.176286441009587, LR: 0.0003 +[2026-03-05 23:51:09] (step=0072456) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.176482097436901, LR: 0.0003 +[2026-03-05 23:51:16] (step=0072457) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 14.176677753864215, LR: 0.0003 +[2026-03-05 23:51:24] (step=0072458) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 14.176873410291527, LR: 0.0003 +[2026-03-05 23:51:32] (step=0072459) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.177069066718841, LR: 0.0003 +[2026-03-05 23:51:40] (step=0072460) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 14.177264723146155, LR: 0.0003 +[2026-03-05 23:51:48] (step=0072461) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.17746037957347, LR: 0.0003 +[2026-03-05 23:51:56] (step=0072462) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.177656036000784, LR: 0.0003 +[2026-03-05 23:52:04] (step=0072463) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.177851692428096, LR: 0.0003 +[2026-03-05 23:52:11] (step=0072464) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.17804734885541, LR: 0.0003 +[2026-03-05 23:52:19] (step=0072465) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.178243005282724, LR: 0.0003 +[2026-03-05 23:52:27] (step=0072466) Train Loss: 0.4368, Train Steps/Sec: 0.12, Epoch: 14.178438661710038, LR: 0.0003 +[2026-03-05 23:52:35] (step=0072467) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.17863431813735, LR: 0.0003 +[2026-03-05 23:52:43] (step=0072468) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.178829974564664, LR: 0.0003 +[2026-03-05 23:52:51] (step=0072469) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.179025630991978, LR: 0.0003 +[2026-03-05 23:52:59] (step=0072470) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.179221287419292, LR: 0.0003 +[2026-03-05 23:53:07] (step=0072471) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 14.179416943846606, LR: 0.0003 +[2026-03-05 23:53:14] (step=0072472) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.179612600273918, LR: 0.0003 +[2026-03-05 23:53:22] (step=0072473) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 14.179808256701232, LR: 0.0003 +[2026-03-05 23:53:30] (step=0072474) Train Loss: 0.4580, Train Steps/Sec: 0.13, Epoch: 14.180003913128546, LR: 0.0003 +[2026-03-05 23:53:38] (step=0072475) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.18019956955586, LR: 0.0003 +[2026-03-05 23:53:46] (step=0072476) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.180395225983174, LR: 0.0003 +[2026-03-05 23:53:54] (step=0072477) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.180590882410486, LR: 0.0003 +[2026-03-05 23:54:01] (step=0072478) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.1807865388378, LR: 0.0003 +[2026-03-05 23:54:09] (step=0072479) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.180982195265114, LR: 0.0003 +[2026-03-05 23:54:17] (step=0072480) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.181177851692429, LR: 0.0003 +[2026-03-05 23:54:25] (step=0072481) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.181373508119743, LR: 0.0003 +[2026-03-05 23:54:33] (step=0072482) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 14.181569164547055, LR: 0.0003 +[2026-03-05 23:54:41] (step=0072483) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 14.181764820974369, LR: 0.0003 +[2026-03-05 23:54:49] (step=0072484) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.181960477401683, LR: 0.0003 +[2026-03-05 23:54:56] (step=0072485) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 14.182156133828997, LR: 0.0003 +[2026-03-05 23:55:04] (step=0072486) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.18235179025631, LR: 0.0003 +[2026-03-05 23:55:12] (step=0072487) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.182547446683623, LR: 0.0003 +[2026-03-05 23:55:20] (step=0072488) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.182743103110937, LR: 0.0003 +[2026-03-05 23:55:28] (step=0072489) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.182938759538251, LR: 0.0003 +[2026-03-05 23:55:36] (step=0072490) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.183134415965565, LR: 0.0003 +[2026-03-05 23:55:44] (step=0072491) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.183330072392877, LR: 0.0003 +[2026-03-05 23:55:51] (step=0072492) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.183525728820191, LR: 0.0003 +[2026-03-05 23:55:59] (step=0072493) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.183721385247505, LR: 0.0003 +[2026-03-05 23:56:07] (step=0072494) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 14.18391704167482, LR: 0.0003 +[2026-03-05 23:56:15] (step=0072495) Train Loss: 0.4541, Train Steps/Sec: 0.13, Epoch: 14.184112698102133, LR: 0.0003 +[2026-03-05 23:56:23] (step=0072496) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 14.184308354529445, LR: 0.0003 +[2026-03-05 23:56:31] (step=0072497) Train Loss: 0.4537, Train Steps/Sec: 0.13, Epoch: 14.18450401095676, LR: 0.0003 +[2026-03-05 23:56:38] (step=0072498) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.184699667384074, LR: 0.0003 +[2026-03-05 23:56:46] (step=0072499) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.184895323811388, LR: 0.0003 +[2026-03-05 23:56:54] (step=0072500) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.185090980238702, LR: 0.0003 +[2026-03-05 23:56:54] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0072500/ +[2026-03-05 23:57:02] (step=0072501) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 14.185286636666014, LR: 0.0003 +[2026-03-05 23:57:10] (step=0072502) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.185482293093328, LR: 0.0003 +[2026-03-05 23:57:18] (step=0072503) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.185677949520642, LR: 0.0003 +[2026-03-05 23:57:25] (step=0072504) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.185873605947956, LR: 0.0003 +[2026-03-05 23:57:33] (step=0072505) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.18606926237527, LR: 0.0003 +[2026-03-05 23:57:41] (step=0072506) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 14.186264918802582, LR: 0.0003 +[2026-03-05 23:57:49] (step=0072507) Train Loss: 0.4640, Train Steps/Sec: 0.13, Epoch: 14.186460575229896, LR: 0.0003 +[2026-03-05 23:57:57] (step=0072508) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.18665623165721, LR: 0.0003 +[2026-03-05 23:58:05] (step=0072509) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.186851888084524, LR: 0.0003 +[2026-03-05 23:58:13] (step=0072510) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.187047544511838, LR: 0.0003 +[2026-03-05 23:58:20] (step=0072511) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.18724320093915, LR: 0.0003 +[2026-03-05 23:58:28] (step=0072512) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.187438857366464, LR: 0.0003 +[2026-03-05 23:58:36] (step=0072513) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.187634513793778, LR: 0.0003 +[2026-03-05 23:58:44] (step=0072514) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.187830170221092, LR: 0.0003 +[2026-03-05 23:58:52] (step=0072515) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.188025826648406, LR: 0.0003 +[2026-03-05 23:59:00] (step=0072516) Train Loss: 0.4588, Train Steps/Sec: 0.13, Epoch: 14.188221483075719, LR: 0.0003 +[2026-03-05 23:59:08] (step=0072517) Train Loss: 0.4653, Train Steps/Sec: 0.13, Epoch: 14.188417139503033, LR: 0.0003 +[2026-03-05 23:59:16] (step=0072518) Train Loss: 0.4405, Train Steps/Sec: 0.12, Epoch: 14.188612795930347, LR: 0.0003 +[2026-03-05 23:59:23] (step=0072519) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 14.18880845235766, LR: 0.0003 +[2026-03-05 23:59:31] (step=0072520) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 14.189004108784973, LR: 0.0003 +[2026-03-05 23:59:39] (step=0072521) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 14.189199765212287, LR: 0.0003 +[2026-03-05 23:59:47] (step=0072522) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.1893954216396, LR: 0.0003 +[2026-03-05 23:59:55] (step=0072523) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.189591078066915, LR: 0.0003 +[2026-03-06 00:00:03] (step=0072524) Train Loss: 0.4205, Train Steps/Sec: 0.13, Epoch: 14.189786734494229, LR: 0.0003 +[2026-03-06 00:00:11] (step=0072525) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 14.189982390921541, LR: 0.0003 +[2026-03-06 00:00:18] (step=0072526) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 14.190178047348855, LR: 0.0003 +[2026-03-06 00:00:26] (step=0072527) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.190373703776169, LR: 0.0003 +[2026-03-06 00:00:34] (step=0072528) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.190569360203483, LR: 0.0003 +[2026-03-06 00:00:42] (step=0072529) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 14.190765016630797, LR: 0.0003 +[2026-03-06 00:00:50] (step=0072530) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.19096067305811, LR: 0.0003 +[2026-03-06 00:00:58] (step=0072531) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.191156329485423, LR: 0.0003 +[2026-03-06 00:01:05] (step=0072532) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.191351985912737, LR: 0.0003 +[2026-03-06 00:01:13] (step=0072533) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.191547642340051, LR: 0.0003 +[2026-03-06 00:01:21] (step=0072534) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.191743298767365, LR: 0.0003 +[2026-03-06 00:01:29] (step=0072535) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.191938955194678, LR: 0.0003 +[2026-03-06 00:01:37] (step=0072536) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.192134611621992, LR: 0.0003 +[2026-03-06 00:01:45] (step=0072537) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.192330268049306, LR: 0.0003 +[2026-03-06 00:01:53] (step=0072538) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.19252592447662, LR: 0.0003 +[2026-03-06 00:02:00] (step=0072539) Train Loss: 0.4270, Train Steps/Sec: 0.13, Epoch: 14.192721580903934, LR: 0.0003 +[2026-03-06 00:02:08] (step=0072540) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.192917237331246, LR: 0.0003 +[2026-03-06 00:02:16] (step=0072541) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.19311289375856, LR: 0.0003 +[2026-03-06 00:02:24] (step=0072542) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 14.193308550185874, LR: 0.0003 +[2026-03-06 00:02:32] (step=0072543) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.193504206613188, LR: 0.0003 +[2026-03-06 00:02:40] (step=0072544) Train Loss: 0.4586, Train Steps/Sec: 0.13, Epoch: 14.1936998630405, LR: 0.0003 +[2026-03-06 00:02:47] (step=0072545) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.193895519467814, LR: 0.0003 +[2026-03-06 00:02:55] (step=0072546) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.194091175895128, LR: 0.0003 +[2026-03-06 00:03:03] (step=0072547) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.194286832322442, LR: 0.0003 +[2026-03-06 00:03:11] (step=0072548) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 14.194482488749756, LR: 0.0003 +[2026-03-06 00:03:19] (step=0072549) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 14.194678145177068, LR: 0.0003 +[2026-03-06 00:03:27] (step=0072550) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.194873801604382, LR: 0.0003 +[2026-03-06 00:03:35] (step=0072551) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.195069458031696, LR: 0.0003 +[2026-03-06 00:03:42] (step=0072552) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.19526511445901, LR: 0.0003 +[2026-03-06 00:03:50] (step=0072553) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.195460770886324, LR: 0.0003 +[2026-03-06 00:03:58] (step=0072554) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 14.195656427313637, LR: 0.0003 +[2026-03-06 00:04:06] (step=0072555) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 14.19585208374095, LR: 0.0003 +[2026-03-06 00:04:14] (step=0072556) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 14.196047740168265, LR: 0.0003 +[2026-03-06 00:04:22] (step=0072557) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 14.196243396595579, LR: 0.0003 +[2026-03-06 00:04:29] (step=0072558) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.196439053022893, LR: 0.0003 +[2026-03-06 00:04:37] (step=0072559) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 14.196634709450205, LR: 0.0003 +[2026-03-06 00:04:45] (step=0072560) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.196830365877519, LR: 0.0003 +[2026-03-06 00:04:53] (step=0072561) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.197026022304833, LR: 0.0003 +[2026-03-06 00:05:01] (step=0072562) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 14.197221678732147, LR: 0.0003 +[2026-03-06 00:05:09] (step=0072563) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.19741733515946, LR: 0.0003 +[2026-03-06 00:05:17] (step=0072564) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.197612991586773, LR: 0.0003 +[2026-03-06 00:05:25] (step=0072565) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.197808648014087, LR: 0.0003 +[2026-03-06 00:05:33] (step=0072566) Train Loss: 0.4413, Train Steps/Sec: 0.12, Epoch: 14.198004304441401, LR: 0.0003 +[2026-03-06 00:05:41] (step=0072567) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.198199960868715, LR: 0.0003 +[2026-03-06 00:05:48] (step=0072568) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.198395617296029, LR: 0.0003 +[2026-03-06 00:05:56] (step=0072569) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.198591273723341, LR: 0.0003 +[2026-03-06 00:06:04] (step=0072570) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 14.198786930150655, LR: 0.0003 +[2026-03-06 00:06:12] (step=0072571) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.19898258657797, LR: 0.0003 +[2026-03-06 00:06:20] (step=0072572) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.199178243005283, LR: 0.0003 +[2026-03-06 00:06:28] (step=0072573) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.199373899432596, LR: 0.0003 +[2026-03-06 00:06:35] (step=0072574) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.19956955585991, LR: 0.0003 +[2026-03-06 00:06:43] (step=0072575) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.199765212287224, LR: 0.0003 +[2026-03-06 00:06:51] (step=0072576) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.199960868714538, LR: 0.0003 +[2026-03-06 00:06:59] (step=0072577) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.200156525141852, LR: 0.0003 +[2026-03-06 00:07:07] (step=0072578) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.200352181569164, LR: 0.0003 +[2026-03-06 00:07:15] (step=0072579) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.200547837996478, LR: 0.0003 +[2026-03-06 00:07:22] (step=0072580) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 14.200743494423792, LR: 0.0003 +[2026-03-06 00:07:30] (step=0072581) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.200939150851106, LR: 0.0003 +[2026-03-06 00:07:38] (step=0072582) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 14.20113480727842, LR: 0.0003 +[2026-03-06 00:07:46] (step=0072583) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.201330463705732, LR: 0.0003 +[2026-03-06 00:07:54] (step=0072584) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.201526120133046, LR: 0.0003 +[2026-03-06 00:08:02] (step=0072585) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.20172177656036, LR: 0.0003 +[2026-03-06 00:08:10] (step=0072586) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.201917432987674, LR: 0.0003 +[2026-03-06 00:08:17] (step=0072587) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.202113089414988, LR: 0.0003 +[2026-03-06 00:08:25] (step=0072588) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.2023087458423, LR: 0.0003 +[2026-03-06 00:08:33] (step=0072589) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 14.202504402269614, LR: 0.0003 +[2026-03-06 00:08:41] (step=0072590) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.202700058696928, LR: 0.0003 +[2026-03-06 00:08:49] (step=0072591) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.202895715124242, LR: 0.0003 +[2026-03-06 00:08:57] (step=0072592) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.203091371551556, LR: 0.0003 +[2026-03-06 00:09:04] (step=0072593) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.203287027978869, LR: 0.0003 +[2026-03-06 00:09:12] (step=0072594) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.203482684406183, LR: 0.0003 +[2026-03-06 00:09:20] (step=0072595) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 14.203678340833497, LR: 0.0003 +[2026-03-06 00:09:28] (step=0072596) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.20387399726081, LR: 0.0003 +[2026-03-06 00:09:36] (step=0072597) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.204069653688123, LR: 0.0003 +[2026-03-06 00:09:44] (step=0072598) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.204265310115437, LR: 0.0003 +[2026-03-06 00:09:51] (step=0072599) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.20446096654275, LR: 0.0003 +[2026-03-06 00:09:59] (step=0072600) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 14.204656622970065, LR: 0.0003 +[2026-03-06 00:10:07] (step=0072601) Train Loss: 0.4255, Train Steps/Sec: 0.13, Epoch: 14.204852279397379, LR: 0.0003 +[2026-03-06 00:10:15] (step=0072602) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.205047935824691, LR: 0.0003 +[2026-03-06 00:10:23] (step=0072603) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 14.205243592252005, LR: 0.0003 +[2026-03-06 00:10:31] (step=0072604) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 14.205439248679319, LR: 0.0003 +[2026-03-06 00:10:39] (step=0072605) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 14.205634905106633, LR: 0.0003 +[2026-03-06 00:10:46] (step=0072606) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 14.205830561533947, LR: 0.0003 +[2026-03-06 00:10:54] (step=0072607) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 14.20602621796126, LR: 0.0003 +[2026-03-06 00:11:02] (step=0072608) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 14.206221874388573, LR: 0.0003 +[2026-03-06 00:11:10] (step=0072609) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.206417530815887, LR: 0.0003 +[2026-03-06 00:11:18] (step=0072610) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 14.206613187243201, LR: 0.0003 +[2026-03-06 00:11:26] (step=0072611) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 14.206808843670515, LR: 0.0003 +[2026-03-06 00:11:33] (step=0072612) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 14.207004500097828, LR: 0.0003 +[2026-03-06 00:11:41] (step=0072613) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.207200156525142, LR: 0.0003 +[2026-03-06 00:11:49] (step=0072614) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.207395812952456, LR: 0.0003 +[2026-03-06 00:11:57] (step=0072615) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.20759146937977, LR: 0.0003 +[2026-03-06 00:12:05] (step=0072616) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.207787125807084, LR: 0.0003 +[2026-03-06 00:12:13] (step=0072617) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.207982782234396, LR: 0.0003 +[2026-03-06 00:12:21] (step=0072618) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 14.20817843866171, LR: 0.0003 +[2026-03-06 00:12:29] (step=0072619) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.208374095089024, LR: 0.0003 +[2026-03-06 00:12:36] (step=0072620) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 14.208569751516338, LR: 0.0003 +[2026-03-06 00:12:44] (step=0072621) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 14.20876540794365, LR: 0.0003 +[2026-03-06 00:12:52] (step=0072622) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.208961064370964, LR: 0.0003 +[2026-03-06 00:13:00] (step=0072623) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.209156720798278, LR: 0.0003 +[2026-03-06 00:13:08] (step=0072624) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.209352377225592, LR: 0.0003 +[2026-03-06 00:13:16] (step=0072625) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.209548033652906, LR: 0.0003 +[2026-03-06 00:13:24] (step=0072626) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.209743690080218, LR: 0.0003 +[2026-03-06 00:13:31] (step=0072627) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.209939346507532, LR: 0.0003 +[2026-03-06 00:13:39] (step=0072628) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.210135002934846, LR: 0.0003 +[2026-03-06 00:13:47] (step=0072629) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.21033065936216, LR: 0.0003 +[2026-03-06 00:13:55] (step=0072630) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.210526315789474, LR: 0.0003 +[2026-03-06 00:14:03] (step=0072631) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.210721972216787, LR: 0.0003 +[2026-03-06 00:14:11] (step=0072632) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.2109176286441, LR: 0.0003 +[2026-03-06 00:14:19] (step=0072633) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.211113285071415, LR: 0.0003 +[2026-03-06 00:14:26] (step=0072634) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.211308941498729, LR: 0.0003 +[2026-03-06 00:14:34] (step=0072635) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.211504597926043, LR: 0.0003 +[2026-03-06 00:14:42] (step=0072636) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.211700254353355, LR: 0.0003 +[2026-03-06 00:14:50] (step=0072637) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.211895910780669, LR: 0.0003 +[2026-03-06 00:14:58] (step=0072638) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.212091567207983, LR: 0.0003 +[2026-03-06 00:15:06] (step=0072639) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.212287223635297, LR: 0.0003 +[2026-03-06 00:15:13] (step=0072640) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.21248288006261, LR: 0.0003 +[2026-03-06 00:15:21] (step=0072641) Train Loss: 0.4599, Train Steps/Sec: 0.13, Epoch: 14.212678536489923, LR: 0.0003 +[2026-03-06 00:15:29] (step=0072642) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 14.212874192917237, LR: 0.0003 +[2026-03-06 00:15:37] (step=0072643) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.213069849344551, LR: 0.0003 +[2026-03-06 00:15:45] (step=0072644) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 14.213265505771865, LR: 0.0003 +[2026-03-06 00:15:53] (step=0072645) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.213461162199179, LR: 0.0003 +[2026-03-06 00:16:00] (step=0072646) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.213656818626491, LR: 0.0003 +[2026-03-06 00:16:08] (step=0072647) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.213852475053805, LR: 0.0003 +[2026-03-06 00:16:16] (step=0072648) Train Loss: 0.4571, Train Steps/Sec: 0.13, Epoch: 14.21404813148112, LR: 0.0003 +[2026-03-06 00:16:24] (step=0072649) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 14.214243787908433, LR: 0.0003 +[2026-03-06 00:16:32] (step=0072650) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.214439444335746, LR: 0.0003 +[2026-03-06 00:16:40] (step=0072651) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 14.21463510076306, LR: 0.0003 +[2026-03-06 00:16:47] (step=0072652) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.214830757190374, LR: 0.0003 +[2026-03-06 00:16:55] (step=0072653) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.215026413617688, LR: 0.0003 +[2026-03-06 00:17:03] (step=0072654) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.215222070045002, LR: 0.0003 +[2026-03-06 00:17:11] (step=0072655) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 14.215417726472314, LR: 0.0003 +[2026-03-06 00:17:19] (step=0072656) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 14.215613382899628, LR: 0.0003 +[2026-03-06 00:17:27] (step=0072657) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.215809039326942, LR: 0.0003 +[2026-03-06 00:17:34] (step=0072658) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.216004695754256, LR: 0.0003 +[2026-03-06 00:17:42] (step=0072659) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 14.21620035218157, LR: 0.0003 +[2026-03-06 00:17:50] (step=0072660) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.216396008608882, LR: 0.0003 +[2026-03-06 00:17:58] (step=0072661) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.216591665036196, LR: 0.0003 +[2026-03-06 00:18:06] (step=0072662) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.21678732146351, LR: 0.0003 +[2026-03-06 00:18:14] (step=0072663) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.216982977890824, LR: 0.0003 +[2026-03-06 00:18:22] (step=0072664) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 14.217178634318138, LR: 0.0003 +[2026-03-06 00:18:30] (step=0072665) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.21737429074545, LR: 0.0003 +[2026-03-06 00:18:37] (step=0072666) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 14.217569947172764, LR: 0.0003 +[2026-03-06 00:18:45] (step=0072667) Train Loss: 0.4329, Train Steps/Sec: 0.12, Epoch: 14.217765603600078, LR: 0.0003 +[2026-03-06 00:18:53] (step=0072668) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 14.217961260027392, LR: 0.0003 +[2026-03-06 00:19:01] (step=0072669) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 14.218156916454706, LR: 0.0003 +[2026-03-06 00:19:09] (step=0072670) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.218352572882019, LR: 0.0003 +[2026-03-06 00:19:17] (step=0072671) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.218548229309333, LR: 0.0003 +[2026-03-06 00:19:25] (step=0072672) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.218743885736647, LR: 0.0003 +[2026-03-06 00:19:33] (step=0072673) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.21893954216396, LR: 0.0003 +[2026-03-06 00:19:40] (step=0072674) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.219135198591273, LR: 0.0003 +[2026-03-06 00:19:48] (step=0072675) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 14.219330855018587, LR: 0.0003 +[2026-03-06 00:19:56] (step=0072676) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 14.2195265114459, LR: 0.0003 +[2026-03-06 00:20:04] (step=0072677) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.219722167873215, LR: 0.0003 +[2026-03-06 00:20:12] (step=0072678) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.219917824300529, LR: 0.0003 +[2026-03-06 00:20:20] (step=0072679) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.220113480727841, LR: 0.0003 +[2026-03-06 00:20:27] (step=0072680) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 14.220309137155155, LR: 0.0003 +[2026-03-06 00:20:35] (step=0072681) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.220504793582469, LR: 0.0003 +[2026-03-06 00:20:43] (step=0072682) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.220700450009783, LR: 0.0003 +[2026-03-06 00:20:51] (step=0072683) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.220896106437097, LR: 0.0003 +[2026-03-06 00:20:59] (step=0072684) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.22109176286441, LR: 0.0003 +[2026-03-06 00:21:07] (step=0072685) Train Loss: 0.4279, Train Steps/Sec: 0.13, Epoch: 14.221287419291723, LR: 0.0003 +[2026-03-06 00:21:15] (step=0072686) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 14.221483075719037, LR: 0.0003 +[2026-03-06 00:21:22] (step=0072687) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.221678732146351, LR: 0.0003 +[2026-03-06 00:21:30] (step=0072688) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.221874388573665, LR: 0.0003 +[2026-03-06 00:21:38] (step=0072689) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.222070045000978, LR: 0.0003 +[2026-03-06 00:21:46] (step=0072690) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.222265701428292, LR: 0.0003 +[2026-03-06 00:21:54] (step=0072691) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.222461357855606, LR: 0.0003 +[2026-03-06 00:22:02] (step=0072692) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 14.22265701428292, LR: 0.0003 +[2026-03-06 00:22:09] (step=0072693) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.222852670710234, LR: 0.0003 +[2026-03-06 00:22:17] (step=0072694) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.223048327137546, LR: 0.0003 +[2026-03-06 00:22:25] (step=0072695) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.22324398356486, LR: 0.0003 +[2026-03-06 00:22:33] (step=0072696) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.223439639992174, LR: 0.0003 +[2026-03-06 00:22:41] (step=0072697) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 14.223635296419488, LR: 0.0003 +[2026-03-06 00:22:49] (step=0072698) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.223830952846802, LR: 0.0003 +[2026-03-06 00:22:56] (step=0072699) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 14.224026609274114, LR: 0.0003 +[2026-03-06 00:23:04] (step=0072700) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.224222265701428, LR: 0.0003 +[2026-03-06 00:23:12] (step=0072701) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.224417922128742, LR: 0.0003 +[2026-03-06 00:23:20] (step=0072702) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.224613578556056, LR: 0.0003 +[2026-03-06 00:23:28] (step=0072703) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 14.224809234983368, LR: 0.0003 +[2026-03-06 00:23:36] (step=0072704) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.225004891410682, LR: 0.0003 +[2026-03-06 00:23:43] (step=0072705) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 14.225200547837996, LR: 0.0003 +[2026-03-06 00:23:51] (step=0072706) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.22539620426531, LR: 0.0003 +[2026-03-06 00:23:59] (step=0072707) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.225591860692624, LR: 0.0003 +[2026-03-06 00:24:07] (step=0072708) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.225787517119937, LR: 0.0003 +[2026-03-06 00:24:15] (step=0072709) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 14.22598317354725, LR: 0.0003 +[2026-03-06 00:24:23] (step=0072710) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 14.226178829974565, LR: 0.0003 +[2026-03-06 00:24:31] (step=0072711) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.226374486401879, LR: 0.0003 +[2026-03-06 00:24:39] (step=0072712) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.226570142829193, LR: 0.0003 +[2026-03-06 00:24:46] (step=0072713) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.226765799256505, LR: 0.0003 +[2026-03-06 00:24:54] (step=0072714) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 14.226961455683819, LR: 0.0003 +[2026-03-06 00:25:02] (step=0072715) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.227157112111133, LR: 0.0003 +[2026-03-06 00:25:10] (step=0072716) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.227352768538447, LR: 0.0003 +[2026-03-06 00:25:18] (step=0072717) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.22754842496576, LR: 0.0003 +[2026-03-06 00:25:26] (step=0072718) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 14.227744081393073, LR: 0.0003 +[2026-03-06 00:25:34] (step=0072719) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.227939737820387, LR: 0.0003 +[2026-03-06 00:25:41] (step=0072720) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.228135394247701, LR: 0.0003 +[2026-03-06 00:25:49] (step=0072721) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.228331050675015, LR: 0.0003 +[2026-03-06 00:25:57] (step=0072722) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.22852670710233, LR: 0.0003 +[2026-03-06 00:26:05] (step=0072723) Train Loss: 0.4591, Train Steps/Sec: 0.13, Epoch: 14.228722363529641, LR: 0.0003 +[2026-03-06 00:26:13] (step=0072724) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.228918019956955, LR: 0.0003 +[2026-03-06 00:26:21] (step=0072725) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.22911367638427, LR: 0.0003 +[2026-03-06 00:26:28] (step=0072726) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.229309332811583, LR: 0.0003 +[2026-03-06 00:26:36] (step=0072727) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.229504989238896, LR: 0.0003 +[2026-03-06 00:26:44] (step=0072728) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 14.22970064566621, LR: 0.0003 +[2026-03-06 00:26:52] (step=0072729) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 14.229896302093524, LR: 0.0003 +[2026-03-06 00:27:00] (step=0072730) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 14.230091958520838, LR: 0.0003 +[2026-03-06 00:27:08] (step=0072731) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 14.230287614948152, LR: 0.0003 +[2026-03-06 00:27:16] (step=0072732) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 14.230483271375464, LR: 0.0003 +[2026-03-06 00:27:23] (step=0072733) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.230678927802778, LR: 0.0003 +[2026-03-06 00:27:31] (step=0072734) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 14.230874584230092, LR: 0.0003 +[2026-03-06 00:27:39] (step=0072735) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.231070240657406, LR: 0.0003 +[2026-03-06 00:27:47] (step=0072736) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.23126589708472, LR: 0.0003 +[2026-03-06 00:27:55] (step=0072737) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.231461553512032, LR: 0.0003 +[2026-03-06 00:28:03] (step=0072738) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.231657209939346, LR: 0.0003 +[2026-03-06 00:28:11] (step=0072739) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 14.23185286636666, LR: 0.0003 +[2026-03-06 00:28:18] (step=0072740) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 14.232048522793974, LR: 0.0003 +[2026-03-06 00:28:26] (step=0072741) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.232244179221288, LR: 0.0003 +[2026-03-06 00:28:34] (step=0072742) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.2324398356486, LR: 0.0003 +[2026-03-06 00:28:42] (step=0072743) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.232635492075914, LR: 0.0003 +[2026-03-06 00:28:50] (step=0072744) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.232831148503228, LR: 0.0003 +[2026-03-06 00:28:58] (step=0072745) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.233026804930542, LR: 0.0003 +[2026-03-06 00:29:05] (step=0072746) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.233222461357856, LR: 0.0003 +[2026-03-06 00:29:13] (step=0072747) Train Loss: 0.4266, Train Steps/Sec: 0.13, Epoch: 14.233418117785169, LR: 0.0003 +[2026-03-06 00:29:21] (step=0072748) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 14.233613774212483, LR: 0.0003 +[2026-03-06 00:29:29] (step=0072749) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 14.233809430639797, LR: 0.0003 +[2026-03-06 00:29:37] (step=0072750) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.23400508706711, LR: 0.0003 +[2026-03-06 00:29:45] (step=0072751) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.234200743494425, LR: 0.0003 +[2026-03-06 00:29:53] (step=0072752) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.234396399921737, LR: 0.0003 +[2026-03-06 00:30:00] (step=0072753) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.234592056349051, LR: 0.0003 +[2026-03-06 00:30:08] (step=0072754) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.234787712776365, LR: 0.0003 +[2026-03-06 00:30:16] (step=0072755) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 14.234983369203679, LR: 0.0003 +[2026-03-06 00:30:24] (step=0072756) Train Loss: 0.4395, Train Steps/Sec: 0.12, Epoch: 14.235179025630991, LR: 0.0003 +[2026-03-06 00:30:32] (step=0072757) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.235374682058305, LR: 0.0003 +[2026-03-06 00:30:40] (step=0072758) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.23557033848562, LR: 0.0003 +[2026-03-06 00:30:48] (step=0072759) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.235765994912933, LR: 0.0003 +[2026-03-06 00:30:55] (step=0072760) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.235961651340247, LR: 0.0003 +[2026-03-06 00:31:03] (step=0072761) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 14.23615730776756, LR: 0.0003 +[2026-03-06 00:31:11] (step=0072762) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.236352964194873, LR: 0.0003 +[2026-03-06 00:31:19] (step=0072763) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.236548620622187, LR: 0.0003 +[2026-03-06 00:31:27] (step=0072764) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.236744277049501, LR: 0.0003 +[2026-03-06 00:31:35] (step=0072765) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.236939933476815, LR: 0.0003 +[2026-03-06 00:31:43] (step=0072766) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.237135589904128, LR: 0.0003 +[2026-03-06 00:31:51] (step=0072767) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 14.237331246331442, LR: 0.0003 +[2026-03-06 00:31:58] (step=0072768) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.237526902758756, LR: 0.0003 +[2026-03-06 00:32:06] (step=0072769) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.23772255918607, LR: 0.0003 +[2026-03-06 00:32:14] (step=0072770) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.237918215613384, LR: 0.0003 +[2026-03-06 00:32:22] (step=0072771) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.238113872040696, LR: 0.0003 +[2026-03-06 00:32:30] (step=0072772) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.23830952846801, LR: 0.0003 +[2026-03-06 00:32:38] (step=0072773) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.238505184895324, LR: 0.0003 +[2026-03-06 00:32:45] (step=0072774) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.238700841322638, LR: 0.0003 +[2026-03-06 00:32:53] (step=0072775) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.238896497749952, LR: 0.0003 +[2026-03-06 00:33:01] (step=0072776) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 14.239092154177264, LR: 0.0003 +[2026-03-06 00:33:09] (step=0072777) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.239287810604578, LR: 0.0003 +[2026-03-06 00:33:17] (step=0072778) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.239483467031892, LR: 0.0003 +[2026-03-06 00:33:25] (step=0072779) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.239679123459206, LR: 0.0003 +[2026-03-06 00:33:32] (step=0072780) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 14.239874779886518, LR: 0.0003 +[2026-03-06 00:33:40] (step=0072781) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.240070436313832, LR: 0.0003 +[2026-03-06 00:33:48] (step=0072782) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 14.240266092741146, LR: 0.0003 +[2026-03-06 00:33:56] (step=0072783) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.24046174916846, LR: 0.0003 +[2026-03-06 00:34:04] (step=0072784) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.240657405595774, LR: 0.0003 +[2026-03-06 00:34:12] (step=0072785) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.240853062023087, LR: 0.0003 +[2026-03-06 00:34:20] (step=0072786) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.2410487184504, LR: 0.0003 +[2026-03-06 00:34:27] (step=0072787) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 14.241244374877715, LR: 0.0003 +[2026-03-06 00:34:35] (step=0072788) Train Loss: 0.4543, Train Steps/Sec: 0.13, Epoch: 14.241440031305029, LR: 0.0003 +[2026-03-06 00:34:43] (step=0072789) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 14.241635687732343, LR: 0.0003 +[2026-03-06 00:34:51] (step=0072790) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.241831344159655, LR: 0.0003 +[2026-03-06 00:34:59] (step=0072791) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.242027000586969, LR: 0.0003 +[2026-03-06 00:35:07] (step=0072792) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.242222657014283, LR: 0.0003 +[2026-03-06 00:35:14] (step=0072793) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.242418313441597, LR: 0.0003 +[2026-03-06 00:35:22] (step=0072794) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.242613969868911, LR: 0.0003 +[2026-03-06 00:35:30] (step=0072795) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.242809626296223, LR: 0.0003 +[2026-03-06 00:35:38] (step=0072796) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.243005282723537, LR: 0.0003 +[2026-03-06 00:35:46] (step=0072797) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.243200939150851, LR: 0.0003 +[2026-03-06 00:35:54] (step=0072798) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.243396595578165, LR: 0.0003 +[2026-03-06 00:36:02] (step=0072799) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.24359225200548, LR: 0.0003 +[2026-03-06 00:36:09] (step=0072800) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.243787908432791, LR: 0.0003 +[2026-03-06 00:36:17] (step=0072801) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 14.243983564860105, LR: 0.0003 +[2026-03-06 00:36:25] (step=0072802) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.24417922128742, LR: 0.0003 +[2026-03-06 00:36:33] (step=0072803) Train Loss: 0.4350, Train Steps/Sec: 0.12, Epoch: 14.244374877714733, LR: 0.0003 +[2026-03-06 00:36:41] (step=0072804) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 14.244570534142047, LR: 0.0003 +[2026-03-06 00:36:49] (step=0072805) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.24476619056936, LR: 0.0003 +[2026-03-06 00:36:57] (step=0072806) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 14.244961846996674, LR: 0.0003 +[2026-03-06 00:37:04] (step=0072807) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.245157503423988, LR: 0.0003 +[2026-03-06 00:37:12] (step=0072808) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.245353159851302, LR: 0.0003 +[2026-03-06 00:37:20] (step=0072809) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.245548816278614, LR: 0.0003 +[2026-03-06 00:37:28] (step=0072810) Train Loss: 0.4252, Train Steps/Sec: 0.13, Epoch: 14.245744472705928, LR: 0.0003 +[2026-03-06 00:37:36] (step=0072811) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.245940129133242, LR: 0.0003 +[2026-03-06 00:37:44] (step=0072812) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.246135785560556, LR: 0.0003 +[2026-03-06 00:37:52] (step=0072813) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.24633144198787, LR: 0.0003 +[2026-03-06 00:38:00] (step=0072814) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.246527098415182, LR: 0.0003 +[2026-03-06 00:38:07] (step=0072815) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.246722754842496, LR: 0.0003 +[2026-03-06 00:38:15] (step=0072816) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.24691841126981, LR: 0.0003 +[2026-03-06 00:38:23] (step=0072817) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.247114067697124, LR: 0.0003 +[2026-03-06 00:38:31] (step=0072818) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.247309724124438, LR: 0.0003 +[2026-03-06 00:38:39] (step=0072819) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.24750538055175, LR: 0.0003 +[2026-03-06 00:38:47] (step=0072820) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.247701036979064, LR: 0.0003 +[2026-03-06 00:38:54] (step=0072821) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.247896693406378, LR: 0.0003 +[2026-03-06 00:39:02] (step=0072822) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 14.248092349833692, LR: 0.0003 +[2026-03-06 00:39:10] (step=0072823) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.248288006261006, LR: 0.0003 +[2026-03-06 00:39:18] (step=0072824) Train Loss: 0.4516, Train Steps/Sec: 0.12, Epoch: 14.248483662688319, LR: 0.0003 +[2026-03-06 00:39:26] (step=0072825) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.248679319115633, LR: 0.0003 +[2026-03-06 00:39:34] (step=0072826) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.248874975542947, LR: 0.0003 +[2026-03-06 00:39:42] (step=0072827) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.24907063197026, LR: 0.0003 +[2026-03-06 00:39:49] (step=0072828) Train Loss: 0.4567, Train Steps/Sec: 0.13, Epoch: 14.249266288397575, LR: 0.0003 +[2026-03-06 00:39:57] (step=0072829) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 14.249461944824887, LR: 0.0003 +[2026-03-06 00:40:05] (step=0072830) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.249657601252201, LR: 0.0003 +[2026-03-06 00:40:13] (step=0072831) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 14.249853257679515, LR: 0.0003 +[2026-03-06 00:40:21] (step=0072832) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.250048914106829, LR: 0.0003 +[2026-03-06 00:40:29] (step=0072833) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.250244570534141, LR: 0.0003 +[2026-03-06 00:40:37] (step=0072834) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 14.250440226961455, LR: 0.0003 +[2026-03-06 00:40:44] (step=0072835) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.25063588338877, LR: 0.0003 +[2026-03-06 00:40:52] (step=0072836) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 14.250831539816083, LR: 0.0003 +[2026-03-06 00:41:00] (step=0072837) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.251027196243397, LR: 0.0003 +[2026-03-06 00:41:08] (step=0072838) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.25122285267071, LR: 0.0003 +[2026-03-06 00:41:16] (step=0072839) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.251418509098023, LR: 0.0003 +[2026-03-06 00:41:24] (step=0072840) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 14.251614165525337, LR: 0.0003 +[2026-03-06 00:41:31] (step=0072841) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.251809821952651, LR: 0.0003 +[2026-03-06 00:41:39] (step=0072842) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.252005478379965, LR: 0.0003 +[2026-03-06 00:41:47] (step=0072843) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.252201134807278, LR: 0.0003 +[2026-03-06 00:41:55] (step=0072844) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.252396791234592, LR: 0.0003 +[2026-03-06 00:42:03] (step=0072845) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.252592447661906, LR: 0.0003 +[2026-03-06 00:42:11] (step=0072846) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.25278810408922, LR: 0.0003 +[2026-03-06 00:42:18] (step=0072847) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.252983760516534, LR: 0.0003 +[2026-03-06 00:42:26] (step=0072848) Train Loss: 0.4553, Train Steps/Sec: 0.13, Epoch: 14.253179416943846, LR: 0.0003 +[2026-03-06 00:42:34] (step=0072849) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.25337507337116, LR: 0.0003 +[2026-03-06 00:42:42] (step=0072850) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.253570729798474, LR: 0.0003 +[2026-03-06 00:42:50] (step=0072851) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 14.253766386225788, LR: 0.0003 +[2026-03-06 00:42:58] (step=0072852) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.253962042653102, LR: 0.0003 +[2026-03-06 00:43:06] (step=0072853) Train Loss: 0.4373, Train Steps/Sec: 0.12, Epoch: 14.254157699080414, LR: 0.0003 +[2026-03-06 00:43:14] (step=0072854) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.254353355507728, LR: 0.0003 +[2026-03-06 00:43:21] (step=0072855) Train Loss: 0.4275, Train Steps/Sec: 0.13, Epoch: 14.254549011935042, LR: 0.0003 +[2026-03-06 00:43:29] (step=0072856) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 14.254744668362356, LR: 0.0003 +[2026-03-06 00:43:37] (step=0072857) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 14.25494032478967, LR: 0.0003 +[2026-03-06 00:43:45] (step=0072858) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 14.255135981216982, LR: 0.0003 +[2026-03-06 00:43:53] (step=0072859) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 14.255331637644296, LR: 0.0003 +[2026-03-06 00:44:01] (step=0072860) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.25552729407161, LR: 0.0003 +[2026-03-06 00:44:08] (step=0072861) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.255722950498924, LR: 0.0003 +[2026-03-06 00:44:16] (step=0072862) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 14.255918606926237, LR: 0.0003 +[2026-03-06 00:44:24] (step=0072863) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.25611426335355, LR: 0.0003 +[2026-03-06 00:44:32] (step=0072864) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 14.256309919780865, LR: 0.0003 +[2026-03-06 00:44:40] (step=0072865) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.256505576208179, LR: 0.0003 +[2026-03-06 00:44:48] (step=0072866) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.256701232635493, LR: 0.0003 +[2026-03-06 00:44:56] (step=0072867) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.256896889062805, LR: 0.0003 +[2026-03-06 00:45:04] (step=0072868) Train Loss: 0.4566, Train Steps/Sec: 0.13, Epoch: 14.257092545490119, LR: 0.0003 +[2026-03-06 00:45:11] (step=0072869) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.257288201917433, LR: 0.0003 +[2026-03-06 00:45:19] (step=0072870) Train Loss: 0.4667, Train Steps/Sec: 0.13, Epoch: 14.257483858344747, LR: 0.0003 +[2026-03-06 00:45:27] (step=0072871) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.257679514772061, LR: 0.0003 +[2026-03-06 00:45:35] (step=0072872) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.257875171199373, LR: 0.0003 +[2026-03-06 00:45:43] (step=0072873) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 14.258070827626687, LR: 0.0003 +[2026-03-06 00:45:51] (step=0072874) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.258266484054001, LR: 0.0003 +[2026-03-06 00:45:59] (step=0072875) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.258462140481315, LR: 0.0003 +[2026-03-06 00:46:06] (step=0072876) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 14.25865779690863, LR: 0.0003 +[2026-03-06 00:46:14] (step=0072877) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 14.258853453335941, LR: 0.0003 +[2026-03-06 00:46:22] (step=0072878) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.259049109763255, LR: 0.0003 +[2026-03-06 00:46:30] (step=0072879) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 14.25924476619057, LR: 0.0003 +[2026-03-06 00:46:38] (step=0072880) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 14.259440422617883, LR: 0.0003 +[2026-03-06 00:46:46] (step=0072881) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.259636079045197, LR: 0.0003 +[2026-03-06 00:46:53] (step=0072882) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.25983173547251, LR: 0.0003 +[2026-03-06 00:47:01] (step=0072883) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.260027391899824, LR: 0.0003 +[2026-03-06 00:47:09] (step=0072884) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 14.260223048327138, LR: 0.0003 +[2026-03-06 00:47:17] (step=0072885) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.260418704754452, LR: 0.0003 +[2026-03-06 00:47:25] (step=0072886) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.260614361181764, LR: 0.0003 +[2026-03-06 00:47:33] (step=0072887) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.260810017609078, LR: 0.0003 +[2026-03-06 00:47:40] (step=0072888) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 14.261005674036392, LR: 0.0003 +[2026-03-06 00:47:48] (step=0072889) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.261201330463706, LR: 0.0003 +[2026-03-06 00:47:56] (step=0072890) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.26139698689102, LR: 0.0003 +[2026-03-06 00:48:04] (step=0072891) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.261592643318332, LR: 0.0003 +[2026-03-06 00:48:12] (step=0072892) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 14.261788299745646, LR: 0.0003 +[2026-03-06 00:48:20] (step=0072893) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.26198395617296, LR: 0.0003 +[2026-03-06 00:48:28] (step=0072894) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.262179612600274, LR: 0.0003 +[2026-03-06 00:48:35] (step=0072895) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.262375269027588, LR: 0.0003 +[2026-03-06 00:48:43] (step=0072896) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.2625709254549, LR: 0.0003 +[2026-03-06 00:48:51] (step=0072897) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.262766581882214, LR: 0.0003 +[2026-03-06 00:48:59] (step=0072898) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.262962238309528, LR: 0.0003 +[2026-03-06 00:49:07] (step=0072899) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.263157894736842, LR: 0.0003 +[2026-03-06 00:49:15] (step=0072900) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.263353551164156, LR: 0.0003 +[2026-03-06 00:49:22] (step=0072901) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.263549207591469, LR: 0.0003 +[2026-03-06 00:49:30] (step=0072902) Train Loss: 0.4459, Train Steps/Sec: 0.13, Epoch: 14.263744864018783, LR: 0.0003 +[2026-03-06 00:49:38] (step=0072903) Train Loss: 0.4472, Train Steps/Sec: 0.12, Epoch: 14.263940520446097, LR: 0.0003 +[2026-03-06 00:49:46] (step=0072904) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 14.26413617687341, LR: 0.0003 +[2026-03-06 00:49:54] (step=0072905) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 14.264331833300725, LR: 0.0003 +[2026-03-06 00:50:02] (step=0072906) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.264527489728037, LR: 0.0003 +[2026-03-06 00:50:10] (step=0072907) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 14.264723146155351, LR: 0.0003 +[2026-03-06 00:50:18] (step=0072908) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.264918802582665, LR: 0.0003 +[2026-03-06 00:50:25] (step=0072909) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.265114459009979, LR: 0.0003 +[2026-03-06 00:50:33] (step=0072910) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.265310115437293, LR: 0.0003 +[2026-03-06 00:50:41] (step=0072911) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.265505771864605, LR: 0.0003 +[2026-03-06 00:50:49] (step=0072912) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.26570142829192, LR: 0.0003 +[2026-03-06 00:50:57] (step=0072913) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.265897084719233, LR: 0.0003 +[2026-03-06 00:51:05] (step=0072914) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.266092741146547, LR: 0.0003 +[2026-03-06 00:51:13] (step=0072915) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 14.26628839757386, LR: 0.0003 +[2026-03-06 00:51:20] (step=0072916) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 14.266484054001173, LR: 0.0003 +[2026-03-06 00:51:28] (step=0072917) Train Loss: 0.4547, Train Steps/Sec: 0.13, Epoch: 14.266679710428487, LR: 0.0003 +[2026-03-06 00:51:36] (step=0072918) Train Loss: 0.4236, Train Steps/Sec: 0.13, Epoch: 14.266875366855801, LR: 0.0003 +[2026-03-06 00:51:44] (step=0072919) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.267071023283115, LR: 0.0003 +[2026-03-06 00:51:52] (step=0072920) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 14.267266679710428, LR: 0.0003 +[2026-03-06 00:52:00] (step=0072921) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 14.267462336137742, LR: 0.0003 +[2026-03-06 00:52:07] (step=0072922) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.267657992565056, LR: 0.0003 +[2026-03-06 00:52:15] (step=0072923) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.26785364899237, LR: 0.0003 +[2026-03-06 00:52:23] (step=0072924) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.268049305419684, LR: 0.0003 +[2026-03-06 00:52:31] (step=0072925) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 14.268244961846996, LR: 0.0003 +[2026-03-06 00:52:39] (step=0072926) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 14.26844061827431, LR: 0.0003 +[2026-03-06 00:52:47] (step=0072927) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.268636274701624, LR: 0.0003 +[2026-03-06 00:52:55] (step=0072928) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.268831931128938, LR: 0.0003 +[2026-03-06 00:53:02] (step=0072929) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 14.269027587556252, LR: 0.0003 +[2026-03-06 00:53:10] (step=0072930) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 14.269223243983564, LR: 0.0003 +[2026-03-06 00:53:18] (step=0072931) Train Loss: 0.4557, Train Steps/Sec: 0.13, Epoch: 14.269418900410878, LR: 0.0003 +[2026-03-06 00:53:26] (step=0072932) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.269614556838192, LR: 0.0003 +[2026-03-06 00:53:34] (step=0072933) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.269810213265506, LR: 0.0003 +[2026-03-06 00:53:42] (step=0072934) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 14.27000586969282, LR: 0.0003 +[2026-03-06 00:53:49] (step=0072935) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 14.270201526120132, LR: 0.0003 +[2026-03-06 00:53:57] (step=0072936) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.270397182547446, LR: 0.0003 +[2026-03-06 00:54:05] (step=0072937) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.27059283897476, LR: 0.0003 +[2026-03-06 00:54:13] (step=0072938) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.270788495402075, LR: 0.0003 +[2026-03-06 00:54:21] (step=0072939) Train Loss: 0.4298, Train Steps/Sec: 0.13, Epoch: 14.270984151829387, LR: 0.0003 +[2026-03-06 00:54:29] (step=0072940) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.2711798082567, LR: 0.0003 +[2026-03-06 00:54:37] (step=0072941) Train Loss: 0.4341, Train Steps/Sec: 0.13, Epoch: 14.271375464684015, LR: 0.0003 +[2026-03-06 00:54:44] (step=0072942) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.271571121111329, LR: 0.0003 +[2026-03-06 00:54:52] (step=0072943) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 14.271766777538643, LR: 0.0003 +[2026-03-06 00:55:00] (step=0072944) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.271962433965955, LR: 0.0003 +[2026-03-06 00:55:08] (step=0072945) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.272158090393269, LR: 0.0003 +[2026-03-06 00:55:16] (step=0072946) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.272353746820583, LR: 0.0003 +[2026-03-06 00:55:24] (step=0072947) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 14.272549403247897, LR: 0.0003 +[2026-03-06 00:55:31] (step=0072948) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.272745059675211, LR: 0.0003 +[2026-03-06 00:55:39] (step=0072949) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.272940716102523, LR: 0.0003 +[2026-03-06 00:55:47] (step=0072950) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.273136372529837, LR: 0.0003 +[2026-03-06 00:55:55] (step=0072951) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.273332028957151, LR: 0.0003 +[2026-03-06 00:56:03] (step=0072952) Train Loss: 0.4286, Train Steps/Sec: 0.13, Epoch: 14.273527685384465, LR: 0.0003 +[2026-03-06 00:56:11] (step=0072953) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.27372334181178, LR: 0.0003 +[2026-03-06 00:56:19] (step=0072954) Train Loss: 0.4345, Train Steps/Sec: 0.13, Epoch: 14.273918998239091, LR: 0.0003 +[2026-03-06 00:56:27] (step=0072955) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 14.274114654666405, LR: 0.0003 +[2026-03-06 00:56:35] (step=0072956) Train Loss: 0.4554, Train Steps/Sec: 0.13, Epoch: 14.27431031109372, LR: 0.0003 +[2026-03-06 00:56:42] (step=0072957) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.274505967521034, LR: 0.0003 +[2026-03-06 00:56:50] (step=0072958) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.274701623948348, LR: 0.0003 +[2026-03-06 00:56:58] (step=0072959) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 14.27489728037566, LR: 0.0003 +[2026-03-06 00:57:06] (step=0072960) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 14.275092936802974, LR: 0.0003 +[2026-03-06 00:57:14] (step=0072961) Train Loss: 0.4552, Train Steps/Sec: 0.13, Epoch: 14.275288593230288, LR: 0.0003 +[2026-03-06 00:57:22] (step=0072962) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.275484249657602, LR: 0.0003 +[2026-03-06 00:57:29] (step=0072963) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.275679906084916, LR: 0.0003 +[2026-03-06 00:57:37] (step=0072964) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.275875562512228, LR: 0.0003 +[2026-03-06 00:57:45] (step=0072965) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.276071218939542, LR: 0.0003 +[2026-03-06 00:57:53] (step=0072966) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.276266875366856, LR: 0.0003 +[2026-03-06 00:58:01] (step=0072967) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 14.27646253179417, LR: 0.0003 +[2026-03-06 00:58:09] (step=0072968) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.276658188221482, LR: 0.0003 +[2026-03-06 00:58:17] (step=0072969) Train Loss: 0.4285, Train Steps/Sec: 0.13, Epoch: 14.276853844648796, LR: 0.0003 +[2026-03-06 00:58:24] (step=0072970) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.27704950107611, LR: 0.0003 +[2026-03-06 00:58:32] (step=0072971) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 14.277245157503424, LR: 0.0003 +[2026-03-06 00:58:40] (step=0072972) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.277440813930738, LR: 0.0003 +[2026-03-06 00:58:48] (step=0072973) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 14.27763647035805, LR: 0.0003 +[2026-03-06 00:58:56] (step=0072974) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.277832126785365, LR: 0.0003 +[2026-03-06 00:59:04] (step=0072975) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.278027783212679, LR: 0.0003 +[2026-03-06 00:59:11] (step=0072976) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.278223439639993, LR: 0.0003 +[2026-03-06 00:59:19] (step=0072977) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.278419096067307, LR: 0.0003 +[2026-03-06 00:59:27] (step=0072978) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.278614752494619, LR: 0.0003 +[2026-03-06 00:59:35] (step=0072979) Train Loss: 0.4280, Train Steps/Sec: 0.13, Epoch: 14.278810408921933, LR: 0.0003 +[2026-03-06 00:59:43] (step=0072980) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.279006065349247, LR: 0.0003 +[2026-03-06 00:59:51] (step=0072981) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.27920172177656, LR: 0.0003 +[2026-03-06 00:59:58] (step=0072982) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.279397378203875, LR: 0.0003 +[2026-03-06 01:00:06] (step=0072983) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.279593034631187, LR: 0.0003 +[2026-03-06 01:00:14] (step=0072984) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.279788691058501, LR: 0.0003 +[2026-03-06 01:00:22] (step=0072985) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.279984347485815, LR: 0.0003 +[2026-03-06 01:00:30] (step=0072986) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.280180003913129, LR: 0.0003 +[2026-03-06 01:00:38] (step=0072987) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.280375660340443, LR: 0.0003 +[2026-03-06 01:00:46] (step=0072988) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.280571316767755, LR: 0.0003 +[2026-03-06 01:00:53] (step=0072989) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 14.28076697319507, LR: 0.0003 +[2026-03-06 01:01:01] (step=0072990) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 14.280962629622383, LR: 0.0003 +[2026-03-06 01:01:09] (step=0072991) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.281158286049697, LR: 0.0003 +[2026-03-06 01:01:17] (step=0072992) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 14.28135394247701, LR: 0.0003 +[2026-03-06 01:01:25] (step=0072993) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 14.281549598904324, LR: 0.0003 +[2026-03-06 01:01:33] (step=0072994) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.281745255331638, LR: 0.0003 +[2026-03-06 01:01:40] (step=0072995) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.281940911758952, LR: 0.0003 +[2026-03-06 01:01:48] (step=0072996) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.282136568186266, LR: 0.0003 +[2026-03-06 01:01:56] (step=0072997) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.282332224613578, LR: 0.0003 +[2026-03-06 01:02:04] (step=0072998) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.282527881040892, LR: 0.0003 +[2026-03-06 01:02:12] (step=0072999) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.282723537468206, LR: 0.0003 +[2026-03-06 01:02:20] (step=0073000) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.28291919389552, LR: 0.0003 +[2026-03-06 01:02:20] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0073000/ +[2026-03-06 01:02:28] (step=0073001) Train Loss: 0.4338, Train Steps/Sec: 0.12, Epoch: 14.283114850322834, LR: 0.0003 +[2026-03-06 01:02:36] (step=0073002) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.283310506750146, LR: 0.0003 +[2026-03-06 01:02:44] (step=0073003) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 14.28350616317746, LR: 0.0003 +[2026-03-06 01:02:51] (step=0073004) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.283701819604774, LR: 0.0003 +[2026-03-06 01:02:59] (step=0073005) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.283897476032088, LR: 0.0003 +[2026-03-06 01:03:07] (step=0073006) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.284093132459402, LR: 0.0003 +[2026-03-06 01:03:15] (step=0073007) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 14.284288788886714, LR: 0.0003 +[2026-03-06 01:03:23] (step=0073008) Train Loss: 0.4254, Train Steps/Sec: 0.13, Epoch: 14.284484445314028, LR: 0.0003 +[2026-03-06 01:03:31] (step=0073009) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 14.284680101741342, LR: 0.0003 +[2026-03-06 01:03:38] (step=0073010) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.284875758168656, LR: 0.0003 +[2026-03-06 01:03:46] (step=0073011) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 14.28507141459597, LR: 0.0003 +[2026-03-06 01:03:54] (step=0073012) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 14.285267071023283, LR: 0.0003 +[2026-03-06 01:04:02] (step=0073013) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.285462727450597, LR: 0.0003 +[2026-03-06 01:04:10] (step=0073014) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.28565838387791, LR: 0.0003 +[2026-03-06 01:04:18] (step=0073015) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 14.285854040305225, LR: 0.0003 +[2026-03-06 01:04:26] (step=0073016) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.286049696732539, LR: 0.0003 +[2026-03-06 01:04:33] (step=0073017) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.28624535315985, LR: 0.0003 +[2026-03-06 01:04:41] (step=0073018) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.286441009587165, LR: 0.0003 +[2026-03-06 01:04:49] (step=0073019) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 14.286636666014479, LR: 0.0003 +[2026-03-06 01:04:57] (step=0073020) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.286832322441793, LR: 0.0003 +[2026-03-06 01:05:05] (step=0073021) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.287027978869105, LR: 0.0003 +[2026-03-06 01:05:13] (step=0073022) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.287223635296419, LR: 0.0003 +[2026-03-06 01:05:20] (step=0073023) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.287419291723733, LR: 0.0003 +[2026-03-06 01:05:28] (step=0073024) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.287614948151047, LR: 0.0003 +[2026-03-06 01:05:36] (step=0073025) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.287810604578361, LR: 0.0003 +[2026-03-06 01:05:44] (step=0073026) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 14.288006261005673, LR: 0.0003 +[2026-03-06 01:05:52] (step=0073027) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.288201917432987, LR: 0.0003 +[2026-03-06 01:06:00] (step=0073028) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.288397573860301, LR: 0.0003 +[2026-03-06 01:06:08] (step=0073029) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.288593230287615, LR: 0.0003 +[2026-03-06 01:06:15] (step=0073030) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.28878888671493, LR: 0.0003 +[2026-03-06 01:06:23] (step=0073031) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 14.288984543142242, LR: 0.0003 +[2026-03-06 01:06:31] (step=0073032) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.289180199569556, LR: 0.0003 +[2026-03-06 01:06:39] (step=0073033) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.28937585599687, LR: 0.0003 +[2026-03-06 01:06:47] (step=0073034) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.289571512424184, LR: 0.0003 +[2026-03-06 01:06:55] (step=0073035) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.289767168851498, LR: 0.0003 +[2026-03-06 01:07:02] (step=0073036) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.28996282527881, LR: 0.0003 +[2026-03-06 01:07:10] (step=0073037) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.290158481706124, LR: 0.0003 +[2026-03-06 01:07:18] (step=0073038) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 14.290354138133438, LR: 0.0003 +[2026-03-06 01:07:26] (step=0073039) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.290549794560752, LR: 0.0003 +[2026-03-06 01:07:34] (step=0073040) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.290745450988066, LR: 0.0003 +[2026-03-06 01:07:42] (step=0073041) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.290941107415378, LR: 0.0003 +[2026-03-06 01:07:49] (step=0073042) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.291136763842692, LR: 0.0003 +[2026-03-06 01:07:57] (step=0073043) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.291332420270006, LR: 0.0003 +[2026-03-06 01:08:05] (step=0073044) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.29152807669732, LR: 0.0003 +[2026-03-06 01:08:13] (step=0073045) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.291723733124632, LR: 0.0003 +[2026-03-06 01:08:21] (step=0073046) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.291919389551946, LR: 0.0003 +[2026-03-06 01:08:29] (step=0073047) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.29211504597926, LR: 0.0003 +[2026-03-06 01:08:37] (step=0073048) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.292310702406574, LR: 0.0003 +[2026-03-06 01:08:44] (step=0073049) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.292506358833888, LR: 0.0003 +[2026-03-06 01:08:52] (step=0073050) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.2927020152612, LR: 0.0003 +[2026-03-06 01:09:00] (step=0073051) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 14.292897671688515, LR: 0.0003 +[2026-03-06 01:09:08] (step=0073052) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.293093328115829, LR: 0.0003 +[2026-03-06 01:09:16] (step=0073053) Train Loss: 0.4326, Train Steps/Sec: 0.13, Epoch: 14.293288984543143, LR: 0.0003 +[2026-03-06 01:09:24] (step=0073054) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 14.293484640970457, LR: 0.0003 +[2026-03-06 01:09:32] (step=0073055) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.293680297397769, LR: 0.0003 +[2026-03-06 01:09:40] (step=0073056) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 14.293875953825083, LR: 0.0003 +[2026-03-06 01:09:47] (step=0073057) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.294071610252397, LR: 0.0003 +[2026-03-06 01:09:55] (step=0073058) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.29426726667971, LR: 0.0003 +[2026-03-06 01:10:03] (step=0073059) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.294462923107025, LR: 0.0003 +[2026-03-06 01:10:11] (step=0073060) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.294658579534337, LR: 0.0003 +[2026-03-06 01:10:19] (step=0073061) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.294854235961651, LR: 0.0003 +[2026-03-06 01:10:27] (step=0073062) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 14.295049892388965, LR: 0.0003 +[2026-03-06 01:10:34] (step=0073063) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.295245548816279, LR: 0.0003 +[2026-03-06 01:10:42] (step=0073064) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.295441205243593, LR: 0.0003 +[2026-03-06 01:10:50] (step=0073065) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.295636861670905, LR: 0.0003 +[2026-03-06 01:10:58] (step=0073066) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.29583251809822, LR: 0.0003 +[2026-03-06 01:11:06] (step=0073067) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 14.296028174525533, LR: 0.0003 +[2026-03-06 01:11:14] (step=0073068) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.296223830952847, LR: 0.0003 +[2026-03-06 01:11:22] (step=0073069) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.29641948738016, LR: 0.0003 +[2026-03-06 01:11:29] (step=0073070) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 14.296615143807474, LR: 0.0003 +[2026-03-06 01:11:37] (step=0073071) Train Loss: 0.4284, Train Steps/Sec: 0.13, Epoch: 14.296810800234788, LR: 0.0003 +[2026-03-06 01:11:45] (step=0073072) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.297006456662102, LR: 0.0003 +[2026-03-06 01:11:53] (step=0073073) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 14.297202113089416, LR: 0.0003 +[2026-03-06 01:12:01] (step=0073074) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.297397769516728, LR: 0.0003 +[2026-03-06 01:12:09] (step=0073075) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.297593425944042, LR: 0.0003 +[2026-03-06 01:12:16] (step=0073076) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.297789082371356, LR: 0.0003 +[2026-03-06 01:12:24] (step=0073077) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.29798473879867, LR: 0.0003 +[2026-03-06 01:12:32] (step=0073078) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.298180395225984, LR: 0.0003 +[2026-03-06 01:12:40] (step=0073079) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.298376051653296, LR: 0.0003 +[2026-03-06 01:12:48] (step=0073080) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.29857170808061, LR: 0.0003 +[2026-03-06 01:12:56] (step=0073081) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 14.298767364507924, LR: 0.0003 +[2026-03-06 01:13:04] (step=0073082) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.298963020935238, LR: 0.0003 +[2026-03-06 01:13:11] (step=0073083) Train Loss: 0.4271, Train Steps/Sec: 0.13, Epoch: 14.299158677362552, LR: 0.0003 +[2026-03-06 01:13:19] (step=0073084) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.299354333789864, LR: 0.0003 +[2026-03-06 01:13:27] (step=0073085) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 14.299549990217178, LR: 0.0003 +[2026-03-06 01:13:35] (step=0073086) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.299745646644492, LR: 0.0003 +[2026-03-06 01:13:43] (step=0073087) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.299941303071806, LR: 0.0003 +[2026-03-06 01:13:51] (step=0073088) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.30013695949912, LR: 0.0003 +[2026-03-06 01:13:58] (step=0073089) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.300332615926433, LR: 0.0003 +[2026-03-06 01:14:06] (step=0073090) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.300528272353747, LR: 0.0003 +[2026-03-06 01:14:14] (step=0073091) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.30072392878106, LR: 0.0003 +[2026-03-06 01:14:22] (step=0073092) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.300919585208375, LR: 0.0003 +[2026-03-06 01:14:30] (step=0073093) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.301115241635689, LR: 0.0003 +[2026-03-06 01:14:38] (step=0073094) Train Loss: 0.4521, Train Steps/Sec: 0.12, Epoch: 14.301310898063, LR: 0.0003 +[2026-03-06 01:14:46] (step=0073095) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 14.301506554490315, LR: 0.0003 +[2026-03-06 01:14:54] (step=0073096) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 14.301702210917629, LR: 0.0003 +[2026-03-06 01:15:01] (step=0073097) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 14.301897867344943, LR: 0.0003 +[2026-03-06 01:15:09] (step=0073098) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 14.302093523772255, LR: 0.0003 +[2026-03-06 01:15:17] (step=0073099) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.302289180199569, LR: 0.0003 +[2026-03-06 01:15:25] (step=0073100) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.302484836626883, LR: 0.0003 +[2026-03-06 01:15:33] (step=0073101) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.302680493054197, LR: 0.0003 +[2026-03-06 01:15:41] (step=0073102) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 14.302876149481511, LR: 0.0003 +[2026-03-06 01:15:49] (step=0073103) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 14.303071805908823, LR: 0.0003 +[2026-03-06 01:15:56] (step=0073104) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.303267462336137, LR: 0.0003 +[2026-03-06 01:16:04] (step=0073105) Train Loss: 0.4533, Train Steps/Sec: 0.13, Epoch: 14.303463118763451, LR: 0.0003 +[2026-03-06 01:16:12] (step=0073106) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.303658775190765, LR: 0.0003 +[2026-03-06 01:16:20] (step=0073107) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.30385443161808, LR: 0.0003 +[2026-03-06 01:16:28] (step=0073108) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.304050088045392, LR: 0.0003 +[2026-03-06 01:16:36] (step=0073109) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.304245744472706, LR: 0.0003 +[2026-03-06 01:16:43] (step=0073110) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.30444140090002, LR: 0.0003 +[2026-03-06 01:16:51] (step=0073111) Train Loss: 0.4573, Train Steps/Sec: 0.13, Epoch: 14.304637057327334, LR: 0.0003 +[2026-03-06 01:16:59] (step=0073112) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.304832713754648, LR: 0.0003 +[2026-03-06 01:17:07] (step=0073113) Train Loss: 0.4565, Train Steps/Sec: 0.13, Epoch: 14.30502837018196, LR: 0.0003 +[2026-03-06 01:17:15] (step=0073114) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.305224026609274, LR: 0.0003 +[2026-03-06 01:17:23] (step=0073115) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 14.305419683036588, LR: 0.0003 +[2026-03-06 01:17:31] (step=0073116) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.305615339463902, LR: 0.0003 +[2026-03-06 01:17:38] (step=0073117) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.305810995891216, LR: 0.0003 +[2026-03-06 01:17:46] (step=0073118) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.306006652318528, LR: 0.0003 +[2026-03-06 01:17:54] (step=0073119) Train Loss: 0.4526, Train Steps/Sec: 0.13, Epoch: 14.306202308745842, LR: 0.0003 +[2026-03-06 01:18:02] (step=0073120) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 14.306397965173156, LR: 0.0003 +[2026-03-06 01:18:10] (step=0073121) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 14.30659362160047, LR: 0.0003 +[2026-03-06 01:18:18] (step=0073122) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.306789278027782, LR: 0.0003 +[2026-03-06 01:18:25] (step=0073123) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.306984934455096, LR: 0.0003 +[2026-03-06 01:18:33] (step=0073124) Train Loss: 0.4266, Train Steps/Sec: 0.13, Epoch: 14.30718059088241, LR: 0.0003 +[2026-03-06 01:18:41] (step=0073125) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.307376247309724, LR: 0.0003 +[2026-03-06 01:18:49] (step=0073126) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.307571903737038, LR: 0.0003 +[2026-03-06 01:18:57] (step=0073127) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.30776756016435, LR: 0.0003 +[2026-03-06 01:19:05] (step=0073128) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.307963216591665, LR: 0.0003 +[2026-03-06 01:19:12] (step=0073129) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 14.308158873018979, LR: 0.0003 +[2026-03-06 01:19:20] (step=0073130) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 14.308354529446293, LR: 0.0003 +[2026-03-06 01:19:28] (step=0073131) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.308550185873607, LR: 0.0003 +[2026-03-06 01:19:36] (step=0073132) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.308745842300919, LR: 0.0003 +[2026-03-06 01:19:44] (step=0073133) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.308941498728233, LR: 0.0003 +[2026-03-06 01:19:52] (step=0073134) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 14.309137155155547, LR: 0.0003 +[2026-03-06 01:19:59] (step=0073135) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.30933281158286, LR: 0.0003 +[2026-03-06 01:20:07] (step=0073136) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.309528468010175, LR: 0.0003 +[2026-03-06 01:20:15] (step=0073137) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.309724124437487, LR: 0.0003 +[2026-03-06 01:20:23] (step=0073138) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.309919780864801, LR: 0.0003 +[2026-03-06 01:20:31] (step=0073139) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.310115437292115, LR: 0.0003 +[2026-03-06 01:20:39] (step=0073140) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.310311093719429, LR: 0.0003 +[2026-03-06 01:20:46] (step=0073141) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.310506750146743, LR: 0.0003 +[2026-03-06 01:20:54] (step=0073142) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.310702406574055, LR: 0.0003 +[2026-03-06 01:21:02] (step=0073143) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.31089806300137, LR: 0.0003 +[2026-03-06 01:21:10] (step=0073144) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 14.311093719428683, LR: 0.0003 +[2026-03-06 01:21:18] (step=0073145) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.311289375855997, LR: 0.0003 +[2026-03-06 01:21:26] (step=0073146) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 14.311485032283311, LR: 0.0003 +[2026-03-06 01:21:34] (step=0073147) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.311680688710624, LR: 0.0003 +[2026-03-06 01:21:42] (step=0073148) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 14.311876345137938, LR: 0.0003 +[2026-03-06 01:21:50] (step=0073149) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.312072001565252, LR: 0.0003 +[2026-03-06 01:21:57] (step=0073150) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.312267657992566, LR: 0.0003 +[2026-03-06 01:22:05] (step=0073151) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.312463314419878, LR: 0.0003 +[2026-03-06 01:22:13] (step=0073152) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.312658970847192, LR: 0.0003 +[2026-03-06 01:22:21] (step=0073153) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.312854627274506, LR: 0.0003 +[2026-03-06 01:22:29] (step=0073154) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.31305028370182, LR: 0.0003 +[2026-03-06 01:22:37] (step=0073155) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.313245940129134, LR: 0.0003 +[2026-03-06 01:22:44] (step=0073156) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.313441596556446, LR: 0.0003 +[2026-03-06 01:22:52] (step=0073157) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.31363725298376, LR: 0.0003 +[2026-03-06 01:23:00] (step=0073158) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.313832909411074, LR: 0.0003 +[2026-03-06 01:23:08] (step=0073159) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.314028565838388, LR: 0.0003 +[2026-03-06 01:23:16] (step=0073160) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.314224222265702, LR: 0.0003 +[2026-03-06 01:23:24] (step=0073161) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.314419878693014, LR: 0.0003 +[2026-03-06 01:23:31] (step=0073162) Train Loss: 0.4308, Train Steps/Sec: 0.13, Epoch: 14.314615535120328, LR: 0.0003 +[2026-03-06 01:23:39] (step=0073163) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 14.314811191547642, LR: 0.0003 +[2026-03-06 01:23:47] (step=0073164) Train Loss: 0.4358, Train Steps/Sec: 0.13, Epoch: 14.315006847974956, LR: 0.0003 +[2026-03-06 01:23:55] (step=0073165) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 14.31520250440227, LR: 0.0003 +[2026-03-06 01:24:03] (step=0073166) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.315398160829583, LR: 0.0003 +[2026-03-06 01:24:11] (step=0073167) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.315593817256897, LR: 0.0003 +[2026-03-06 01:24:19] (step=0073168) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.31578947368421, LR: 0.0003 +[2026-03-06 01:24:26] (step=0073169) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.315985130111525, LR: 0.0003 +[2026-03-06 01:24:34] (step=0073170) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.316180786538839, LR: 0.0003 +[2026-03-06 01:24:42] (step=0073171) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.31637644296615, LR: 0.0003 +[2026-03-06 01:24:50] (step=0073172) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.316572099393465, LR: 0.0003 +[2026-03-06 01:24:58] (step=0073173) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 14.316767755820779, LR: 0.0003 +[2026-03-06 01:25:06] (step=0073174) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.316963412248093, LR: 0.0003 +[2026-03-06 01:25:13] (step=0073175) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.317159068675405, LR: 0.0003 +[2026-03-06 01:25:21] (step=0073176) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 14.31735472510272, LR: 0.0003 +[2026-03-06 01:25:29] (step=0073177) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.317550381530033, LR: 0.0003 +[2026-03-06 01:25:37] (step=0073178) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.317746037957347, LR: 0.0003 +[2026-03-06 01:25:45] (step=0073179) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.317941694384661, LR: 0.0003 +[2026-03-06 01:25:53] (step=0073180) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.318137350811973, LR: 0.0003 +[2026-03-06 01:26:00] (step=0073181) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 14.318333007239287, LR: 0.0003 +[2026-03-06 01:26:08] (step=0073182) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.318528663666601, LR: 0.0003 +[2026-03-06 01:26:16] (step=0073183) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.318724320093915, LR: 0.0003 +[2026-03-06 01:26:24] (step=0073184) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 14.31891997652123, LR: 0.0003 +[2026-03-06 01:26:32] (step=0073185) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.319115632948542, LR: 0.0003 +[2026-03-06 01:26:40] (step=0073186) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.319311289375856, LR: 0.0003 +[2026-03-06 01:26:47] (step=0073187) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.31950694580317, LR: 0.0003 +[2026-03-06 01:26:55] (step=0073188) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.319702602230484, LR: 0.0003 +[2026-03-06 01:27:03] (step=0073189) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 14.319898258657798, LR: 0.0003 +[2026-03-06 01:27:11] (step=0073190) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.32009391508511, LR: 0.0003 +[2026-03-06 01:27:19] (step=0073191) Train Loss: 0.4294, Train Steps/Sec: 0.13, Epoch: 14.320289571512424, LR: 0.0003 +[2026-03-06 01:27:27] (step=0073192) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.320485227939738, LR: 0.0003 +[2026-03-06 01:27:34] (step=0073193) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.320680884367052, LR: 0.0003 +[2026-03-06 01:27:42] (step=0073194) Train Loss: 0.4314, Train Steps/Sec: 0.13, Epoch: 14.320876540794366, LR: 0.0003 +[2026-03-06 01:27:50] (step=0073195) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.321072197221678, LR: 0.0003 +[2026-03-06 01:27:58] (step=0073196) Train Loss: 0.4656, Train Steps/Sec: 0.13, Epoch: 14.321267853648992, LR: 0.0003 +[2026-03-06 01:28:06] (step=0073197) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.321463510076306, LR: 0.0003 +[2026-03-06 01:28:14] (step=0073198) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.32165916650362, LR: 0.0003 +[2026-03-06 01:28:22] (step=0073199) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.321854822930934, LR: 0.0003 +[2026-03-06 01:28:29] (step=0073200) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 14.322050479358246, LR: 0.0003 +[2026-03-06 01:28:37] (step=0073201) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.32224613578556, LR: 0.0003 +[2026-03-06 01:28:45] (step=0073202) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.322441792212874, LR: 0.0003 +[2026-03-06 01:28:53] (step=0073203) Train Loss: 0.4288, Train Steps/Sec: 0.13, Epoch: 14.322637448640188, LR: 0.0003 +[2026-03-06 01:29:01] (step=0073204) Train Loss: 0.4272, Train Steps/Sec: 0.13, Epoch: 14.3228331050675, LR: 0.0003 +[2026-03-06 01:29:09] (step=0073205) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.323028761494815, LR: 0.0003 +[2026-03-06 01:29:17] (step=0073206) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 14.323224417922129, LR: 0.0003 +[2026-03-06 01:29:25] (step=0073207) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 14.323420074349443, LR: 0.0003 +[2026-03-06 01:29:32] (step=0073208) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.323615730776757, LR: 0.0003 +[2026-03-06 01:29:40] (step=0073209) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.323811387204069, LR: 0.0003 +[2026-03-06 01:29:48] (step=0073210) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.324007043631383, LR: 0.0003 +[2026-03-06 01:29:56] (step=0073211) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.324202700058697, LR: 0.0003 +[2026-03-06 01:30:04] (step=0073212) Train Loss: 0.4367, Train Steps/Sec: 0.13, Epoch: 14.324398356486011, LR: 0.0003 +[2026-03-06 01:30:12] (step=0073213) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.324594012913325, LR: 0.0003 +[2026-03-06 01:30:19] (step=0073214) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 14.324789669340637, LR: 0.0003 +[2026-03-06 01:30:27] (step=0073215) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.324985325767951, LR: 0.0003 +[2026-03-06 01:30:35] (step=0073216) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.325180982195265, LR: 0.0003 +[2026-03-06 01:30:43] (step=0073217) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.32537663862258, LR: 0.0003 +[2026-03-06 01:30:51] (step=0073218) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.325572295049893, LR: 0.0003 +[2026-03-06 01:30:59] (step=0073219) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.325767951477205, LR: 0.0003 +[2026-03-06 01:31:07] (step=0073220) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.32596360790452, LR: 0.0003 +[2026-03-06 01:31:14] (step=0073221) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 14.326159264331833, LR: 0.0003 +[2026-03-06 01:31:22] (step=0073222) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.326354920759147, LR: 0.0003 +[2026-03-06 01:31:30] (step=0073223) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.326550577186461, LR: 0.0003 +[2026-03-06 01:31:38] (step=0073224) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.326746233613774, LR: 0.0003 +[2026-03-06 01:31:46] (step=0073225) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.326941890041088, LR: 0.0003 +[2026-03-06 01:31:54] (step=0073226) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.327137546468402, LR: 0.0003 +[2026-03-06 01:32:01] (step=0073227) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 14.327333202895716, LR: 0.0003 +[2026-03-06 01:32:09] (step=0073228) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.327528859323028, LR: 0.0003 +[2026-03-06 01:32:17] (step=0073229) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.327724515750342, LR: 0.0003 +[2026-03-06 01:32:25] (step=0073230) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.327920172177656, LR: 0.0003 +[2026-03-06 01:32:33] (step=0073231) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 14.32811582860497, LR: 0.0003 +[2026-03-06 01:32:41] (step=0073232) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.328311485032284, LR: 0.0003 +[2026-03-06 01:32:49] (step=0073233) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.328507141459596, LR: 0.0003 +[2026-03-06 01:32:56] (step=0073234) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 14.32870279788691, LR: 0.0003 +[2026-03-06 01:33:04] (step=0073235) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.328898454314224, LR: 0.0003 +[2026-03-06 01:33:12] (step=0073236) Train Loss: 0.4528, Train Steps/Sec: 0.13, Epoch: 14.329094110741538, LR: 0.0003 +[2026-03-06 01:33:20] (step=0073237) Train Loss: 0.4351, Train Steps/Sec: 0.13, Epoch: 14.329289767168852, LR: 0.0003 +[2026-03-06 01:33:28] (step=0073238) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.329485423596164, LR: 0.0003 +[2026-03-06 01:33:36] (step=0073239) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.329681080023478, LR: 0.0003 +[2026-03-06 01:33:44] (step=0073240) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 14.329876736450792, LR: 0.0003 +[2026-03-06 01:33:51] (step=0073241) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.330072392878106, LR: 0.0003 +[2026-03-06 01:33:59] (step=0073242) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.33026804930542, LR: 0.0003 +[2026-03-06 01:34:07] (step=0073243) Train Loss: 0.4500, Train Steps/Sec: 0.13, Epoch: 14.330463705732733, LR: 0.0003 +[2026-03-06 01:34:15] (step=0073244) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 14.330659362160047, LR: 0.0003 +[2026-03-06 01:34:23] (step=0073245) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.33085501858736, LR: 0.0003 +[2026-03-06 01:34:31] (step=0073246) Train Loss: 0.4344, Train Steps/Sec: 0.13, Epoch: 14.331050675014675, LR: 0.0003 +[2026-03-06 01:34:39] (step=0073247) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.331246331441989, LR: 0.0003 +[2026-03-06 01:34:46] (step=0073248) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.331441987869301, LR: 0.0003 +[2026-03-06 01:34:54] (step=0073249) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.331637644296615, LR: 0.0003 +[2026-03-06 01:35:02] (step=0073250) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.331833300723929, LR: 0.0003 +[2026-03-06 01:35:10] (step=0073251) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 14.332028957151243, LR: 0.0003 +[2026-03-06 01:35:18] (step=0073252) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.332224613578557, LR: 0.0003 +[2026-03-06 01:35:26] (step=0073253) Train Loss: 0.4235, Train Steps/Sec: 0.13, Epoch: 14.33242027000587, LR: 0.0003 +[2026-03-06 01:35:34] (step=0073254) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 14.332615926433183, LR: 0.0003 +[2026-03-06 01:35:42] (step=0073255) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.332811582860497, LR: 0.0003 +[2026-03-06 01:35:49] (step=0073256) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.333007239287811, LR: 0.0003 +[2026-03-06 01:35:57] (step=0073257) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.333202895715123, LR: 0.0003 +[2026-03-06 01:36:05] (step=0073258) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.333398552142437, LR: 0.0003 +[2026-03-06 01:36:13] (step=0073259) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.333594208569751, LR: 0.0003 +[2026-03-06 01:36:21] (step=0073260) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.333789864997065, LR: 0.0003 +[2026-03-06 01:36:29] (step=0073261) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 14.33398552142438, LR: 0.0003 +[2026-03-06 01:36:37] (step=0073262) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.334181177851692, LR: 0.0003 +[2026-03-06 01:36:44] (step=0073263) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.334376834279006, LR: 0.0003 +[2026-03-06 01:36:52] (step=0073264) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 14.33457249070632, LR: 0.0003 +[2026-03-06 01:37:00] (step=0073265) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 14.334768147133634, LR: 0.0003 +[2026-03-06 01:37:08] (step=0073266) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.334963803560948, LR: 0.0003 +[2026-03-06 01:37:16] (step=0073267) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.33515945998826, LR: 0.0003 +[2026-03-06 01:37:24] (step=0073268) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.335355116415574, LR: 0.0003 +[2026-03-06 01:37:31] (step=0073269) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.335550772842888, LR: 0.0003 +[2026-03-06 01:37:39] (step=0073270) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.335746429270202, LR: 0.0003 +[2026-03-06 01:37:47] (step=0073271) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.335942085697516, LR: 0.0003 +[2026-03-06 01:37:55] (step=0073272) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.336137742124828, LR: 0.0003 +[2026-03-06 01:38:03] (step=0073273) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 14.336333398552142, LR: 0.0003 +[2026-03-06 01:38:11] (step=0073274) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 14.336529054979456, LR: 0.0003 +[2026-03-06 01:38:18] (step=0073275) Train Loss: 0.4524, Train Steps/Sec: 0.13, Epoch: 14.33672471140677, LR: 0.0003 +[2026-03-06 01:38:26] (step=0073276) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.336920367834084, LR: 0.0003 +[2026-03-06 01:38:34] (step=0073277) Train Loss: 0.4510, Train Steps/Sec: 0.13, Epoch: 14.337116024261396, LR: 0.0003 +[2026-03-06 01:38:42] (step=0073278) Train Loss: 0.4360, Train Steps/Sec: 0.13, Epoch: 14.33731168068871, LR: 0.0003 +[2026-03-06 01:38:50] (step=0073279) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.337507337116024, LR: 0.0003 +[2026-03-06 01:38:58] (step=0073280) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.337702993543338, LR: 0.0003 +[2026-03-06 01:39:06] (step=0073281) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.33789864997065, LR: 0.0003 +[2026-03-06 01:39:13] (step=0073282) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.338094306397965, LR: 0.0003 +[2026-03-06 01:39:21] (step=0073283) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.338289962825279, LR: 0.0003 +[2026-03-06 01:39:29] (step=0073284) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.338485619252593, LR: 0.0003 +[2026-03-06 01:39:37] (step=0073285) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.338681275679907, LR: 0.0003 +[2026-03-06 01:39:45] (step=0073286) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 14.338876932107219, LR: 0.0003 +[2026-03-06 01:39:53] (step=0073287) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.339072588534533, LR: 0.0003 +[2026-03-06 01:40:00] (step=0073288) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.339268244961847, LR: 0.0003 +[2026-03-06 01:40:08] (step=0073289) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 14.339463901389161, LR: 0.0003 +[2026-03-06 01:40:16] (step=0073290) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.339659557816475, LR: 0.0003 +[2026-03-06 01:40:24] (step=0073291) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.339855214243787, LR: 0.0003 +[2026-03-06 01:40:32] (step=0073292) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.340050870671101, LR: 0.0003 +[2026-03-06 01:40:40] (step=0073293) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.340246527098415, LR: 0.0003 +[2026-03-06 01:40:47] (step=0073294) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.34044218352573, LR: 0.0003 +[2026-03-06 01:40:55] (step=0073295) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 14.340637839953043, LR: 0.0003 +[2026-03-06 01:41:03] (step=0073296) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.340833496380355, LR: 0.0003 +[2026-03-06 01:41:11] (step=0073297) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 14.34102915280767, LR: 0.0003 +[2026-03-06 01:41:19] (step=0073298) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.341224809234983, LR: 0.0003 +[2026-03-06 01:41:27] (step=0073299) Train Loss: 0.4412, Train Steps/Sec: 0.12, Epoch: 14.341420465662297, LR: 0.0003 +[2026-03-06 01:41:35] (step=0073300) Train Loss: 0.4486, Train Steps/Sec: 0.13, Epoch: 14.341616122089611, LR: 0.0003 +[2026-03-06 01:41:43] (step=0073301) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.341811778516924, LR: 0.0003 +[2026-03-06 01:41:51] (step=0073302) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.342007434944238, LR: 0.0003 +[2026-03-06 01:41:58] (step=0073303) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.342203091371552, LR: 0.0003 +[2026-03-06 01:42:06] (step=0073304) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.342398747798866, LR: 0.0003 +[2026-03-06 01:42:14] (step=0073305) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.34259440422618, LR: 0.0003 +[2026-03-06 01:42:22] (step=0073306) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.342790060653492, LR: 0.0003 +[2026-03-06 01:42:30] (step=0073307) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.342985717080806, LR: 0.0003 +[2026-03-06 01:42:38] (step=0073308) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.34318137350812, LR: 0.0003 +[2026-03-06 01:42:45] (step=0073309) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.343377029935434, LR: 0.0003 +[2026-03-06 01:42:53] (step=0073310) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 14.343572686362746, LR: 0.0003 +[2026-03-06 01:43:01] (step=0073311) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.34376834279006, LR: 0.0003 +[2026-03-06 01:43:09] (step=0073312) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.343963999217374, LR: 0.0003 +[2026-03-06 01:43:17] (step=0073313) Train Loss: 0.4363, Train Steps/Sec: 0.13, Epoch: 14.344159655644688, LR: 0.0003 +[2026-03-06 01:43:25] (step=0073314) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 14.344355312072002, LR: 0.0003 +[2026-03-06 01:43:32] (step=0073315) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.344550968499314, LR: 0.0003 +[2026-03-06 01:43:40] (step=0073316) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 14.344746624926628, LR: 0.0003 +[2026-03-06 01:43:48] (step=0073317) Train Loss: 0.4334, Train Steps/Sec: 0.13, Epoch: 14.344942281353942, LR: 0.0003 +[2026-03-06 01:43:56] (step=0073318) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.345137937781256, LR: 0.0003 +[2026-03-06 01:44:04] (step=0073319) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.34533359420857, LR: 0.0003 +[2026-03-06 01:44:12] (step=0073320) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.345529250635883, LR: 0.0003 +[2026-03-06 01:44:20] (step=0073321) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.345724907063197, LR: 0.0003 +[2026-03-06 01:44:27] (step=0073322) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.34592056349051, LR: 0.0003 +[2026-03-06 01:44:35] (step=0073323) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.346116219917825, LR: 0.0003 +[2026-03-06 01:44:43] (step=0073324) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.346311876345139, LR: 0.0003 +[2026-03-06 01:44:51] (step=0073325) Train Loss: 0.4536, Train Steps/Sec: 0.13, Epoch: 14.346507532772451, LR: 0.0003 +[2026-03-06 01:44:59] (step=0073326) Train Loss: 0.4681, Train Steps/Sec: 0.13, Epoch: 14.346703189199765, LR: 0.0003 +[2026-03-06 01:45:07] (step=0073327) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.346898845627079, LR: 0.0003 +[2026-03-06 01:45:14] (step=0073328) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.347094502054393, LR: 0.0003 +[2026-03-06 01:45:22] (step=0073329) Train Loss: 0.4290, Train Steps/Sec: 0.13, Epoch: 14.347290158481707, LR: 0.0003 +[2026-03-06 01:45:30] (step=0073330) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.34748581490902, LR: 0.0003 +[2026-03-06 01:45:38] (step=0073331) Train Loss: 0.4558, Train Steps/Sec: 0.13, Epoch: 14.347681471336333, LR: 0.0003 +[2026-03-06 01:45:46] (step=0073332) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.347877127763647, LR: 0.0003 +[2026-03-06 01:45:54] (step=0073333) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.348072784190961, LR: 0.0003 +[2026-03-06 01:46:01] (step=0073334) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.348268440618273, LR: 0.0003 +[2026-03-06 01:46:09] (step=0073335) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 14.348464097045587, LR: 0.0003 +[2026-03-06 01:46:17] (step=0073336) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.348659753472901, LR: 0.0003 +[2026-03-06 01:46:25] (step=0073337) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.348855409900215, LR: 0.0003 +[2026-03-06 01:46:33] (step=0073338) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.34905106632753, LR: 0.0003 +[2026-03-06 01:46:41] (step=0073339) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 14.349246722754842, LR: 0.0003 +[2026-03-06 01:46:49] (step=0073340) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.349442379182156, LR: 0.0003 +[2026-03-06 01:46:56] (step=0073341) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.34963803560947, LR: 0.0003 +[2026-03-06 01:47:04] (step=0073342) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.349833692036784, LR: 0.0003 +[2026-03-06 01:47:12] (step=0073343) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.350029348464098, LR: 0.0003 +[2026-03-06 01:47:20] (step=0073344) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.35022500489141, LR: 0.0003 +[2026-03-06 01:47:28] (step=0073345) Train Loss: 0.4370, Train Steps/Sec: 0.13, Epoch: 14.350420661318724, LR: 0.0003 +[2026-03-06 01:47:36] (step=0073346) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 14.350616317746038, LR: 0.0003 +[2026-03-06 01:47:43] (step=0073347) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.350811974173352, LR: 0.0003 +[2026-03-06 01:47:51] (step=0073348) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 14.351007630600666, LR: 0.0003 +[2026-03-06 01:47:59] (step=0073349) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.351203287027978, LR: 0.0003 +[2026-03-06 01:48:07] (step=0073350) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.351398943455292, LR: 0.0003 +[2026-03-06 01:48:15] (step=0073351) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 14.351594599882606, LR: 0.0003 +[2026-03-06 01:48:23] (step=0073352) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.35179025630992, LR: 0.0003 +[2026-03-06 01:48:31] (step=0073353) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.351985912737234, LR: 0.0003 +[2026-03-06 01:48:39] (step=0073354) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.352181569164546, LR: 0.0003 +[2026-03-06 01:48:46] (step=0073355) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.35237722559186, LR: 0.0003 +[2026-03-06 01:48:54] (step=0073356) Train Loss: 0.4535, Train Steps/Sec: 0.13, Epoch: 14.352572882019174, LR: 0.0003 +[2026-03-06 01:49:02] (step=0073357) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.352768538446488, LR: 0.0003 +[2026-03-06 01:49:10] (step=0073358) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.352964194873802, LR: 0.0003 +[2026-03-06 01:49:18] (step=0073359) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.353159851301115, LR: 0.0003 +[2026-03-06 01:49:26] (step=0073360) Train Loss: 0.4585, Train Steps/Sec: 0.13, Epoch: 14.353355507728429, LR: 0.0003 +[2026-03-06 01:49:33] (step=0073361) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.353551164155743, LR: 0.0003 +[2026-03-06 01:49:41] (step=0073362) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 14.353746820583057, LR: 0.0003 +[2026-03-06 01:49:49] (step=0073363) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.353942477010369, LR: 0.0003 +[2026-03-06 01:49:57] (step=0073364) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.354138133437683, LR: 0.0003 +[2026-03-06 01:50:05] (step=0073365) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 14.354333789864997, LR: 0.0003 +[2026-03-06 01:50:13] (step=0073366) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.354529446292311, LR: 0.0003 +[2026-03-06 01:50:21] (step=0073367) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.354725102719625, LR: 0.0003 +[2026-03-06 01:50:28] (step=0073368) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.354920759146937, LR: 0.0003 +[2026-03-06 01:50:36] (step=0073369) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.355116415574251, LR: 0.0003 +[2026-03-06 01:50:44] (step=0073370) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 14.355312072001565, LR: 0.0003 +[2026-03-06 01:50:52] (step=0073371) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.35550772842888, LR: 0.0003 +[2026-03-06 01:51:00] (step=0073372) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.355703384856193, LR: 0.0003 +[2026-03-06 01:51:08] (step=0073373) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.355899041283505, LR: 0.0003 +[2026-03-06 01:51:15] (step=0073374) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.35609469771082, LR: 0.0003 +[2026-03-06 01:51:23] (step=0073375) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.356290354138133, LR: 0.0003 +[2026-03-06 01:51:31] (step=0073376) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.356486010565447, LR: 0.0003 +[2026-03-06 01:51:39] (step=0073377) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.356681666992761, LR: 0.0003 +[2026-03-06 01:51:47] (step=0073378) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.356877323420074, LR: 0.0003 +[2026-03-06 01:51:55] (step=0073379) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 14.357072979847388, LR: 0.0003 +[2026-03-06 01:52:02] (step=0073380) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 14.357268636274702, LR: 0.0003 +[2026-03-06 01:52:10] (step=0073381) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.357464292702016, LR: 0.0003 +[2026-03-06 01:52:18] (step=0073382) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 14.35765994912933, LR: 0.0003 +[2026-03-06 01:52:26] (step=0073383) Train Loss: 0.4604, Train Steps/Sec: 0.13, Epoch: 14.357855605556642, LR: 0.0003 +[2026-03-06 01:52:34] (step=0073384) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.358051261983956, LR: 0.0003 +[2026-03-06 01:52:42] (step=0073385) Train Loss: 0.4296, Train Steps/Sec: 0.13, Epoch: 14.35824691841127, LR: 0.0003 +[2026-03-06 01:52:50] (step=0073386) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.358442574838584, LR: 0.0003 +[2026-03-06 01:52:57] (step=0073387) Train Loss: 0.4578, Train Steps/Sec: 0.13, Epoch: 14.358638231265896, LR: 0.0003 +[2026-03-06 01:53:05] (step=0073388) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.35883388769321, LR: 0.0003 +[2026-03-06 01:53:13] (step=0073389) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.359029544120524, LR: 0.0003 +[2026-03-06 01:53:21] (step=0073390) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.359225200547838, LR: 0.0003 +[2026-03-06 01:53:29] (step=0073391) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.359420856975152, LR: 0.0003 +[2026-03-06 01:53:37] (step=0073392) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.359616513402464, LR: 0.0003 +[2026-03-06 01:53:44] (step=0073393) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.359812169829778, LR: 0.0003 +[2026-03-06 01:53:52] (step=0073394) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 14.360007826257092, LR: 0.0003 +[2026-03-06 01:54:00] (step=0073395) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.360203482684406, LR: 0.0003 +[2026-03-06 01:54:08] (step=0073396) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.36039913911172, LR: 0.0003 +[2026-03-06 01:54:16] (step=0073397) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 14.360594795539033, LR: 0.0003 +[2026-03-06 01:54:24] (step=0073398) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.360790451966347, LR: 0.0003 +[2026-03-06 01:54:32] (step=0073399) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.36098610839366, LR: 0.0003 +[2026-03-06 01:54:39] (step=0073400) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.361181764820975, LR: 0.0003 +[2026-03-06 01:54:47] (step=0073401) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 14.361377421248289, LR: 0.0003 +[2026-03-06 01:54:55] (step=0073402) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 14.361573077675601, LR: 0.0003 +[2026-03-06 01:55:03] (step=0073403) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.361768734102915, LR: 0.0003 +[2026-03-06 01:55:11] (step=0073404) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.361964390530229, LR: 0.0003 +[2026-03-06 01:55:19] (step=0073405) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.362160046957543, LR: 0.0003 +[2026-03-06 01:55:27] (step=0073406) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.362355703384857, LR: 0.0003 +[2026-03-06 01:55:34] (step=0073407) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.36255135981217, LR: 0.0003 +[2026-03-06 01:55:42] (step=0073408) Train Loss: 0.4485, Train Steps/Sec: 0.13, Epoch: 14.362747016239483, LR: 0.0003 +[2026-03-06 01:55:50] (step=0073409) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 14.362942672666797, LR: 0.0003 +[2026-03-06 01:55:58] (step=0073410) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 14.363138329094111, LR: 0.0003 +[2026-03-06 01:56:06] (step=0073411) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.363333985521425, LR: 0.0003 +[2026-03-06 01:56:14] (step=0073412) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.363529641948737, LR: 0.0003 +[2026-03-06 01:56:21] (step=0073413) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.363725298376051, LR: 0.0003 +[2026-03-06 01:56:29] (step=0073414) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 14.363920954803366, LR: 0.0003 +[2026-03-06 01:56:37] (step=0073415) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.36411661123068, LR: 0.0003 +[2026-03-06 01:56:45] (step=0073416) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.364312267657992, LR: 0.0003 +[2026-03-06 01:56:53] (step=0073417) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.364507924085306, LR: 0.0003 +[2026-03-06 01:57:01] (step=0073418) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.36470358051262, LR: 0.0003 +[2026-03-06 01:57:08] (step=0073419) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 14.364899236939934, LR: 0.0003 +[2026-03-06 01:57:16] (step=0073420) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 14.365094893367248, LR: 0.0003 +[2026-03-06 01:57:24] (step=0073421) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.36529054979456, LR: 0.0003 +[2026-03-06 01:57:32] (step=0073422) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.365486206221874, LR: 0.0003 +[2026-03-06 01:57:40] (step=0073423) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 14.365681862649188, LR: 0.0003 +[2026-03-06 01:57:48] (step=0073424) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.365877519076502, LR: 0.0003 +[2026-03-06 01:57:56] (step=0073425) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.366073175503816, LR: 0.0003 +[2026-03-06 01:58:03] (step=0073426) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 14.366268831931128, LR: 0.0003 +[2026-03-06 01:58:11] (step=0073427) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.366464488358442, LR: 0.0003 +[2026-03-06 01:58:19] (step=0073428) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.366660144785756, LR: 0.0003 +[2026-03-06 01:58:27] (step=0073429) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.36685580121307, LR: 0.0003 +[2026-03-06 01:58:35] (step=0073430) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.367051457640384, LR: 0.0003 +[2026-03-06 01:58:43] (step=0073431) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.367247114067697, LR: 0.0003 +[2026-03-06 01:58:50] (step=0073432) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.36744277049501, LR: 0.0003 +[2026-03-06 01:58:58] (step=0073433) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.367638426922325, LR: 0.0003 +[2026-03-06 01:59:06] (step=0073434) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.367834083349639, LR: 0.0003 +[2026-03-06 01:59:14] (step=0073435) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.368029739776953, LR: 0.0003 +[2026-03-06 01:59:22] (step=0073436) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.368225396204265, LR: 0.0003 +[2026-03-06 01:59:30] (step=0073437) Train Loss: 0.4579, Train Steps/Sec: 0.13, Epoch: 14.368421052631579, LR: 0.0003 +[2026-03-06 01:59:38] (step=0073438) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.368616709058893, LR: 0.0003 +[2026-03-06 01:59:45] (step=0073439) Train Loss: 0.4301, Train Steps/Sec: 0.13, Epoch: 14.368812365486207, LR: 0.0003 +[2026-03-06 01:59:53] (step=0073440) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.369008021913519, LR: 0.0003 +[2026-03-06 02:00:01] (step=0073441) Train Loss: 0.4609, Train Steps/Sec: 0.13, Epoch: 14.369203678340833, LR: 0.0003 +[2026-03-06 02:00:09] (step=0073442) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.369399334768147, LR: 0.0003 +[2026-03-06 02:00:17] (step=0073443) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.369594991195461, LR: 0.0003 +[2026-03-06 02:00:25] (step=0073444) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.369790647622775, LR: 0.0003 +[2026-03-06 02:00:32] (step=0073445) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.369986304050087, LR: 0.0003 +[2026-03-06 02:00:40] (step=0073446) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.370181960477401, LR: 0.0003 +[2026-03-06 02:00:48] (step=0073447) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 14.370377616904715, LR: 0.0003 +[2026-03-06 02:00:56] (step=0073448) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 14.37057327333203, LR: 0.0003 +[2026-03-06 02:01:04] (step=0073449) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.370768929759343, LR: 0.0003 +[2026-03-06 02:01:12] (step=0073450) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.370964586186656, LR: 0.0003 +[2026-03-06 02:01:20] (step=0073451) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.37116024261397, LR: 0.0003 +[2026-03-06 02:01:28] (step=0073452) Train Loss: 0.4357, Train Steps/Sec: 0.13, Epoch: 14.371355899041284, LR: 0.0003 +[2026-03-06 02:01:35] (step=0073453) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 14.371551555468598, LR: 0.0003 +[2026-03-06 02:01:43] (step=0073454) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 14.371747211895912, LR: 0.0003 +[2026-03-06 02:01:51] (step=0073455) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.371942868323224, LR: 0.0003 +[2026-03-06 02:01:59] (step=0073456) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.372138524750538, LR: 0.0003 +[2026-03-06 02:02:07] (step=0073457) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 14.372334181177852, LR: 0.0003 +[2026-03-06 02:02:15] (step=0073458) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.372529837605166, LR: 0.0003 +[2026-03-06 02:02:22] (step=0073459) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.37272549403248, LR: 0.0003 +[2026-03-06 02:02:30] (step=0073460) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 14.372921150459792, LR: 0.0003 +[2026-03-06 02:02:38] (step=0073461) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.373116806887106, LR: 0.0003 +[2026-03-06 02:02:46] (step=0073462) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.37331246331442, LR: 0.0003 +[2026-03-06 02:02:54] (step=0073463) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.373508119741734, LR: 0.0003 +[2026-03-06 02:03:02] (step=0073464) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.373703776169048, LR: 0.0003 +[2026-03-06 02:03:09] (step=0073465) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.37389943259636, LR: 0.0003 +[2026-03-06 02:03:17] (step=0073466) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 14.374095089023674, LR: 0.0003 +[2026-03-06 02:03:25] (step=0073467) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.374290745450988, LR: 0.0003 +[2026-03-06 02:03:33] (step=0073468) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.374486401878302, LR: 0.0003 +[2026-03-06 02:03:41] (step=0073469) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.374682058305615, LR: 0.0003 +[2026-03-06 02:03:49] (step=0073470) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.374877714732929, LR: 0.0003 +[2026-03-06 02:03:57] (step=0073471) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.375073371160243, LR: 0.0003 +[2026-03-06 02:04:04] (step=0073472) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 14.375269027587557, LR: 0.0003 +[2026-03-06 02:04:12] (step=0073473) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.37546468401487, LR: 0.0003 +[2026-03-06 02:04:20] (step=0073474) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.375660340442183, LR: 0.0003 +[2026-03-06 02:04:28] (step=0073475) Train Loss: 0.4540, Train Steps/Sec: 0.13, Epoch: 14.375855996869497, LR: 0.0003 +[2026-03-06 02:04:36] (step=0073476) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.37605165329681, LR: 0.0003 +[2026-03-06 02:04:44] (step=0073477) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.376247309724125, LR: 0.0003 +[2026-03-06 02:04:51] (step=0073478) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.376442966151439, LR: 0.0003 +[2026-03-06 02:04:59] (step=0073479) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.376638622578751, LR: 0.0003 +[2026-03-06 02:05:07] (step=0073480) Train Loss: 0.4612, Train Steps/Sec: 0.13, Epoch: 14.376834279006065, LR: 0.0003 +[2026-03-06 02:05:15] (step=0073481) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.377029935433379, LR: 0.0003 +[2026-03-06 02:05:23] (step=0073482) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 14.377225591860693, LR: 0.0003 +[2026-03-06 02:05:31] (step=0073483) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.377421248288007, LR: 0.0003 +[2026-03-06 02:05:38] (step=0073484) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.37761690471532, LR: 0.0003 +[2026-03-06 02:05:46] (step=0073485) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.377812561142633, LR: 0.0003 +[2026-03-06 02:05:54] (step=0073486) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.378008217569947, LR: 0.0003 +[2026-03-06 02:06:02] (step=0073487) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.378203873997261, LR: 0.0003 +[2026-03-06 02:06:10] (step=0073488) Train Loss: 0.4593, Train Steps/Sec: 0.13, Epoch: 14.378399530424575, LR: 0.0003 +[2026-03-06 02:06:18] (step=0073489) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.378595186851888, LR: 0.0003 +[2026-03-06 02:06:25] (step=0073490) Train Loss: 0.4509, Train Steps/Sec: 0.13, Epoch: 14.378790843279202, LR: 0.0003 +[2026-03-06 02:06:33] (step=0073491) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.378986499706516, LR: 0.0003 +[2026-03-06 02:06:41] (step=0073492) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.37918215613383, LR: 0.0003 +[2026-03-06 02:06:49] (step=0073493) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.379377812561142, LR: 0.0003 +[2026-03-06 02:06:57] (step=0073494) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.379573468988456, LR: 0.0003 +[2026-03-06 02:07:05] (step=0073495) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.37976912541577, LR: 0.0003 +[2026-03-06 02:07:12] (step=0073496) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.379964781843084, LR: 0.0003 +[2026-03-06 02:07:20] (step=0073497) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.380160438270398, LR: 0.0003 +[2026-03-06 02:07:28] (step=0073498) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.38035609469771, LR: 0.0003 +[2026-03-06 02:07:36] (step=0073499) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.380551751125024, LR: 0.0003 +[2026-03-06 02:07:44] (step=0073500) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.380747407552338, LR: 0.0003 +[2026-03-06 02:07:44] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0073500/ +[2026-03-06 02:07:52] (step=0073501) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.380943063979652, LR: 0.0003 +[2026-03-06 02:08:00] (step=0073502) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.381138720406966, LR: 0.0003 +[2026-03-06 02:08:07] (step=0073503) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.381334376834278, LR: 0.0003 +[2026-03-06 02:08:15] (step=0073504) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.381530033261592, LR: 0.0003 +[2026-03-06 02:08:23] (step=0073505) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 14.381725689688906, LR: 0.0003 +[2026-03-06 02:08:31] (step=0073506) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 14.38192134611622, LR: 0.0003 +[2026-03-06 02:08:39] (step=0073507) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 14.382117002543534, LR: 0.0003 +[2026-03-06 02:08:47] (step=0073508) Train Loss: 0.4303, Train Steps/Sec: 0.13, Epoch: 14.382312658970847, LR: 0.0003 +[2026-03-06 02:08:55] (step=0073509) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.38250831539816, LR: 0.0003 +[2026-03-06 02:09:02] (step=0073510) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.382703971825475, LR: 0.0003 +[2026-03-06 02:09:10] (step=0073511) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.382899628252789, LR: 0.0003 +[2026-03-06 02:09:18] (step=0073512) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.383095284680103, LR: 0.0003 +[2026-03-06 02:09:26] (step=0073513) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.383290941107415, LR: 0.0003 +[2026-03-06 02:09:34] (step=0073514) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.383486597534729, LR: 0.0003 +[2026-03-06 02:09:42] (step=0073515) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 14.383682253962043, LR: 0.0003 +[2026-03-06 02:09:49] (step=0073516) Train Loss: 0.4292, Train Steps/Sec: 0.13, Epoch: 14.383877910389357, LR: 0.0003 +[2026-03-06 02:09:57] (step=0073517) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.384073566816669, LR: 0.0003 +[2026-03-06 02:10:05] (step=0073518) Train Loss: 0.4548, Train Steps/Sec: 0.13, Epoch: 14.384269223243983, LR: 0.0003 +[2026-03-06 02:10:13] (step=0073519) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.384464879671297, LR: 0.0003 +[2026-03-06 02:10:21] (step=0073520) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.384660536098611, LR: 0.0003 +[2026-03-06 02:10:29] (step=0073521) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.384856192525925, LR: 0.0003 +[2026-03-06 02:10:37] (step=0073522) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.385051848953237, LR: 0.0003 +[2026-03-06 02:10:44] (step=0073523) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.385247505380551, LR: 0.0003 +[2026-03-06 02:10:52] (step=0073524) Train Loss: 0.4283, Train Steps/Sec: 0.13, Epoch: 14.385443161807865, LR: 0.0003 +[2026-03-06 02:11:00] (step=0073525) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.38563881823518, LR: 0.0003 +[2026-03-06 02:11:08] (step=0073526) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.385834474662493, LR: 0.0003 +[2026-03-06 02:11:16] (step=0073527) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.386030131089806, LR: 0.0003 +[2026-03-06 02:11:24] (step=0073528) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.38622578751712, LR: 0.0003 +[2026-03-06 02:11:31] (step=0073529) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.386421443944434, LR: 0.0003 +[2026-03-06 02:11:39] (step=0073530) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.386617100371748, LR: 0.0003 +[2026-03-06 02:11:47] (step=0073531) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.386812756799062, LR: 0.0003 +[2026-03-06 02:11:55] (step=0073532) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 14.387008413226374, LR: 0.0003 +[2026-03-06 02:12:03] (step=0073533) Train Loss: 0.4583, Train Steps/Sec: 0.13, Epoch: 14.387204069653688, LR: 0.0003 +[2026-03-06 02:12:11] (step=0073534) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.387399726081002, LR: 0.0003 +[2026-03-06 02:12:18] (step=0073535) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 14.387595382508316, LR: 0.0003 +[2026-03-06 02:12:26] (step=0073536) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.38779103893563, LR: 0.0003 +[2026-03-06 02:12:34] (step=0073537) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.387986695362942, LR: 0.0003 +[2026-03-06 02:12:42] (step=0073538) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.388182351790256, LR: 0.0003 +[2026-03-06 02:12:50] (step=0073539) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.38837800821757, LR: 0.0003 +[2026-03-06 02:12:58] (step=0073540) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.388573664644884, LR: 0.0003 +[2026-03-06 02:13:05] (step=0073541) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 14.388769321072198, LR: 0.0003 +[2026-03-06 02:13:13] (step=0073542) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.38896497749951, LR: 0.0003 +[2026-03-06 02:13:21] (step=0073543) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.389160633926824, LR: 0.0003 +[2026-03-06 02:13:29] (step=0073544) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 14.389356290354138, LR: 0.0003 +[2026-03-06 02:13:37] (step=0073545) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.389551946781452, LR: 0.0003 +[2026-03-06 02:13:45] (step=0073546) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 14.389747603208765, LR: 0.0003 +[2026-03-06 02:13:52] (step=0073547) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.389943259636079, LR: 0.0003 +[2026-03-06 02:14:00] (step=0073548) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.390138916063393, LR: 0.0003 +[2026-03-06 02:14:08] (step=0073549) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.390334572490707, LR: 0.0003 +[2026-03-06 02:14:16] (step=0073550) Train Loss: 0.4469, Train Steps/Sec: 0.13, Epoch: 14.39053022891802, LR: 0.0003 +[2026-03-06 02:14:24] (step=0073551) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.390725885345333, LR: 0.0003 +[2026-03-06 02:14:32] (step=0073552) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.390921541772647, LR: 0.0003 +[2026-03-06 02:14:40] (step=0073553) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.39111719819996, LR: 0.0003 +[2026-03-06 02:14:48] (step=0073554) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.391312854627275, LR: 0.0003 +[2026-03-06 02:14:55] (step=0073555) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.391508511054589, LR: 0.0003 +[2026-03-06 02:15:03] (step=0073556) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.391704167481901, LR: 0.0003 +[2026-03-06 02:15:11] (step=0073557) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.391899823909215, LR: 0.0003 +[2026-03-06 02:15:19] (step=0073558) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 14.392095480336529, LR: 0.0003 +[2026-03-06 02:15:27] (step=0073559) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.392291136763843, LR: 0.0003 +[2026-03-06 02:15:35] (step=0073560) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.392486793191157, LR: 0.0003 +[2026-03-06 02:15:42] (step=0073561) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.39268244961847, LR: 0.0003 +[2026-03-06 02:15:50] (step=0073562) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 14.392878106045783, LR: 0.0003 +[2026-03-06 02:15:58] (step=0073563) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.393073762473097, LR: 0.0003 +[2026-03-06 02:16:06] (step=0073564) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.393269418900411, LR: 0.0003 +[2026-03-06 02:16:14] (step=0073565) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 14.393465075327725, LR: 0.0003 +[2026-03-06 02:16:22] (step=0073566) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.393660731755038, LR: 0.0003 +[2026-03-06 02:16:29] (step=0073567) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 14.393856388182352, LR: 0.0003 +[2026-03-06 02:16:37] (step=0073568) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 14.394052044609666, LR: 0.0003 +[2026-03-06 02:16:45] (step=0073569) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 14.39424770103698, LR: 0.0003 +[2026-03-06 02:16:53] (step=0073570) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.394443357464292, LR: 0.0003 +[2026-03-06 02:17:01] (step=0073571) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.394639013891606, LR: 0.0003 +[2026-03-06 02:17:09] (step=0073572) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.39483467031892, LR: 0.0003 +[2026-03-06 02:17:16] (step=0073573) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.395030326746234, LR: 0.0003 +[2026-03-06 02:17:24] (step=0073574) Train Loss: 0.4520, Train Steps/Sec: 0.13, Epoch: 14.395225983173548, LR: 0.0003 +[2026-03-06 02:17:32] (step=0073575) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.39542163960086, LR: 0.0003 +[2026-03-06 02:17:40] (step=0073576) Train Loss: 0.4332, Train Steps/Sec: 0.13, Epoch: 14.395617296028174, LR: 0.0003 +[2026-03-06 02:17:48] (step=0073577) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.395812952455488, LR: 0.0003 +[2026-03-06 02:17:56] (step=0073578) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 14.396008608882802, LR: 0.0003 +[2026-03-06 02:18:04] (step=0073579) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 14.396204265310116, LR: 0.0003 +[2026-03-06 02:18:11] (step=0073580) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 14.396399921737428, LR: 0.0003 +[2026-03-06 02:18:19] (step=0073581) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 14.396595578164742, LR: 0.0003 +[2026-03-06 02:18:27] (step=0073582) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.396791234592056, LR: 0.0003 +[2026-03-06 02:18:35] (step=0073583) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.39698689101937, LR: 0.0003 +[2026-03-06 02:18:43] (step=0073584) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.397182547446684, LR: 0.0003 +[2026-03-06 02:18:51] (step=0073585) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.397378203873997, LR: 0.0003 +[2026-03-06 02:18:59] (step=0073586) Train Loss: 0.4497, Train Steps/Sec: 0.13, Epoch: 14.39757386030131, LR: 0.0003 +[2026-03-06 02:19:06] (step=0073587) Train Loss: 0.4495, Train Steps/Sec: 0.13, Epoch: 14.397769516728625, LR: 0.0003 +[2026-03-06 02:19:14] (step=0073588) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.397965173155939, LR: 0.0003 +[2026-03-06 02:19:22] (step=0073589) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.398160829583253, LR: 0.0003 +[2026-03-06 02:19:30] (step=0073590) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.398356486010565, LR: 0.0003 +[2026-03-06 02:19:38] (step=0073591) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.398552142437879, LR: 0.0003 +[2026-03-06 02:19:46] (step=0073592) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.398747798865193, LR: 0.0003 +[2026-03-06 02:19:53] (step=0073593) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.398943455292507, LR: 0.0003 +[2026-03-06 02:20:01] (step=0073594) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.39913911171982, LR: 0.0003 +[2026-03-06 02:20:09] (step=0073595) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 14.399334768147133, LR: 0.0003 +[2026-03-06 02:20:17] (step=0073596) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.399530424574447, LR: 0.0003 +[2026-03-06 02:20:25] (step=0073597) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.399726081001761, LR: 0.0003 +[2026-03-06 02:20:32] (step=0073598) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.399921737429075, LR: 0.0003 +[2026-03-06 02:20:40] (step=0073599) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.400117393856387, LR: 0.0003 +[2026-03-06 02:20:48] (step=0073600) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.400313050283701, LR: 0.0003 +[2026-03-06 02:20:56] (step=0073601) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.400508706711015, LR: 0.0003 +[2026-03-06 02:21:04] (step=0073602) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.40070436313833, LR: 0.0003 +[2026-03-06 02:21:12] (step=0073603) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.400900019565643, LR: 0.0003 +[2026-03-06 02:21:20] (step=0073604) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.401095675992956, LR: 0.0003 +[2026-03-06 02:21:28] (step=0073605) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.40129133242027, LR: 0.0003 +[2026-03-06 02:21:35] (step=0073606) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.401486988847584, LR: 0.0003 +[2026-03-06 02:21:43] (step=0073607) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 14.401682645274898, LR: 0.0003 +[2026-03-06 02:21:51] (step=0073608) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.401878301702212, LR: 0.0003 +[2026-03-06 02:21:59] (step=0073609) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.402073958129524, LR: 0.0003 +[2026-03-06 02:22:07] (step=0073610) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 14.402269614556838, LR: 0.0003 +[2026-03-06 02:22:14] (step=0073611) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 14.402465270984152, LR: 0.0003 +[2026-03-06 02:22:22] (step=0073612) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.402660927411466, LR: 0.0003 +[2026-03-06 02:22:30] (step=0073613) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.40285658383878, LR: 0.0003 +[2026-03-06 02:22:38] (step=0073614) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.403052240266092, LR: 0.0003 +[2026-03-06 02:22:46] (step=0073615) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.403247896693406, LR: 0.0003 +[2026-03-06 02:22:54] (step=0073616) Train Loss: 0.4419, Train Steps/Sec: 0.13, Epoch: 14.40344355312072, LR: 0.0003 +[2026-03-06 02:23:01] (step=0073617) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.403639209548034, LR: 0.0003 +[2026-03-06 02:23:09] (step=0073618) Train Loss: 0.4568, Train Steps/Sec: 0.13, Epoch: 14.403834865975348, LR: 0.0003 +[2026-03-06 02:23:17] (step=0073619) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.40403052240266, LR: 0.0003 +[2026-03-06 02:23:25] (step=0073620) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.404226178829974, LR: 0.0003 +[2026-03-06 02:23:33] (step=0073621) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.404421835257288, LR: 0.0003 +[2026-03-06 02:23:41] (step=0073622) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.404617491684602, LR: 0.0003 +[2026-03-06 02:23:48] (step=0073623) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.404813148111915, LR: 0.0003 +[2026-03-06 02:23:56] (step=0073624) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.405008804539229, LR: 0.0003 +[2026-03-06 02:24:04] (step=0073625) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 14.405204460966543, LR: 0.0003 +[2026-03-06 02:24:12] (step=0073626) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.405400117393857, LR: 0.0003 +[2026-03-06 02:24:20] (step=0073627) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.40559577382117, LR: 0.0003 +[2026-03-06 02:24:28] (step=0073628) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.405791430248483, LR: 0.0003 +[2026-03-06 02:24:35] (step=0073629) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.405987086675797, LR: 0.0003 +[2026-03-06 02:24:43] (step=0073630) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.40618274310311, LR: 0.0003 +[2026-03-06 02:24:51] (step=0073631) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.406378399530425, LR: 0.0003 +[2026-03-06 02:24:59] (step=0073632) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.406574055957739, LR: 0.0003 +[2026-03-06 02:25:07] (step=0073633) Train Loss: 0.4282, Train Steps/Sec: 0.13, Epoch: 14.406769712385051, LR: 0.0003 +[2026-03-06 02:25:15] (step=0073634) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 14.406965368812365, LR: 0.0003 +[2026-03-06 02:25:22] (step=0073635) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.40716102523968, LR: 0.0003 +[2026-03-06 02:25:30] (step=0073636) Train Loss: 0.4620, Train Steps/Sec: 0.13, Epoch: 14.407356681666993, LR: 0.0003 +[2026-03-06 02:25:38] (step=0073637) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.407552338094307, LR: 0.0003 +[2026-03-06 02:25:46] (step=0073638) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 14.40774799452162, LR: 0.0003 +[2026-03-06 02:25:54] (step=0073639) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.407943650948933, LR: 0.0003 +[2026-03-06 02:26:01] (step=0073640) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.408139307376247, LR: 0.0003 +[2026-03-06 02:26:09] (step=0073641) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 14.408334963803561, LR: 0.0003 +[2026-03-06 02:26:17] (step=0073642) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.408530620230875, LR: 0.0003 +[2026-03-06 02:26:25] (step=0073643) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.408726276658188, LR: 0.0003 +[2026-03-06 02:26:33] (step=0073644) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.408921933085502, LR: 0.0003 +[2026-03-06 02:26:41] (step=0073645) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.409117589512816, LR: 0.0003 +[2026-03-06 02:26:48] (step=0073646) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 14.40931324594013, LR: 0.0003 +[2026-03-06 02:26:56] (step=0073647) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.409508902367444, LR: 0.0003 +[2026-03-06 02:27:04] (step=0073648) Train Loss: 0.4522, Train Steps/Sec: 0.13, Epoch: 14.409704558794756, LR: 0.0003 +[2026-03-06 02:27:12] (step=0073649) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.40990021522207, LR: 0.0003 +[2026-03-06 02:27:20] (step=0073650) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.410095871649384, LR: 0.0003 +[2026-03-06 02:27:28] (step=0073651) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 14.410291528076698, LR: 0.0003 +[2026-03-06 02:27:36] (step=0073652) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.41048718450401, LR: 0.0003 +[2026-03-06 02:27:44] (step=0073653) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.410682840931324, LR: 0.0003 +[2026-03-06 02:27:51] (step=0073654) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.410878497358638, LR: 0.0003 +[2026-03-06 02:27:59] (step=0073655) Train Loss: 0.4330, Train Steps/Sec: 0.13, Epoch: 14.411074153785952, LR: 0.0003 +[2026-03-06 02:28:07] (step=0073656) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.411269810213266, LR: 0.0003 +[2026-03-06 02:28:15] (step=0073657) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 14.411465466640578, LR: 0.0003 +[2026-03-06 02:28:23] (step=0073658) Train Loss: 0.4371, Train Steps/Sec: 0.13, Epoch: 14.411661123067892, LR: 0.0003 +[2026-03-06 02:28:31] (step=0073659) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.411856779495206, LR: 0.0003 +[2026-03-06 02:28:38] (step=0073660) Train Loss: 0.4302, Train Steps/Sec: 0.13, Epoch: 14.41205243592252, LR: 0.0003 +[2026-03-06 02:28:46] (step=0073661) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.412248092349834, LR: 0.0003 +[2026-03-06 02:28:54] (step=0073662) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.412443748777147, LR: 0.0003 +[2026-03-06 02:29:02] (step=0073663) Train Loss: 0.4380, Train Steps/Sec: 0.13, Epoch: 14.41263940520446, LR: 0.0003 +[2026-03-06 02:29:10] (step=0073664) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 14.412835061631775, LR: 0.0003 +[2026-03-06 02:29:18] (step=0073665) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.413030718059089, LR: 0.0003 +[2026-03-06 02:29:25] (step=0073666) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.413226374486403, LR: 0.0003 +[2026-03-06 02:29:33] (step=0073667) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 14.413422030913715, LR: 0.0003 +[2026-03-06 02:29:41] (step=0073668) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.413617687341029, LR: 0.0003 +[2026-03-06 02:29:49] (step=0073669) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.413813343768343, LR: 0.0003 +[2026-03-06 02:29:57] (step=0073670) Train Loss: 0.4274, Train Steps/Sec: 0.13, Epoch: 14.414009000195657, LR: 0.0003 +[2026-03-06 02:30:05] (step=0073671) Train Loss: 0.4436, Train Steps/Sec: 0.13, Epoch: 14.414204656622971, LR: 0.0003 +[2026-03-06 02:30:12] (step=0073672) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.414400313050283, LR: 0.0003 +[2026-03-06 02:30:20] (step=0073673) Train Loss: 0.4402, Train Steps/Sec: 0.13, Epoch: 14.414595969477597, LR: 0.0003 +[2026-03-06 02:30:28] (step=0073674) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.414791625904911, LR: 0.0003 +[2026-03-06 02:30:36] (step=0073675) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.414987282332225, LR: 0.0003 +[2026-03-06 02:30:44] (step=0073676) Train Loss: 0.4563, Train Steps/Sec: 0.13, Epoch: 14.415182938759537, LR: 0.0003 +[2026-03-06 02:30:52] (step=0073677) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 14.415378595186851, LR: 0.0003 +[2026-03-06 02:30:59] (step=0073678) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.415574251614165, LR: 0.0003 +[2026-03-06 02:31:07] (step=0073679) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.41576990804148, LR: 0.0003 +[2026-03-06 02:31:15] (step=0073680) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.415965564468793, LR: 0.0003 +[2026-03-06 02:31:23] (step=0073681) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.416161220896106, LR: 0.0003 +[2026-03-06 02:31:31] (step=0073682) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.41635687732342, LR: 0.0003 +[2026-03-06 02:31:39] (step=0073683) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.416552533750734, LR: 0.0003 +[2026-03-06 02:31:46] (step=0073684) Train Loss: 0.4461, Train Steps/Sec: 0.13, Epoch: 14.416748190178048, LR: 0.0003 +[2026-03-06 02:31:54] (step=0073685) Train Loss: 0.4412, Train Steps/Sec: 0.13, Epoch: 14.416943846605362, LR: 0.0003 +[2026-03-06 02:32:02] (step=0073686) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.417139503032674, LR: 0.0003 +[2026-03-06 02:32:10] (step=0073687) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.417335159459988, LR: 0.0003 +[2026-03-06 02:32:18] (step=0073688) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.417530815887302, LR: 0.0003 +[2026-03-06 02:32:26] (step=0073689) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.417726472314616, LR: 0.0003 +[2026-03-06 02:32:33] (step=0073690) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 14.41792212874193, LR: 0.0003 +[2026-03-06 02:32:41] (step=0073691) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.418117785169242, LR: 0.0003 +[2026-03-06 02:32:49] (step=0073692) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.418313441596556, LR: 0.0003 +[2026-03-06 02:32:57] (step=0073693) Train Loss: 0.4505, Train Steps/Sec: 0.13, Epoch: 14.41850909802387, LR: 0.0003 +[2026-03-06 02:33:05] (step=0073694) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.418704754451184, LR: 0.0003 +[2026-03-06 02:33:13] (step=0073695) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.418900410878498, LR: 0.0003 +[2026-03-06 02:33:20] (step=0073696) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.41909606730581, LR: 0.0003 +[2026-03-06 02:33:28] (step=0073697) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.419291723733124, LR: 0.0003 +[2026-03-06 02:33:36] (step=0073698) Train Loss: 0.4517, Train Steps/Sec: 0.13, Epoch: 14.419487380160438, LR: 0.0003 +[2026-03-06 02:33:44] (step=0073699) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.419683036587752, LR: 0.0003 +[2026-03-06 02:33:52] (step=0073700) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.419878693015066, LR: 0.0003 +[2026-03-06 02:34:00] (step=0073701) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.420074349442379, LR: 0.0003 +[2026-03-06 02:34:08] (step=0073702) Train Loss: 0.4592, Train Steps/Sec: 0.13, Epoch: 14.420270005869693, LR: 0.0003 +[2026-03-06 02:34:15] (step=0073703) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.420465662297007, LR: 0.0003 +[2026-03-06 02:34:23] (step=0073704) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.42066131872432, LR: 0.0003 +[2026-03-06 02:34:31] (step=0073705) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 14.420856975151633, LR: 0.0003 +[2026-03-06 02:34:39] (step=0073706) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.421052631578947, LR: 0.0003 +[2026-03-06 02:34:47] (step=0073707) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 14.421248288006261, LR: 0.0003 +[2026-03-06 02:34:55] (step=0073708) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.421443944433575, LR: 0.0003 +[2026-03-06 02:35:03] (step=0073709) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.421639600860889, LR: 0.0003 +[2026-03-06 02:35:10] (step=0073710) Train Loss: 0.4488, Train Steps/Sec: 0.13, Epoch: 14.421835257288201, LR: 0.0003 +[2026-03-06 02:35:18] (step=0073711) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.422030913715515, LR: 0.0003 +[2026-03-06 02:35:26] (step=0073712) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.42222657014283, LR: 0.0003 +[2026-03-06 02:35:34] (step=0073713) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.422422226570143, LR: 0.0003 +[2026-03-06 02:35:42] (step=0073714) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 14.422617882997457, LR: 0.0003 +[2026-03-06 02:35:50] (step=0073715) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.42281353942477, LR: 0.0003 +[2026-03-06 02:35:57] (step=0073716) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.423009195852083, LR: 0.0003 +[2026-03-06 02:36:05] (step=0073717) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.423204852279397, LR: 0.0003 +[2026-03-06 02:36:13] (step=0073718) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.423400508706711, LR: 0.0003 +[2026-03-06 02:36:21] (step=0073719) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.423596165134025, LR: 0.0003 +[2026-03-06 02:36:29] (step=0073720) Train Loss: 0.4319, Train Steps/Sec: 0.13, Epoch: 14.423791821561338, LR: 0.0003 +[2026-03-06 02:36:37] (step=0073721) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.423987477988652, LR: 0.0003 +[2026-03-06 02:36:44] (step=0073722) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.424183134415966, LR: 0.0003 +[2026-03-06 02:36:52] (step=0073723) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.42437879084328, LR: 0.0003 +[2026-03-06 02:37:00] (step=0073724) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.424574447270594, LR: 0.0003 +[2026-03-06 02:37:08] (step=0073725) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.424770103697906, LR: 0.0003 +[2026-03-06 02:37:16] (step=0073726) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.42496576012522, LR: 0.0003 +[2026-03-06 02:37:24] (step=0073727) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.425161416552534, LR: 0.0003 +[2026-03-06 02:37:31] (step=0073728) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.425357072979848, LR: 0.0003 +[2026-03-06 02:37:39] (step=0073729) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.42555272940716, LR: 0.0003 +[2026-03-06 02:37:47] (step=0073730) Train Loss: 0.4388, Train Steps/Sec: 0.13, Epoch: 14.425748385834474, LR: 0.0003 +[2026-03-06 02:37:55] (step=0073731) Train Loss: 0.4277, Train Steps/Sec: 0.13, Epoch: 14.425944042261788, LR: 0.0003 +[2026-03-06 02:38:03] (step=0073732) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.426139698689102, LR: 0.0003 +[2026-03-06 02:38:11] (step=0073733) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.426335355116416, LR: 0.0003 +[2026-03-06 02:38:18] (step=0073734) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 14.426531011543728, LR: 0.0003 +[2026-03-06 02:38:26] (step=0073735) Train Loss: 0.4534, Train Steps/Sec: 0.13, Epoch: 14.426726667971042, LR: 0.0003 +[2026-03-06 02:38:34] (step=0073736) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.426922324398356, LR: 0.0003 +[2026-03-06 02:38:42] (step=0073737) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 14.42711798082567, LR: 0.0003 +[2026-03-06 02:38:50] (step=0073738) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.427313637252984, LR: 0.0003 +[2026-03-06 02:38:58] (step=0073739) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.427509293680297, LR: 0.0003 +[2026-03-06 02:39:05] (step=0073740) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.42770495010761, LR: 0.0003 +[2026-03-06 02:39:13] (step=0073741) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.427900606534925, LR: 0.0003 +[2026-03-06 02:39:21] (step=0073742) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.428096262962239, LR: 0.0003 +[2026-03-06 02:39:29] (step=0073743) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.428291919389553, LR: 0.0003 +[2026-03-06 02:39:37] (step=0073744) Train Loss: 0.4595, Train Steps/Sec: 0.13, Epoch: 14.428487575816865, LR: 0.0003 +[2026-03-06 02:39:45] (step=0073745) Train Loss: 0.4291, Train Steps/Sec: 0.13, Epoch: 14.428683232244179, LR: 0.0003 +[2026-03-06 02:39:53] (step=0073746) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.428878888671493, LR: 0.0003 +[2026-03-06 02:40:00] (step=0073747) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.429074545098807, LR: 0.0003 +[2026-03-06 02:40:08] (step=0073748) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.429270201526121, LR: 0.0003 +[2026-03-06 02:40:16] (step=0073749) Train Loss: 0.4575, Train Steps/Sec: 0.13, Epoch: 14.429465857953433, LR: 0.0003 +[2026-03-06 02:40:24] (step=0073750) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.429661514380747, LR: 0.0003 +[2026-03-06 02:40:32] (step=0073751) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.429857170808061, LR: 0.0003 +[2026-03-06 02:40:40] (step=0073752) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.430052827235375, LR: 0.0003 +[2026-03-06 02:40:48] (step=0073753) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.43024848366269, LR: 0.0003 +[2026-03-06 02:40:55] (step=0073754) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.430444140090001, LR: 0.0003 +[2026-03-06 02:41:03] (step=0073755) Train Loss: 0.4299, Train Steps/Sec: 0.13, Epoch: 14.430639796517315, LR: 0.0003 +[2026-03-06 02:41:11] (step=0073756) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.43083545294463, LR: 0.0003 +[2026-03-06 02:41:19] (step=0073757) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.431031109371943, LR: 0.0003 +[2026-03-06 02:41:27] (step=0073758) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.431226765799256, LR: 0.0003 +[2026-03-06 02:41:35] (step=0073759) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.43142242222657, LR: 0.0003 +[2026-03-06 02:41:43] (step=0073760) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.431618078653884, LR: 0.0003 +[2026-03-06 02:41:50] (step=0073761) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.431813735081198, LR: 0.0003 +[2026-03-06 02:41:58] (step=0073762) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.432009391508512, LR: 0.0003 +[2026-03-06 02:42:06] (step=0073763) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 14.432205047935824, LR: 0.0003 +[2026-03-06 02:42:14] (step=0073764) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.432400704363138, LR: 0.0003 +[2026-03-06 02:42:22] (step=0073765) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.432596360790452, LR: 0.0003 +[2026-03-06 02:42:30] (step=0073766) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.432792017217766, LR: 0.0003 +[2026-03-06 02:42:37] (step=0073767) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.43298767364508, LR: 0.0003 +[2026-03-06 02:42:45] (step=0073768) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.433183330072392, LR: 0.0003 +[2026-03-06 02:42:53] (step=0073769) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.433378986499706, LR: 0.0003 +[2026-03-06 02:43:01] (step=0073770) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.43357464292702, LR: 0.0003 +[2026-03-06 02:43:09] (step=0073771) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.433770299354334, LR: 0.0003 +[2026-03-06 02:43:17] (step=0073772) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 14.433965955781648, LR: 0.0003 +[2026-03-06 02:43:24] (step=0073773) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.43416161220896, LR: 0.0003 +[2026-03-06 02:43:32] (step=0073774) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.434357268636274, LR: 0.0003 +[2026-03-06 02:43:40] (step=0073775) Train Loss: 0.4354, Train Steps/Sec: 0.13, Epoch: 14.434552925063588, LR: 0.0003 +[2026-03-06 02:43:48] (step=0073776) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.434748581490902, LR: 0.0003 +[2026-03-06 02:43:56] (step=0073777) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.434944237918216, LR: 0.0003 +[2026-03-06 02:44:04] (step=0073778) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.435139894345529, LR: 0.0003 +[2026-03-06 02:44:11] (step=0073779) Train Loss: 0.4331, Train Steps/Sec: 0.13, Epoch: 14.435335550772843, LR: 0.0003 +[2026-03-06 02:44:19] (step=0073780) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.435531207200157, LR: 0.0003 +[2026-03-06 02:44:27] (step=0073781) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.43572686362747, LR: 0.0003 +[2026-03-06 02:44:35] (step=0073782) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.435922520054783, LR: 0.0003 +[2026-03-06 02:44:43] (step=0073783) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 14.436118176482097, LR: 0.0003 +[2026-03-06 02:44:51] (step=0073784) Train Loss: 0.4311, Train Steps/Sec: 0.13, Epoch: 14.436313832909411, LR: 0.0003 +[2026-03-06 02:44:59] (step=0073785) Train Loss: 0.4532, Train Steps/Sec: 0.13, Epoch: 14.436509489336725, LR: 0.0003 +[2026-03-06 02:45:06] (step=0073786) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.436705145764039, LR: 0.0003 +[2026-03-06 02:45:14] (step=0073787) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.436900802191351, LR: 0.0003 +[2026-03-06 02:45:22] (step=0073788) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.437096458618665, LR: 0.0003 +[2026-03-06 02:45:30] (step=0073789) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.43729211504598, LR: 0.0003 +[2026-03-06 02:45:38] (step=0073790) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.437487771473293, LR: 0.0003 +[2026-03-06 02:45:46] (step=0073791) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.437683427900607, LR: 0.0003 +[2026-03-06 02:45:53] (step=0073792) Train Loss: 0.4265, Train Steps/Sec: 0.13, Epoch: 14.43787908432792, LR: 0.0003 +[2026-03-06 02:46:01] (step=0073793) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.438074740755233, LR: 0.0003 +[2026-03-06 02:46:09] (step=0073794) Train Loss: 0.4550, Train Steps/Sec: 0.13, Epoch: 14.438270397182547, LR: 0.0003 +[2026-03-06 02:46:17] (step=0073795) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.438466053609861, LR: 0.0003 +[2026-03-06 02:46:25] (step=0073796) Train Loss: 0.4596, Train Steps/Sec: 0.13, Epoch: 14.438661710037175, LR: 0.0003 +[2026-03-06 02:46:33] (step=0073797) Train Loss: 0.4377, Train Steps/Sec: 0.13, Epoch: 14.438857366464488, LR: 0.0003 +[2026-03-06 02:46:40] (step=0073798) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.439053022891802, LR: 0.0003 +[2026-03-06 02:46:48] (step=0073799) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.439248679319116, LR: 0.0003 +[2026-03-06 02:46:56] (step=0073800) Train Loss: 0.4477, Train Steps/Sec: 0.13, Epoch: 14.43944433574643, LR: 0.0003 +[2026-03-06 02:47:04] (step=0073801) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.439639992173744, LR: 0.0003 +[2026-03-06 02:47:12] (step=0073802) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.439835648601056, LR: 0.0003 +[2026-03-06 02:47:20] (step=0073803) Train Loss: 0.4466, Train Steps/Sec: 0.12, Epoch: 14.44003130502837, LR: 0.0003 +[2026-03-06 02:47:28] (step=0073804) Train Loss: 0.4241, Train Steps/Sec: 0.13, Epoch: 14.440226961455684, LR: 0.0003 +[2026-03-06 02:47:35] (step=0073805) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.440422617882998, LR: 0.0003 +[2026-03-06 02:47:43] (step=0073806) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.440618274310312, LR: 0.0003 +[2026-03-06 02:47:51] (step=0073807) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.440813930737624, LR: 0.0003 +[2026-03-06 02:47:59] (step=0073808) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.441009587164938, LR: 0.0003 +[2026-03-06 02:48:07] (step=0073809) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.441205243592252, LR: 0.0003 +[2026-03-06 02:48:15] (step=0073810) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.441400900019566, LR: 0.0003 +[2026-03-06 02:48:23] (step=0073811) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 14.441596556446878, LR: 0.0003 +[2026-03-06 02:48:30] (step=0073812) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.441792212874192, LR: 0.0003 +[2026-03-06 02:48:38] (step=0073813) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.441987869301506, LR: 0.0003 +[2026-03-06 02:48:46] (step=0073814) Train Loss: 0.4364, Train Steps/Sec: 0.13, Epoch: 14.44218352572882, LR: 0.0003 +[2026-03-06 02:48:54] (step=0073815) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.442379182156134, LR: 0.0003 +[2026-03-06 02:49:02] (step=0073816) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.442574838583447, LR: 0.0003 +[2026-03-06 02:49:10] (step=0073817) Train Loss: 0.4490, Train Steps/Sec: 0.13, Epoch: 14.44277049501076, LR: 0.0003 +[2026-03-06 02:49:18] (step=0073818) Train Loss: 0.4343, Train Steps/Sec: 0.13, Epoch: 14.442966151438075, LR: 0.0003 +[2026-03-06 02:49:25] (step=0073819) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.443161807865389, LR: 0.0003 +[2026-03-06 02:49:33] (step=0073820) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.443357464292703, LR: 0.0003 +[2026-03-06 02:49:41] (step=0073821) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 14.443553120720015, LR: 0.0003 +[2026-03-06 02:49:49] (step=0073822) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 14.443748777147329, LR: 0.0003 +[2026-03-06 02:49:57] (step=0073823) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 14.443944433574643, LR: 0.0003 +[2026-03-06 02:50:05] (step=0073824) Train Loss: 0.4238, Train Steps/Sec: 0.13, Epoch: 14.444140090001957, LR: 0.0003 +[2026-03-06 02:50:12] (step=0073825) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.444335746429271, LR: 0.0003 +[2026-03-06 02:50:20] (step=0073826) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.444531402856583, LR: 0.0003 +[2026-03-06 02:50:28] (step=0073827) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.444727059283897, LR: 0.0003 +[2026-03-06 02:50:36] (step=0073828) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 14.444922715711211, LR: 0.0003 +[2026-03-06 02:50:44] (step=0073829) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 14.445118372138525, LR: 0.0003 +[2026-03-06 02:50:52] (step=0073830) Train Loss: 0.4415, Train Steps/Sec: 0.13, Epoch: 14.44531402856584, LR: 0.0003 +[2026-03-06 02:50:59] (step=0073831) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 14.445509684993151, LR: 0.0003 +[2026-03-06 02:51:07] (step=0073832) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.445705341420465, LR: 0.0003 +[2026-03-06 02:51:15] (step=0073833) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.44590099784778, LR: 0.0003 +[2026-03-06 02:51:23] (step=0073834) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.446096654275093, LR: 0.0003 +[2026-03-06 02:51:31] (step=0073835) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.446292310702406, LR: 0.0003 +[2026-03-06 02:51:39] (step=0073836) Train Loss: 0.4441, Train Steps/Sec: 0.13, Epoch: 14.44648796712972, LR: 0.0003 +[2026-03-06 02:51:46] (step=0073837) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.446683623557034, LR: 0.0003 +[2026-03-06 02:51:54] (step=0073838) Train Loss: 0.4470, Train Steps/Sec: 0.13, Epoch: 14.446879279984348, LR: 0.0003 +[2026-03-06 02:52:02] (step=0073839) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.447074936411662, LR: 0.0003 +[2026-03-06 02:52:10] (step=0073840) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.447270592838974, LR: 0.0003 +[2026-03-06 02:52:18] (step=0073841) Train Loss: 0.4374, Train Steps/Sec: 0.13, Epoch: 14.447466249266288, LR: 0.0003 +[2026-03-06 02:52:25] (step=0073842) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.447661905693602, LR: 0.0003 +[2026-03-06 02:52:33] (step=0073843) Train Loss: 0.4457, Train Steps/Sec: 0.13, Epoch: 14.447857562120916, LR: 0.0003 +[2026-03-06 02:52:41] (step=0073844) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.44805321854823, LR: 0.0003 +[2026-03-06 02:52:49] (step=0073845) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.448248874975542, LR: 0.0003 +[2026-03-06 02:52:57] (step=0073846) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.448444531402856, LR: 0.0003 +[2026-03-06 02:53:05] (step=0073847) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.44864018783017, LR: 0.0003 +[2026-03-06 02:53:13] (step=0073848) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 14.448835844257484, LR: 0.0003 +[2026-03-06 02:53:20] (step=0073849) Train Loss: 0.4379, Train Steps/Sec: 0.13, Epoch: 14.449031500684798, LR: 0.0003 +[2026-03-06 02:53:28] (step=0073850) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.44922715711211, LR: 0.0003 +[2026-03-06 02:53:36] (step=0073851) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.449422813539424, LR: 0.0003 +[2026-03-06 02:53:44] (step=0073852) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 14.449618469966738, LR: 0.0003 +[2026-03-06 02:53:52] (step=0073853) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.449814126394052, LR: 0.0003 +[2026-03-06 02:54:00] (step=0073854) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 14.450009782821366, LR: 0.0003 +[2026-03-06 02:54:07] (step=0073855) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.450205439248679, LR: 0.0003 +[2026-03-06 02:54:15] (step=0073856) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.450401095675993, LR: 0.0003 +[2026-03-06 02:54:23] (step=0073857) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.450596752103307, LR: 0.0003 +[2026-03-06 02:54:31] (step=0073858) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.45079240853062, LR: 0.0003 +[2026-03-06 02:54:39] (step=0073859) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.450988064957935, LR: 0.0003 +[2026-03-06 02:54:47] (step=0073860) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.451183721385247, LR: 0.0003 +[2026-03-06 02:54:55] (step=0073861) Train Loss: 0.4446, Train Steps/Sec: 0.13, Epoch: 14.451379377812561, LR: 0.0003 +[2026-03-06 02:55:02] (step=0073862) Train Loss: 0.4323, Train Steps/Sec: 0.13, Epoch: 14.451575034239875, LR: 0.0003 +[2026-03-06 02:55:10] (step=0073863) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.451770690667189, LR: 0.0003 +[2026-03-06 02:55:18] (step=0073864) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 14.451966347094501, LR: 0.0003 +[2026-03-06 02:55:26] (step=0073865) Train Loss: 0.4263, Train Steps/Sec: 0.13, Epoch: 14.452162003521815, LR: 0.0003 +[2026-03-06 02:55:34] (step=0073866) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.45235765994913, LR: 0.0003 +[2026-03-06 02:55:42] (step=0073867) Train Loss: 0.4417, Train Steps/Sec: 0.13, Epoch: 14.452553316376443, LR: 0.0003 +[2026-03-06 02:55:49] (step=0073868) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.452748972803757, LR: 0.0003 +[2026-03-06 02:55:57] (step=0073869) Train Loss: 0.4521, Train Steps/Sec: 0.13, Epoch: 14.45294462923107, LR: 0.0003 +[2026-03-06 02:56:05] (step=0073870) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.453140285658383, LR: 0.0003 +[2026-03-06 02:56:13] (step=0073871) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.453335942085697, LR: 0.0003 +[2026-03-06 02:56:21] (step=0073872) Train Loss: 0.4321, Train Steps/Sec: 0.13, Epoch: 14.453531598513012, LR: 0.0003 +[2026-03-06 02:56:29] (step=0073873) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.453727254940326, LR: 0.0003 +[2026-03-06 02:56:36] (step=0073874) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.453922911367638, LR: 0.0003 +[2026-03-06 02:56:44] (step=0073875) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.454118567794952, LR: 0.0003 +[2026-03-06 02:56:52] (step=0073876) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.454314224222266, LR: 0.0003 +[2026-03-06 02:57:00] (step=0073877) Train Loss: 0.4628, Train Steps/Sec: 0.13, Epoch: 14.45450988064958, LR: 0.0003 +[2026-03-06 02:57:08] (step=0073878) Train Loss: 0.4349, Train Steps/Sec: 0.13, Epoch: 14.454705537076894, LR: 0.0003 +[2026-03-06 02:57:16] (step=0073879) Train Loss: 0.4561, Train Steps/Sec: 0.13, Epoch: 14.454901193504206, LR: 0.0003 +[2026-03-06 02:57:23] (step=0073880) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.45509684993152, LR: 0.0003 +[2026-03-06 02:57:31] (step=0073881) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.455292506358834, LR: 0.0003 +[2026-03-06 02:57:39] (step=0073882) Train Loss: 0.4398, Train Steps/Sec: 0.13, Epoch: 14.455488162786148, LR: 0.0003 +[2026-03-06 02:57:47] (step=0073883) Train Loss: 0.4259, Train Steps/Sec: 0.13, Epoch: 14.455683819213462, LR: 0.0003 +[2026-03-06 02:57:55] (step=0073884) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.455879475640774, LR: 0.0003 +[2026-03-06 02:58:03] (step=0073885) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.456075132068088, LR: 0.0003 +[2026-03-06 02:58:10] (step=0073886) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 14.456270788495402, LR: 0.0003 +[2026-03-06 02:58:18] (step=0073887) Train Loss: 0.4395, Train Steps/Sec: 0.13, Epoch: 14.456466444922716, LR: 0.0003 +[2026-03-06 02:58:26] (step=0073888) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.456662101350028, LR: 0.0003 +[2026-03-06 02:58:34] (step=0073889) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.456857757777342, LR: 0.0003 +[2026-03-06 02:58:42] (step=0073890) Train Loss: 0.4340, Train Steps/Sec: 0.13, Epoch: 14.457053414204657, LR: 0.0003 +[2026-03-06 02:58:50] (step=0073891) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.45724907063197, LR: 0.0003 +[2026-03-06 02:58:57] (step=0073892) Train Loss: 0.4425, Train Steps/Sec: 0.13, Epoch: 14.457444727059285, LR: 0.0003 +[2026-03-06 02:59:05] (step=0073893) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.457640383486597, LR: 0.0003 +[2026-03-06 02:59:13] (step=0073894) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.45783603991391, LR: 0.0003 +[2026-03-06 02:59:21] (step=0073895) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.458031696341225, LR: 0.0003 +[2026-03-06 02:59:29] (step=0073896) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.458227352768539, LR: 0.0003 +[2026-03-06 02:59:37] (step=0073897) Train Loss: 0.4300, Train Steps/Sec: 0.13, Epoch: 14.458423009195853, LR: 0.0003 +[2026-03-06 02:59:44] (step=0073898) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.458618665623165, LR: 0.0003 +[2026-03-06 02:59:52] (step=0073899) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.458814322050479, LR: 0.0003 +[2026-03-06 03:00:00] (step=0073900) Train Loss: 0.4353, Train Steps/Sec: 0.13, Epoch: 14.459009978477793, LR: 0.0003 +[2026-03-06 03:00:08] (step=0073901) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 14.459205634905107, LR: 0.0003 +[2026-03-06 03:00:16] (step=0073902) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.459401291332421, LR: 0.0003 +[2026-03-06 03:00:24] (step=0073903) Train Loss: 0.4516, Train Steps/Sec: 0.13, Epoch: 14.459596947759733, LR: 0.0003 +[2026-03-06 03:00:31] (step=0073904) Train Loss: 0.4401, Train Steps/Sec: 0.13, Epoch: 14.459792604187047, LR: 0.0003 +[2026-03-06 03:00:39] (step=0073905) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 14.459988260614361, LR: 0.0003 +[2026-03-06 03:00:47] (step=0073906) Train Loss: 0.4545, Train Steps/Sec: 0.13, Epoch: 14.460183917041675, LR: 0.0003 +[2026-03-06 03:00:55] (step=0073907) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.46037957346899, LR: 0.0003 +[2026-03-06 03:01:03] (step=0073908) Train Loss: 0.4346, Train Steps/Sec: 0.13, Epoch: 14.460575229896302, LR: 0.0003 +[2026-03-06 03:01:11] (step=0073909) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 14.460770886323616, LR: 0.0003 +[2026-03-06 03:01:19] (step=0073910) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.46096654275093, LR: 0.0003 +[2026-03-06 03:01:26] (step=0073911) Train Loss: 0.4487, Train Steps/Sec: 0.13, Epoch: 14.461162199178244, LR: 0.0003 +[2026-03-06 03:01:34] (step=0073912) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.461357855605556, LR: 0.0003 +[2026-03-06 03:01:42] (step=0073913) Train Loss: 0.4478, Train Steps/Sec: 0.13, Epoch: 14.46155351203287, LR: 0.0003 +[2026-03-06 03:01:50] (step=0073914) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 14.461749168460184, LR: 0.0003 +[2026-03-06 03:01:58] (step=0073915) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.461944824887498, LR: 0.0003 +[2026-03-06 03:02:06] (step=0073916) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.462140481314812, LR: 0.0003 +[2026-03-06 03:02:13] (step=0073917) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.462336137742124, LR: 0.0003 +[2026-03-06 03:02:21] (step=0073918) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.462531794169438, LR: 0.0003 +[2026-03-06 03:02:29] (step=0073919) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.462727450596752, LR: 0.0003 +[2026-03-06 03:02:37] (step=0073920) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.462923107024066, LR: 0.0003 +[2026-03-06 03:02:45] (step=0073921) Train Loss: 0.4508, Train Steps/Sec: 0.13, Epoch: 14.46311876345138, LR: 0.0003 +[2026-03-06 03:02:53] (step=0073922) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.463314419878692, LR: 0.0003 +[2026-03-06 03:03:00] (step=0073923) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 14.463510076306006, LR: 0.0003 +[2026-03-06 03:03:08] (step=0073924) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.46370573273332, LR: 0.0003 +[2026-03-06 03:03:16] (step=0073925) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.463901389160634, LR: 0.0003 +[2026-03-06 03:03:24] (step=0073926) Train Loss: 0.4455, Train Steps/Sec: 0.13, Epoch: 14.464097045587948, LR: 0.0003 +[2026-03-06 03:03:32] (step=0073927) Train Loss: 0.4513, Train Steps/Sec: 0.13, Epoch: 14.46429270201526, LR: 0.0003 +[2026-03-06 03:03:40] (step=0073928) Train Loss: 0.4531, Train Steps/Sec: 0.13, Epoch: 14.464488358442575, LR: 0.0003 +[2026-03-06 03:03:47] (step=0073929) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.464684014869889, LR: 0.0003 +[2026-03-06 03:03:55] (step=0073930) Train Loss: 0.4559, Train Steps/Sec: 0.13, Epoch: 14.464879671297203, LR: 0.0003 +[2026-03-06 03:04:03] (step=0073931) Train Loss: 0.4420, Train Steps/Sec: 0.13, Epoch: 14.465075327724517, LR: 0.0003 +[2026-03-06 03:04:11] (step=0073932) Train Loss: 0.4397, Train Steps/Sec: 0.13, Epoch: 14.465270984151829, LR: 0.0003 +[2026-03-06 03:04:19] (step=0073933) Train Loss: 0.4328, Train Steps/Sec: 0.13, Epoch: 14.465466640579143, LR: 0.0003 +[2026-03-06 03:04:27] (step=0073934) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.465662297006457, LR: 0.0003 +[2026-03-06 03:04:35] (step=0073935) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 14.46585795343377, LR: 0.0003 +[2026-03-06 03:04:42] (step=0073936) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 14.466053609861085, LR: 0.0003 +[2026-03-06 03:04:50] (step=0073937) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.466249266288397, LR: 0.0003 +[2026-03-06 03:04:58] (step=0073938) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.466444922715711, LR: 0.0003 +[2026-03-06 03:05:06] (step=0073939) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.466640579143025, LR: 0.0003 +[2026-03-06 03:05:14] (step=0073940) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.466836235570339, LR: 0.0003 +[2026-03-06 03:05:22] (step=0073941) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.467031891997651, LR: 0.0003 +[2026-03-06 03:05:29] (step=0073942) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.467227548424965, LR: 0.0003 +[2026-03-06 03:05:37] (step=0073943) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.46742320485228, LR: 0.0003 +[2026-03-06 03:05:45] (step=0073944) Train Loss: 0.4400, Train Steps/Sec: 0.13, Epoch: 14.467618861279593, LR: 0.0003 +[2026-03-06 03:05:53] (step=0073945) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.467814517706907, LR: 0.0003 +[2026-03-06 03:06:01] (step=0073946) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.46801017413422, LR: 0.0003 +[2026-03-06 03:06:09] (step=0073947) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.468205830561534, LR: 0.0003 +[2026-03-06 03:06:16] (step=0073948) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 14.468401486988848, LR: 0.0003 +[2026-03-06 03:06:24] (step=0073949) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.468597143416162, LR: 0.0003 +[2026-03-06 03:06:32] (step=0073950) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.468792799843476, LR: 0.0003 +[2026-03-06 03:06:40] (step=0073951) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.468988456270788, LR: 0.0003 +[2026-03-06 03:06:48] (step=0073952) Train Loss: 0.4538, Train Steps/Sec: 0.13, Epoch: 14.469184112698102, LR: 0.0003 +[2026-03-06 03:06:56] (step=0073953) Train Loss: 0.4306, Train Steps/Sec: 0.13, Epoch: 14.469379769125416, LR: 0.0003 +[2026-03-06 03:07:04] (step=0073954) Train Loss: 0.4379, Train Steps/Sec: 0.12, Epoch: 14.46957542555273, LR: 0.0003 +[2026-03-06 03:07:12] (step=0073955) Train Loss: 0.4312, Train Steps/Sec: 0.13, Epoch: 14.469771081980044, LR: 0.0003 +[2026-03-06 03:07:19] (step=0073956) Train Loss: 0.4362, Train Steps/Sec: 0.13, Epoch: 14.469966738407356, LR: 0.0003 +[2026-03-06 03:07:27] (step=0073957) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.47016239483467, LR: 0.0003 +[2026-03-06 03:07:35] (step=0073958) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 14.470358051261984, LR: 0.0003 +[2026-03-06 03:07:43] (step=0073959) Train Loss: 0.4451, Train Steps/Sec: 0.13, Epoch: 14.470553707689298, LR: 0.0003 +[2026-03-06 03:07:51] (step=0073960) Train Loss: 0.4590, Train Steps/Sec: 0.13, Epoch: 14.470749364116612, LR: 0.0003 +[2026-03-06 03:07:59] (step=0073961) Train Loss: 0.4594, Train Steps/Sec: 0.13, Epoch: 14.470945020543924, LR: 0.0003 +[2026-03-06 03:08:07] (step=0073962) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.471140676971238, LR: 0.0003 +[2026-03-06 03:08:14] (step=0073963) Train Loss: 0.4384, Train Steps/Sec: 0.13, Epoch: 14.471336333398552, LR: 0.0003 +[2026-03-06 03:08:22] (step=0073964) Train Loss: 0.4336, Train Steps/Sec: 0.13, Epoch: 14.471531989825866, LR: 0.0003 +[2026-03-06 03:08:30] (step=0073965) Train Loss: 0.4329, Train Steps/Sec: 0.13, Epoch: 14.471727646253179, LR: 0.0003 +[2026-03-06 03:08:38] (step=0073966) Train Loss: 0.4444, Train Steps/Sec: 0.13, Epoch: 14.471923302680493, LR: 0.0003 +[2026-03-06 03:08:46] (step=0073967) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.472118959107807, LR: 0.0003 +[2026-03-06 03:08:54] (step=0073968) Train Loss: 0.4385, Train Steps/Sec: 0.13, Epoch: 14.47231461553512, LR: 0.0003 +[2026-03-06 03:09:01] (step=0073969) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.472510271962435, LR: 0.0003 +[2026-03-06 03:09:09] (step=0073970) Train Loss: 0.4305, Train Steps/Sec: 0.13, Epoch: 14.472705928389747, LR: 0.0003 +[2026-03-06 03:09:17] (step=0073971) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.47290158481706, LR: 0.0003 +[2026-03-06 03:09:25] (step=0073972) Train Loss: 0.4501, Train Steps/Sec: 0.13, Epoch: 14.473097241244375, LR: 0.0003 +[2026-03-06 03:09:33] (step=0073973) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 14.473292897671689, LR: 0.0003 +[2026-03-06 03:09:41] (step=0073974) Train Loss: 0.4555, Train Steps/Sec: 0.13, Epoch: 14.473488554099003, LR: 0.0003 +[2026-03-06 03:09:48] (step=0073975) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 14.473684210526315, LR: 0.0003 +[2026-03-06 03:09:56] (step=0073976) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.473879866953629, LR: 0.0003 +[2026-03-06 03:10:04] (step=0073977) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.474075523380943, LR: 0.0003 +[2026-03-06 03:10:12] (step=0073978) Train Loss: 0.4342, Train Steps/Sec: 0.13, Epoch: 14.474271179808257, LR: 0.0003 +[2026-03-06 03:10:20] (step=0073979) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.474466836235571, LR: 0.0003 +[2026-03-06 03:10:28] (step=0073980) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.474662492662883, LR: 0.0003 +[2026-03-06 03:10:35] (step=0073981) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 14.474858149090197, LR: 0.0003 +[2026-03-06 03:10:43] (step=0073982) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 14.475053805517511, LR: 0.0003 +[2026-03-06 03:10:51] (step=0073983) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 14.475249461944825, LR: 0.0003 +[2026-03-06 03:10:59] (step=0073984) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.47544511837214, LR: 0.0003 +[2026-03-06 03:11:07] (step=0073985) Train Loss: 0.4393, Train Steps/Sec: 0.13, Epoch: 14.475640774799452, LR: 0.0003 +[2026-03-06 03:11:15] (step=0073986) Train Loss: 0.4426, Train Steps/Sec: 0.13, Epoch: 14.475836431226766, LR: 0.0003 +[2026-03-06 03:11:22] (step=0073987) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.47603208765408, LR: 0.0003 +[2026-03-06 03:11:30] (step=0073988) Train Loss: 0.4376, Train Steps/Sec: 0.13, Epoch: 14.476227744081394, LR: 0.0003 +[2026-03-06 03:11:38] (step=0073989) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.476423400508708, LR: 0.0003 +[2026-03-06 03:11:46] (step=0073990) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.47661905693602, LR: 0.0003 +[2026-03-06 03:11:54] (step=0073991) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.476814713363334, LR: 0.0003 +[2026-03-06 03:12:02] (step=0073992) Train Loss: 0.4324, Train Steps/Sec: 0.13, Epoch: 14.477010369790648, LR: 0.0003 +[2026-03-06 03:12:09] (step=0073993) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 14.477206026217962, LR: 0.0003 +[2026-03-06 03:12:17] (step=0073994) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.477401682645274, LR: 0.0003 +[2026-03-06 03:12:25] (step=0073995) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.477597339072588, LR: 0.0003 +[2026-03-06 03:12:33] (step=0073996) Train Loss: 0.4433, Train Steps/Sec: 0.13, Epoch: 14.477792995499902, LR: 0.0003 +[2026-03-06 03:12:41] (step=0073997) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.477988651927216, LR: 0.0003 +[2026-03-06 03:12:49] (step=0073998) Train Loss: 0.4453, Train Steps/Sec: 0.13, Epoch: 14.47818430835453, LR: 0.0003 +[2026-03-06 03:12:56] (step=0073999) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.478379964781842, LR: 0.0003 +[2026-03-06 03:13:04] (step=0074000) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.478575621209156, LR: 0.0003 +[2026-03-06 03:13:04] Saved checkpoint to ./results/cxr_finetune_lora/checkpoints/0074000/ +[2026-03-06 03:13:12] (step=0074001) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.47877127763647, LR: 0.0003 +[2026-03-06 03:13:20] (step=0074002) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.478966934063784, LR: 0.0003 +[2026-03-06 03:13:28] (step=0074003) Train Loss: 0.4572, Train Steps/Sec: 0.13, Epoch: 14.479162590491098, LR: 0.0003 +[2026-03-06 03:13:36] (step=0074004) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 14.47935824691841, LR: 0.0003 +[2026-03-06 03:13:44] (step=0074005) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.479553903345725, LR: 0.0003 +[2026-03-06 03:13:51] (step=0074006) Train Loss: 0.4381, Train Steps/Sec: 0.13, Epoch: 14.479749559773039, LR: 0.0003 +[2026-03-06 03:13:59] (step=0074007) Train Loss: 0.4479, Train Steps/Sec: 0.13, Epoch: 14.479945216200353, LR: 0.0003 +[2026-03-06 03:14:07] (step=0074008) Train Loss: 0.4375, Train Steps/Sec: 0.13, Epoch: 14.480140872627667, LR: 0.0003 +[2026-03-06 03:14:15] (step=0074009) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.480336529054979, LR: 0.0003 +[2026-03-06 03:14:23] (step=0074010) Train Loss: 0.4365, Train Steps/Sec: 0.13, Epoch: 14.480532185482293, LR: 0.0003 +[2026-03-06 03:14:31] (step=0074011) Train Loss: 0.4467, Train Steps/Sec: 0.13, Epoch: 14.480727841909607, LR: 0.0003 +[2026-03-06 03:14:38] (step=0074012) Train Loss: 0.4502, Train Steps/Sec: 0.13, Epoch: 14.48092349833692, LR: 0.0003 +[2026-03-06 03:14:46] (step=0074013) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.481119154764235, LR: 0.0003 +[2026-03-06 03:14:54] (step=0074014) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.481314811191547, LR: 0.0003 +[2026-03-06 03:15:02] (step=0074015) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.481510467618861, LR: 0.0003 +[2026-03-06 03:15:10] (step=0074016) Train Loss: 0.4382, Train Steps/Sec: 0.13, Epoch: 14.481706124046175, LR: 0.0003 +[2026-03-06 03:15:18] (step=0074017) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.481901780473489, LR: 0.0003 +[2026-03-06 03:15:26] (step=0074018) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.482097436900801, LR: 0.0003 +[2026-03-06 03:15:33] (step=0074019) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.482293093328115, LR: 0.0003 +[2026-03-06 03:15:41] (step=0074020) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.48248874975543, LR: 0.0003 +[2026-03-06 03:15:49] (step=0074021) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.482684406182743, LR: 0.0003 +[2026-03-06 03:15:57] (step=0074022) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.482880062610057, LR: 0.0003 +[2026-03-06 03:16:05] (step=0074023) Train Loss: 0.4452, Train Steps/Sec: 0.13, Epoch: 14.48307571903737, LR: 0.0003 +[2026-03-06 03:16:13] (step=0074024) Train Loss: 0.4551, Train Steps/Sec: 0.13, Epoch: 14.483271375464684, LR: 0.0003 +[2026-03-06 03:16:20] (step=0074025) Train Loss: 0.4438, Train Steps/Sec: 0.13, Epoch: 14.483467031891998, LR: 0.0003 +[2026-03-06 03:16:28] (step=0074026) Train Loss: 0.4556, Train Steps/Sec: 0.13, Epoch: 14.483662688319312, LR: 0.0003 +[2026-03-06 03:16:36] (step=0074027) Train Loss: 0.4466, Train Steps/Sec: 0.13, Epoch: 14.483858344746626, LR: 0.0003 +[2026-03-06 03:16:44] (step=0074028) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 14.484054001173938, LR: 0.0003 +[2026-03-06 03:16:52] (step=0074029) Train Loss: 0.4423, Train Steps/Sec: 0.13, Epoch: 14.484249657601252, LR: 0.0003 +[2026-03-06 03:17:00] (step=0074030) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.484445314028566, LR: 0.0003 +[2026-03-06 03:17:07] (step=0074031) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.48464097045588, LR: 0.0003 +[2026-03-06 03:17:15] (step=0074032) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.484836626883194, LR: 0.0003 +[2026-03-06 03:17:23] (step=0074033) Train Loss: 0.4390, Train Steps/Sec: 0.13, Epoch: 14.485032283310506, LR: 0.0003 +[2026-03-06 03:17:31] (step=0074034) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.48522793973782, LR: 0.0003 +[2026-03-06 03:17:39] (step=0074035) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 14.485423596165134, LR: 0.0003 +[2026-03-06 03:17:47] (step=0074036) Train Loss: 0.4427, Train Steps/Sec: 0.13, Epoch: 14.485619252592448, LR: 0.0003 +[2026-03-06 03:17:54] (step=0074037) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.485814909019762, LR: 0.0003 +[2026-03-06 03:18:02] (step=0074038) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.486010565447074, LR: 0.0003 +[2026-03-06 03:18:10] (step=0074039) Train Loss: 0.4442, Train Steps/Sec: 0.13, Epoch: 14.486206221874388, LR: 0.0003 +[2026-03-06 03:18:18] (step=0074040) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.486401878301702, LR: 0.0003 +[2026-03-06 03:18:26] (step=0074041) Train Loss: 0.4542, Train Steps/Sec: 0.13, Epoch: 14.486597534729016, LR: 0.0003 +[2026-03-06 03:18:34] (step=0074042) Train Loss: 0.4523, Train Steps/Sec: 0.13, Epoch: 14.48679319115633, LR: 0.0003 +[2026-03-06 03:18:42] (step=0074043) Train Loss: 0.4428, Train Steps/Sec: 0.13, Epoch: 14.486988847583643, LR: 0.0003 +[2026-03-06 03:18:49] (step=0074044) Train Loss: 0.4496, Train Steps/Sec: 0.13, Epoch: 14.487184504010957, LR: 0.0003 +[2026-03-06 03:18:57] (step=0074045) Train Loss: 0.4333, Train Steps/Sec: 0.13, Epoch: 14.48738016043827, LR: 0.0003 +[2026-03-06 03:19:05] (step=0074046) Train Loss: 0.4304, Train Steps/Sec: 0.13, Epoch: 14.487575816865585, LR: 0.0003 +[2026-03-06 03:19:13] (step=0074047) Train Loss: 0.4472, Train Steps/Sec: 0.13, Epoch: 14.487771473292897, LR: 0.0003 +[2026-03-06 03:19:21] (step=0074048) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.48796712972021, LR: 0.0003 +[2026-03-06 03:19:29] (step=0074049) Train Loss: 0.4391, Train Steps/Sec: 0.13, Epoch: 14.488162786147525, LR: 0.0003 +[2026-03-06 03:19:36] (step=0074050) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 14.488358442574839, LR: 0.0003 +[2026-03-06 03:19:44] (step=0074051) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.488554099002153, LR: 0.0003 +[2026-03-06 03:19:52] (step=0074052) Train Loss: 0.4281, Train Steps/Sec: 0.13, Epoch: 14.488749755429465, LR: 0.0003 +[2026-03-06 03:20:00] (step=0074053) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.488945411856779, LR: 0.0003 +[2026-03-06 03:20:08] (step=0074054) Train Loss: 0.4418, Train Steps/Sec: 0.13, Epoch: 14.489141068284093, LR: 0.0003 +[2026-03-06 03:20:16] (step=0074055) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.489336724711407, LR: 0.0003 +[2026-03-06 03:20:24] (step=0074056) Train Loss: 0.4356, Train Steps/Sec: 0.13, Epoch: 14.489532381138721, LR: 0.0003 +[2026-03-06 03:20:32] (step=0074057) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.489728037566033, LR: 0.0003 +[2026-03-06 03:20:39] (step=0074058) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.489923693993347, LR: 0.0003 +[2026-03-06 03:20:47] (step=0074059) Train Loss: 0.4392, Train Steps/Sec: 0.13, Epoch: 14.490119350420661, LR: 0.0003 +[2026-03-06 03:20:55] (step=0074060) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.490315006847975, LR: 0.0003 +[2026-03-06 03:21:03] (step=0074061) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.49051066327529, LR: 0.0003 +[2026-03-06 03:21:11] (step=0074062) Train Loss: 0.4359, Train Steps/Sec: 0.13, Epoch: 14.490706319702602, LR: 0.0003 +[2026-03-06 03:21:19] (step=0074063) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.490901976129916, LR: 0.0003 +[2026-03-06 03:21:26] (step=0074064) Train Loss: 0.4460, Train Steps/Sec: 0.13, Epoch: 14.49109763255723, LR: 0.0003 +[2026-03-06 03:21:34] (step=0074065) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.491293288984544, LR: 0.0003 +[2026-03-06 03:21:42] (step=0074066) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 14.491488945411858, LR: 0.0003 +[2026-03-06 03:21:50] (step=0074067) Train Loss: 0.4525, Train Steps/Sec: 0.13, Epoch: 14.49168460183917, LR: 0.0003 +[2026-03-06 03:21:58] (step=0074068) Train Loss: 0.4256, Train Steps/Sec: 0.13, Epoch: 14.491880258266484, LR: 0.0003 +[2026-03-06 03:22:06] (step=0074069) Train Loss: 0.4421, Train Steps/Sec: 0.13, Epoch: 14.492075914693798, LR: 0.0003 +[2026-03-06 03:22:13] (step=0074070) Train Loss: 0.4394, Train Steps/Sec: 0.13, Epoch: 14.492271571121112, LR: 0.0003 +[2026-03-06 03:22:21] (step=0074071) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 14.492467227548424, LR: 0.0003 +[2026-03-06 03:22:29] (step=0074072) Train Loss: 0.4389, Train Steps/Sec: 0.13, Epoch: 14.492662883975738, LR: 0.0003 +[2026-03-06 03:22:37] (step=0074073) Train Loss: 0.4675, Train Steps/Sec: 0.13, Epoch: 14.492858540403052, LR: 0.0003 +[2026-03-06 03:22:45] (step=0074074) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.493054196830366, LR: 0.0003 +[2026-03-06 03:22:53] (step=0074075) Train Loss: 0.4468, Train Steps/Sec: 0.13, Epoch: 14.49324985325768, LR: 0.0003 +[2026-03-06 03:23:00] (step=0074076) Train Loss: 0.4464, Train Steps/Sec: 0.13, Epoch: 14.493445509684992, LR: 0.0003 +[2026-03-06 03:23:08] (step=0074077) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.493641166112306, LR: 0.0003 +[2026-03-06 03:23:16] (step=0074078) Train Loss: 0.4546, Train Steps/Sec: 0.13, Epoch: 14.49383682253962, LR: 0.0003 +[2026-03-06 03:23:24] (step=0074079) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.494032478966934, LR: 0.0003 +[2026-03-06 03:23:32] (step=0074080) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.494228135394248, LR: 0.0003 +[2026-03-06 03:23:40] (step=0074081) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.49442379182156, LR: 0.0003 +[2026-03-06 03:23:47] (step=0074082) Train Loss: 0.4440, Train Steps/Sec: 0.13, Epoch: 14.494619448248875, LR: 0.0003 +[2026-03-06 03:23:55] (step=0074083) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.494815104676189, LR: 0.0003 +[2026-03-06 03:24:03] (step=0074084) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 14.495010761103503, LR: 0.0003 +[2026-03-06 03:24:11] (step=0074085) Train Loss: 0.4297, Train Steps/Sec: 0.13, Epoch: 14.495206417530817, LR: 0.0003 +[2026-03-06 03:24:19] (step=0074086) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.495402073958129, LR: 0.0003 +[2026-03-06 03:24:27] (step=0074087) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.495597730385443, LR: 0.0003 +[2026-03-06 03:24:34] (step=0074088) Train Loss: 0.4549, Train Steps/Sec: 0.13, Epoch: 14.495793386812757, LR: 0.0003 +[2026-03-06 03:24:42] (step=0074089) Train Loss: 0.4405, Train Steps/Sec: 0.13, Epoch: 14.49598904324007, LR: 0.0003 +[2026-03-06 03:24:50] (step=0074090) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.496184699667385, LR: 0.0003 +[2026-03-06 03:24:58] (step=0074091) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.496380356094697, LR: 0.0003 +[2026-03-06 03:25:06] (step=0074092) Train Loss: 0.4318, Train Steps/Sec: 0.13, Epoch: 14.496576012522011, LR: 0.0003 +[2026-03-06 03:25:14] (step=0074093) Train Loss: 0.4387, Train Steps/Sec: 0.13, Epoch: 14.496771668949325, LR: 0.0003 +[2026-03-06 03:25:21] (step=0074094) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.49696732537664, LR: 0.0003 +[2026-03-06 03:25:29] (step=0074095) Train Loss: 0.4317, Train Steps/Sec: 0.13, Epoch: 14.497162981803953, LR: 0.0003 +[2026-03-06 03:25:37] (step=0074096) Train Loss: 0.4562, Train Steps/Sec: 0.13, Epoch: 14.497358638231265, LR: 0.0003 +[2026-03-06 03:25:45] (step=0074097) Train Loss: 0.4454, Train Steps/Sec: 0.13, Epoch: 14.49755429465858, LR: 0.0003 +[2026-03-06 03:25:53] (step=0074098) Train Loss: 0.4293, Train Steps/Sec: 0.13, Epoch: 14.497749951085893, LR: 0.0003 +[2026-03-06 03:26:01] (step=0074099) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.497945607513207, LR: 0.0003 +[2026-03-06 03:26:08] (step=0074100) Train Loss: 0.4519, Train Steps/Sec: 0.13, Epoch: 14.49814126394052, LR: 0.0003 +[2026-03-06 03:26:16] (step=0074101) Train Loss: 0.4361, Train Steps/Sec: 0.13, Epoch: 14.498336920367834, LR: 0.0003 +[2026-03-06 03:26:24] (step=0074102) Train Loss: 0.4408, Train Steps/Sec: 0.13, Epoch: 14.498532576795148, LR: 0.0003 +[2026-03-06 03:26:32] (step=0074103) Train Loss: 0.4447, Train Steps/Sec: 0.13, Epoch: 14.498728233222462, LR: 0.0003 +[2026-03-06 03:26:40] (step=0074104) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.498923889649776, LR: 0.0003 +[2026-03-06 03:26:48] (step=0074105) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.499119546077088, LR: 0.0003 +[2026-03-06 03:26:56] (step=0074106) Train Loss: 0.4349, Train Steps/Sec: 0.12, Epoch: 14.499315202504402, LR: 0.0003 +[2026-03-06 03:27:04] (step=0074107) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.499510858931716, LR: 0.0003 +[2026-03-06 03:27:11] (step=0074108) Train Loss: 0.4570, Train Steps/Sec: 0.13, Epoch: 14.49970651535903, LR: 0.0003 +[2026-03-06 03:27:19] (step=0074109) Train Loss: 0.4504, Train Steps/Sec: 0.13, Epoch: 14.499902171786344, LR: 0.0003 +[2026-03-06 03:27:27] (step=0074110) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.500097828213656, LR: 0.0003 +[2026-03-06 03:27:35] (step=0074111) Train Loss: 0.4404, Train Steps/Sec: 0.13, Epoch: 14.50029348464097, LR: 0.0003 +[2026-03-06 03:27:43] (step=0074112) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.500489141068284, LR: 0.0003 +[2026-03-06 03:27:51] (step=0074113) Train Loss: 0.4348, Train Steps/Sec: 0.13, Epoch: 14.500684797495598, LR: 0.0003 +[2026-03-06 03:27:59] (step=0074114) Train Loss: 0.4347, Train Steps/Sec: 0.13, Epoch: 14.500880453922912, LR: 0.0003 +[2026-03-06 03:28:06] (step=0074115) Train Loss: 0.4350, Train Steps/Sec: 0.13, Epoch: 14.501076110350224, LR: 0.0003 +[2026-03-06 03:28:14] (step=0074116) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.501271766777538, LR: 0.0003 +[2026-03-06 03:28:22] (step=0074117) Train Loss: 0.4480, Train Steps/Sec: 0.13, Epoch: 14.501467423204852, LR: 0.0003 +[2026-03-06 03:28:30] (step=0074118) Train Loss: 0.4503, Train Steps/Sec: 0.13, Epoch: 14.501663079632166, LR: 0.0003 +[2026-03-06 03:28:38] (step=0074119) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.50185873605948, LR: 0.0003 +[2026-03-06 03:28:46] (step=0074120) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.502054392486793, LR: 0.0003 +[2026-03-06 03:28:53] (step=0074121) Train Loss: 0.4514, Train Steps/Sec: 0.13, Epoch: 14.502250048914107, LR: 0.0003 +[2026-03-06 03:29:01] (step=0074122) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.50244570534142, LR: 0.0003 +[2026-03-06 03:29:09] (step=0074123) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.502641361768735, LR: 0.0003 +[2026-03-06 03:29:17] (step=0074124) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 14.502837018196047, LR: 0.0003 +[2026-03-06 03:29:25] (step=0074125) Train Loss: 0.4450, Train Steps/Sec: 0.13, Epoch: 14.50303267462336, LR: 0.0003 +[2026-03-06 03:29:33] (step=0074126) Train Loss: 0.4383, Train Steps/Sec: 0.13, Epoch: 14.503228331050675, LR: 0.0003 +[2026-03-06 03:29:40] (step=0074127) Train Loss: 0.4424, Train Steps/Sec: 0.13, Epoch: 14.503423987477989, LR: 0.0003 +[2026-03-06 03:29:48] (step=0074128) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.503619643905303, LR: 0.0003 +[2026-03-06 03:29:56] (step=0074129) Train Loss: 0.4430, Train Steps/Sec: 0.13, Epoch: 14.503815300332615, LR: 0.0003 +[2026-03-06 03:30:04] (step=0074130) Train Loss: 0.4411, Train Steps/Sec: 0.13, Epoch: 14.50401095675993, LR: 0.0003 +[2026-03-06 03:30:12] (step=0074131) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.504206613187243, LR: 0.0003 +[2026-03-06 03:30:20] (step=0074132) Train Loss: 0.4406, Train Steps/Sec: 0.13, Epoch: 14.504402269614557, LR: 0.0003 +[2026-03-06 03:30:28] (step=0074133) Train Loss: 0.4576, Train Steps/Sec: 0.13, Epoch: 14.504597926041871, LR: 0.0003 +[2026-03-06 03:30:35] (step=0074134) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 14.504793582469183, LR: 0.0003 +[2026-03-06 03:30:43] (step=0074135) Train Loss: 0.4492, Train Steps/Sec: 0.13, Epoch: 14.504989238896497, LR: 0.0003 +[2026-03-06 03:30:51] (step=0074136) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.505184895323811, LR: 0.0003 +[2026-03-06 03:30:59] (step=0074137) Train Loss: 0.4482, Train Steps/Sec: 0.13, Epoch: 14.505380551751125, LR: 0.0003 +[2026-03-06 03:31:07] (step=0074138) Train Loss: 0.4445, Train Steps/Sec: 0.13, Epoch: 14.50557620817844, LR: 0.0003 +[2026-03-06 03:31:15] (step=0074139) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.505771864605752, LR: 0.0003 +[2026-03-06 03:31:22] (step=0074140) Train Loss: 0.4449, Train Steps/Sec: 0.13, Epoch: 14.505967521033066, LR: 0.0003 +[2026-03-06 03:31:30] (step=0074141) Train Loss: 0.4366, Train Steps/Sec: 0.13, Epoch: 14.50616317746038, LR: 0.0003 +[2026-03-06 03:31:38] (step=0074142) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 14.506358833887694, LR: 0.0003 +[2026-03-06 03:31:46] (step=0074143) Train Loss: 0.4448, Train Steps/Sec: 0.13, Epoch: 14.506554490315008, LR: 0.0003 +[2026-03-06 03:31:54] (step=0074144) Train Loss: 0.4458, Train Steps/Sec: 0.13, Epoch: 14.50675014674232, LR: 0.0003 +[2026-03-06 03:32:02] (step=0074145) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.506945803169634, LR: 0.0003 +[2026-03-06 03:32:10] (step=0074146) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.507141459596948, LR: 0.0003 +[2026-03-06 03:32:17] (step=0074147) Train Loss: 0.4287, Train Steps/Sec: 0.13, Epoch: 14.507337116024262, LR: 0.0003 +[2026-03-06 03:32:25] (step=0074148) Train Loss: 0.4515, Train Steps/Sec: 0.13, Epoch: 14.507532772451576, LR: 0.0003 +[2026-03-06 03:32:33] (step=0074149) Train Loss: 0.4307, Train Steps/Sec: 0.13, Epoch: 14.507728428878888, LR: 0.0003 +[2026-03-06 03:32:41] (step=0074150) Train Loss: 0.4512, Train Steps/Sec: 0.13, Epoch: 14.507924085306202, LR: 0.0003 +[2026-03-06 03:32:49] (step=0074151) Train Loss: 0.4403, Train Steps/Sec: 0.13, Epoch: 14.508119741733516, LR: 0.0003 +[2026-03-06 03:32:57] (step=0074152) Train Loss: 0.4483, Train Steps/Sec: 0.13, Epoch: 14.50831539816083, LR: 0.0003 +[2026-03-06 03:33:05] (step=0074153) Train Loss: 0.4386, Train Steps/Sec: 0.13, Epoch: 14.508511054588142, LR: 0.0003 +[2026-03-06 03:33:12] (step=0074154) Train Loss: 0.4338, Train Steps/Sec: 0.13, Epoch: 14.508706711015456, LR: 0.0003 +[2026-03-06 03:33:20] (step=0074155) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.50890236744277, LR: 0.0003 +[2026-03-06 03:33:28] (step=0074156) Train Loss: 0.4494, Train Steps/Sec: 0.13, Epoch: 14.509098023870084, LR: 0.0003 +[2026-03-06 03:33:36] (step=0074157) Train Loss: 0.4489, Train Steps/Sec: 0.13, Epoch: 14.509293680297398, LR: 0.0003 +[2026-03-06 03:33:44] (step=0074158) Train Loss: 0.4414, Train Steps/Sec: 0.13, Epoch: 14.50948933672471, LR: 0.0003 +[2026-03-06 03:33:52] (step=0074159) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.509684993152025, LR: 0.0003 +[2026-03-06 03:34:00] (step=0074160) Train Loss: 0.4437, Train Steps/Sec: 0.13, Epoch: 14.509880649579339, LR: 0.0003 +[2026-03-06 03:34:08] (step=0074161) Train Loss: 0.4422, Train Steps/Sec: 0.13, Epoch: 14.510076306006653, LR: 0.0003 +[2026-03-06 03:34:15] (step=0074162) Train Loss: 0.4309, Train Steps/Sec: 0.13, Epoch: 14.510271962433967, LR: 0.0003 +[2026-03-06 03:34:23] (step=0074163) Train Loss: 0.4355, Train Steps/Sec: 0.13, Epoch: 14.510467618861279, LR: 0.0003 +[2026-03-06 03:34:31] (step=0074164) Train Loss: 0.4322, Train Steps/Sec: 0.13, Epoch: 14.510663275288593, LR: 0.0003 +[2026-03-06 03:34:39] (step=0074165) Train Loss: 0.4369, Train Steps/Sec: 0.13, Epoch: 14.510858931715907, LR: 0.0003 +[2026-03-06 03:34:47] (step=0074166) Train Loss: 0.4491, Train Steps/Sec: 0.13, Epoch: 14.511054588143221, LR: 0.0003 +[2026-03-06 03:34:55] (step=0074167) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.511250244570535, LR: 0.0003 +[2026-03-06 03:35:02] (step=0074168) Train Loss: 0.4339, Train Steps/Sec: 0.13, Epoch: 14.511445900997847, LR: 0.0003 +[2026-03-06 03:35:10] (step=0074169) Train Loss: 0.4439, Train Steps/Sec: 0.13, Epoch: 14.511641557425161, LR: 0.0003 +[2026-03-06 03:35:18] (step=0074170) Train Loss: 0.4507, Train Steps/Sec: 0.13, Epoch: 14.511837213852475, LR: 0.0003 +[2026-03-06 03:35:26] (step=0074171) Train Loss: 0.4529, Train Steps/Sec: 0.13, Epoch: 14.51203287027979, LR: 0.0003 +[2026-03-06 03:35:34] (step=0074172) Train Loss: 0.4396, Train Steps/Sec: 0.13, Epoch: 14.512228526707103, LR: 0.0003 +[2026-03-06 03:35:42] (step=0074173) Train Loss: 0.4410, Train Steps/Sec: 0.13, Epoch: 14.512424183134415, LR: 0.0003 +[2026-03-06 03:35:49] (step=0074174) Train Loss: 0.4372, Train Steps/Sec: 0.13, Epoch: 14.51261983956173, LR: 0.0003 +[2026-03-06 03:35:57] (step=0074175) Train Loss: 0.4435, Train Steps/Sec: 0.13, Epoch: 14.512815495989043, LR: 0.0003 +[2026-03-06 03:36:05] (step=0074176) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.513011152416357, LR: 0.0003 +[2026-03-06 03:36:13] (step=0074177) Train Loss: 0.4511, Train Steps/Sec: 0.13, Epoch: 14.51320680884367, LR: 0.0003 +[2026-03-06 03:36:21] (step=0074178) Train Loss: 0.4320, Train Steps/Sec: 0.13, Epoch: 14.513402465270984, LR: 0.0003 +[2026-03-06 03:36:29] (step=0074179) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.513598121698298, LR: 0.0003 +[2026-03-06 03:36:36] (step=0074180) Train Loss: 0.4493, Train Steps/Sec: 0.13, Epoch: 14.513793778125612, LR: 0.0003 +[2026-03-06 03:36:44] (step=0074181) Train Loss: 0.4475, Train Steps/Sec: 0.13, Epoch: 14.513989434552926, LR: 0.0003 +[2026-03-06 03:36:52] (step=0074182) Train Loss: 0.4582, Train Steps/Sec: 0.13, Epoch: 14.514185090980238, LR: 0.0003 +[2026-03-06 03:37:00] (step=0074183) Train Loss: 0.4327, Train Steps/Sec: 0.13, Epoch: 14.514380747407552, LR: 0.0003 +[2026-03-06 03:37:08] (step=0074184) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.514576403834866, LR: 0.0003 +[2026-03-06 03:37:16] (step=0074185) Train Loss: 0.4431, Train Steps/Sec: 0.13, Epoch: 14.51477206026218, LR: 0.0003 +[2026-03-06 03:37:24] (step=0074186) Train Loss: 0.4335, Train Steps/Sec: 0.13, Epoch: 14.514967716689494, LR: 0.0003 +[2026-03-06 03:37:31] (step=0074187) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.515163373116806, LR: 0.0003 +[2026-03-06 03:37:39] (step=0074188) Train Loss: 0.4544, Train Steps/Sec: 0.13, Epoch: 14.51535902954412, LR: 0.0003 +[2026-03-06 03:37:47] (step=0074189) Train Loss: 0.4429, Train Steps/Sec: 0.13, Epoch: 14.515554685971434, LR: 0.0003 +[2026-03-06 03:37:55] (step=0074190) Train Loss: 0.4315, Train Steps/Sec: 0.13, Epoch: 14.515750342398748, LR: 0.0003 +[2026-03-06 03:38:03] (step=0074191) Train Loss: 0.4352, Train Steps/Sec: 0.13, Epoch: 14.515945998826062, LR: 0.0003 +[2026-03-06 03:38:11] (step=0074192) Train Loss: 0.4399, Train Steps/Sec: 0.13, Epoch: 14.516141655253374, LR: 0.0003 +[2026-03-06 03:38:18] (step=0074193) Train Loss: 0.4443, Train Steps/Sec: 0.13, Epoch: 14.516337311680688, LR: 0.0003 +[2026-03-06 03:38:26] (step=0074194) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.516532968108002, LR: 0.0003 +[2026-03-06 03:38:34] (step=0074195) Train Loss: 0.4337, Train Steps/Sec: 0.13, Epoch: 14.516728624535316, LR: 0.0003 +[2026-03-06 03:38:42] (step=0074196) Train Loss: 0.4527, Train Steps/Sec: 0.13, Epoch: 14.51692428096263, LR: 0.0003 +[2026-03-06 03:38:50] (step=0074197) Train Loss: 0.4368, Train Steps/Sec: 0.13, Epoch: 14.517119937389943, LR: 0.0003 +[2026-03-06 03:38:58] (step=0074198) Train Loss: 0.4313, Train Steps/Sec: 0.13, Epoch: 14.517315593817257, LR: 0.0003 +[2026-03-06 03:39:06] (step=0074199) Train Loss: 0.4473, Train Steps/Sec: 0.13, Epoch: 14.51751125024457, LR: 0.0003 +[2026-03-06 03:39:13] (step=0074200) Train Loss: 0.4465, Train Steps/Sec: 0.13, Epoch: 14.517706906671885, LR: 0.0003 +[2026-03-06 03:39:21] (step=0074201) Train Loss: 0.4434, Train Steps/Sec: 0.13, Epoch: 14.517902563099199, LR: 0.0003 +[2026-03-06 03:39:29] (step=0074202) Train Loss: 0.4474, Train Steps/Sec: 0.13, Epoch: 14.518098219526511, LR: 0.0003 +[2026-03-06 03:39:37] (step=0074203) Train Loss: 0.4416, Train Steps/Sec: 0.13, Epoch: 14.518293875953825, LR: 0.0003 +[2026-03-06 03:39:45] (step=0074204) Train Loss: 0.4373, Train Steps/Sec: 0.13, Epoch: 14.518489532381139, LR: 0.0003 +[2026-03-06 03:39:53] (step=0074205) Train Loss: 0.4506, Train Steps/Sec: 0.13, Epoch: 14.518685188808453, LR: 0.0003 +[2026-03-06 03:40:01] (step=0074206) Train Loss: 0.4456, Train Steps/Sec: 0.13, Epoch: 14.518880845235765, LR: 0.0003 +[2026-03-06 03:40:08] (step=0074207) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 14.51907650166308, LR: 0.0003 +[2026-03-06 03:40:16] (step=0074208) Train Loss: 0.4462, Train Steps/Sec: 0.13, Epoch: 14.519272158090393, LR: 0.0003 +[2026-03-06 03:40:24] (step=0074209) Train Loss: 0.4484, Train Steps/Sec: 0.13, Epoch: 14.519467814517707, LR: 0.0003 +[2026-03-06 03:40:32] (step=0074210) Train Loss: 0.4530, Train Steps/Sec: 0.13, Epoch: 14.519663470945021, LR: 0.0003 +[2026-03-06 03:40:40] (step=0074211) Train Loss: 0.4413, Train Steps/Sec: 0.13, Epoch: 14.519859127372333, LR: 0.0003 +[2026-03-06 03:40:48] (step=0074212) Train Loss: 0.4476, Train Steps/Sec: 0.13, Epoch: 14.520054783799647, LR: 0.0003 +[2026-03-06 03:40:56] (step=0074213) Train Loss: 0.4498, Train Steps/Sec: 0.13, Epoch: 14.520250440226961, LR: 0.0003 +[2026-03-06 03:41:03] (step=0074214) Train Loss: 0.4602, Train Steps/Sec: 0.13, Epoch: 14.520446096654275, LR: 0.0003 +[2026-03-06 03:41:11] (step=0074215) Train Loss: 0.4409, Train Steps/Sec: 0.13, Epoch: 14.52064175308159, LR: 0.0003 +[2026-03-06 03:41:19] (step=0074216) Train Loss: 0.4463, Train Steps/Sec: 0.13, Epoch: 14.520837409508902, LR: 0.0003 +[2026-03-06 03:41:27] (step=0074217) Train Loss: 0.4316, Train Steps/Sec: 0.13, Epoch: 14.521033065936216, LR: 0.0003 +[2026-03-06 03:41:35] (step=0074218) Train Loss: 0.4407, Train Steps/Sec: 0.13, Epoch: 14.52122872236353, LR: 0.0003 +[2026-03-06 03:41:43] (step=0074219) Train Loss: 0.4471, Train Steps/Sec: 0.13, Epoch: 14.521424378790844, LR: 0.0003 +[2026-03-06 03:41:50] (step=0074220) Train Loss: 0.4432, Train Steps/Sec: 0.13, Epoch: 14.521620035218158, LR: 0.0003 +[2026-03-06 03:41:58] (step=0074221) Train Loss: 0.4499, Train Steps/Sec: 0.13, Epoch: 14.52181569164547, LR: 0.0003 +[2026-03-06 03:42:06] (step=0074222) Train Loss: 0.4481, Train Steps/Sec: 0.13, Epoch: 14.522011348072784, LR: 0.0003 +[2026-03-06 03:42:14] (step=0074223) Train Loss: 0.4378, Train Steps/Sec: 0.13, Epoch: 14.522207004500098, LR: 0.0003 diff --git a/cxr_finetune_lora/train_args.json b/cxr_finetune_lora/train_args.json new file mode 100644 index 0000000000000000000000000000000000000000..be9d4ff1891b3425f37f86cc333dcbbe94b7b22a --- /dev/null +++ b/cxr_finetune_lora/train_args.json @@ -0,0 +1 @@ +{"results_dir": "./results/cxr_finetune_lora", "model_name_or_path": "Shitao/OmniGen-v1", "json_file": "/raid/home/CAMCA/hj880/wt/dataset/cxr_sythn/anno/cxr_synth_anno_train.jsonl", "image_path": "./", "epochs": 100, "batch_size_per_device": 128, "vae_path": null, "num_workers": 2, "log_every": 1, "ckpt_every": 500, "max_grad_norm": 1.0, "lr": 0.0003, "max_input_length_limit": 18000, "condition_dropout_prob": 0.01, "adam_weight_decay": 0.0, "keep_raw_resolution": true, "max_image_size": 1024, "use_lora": true, "lora_rank": 8, "use_ema": false, "lr_scheduler": "constant", "lr_warmup_steps": 1000, "report_to": "tensorboard", "mixed_precision": "bf16", "gradient_accumulation_steps": 1} \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/README.md b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/adapter_config.json b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4fa5936d4a19a7af15ecbdac9a7ce2c6749f723d --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "qkv_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/adapter_model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4b6aff0d47991ca99f5042e6dfc834c0a643c44 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eabef8545173b61dfbb45449feb36d5d38c6486ec6a462821c6a2556b72ab4bd +size 9454048 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/README.md b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/adapter_config.json b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4fa5936d4a19a7af15ecbdac9a7ce2c6749f723d --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "qkv_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/adapter_model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f3a9bfc2ef0abaf46071d89d40ffd22065191f7 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:171c19d7640c37ca9cec177dc9a45665329e54ceabe0c96f39e1f2f67e034c1a +size 9454048 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/README.md b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/adapter_config.json b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4fa5936d4a19a7af15ecbdac9a7ce2c6749f723d --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "qkv_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/adapter_model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dddd0623787b0c690c13b3c15459c7427ac8da1a --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0068e36c908c8e3fbeebee410ab634bfba43e639d50102ae677d6883c6fb1e74 +size 9454048 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/README.md b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/adapter_config.json b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4fa5936d4a19a7af15ecbdac9a7ce2c6749f723d --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "qkv_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/adapter_model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..960053f3b4729bdd98a1a08c0624afdd978dc7a8 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79b2aad04f4dbcf21f3a7894f080dca74e04906d6c406864679432b765a2915 +size 9454048 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/README.md b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/adapter_config.json b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4fa5936d4a19a7af15ecbdac9a7ce2c6749f723d --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "qkv_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/adapter_model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f4a3e73c7d326f8d956fa5ffe883641a0f1a247 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43fb1e10e2bb7893da0734f65cc813346a03139c2468096c3c06e3c2e5bec1c0 +size 9454048 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/README.md b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..86c9eedd3aa734a240cfacc101cd21b927d397cd --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/README.md @@ -0,0 +1,203 @@ +--- +library_name: peft +tags: +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/adapter_config.json b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4fa5936d4a19a7af15ecbdac9a7ce2c6749f723d --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/adapter_config.json @@ -0,0 +1,44 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "OmniGen", + "parent_library": "OmniGen.model" + }, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": "gaussian", + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "qkv_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/adapter_model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dfc406e288256d55f5fb7e9fb641ded855249ac --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2535e3d6a8d039657f753c8161456d42842f1c3c18cb3715dca455401655a4d +size 9454048 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b526e32657423921db1764e86f5d6b319428494 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:856c23c9306ee887253d9479fc6d329108d76d67b08dfdad8a2f1c738bc46cb6 +size 7760121672 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/optimizer.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/optimizer.bin new file mode 100644 index 0000000000000000000000000000000000000000..17560d26a098a59252c23714d4ad22f2a98d65c2 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/optimizer.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d5cb54692c273c7a6cf29342d84614df439e3de87b4d5bc724f9aef8a89311 +size 18982010 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_0.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b9e37846fe4056f34e9184a1dcb94e6550b4be9b --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_0.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3364febd63b1b8392ee21b93940847b5ca7e69a7ef348fbdfe3ffa454f7998a +size 15060 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_1.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..85c15e1b711ed6c1421c71001610b8e09c9082c2 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd2ec2e2a35bb11aa59c63fdb6cf8a779c87379fa27f1d3009d0b889b8a822f +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_2.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c646b2b7f582d411e6b2354ae137e9ad732b068d --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:025b8c99106247cdf5357d6632acbfbd66156d629b48497fa9973407eef6269f +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_3.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5f334345ed8e694ed23d77e1b1f3ea958abc490e --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c360d83516a3918465097f0d39771110d8a89a876d934275b783ba937f09263 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/scheduler.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/scheduler.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b94020292222ea6f8e1a598eec2693ca6976ebf --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/scheduler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2310f63637c6cf3848fe289e21d70c2e6b3542292bf5c5d51e6dab0afab209f7 +size 1000 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1039811b4d1b36ac3e29b18a4a46830a868510da --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37bff60d85e832894964db80945c0298da1fde61d50d3a4337703e1a8a4d7ae5 +size 7760121672 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/optimizer.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/optimizer.bin new file mode 100644 index 0000000000000000000000000000000000000000..03a596f668c8b98cf23bd7c8950599221b1d4de8 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/optimizer.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd6d30fbc9efb9bc855174938a6f61253132821dbcf8ba26f86288fae5aff16 +size 18982010 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_0.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..65eb29f17b7830521bf6aa357eda9f9e00ec7547 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_0.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0950644e21cef8a11794a832919ed58e28f84da57997fb25b8bc2e3638dc4bc +size 15060 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_1.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6e5488b884202f7451941e231aad61fe29c3c0c0 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37e4c7c595d689ede2f826fc9903510cbcaef5a2fb09147b390ae96be5dba782 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_2.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3dc500e7145bcdbc6d76a858df3af244c249c132 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77dc2b77646536a73e886dc534628a9e729af4410a93cc6339c4375fb140f567 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_3.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e0432dc1acce6f4b5943bed0570c7e14af591fdb --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ca654b7cda64716b2e60f2dfc94d4ebfee581399cd841375c4aec2b0d3e2af +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/scheduler.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/scheduler.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe9cc268208d251a624292f6e5165c9a40886a57 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/scheduler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42516625df2678a0f87de778a3f0ead58da148ac4f25fc5c6212c01a977100d6 +size 1000 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab79a68332db96719d151414bb93084df2babca1 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f0f3ae845df5205f856907c028125d1a288ceb481467753263388f427c0f96a +size 7760121672 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/optimizer.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/optimizer.bin new file mode 100644 index 0000000000000000000000000000000000000000..d5dd7528f007c2266d234845e89d943bce1a4b96 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/optimizer.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6f57a9dce2a9490cfa1347b5cb2b2cc9c5f5e07d6ac4a92380860f773bf70f +size 18982010 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_0.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..61887c8cb3098e8b8a70e6b241378b9f9f935450 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_0.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70668de652320d67ff31cd59b4894ff9a11a0fd32592faa348c2e78d9b4b3ad1 +size 15060 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_1.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9488e8b07dc1792551b34132392d83895f644d5c --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ffff378de8c4b5fb2e23d5846182bb1e3c0b32258bd14e8dcfe7d83190f408b +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_2.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..eba9b50e7ee0a76ce7e116ba5cb94b9f5a17cf74 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf0997b68902bb48490b10a8e8e58076c3f993b24d8795fdb57c94d14a9e194 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_3.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..acd735d208b91d652b16400d42a01aa4c9336215 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc1ffa0dc6b3797f968a24eb20aa4c9dba8446708799fac0978f807196e0393 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/scheduler.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/scheduler.bin new file mode 100644 index 0000000000000000000000000000000000000000..a28552ca94bd7af14ba63ad5fae06bb17fa4c454 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/scheduler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18dcbef2f94efbfbf0d21e52f8edce7c027f51fde92c6a88e6b0f0d961476be0 +size 1000 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0f83671b44b4de735484e51206cf29b1bfad8e2 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0818f51c7884fcbff688fe411c94f413211e39bed5520f9c62d5b9175651b1 +size 7760121672 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/optimizer.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/optimizer.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b336ef47b26ea29d16adbb4446510757cd2915b --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/optimizer.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc687fcb1ae8d80917be7b4ae8e8876f87e8ff443aeb011f88de5a1c00be4311 +size 18982010 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_0.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b764e7a2c62bd20518d5bdd66c35bc61351cf552 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_0.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9f24c1fbcde944c4b693f334802c9c8211d6c6e3c93b1493a7f5939112cfc6 +size 15060 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_1.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0a1c43c2fdab6d2a9816275b89ac6d341fdc42d1 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d17dc5d234f688a0ab5757f6db1c1ef446922b4bf25e57404b773b8931e6dce +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_2.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3259030716493f95ca22ab5cbb38e5d190e963ea --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57b9a005d743d1cd5346f2a6c2a12c2e8811631428a1bdfe8c583e948fb93606 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_3.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0822e5f6cf5cbe244250d9963809f7fef59f54e8 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac5f28ae0d350505d4da59ac15b66147ee62b7816c95975563472e3144cf7c0 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/scheduler.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/scheduler.bin new file mode 100644 index 0000000000000000000000000000000000000000..c97e6ae54bcf1b124d4c0e48d148952673d2edcc --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/scheduler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2bd2fea55575e9e5201cd70f190230d22db4f67ec868e253d7e182dd49a19ee +size 1000 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bebb8f08c39c569c8e52137291d3c6cddbd17445 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd60af275d212f8b2ff47eafb93297cd1899ef71e6529c133913feee3c585814 +size 7760121672 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/optimizer.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/optimizer.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8dd1c11db43f1d519fb8f65f25bc44874de05db --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/optimizer.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837117d5fdfee4b0bfc991d1834f3ae468eacfe975d8871e750c1f4a591df575 +size 18982010 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_0.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0022830dbfbdc529331646d7c071f059269b2020 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_0.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2a100d5eaa07d7394ca848c68838faa245b2008353f83f9c19ca2676f167f9 +size 15060 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_1.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4d313d3ac644d386cbfe0492de5c517ce7369eec --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1488b5f663750d9eb317926a7db2d259ac5956e256d69a26b6d407bd2b20a20 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_2.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c8c704d6f80a23c437ec56da5eef2c28fd667a72 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ecaedc08b1ec9d08b2fc3b0de230ff8a56b3ab133fe4d780ba0addbfd547f7 +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_3.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..967aeddf4b6df06b91f9cc4401f4a2ba4bfdb93c --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee10499d8d28ab59d97054828f6f7f5d3bda7d4a562c758ae801e027ae858b4a +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/scheduler.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/scheduler.bin new file mode 100644 index 0000000000000000000000000000000000000000..6bdf17af1f99c81f133601e635d46318d3a4eee8 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/scheduler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62fa42bc0f349bf17974b501b2e04e150f941778516a7dc2dafa712c6dbb5f91 +size 1000 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/model.safetensors b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba6cc2f6480e4087cac25c27f9db9e4d552b2a32 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:769f7c78162277533c0f4189c46cd7a45c27738d6a24d2564d3fa44477fbebef +size 7760121672 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/optimizer.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/optimizer.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff2c6a569a612c3a296fa41d55274246c068feff --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/optimizer.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5fbbbf1cfb995c8246d59b94a106600132e7397cf2c9974eb53f44f70a56cf5 +size 18982010 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_0.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3735dc7bab11cbfd089c7fdccd2229c5876383be --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_0.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642f6253586c3cbd6621f62d7a2dc81202e1b0b220adabc1e008d656b8fbe410 +size 15060 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_1.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f2488d457214b77eec1edac6e4eca9b898718d3e --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36dce826f6ebfbb4c43cf178ba7a580cb92718f016cec4f2dbb5a9090d0c0d1e +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_2.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ea0d9f350f6a3739b1404ee5505495071b93e5ae --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f116948dc84be7108813f79d265c2514bef14090ac578dc70a49b9977ebbbccb +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_3.pkl b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e20973bb17e0016531af13bf522e360b6c5bf59a --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:546c224c3b41e66f915af8ffc10f701a7e0ac771414febe79204950dacb64cbd +size 15124 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/scheduler.bin b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/scheduler.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad8add0011b4489817c78628cca0b949c492cbf3 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/scheduler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a14cab18e3cf6bf4020ba19261b57bd47f02eb286b8412b0d80f0e1c055663a +size 1000 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/log.txt b/cxr_finetune_lora_30ksteps_maskmse_timefilter/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..22e5a2740d06c4ab826adff1076a37b338504658 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/log.txt @@ -0,0 +1,3139 @@ +[2026-04-14 09:26:52] Experiment directory created at ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter +[2026-04-14 09:26:52] Downloaded model to /home/wenting/.cache/huggingface/hub/models--Shitao--OmniGen-v1/snapshots/58e249c7c7634423c0ba41c34a774af79aa87889 +[2026-04-14 09:26:52] Downloaded model to /home/wenting/.cache/huggingface/hub/models--Shitao--OmniGen-v1/snapshots/58e249c7c7634423c0ba41c34a774af79aa87889 +[2026-04-14 09:26:52] Downloaded model to /home/wenting/.cache/huggingface/hub/models--Shitao--OmniGen-v1/snapshots/58e249c7c7634423c0ba41c34a774af79aa87889 +[2026-04-14 09:26:52] Downloaded model to /home/wenting/.cache/huggingface/hub/models--Shitao--OmniGen-v1/snapshots/58e249c7c7634423c0ba41c34a774af79aa87889 +[2026-04-14 09:27:40] Loading segmentation model from /home/wenting/zr/Segmentation/checkpoints/train_Seg/best_anatomy_model.pth +[2026-04-14 09:27:40] Segmentation model loaded. lambda_anatomy=1.0, subbatch=16 +[2026-04-14 09:27:42] Loading LoRA adapter weights from /home/wenting/zr/gen_code/results/cxr_finetune_lora/checkpoints/0030000/ +[2026-04-14 09:27:42] Optimizer will be created fresh (no momentum transfer). +[2026-04-14 09:27:42] Loaded 128 LoRA parameter tensors. +[2026-04-14 09:27:43] Dataset contains 1,308,627 +[2026-04-14 09:27:48] Training for 10 epochs... +[2026-04-14 09:27:48] Beginning epoch 0... +[2026-04-14 09:28:10] (step=0000001) Loss: 0.4951 (diff=0.4464, anat=0.0487), Steps/Sec: 0.04, Epoch: 0.00, LR: 2.0000000000000002e-07 +[2026-04-14 09:28:29] (step=0000002) Loss: 0.4907 (diff=0.4448, anat=0.0459), Steps/Sec: 0.05, Epoch: 0.00, LR: 4.0000000000000003e-07 +[2026-04-14 09:28:48] (step=0000003) Loss: 0.4840 (diff=0.4377, anat=0.0463), Steps/Sec: 0.05, Epoch: 0.00, LR: 6.000000000000001e-07 +[2026-04-14 09:29:07] (step=0000004) Loss: 0.5098 (diff=0.4582, anat=0.0517), Steps/Sec: 0.05, Epoch: 0.00, LR: 8.000000000000001e-07 +[2026-04-14 09:29:27] (step=0000005) Loss: 0.4899 (diff=0.4447, anat=0.0452), Steps/Sec: 0.05, Epoch: 0.00, LR: 1.0000000000000002e-06 +[2026-04-14 09:29:47] (step=0000006) Loss: 0.4986 (diff=0.4507, anat=0.0479), Steps/Sec: 0.05, Epoch: 0.00, LR: 1.2000000000000002e-06 +[2026-04-14 09:30:07] (step=0000007) Loss: 0.4871 (diff=0.4404, anat=0.0467), Steps/Sec: 0.05, Epoch: 0.00, LR: 1.4000000000000001e-06 +[2026-04-14 09:30:26] (step=0000008) Loss: 0.4905 (diff=0.4459, anat=0.0445), Steps/Sec: 0.05, Epoch: 0.00, LR: 1.6000000000000001e-06 +[2026-04-14 09:30:46] (step=0000009) Loss: 0.5080 (diff=0.4593, anat=0.0487), Steps/Sec: 0.05, Epoch: 0.00, LR: 1.8e-06 +[2026-04-14 09:31:06] (step=0000010) Loss: 0.4992 (diff=0.4498, anat=0.0494), Steps/Sec: 0.05, Epoch: 0.00, LR: 2.0000000000000003e-06 +[2026-04-14 09:31:25] (step=0000011) Loss: 0.4988 (diff=0.4500, anat=0.0487), Steps/Sec: 0.05, Epoch: 0.00, LR: 2.2e-06 +[2026-04-14 09:31:45] (step=0000012) Loss: 0.4845 (diff=0.4371, anat=0.0473), Steps/Sec: 0.05, Epoch: 0.00, LR: 2.4000000000000003e-06 +[2026-04-14 09:32:05] (step=0000013) Loss: 0.4948 (diff=0.4467, anat=0.0481), Steps/Sec: 0.05, Epoch: 0.00, LR: 2.6e-06 +[2026-04-14 09:32:24] (step=0000014) Loss: 0.4951 (diff=0.4498, anat=0.0453), Steps/Sec: 0.05, Epoch: 0.00, LR: 2.8000000000000003e-06 +[2026-04-14 09:32:44] (step=0000015) Loss: 0.5040 (diff=0.4530, anat=0.0510), Steps/Sec: 0.05, Epoch: 0.00, LR: 3e-06 +[2026-04-14 09:33:04] (step=0000016) Loss: 0.4895 (diff=0.4432, anat=0.0463), Steps/Sec: 0.05, Epoch: 0.00, LR: 3.2000000000000003e-06 +[2026-04-14 09:33:23] (step=0000017) Loss: 0.4817 (diff=0.4372, anat=0.0446), Steps/Sec: 0.05, Epoch: 0.00, LR: 3.4000000000000005e-06 +[2026-04-14 09:33:43] (step=0000018) Loss: 0.4839 (diff=0.4378, anat=0.0461), Steps/Sec: 0.05, Epoch: 0.00, LR: 3.6e-06 +[2026-04-14 09:34:03] (step=0000019) Loss: 0.5044 (diff=0.4535, anat=0.0510), Steps/Sec: 0.05, Epoch: 0.00, LR: 3.8e-06 +[2026-04-14 09:34:22] (step=0000020) Loss: 0.4907 (diff=0.4407, anat=0.0500), Steps/Sec: 0.05, Epoch: 0.00, LR: 4.000000000000001e-06 +[2026-04-14 09:34:42] (step=0000021) Loss: 0.4915 (diff=0.4433, anat=0.0482), Steps/Sec: 0.05, Epoch: 0.00, LR: 4.2000000000000004e-06 +[2026-04-14 09:35:02] (step=0000022) Loss: 0.5042 (diff=0.4559, anat=0.0483), Steps/Sec: 0.05, Epoch: 0.00, LR: 4.4e-06 +[2026-04-14 09:35:21] (step=0000023) Loss: 0.4982 (diff=0.4491, anat=0.0490), Steps/Sec: 0.05, Epoch: 0.00, LR: 4.6e-06 +[2026-04-14 09:35:41] (step=0000024) Loss: 0.4921 (diff=0.4448, anat=0.0473), Steps/Sec: 0.05, Epoch: 0.00, LR: 4.800000000000001e-06 +[2026-04-14 09:36:01] (step=0000025) Loss: 0.4991 (diff=0.4475, anat=0.0516), Steps/Sec: 0.05, Epoch: 0.00, LR: 5e-06 +[2026-04-14 09:36:20] (step=0000026) Loss: 0.4815 (diff=0.4360, anat=0.0455), Steps/Sec: 0.05, Epoch: 0.01, LR: 5.2e-06 +[2026-04-14 09:36:40] (step=0000027) Loss: 0.4906 (diff=0.4452, anat=0.0454), Steps/Sec: 0.05, Epoch: 0.01, LR: 5.4e-06 +[2026-04-14 09:37:00] (step=0000028) Loss: 0.4909 (diff=0.4444, anat=0.0465), Steps/Sec: 0.05, Epoch: 0.01, LR: 5.600000000000001e-06 +[2026-04-14 09:37:19] (step=0000029) Loss: 0.4964 (diff=0.4484, anat=0.0480), Steps/Sec: 0.05, Epoch: 0.01, LR: 5.8e-06 +[2026-04-14 09:37:39] (step=0000030) Loss: 0.4940 (diff=0.4454, anat=0.0486), Steps/Sec: 0.05, Epoch: 0.01, LR: 6e-06 +[2026-04-14 09:37:59] (step=0000031) Loss: 0.4944 (diff=0.4495, anat=0.0448), Steps/Sec: 0.05, Epoch: 0.01, LR: 6.2e-06 +[2026-04-14 09:38:18] (step=0000032) Loss: 0.5000 (diff=0.4515, anat=0.0485), Steps/Sec: 0.05, Epoch: 0.01, LR: 6.4000000000000006e-06 +[2026-04-14 09:38:38] (step=0000033) Loss: 0.4914 (diff=0.4437, anat=0.0476), Steps/Sec: 0.05, Epoch: 0.01, LR: 6.6e-06 +[2026-04-14 09:38:58] (step=0000034) Loss: 0.4928 (diff=0.4458, anat=0.0470), Steps/Sec: 0.05, Epoch: 0.01, LR: 6.800000000000001e-06 +[2026-04-14 09:39:17] (step=0000035) Loss: 0.4983 (diff=0.4502, anat=0.0481), Steps/Sec: 0.05, Epoch: 0.01, LR: 7.000000000000001e-06 +[2026-04-14 09:39:37] (step=0000036) Loss: 0.4909 (diff=0.4429, anat=0.0480), Steps/Sec: 0.05, Epoch: 0.01, LR: 7.2e-06 +[2026-04-14 09:39:57] (step=0000037) Loss: 0.4849 (diff=0.4411, anat=0.0438), Steps/Sec: 0.05, Epoch: 0.01, LR: 7.4e-06 +[2026-04-14 09:40:16] (step=0000038) Loss: 0.4841 (diff=0.4404, anat=0.0436), Steps/Sec: 0.05, Epoch: 0.01, LR: 7.6e-06 +[2026-04-14 09:40:36] (step=0000039) Loss: 0.4905 (diff=0.4455, anat=0.0450), Steps/Sec: 0.05, Epoch: 0.01, LR: 7.8e-06 +[2026-04-14 09:40:56] (step=0000040) Loss: 0.4872 (diff=0.4417, anat=0.0456), Steps/Sec: 0.05, Epoch: 0.01, LR: 8.000000000000001e-06 +[2026-04-14 09:41:15] (step=0000041) Loss: 0.5014 (diff=0.4533, anat=0.0481), Steps/Sec: 0.05, Epoch: 0.01, LR: 8.200000000000001e-06 +[2026-04-14 09:41:35] (step=0000042) Loss: 0.5008 (diff=0.4554, anat=0.0454), Steps/Sec: 0.05, Epoch: 0.01, LR: 8.400000000000001e-06 +[2026-04-14 09:41:55] (step=0000043) Loss: 0.5190 (diff=0.4694, anat=0.0495), Steps/Sec: 0.05, Epoch: 0.01, LR: 8.599999999999999e-06 +[2026-04-14 09:42:14] (step=0000044) Loss: 0.4939 (diff=0.4472, anat=0.0467), Steps/Sec: 0.05, Epoch: 0.01, LR: 8.8e-06 +[2026-04-14 09:42:34] (step=0000045) Loss: 0.4954 (diff=0.4507, anat=0.0447), Steps/Sec: 0.05, Epoch: 0.01, LR: 9e-06 +[2026-04-14 09:42:54] (step=0000046) Loss: 0.4923 (diff=0.4436, anat=0.0486), Steps/Sec: 0.05, Epoch: 0.01, LR: 9.2e-06 +[2026-04-14 09:43:13] (step=0000047) Loss: 0.4832 (diff=0.4404, anat=0.0429), Steps/Sec: 0.05, Epoch: 0.01, LR: 9.4e-06 +[2026-04-14 09:43:33] (step=0000048) Loss: 0.4980 (diff=0.4497, anat=0.0483), Steps/Sec: 0.05, Epoch: 0.01, LR: 9.600000000000001e-06 +[2026-04-14 09:43:53] (step=0000049) Loss: 0.4886 (diff=0.4423, anat=0.0463), Steps/Sec: 0.05, Epoch: 0.01, LR: 9.800000000000001e-06 +[2026-04-14 09:44:12] (step=0000050) Loss: 0.4987 (diff=0.4507, anat=0.0480), Steps/Sec: 0.05, Epoch: 0.01, LR: 1e-05 +[2026-04-14 09:44:32] (step=0000051) Loss: 0.4942 (diff=0.4495, anat=0.0447), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.02e-05 +[2026-04-14 09:44:52] (step=0000052) Loss: 0.4932 (diff=0.4479, anat=0.0453), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.04e-05 +[2026-04-14 09:45:11] (step=0000053) Loss: 0.4977 (diff=0.4516, anat=0.0461), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.06e-05 +[2026-04-14 09:45:31] (step=0000054) Loss: 0.4942 (diff=0.4475, anat=0.0467), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.08e-05 +[2026-04-14 09:45:51] (step=0000055) Loss: 0.5003 (diff=0.4549, anat=0.0453), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.1000000000000001e-05 +[2026-04-14 09:46:10] (step=0000056) Loss: 0.4955 (diff=0.4491, anat=0.0464), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.1200000000000001e-05 +[2026-04-14 09:46:30] (step=0000057) Loss: 0.4919 (diff=0.4454, anat=0.0465), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.1400000000000001e-05 +[2026-04-14 09:46:50] (step=0000058) Loss: 0.4894 (diff=0.4430, anat=0.0465), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.16e-05 +[2026-04-14 09:47:09] (step=0000059) Loss: 0.4855 (diff=0.4386, anat=0.0469), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.18e-05 +[2026-04-14 09:47:29] (step=0000060) Loss: 0.4932 (diff=0.4458, anat=0.0474), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.2e-05 +[2026-04-14 09:47:49] (step=0000061) Loss: 0.5014 (diff=0.4512, anat=0.0503), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.22e-05 +[2026-04-14 09:48:08] (step=0000062) Loss: 0.4837 (diff=0.4397, anat=0.0440), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.24e-05 +[2026-04-14 09:48:28] (step=0000063) Loss: 0.4999 (diff=0.4526, anat=0.0473), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.2600000000000001e-05 +[2026-04-14 09:48:47] (step=0000064) Loss: 0.4723 (diff=0.4284, anat=0.0439), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.2800000000000001e-05 +[2026-04-14 09:49:07] (step=0000065) Loss: 0.4956 (diff=0.4493, anat=0.0463), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.3000000000000001e-05 +[2026-04-14 09:49:27] (step=0000066) Loss: 0.5028 (diff=0.4565, anat=0.0463), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.32e-05 +[2026-04-14 09:49:47] (step=0000067) Loss: 0.4870 (diff=0.4423, anat=0.0447), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.3400000000000002e-05 +[2026-04-14 09:50:06] (step=0000068) Loss: 0.4811 (diff=0.4373, anat=0.0437), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.3600000000000002e-05 +[2026-04-14 09:50:26] (step=0000069) Loss: 0.5053 (diff=0.4559, anat=0.0494), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.3800000000000002e-05 +[2026-04-14 09:50:45] (step=0000070) Loss: 0.4931 (diff=0.4479, anat=0.0452), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.4000000000000001e-05 +[2026-04-14 09:51:05] (step=0000071) Loss: 0.4858 (diff=0.4410, anat=0.0448), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.42e-05 +[2026-04-14 09:51:25] (step=0000072) Loss: 0.4927 (diff=0.4459, anat=0.0469), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.44e-05 +[2026-04-14 09:51:44] (step=0000073) Loss: 0.4847 (diff=0.4417, anat=0.0430), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.4599999999999999e-05 +[2026-04-14 09:52:04] (step=0000074) Loss: 0.4889 (diff=0.4440, anat=0.0448), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.48e-05 +[2026-04-14 09:52:24] (step=0000075) Loss: 0.4812 (diff=0.4377, anat=0.0434), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.5e-05 +[2026-04-14 09:52:43] (step=0000076) Loss: 0.4762 (diff=0.4336, anat=0.0426), Steps/Sec: 0.05, Epoch: 0.01, LR: 1.52e-05 +[2026-04-14 09:53:03] (step=0000077) Loss: 0.5064 (diff=0.4594, anat=0.0470), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.54e-05 +[2026-04-14 09:53:23] (step=0000078) Loss: 0.5047 (diff=0.4588, anat=0.0459), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.56e-05 +[2026-04-14 09:53:42] (step=0000079) Loss: 0.4902 (diff=0.4479, anat=0.0424), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.58e-05 +[2026-04-14 09:54:02] (step=0000080) Loss: 0.4737 (diff=0.4343, anat=0.0394), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.6000000000000003e-05 +[2026-04-14 09:54:22] (step=0000081) Loss: 0.4890 (diff=0.4466, anat=0.0425), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.62e-05 +[2026-04-14 09:54:41] (step=0000082) Loss: 0.4864 (diff=0.4449, anat=0.0414), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.6400000000000002e-05 +[2026-04-14 09:55:01] (step=0000083) Loss: 0.4682 (diff=0.4266, anat=0.0416), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.66e-05 +[2026-04-14 09:55:21] (step=0000084) Loss: 0.4886 (diff=0.4445, anat=0.0441), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.6800000000000002e-05 +[2026-04-14 09:55:40] (step=0000085) Loss: 0.4909 (diff=0.4461, anat=0.0448), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.7000000000000003e-05 +[2026-04-14 09:56:00] (step=0000086) Loss: 0.4971 (diff=0.4525, anat=0.0446), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.7199999999999998e-05 +[2026-04-14 09:56:20] (step=0000087) Loss: 0.4800 (diff=0.4360, anat=0.0440), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.74e-05 +[2026-04-14 09:56:39] (step=0000088) Loss: 0.4889 (diff=0.4471, anat=0.0418), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.76e-05 +[2026-04-14 09:56:59] (step=0000089) Loss: 0.5087 (diff=0.4616, anat=0.0472), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.78e-05 +[2026-04-14 09:57:18] (step=0000090) Loss: 0.4954 (diff=0.4490, anat=0.0463), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.8e-05 +[2026-04-14 09:57:38] (step=0000091) Loss: 0.4800 (diff=0.4382, anat=0.0418), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.8200000000000002e-05 +[2026-04-14 09:57:58] (step=0000092) Loss: 0.4997 (diff=0.4549, anat=0.0448), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.84e-05 +[2026-04-14 09:58:17] (step=0000093) Loss: 0.4951 (diff=0.4507, anat=0.0444), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.86e-05 +[2026-04-14 09:58:37] (step=0000094) Loss: 0.4935 (diff=0.4507, anat=0.0428), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.88e-05 +[2026-04-14 09:58:57] (step=0000095) Loss: 0.4851 (diff=0.4439, anat=0.0412), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.9e-05 +[2026-04-14 09:59:16] (step=0000096) Loss: 0.4935 (diff=0.4503, anat=0.0432), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.9200000000000003e-05 +[2026-04-14 09:59:36] (step=0000097) Loss: 0.5044 (diff=0.4573, anat=0.0471), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.94e-05 +[2026-04-14 09:59:56] (step=0000098) Loss: 0.4808 (diff=0.4396, anat=0.0412), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.9600000000000002e-05 +[2026-04-14 10:00:15] (step=0000099) Loss: 0.4813 (diff=0.4399, anat=0.0414), Steps/Sec: 0.05, Epoch: 0.02, LR: 1.9800000000000004e-05 +[2026-04-14 10:00:35] (step=0000100) Loss: 0.4974 (diff=0.4517, anat=0.0457), Steps/Sec: 0.05, Epoch: 0.02, LR: 2e-05 +[2026-04-14 10:00:55] (step=0000101) Loss: 0.4758 (diff=0.4361, anat=0.0397), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.0200000000000003e-05 +[2026-04-14 10:01:14] (step=0000102) Loss: 0.4881 (diff=0.4459, anat=0.0422), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.04e-05 +[2026-04-14 10:01:34] (step=0000103) Loss: 0.4946 (diff=0.4522, anat=0.0424), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.06e-05 +[2026-04-14 10:01:54] (step=0000104) Loss: 0.4869 (diff=0.4448, anat=0.0421), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.08e-05 +[2026-04-14 10:02:13] (step=0000105) Loss: 0.4867 (diff=0.4462, anat=0.0404), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.1e-05 +[2026-04-14 10:02:33] (step=0000106) Loss: 0.5019 (diff=0.4595, anat=0.0424), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.12e-05 +[2026-04-14 10:02:53] (step=0000107) Loss: 0.4866 (diff=0.4466, anat=0.0399), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.1400000000000002e-05 +[2026-04-14 10:03:12] (step=0000108) Loss: 0.4819 (diff=0.4401, anat=0.0418), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.16e-05 +[2026-04-14 10:03:32] (step=0000109) Loss: 0.4752 (diff=0.4353, anat=0.0399), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.18e-05 +[2026-04-14 10:03:51] (step=0000110) Loss: 0.4819 (diff=0.4405, anat=0.0415), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.2000000000000003e-05 +[2026-04-14 10:04:11] (step=0000111) Loss: 0.4867 (diff=0.4447, anat=0.0420), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.22e-05 +[2026-04-14 10:04:31] (step=0000112) Loss: 0.4901 (diff=0.4489, anat=0.0412), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.2400000000000002e-05 +[2026-04-14 10:04:50] (step=0000113) Loss: 0.4910 (diff=0.4467, anat=0.0443), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.26e-05 +[2026-04-14 10:05:10] (step=0000114) Loss: 0.4887 (diff=0.4468, anat=0.0420), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.2800000000000002e-05 +[2026-04-14 10:05:30] (step=0000115) Loss: 0.4899 (diff=0.4496, anat=0.0403), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.3000000000000003e-05 +[2026-04-14 10:05:49] (step=0000116) Loss: 0.4838 (diff=0.4431, anat=0.0407), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.32e-05 +[2026-04-14 10:06:09] (step=0000117) Loss: 0.4930 (diff=0.4485, anat=0.0445), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.3400000000000003e-05 +[2026-04-14 10:06:29] (step=0000118) Loss: 0.4864 (diff=0.4453, anat=0.0411), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.36e-05 +[2026-04-14 10:06:48] (step=0000119) Loss: 0.4736 (diff=0.4350, anat=0.0386), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.38e-05 +[2026-04-14 10:07:08] (step=0000120) Loss: 0.4988 (diff=0.4556, anat=0.0432), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.4e-05 +[2026-04-14 10:07:28] (step=0000121) Loss: 0.4935 (diff=0.4509, anat=0.0425), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.4200000000000002e-05 +[2026-04-14 10:07:47] (step=0000122) Loss: 0.4853 (diff=0.4454, anat=0.0399), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.44e-05 +[2026-04-14 10:08:07] (step=0000123) Loss: 0.4921 (diff=0.4512, anat=0.0409), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.46e-05 +[2026-04-14 10:08:27] (step=0000124) Loss: 0.4821 (diff=0.4407, anat=0.0413), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.48e-05 +[2026-04-14 10:08:47] (step=0000125) Loss: 0.4826 (diff=0.4452, anat=0.0373), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.5e-05 +[2026-04-14 10:09:06] (step=0000126) Loss: 0.4823 (diff=0.4436, anat=0.0386), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.5200000000000003e-05 +[2026-04-14 10:09:26] (step=0000127) Loss: 0.4963 (diff=0.4542, anat=0.0421), Steps/Sec: 0.05, Epoch: 0.02, LR: 2.54e-05 +[2026-04-14 10:09:45] (step=0000128) Loss: 0.4701 (diff=0.4322, anat=0.0380), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.5600000000000002e-05 +[2026-04-14 10:10:05] (step=0000129) Loss: 0.4952 (diff=0.4547, anat=0.0405), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.58e-05 +[2026-04-14 10:10:25] (step=0000130) Loss: 0.4912 (diff=0.4504, anat=0.0408), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.6000000000000002e-05 +[2026-04-14 10:10:44] (step=0000131) Loss: 0.4782 (diff=0.4392, anat=0.0389), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.6200000000000003e-05 +[2026-04-14 10:11:04] (step=0000132) Loss: 0.4803 (diff=0.4437, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.64e-05 +[2026-04-14 10:11:24] (step=0000133) Loss: 0.4964 (diff=0.4528, anat=0.0437), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.6600000000000003e-05 +[2026-04-14 10:11:43] (step=0000134) Loss: 0.4779 (diff=0.4381, anat=0.0398), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.6800000000000004e-05 +[2026-04-14 10:12:03] (step=0000135) Loss: 0.5043 (diff=0.4622, anat=0.0421), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.7000000000000002e-05 +[2026-04-14 10:12:23] (step=0000136) Loss: 0.4874 (diff=0.4459, anat=0.0416), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.7200000000000004e-05 +[2026-04-14 10:12:42] (step=0000137) Loss: 0.4947 (diff=0.4542, anat=0.0405), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.7400000000000002e-05 +[2026-04-14 10:13:02] (step=0000138) Loss: 0.4881 (diff=0.4483, anat=0.0398), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.7600000000000003e-05 +[2026-04-14 10:13:22] (step=0000139) Loss: 0.4983 (diff=0.4564, anat=0.0419), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.7800000000000005e-05 +[2026-04-14 10:13:41] (step=0000140) Loss: 0.4886 (diff=0.4470, anat=0.0416), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.8000000000000003e-05 +[2026-04-14 10:14:01] (step=0000141) Loss: 0.4819 (diff=0.4421, anat=0.0398), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.8199999999999998e-05 +[2026-04-14 10:14:21] (step=0000142) Loss: 0.4915 (diff=0.4511, anat=0.0404), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.84e-05 +[2026-04-14 10:14:40] (step=0000143) Loss: 0.4751 (diff=0.4381, anat=0.0370), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.86e-05 +[2026-04-14 10:15:00] (step=0000144) Loss: 0.4797 (diff=0.4402, anat=0.0394), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.88e-05 +[2026-04-14 10:15:20] (step=0000145) Loss: 0.4806 (diff=0.4427, anat=0.0379), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.9e-05 +[2026-04-14 10:15:39] (step=0000146) Loss: 0.4943 (diff=0.4552, anat=0.0391), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.9199999999999998e-05 +[2026-04-14 10:15:59] (step=0000147) Loss: 0.4825 (diff=0.4444, anat=0.0381), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.94e-05 +[2026-04-14 10:16:19] (step=0000148) Loss: 0.4755 (diff=0.4399, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.96e-05 +[2026-04-14 10:16:38] (step=0000149) Loss: 0.4893 (diff=0.4506, anat=0.0388), Steps/Sec: 0.05, Epoch: 0.03, LR: 2.98e-05 +[2026-04-14 10:16:58] (step=0000150) Loss: 0.4881 (diff=0.4503, anat=0.0378), Steps/Sec: 0.05, Epoch: 0.03, LR: 3e-05 +[2026-04-14 10:17:18] (step=0000151) Loss: 0.4925 (diff=0.4516, anat=0.0409), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.02e-05 +[2026-04-14 10:17:37] (step=0000152) Loss: 0.4859 (diff=0.4451, anat=0.0408), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.04e-05 +[2026-04-14 10:17:57] (step=0000153) Loss: 0.4893 (diff=0.4490, anat=0.0403), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.06e-05 +[2026-04-14 10:18:17] (step=0000154) Loss: 0.4837 (diff=0.4460, anat=0.0377), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.08e-05 +[2026-04-14 10:18:36] (step=0000155) Loss: 0.4906 (diff=0.4510, anat=0.0396), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.1e-05 +[2026-04-14 10:18:56] (step=0000156) Loss: 0.4863 (diff=0.4471, anat=0.0392), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.12e-05 +[2026-04-14 10:19:16] (step=0000157) Loss: 0.4810 (diff=0.4401, anat=0.0409), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.1400000000000004e-05 +[2026-04-14 10:19:35] (step=0000158) Loss: 0.4990 (diff=0.4583, anat=0.0407), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.16e-05 +[2026-04-14 10:19:55] (step=0000159) Loss: 0.4855 (diff=0.4472, anat=0.0383), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.18e-05 +[2026-04-14 10:20:15] (step=0000160) Loss: 0.4723 (diff=0.4348, anat=0.0375), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.2000000000000005e-05 +[2026-04-14 10:20:34] (step=0000161) Loss: 0.4773 (diff=0.4407, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.2200000000000003e-05 +[2026-04-14 10:20:54] (step=0000162) Loss: 0.4756 (diff=0.4406, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.24e-05 +[2026-04-14 10:21:14] (step=0000163) Loss: 0.4837 (diff=0.4470, anat=0.0367), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.26e-05 +[2026-04-14 10:21:33] (step=0000164) Loss: 0.4706 (diff=0.4354, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.2800000000000004e-05 +[2026-04-14 10:21:53] (step=0000165) Loss: 0.4992 (diff=0.4606, anat=0.0385), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.3e-05 +[2026-04-14 10:22:12] (step=0000166) Loss: 0.4669 (diff=0.4307, anat=0.0362), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.32e-05 +[2026-04-14 10:22:32] (step=0000167) Loss: 0.5044 (diff=0.4643, anat=0.0400), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.3400000000000005e-05 +[2026-04-14 10:22:52] (step=0000168) Loss: 0.4932 (diff=0.4509, anat=0.0422), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.3600000000000004e-05 +[2026-04-14 10:23:11] (step=0000169) Loss: 0.4811 (diff=0.4429, anat=0.0382), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.38e-05 +[2026-04-14 10:23:31] (step=0000170) Loss: 0.4851 (diff=0.4451, anat=0.0400), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.4000000000000007e-05 +[2026-04-14 10:23:51] (step=0000171) Loss: 0.4872 (diff=0.4504, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.4200000000000005e-05 +[2026-04-14 10:24:10] (step=0000172) Loss: 0.4691 (diff=0.4353, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.4399999999999996e-05 +[2026-04-14 10:24:30] (step=0000173) Loss: 0.4792 (diff=0.4423, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.46e-05 +[2026-04-14 10:24:50] (step=0000174) Loss: 0.4690 (diff=0.4318, anat=0.0372), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.48e-05 +[2026-04-14 10:25:09] (step=0000175) Loss: 0.5099 (diff=0.4696, anat=0.0403), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.5e-05 +[2026-04-14 10:25:29] (step=0000176) Loss: 0.4969 (diff=0.4559, anat=0.0410), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.52e-05 +[2026-04-14 10:25:49] (step=0000177) Loss: 0.4716 (diff=0.4358, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.54e-05 +[2026-04-14 10:26:08] (step=0000178) Loss: 0.4907 (diff=0.4527, anat=0.0381), Steps/Sec: 0.05, Epoch: 0.03, LR: 3.56e-05 +[2026-04-14 10:26:28] (step=0000179) Loss: 0.4813 (diff=0.4444, anat=0.0369), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.58e-05 +[2026-04-14 10:26:47] (step=0000180) Loss: 0.4854 (diff=0.4473, anat=0.0381), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.6e-05 +[2026-04-14 10:27:07] (step=0000181) Loss: 0.4772 (diff=0.4404, anat=0.0369), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.62e-05 +[2026-04-14 10:27:27] (step=0000182) Loss: 0.4780 (diff=0.4417, anat=0.0363), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.6400000000000004e-05 +[2026-04-14 10:27:46] (step=0000183) Loss: 0.4916 (diff=0.4528, anat=0.0387), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.66e-05 +[2026-04-14 10:28:06] (step=0000184) Loss: 0.4898 (diff=0.4516, anat=0.0382), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.68e-05 +[2026-04-14 10:28:26] (step=0000185) Loss: 0.4874 (diff=0.4493, anat=0.0381), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.7e-05 +[2026-04-14 10:28:45] (step=0000186) Loss: 0.4834 (diff=0.4472, anat=0.0363), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.72e-05 +[2026-04-14 10:29:05] (step=0000187) Loss: 0.4899 (diff=0.4511, anat=0.0388), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.74e-05 +[2026-04-14 10:29:25] (step=0000188) Loss: 0.4848 (diff=0.4471, anat=0.0377), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.76e-05 +[2026-04-14 10:29:44] (step=0000189) Loss: 0.4917 (diff=0.4542, anat=0.0375), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.7800000000000004e-05 +[2026-04-14 10:30:04] (step=0000190) Loss: 0.4904 (diff=0.4534, anat=0.0371), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.8e-05 +[2026-04-14 10:30:24] (step=0000191) Loss: 0.4883 (diff=0.4494, anat=0.0389), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.82e-05 +[2026-04-14 10:30:43] (step=0000192) Loss: 0.4884 (diff=0.4498, anat=0.0386), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.8400000000000005e-05 +[2026-04-14 10:31:03] (step=0000193) Loss: 0.5000 (diff=0.4591, anat=0.0408), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.86e-05 +[2026-04-14 10:31:22] (step=0000194) Loss: 0.4826 (diff=0.4474, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.88e-05 +[2026-04-14 10:31:42] (step=0000195) Loss: 0.4932 (diff=0.4558, anat=0.0374), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.9000000000000006e-05 +[2026-04-14 10:32:02] (step=0000196) Loss: 0.4841 (diff=0.4456, anat=0.0385), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.9200000000000004e-05 +[2026-04-14 10:32:22] (step=0000197) Loss: 0.4900 (diff=0.4519, anat=0.0381), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.94e-05 +[2026-04-14 10:32:41] (step=0000198) Loss: 0.4876 (diff=0.4489, anat=0.0387), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.960000000000001e-05 +[2026-04-14 10:33:01] (step=0000199) Loss: 0.4759 (diff=0.4399, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.04, LR: 3.9800000000000005e-05 +[2026-04-14 10:33:20] (step=0000200) Loss: 0.4938 (diff=0.4551, anat=0.0388), Steps/Sec: 0.05, Epoch: 0.04, LR: 4e-05 +[2026-04-14 10:33:40] (step=0000201) Loss: 0.4808 (diff=0.4453, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.02e-05 +[2026-04-14 10:34:00] (step=0000202) Loss: 0.4822 (diff=0.4452, anat=0.0370), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.0400000000000006e-05 +[2026-04-14 10:34:19] (step=0000203) Loss: 0.4907 (diff=0.4529, anat=0.0378), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.0600000000000004e-05 +[2026-04-14 10:34:39] (step=0000204) Loss: 0.4784 (diff=0.4415, anat=0.0369), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.08e-05 +[2026-04-14 10:34:59] (step=0000205) Loss: 0.4882 (diff=0.4516, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.1e-05 +[2026-04-14 10:35:18] (step=0000206) Loss: 0.4779 (diff=0.4403, anat=0.0376), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.12e-05 +[2026-04-14 10:35:38] (step=0000207) Loss: 0.4860 (diff=0.4462, anat=0.0399), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.14e-05 +[2026-04-14 10:35:58] (step=0000208) Loss: 0.4797 (diff=0.4423, anat=0.0373), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.16e-05 +[2026-04-14 10:36:17] (step=0000209) Loss: 0.4845 (diff=0.4466, anat=0.0379), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.18e-05 +[2026-04-14 10:36:37] (step=0000210) Loss: 0.4988 (diff=0.4606, anat=0.0382), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.2e-05 +[2026-04-14 10:36:57] (step=0000211) Loss: 0.4861 (diff=0.4497, anat=0.0364), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.22e-05 +[2026-04-14 10:37:16] (step=0000212) Loss: 0.4724 (diff=0.4361, anat=0.0363), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.24e-05 +[2026-04-14 10:37:36] (step=0000213) Loss: 0.5039 (diff=0.4635, anat=0.0403), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.26e-05 +[2026-04-14 10:37:56] (step=0000214) Loss: 0.4889 (diff=0.4513, anat=0.0376), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.2800000000000004e-05 +[2026-04-14 10:38:15] (step=0000215) Loss: 0.4760 (diff=0.4400, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.3e-05 +[2026-04-14 10:38:35] (step=0000216) Loss: 0.4939 (diff=0.4533, anat=0.0406), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.32e-05 +[2026-04-14 10:38:55] (step=0000217) Loss: 0.4949 (diff=0.4571, anat=0.0378), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.3400000000000005e-05 +[2026-04-14 10:39:14] (step=0000218) Loss: 0.4828 (diff=0.4424, anat=0.0404), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.36e-05 +[2026-04-14 10:39:34] (step=0000219) Loss: 0.4784 (diff=0.4414, anat=0.0371), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.38e-05 +[2026-04-14 10:39:54] (step=0000220) Loss: 0.4960 (diff=0.4560, anat=0.0400), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.4000000000000006e-05 +[2026-04-14 10:40:13] (step=0000221) Loss: 0.4968 (diff=0.4593, anat=0.0375), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.4200000000000004e-05 +[2026-04-14 10:40:33] (step=0000222) Loss: 0.4847 (diff=0.4473, anat=0.0373), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.44e-05 +[2026-04-14 10:40:53] (step=0000223) Loss: 0.5068 (diff=0.4653, anat=0.0415), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.46e-05 +[2026-04-14 10:41:12] (step=0000224) Loss: 0.4873 (diff=0.4497, anat=0.0376), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.4800000000000005e-05 +[2026-04-14 10:41:32] (step=0000225) Loss: 0.5010 (diff=0.4629, anat=0.0381), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.5e-05 +[2026-04-14 10:41:51] (step=0000226) Loss: 0.4832 (diff=0.4478, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.52e-05 +[2026-04-14 10:42:11] (step=0000227) Loss: 0.5002 (diff=0.4634, anat=0.0369), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.5400000000000006e-05 +[2026-04-14 10:42:31] (step=0000228) Loss: 0.4845 (diff=0.4477, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.5600000000000004e-05 +[2026-04-14 10:42:50] (step=0000229) Loss: 0.4811 (diff=0.4430, anat=0.0381), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.58e-05 +[2026-04-14 10:43:10] (step=0000230) Loss: 0.4875 (diff=0.4513, anat=0.0362), Steps/Sec: 0.05, Epoch: 0.04, LR: 4.600000000000001e-05 +[2026-04-14 10:43:29] (step=0000231) Loss: 0.4912 (diff=0.4531, anat=0.0381), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.6200000000000005e-05 +[2026-04-14 10:43:49] (step=0000232) Loss: 0.4737 (diff=0.4372, anat=0.0365), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.64e-05 +[2026-04-14 10:44:09] (step=0000233) Loss: 0.4863 (diff=0.4504, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.660000000000001e-05 +[2026-04-14 10:44:28] (step=0000234) Loss: 0.4890 (diff=0.4520, anat=0.0370), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.6800000000000006e-05 +[2026-04-14 10:44:48] (step=0000235) Loss: 0.4978 (diff=0.4591, anat=0.0386), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.7e-05 +[2026-04-14 10:45:08] (step=0000236) Loss: 0.4857 (diff=0.4504, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.72e-05 +[2026-04-14 10:45:27] (step=0000237) Loss: 0.4805 (diff=0.4440, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.74e-05 +[2026-04-14 10:45:47] (step=0000238) Loss: 0.4796 (diff=0.4441, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.76e-05 +[2026-04-14 10:46:07] (step=0000239) Loss: 0.4808 (diff=0.4458, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.78e-05 +[2026-04-14 10:46:26] (step=0000240) Loss: 0.4915 (diff=0.4519, anat=0.0395), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.8e-05 +[2026-04-14 10:46:46] (step=0000241) Loss: 0.4869 (diff=0.4506, anat=0.0362), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.82e-05 +[2026-04-14 10:47:06] (step=0000242) Loss: 0.4914 (diff=0.4547, anat=0.0367), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.8400000000000004e-05 +[2026-04-14 10:47:25] (step=0000243) Loss: 0.5125 (diff=0.4708, anat=0.0417), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.86e-05 +[2026-04-14 10:47:45] (step=0000244) Loss: 0.4867 (diff=0.4509, anat=0.0358), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.88e-05 +[2026-04-14 10:48:04] (step=0000245) Loss: 0.4822 (diff=0.4456, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.9e-05 +[2026-04-14 10:48:24] (step=0000246) Loss: 0.4798 (diff=0.4431, anat=0.0367), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.92e-05 +[2026-04-14 10:48:44] (step=0000247) Loss: 0.4796 (diff=0.4426, anat=0.0370), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.94e-05 +[2026-04-14 10:49:03] (step=0000248) Loss: 0.4887 (diff=0.4514, anat=0.0374), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.96e-05 +[2026-04-14 10:49:23] (step=0000249) Loss: 0.4763 (diff=0.4399, anat=0.0364), Steps/Sec: 0.05, Epoch: 0.05, LR: 4.9800000000000004e-05 +[2026-04-14 10:49:43] (step=0000250) Loss: 0.4867 (diff=0.4500, anat=0.0367), Steps/Sec: 0.05, Epoch: 0.05, LR: 5e-05 +[2026-04-14 10:50:02] (step=0000251) Loss: 0.4991 (diff=0.4622, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.02e-05 +[2026-04-14 10:50:22] (step=0000252) Loss: 0.4936 (diff=0.4541, anat=0.0395), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.0400000000000005e-05 +[2026-04-14 10:50:42] (step=0000253) Loss: 0.4987 (diff=0.4620, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.0600000000000003e-05 +[2026-04-14 10:51:01] (step=0000254) Loss: 0.4905 (diff=0.4523, anat=0.0382), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.08e-05 +[2026-04-14 10:51:21] (step=0000255) Loss: 0.4772 (diff=0.4434, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.1000000000000006e-05 +[2026-04-14 10:51:41] (step=0000256) Loss: 0.4814 (diff=0.4464, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.1200000000000004e-05 +[2026-04-14 10:52:00] (step=0000257) Loss: 0.4843 (diff=0.4474, anat=0.0370), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.14e-05 +[2026-04-14 10:52:20] (step=0000258) Loss: 0.4881 (diff=0.4512, anat=0.0369), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.16e-05 +[2026-04-14 10:52:40] (step=0000259) Loss: 0.4872 (diff=0.4532, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.1800000000000005e-05 +[2026-04-14 10:52:59] (step=0000260) Loss: 0.4729 (diff=0.4368, anat=0.0361), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.2000000000000004e-05 +[2026-04-14 10:53:19] (step=0000261) Loss: 0.4824 (diff=0.4493, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.22e-05 +[2026-04-14 10:53:38] (step=0000262) Loss: 0.4926 (diff=0.4544, anat=0.0382), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.2400000000000007e-05 +[2026-04-14 10:53:58] (step=0000263) Loss: 0.4877 (diff=0.4498, anat=0.0380), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.2600000000000005e-05 +[2026-04-14 10:54:18] (step=0000264) Loss: 0.4738 (diff=0.4406, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.28e-05 +[2026-04-14 10:54:37] (step=0000265) Loss: 0.4704 (diff=0.4375, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.300000000000001e-05 +[2026-04-14 10:54:57] (step=0000266) Loss: 0.4712 (diff=0.4376, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.3200000000000006e-05 +[2026-04-14 10:55:17] (step=0000267) Loss: 0.4853 (diff=0.4511, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.3400000000000004e-05 +[2026-04-14 10:55:36] (step=0000268) Loss: 0.4809 (diff=0.4464, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.360000000000001e-05 +[2026-04-14 10:55:56] (step=0000269) Loss: 0.4822 (diff=0.4463, anat=0.0358), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.380000000000001e-05 +[2026-04-14 10:56:16] (step=0000270) Loss: 0.4892 (diff=0.4550, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.4000000000000005e-05 +[2026-04-14 10:56:35] (step=0000271) Loss: 0.4889 (diff=0.4505, anat=0.0383), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.420000000000001e-05 +[2026-04-14 10:56:55] (step=0000272) Loss: 0.4794 (diff=0.4455, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.440000000000001e-05 +[2026-04-14 10:57:15] (step=0000273) Loss: 0.4797 (diff=0.4424, anat=0.0373), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.4600000000000006e-05 +[2026-04-14 10:57:34] (step=0000274) Loss: 0.4964 (diff=0.4593, anat=0.0371), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.4800000000000004e-05 +[2026-04-14 10:57:54] (step=0000275) Loss: 0.4742 (diff=0.4371, anat=0.0372), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.500000000000001e-05 +[2026-04-14 10:58:14] (step=0000276) Loss: 0.4838 (diff=0.4484, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.520000000000001e-05 +[2026-04-14 10:58:33] (step=0000277) Loss: 0.4874 (diff=0.4501, anat=0.0373), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.5400000000000005e-05 +[2026-04-14 10:58:53] (step=0000278) Loss: 0.4807 (diff=0.4450, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.560000000000001e-05 +[2026-04-14 10:59:12] (step=0000279) Loss: 0.4740 (diff=0.4394, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.580000000000001e-05 +[2026-04-14 10:59:32] (step=0000280) Loss: 0.4972 (diff=0.4585, anat=0.0387), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.6000000000000006e-05 +[2026-04-14 10:59:52] (step=0000281) Loss: 0.4789 (diff=0.4434, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.05, LR: 5.620000000000001e-05 +[2026-04-14 11:00:11] (step=0000282) Loss: 0.4892 (diff=0.4527, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.6399999999999995e-05 +[2026-04-14 11:00:31] (step=0000283) Loss: 0.4922 (diff=0.4555, anat=0.0367), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.66e-05 +[2026-04-14 11:00:51] (step=0000284) Loss: 0.4946 (diff=0.4589, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.68e-05 +[2026-04-14 11:01:10] (step=0000285) Loss: 0.4958 (diff=0.4580, anat=0.0378), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.6999999999999996e-05 +[2026-04-14 11:01:30] (step=0000286) Loss: 0.4774 (diff=0.4414, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.72e-05 +[2026-04-14 11:01:49] (step=0000287) Loss: 0.4785 (diff=0.4427, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.74e-05 +[2026-04-14 11:02:09] (step=0000288) Loss: 0.4845 (diff=0.4484, anat=0.0361), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.76e-05 +[2026-04-14 11:02:29] (step=0000289) Loss: 0.4785 (diff=0.4450, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.7799999999999995e-05 +[2026-04-14 11:02:48] (step=0000290) Loss: 0.4940 (diff=0.4581, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.8e-05 +[2026-04-14 11:03:08] (step=0000291) Loss: 0.4783 (diff=0.4448, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.82e-05 +[2026-04-14 11:03:28] (step=0000292) Loss: 0.4786 (diff=0.4439, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.8399999999999997e-05 +[2026-04-14 11:03:47] (step=0000293) Loss: 0.4787 (diff=0.4419, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.86e-05 +[2026-04-14 11:04:07] (step=0000294) Loss: 0.4986 (diff=0.4634, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.88e-05 +[2026-04-14 11:04:27] (step=0000295) Loss: 0.4792 (diff=0.4449, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.9e-05 +[2026-04-14 11:04:46] (step=0000296) Loss: 0.5110 (diff=0.4728, anat=0.0382), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.92e-05 +[2026-04-14 11:05:06] (step=0000297) Loss: 0.4845 (diff=0.4499, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.94e-05 +[2026-04-14 11:05:25] (step=0000298) Loss: 0.4912 (diff=0.4543, anat=0.0369), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.96e-05 +[2026-04-14 11:05:45] (step=0000299) Loss: 0.4953 (diff=0.4592, anat=0.0362), Steps/Sec: 0.05, Epoch: 0.06, LR: 5.9800000000000003e-05 +[2026-04-14 11:06:05] (step=0000300) Loss: 0.4684 (diff=0.4334, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.06, LR: 6e-05 +[2026-04-14 11:06:24] (step=0000301) Loss: 0.4809 (diff=0.4446, anat=0.0362), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.02e-05 +[2026-04-14 11:06:44] (step=0000302) Loss: 0.4888 (diff=0.4525, anat=0.0363), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.04e-05 +[2026-04-14 11:07:04] (step=0000303) Loss: 0.4798 (diff=0.4458, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.06e-05 +[2026-04-14 11:07:23] (step=0000304) Loss: 0.4849 (diff=0.4492, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.08e-05 +[2026-04-14 11:07:43] (step=0000305) Loss: 0.4756 (diff=0.4429, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.1e-05 +[2026-04-14 11:08:03] (step=0000306) Loss: 0.4836 (diff=0.4476, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.12e-05 +[2026-04-14 11:08:22] (step=0000307) Loss: 0.4729 (diff=0.4390, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.14e-05 +[2026-04-14 11:08:42] (step=0000308) Loss: 0.4811 (diff=0.4475, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.16e-05 +[2026-04-14 11:09:02] (step=0000309) Loss: 0.4849 (diff=0.4497, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.18e-05 +[2026-04-14 11:09:21] (step=0000310) Loss: 0.4985 (diff=0.4619, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.2e-05 +[2026-04-14 11:09:41] (step=0000311) Loss: 0.4819 (diff=0.4475, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.220000000000001e-05 +[2026-04-14 11:10:01] (step=0000312) Loss: 0.4990 (diff=0.4642, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.24e-05 +[2026-04-14 11:10:20] (step=0000313) Loss: 0.4861 (diff=0.4505, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.26e-05 +[2026-04-14 11:10:40] (step=0000314) Loss: 0.4731 (diff=0.4411, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.280000000000001e-05 +[2026-04-14 11:10:59] (step=0000315) Loss: 0.4977 (diff=0.4609, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.3e-05 +[2026-04-14 11:11:19] (step=0000316) Loss: 0.4835 (diff=0.4478, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.32e-05 +[2026-04-14 11:11:39] (step=0000317) Loss: 0.4915 (diff=0.4553, anat=0.0362), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.340000000000001e-05 +[2026-04-14 11:11:58] (step=0000318) Loss: 0.4765 (diff=0.4448, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.36e-05 +[2026-04-14 11:12:18] (step=0000319) Loss: 0.4702 (diff=0.4384, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.38e-05 +[2026-04-14 11:12:38] (step=0000320) Loss: 0.4814 (diff=0.4489, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.400000000000001e-05 +[2026-04-14 11:12:57] (step=0000321) Loss: 0.4716 (diff=0.4381, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.42e-05 +[2026-04-14 11:13:17] (step=0000322) Loss: 0.4810 (diff=0.4458, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.440000000000001e-05 +[2026-04-14 11:13:37] (step=0000323) Loss: 0.4813 (diff=0.4466, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.460000000000001e-05 +[2026-04-14 11:13:56] (step=0000324) Loss: 0.4834 (diff=0.4499, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.48e-05 +[2026-04-14 11:14:16] (step=0000325) Loss: 0.5093 (diff=0.4696, anat=0.0398), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.500000000000001e-05 +[2026-04-14 11:14:36] (step=0000326) Loss: 0.4683 (diff=0.4349, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.52e-05 +[2026-04-14 11:14:55] (step=0000327) Loss: 0.4899 (diff=0.4533, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.54e-05 +[2026-04-14 11:15:15] (step=0000328) Loss: 0.4984 (diff=0.4620, anat=0.0364), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.560000000000001e-05 +[2026-04-14 11:15:35] (step=0000329) Loss: 0.4774 (diff=0.4439, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.58e-05 +[2026-04-14 11:15:54] (step=0000330) Loss: 0.4991 (diff=0.4634, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.6e-05 +[2026-04-14 11:16:14] (step=0000331) Loss: 0.4813 (diff=0.4467, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.620000000000001e-05 +[2026-04-14 11:16:33] (step=0000332) Loss: 0.4822 (diff=0.4497, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.06, LR: 6.64e-05 +[2026-04-14 11:16:53] (step=0000333) Loss: 0.4759 (diff=0.4409, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.66e-05 +[2026-04-14 11:17:13] (step=0000334) Loss: 0.4778 (diff=0.4442, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.680000000000001e-05 +[2026-04-14 11:17:32] (step=0000335) Loss: 0.4738 (diff=0.4397, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.7e-05 +[2026-04-14 11:17:52] (step=0000336) Loss: 0.4970 (diff=0.4606, anat=0.0364), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.720000000000001e-05 +[2026-04-14 11:18:12] (step=0000337) Loss: 0.4901 (diff=0.4558, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.740000000000001e-05 +[2026-04-14 11:18:31] (step=0000338) Loss: 0.4875 (diff=0.4507, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.76e-05 +[2026-04-14 11:18:51] (step=0000339) Loss: 0.4956 (diff=0.4584, anat=0.0372), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.780000000000001e-05 +[2026-04-14 11:19:11] (step=0000340) Loss: 0.4799 (diff=0.4452, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.800000000000001e-05 +[2026-04-14 11:19:30] (step=0000341) Loss: 0.4742 (diff=0.4408, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.82e-05 +[2026-04-14 11:19:50] (step=0000342) Loss: 0.4840 (diff=0.4501, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.840000000000001e-05 +[2026-04-14 11:20:09] (step=0000343) Loss: 0.4912 (diff=0.4551, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.860000000000001e-05 +[2026-04-14 11:20:29] (step=0000344) Loss: 0.4935 (diff=0.4577, anat=0.0358), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.879999999999999e-05 +[2026-04-14 11:20:49] (step=0000345) Loss: 0.4984 (diff=0.4608, anat=0.0376), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.9e-05 +[2026-04-14 11:21:08] (step=0000346) Loss: 0.4779 (diff=0.4454, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.92e-05 +[2026-04-14 11:21:28] (step=0000347) Loss: 0.4656 (diff=0.4336, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.939999999999999e-05 +[2026-04-14 11:21:47] (step=0000348) Loss: 0.4873 (diff=0.4513, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.96e-05 +[2026-04-14 11:22:07] (step=0000349) Loss: 0.4838 (diff=0.4482, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.07, LR: 6.98e-05 +[2026-04-14 11:22:27] (step=0000350) Loss: 0.4879 (diff=0.4536, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.07, LR: 7e-05 +[2026-04-14 11:22:46] (step=0000351) Loss: 0.4830 (diff=0.4507, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.02e-05 +[2026-04-14 11:23:06] (step=0000352) Loss: 0.4882 (diff=0.4514, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.04e-05 +[2026-04-14 11:23:26] (step=0000353) Loss: 0.4902 (diff=0.4544, anat=0.0358), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.06e-05 +[2026-04-14 11:23:45] (step=0000354) Loss: 0.4740 (diff=0.4430, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.08e-05 +[2026-04-14 11:24:05] (step=0000355) Loss: 0.4880 (diff=0.4521, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.1e-05 +[2026-04-14 11:24:24] (step=0000356) Loss: 0.4996 (diff=0.4637, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.12e-05 +[2026-04-14 11:24:44] (step=0000357) Loss: 0.4740 (diff=0.4412, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.14e-05 +[2026-04-14 11:25:04] (step=0000358) Loss: 0.4827 (diff=0.4494, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.16e-05 +[2026-04-14 11:25:24] (step=0000359) Loss: 0.4860 (diff=0.4523, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.18e-05 +[2026-04-14 11:25:43] (step=0000360) Loss: 0.4818 (diff=0.4468, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.2e-05 +[2026-04-14 11:26:03] (step=0000361) Loss: 0.4873 (diff=0.4504, anat=0.0369), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.22e-05 +[2026-04-14 11:26:23] (step=0000362) Loss: 0.4895 (diff=0.4540, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.24e-05 +[2026-04-14 11:26:42] (step=0000363) Loss: 0.4764 (diff=0.4438, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.26e-05 +[2026-04-14 11:27:02] (step=0000364) Loss: 0.4867 (diff=0.4520, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.280000000000001e-05 +[2026-04-14 11:27:21] (step=0000365) Loss: 0.4947 (diff=0.4598, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.3e-05 +[2026-04-14 11:27:41] (step=0000366) Loss: 0.4872 (diff=0.4525, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.32e-05 +[2026-04-14 11:28:01] (step=0000367) Loss: 0.4761 (diff=0.4396, anat=0.0365), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.340000000000001e-05 +[2026-04-14 11:28:20] (step=0000368) Loss: 0.4757 (diff=0.4449, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.36e-05 +[2026-04-14 11:28:40] (step=0000369) Loss: 0.4817 (diff=0.4485, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.38e-05 +[2026-04-14 11:29:00] (step=0000370) Loss: 0.4954 (diff=0.4615, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.4e-05 +[2026-04-14 11:29:19] (step=0000371) Loss: 0.4744 (diff=0.4392, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.42e-05 +[2026-04-14 11:29:39] (step=0000372) Loss: 0.4934 (diff=0.4561, anat=0.0374), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.44e-05 +[2026-04-14 11:29:59] (step=0000373) Loss: 0.4951 (diff=0.4595, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.46e-05 +[2026-04-14 11:30:18] (step=0000374) Loss: 0.4985 (diff=0.4613, anat=0.0372), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.48e-05 +[2026-04-14 11:30:38] (step=0000375) Loss: 0.4932 (diff=0.4603, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.500000000000001e-05 +[2026-04-14 11:30:58] (step=0000376) Loss: 0.4620 (diff=0.4318, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.52e-05 +[2026-04-14 11:31:17] (step=0000377) Loss: 0.4802 (diff=0.4466, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.54e-05 +[2026-04-14 11:31:37] (step=0000378) Loss: 0.4815 (diff=0.4470, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.560000000000001e-05 +[2026-04-14 11:31:57] (step=0000379) Loss: 0.4795 (diff=0.4479, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.58e-05 +[2026-04-14 11:32:16] (step=0000380) Loss: 0.4909 (diff=0.4549, anat=0.0361), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.6e-05 +[2026-04-14 11:32:36] (step=0000381) Loss: 0.5014 (diff=0.4626, anat=0.0388), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.620000000000001e-05 +[2026-04-14 11:32:56] (step=0000382) Loss: 0.4896 (diff=0.4556, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.64e-05 +[2026-04-14 11:33:15] (step=0000383) Loss: 0.4828 (diff=0.4472, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.07, LR: 7.66e-05 +[2026-04-14 11:33:35] (step=0000384) Loss: 0.4813 (diff=0.4464, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.680000000000001e-05 +[2026-04-14 11:33:55] (step=0000385) Loss: 0.4908 (diff=0.4559, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.7e-05 +[2026-04-14 11:34:14] (step=0000386) Loss: 0.4998 (diff=0.4637, anat=0.0361), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.72e-05 +[2026-04-14 11:34:34] (step=0000387) Loss: 0.4844 (diff=0.4507, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.740000000000001e-05 +[2026-04-14 11:34:53] (step=0000388) Loss: 0.4803 (diff=0.4470, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.76e-05 +[2026-04-14 11:35:13] (step=0000389) Loss: 0.4854 (diff=0.4517, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.780000000000001e-05 +[2026-04-14 11:35:33] (step=0000390) Loss: 0.4826 (diff=0.4491, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.800000000000001e-05 +[2026-04-14 11:35:52] (step=0000391) Loss: 0.4795 (diff=0.4429, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.82e-05 +[2026-04-14 11:36:12] (step=0000392) Loss: 0.4866 (diff=0.4524, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.840000000000001e-05 +[2026-04-14 11:36:32] (step=0000393) Loss: 0.4634 (diff=0.4303, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.860000000000001e-05 +[2026-04-14 11:36:51] (step=0000394) Loss: 0.4850 (diff=0.4493, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.88e-05 +[2026-04-14 11:37:11] (step=0000395) Loss: 0.4898 (diff=0.4517, anat=0.0382), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.900000000000001e-05 +[2026-04-14 11:37:30] (step=0000396) Loss: 0.4798 (diff=0.4479, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.920000000000001e-05 +[2026-04-14 11:37:50] (step=0000397) Loss: 0.4830 (diff=0.4492, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.94e-05 +[2026-04-14 11:38:10] (step=0000398) Loss: 0.4766 (diff=0.4438, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.960000000000001e-05 +[2026-04-14 11:38:29] (step=0000399) Loss: 0.4881 (diff=0.4528, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.08, LR: 7.98e-05 +[2026-04-14 11:38:49] (step=0000400) Loss: 0.4747 (diff=0.4431, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.08, LR: 8e-05 +[2026-04-14 11:39:09] (step=0000401) Loss: 0.4860 (diff=0.4515, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.020000000000001e-05 +[2026-04-14 11:39:28] (step=0000402) Loss: 0.4937 (diff=0.4557, anat=0.0380), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.04e-05 +[2026-04-14 11:39:48] (step=0000403) Loss: 0.4864 (diff=0.4491, anat=0.0372), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.060000000000001e-05 +[2026-04-14 11:40:08] (step=0000404) Loss: 0.4738 (diff=0.4439, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.080000000000001e-05 +[2026-04-14 11:40:27] (step=0000405) Loss: 0.4932 (diff=0.4590, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.1e-05 +[2026-04-14 11:40:47] (step=0000406) Loss: 0.4854 (diff=0.4529, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.120000000000001e-05 +[2026-04-14 11:41:06] (step=0000407) Loss: 0.4958 (diff=0.4598, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.14e-05 +[2026-04-14 11:41:26] (step=0000408) Loss: 0.4672 (diff=0.4376, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.16e-05 +[2026-04-14 11:41:46] (step=0000409) Loss: 0.4762 (diff=0.4410, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.18e-05 +[2026-04-14 11:42:05] (step=0000410) Loss: 0.4814 (diff=0.4462, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.2e-05 +[2026-04-14 11:42:25] (step=0000411) Loss: 0.4755 (diff=0.4441, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.22e-05 +[2026-04-14 11:42:45] (step=0000412) Loss: 0.4855 (diff=0.4528, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.24e-05 +[2026-04-14 11:43:04] (step=0000413) Loss: 0.4761 (diff=0.4444, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.26e-05 +[2026-04-14 11:43:24] (step=0000414) Loss: 0.4785 (diff=0.4467, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.28e-05 +[2026-04-14 11:43:43] (step=0000415) Loss: 0.4787 (diff=0.4443, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.3e-05 +[2026-04-14 11:44:03] (step=0000416) Loss: 0.4860 (diff=0.4524, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.32e-05 +[2026-04-14 11:44:23] (step=0000417) Loss: 0.4825 (diff=0.4468, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.34e-05 +[2026-04-14 11:44:42] (step=0000418) Loss: 0.4889 (diff=0.4533, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.36e-05 +[2026-04-14 11:45:02] (step=0000419) Loss: 0.4791 (diff=0.4438, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.38e-05 +[2026-04-14 11:45:21] (step=0000420) Loss: 0.4855 (diff=0.4513, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.4e-05 +[2026-04-14 11:45:41] (step=0000421) Loss: 0.4807 (diff=0.4467, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.42e-05 +[2026-04-14 11:46:01] (step=0000422) Loss: 0.4760 (diff=0.4432, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.44e-05 +[2026-04-14 11:46:20] (step=0000423) Loss: 0.4709 (diff=0.4376, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.46e-05 +[2026-04-14 11:46:40] (step=0000424) Loss: 0.4779 (diff=0.4428, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.48e-05 +[2026-04-14 11:46:59] (step=0000425) Loss: 0.4887 (diff=0.4555, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.5e-05 +[2026-04-14 11:47:19] (step=0000426) Loss: 0.4820 (diff=0.4462, anat=0.0358), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.52e-05 +[2026-04-14 11:47:39] (step=0000427) Loss: 0.4886 (diff=0.4560, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.54e-05 +[2026-04-14 11:47:58] (step=0000428) Loss: 0.4663 (diff=0.4356, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.560000000000001e-05 +[2026-04-14 11:48:18] (step=0000429) Loss: 0.4814 (diff=0.4492, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.58e-05 +[2026-04-14 11:48:38] (step=0000430) Loss: 0.4907 (diff=0.4539, anat=0.0367), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.6e-05 +[2026-04-14 11:48:57] (step=0000431) Loss: 0.4580 (diff=0.4293, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.620000000000001e-05 +[2026-04-14 11:49:17] (step=0000432) Loss: 0.4793 (diff=0.4441, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.64e-05 +[2026-04-14 11:49:36] (step=0000433) Loss: 0.4827 (diff=0.4478, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.66e-05 +[2026-04-14 11:49:56] (step=0000434) Loss: 0.4926 (diff=0.4566, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.08, LR: 8.680000000000001e-05 +[2026-04-14 11:50:16] (step=0000435) Loss: 0.4761 (diff=0.4441, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.7e-05 +[2026-04-14 11:50:35] (step=0000436) Loss: 0.4736 (diff=0.4421, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.72e-05 +[2026-04-14 11:50:55] (step=0000437) Loss: 0.4914 (diff=0.4568, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.740000000000001e-05 +[2026-04-14 11:51:15] (step=0000438) Loss: 0.4885 (diff=0.4566, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.76e-05 +[2026-04-14 11:51:34] (step=0000439) Loss: 0.4856 (diff=0.4500, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.78e-05 +[2026-04-14 11:51:54] (step=0000440) Loss: 0.4871 (diff=0.4553, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.800000000000001e-05 +[2026-04-14 11:52:13] (step=0000441) Loss: 0.5034 (diff=0.4669, anat=0.0365), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.82e-05 +[2026-04-14 11:52:33] (step=0000442) Loss: 0.4825 (diff=0.4500, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.840000000000001e-05 +[2026-04-14 11:52:53] (step=0000443) Loss: 0.4877 (diff=0.4529, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.86e-05 +[2026-04-14 11:53:12] (step=0000444) Loss: 0.4820 (diff=0.4485, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.88e-05 +[2026-04-14 11:53:32] (step=0000445) Loss: 0.4886 (diff=0.4547, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.900000000000001e-05 +[2026-04-14 11:53:52] (step=0000446) Loss: 0.4827 (diff=0.4478, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.92e-05 +[2026-04-14 11:54:11] (step=0000447) Loss: 0.4857 (diff=0.4528, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.94e-05 +[2026-04-14 11:54:31] (step=0000448) Loss: 0.4913 (diff=0.4564, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.960000000000001e-05 +[2026-04-14 11:54:50] (step=0000449) Loss: 0.4957 (diff=0.4596, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.09, LR: 8.98e-05 +[2026-04-14 11:55:10] (step=0000450) Loss: 0.4924 (diff=0.4571, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.09, LR: 9e-05 +[2026-04-14 11:55:30] (step=0000451) Loss: 0.4708 (diff=0.4384, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.020000000000001e-05 +[2026-04-14 11:55:49] (step=0000452) Loss: 0.4812 (diff=0.4478, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.04e-05 +[2026-04-14 11:56:09] (step=0000453) Loss: 0.4742 (diff=0.4420, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.06e-05 +[2026-04-14 11:56:29] (step=0000454) Loss: 0.4964 (diff=0.4627, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.080000000000001e-05 +[2026-04-14 11:56:48] (step=0000455) Loss: 0.4832 (diff=0.4500, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.1e-05 +[2026-04-14 11:57:08] (step=0000456) Loss: 0.4785 (diff=0.4430, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.120000000000001e-05 +[2026-04-14 11:57:27] (step=0000457) Loss: 0.4843 (diff=0.4487, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.140000000000001e-05 +[2026-04-14 11:57:47] (step=0000458) Loss: 0.5056 (diff=0.4707, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.16e-05 +[2026-04-14 11:58:07] (step=0000459) Loss: 0.4799 (diff=0.4475, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.180000000000001e-05 +[2026-04-14 11:58:26] (step=0000460) Loss: 0.4892 (diff=0.4548, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.200000000000001e-05 +[2026-04-14 11:58:46] (step=0000461) Loss: 0.4929 (diff=0.4597, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.22e-05 +[2026-04-14 11:59:06] (step=0000462) Loss: 0.4799 (diff=0.4454, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.240000000000001e-05 +[2026-04-14 11:59:25] (step=0000463) Loss: 0.4829 (diff=0.4493, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.260000000000001e-05 +[2026-04-14 11:59:45] (step=0000464) Loss: 0.4633 (diff=0.4315, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.28e-05 +[2026-04-14 12:00:05] (step=0000465) Loss: 0.4828 (diff=0.4518, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.300000000000001e-05 +[2026-04-14 12:00:24] (step=0000466) Loss: 0.4706 (diff=0.4383, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.320000000000002e-05 +[2026-04-14 12:00:44] (step=0000467) Loss: 0.4914 (diff=0.4563, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.340000000000001e-05 +[2026-04-14 12:01:03] (step=0000468) Loss: 0.4866 (diff=0.4524, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.360000000000001e-05 +[2026-04-14 12:01:23] (step=0000469) Loss: 0.4793 (diff=0.4454, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.38e-05 +[2026-04-14 12:01:43] (step=0000470) Loss: 0.4853 (diff=0.4507, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.4e-05 +[2026-04-14 12:02:02] (step=0000471) Loss: 0.4800 (diff=0.4471, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.42e-05 +[2026-04-14 12:02:22] (step=0000472) Loss: 0.4834 (diff=0.4479, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.44e-05 +[2026-04-14 12:02:41] (step=0000473) Loss: 0.4790 (diff=0.4444, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.46e-05 +[2026-04-14 12:03:01] (step=0000474) Loss: 0.4786 (diff=0.4470, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.48e-05 +[2026-04-14 12:03:21] (step=0000475) Loss: 0.4955 (diff=0.4582, anat=0.0374), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.5e-05 +[2026-04-14 12:03:40] (step=0000476) Loss: 0.4944 (diff=0.4572, anat=0.0371), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.52e-05 +[2026-04-14 12:04:00] (step=0000477) Loss: 0.4792 (diff=0.4475, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.54e-05 +[2026-04-14 12:04:20] (step=0000478) Loss: 0.5011 (diff=0.4631, anat=0.0380), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.56e-05 +[2026-04-14 12:04:39] (step=0000479) Loss: 0.4761 (diff=0.4427, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.58e-05 +[2026-04-14 12:04:59] (step=0000480) Loss: 0.4746 (diff=0.4431, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.6e-05 +[2026-04-14 12:05:19] (step=0000481) Loss: 0.4722 (diff=0.4396, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.620000000000001e-05 +[2026-04-14 12:05:38] (step=0000482) Loss: 0.4807 (diff=0.4470, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.64e-05 +[2026-04-14 12:05:58] (step=0000483) Loss: 0.4884 (diff=0.4544, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.66e-05 +[2026-04-14 12:06:17] (step=0000484) Loss: 0.4927 (diff=0.4591, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.680000000000001e-05 +[2026-04-14 12:06:37] (step=0000485) Loss: 0.4870 (diff=0.4538, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.09, LR: 9.7e-05 +[2026-04-14 12:06:57] (step=0000486) Loss: 0.4947 (diff=0.4603, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.72e-05 +[2026-04-14 12:07:16] (step=0000487) Loss: 0.4975 (diff=0.4612, anat=0.0363), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.74e-05 +[2026-04-14 12:07:36] (step=0000488) Loss: 0.4828 (diff=0.4521, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.76e-05 +[2026-04-14 12:07:55] (step=0000489) Loss: 0.4896 (diff=0.4559, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.78e-05 +[2026-04-14 12:08:15] (step=0000490) Loss: 0.4901 (diff=0.4562, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.8e-05 +[2026-04-14 12:08:35] (step=0000491) Loss: 0.4748 (diff=0.4402, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.82e-05 +[2026-04-14 12:08:54] (step=0000492) Loss: 0.4866 (diff=0.4531, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.84e-05 +[2026-04-14 12:09:14] (step=0000493) Loss: 0.4746 (diff=0.4421, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.86e-05 +[2026-04-14 12:09:33] (step=0000494) Loss: 0.4861 (diff=0.4506, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.88e-05 +[2026-04-14 12:09:53] (step=0000495) Loss: 0.4951 (diff=0.4628, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.900000000000001e-05 +[2026-04-14 12:10:13] (step=0000496) Loss: 0.4876 (diff=0.4548, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.92e-05 +[2026-04-14 12:10:32] (step=0000497) Loss: 0.4935 (diff=0.4585, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.94e-05 +[2026-04-14 12:10:52] (step=0000498) Loss: 0.4939 (diff=0.4605, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.960000000000001e-05 +[2026-04-14 12:11:12] (step=0000499) Loss: 0.4764 (diff=0.4427, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.10, LR: 9.98e-05 +[2026-04-14 12:11:31] (step=0000500) Loss: 0.4808 (diff=0.4474, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:11:31] [RANK 0] Saving current state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500 +[2026-04-14 12:11:46] [RANK 0] Model weights saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/model.safetensors +[2026-04-14 12:11:46] [RANK 0] Optimizer state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/optimizer.bin +[2026-04-14 12:11:46] [RANK 0] Scheduler state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/scheduler.bin +[2026-04-14 12:11:46] [RANK 0] Sampler state for dataloader 0 saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/sampler.bin +[2026-04-14 12:11:46] [RANK 0] Random states saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500/random_states_0.pkl +[2026-04-14 12:11:46] Saved accelerator state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-500 +[2026-04-14 12:11:46] Saved checkpoint to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0000500/ +[2026-04-14 12:12:05] (step=0000501) Loss: 0.4843 (diff=0.4510, anat=0.0333), Steps/Sec: 0.03, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:12:24] (step=0000502) Loss: 0.4930 (diff=0.4606, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:12:43] (step=0000503) Loss: 0.4756 (diff=0.4411, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:13:03] (step=0000504) Loss: 0.4820 (diff=0.4499, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:13:22] (step=0000505) Loss: 0.4935 (diff=0.4578, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:13:42] (step=0000506) Loss: 0.4675 (diff=0.4362, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:14:02] (step=0000507) Loss: 0.4824 (diff=0.4497, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:14:21] (step=0000508) Loss: 0.4846 (diff=0.4521, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:14:41] (step=0000509) Loss: 0.4751 (diff=0.4423, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:15:01] (step=0000510) Loss: 0.4719 (diff=0.4413, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:15:20] (step=0000511) Loss: 0.4783 (diff=0.4464, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:15:40] (step=0000512) Loss: 0.4899 (diff=0.4562, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:15:59] (step=0000513) Loss: 0.4863 (diff=0.4525, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:16:19] (step=0000514) Loss: 0.4846 (diff=0.4501, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:16:39] (step=0000515) Loss: 0.4864 (diff=0.4536, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:16:58] (step=0000516) Loss: 0.4745 (diff=0.4430, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:17:18] (step=0000517) Loss: 0.4928 (diff=0.4584, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:17:37] (step=0000518) Loss: 0.4927 (diff=0.4568, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:17:57] (step=0000519) Loss: 0.4845 (diff=0.4514, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:18:17] (step=0000520) Loss: 0.4703 (diff=0.4393, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:18:36] (step=0000521) Loss: 0.4893 (diff=0.4562, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:18:56] (step=0000522) Loss: 0.4843 (diff=0.4509, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:19:16] (step=0000523) Loss: 0.5048 (diff=0.4697, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:19:35] (step=0000524) Loss: 0.4713 (diff=0.4411, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:19:55] (step=0000525) Loss: 0.4749 (diff=0.4435, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:20:14] (step=0000526) Loss: 0.4746 (diff=0.4423, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:20:34] (step=0000527) Loss: 0.4911 (diff=0.4581, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:20:54] (step=0000528) Loss: 0.4714 (diff=0.4402, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:21:13] (step=0000529) Loss: 0.4992 (diff=0.4638, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:21:33] (step=0000530) Loss: 0.4757 (diff=0.4442, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:21:52] (step=0000531) Loss: 0.4882 (diff=0.4561, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:22:12] (step=0000532) Loss: 0.4748 (diff=0.4447, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:22:32] (step=0000533) Loss: 0.4814 (diff=0.4471, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:22:51] (step=0000534) Loss: 0.4804 (diff=0.4481, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:23:11] (step=0000535) Loss: 0.4652 (diff=0.4341, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:23:30] (step=0000536) Loss: 0.4902 (diff=0.4577, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.10, LR: 0.0001 +[2026-04-14 12:23:50] (step=0000537) Loss: 0.4782 (diff=0.4453, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:24:10] (step=0000538) Loss: 0.4899 (diff=0.4545, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:24:29] (step=0000539) Loss: 0.4720 (diff=0.4424, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:24:49] (step=0000540) Loss: 0.4988 (diff=0.4624, anat=0.0364), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:25:08] (step=0000541) Loss: 0.4898 (diff=0.4561, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:25:28] (step=0000542) Loss: 0.4826 (diff=0.4504, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:25:48] (step=0000543) Loss: 0.4860 (diff=0.4539, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:26:07] (step=0000544) Loss: 0.4839 (diff=0.4497, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:26:27] (step=0000545) Loss: 0.4747 (diff=0.4439, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:26:46] (step=0000546) Loss: 0.4895 (diff=0.4566, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:27:06] (step=0000547) Loss: 0.4790 (diff=0.4479, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:27:26] (step=0000548) Loss: 0.4724 (diff=0.4424, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:27:45] (step=0000549) Loss: 0.4811 (diff=0.4512, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:28:05] (step=0000550) Loss: 0.4754 (diff=0.4437, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:28:24] (step=0000551) Loss: 0.4913 (diff=0.4576, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:28:44] (step=0000552) Loss: 0.4697 (diff=0.4388, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:29:04] (step=0000553) Loss: 0.4770 (diff=0.4422, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:29:23] (step=0000554) Loss: 0.4786 (diff=0.4455, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:29:43] (step=0000555) Loss: 0.4746 (diff=0.4423, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:30:02] (step=0000556) Loss: 0.4787 (diff=0.4433, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:30:22] (step=0000557) Loss: 0.4717 (diff=0.4400, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:30:42] (step=0000558) Loss: 0.4770 (diff=0.4441, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:31:01] (step=0000559) Loss: 0.4906 (diff=0.4566, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:31:21] (step=0000560) Loss: 0.4854 (diff=0.4524, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:31:40] (step=0000561) Loss: 0.4857 (diff=0.4523, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:32:00] (step=0000562) Loss: 0.4900 (diff=0.4576, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:32:19] (step=0000563) Loss: 0.4815 (diff=0.4510, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:32:39] (step=0000564) Loss: 0.4705 (diff=0.4399, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:32:59] (step=0000565) Loss: 0.4853 (diff=0.4521, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:33:18] (step=0000566) Loss: 0.4967 (diff=0.4617, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:33:38] (step=0000567) Loss: 0.4842 (diff=0.4498, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:33:58] (step=0000568) Loss: 0.4720 (diff=0.4415, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:34:17] (step=0000569) Loss: 0.4872 (diff=0.4547, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:34:37] (step=0000570) Loss: 0.4680 (diff=0.4385, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:34:56] (step=0000571) Loss: 0.4874 (diff=0.4543, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:35:16] (step=0000572) Loss: 0.4762 (diff=0.4444, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:35:36] (step=0000573) Loss: 0.4853 (diff=0.4515, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:35:55] (step=0000574) Loss: 0.4769 (diff=0.4448, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:36:15] (step=0000575) Loss: 0.4870 (diff=0.4515, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:36:34] (step=0000576) Loss: 0.4872 (diff=0.4529, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:36:54] (step=0000577) Loss: 0.4823 (diff=0.4507, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:37:14] (step=0000578) Loss: 0.4866 (diff=0.4527, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:37:33] (step=0000579) Loss: 0.4874 (diff=0.4546, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:37:53] (step=0000580) Loss: 0.4873 (diff=0.4528, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:38:12] (step=0000581) Loss: 0.4807 (diff=0.4463, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:38:32] (step=0000582) Loss: 0.4714 (diff=0.4399, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:38:51] (step=0000583) Loss: 0.4854 (diff=0.4531, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:39:11] (step=0000584) Loss: 0.4645 (diff=0.4354, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:39:31] (step=0000585) Loss: 0.4945 (diff=0.4586, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:39:50] (step=0000586) Loss: 0.4838 (diff=0.4487, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:40:10] (step=0000587) Loss: 0.4956 (diff=0.4599, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.11, LR: 0.0001 +[2026-04-14 12:40:29] (step=0000588) Loss: 0.4743 (diff=0.4415, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:40:49] (step=0000589) Loss: 0.4613 (diff=0.4311, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:41:09] (step=0000590) Loss: 0.4655 (diff=0.4349, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:41:28] (step=0000591) Loss: 0.4641 (diff=0.4330, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:41:48] (step=0000592) Loss: 0.4910 (diff=0.4557, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:42:08] (step=0000593) Loss: 0.4811 (diff=0.4492, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:42:27] (step=0000594) Loss: 0.4862 (diff=0.4530, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:42:47] (step=0000595) Loss: 0.4818 (diff=0.4483, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:43:06] (step=0000596) Loss: 0.4745 (diff=0.4437, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:43:26] (step=0000597) Loss: 0.4869 (diff=0.4561, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:43:46] (step=0000598) Loss: 0.4900 (diff=0.4554, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:44:05] (step=0000599) Loss: 0.4864 (diff=0.4542, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:44:25] (step=0000600) Loss: 0.4779 (diff=0.4452, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:44:44] (step=0000601) Loss: 0.4772 (diff=0.4458, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:45:04] (step=0000602) Loss: 0.4806 (diff=0.4488, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:45:23] (step=0000603) Loss: 0.4867 (diff=0.4531, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:45:43] (step=0000604) Loss: 0.4809 (diff=0.4469, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:46:03] (step=0000605) Loss: 0.4863 (diff=0.4520, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:46:22] (step=0000606) Loss: 0.4839 (diff=0.4500, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:46:42] (step=0000607) Loss: 0.4839 (diff=0.4516, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:47:01] (step=0000608) Loss: 0.4866 (diff=0.4519, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:47:21] (step=0000609) Loss: 0.4850 (diff=0.4522, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:47:41] (step=0000610) Loss: 0.4863 (diff=0.4531, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:48:00] (step=0000611) Loss: 0.4804 (diff=0.4488, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:48:20] (step=0000612) Loss: 0.4719 (diff=0.4424, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:48:39] (step=0000613) Loss: 0.4977 (diff=0.4638, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:48:59] (step=0000614) Loss: 0.4927 (diff=0.4572, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:49:19] (step=0000615) Loss: 0.4889 (diff=0.4552, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:49:38] (step=0000616) Loss: 0.4813 (diff=0.4466, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:49:58] (step=0000617) Loss: 0.4907 (diff=0.4563, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:50:18] (step=0000618) Loss: 0.4826 (diff=0.4508, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:50:37] (step=0000619) Loss: 0.4906 (diff=0.4573, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:50:57] (step=0000620) Loss: 0.4939 (diff=0.4599, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:51:16] (step=0000621) Loss: 0.4801 (diff=0.4447, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:51:36] (step=0000622) Loss: 0.4802 (diff=0.4474, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:51:56] (step=0000623) Loss: 0.4723 (diff=0.4387, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:52:15] (step=0000624) Loss: 0.4766 (diff=0.4458, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:52:35] (step=0000625) Loss: 0.4730 (diff=0.4412, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:52:54] (step=0000626) Loss: 0.4908 (diff=0.4545, anat=0.0363), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:53:14] (step=0000627) Loss: 0.4994 (diff=0.4655, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:53:34] (step=0000628) Loss: 0.4811 (diff=0.4483, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:53:53] (step=0000629) Loss: 0.4740 (diff=0.4431, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:54:13] (step=0000630) Loss: 0.4764 (diff=0.4446, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:54:32] (step=0000631) Loss: 0.4892 (diff=0.4557, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:54:52] (step=0000632) Loss: 0.4792 (diff=0.4465, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:55:12] (step=0000633) Loss: 0.4957 (diff=0.4626, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:55:31] (step=0000634) Loss: 0.4810 (diff=0.4495, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:55:51] (step=0000635) Loss: 0.4867 (diff=0.4549, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:56:10] (step=0000636) Loss: 0.4856 (diff=0.4524, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:56:30] (step=0000637) Loss: 0.4802 (diff=0.4461, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:56:49] (step=0000638) Loss: 0.4806 (diff=0.4492, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.12, LR: 0.0001 +[2026-04-14 12:57:09] (step=0000639) Loss: 0.4905 (diff=0.4567, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 12:57:29] (step=0000640) Loss: 0.4883 (diff=0.4567, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 12:57:48] (step=0000641) Loss: 0.4654 (diff=0.4341, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 12:58:08] (step=0000642) Loss: 0.4788 (diff=0.4486, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 12:58:28] (step=0000643) Loss: 0.4918 (diff=0.4573, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 12:58:47] (step=0000644) Loss: 0.4807 (diff=0.4480, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 12:59:07] (step=0000645) Loss: 0.4719 (diff=0.4412, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 12:59:26] (step=0000646) Loss: 0.4819 (diff=0.4485, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 12:59:46] (step=0000647) Loss: 0.4783 (diff=0.4454, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:00:05] (step=0000648) Loss: 0.4860 (diff=0.4509, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:00:25] (step=0000649) Loss: 0.4992 (diff=0.4638, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:00:45] (step=0000650) Loss: 0.4746 (diff=0.4430, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:01:04] (step=0000651) Loss: 0.4820 (diff=0.4496, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:01:24] (step=0000652) Loss: 0.4690 (diff=0.4401, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:01:43] (step=0000653) Loss: 0.4806 (diff=0.4476, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:02:03] (step=0000654) Loss: 0.4795 (diff=0.4479, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:02:23] (step=0000655) Loss: 0.4791 (diff=0.4470, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:02:42] (step=0000656) Loss: 0.4732 (diff=0.4426, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:03:02] (step=0000657) Loss: 0.4744 (diff=0.4433, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:03:21] (step=0000658) Loss: 0.4783 (diff=0.4438, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:03:41] (step=0000659) Loss: 0.4850 (diff=0.4505, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:04:00] (step=0000660) Loss: 0.4850 (diff=0.4506, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:04:20] (step=0000661) Loss: 0.4854 (diff=0.4531, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:04:40] (step=0000662) Loss: 0.4880 (diff=0.4570, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:04:59] (step=0000663) Loss: 0.4788 (diff=0.4457, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:05:19] (step=0000664) Loss: 0.4993 (diff=0.4623, anat=0.0370), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:05:39] (step=0000665) Loss: 0.4829 (diff=0.4505, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:05:58] (step=0000666) Loss: 0.4699 (diff=0.4414, anat=0.0286), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:06:18] (step=0000667) Loss: 0.4918 (diff=0.4588, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:06:37] (step=0000668) Loss: 0.4939 (diff=0.4623, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:06:57] (step=0000669) Loss: 0.4818 (diff=0.4483, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:07:16] (step=0000670) Loss: 0.4783 (diff=0.4473, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:07:36] (step=0000671) Loss: 0.4834 (diff=0.4498, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:07:56] (step=0000672) Loss: 0.4776 (diff=0.4447, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:08:15] (step=0000673) Loss: 0.4915 (diff=0.4578, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:08:35] (step=0000674) Loss: 0.4801 (diff=0.4485, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:08:54] (step=0000675) Loss: 0.4932 (diff=0.4585, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:09:14] (step=0000676) Loss: 0.4890 (diff=0.4542, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:09:34] (step=0000677) Loss: 0.4914 (diff=0.4577, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:09:53] (step=0000678) Loss: 0.4768 (diff=0.4460, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:10:13] (step=0000679) Loss: 0.4677 (diff=0.4363, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:10:32] (step=0000680) Loss: 0.4841 (diff=0.4507, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:10:52] (step=0000681) Loss: 0.4850 (diff=0.4490, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:11:12] (step=0000682) Loss: 0.4737 (diff=0.4423, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:11:31] (step=0000683) Loss: 0.4799 (diff=0.4477, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:11:51] (step=0000684) Loss: 0.4845 (diff=0.4512, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:12:10] (step=0000685) Loss: 0.4833 (diff=0.4492, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:12:30] (step=0000686) Loss: 0.4671 (diff=0.4365, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:12:50] (step=0000687) Loss: 0.4858 (diff=0.4529, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:13:09] (step=0000688) Loss: 0.4766 (diff=0.4443, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:13:29] (step=0000689) Loss: 0.4853 (diff=0.4535, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:13:48] (step=0000690) Loss: 0.4757 (diff=0.4443, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.13, LR: 0.0001 +[2026-04-14 13:14:08] (step=0000691) Loss: 0.4785 (diff=0.4477, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:14:28] (step=0000692) Loss: 0.4890 (diff=0.4575, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:14:47] (step=0000693) Loss: 0.4771 (diff=0.4444, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:15:07] (step=0000694) Loss: 0.4797 (diff=0.4480, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:15:26] (step=0000695) Loss: 0.4913 (diff=0.4574, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:15:46] (step=0000696) Loss: 0.4887 (diff=0.4534, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:16:06] (step=0000697) Loss: 0.4844 (diff=0.4515, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:16:25] (step=0000698) Loss: 0.4889 (diff=0.4556, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:16:45] (step=0000699) Loss: 0.4738 (diff=0.4422, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:17:04] (step=0000700) Loss: 0.4817 (diff=0.4497, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:17:24] (step=0000701) Loss: 0.4715 (diff=0.4399, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:17:44] (step=0000702) Loss: 0.4891 (diff=0.4551, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:18:03] (step=0000703) Loss: 0.4990 (diff=0.4652, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:18:23] (step=0000704) Loss: 0.4800 (diff=0.4460, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:18:42] (step=0000705) Loss: 0.4806 (diff=0.4481, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:19:02] (step=0000706) Loss: 0.4981 (diff=0.4622, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:19:21] (step=0000707) Loss: 0.4976 (diff=0.4641, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:19:41] (step=0000708) Loss: 0.4923 (diff=0.4581, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:20:01] (step=0000709) Loss: 0.4993 (diff=0.4625, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:20:20] (step=0000710) Loss: 0.4795 (diff=0.4478, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:20:40] (step=0000711) Loss: 0.4799 (diff=0.4479, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:21:00] (step=0000712) Loss: 0.4709 (diff=0.4388, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:21:19] (step=0000713) Loss: 0.4802 (diff=0.4472, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:21:39] (step=0000714) Loss: 0.4870 (diff=0.4536, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:21:58] (step=0000715) Loss: 0.4834 (diff=0.4510, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:22:18] (step=0000716) Loss: 0.4770 (diff=0.4443, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:22:38] (step=0000717) Loss: 0.4872 (diff=0.4556, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:22:57] (step=0000718) Loss: 0.4805 (diff=0.4497, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:23:17] (step=0000719) Loss: 0.4758 (diff=0.4437, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:23:36] (step=0000720) Loss: 0.4910 (diff=0.4561, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:23:56] (step=0000721) Loss: 0.4873 (diff=0.4546, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:24:16] (step=0000722) Loss: 0.4726 (diff=0.4392, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:24:35] (step=0000723) Loss: 0.4767 (diff=0.4447, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:24:55] (step=0000724) Loss: 0.4841 (diff=0.4514, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:25:14] (step=0000725) Loss: 0.5011 (diff=0.4641, anat=0.0370), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:25:34] (step=0000726) Loss: 0.4821 (diff=0.4497, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:25:54] (step=0000727) Loss: 0.4957 (diff=0.4616, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:26:13] (step=0000728) Loss: 0.4765 (diff=0.4442, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:26:33] (step=0000729) Loss: 0.4844 (diff=0.4520, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:26:52] (step=0000730) Loss: 0.4809 (diff=0.4490, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:27:12] (step=0000731) Loss: 0.4888 (diff=0.4544, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:27:32] (step=0000732) Loss: 0.4861 (diff=0.4524, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:27:51] (step=0000733) Loss: 0.4866 (diff=0.4538, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:28:11] (step=0000734) Loss: 0.4776 (diff=0.4436, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:28:30] (step=0000735) Loss: 0.4751 (diff=0.4445, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:28:50] (step=0000736) Loss: 0.4896 (diff=0.4579, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:29:10] (step=0000737) Loss: 0.4778 (diff=0.4467, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:29:29] (step=0000738) Loss: 0.4770 (diff=0.4470, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:29:49] (step=0000739) Loss: 0.4850 (diff=0.4532, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:30:08] (step=0000740) Loss: 0.4771 (diff=0.4454, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:30:28] (step=0000741) Loss: 0.4805 (diff=0.4469, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.14, LR: 0.0001 +[2026-04-14 13:30:48] (step=0000742) Loss: 0.4841 (diff=0.4528, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:31:07] (step=0000743) Loss: 0.4805 (diff=0.4483, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:31:27] (step=0000744) Loss: 0.4706 (diff=0.4379, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:31:46] (step=0000745) Loss: 0.4718 (diff=0.4427, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:32:06] (step=0000746) Loss: 0.4742 (diff=0.4422, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:32:25] (step=0000747) Loss: 0.4725 (diff=0.4419, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:32:45] (step=0000748) Loss: 0.4756 (diff=0.4443, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:33:05] (step=0000749) Loss: 0.4863 (diff=0.4544, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:33:24] (step=0000750) Loss: 0.4792 (diff=0.4472, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:33:44] (step=0000751) Loss: 0.4820 (diff=0.4490, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:34:03] (step=0000752) Loss: 0.4797 (diff=0.4457, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:34:23] (step=0000753) Loss: 0.4830 (diff=0.4505, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:34:43] (step=0000754) Loss: 0.4718 (diff=0.4396, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:35:02] (step=0000755) Loss: 0.4842 (diff=0.4504, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:35:22] (step=0000756) Loss: 0.4843 (diff=0.4513, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:35:41] (step=0000757) Loss: 0.4790 (diff=0.4474, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:36:01] (step=0000758) Loss: 0.4742 (diff=0.4429, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:36:21] (step=0000759) Loss: 0.4793 (diff=0.4475, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:36:40] (step=0000760) Loss: 0.4749 (diff=0.4432, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:37:00] (step=0000761) Loss: 0.4840 (diff=0.4524, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:37:19] (step=0000762) Loss: 0.4802 (diff=0.4462, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:37:39] (step=0000763) Loss: 0.4807 (diff=0.4467, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:37:59] (step=0000764) Loss: 0.4723 (diff=0.4403, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:38:18] (step=0000765) Loss: 0.4787 (diff=0.4433, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:38:38] (step=0000766) Loss: 0.4716 (diff=0.4413, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:38:57] (step=0000767) Loss: 0.4775 (diff=0.4448, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:39:17] (step=0000768) Loss: 0.4934 (diff=0.4571, anat=0.0363), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:39:37] (step=0000769) Loss: 0.4935 (diff=0.4592, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:39:56] (step=0000770) Loss: 0.4792 (diff=0.4471, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:40:16] (step=0000771) Loss: 0.4943 (diff=0.4563, anat=0.0380), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:40:35] (step=0000772) Loss: 0.4693 (diff=0.4398, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:40:55] (step=0000773) Loss: 0.4723 (diff=0.4410, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:41:14] (step=0000774) Loss: 0.4768 (diff=0.4455, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:41:34] (step=0000775) Loss: 0.4852 (diff=0.4527, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:41:54] (step=0000776) Loss: 0.4830 (diff=0.4499, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:42:13] (step=0000777) Loss: 0.4833 (diff=0.4521, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:42:33] (step=0000778) Loss: 0.4931 (diff=0.4575, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:42:53] (step=0000779) Loss: 0.4756 (diff=0.4436, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:43:12] (step=0000780) Loss: 0.4653 (diff=0.4346, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:43:32] (step=0000781) Loss: 0.4798 (diff=0.4479, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:43:51] (step=0000782) Loss: 0.4666 (diff=0.4366, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:44:11] (step=0000783) Loss: 0.4903 (diff=0.4576, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:44:30] (step=0000784) Loss: 0.4743 (diff=0.4438, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:44:50] (step=0000785) Loss: 0.4888 (diff=0.4562, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:45:10] (step=0000786) Loss: 0.4726 (diff=0.4374, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:45:29] (step=0000787) Loss: 0.4906 (diff=0.4563, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:45:49] (step=0000788) Loss: 0.4665 (diff=0.4358, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:46:08] (step=0000789) Loss: 0.4899 (diff=0.4556, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:46:28] (step=0000790) Loss: 0.4968 (diff=0.4620, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:46:48] (step=0000791) Loss: 0.4769 (diff=0.4437, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:47:07] (step=0000792) Loss: 0.4880 (diff=0.4553, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.15, LR: 0.0001 +[2026-04-14 13:47:27] (step=0000793) Loss: 0.4800 (diff=0.4477, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:47:46] (step=0000794) Loss: 0.4731 (diff=0.4436, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:48:06] (step=0000795) Loss: 0.4962 (diff=0.4637, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:48:26] (step=0000796) Loss: 0.4813 (diff=0.4465, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:48:45] (step=0000797) Loss: 0.4961 (diff=0.4618, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:49:05] (step=0000798) Loss: 0.4749 (diff=0.4417, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:49:24] (step=0000799) Loss: 0.4786 (diff=0.4477, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:49:44] (step=0000800) Loss: 0.4857 (diff=0.4522, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:50:03] (step=0000801) Loss: 0.4827 (diff=0.4492, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:50:23] (step=0000802) Loss: 0.4828 (diff=0.4475, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:50:43] (step=0000803) Loss: 0.4810 (diff=0.4474, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:51:02] (step=0000804) Loss: 0.4816 (diff=0.4488, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:51:22] (step=0000805) Loss: 0.4734 (diff=0.4427, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:51:41] (step=0000806) Loss: 0.4910 (diff=0.4564, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:52:01] (step=0000807) Loss: 0.5046 (diff=0.4719, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:52:21] (step=0000808) Loss: 0.4910 (diff=0.4579, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:52:40] (step=0000809) Loss: 0.4792 (diff=0.4470, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:53:00] (step=0000810) Loss: 0.4791 (diff=0.4470, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:53:19] (step=0000811) Loss: 0.4688 (diff=0.4381, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:53:39] (step=0000812) Loss: 0.4833 (diff=0.4501, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:53:59] (step=0000813) Loss: 0.4811 (diff=0.4498, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:54:18] (step=0000814) Loss: 0.4803 (diff=0.4491, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:54:38] (step=0000815) Loss: 0.4850 (diff=0.4532, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:54:57] (step=0000816) Loss: 0.5004 (diff=0.4655, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:55:17] (step=0000817) Loss: 0.4820 (diff=0.4495, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:55:36] (step=0000818) Loss: 0.4961 (diff=0.4619, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:55:56] (step=0000819) Loss: 0.4766 (diff=0.4456, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:56:16] (step=0000820) Loss: 0.4845 (diff=0.4524, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:56:35] (step=0000821) Loss: 0.4847 (diff=0.4537, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:56:55] (step=0000822) Loss: 0.4817 (diff=0.4497, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:57:14] (step=0000823) Loss: 0.4898 (diff=0.4563, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:57:34] (step=0000824) Loss: 0.4840 (diff=0.4524, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:57:54] (step=0000825) Loss: 0.4934 (diff=0.4581, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:58:13] (step=0000826) Loss: 0.4848 (diff=0.4512, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:58:33] (step=0000827) Loss: 0.4967 (diff=0.4629, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:58:52] (step=0000828) Loss: 0.4889 (diff=0.4560, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:59:12] (step=0000829) Loss: 0.4795 (diff=0.4481, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:59:32] (step=0000830) Loss: 0.4719 (diff=0.4403, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 13:59:51] (step=0000831) Loss: 0.4735 (diff=0.4414, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:00:11] (step=0000832) Loss: 0.4801 (diff=0.4489, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:00:30] (step=0000833) Loss: 0.4726 (diff=0.4410, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:00:50] (step=0000834) Loss: 0.4944 (diff=0.4603, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:01:10] (step=0000835) Loss: 0.4854 (diff=0.4544, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:01:29] (step=0000836) Loss: 0.4762 (diff=0.4433, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:01:49] (step=0000837) Loss: 0.4685 (diff=0.4387, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:02:08] (step=0000838) Loss: 0.4747 (diff=0.4419, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:02:28] (step=0000839) Loss: 0.4921 (diff=0.4567, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:02:48] (step=0000840) Loss: 0.4884 (diff=0.4571, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:03:07] (step=0000841) Loss: 0.4715 (diff=0.4396, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:03:27] (step=0000842) Loss: 0.4905 (diff=0.4573, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:03:46] (step=0000843) Loss: 0.4752 (diff=0.4443, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.16, LR: 0.0001 +[2026-04-14 14:04:06] (step=0000844) Loss: 0.4864 (diff=0.4540, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:04:25] (step=0000845) Loss: 0.4811 (diff=0.4502, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:04:45] (step=0000846) Loss: 0.4808 (diff=0.4487, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:05:05] (step=0000847) Loss: 0.4744 (diff=0.4424, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:05:24] (step=0000848) Loss: 0.4745 (diff=0.4416, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:05:44] (step=0000849) Loss: 0.4770 (diff=0.4475, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:06:03] (step=0000850) Loss: 0.4873 (diff=0.4532, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:06:23] (step=0000851) Loss: 0.4902 (diff=0.4561, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:06:43] (step=0000852) Loss: 0.4894 (diff=0.4574, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:07:02] (step=0000853) Loss: 0.4820 (diff=0.4507, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:07:22] (step=0000854) Loss: 0.4983 (diff=0.4654, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:07:41] (step=0000855) Loss: 0.4822 (diff=0.4516, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:08:01] (step=0000856) Loss: 0.4850 (diff=0.4510, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:08:21] (step=0000857) Loss: 0.4949 (diff=0.4620, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:08:40] (step=0000858) Loss: 0.4747 (diff=0.4445, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:09:00] (step=0000859) Loss: 0.4861 (diff=0.4530, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:09:19] (step=0000860) Loss: 0.5006 (diff=0.4656, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:09:39] (step=0000861) Loss: 0.4808 (diff=0.4498, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:09:58] (step=0000862) Loss: 0.4913 (diff=0.4556, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:10:18] (step=0000863) Loss: 0.4768 (diff=0.4462, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:10:38] (step=0000864) Loss: 0.4904 (diff=0.4582, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:10:57] (step=0000865) Loss: 0.4858 (diff=0.4526, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:11:17] (step=0000866) Loss: 0.4874 (diff=0.4517, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:11:36] (step=0000867) Loss: 0.4881 (diff=0.4541, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:11:56] (step=0000868) Loss: 0.4770 (diff=0.4442, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:12:16] (step=0000869) Loss: 0.4836 (diff=0.4535, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:12:35] (step=0000870) Loss: 0.4827 (diff=0.4494, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:12:55] (step=0000871) Loss: 0.4820 (diff=0.4493, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:13:14] (step=0000872) Loss: 0.4959 (diff=0.4619, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:13:34] (step=0000873) Loss: 0.4818 (diff=0.4502, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:13:54] (step=0000874) Loss: 0.4674 (diff=0.4348, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:14:13] (step=0000875) Loss: 0.4917 (diff=0.4600, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:14:33] (step=0000876) Loss: 0.4679 (diff=0.4387, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:14:52] (step=0000877) Loss: 0.4832 (diff=0.4520, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:15:12] (step=0000878) Loss: 0.4627 (diff=0.4313, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:15:32] (step=0000879) Loss: 0.4772 (diff=0.4450, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:15:51] (step=0000880) Loss: 0.4759 (diff=0.4454, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:16:11] (step=0000881) Loss: 0.4851 (diff=0.4546, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:16:30] (step=0000882) Loss: 0.4864 (diff=0.4557, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:16:50] (step=0000883) Loss: 0.4727 (diff=0.4412, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:17:10] (step=0000884) Loss: 0.4729 (diff=0.4409, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:17:29] (step=0000885) Loss: 0.4884 (diff=0.4545, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:17:49] (step=0000886) Loss: 0.4715 (diff=0.4399, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:18:08] (step=0000887) Loss: 0.4910 (diff=0.4586, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:18:28] (step=0000888) Loss: 0.4655 (diff=0.4374, anat=0.0281), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:18:48] (step=0000889) Loss: 0.4878 (diff=0.4546, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:19:07] (step=0000890) Loss: 0.4842 (diff=0.4511, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:19:27] (step=0000891) Loss: 0.4835 (diff=0.4522, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:19:46] (step=0000892) Loss: 0.4777 (diff=0.4453, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:20:06] (step=0000893) Loss: 0.4763 (diff=0.4464, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:20:25] (step=0000894) Loss: 0.4876 (diff=0.4558, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.17, LR: 0.0001 +[2026-04-14 14:20:45] (step=0000895) Loss: 0.4805 (diff=0.4480, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:21:05] (step=0000896) Loss: 0.4742 (diff=0.4425, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:21:24] (step=0000897) Loss: 0.4838 (diff=0.4526, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:21:44] (step=0000898) Loss: 0.4740 (diff=0.4412, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:22:04] (step=0000899) Loss: 0.4780 (diff=0.4465, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:22:23] (step=0000900) Loss: 0.4791 (diff=0.4479, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:22:43] (step=0000901) Loss: 0.4890 (diff=0.4548, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:23:02] (step=0000902) Loss: 0.4932 (diff=0.4587, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:23:22] (step=0000903) Loss: 0.4905 (diff=0.4578, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:23:41] (step=0000904) Loss: 0.4814 (diff=0.4488, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:24:01] (step=0000905) Loss: 0.4850 (diff=0.4524, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:24:21] (step=0000906) Loss: 0.4881 (diff=0.4568, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:24:40] (step=0000907) Loss: 0.4647 (diff=0.4336, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:25:00] (step=0000908) Loss: 0.4918 (diff=0.4575, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:25:19] (step=0000909) Loss: 0.4710 (diff=0.4410, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:25:39] (step=0000910) Loss: 0.4845 (diff=0.4513, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:25:59] (step=0000911) Loss: 0.4819 (diff=0.4520, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:26:18] (step=0000912) Loss: 0.4803 (diff=0.4478, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:26:38] (step=0000913) Loss: 0.4707 (diff=0.4387, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:26:57] (step=0000914) Loss: 0.4778 (diff=0.4464, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:27:17] (step=0000915) Loss: 0.4829 (diff=0.4506, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:27:37] (step=0000916) Loss: 0.4697 (diff=0.4402, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:27:56] (step=0000917) Loss: 0.4959 (diff=0.4608, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:28:16] (step=0000918) Loss: 0.4717 (diff=0.4413, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:28:35] (step=0000919) Loss: 0.4759 (diff=0.4442, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:28:55] (step=0000920) Loss: 0.4979 (diff=0.4654, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:29:15] (step=0000921) Loss: 0.4783 (diff=0.4468, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:29:34] (step=0000922) Loss: 0.4724 (diff=0.4420, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:29:54] (step=0000923) Loss: 0.4702 (diff=0.4390, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:30:13] (step=0000924) Loss: 0.4801 (diff=0.4469, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:30:33] (step=0000925) Loss: 0.4814 (diff=0.4478, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:30:52] (step=0000926) Loss: 0.4881 (diff=0.4559, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:31:12] (step=0000927) Loss: 0.4705 (diff=0.4390, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:31:32] (step=0000928) Loss: 0.4829 (diff=0.4501, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:31:51] (step=0000929) Loss: 0.4693 (diff=0.4390, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:32:11] (step=0000930) Loss: 0.4866 (diff=0.4551, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:32:30] (step=0000931) Loss: 0.4742 (diff=0.4425, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:32:50] (step=0000932) Loss: 0.4784 (diff=0.4462, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:33:10] (step=0000933) Loss: 0.4792 (diff=0.4487, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:33:29] (step=0000934) Loss: 0.4869 (diff=0.4531, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:33:49] (step=0000935) Loss: 0.4922 (diff=0.4599, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:34:08] (step=0000936) Loss: 0.4893 (diff=0.4555, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:34:28] (step=0000937) Loss: 0.4758 (diff=0.4455, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:34:48] (step=0000938) Loss: 0.5042 (diff=0.4684, anat=0.0357), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:35:07] (step=0000939) Loss: 0.4803 (diff=0.4488, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:35:27] (step=0000940) Loss: 0.4727 (diff=0.4410, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:35:46] (step=0000941) Loss: 0.4801 (diff=0.4476, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:36:06] (step=0000942) Loss: 0.4996 (diff=0.4667, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:36:25] (step=0000943) Loss: 0.4790 (diff=0.4468, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:36:45] (step=0000944) Loss: 0.4761 (diff=0.4431, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:37:05] (step=0000945) Loss: 0.4859 (diff=0.4533, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.18, LR: 0.0001 +[2026-04-14 14:37:24] (step=0000946) Loss: 0.4849 (diff=0.4531, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:37:44] (step=0000947) Loss: 0.5005 (diff=0.4646, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:38:04] (step=0000948) Loss: 0.4887 (diff=0.4548, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:38:23] (step=0000949) Loss: 0.4872 (diff=0.4538, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:38:43] (step=0000950) Loss: 0.4758 (diff=0.4454, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:39:02] (step=0000951) Loss: 0.4786 (diff=0.4460, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:39:22] (step=0000952) Loss: 0.4805 (diff=0.4494, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:39:41] (step=0000953) Loss: 0.4770 (diff=0.4464, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:40:01] (step=0000954) Loss: 0.4857 (diff=0.4521, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:40:21] (step=0000955) Loss: 0.4766 (diff=0.4459, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:40:40] (step=0000956) Loss: 0.4685 (diff=0.4358, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:41:00] (step=0000957) Loss: 0.4884 (diff=0.4564, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:41:19] (step=0000958) Loss: 0.4887 (diff=0.4548, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:41:39] (step=0000959) Loss: 0.4773 (diff=0.4447, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:41:59] (step=0000960) Loss: 0.4732 (diff=0.4427, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:42:18] (step=0000961) Loss: 0.4782 (diff=0.4475, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:42:38] (step=0000962) Loss: 0.4802 (diff=0.4488, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:42:57] (step=0000963) Loss: 0.5000 (diff=0.4645, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:43:17] (step=0000964) Loss: 0.4864 (diff=0.4537, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:43:37] (step=0000965) Loss: 0.4729 (diff=0.4416, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:43:56] (step=0000966) Loss: 0.4726 (diff=0.4402, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:44:16] (step=0000967) Loss: 0.4907 (diff=0.4575, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:44:35] (step=0000968) Loss: 0.4773 (diff=0.4452, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:44:55] (step=0000969) Loss: 0.4830 (diff=0.4506, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:45:15] (step=0000970) Loss: 0.4989 (diff=0.4618, anat=0.0371), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:45:34] (step=0000971) Loss: 0.4849 (diff=0.4523, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:45:54] (step=0000972) Loss: 0.4819 (diff=0.4498, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:46:13] (step=0000973) Loss: 0.4766 (diff=0.4421, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:46:33] (step=0000974) Loss: 0.4745 (diff=0.4455, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:46:52] (step=0000975) Loss: 0.4931 (diff=0.4597, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:47:12] (step=0000976) Loss: 0.4938 (diff=0.4618, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:47:32] (step=0000977) Loss: 0.4779 (diff=0.4457, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:47:51] (step=0000978) Loss: 0.4774 (diff=0.4462, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:48:11] (step=0000979) Loss: 0.4858 (diff=0.4515, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:48:30] (step=0000980) Loss: 0.4775 (diff=0.4462, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:48:50] (step=0000981) Loss: 0.4658 (diff=0.4351, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:49:09] (step=0000982) Loss: 0.4818 (diff=0.4511, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:49:29] (step=0000983) Loss: 0.4750 (diff=0.4461, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:49:49] (step=0000984) Loss: 0.4663 (diff=0.4370, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:50:08] (step=0000985) Loss: 0.4871 (diff=0.4546, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:50:28] (step=0000986) Loss: 0.4768 (diff=0.4446, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:50:47] (step=0000987) Loss: 0.4676 (diff=0.4389, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:51:07] (step=0000988) Loss: 0.4919 (diff=0.4587, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:51:27] (step=0000989) Loss: 0.4880 (diff=0.4541, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:51:46] (step=0000990) Loss: 0.4916 (diff=0.4584, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:52:06] (step=0000991) Loss: 0.4856 (diff=0.4536, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:52:25] (step=0000992) Loss: 0.4830 (diff=0.4512, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:52:45] (step=0000993) Loss: 0.4929 (diff=0.4595, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:53:05] (step=0000994) Loss: 0.4717 (diff=0.4405, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:53:24] (step=0000995) Loss: 0.4793 (diff=0.4486, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:53:44] (step=0000996) Loss: 0.4841 (diff=0.4487, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.19, LR: 0.0001 +[2026-04-14 14:54:03] (step=0000997) Loss: 0.4727 (diff=0.4386, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:54:23] (step=0000998) Loss: 0.4839 (diff=0.4507, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:54:43] (step=0000999) Loss: 0.4719 (diff=0.4401, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:55:02] (step=0001000) Loss: 0.4902 (diff=0.4546, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:55:02] [RANK 0] Saving current state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000 +[2026-04-14 14:55:16] [RANK 0] Model weights saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/model.safetensors +[2026-04-14 14:55:16] [RANK 0] Optimizer state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/optimizer.bin +[2026-04-14 14:55:16] [RANK 0] Scheduler state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/scheduler.bin +[2026-04-14 14:55:16] [RANK 0] Sampler state for dataloader 0 saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/sampler.bin +[2026-04-14 14:55:16] [RANK 0] Random states saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000/random_states_0.pkl +[2026-04-14 14:55:16] Saved accelerator state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1000 +[2026-04-14 14:55:16] Saved checkpoint to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001000/ +[2026-04-14 14:55:34] (step=0001001) Loss: 0.4705 (diff=0.4404, anat=0.0301), Steps/Sec: 0.03, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:55:53] (step=0001002) Loss: 0.4742 (diff=0.4413, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:56:13] (step=0001003) Loss: 0.4850 (diff=0.4512, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:56:32] (step=0001004) Loss: 0.4781 (diff=0.4502, anat=0.0279), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:56:52] (step=0001005) Loss: 0.4894 (diff=0.4526, anat=0.0368), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:57:11] (step=0001006) Loss: 0.4849 (diff=0.4502, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:57:31] (step=0001007) Loss: 0.4812 (diff=0.4509, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:57:50] (step=0001008) Loss: 0.4814 (diff=0.4508, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:58:10] (step=0001009) Loss: 0.4836 (diff=0.4515, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:58:30] (step=0001010) Loss: 0.4991 (diff=0.4654, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:58:49] (step=0001011) Loss: 0.4809 (diff=0.4493, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:59:09] (step=0001012) Loss: 0.4785 (diff=0.4480, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:59:29] (step=0001013) Loss: 0.4912 (diff=0.4583, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 14:59:48] (step=0001014) Loss: 0.4911 (diff=0.4585, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:00:08] (step=0001015) Loss: 0.4717 (diff=0.4415, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:00:28] (step=0001016) Loss: 0.4830 (diff=0.4521, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:00:47] (step=0001017) Loss: 0.4810 (diff=0.4482, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:01:07] (step=0001018) Loss: 0.4778 (diff=0.4463, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:01:26] (step=0001019) Loss: 0.4829 (diff=0.4498, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:01:46] (step=0001020) Loss: 0.4689 (diff=0.4377, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:02:06] (step=0001021) Loss: 0.4973 (diff=0.4631, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:02:25] (step=0001022) Loss: 0.4777 (diff=0.4478, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:02:45] (step=0001023) Loss: 0.4801 (diff=0.4464, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:03:04] (step=0001024) Loss: 0.4883 (diff=0.4558, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:03:24] (step=0001025) Loss: 0.4708 (diff=0.4410, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:03:44] (step=0001026) Loss: 0.4855 (diff=0.4532, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:04:03] (step=0001027) Loss: 0.4718 (diff=0.4406, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:04:23] (step=0001028) Loss: 0.4765 (diff=0.4456, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:04:42] (step=0001029) Loss: 0.4903 (diff=0.4558, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:05:02] (step=0001030) Loss: 0.5031 (diff=0.4672, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:05:22] (step=0001031) Loss: 0.5003 (diff=0.4660, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:05:41] (step=0001032) Loss: 0.4883 (diff=0.4543, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:06:01] (step=0001033) Loss: 0.4734 (diff=0.4433, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:06:20] (step=0001034) Loss: 0.4739 (diff=0.4449, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:06:40] (step=0001035) Loss: 0.4653 (diff=0.4348, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:07:00] (step=0001036) Loss: 0.4815 (diff=0.4484, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:07:19] (step=0001037) Loss: 0.4879 (diff=0.4548, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:07:39] (step=0001038) Loss: 0.4640 (diff=0.4352, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:07:58] (step=0001039) Loss: 0.4828 (diff=0.4510, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:08:18] (step=0001040) Loss: 0.4845 (diff=0.4511, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:08:37] (step=0001041) Loss: 0.4877 (diff=0.4550, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:08:57] (step=0001042) Loss: 0.4898 (diff=0.4581, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:09:17] (step=0001043) Loss: 0.4767 (diff=0.4452, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:09:36] (step=0001044) Loss: 0.4839 (diff=0.4519, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:09:56] (step=0001045) Loss: 0.4800 (diff=0.4449, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:10:15] (step=0001046) Loss: 0.4832 (diff=0.4529, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:10:35] (step=0001047) Loss: 0.4844 (diff=0.4536, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.20, LR: 0.0001 +[2026-04-14 15:10:55] (step=0001048) Loss: 0.4825 (diff=0.4481, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:11:14] (step=0001049) Loss: 0.4580 (diff=0.4279, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:11:34] (step=0001050) Loss: 0.4844 (diff=0.4521, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:11:53] (step=0001051) Loss: 0.4927 (diff=0.4593, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:12:13] (step=0001052) Loss: 0.4707 (diff=0.4371, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:12:32] (step=0001053) Loss: 0.4943 (diff=0.4596, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:12:52] (step=0001054) Loss: 0.4809 (diff=0.4482, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:13:12] (step=0001055) Loss: 0.4867 (diff=0.4529, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:13:31] (step=0001056) Loss: 0.4811 (diff=0.4509, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:13:51] (step=0001057) Loss: 0.4817 (diff=0.4489, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:14:10] (step=0001058) Loss: 0.4829 (diff=0.4483, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:14:30] (step=0001059) Loss: 0.4618 (diff=0.4314, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:14:50] (step=0001060) Loss: 0.4831 (diff=0.4519, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:15:09] (step=0001061) Loss: 0.4815 (diff=0.4507, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:15:29] (step=0001062) Loss: 0.4944 (diff=0.4601, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:15:48] (step=0001063) Loss: 0.4650 (diff=0.4350, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:16:08] (step=0001064) Loss: 0.4765 (diff=0.4485, anat=0.0280), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:16:28] (step=0001065) Loss: 0.4841 (diff=0.4503, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:16:47] (step=0001066) Loss: 0.4857 (diff=0.4550, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:17:07] (step=0001067) Loss: 0.4966 (diff=0.4640, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:17:26] (step=0001068) Loss: 0.4801 (diff=0.4482, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:17:46] (step=0001069) Loss: 0.4866 (diff=0.4551, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:18:06] (step=0001070) Loss: 0.4854 (diff=0.4517, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:18:25] (step=0001071) Loss: 0.4893 (diff=0.4563, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:18:45] (step=0001072) Loss: 0.4798 (diff=0.4475, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:19:04] (step=0001073) Loss: 0.4730 (diff=0.4435, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:19:24] (step=0001074) Loss: 0.4870 (diff=0.4542, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:19:43] (step=0001075) Loss: 0.4866 (diff=0.4545, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:20:03] (step=0001076) Loss: 0.4743 (diff=0.4414, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:20:23] (step=0001077) Loss: 0.4814 (diff=0.4492, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:20:42] (step=0001078) Loss: 0.4719 (diff=0.4384, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:21:02] (step=0001079) Loss: 0.4855 (diff=0.4534, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:21:21] (step=0001080) Loss: 0.4900 (diff=0.4578, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:21:41] (step=0001081) Loss: 0.4958 (diff=0.4614, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:22:00] (step=0001082) Loss: 0.4737 (diff=0.4412, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:22:20] (step=0001083) Loss: 0.4885 (diff=0.4550, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:22:40] (step=0001084) Loss: 0.4859 (diff=0.4524, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:22:59] (step=0001085) Loss: 0.4848 (diff=0.4507, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:23:19] (step=0001086) Loss: 0.4797 (diff=0.4494, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:23:38] (step=0001087) Loss: 0.4791 (diff=0.4480, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:23:58] (step=0001088) Loss: 0.4596 (diff=0.4302, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:24:18] (step=0001089) Loss: 0.4867 (diff=0.4554, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:24:37] (step=0001090) Loss: 0.4755 (diff=0.4428, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:24:57] (step=0001091) Loss: 0.4920 (diff=0.4579, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:25:16] (step=0001092) Loss: 0.4910 (diff=0.4584, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:25:36] (step=0001093) Loss: 0.4864 (diff=0.4536, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:25:55] (step=0001094) Loss: 0.4867 (diff=0.4539, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:26:15] (step=0001095) Loss: 0.4926 (diff=0.4588, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:26:35] (step=0001096) Loss: 0.4834 (diff=0.4516, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:26:54] (step=0001097) Loss: 0.4919 (diff=0.4590, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:27:14] (step=0001098) Loss: 0.4857 (diff=0.4531, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:27:33] (step=0001099) Loss: 0.4790 (diff=0.4474, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.21, LR: 0.0001 +[2026-04-14 15:27:53] (step=0001100) Loss: 0.4829 (diff=0.4501, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:28:13] (step=0001101) Loss: 0.4772 (diff=0.4474, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:28:32] (step=0001102) Loss: 0.4813 (diff=0.4478, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:28:52] (step=0001103) Loss: 0.4727 (diff=0.4401, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:29:11] (step=0001104) Loss: 0.4756 (diff=0.4455, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:29:31] (step=0001105) Loss: 0.4799 (diff=0.4513, anat=0.0286), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:29:51] (step=0001106) Loss: 0.4785 (diff=0.4474, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:30:10] (step=0001107) Loss: 0.4839 (diff=0.4515, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:30:30] (step=0001108) Loss: 0.4767 (diff=0.4459, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:30:49] (step=0001109) Loss: 0.4764 (diff=0.4454, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:31:09] (step=0001110) Loss: 0.4885 (diff=0.4558, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:31:29] (step=0001111) Loss: 0.4725 (diff=0.4423, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:31:48] (step=0001112) Loss: 0.4651 (diff=0.4356, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:32:08] (step=0001113) Loss: 0.4683 (diff=0.4358, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:32:28] (step=0001114) Loss: 0.4866 (diff=0.4538, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:32:47] (step=0001115) Loss: 0.4915 (diff=0.4573, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:33:07] (step=0001116) Loss: 0.4959 (diff=0.4616, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:33:26] (step=0001117) Loss: 0.4754 (diff=0.4448, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:33:46] (step=0001118) Loss: 0.4795 (diff=0.4482, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:34:05] (step=0001119) Loss: 0.4706 (diff=0.4396, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:34:25] (step=0001120) Loss: 0.4912 (diff=0.4585, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:34:45] (step=0001121) Loss: 0.4844 (diff=0.4510, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:35:04] (step=0001122) Loss: 0.4909 (diff=0.4591, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:35:24] (step=0001123) Loss: 0.4839 (diff=0.4506, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:35:43] (step=0001124) Loss: 0.4922 (diff=0.4606, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:36:03] (step=0001125) Loss: 0.4783 (diff=0.4457, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:36:23] (step=0001126) Loss: 0.4865 (diff=0.4547, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:36:42] (step=0001127) Loss: 0.4896 (diff=0.4582, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:37:02] (step=0001128) Loss: 0.4884 (diff=0.4565, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:37:21] (step=0001129) Loss: 0.4904 (diff=0.4574, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:37:41] (step=0001130) Loss: 0.4804 (diff=0.4507, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:38:01] (step=0001131) Loss: 0.4831 (diff=0.4519, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:38:20] (step=0001132) Loss: 0.4739 (diff=0.4417, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:38:40] (step=0001133) Loss: 0.4843 (diff=0.4525, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:38:59] (step=0001134) Loss: 0.4743 (diff=0.4436, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:39:19] (step=0001135) Loss: 0.4762 (diff=0.4436, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:39:39] (step=0001136) Loss: 0.4804 (diff=0.4491, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:39:58] (step=0001137) Loss: 0.4746 (diff=0.4437, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:40:18] (step=0001138) Loss: 0.4881 (diff=0.4557, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:40:38] (step=0001139) Loss: 0.4950 (diff=0.4592, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:40:57] (step=0001140) Loss: 0.4835 (diff=0.4517, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:41:17] (step=0001141) Loss: 0.4947 (diff=0.4614, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:41:36] (step=0001142) Loss: 0.4992 (diff=0.4661, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:41:56] (step=0001143) Loss: 0.4908 (diff=0.4573, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:42:15] (step=0001144) Loss: 0.4790 (diff=0.4498, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:42:35] (step=0001145) Loss: 0.4766 (diff=0.4473, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:42:55] (step=0001146) Loss: 0.4892 (diff=0.4552, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:43:14] (step=0001147) Loss: 0.4976 (diff=0.4628, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:43:34] (step=0001148) Loss: 0.4668 (diff=0.4392, anat=0.0277), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:43:53] (step=0001149) Loss: 0.4838 (diff=0.4514, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:44:13] (step=0001150) Loss: 0.4823 (diff=0.4502, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.22, LR: 0.0001 +[2026-04-14 15:44:33] (step=0001151) Loss: 0.4888 (diff=0.4567, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:44:52] (step=0001152) Loss: 0.4932 (diff=0.4603, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:45:12] (step=0001153) Loss: 0.4801 (diff=0.4484, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:45:31] (step=0001154) Loss: 0.4862 (diff=0.4534, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:45:51] (step=0001155) Loss: 0.4742 (diff=0.4402, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:46:11] (step=0001156) Loss: 0.4729 (diff=0.4426, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:46:30] (step=0001157) Loss: 0.4771 (diff=0.4465, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:46:50] (step=0001158) Loss: 0.4949 (diff=0.4606, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:47:09] (step=0001159) Loss: 0.4835 (diff=0.4503, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:47:29] (step=0001160) Loss: 0.4811 (diff=0.4484, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:47:49] (step=0001161) Loss: 0.4795 (diff=0.4448, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:48:08] (step=0001162) Loss: 0.4783 (diff=0.4456, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:48:28] (step=0001163) Loss: 0.4860 (diff=0.4529, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:48:47] (step=0001164) Loss: 0.4878 (diff=0.4569, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:49:07] (step=0001165) Loss: 0.4765 (diff=0.4423, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:49:27] (step=0001166) Loss: 0.4896 (diff=0.4557, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:49:46] (step=0001167) Loss: 0.4916 (diff=0.4588, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:50:06] (step=0001168) Loss: 0.4689 (diff=0.4388, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:50:25] (step=0001169) Loss: 0.4827 (diff=0.4504, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:50:45] (step=0001170) Loss: 0.4791 (diff=0.4461, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:51:04] (step=0001171) Loss: 0.4837 (diff=0.4523, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:51:24] (step=0001172) Loss: 0.4859 (diff=0.4521, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:51:44] (step=0001173) Loss: 0.4771 (diff=0.4455, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:52:03] (step=0001174) Loss: 0.4855 (diff=0.4531, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:52:23] (step=0001175) Loss: 0.4888 (diff=0.4547, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:52:42] (step=0001176) Loss: 0.4866 (diff=0.4521, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:53:02] (step=0001177) Loss: 0.4910 (diff=0.4580, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:53:22] (step=0001178) Loss: 0.4799 (diff=0.4480, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:53:41] (step=0001179) Loss: 0.4655 (diff=0.4346, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:54:01] (step=0001180) Loss: 0.4725 (diff=0.4423, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:54:20] (step=0001181) Loss: 0.4969 (diff=0.4618, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:54:40] (step=0001182) Loss: 0.4763 (diff=0.4427, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:55:00] (step=0001183) Loss: 0.4830 (diff=0.4519, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:55:19] (step=0001184) Loss: 0.4860 (diff=0.4557, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:55:39] (step=0001185) Loss: 0.4859 (diff=0.4533, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:55:58] (step=0001186) Loss: 0.4830 (diff=0.4512, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:56:18] (step=0001187) Loss: 0.4939 (diff=0.4619, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:56:38] (step=0001188) Loss: 0.4877 (diff=0.4535, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:56:57] (step=0001189) Loss: 0.4796 (diff=0.4475, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:57:17] (step=0001190) Loss: 0.4685 (diff=0.4383, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:57:36] (step=0001191) Loss: 0.4905 (diff=0.4556, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:57:56] (step=0001192) Loss: 0.4653 (diff=0.4359, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:58:16] (step=0001193) Loss: 0.4739 (diff=0.4425, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:58:35] (step=0001194) Loss: 0.4852 (diff=0.4533, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:58:55] (step=0001195) Loss: 0.4828 (diff=0.4511, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:59:14] (step=0001196) Loss: 0.4878 (diff=0.4555, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:59:34] (step=0001197) Loss: 0.4707 (diff=0.4412, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 15:59:53] (step=0001198) Loss: 0.4794 (diff=0.4491, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 16:00:13] (step=0001199) Loss: 0.4880 (diff=0.4543, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 16:00:33] (step=0001200) Loss: 0.4700 (diff=0.4378, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 16:00:52] (step=0001201) Loss: 0.4860 (diff=0.4538, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.23, LR: 0.0001 +[2026-04-14 16:01:12] (step=0001202) Loss: 0.4930 (diff=0.4598, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:01:31] (step=0001203) Loss: 0.4773 (diff=0.4466, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:01:51] (step=0001204) Loss: 0.4793 (diff=0.4469, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:02:11] (step=0001205) Loss: 0.4757 (diff=0.4435, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:02:30] (step=0001206) Loss: 0.4898 (diff=0.4577, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:02:50] (step=0001207) Loss: 0.4811 (diff=0.4485, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:03:09] (step=0001208) Loss: 0.4818 (diff=0.4498, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:03:29] (step=0001209) Loss: 0.4975 (diff=0.4632, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:03:49] (step=0001210) Loss: 0.4835 (diff=0.4504, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:04:08] (step=0001211) Loss: 0.4723 (diff=0.4424, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:04:28] (step=0001212) Loss: 0.4872 (diff=0.4548, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:04:47] (step=0001213) Loss: 0.4942 (diff=0.4616, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:05:07] (step=0001214) Loss: 0.4850 (diff=0.4533, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:05:27] (step=0001215) Loss: 0.4810 (diff=0.4501, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:05:46] (step=0001216) Loss: 0.4879 (diff=0.4528, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:06:06] (step=0001217) Loss: 0.4819 (diff=0.4487, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:06:25] (step=0001218) Loss: 0.4885 (diff=0.4543, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:06:45] (step=0001219) Loss: 0.4746 (diff=0.4419, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:07:05] (step=0001220) Loss: 0.4787 (diff=0.4479, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:07:24] (step=0001221) Loss: 0.4754 (diff=0.4450, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:07:44] (step=0001222) Loss: 0.4849 (diff=0.4514, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:08:04] (step=0001223) Loss: 0.4807 (diff=0.4489, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:08:23] (step=0001224) Loss: 0.4874 (diff=0.4568, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:08:43] (step=0001225) Loss: 0.4912 (diff=0.4581, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:09:02] (step=0001226) Loss: 0.4878 (diff=0.4549, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:09:22] (step=0001227) Loss: 0.4808 (diff=0.4507, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:09:41] (step=0001228) Loss: 0.4797 (diff=0.4484, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:10:01] (step=0001229) Loss: 0.4783 (diff=0.4474, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:10:21] (step=0001230) Loss: 0.4774 (diff=0.4453, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:10:40] (step=0001231) Loss: 0.4862 (diff=0.4556, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:11:00] (step=0001232) Loss: 0.4904 (diff=0.4566, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:11:20] (step=0001233) Loss: 0.4759 (diff=0.4452, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:11:39] (step=0001234) Loss: 0.4721 (diff=0.4417, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:11:59] (step=0001235) Loss: 0.4830 (diff=0.4503, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:12:18] (step=0001236) Loss: 0.4905 (diff=0.4570, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:12:38] (step=0001237) Loss: 0.4945 (diff=0.4608, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:12:57] (step=0001238) Loss: 0.4800 (diff=0.4489, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:13:17] (step=0001239) Loss: 0.4748 (diff=0.4417, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:13:37] (step=0001240) Loss: 0.4833 (diff=0.4516, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:13:56] (step=0001241) Loss: 0.4827 (diff=0.4516, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:14:16] (step=0001242) Loss: 0.4706 (diff=0.4396, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:14:35] (step=0001243) Loss: 0.4931 (diff=0.4578, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:14:55] (step=0001244) Loss: 0.4771 (diff=0.4466, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:15:15] (step=0001245) Loss: 0.4771 (diff=0.4459, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:15:34] (step=0001246) Loss: 0.4949 (diff=0.4607, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:15:54] (step=0001247) Loss: 0.4806 (diff=0.4488, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:16:13] (step=0001248) Loss: 0.4819 (diff=0.4503, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:16:33] (step=0001249) Loss: 0.4955 (diff=0.4606, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:16:53] (step=0001250) Loss: 0.4974 (diff=0.4631, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:17:12] (step=0001251) Loss: 0.4855 (diff=0.4547, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:17:32] (step=0001252) Loss: 0.4722 (diff=0.4428, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.24, LR: 0.0001 +[2026-04-14 16:17:51] (step=0001253) Loss: 0.4710 (diff=0.4394, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:18:11] (step=0001254) Loss: 0.4861 (diff=0.4547, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:18:31] (step=0001255) Loss: 0.4717 (diff=0.4414, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:18:50] (step=0001256) Loss: 0.4770 (diff=0.4460, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:19:10] (step=0001257) Loss: 0.4920 (diff=0.4596, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:19:29] (step=0001258) Loss: 0.4704 (diff=0.4413, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:19:49] (step=0001259) Loss: 0.4924 (diff=0.4593, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:20:09] (step=0001260) Loss: 0.4909 (diff=0.4590, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:20:28] (step=0001261) Loss: 0.4899 (diff=0.4566, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:20:48] (step=0001262) Loss: 0.4813 (diff=0.4488, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:21:07] (step=0001263) Loss: 0.4655 (diff=0.4355, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:21:27] (step=0001264) Loss: 0.4795 (diff=0.4476, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:21:46] (step=0001265) Loss: 0.4877 (diff=0.4561, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:22:06] (step=0001266) Loss: 0.4866 (diff=0.4544, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:22:26] (step=0001267) Loss: 0.4911 (diff=0.4571, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:22:45] (step=0001268) Loss: 0.4873 (diff=0.4553, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:23:05] (step=0001269) Loss: 0.4738 (diff=0.4428, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:23:24] (step=0001270) Loss: 0.4726 (diff=0.4422, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:23:44] (step=0001271) Loss: 0.4913 (diff=0.4580, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:24:03] (step=0001272) Loss: 0.4699 (diff=0.4390, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:24:23] (step=0001273) Loss: 0.4863 (diff=0.4522, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:24:43] (step=0001274) Loss: 0.4859 (diff=0.4548, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:25:02] (step=0001275) Loss: 0.4918 (diff=0.4565, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:25:22] (step=0001276) Loss: 0.4760 (diff=0.4462, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:25:41] (step=0001277) Loss: 0.4806 (diff=0.4475, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:26:01] (step=0001278) Loss: 0.4895 (diff=0.4557, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:26:21] (step=0001279) Loss: 0.4916 (diff=0.4571, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:26:40] (step=0001280) Loss: 0.4821 (diff=0.4503, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:27:00] (step=0001281) Loss: 0.4702 (diff=0.4395, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:27:19] (step=0001282) Loss: 0.4701 (diff=0.4392, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:27:39] (step=0001283) Loss: 0.4948 (diff=0.4602, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:27:59] (step=0001284) Loss: 0.4905 (diff=0.4579, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:28:18] (step=0001285) Loss: 0.4768 (diff=0.4445, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:28:38] (step=0001286) Loss: 0.5011 (diff=0.4672, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:28:57] (step=0001287) Loss: 0.4856 (diff=0.4522, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:29:17] (step=0001288) Loss: 0.4817 (diff=0.4515, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:29:37] (step=0001289) Loss: 0.4886 (diff=0.4570, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:29:56] (step=0001290) Loss: 0.4723 (diff=0.4414, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:30:16] (step=0001291) Loss: 0.4818 (diff=0.4501, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:30:35] (step=0001292) Loss: 0.4599 (diff=0.4305, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:30:55] (step=0001293) Loss: 0.4753 (diff=0.4431, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:31:15] (step=0001294) Loss: 0.4875 (diff=0.4546, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:31:34] (step=0001295) Loss: 0.4815 (diff=0.4512, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:31:54] (step=0001296) Loss: 0.4822 (diff=0.4512, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:32:13] (step=0001297) Loss: 0.5006 (diff=0.4659, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:32:33] (step=0001298) Loss: 0.4778 (diff=0.4455, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:32:53] (step=0001299) Loss: 0.4743 (diff=0.4442, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:33:12] (step=0001300) Loss: 0.4853 (diff=0.4533, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:33:32] (step=0001301) Loss: 0.4852 (diff=0.4527, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:33:51] (step=0001302) Loss: 0.4714 (diff=0.4419, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:34:11] (step=0001303) Loss: 0.4874 (diff=0.4537, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.25, LR: 0.0001 +[2026-04-14 16:34:31] (step=0001304) Loss: 0.4884 (diff=0.4545, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:34:50] (step=0001305) Loss: 0.4776 (diff=0.4450, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:35:10] (step=0001306) Loss: 0.4733 (diff=0.4421, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:35:29] (step=0001307) Loss: 0.4829 (diff=0.4492, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:35:49] (step=0001308) Loss: 0.4779 (diff=0.4474, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:36:09] (step=0001309) Loss: 0.4795 (diff=0.4464, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:36:28] (step=0001310) Loss: 0.4769 (diff=0.4461, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:36:48] (step=0001311) Loss: 0.4852 (diff=0.4519, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:37:07] (step=0001312) Loss: 0.4836 (diff=0.4505, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:37:27] (step=0001313) Loss: 0.4889 (diff=0.4582, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:37:47] (step=0001314) Loss: 0.4979 (diff=0.4662, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:38:06] (step=0001315) Loss: 0.4825 (diff=0.4508, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:38:26] (step=0001316) Loss: 0.4796 (diff=0.4484, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:38:45] (step=0001317) Loss: 0.4865 (diff=0.4528, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:39:05] (step=0001318) Loss: 0.4881 (diff=0.4571, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:39:24] (step=0001319) Loss: 0.4762 (diff=0.4444, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:39:44] (step=0001320) Loss: 0.4833 (diff=0.4515, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:40:04] (step=0001321) Loss: 0.4902 (diff=0.4588, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:40:23] (step=0001322) Loss: 0.4763 (diff=0.4438, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:40:43] (step=0001323) Loss: 0.4808 (diff=0.4478, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:41:02] (step=0001324) Loss: 0.4704 (diff=0.4404, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:41:22] (step=0001325) Loss: 0.4764 (diff=0.4450, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:41:42] (step=0001326) Loss: 0.4921 (diff=0.4576, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:42:01] (step=0001327) Loss: 0.4775 (diff=0.4446, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:42:21] (step=0001328) Loss: 0.4760 (diff=0.4440, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:42:40] (step=0001329) Loss: 0.4659 (diff=0.4360, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:43:00] (step=0001330) Loss: 0.4803 (diff=0.4477, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:43:20] (step=0001331) Loss: 0.4903 (diff=0.4574, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:43:39] (step=0001332) Loss: 0.4819 (diff=0.4471, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:43:59] (step=0001333) Loss: 0.4745 (diff=0.4445, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:44:18] (step=0001334) Loss: 0.4976 (diff=0.4648, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:44:38] (step=0001335) Loss: 0.4817 (diff=0.4499, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:44:58] (step=0001336) Loss: 0.4785 (diff=0.4470, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:45:17] (step=0001337) Loss: 0.4880 (diff=0.4551, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:45:37] (step=0001338) Loss: 0.4870 (diff=0.4548, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:45:56] (step=0001339) Loss: 0.4805 (diff=0.4477, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:46:16] (step=0001340) Loss: 0.4767 (diff=0.4461, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:46:36] (step=0001341) Loss: 0.4771 (diff=0.4464, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:46:55] (step=0001342) Loss: 0.4796 (diff=0.4476, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:47:15] (step=0001343) Loss: 0.4891 (diff=0.4549, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:47:35] (step=0001344) Loss: 0.4912 (diff=0.4561, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:47:54] (step=0001345) Loss: 0.4854 (diff=0.4543, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:48:14] (step=0001346) Loss: 0.4954 (diff=0.4624, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:48:33] (step=0001347) Loss: 0.4869 (diff=0.4545, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:48:53] (step=0001348) Loss: 0.4679 (diff=0.4362, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:49:13] (step=0001349) Loss: 0.4810 (diff=0.4498, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:49:32] (step=0001350) Loss: 0.4751 (diff=0.4439, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:49:52] (step=0001351) Loss: 0.4817 (diff=0.4499, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:50:11] (step=0001352) Loss: 0.4790 (diff=0.4489, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:50:31] (step=0001353) Loss: 0.4783 (diff=0.4460, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:50:51] (step=0001354) Loss: 0.4807 (diff=0.4471, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.26, LR: 0.0001 +[2026-04-14 16:51:10] (step=0001355) Loss: 0.4781 (diff=0.4468, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:51:30] (step=0001356) Loss: 0.4715 (diff=0.4396, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:51:49] (step=0001357) Loss: 0.4866 (diff=0.4565, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:52:09] (step=0001358) Loss: 0.4821 (diff=0.4484, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:52:29] (step=0001359) Loss: 0.4810 (diff=0.4463, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:52:48] (step=0001360) Loss: 0.4784 (diff=0.4485, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:53:08] (step=0001361) Loss: 0.4827 (diff=0.4516, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:53:27] (step=0001362) Loss: 0.4954 (diff=0.4627, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:53:47] (step=0001363) Loss: 0.4872 (diff=0.4550, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:54:06] (step=0001364) Loss: 0.4790 (diff=0.4468, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:54:26] (step=0001365) Loss: 0.4913 (diff=0.4597, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:54:45] (step=0001366) Loss: 0.4895 (diff=0.4568, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:55:05] (step=0001367) Loss: 0.4940 (diff=0.4601, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:55:25] (step=0001368) Loss: 0.4746 (diff=0.4467, anat=0.0279), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:55:44] (step=0001369) Loss: 0.4768 (diff=0.4446, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:56:04] (step=0001370) Loss: 0.4752 (diff=0.4435, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:56:24] (step=0001371) Loss: 0.4812 (diff=0.4494, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:56:43] (step=0001372) Loss: 0.4844 (diff=0.4504, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:57:03] (step=0001373) Loss: 0.5048 (diff=0.4712, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:57:22] (step=0001374) Loss: 0.4784 (diff=0.4468, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:57:42] (step=0001375) Loss: 0.4753 (diff=0.4448, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:58:01] (step=0001376) Loss: 0.4785 (diff=0.4470, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:58:21] (step=0001377) Loss: 0.4845 (diff=0.4506, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:58:41] (step=0001378) Loss: 0.4736 (diff=0.4434, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:59:00] (step=0001379) Loss: 0.4656 (diff=0.4377, anat=0.0279), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:59:20] (step=0001380) Loss: 0.4934 (diff=0.4601, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:59:39] (step=0001381) Loss: 0.4878 (diff=0.4540, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 16:59:59] (step=0001382) Loss: 0.4794 (diff=0.4466, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:00:19] (step=0001383) Loss: 0.5053 (diff=0.4727, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:00:38] (step=0001384) Loss: 0.4893 (diff=0.4566, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:00:58] (step=0001385) Loss: 0.4662 (diff=0.4376, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:01:17] (step=0001386) Loss: 0.4751 (diff=0.4441, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:01:37] (step=0001387) Loss: 0.4950 (diff=0.4600, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:01:57] (step=0001388) Loss: 0.4803 (diff=0.4453, anat=0.0351), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:02:16] (step=0001389) Loss: 0.4797 (diff=0.4466, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:02:36] (step=0001390) Loss: 0.4859 (diff=0.4544, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:02:55] (step=0001391) Loss: 0.4648 (diff=0.4344, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:03:15] (step=0001392) Loss: 0.4789 (diff=0.4473, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:03:35] (step=0001393) Loss: 0.4759 (diff=0.4444, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:03:54] (step=0001394) Loss: 0.4742 (diff=0.4441, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:04:14] (step=0001395) Loss: 0.4819 (diff=0.4501, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:04:33] (step=0001396) Loss: 0.4744 (diff=0.4421, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:04:53] (step=0001397) Loss: 0.4770 (diff=0.4481, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:05:13] (step=0001398) Loss: 0.4781 (diff=0.4476, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:05:32] (step=0001399) Loss: 0.4702 (diff=0.4392, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:05:52] (step=0001400) Loss: 0.4698 (diff=0.4403, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:06:12] (step=0001401) Loss: 0.4704 (diff=0.4411, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:06:31] (step=0001402) Loss: 0.4811 (diff=0.4482, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:06:51] (step=0001403) Loss: 0.4888 (diff=0.4546, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:07:10] (step=0001404) Loss: 0.4818 (diff=0.4499, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:07:30] (step=0001405) Loss: 0.4737 (diff=0.4416, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.27, LR: 0.0001 +[2026-04-14 17:07:50] (step=0001406) Loss: 0.4887 (diff=0.4551, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:08:09] (step=0001407) Loss: 0.4903 (diff=0.4576, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:08:29] (step=0001408) Loss: 0.4718 (diff=0.4424, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:08:48] (step=0001409) Loss: 0.4908 (diff=0.4568, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:09:08] (step=0001410) Loss: 0.4789 (diff=0.4466, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:09:28] (step=0001411) Loss: 0.4795 (diff=0.4464, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:09:47] (step=0001412) Loss: 0.4885 (diff=0.4565, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:10:07] (step=0001413) Loss: 0.4757 (diff=0.4440, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:10:26] (step=0001414) Loss: 0.4826 (diff=0.4495, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:10:46] (step=0001415) Loss: 0.4943 (diff=0.4603, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:11:06] (step=0001416) Loss: 0.4805 (diff=0.4501, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:11:25] (step=0001417) Loss: 0.4737 (diff=0.4435, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:11:45] (step=0001418) Loss: 0.4865 (diff=0.4534, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:12:04] (step=0001419) Loss: 0.4938 (diff=0.4582, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:12:24] (step=0001420) Loss: 0.4697 (diff=0.4412, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:12:44] (step=0001421) Loss: 0.4828 (diff=0.4515, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:13:03] (step=0001422) Loss: 0.4785 (diff=0.4469, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:13:23] (step=0001423) Loss: 0.4789 (diff=0.4479, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:13:42] (step=0001424) Loss: 0.4809 (diff=0.4492, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:14:02] (step=0001425) Loss: 0.4840 (diff=0.4522, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:14:22] (step=0001426) Loss: 0.4945 (diff=0.4589, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:14:41] (step=0001427) Loss: 0.4945 (diff=0.4603, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:15:01] (step=0001428) Loss: 0.4761 (diff=0.4464, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:15:20] (step=0001429) Loss: 0.4894 (diff=0.4556, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:15:40] (step=0001430) Loss: 0.4865 (diff=0.4538, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:15:59] (step=0001431) Loss: 0.4737 (diff=0.4425, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:16:19] (step=0001432) Loss: 0.4924 (diff=0.4592, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:16:39] (step=0001433) Loss: 0.4761 (diff=0.4451, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:16:58] (step=0001434) Loss: 0.4877 (diff=0.4540, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:17:18] (step=0001435) Loss: 0.4797 (diff=0.4485, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:17:37] (step=0001436) Loss: 0.4813 (diff=0.4487, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:17:57] (step=0001437) Loss: 0.4870 (diff=0.4564, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:18:17] (step=0001438) Loss: 0.4859 (diff=0.4538, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:18:36] (step=0001439) Loss: 0.4886 (diff=0.4562, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:18:56] (step=0001440) Loss: 0.4842 (diff=0.4515, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:19:15] (step=0001441) Loss: 0.4869 (diff=0.4536, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:19:35] (step=0001442) Loss: 0.4847 (diff=0.4532, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:19:55] (step=0001443) Loss: 0.4712 (diff=0.4395, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:20:14] (step=0001444) Loss: 0.4769 (diff=0.4453, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:20:34] (step=0001445) Loss: 0.4762 (diff=0.4435, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:20:53] (step=0001446) Loss: 0.4912 (diff=0.4589, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:21:13] (step=0001447) Loss: 0.4804 (diff=0.4492, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:21:33] (step=0001448) Loss: 0.4739 (diff=0.4455, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:21:52] (step=0001449) Loss: 0.4915 (diff=0.4602, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:22:12] (step=0001450) Loss: 0.4789 (diff=0.4489, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:22:31] (step=0001451) Loss: 0.4976 (diff=0.4639, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:22:51] (step=0001452) Loss: 0.4708 (diff=0.4418, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:23:11] (step=0001453) Loss: 0.4826 (diff=0.4506, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:23:30] (step=0001454) Loss: 0.4693 (diff=0.4396, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:23:50] (step=0001455) Loss: 0.4814 (diff=0.4487, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:24:09] (step=0001456) Loss: 0.4826 (diff=0.4502, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.28, LR: 0.0001 +[2026-04-14 17:24:29] (step=0001457) Loss: 0.4803 (diff=0.4483, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:24:49] (step=0001458) Loss: 0.4949 (diff=0.4597, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:25:08] (step=0001459) Loss: 0.4876 (diff=0.4540, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:25:28] (step=0001460) Loss: 0.4952 (diff=0.4629, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:25:47] (step=0001461) Loss: 0.4781 (diff=0.4467, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:26:07] (step=0001462) Loss: 0.4861 (diff=0.4556, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:26:27] (step=0001463) Loss: 0.4767 (diff=0.4458, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:26:46] (step=0001464) Loss: 0.4777 (diff=0.4470, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:27:06] (step=0001465) Loss: 0.4809 (diff=0.4474, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:27:26] (step=0001466) Loss: 0.4796 (diff=0.4478, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:27:45] (step=0001467) Loss: 0.4799 (diff=0.4466, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:28:05] (step=0001468) Loss: 0.4756 (diff=0.4431, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:28:24] (step=0001469) Loss: 0.4841 (diff=0.4533, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:28:44] (step=0001470) Loss: 0.4707 (diff=0.4395, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:29:04] (step=0001471) Loss: 0.4725 (diff=0.4418, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:29:23] (step=0001472) Loss: 0.4742 (diff=0.4449, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:29:43] (step=0001473) Loss: 0.4720 (diff=0.4414, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:30:02] (step=0001474) Loss: 0.4766 (diff=0.4476, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:30:22] (step=0001475) Loss: 0.4785 (diff=0.4469, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:30:42] (step=0001476) Loss: 0.4779 (diff=0.4419, anat=0.0361), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:31:01] (step=0001477) Loss: 0.4741 (diff=0.4426, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:31:21] (step=0001478) Loss: 0.4662 (diff=0.4359, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:31:40] (step=0001479) Loss: 0.4793 (diff=0.4482, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:32:00] (step=0001480) Loss: 0.4781 (diff=0.4472, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:32:20] (step=0001481) Loss: 0.4882 (diff=0.4565, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:32:39] (step=0001482) Loss: 0.4883 (diff=0.4558, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:32:59] (step=0001483) Loss: 0.4837 (diff=0.4515, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:33:18] (step=0001484) Loss: 0.4671 (diff=0.4341, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:33:38] (step=0001485) Loss: 0.4874 (diff=0.4563, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:33:58] (step=0001486) Loss: 0.4794 (diff=0.4467, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:34:17] (step=0001487) Loss: 0.4864 (diff=0.4540, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:34:37] (step=0001488) Loss: 0.4757 (diff=0.4438, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:34:57] (step=0001489) Loss: 0.4814 (diff=0.4503, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:35:16] (step=0001490) Loss: 0.4805 (diff=0.4483, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:35:36] (step=0001491) Loss: 0.4816 (diff=0.4504, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:35:55] (step=0001492) Loss: 0.4734 (diff=0.4408, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:36:15] (step=0001493) Loss: 0.4800 (diff=0.4470, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:36:35] (step=0001494) Loss: 0.4834 (diff=0.4510, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:36:54] (step=0001495) Loss: 0.4912 (diff=0.4580, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:37:14] (step=0001496) Loss: 0.4759 (diff=0.4419, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:37:33] (step=0001497) Loss: 0.4759 (diff=0.4453, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:37:53] (step=0001498) Loss: 0.4773 (diff=0.4465, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:38:13] (step=0001499) Loss: 0.4798 (diff=0.4483, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:38:32] (step=0001500) Loss: 0.4842 (diff=0.4503, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:38:32] [RANK 0] Saving current state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500 +[2026-04-14 17:38:46] [RANK 0] Model weights saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/model.safetensors +[2026-04-14 17:38:46] [RANK 0] Optimizer state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/optimizer.bin +[2026-04-14 17:38:46] [RANK 0] Scheduler state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/scheduler.bin +[2026-04-14 17:38:46] [RANK 0] Sampler state for dataloader 0 saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/sampler.bin +[2026-04-14 17:38:46] [RANK 0] Random states saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500/random_states_0.pkl +[2026-04-14 17:38:46] Saved accelerator state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-1500 +[2026-04-14 17:38:46] Saved checkpoint to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0001500/ +[2026-04-14 17:39:04] (step=0001501) Loss: 0.4889 (diff=0.4572, anat=0.0317), Steps/Sec: 0.03, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:39:23] (step=0001502) Loss: 0.4830 (diff=0.4531, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:39:43] (step=0001503) Loss: 0.4711 (diff=0.4410, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:40:02] (step=0001504) Loss: 0.4864 (diff=0.4565, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:40:22] (step=0001505) Loss: 0.4700 (diff=0.4394, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:40:41] (step=0001506) Loss: 0.4764 (diff=0.4450, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:41:01] (step=0001507) Loss: 0.4825 (diff=0.4509, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.29, LR: 0.0001 +[2026-04-14 17:41:21] (step=0001508) Loss: 0.4873 (diff=0.4543, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:41:40] (step=0001509) Loss: 0.4900 (diff=0.4567, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:42:00] (step=0001510) Loss: 0.4933 (diff=0.4579, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:42:20] (step=0001511) Loss: 0.4874 (diff=0.4548, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:42:39] (step=0001512) Loss: 0.4686 (diff=0.4381, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:42:59] (step=0001513) Loss: 0.4812 (diff=0.4498, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:43:19] (step=0001514) Loss: 0.4761 (diff=0.4453, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:43:38] (step=0001515) Loss: 0.4730 (diff=0.4396, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:43:58] (step=0001516) Loss: 0.4644 (diff=0.4348, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:44:17] (step=0001517) Loss: 0.4668 (diff=0.4362, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:44:37] (step=0001518) Loss: 0.4856 (diff=0.4526, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:44:57] (step=0001519) Loss: 0.4749 (diff=0.4444, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:45:16] (step=0001520) Loss: 0.4649 (diff=0.4336, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:45:36] (step=0001521) Loss: 0.4887 (diff=0.4575, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:45:55] (step=0001522) Loss: 0.4900 (diff=0.4590, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:46:15] (step=0001523) Loss: 0.4793 (diff=0.4483, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:46:35] (step=0001524) Loss: 0.4768 (diff=0.4454, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:46:54] (step=0001525) Loss: 0.4853 (diff=0.4554, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:47:14] (step=0001526) Loss: 0.4859 (diff=0.4549, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:47:33] (step=0001527) Loss: 0.4939 (diff=0.4604, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:47:53] (step=0001528) Loss: 0.4642 (diff=0.4337, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:48:13] (step=0001529) Loss: 0.4653 (diff=0.4372, anat=0.0281), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:48:32] (step=0001530) Loss: 0.4731 (diff=0.4430, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:48:52] (step=0001531) Loss: 0.4816 (diff=0.4483, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:49:11] (step=0001532) Loss: 0.4807 (diff=0.4502, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:49:31] (step=0001533) Loss: 0.4861 (diff=0.4536, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:49:51] (step=0001534) Loss: 0.4856 (diff=0.4525, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:50:10] (step=0001535) Loss: 0.4760 (diff=0.4439, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:50:30] (step=0001536) Loss: 0.4917 (diff=0.4591, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:50:49] (step=0001537) Loss: 0.4820 (diff=0.4493, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:51:09] (step=0001538) Loss: 0.4804 (diff=0.4489, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:51:29] (step=0001539) Loss: 0.4724 (diff=0.4410, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:51:48] (step=0001540) Loss: 0.4832 (diff=0.4532, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:52:08] (step=0001541) Loss: 0.4763 (diff=0.4471, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:52:27] (step=0001542) Loss: 0.4791 (diff=0.4460, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:52:47] (step=0001543) Loss: 0.4731 (diff=0.4424, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:53:07] (step=0001544) Loss: 0.4922 (diff=0.4578, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:53:26] (step=0001545) Loss: 0.4813 (diff=0.4488, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:53:46] (step=0001546) Loss: 0.4762 (diff=0.4442, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:54:05] (step=0001547) Loss: 0.4717 (diff=0.4424, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:54:25] (step=0001548) Loss: 0.4822 (diff=0.4514, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:54:44] (step=0001549) Loss: 0.4879 (diff=0.4518, anat=0.0361), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:55:04] (step=0001550) Loss: 0.4821 (diff=0.4484, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:55:24] (step=0001551) Loss: 0.4826 (diff=0.4513, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:55:43] (step=0001552) Loss: 0.4846 (diff=0.4550, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:56:03] (step=0001553) Loss: 0.4816 (diff=0.4477, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:56:22] (step=0001554) Loss: 0.4866 (diff=0.4556, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:56:42] (step=0001555) Loss: 0.4747 (diff=0.4438, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:57:02] (step=0001556) Loss: 0.4789 (diff=0.4488, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:57:21] (step=0001557) Loss: 0.4698 (diff=0.4397, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:57:41] (step=0001558) Loss: 0.4861 (diff=0.4524, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:58:00] (step=0001559) Loss: 0.4981 (diff=0.4650, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.30, LR: 0.0001 +[2026-04-14 17:58:20] (step=0001560) Loss: 0.4751 (diff=0.4435, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 17:58:40] (step=0001561) Loss: 0.4694 (diff=0.4408, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 17:58:59] (step=0001562) Loss: 0.4804 (diff=0.4492, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 17:59:19] (step=0001563) Loss: 0.4885 (diff=0.4573, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 17:59:38] (step=0001564) Loss: 0.4820 (diff=0.4474, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 17:59:58] (step=0001565) Loss: 0.4860 (diff=0.4537, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:00:18] (step=0001566) Loss: 0.4791 (diff=0.4451, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:00:37] (step=0001567) Loss: 0.4882 (diff=0.4535, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:00:57] (step=0001568) Loss: 0.4855 (diff=0.4533, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:01:16] (step=0001569) Loss: 0.4824 (diff=0.4509, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:01:36] (step=0001570) Loss: 0.4877 (diff=0.4550, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:01:56] (step=0001571) Loss: 0.4846 (diff=0.4520, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:02:15] (step=0001572) Loss: 0.4995 (diff=0.4662, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:02:35] (step=0001573) Loss: 0.4871 (diff=0.4556, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:02:54] (step=0001574) Loss: 0.4853 (diff=0.4521, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:03:14] (step=0001575) Loss: 0.4869 (diff=0.4552, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:03:33] (step=0001576) Loss: 0.4879 (diff=0.4541, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:03:53] (step=0001577) Loss: 0.4845 (diff=0.4518, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:04:13] (step=0001578) Loss: 0.4903 (diff=0.4571, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:04:32] (step=0001579) Loss: 0.4888 (diff=0.4563, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:04:52] (step=0001580) Loss: 0.4835 (diff=0.4518, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:05:11] (step=0001581) Loss: 0.4804 (diff=0.4488, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:05:31] (step=0001582) Loss: 0.4817 (diff=0.4480, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:05:51] (step=0001583) Loss: 0.4919 (diff=0.4592, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:06:10] (step=0001584) Loss: 0.4752 (diff=0.4452, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:06:30] (step=0001585) Loss: 0.4807 (diff=0.4507, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:06:49] (step=0001586) Loss: 0.4804 (diff=0.4498, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:07:09] (step=0001587) Loss: 0.4818 (diff=0.4503, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:07:28] (step=0001588) Loss: 0.4960 (diff=0.4619, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:07:48] (step=0001589) Loss: 0.4830 (diff=0.4534, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:08:08] (step=0001590) Loss: 0.4855 (diff=0.4508, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:08:27] (step=0001591) Loss: 0.4916 (diff=0.4576, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:08:47] (step=0001592) Loss: 0.4845 (diff=0.4539, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:09:06] (step=0001593) Loss: 0.4819 (diff=0.4507, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:09:26] (step=0001594) Loss: 0.4898 (diff=0.4565, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:09:46] (step=0001595) Loss: 0.4832 (diff=0.4505, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:10:05] (step=0001596) Loss: 0.4720 (diff=0.4407, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:10:25] (step=0001597) Loss: 0.4756 (diff=0.4430, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:10:44] (step=0001598) Loss: 0.4981 (diff=0.4642, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:11:04] (step=0001599) Loss: 0.4786 (diff=0.4471, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:11:23] (step=0001600) Loss: 0.4976 (diff=0.4651, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:11:43] (step=0001601) Loss: 0.4788 (diff=0.4473, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:12:03] (step=0001602) Loss: 0.5039 (diff=0.4703, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:12:22] (step=0001603) Loss: 0.4869 (diff=0.4556, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:12:42] (step=0001604) Loss: 0.4851 (diff=0.4534, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:13:01] (step=0001605) Loss: 0.4873 (diff=0.4535, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:13:21] (step=0001606) Loss: 0.4966 (diff=0.4626, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:13:41] (step=0001607) Loss: 0.4847 (diff=0.4520, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:14:00] (step=0001608) Loss: 0.4807 (diff=0.4497, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:14:20] (step=0001609) Loss: 0.4726 (diff=0.4424, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:14:39] (step=0001610) Loss: 0.4681 (diff=0.4372, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.31, LR: 0.0001 +[2026-04-14 18:14:59] (step=0001611) Loss: 0.4828 (diff=0.4518, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:15:18] (step=0001612) Loss: 0.4831 (diff=0.4510, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:15:38] (step=0001613) Loss: 0.4665 (diff=0.4361, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:15:58] (step=0001614) Loss: 0.4939 (diff=0.4606, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:16:17] (step=0001615) Loss: 0.4764 (diff=0.4450, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:16:37] (step=0001616) Loss: 0.4725 (diff=0.4410, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:16:56] (step=0001617) Loss: 0.4812 (diff=0.4489, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:17:16] (step=0001618) Loss: 0.4963 (diff=0.4596, anat=0.0366), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:17:36] (step=0001619) Loss: 0.4865 (diff=0.4554, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:17:55] (step=0001620) Loss: 0.4731 (diff=0.4443, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:18:15] (step=0001621) Loss: 0.4777 (diff=0.4460, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:18:34] (step=0001622) Loss: 0.4778 (diff=0.4483, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:18:54] (step=0001623) Loss: 0.4729 (diff=0.4431, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:19:13] (step=0001624) Loss: 0.4742 (diff=0.4444, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:19:33] (step=0001625) Loss: 0.4836 (diff=0.4528, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:19:53] (step=0001626) Loss: 0.4861 (diff=0.4542, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:20:12] (step=0001627) Loss: 0.4869 (diff=0.4548, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:20:32] (step=0001628) Loss: 0.4783 (diff=0.4461, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:20:51] (step=0001629) Loss: 0.4796 (diff=0.4486, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:21:11] (step=0001630) Loss: 0.4791 (diff=0.4456, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:21:31] (step=0001631) Loss: 0.4860 (diff=0.4542, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:21:50] (step=0001632) Loss: 0.4749 (diff=0.4442, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:22:10] (step=0001633) Loss: 0.4834 (diff=0.4513, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:22:29] (step=0001634) Loss: 0.4755 (diff=0.4433, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:22:49] (step=0001635) Loss: 0.4706 (diff=0.4396, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:23:08] (step=0001636) Loss: 0.4806 (diff=0.4496, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:23:28] (step=0001637) Loss: 0.4828 (diff=0.4497, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:23:48] (step=0001638) Loss: 0.4902 (diff=0.4578, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:24:07] (step=0001639) Loss: 0.4839 (diff=0.4508, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:24:27] (step=0001640) Loss: 0.4900 (diff=0.4593, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:24:46] (step=0001641) Loss: 0.4878 (diff=0.4548, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:25:06] (step=0001642) Loss: 0.4791 (diff=0.4481, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:25:26] (step=0001643) Loss: 0.4705 (diff=0.4396, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:25:45] (step=0001644) Loss: 0.4951 (diff=0.4615, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:26:05] (step=0001645) Loss: 0.4798 (diff=0.4486, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:26:24] (step=0001646) Loss: 0.4803 (diff=0.4514, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:26:44] (step=0001647) Loss: 0.5048 (diff=0.4689, anat=0.0359), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:27:03] (step=0001648) Loss: 0.4825 (diff=0.4518, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:27:23] (step=0001649) Loss: 0.4776 (diff=0.4453, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:27:43] (step=0001650) Loss: 0.4797 (diff=0.4483, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:28:02] (step=0001651) Loss: 0.4891 (diff=0.4561, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:28:22] (step=0001652) Loss: 0.4804 (diff=0.4488, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:28:41] (step=0001653) Loss: 0.4738 (diff=0.4433, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:29:01] (step=0001654) Loss: 0.4863 (diff=0.4528, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:29:21] (step=0001655) Loss: 0.4820 (diff=0.4491, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:29:40] (step=0001656) Loss: 0.4813 (diff=0.4502, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:30:00] (step=0001657) Loss: 0.4755 (diff=0.4433, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:30:19] (step=0001658) Loss: 0.4954 (diff=0.4612, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:30:39] (step=0001659) Loss: 0.4877 (diff=0.4538, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:30:59] (step=0001660) Loss: 0.4888 (diff=0.4556, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:31:18] (step=0001661) Loss: 0.4774 (diff=0.4485, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.32, LR: 0.0001 +[2026-04-14 18:31:38] (step=0001662) Loss: 0.4908 (diff=0.4594, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:31:57] (step=0001663) Loss: 0.4846 (diff=0.4516, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:32:17] (step=0001664) Loss: 0.4789 (diff=0.4469, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:32:36] (step=0001665) Loss: 0.4832 (diff=0.4519, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:32:56] (step=0001666) Loss: 0.4855 (diff=0.4533, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:33:16] (step=0001667) Loss: 0.4765 (diff=0.4467, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:33:35] (step=0001668) Loss: 0.4693 (diff=0.4396, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:33:55] (step=0001669) Loss: 0.4834 (diff=0.4498, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:34:14] (step=0001670) Loss: 0.4906 (diff=0.4590, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:34:34] (step=0001671) Loss: 0.4855 (diff=0.4546, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:34:54] (step=0001672) Loss: 0.4742 (diff=0.4417, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:35:13] (step=0001673) Loss: 0.4862 (diff=0.4526, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:35:33] (step=0001674) Loss: 0.4824 (diff=0.4513, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:35:52] (step=0001675) Loss: 0.4875 (diff=0.4538, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:36:12] (step=0001676) Loss: 0.4865 (diff=0.4527, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:36:31] (step=0001677) Loss: 0.4952 (diff=0.4635, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:36:51] (step=0001678) Loss: 0.4724 (diff=0.4430, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:37:11] (step=0001679) Loss: 0.4915 (diff=0.4588, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:37:30] (step=0001680) Loss: 0.4670 (diff=0.4401, anat=0.0269), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:37:50] (step=0001681) Loss: 0.4880 (diff=0.4545, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:38:09] (step=0001682) Loss: 0.4858 (diff=0.4529, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:38:29] (step=0001683) Loss: 0.4931 (diff=0.4590, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:38:48] (step=0001684) Loss: 0.4766 (diff=0.4474, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:39:08] (step=0001685) Loss: 0.4802 (diff=0.4507, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:39:28] (step=0001686) Loss: 0.4807 (diff=0.4483, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:39:47] (step=0001687) Loss: 0.4862 (diff=0.4542, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:40:07] (step=0001688) Loss: 0.4843 (diff=0.4527, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:40:26] (step=0001689) Loss: 0.4909 (diff=0.4570, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:40:46] (step=0001690) Loss: 0.4811 (diff=0.4514, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:41:05] (step=0001691) Loss: 0.4847 (diff=0.4506, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:41:25] (step=0001692) Loss: 0.4799 (diff=0.4497, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:41:45] (step=0001693) Loss: 0.4873 (diff=0.4550, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:42:04] (step=0001694) Loss: 0.4735 (diff=0.4428, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:42:24] (step=0001695) Loss: 0.4755 (diff=0.4452, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:42:43] (step=0001696) Loss: 0.4820 (diff=0.4503, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:43:03] (step=0001697) Loss: 0.5048 (diff=0.4707, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:43:23] (step=0001698) Loss: 0.4866 (diff=0.4535, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:43:42] (step=0001699) Loss: 0.4720 (diff=0.4415, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:44:02] (step=0001700) Loss: 0.4742 (diff=0.4430, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:44:21] (step=0001701) Loss: 0.4815 (diff=0.4488, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:44:41] (step=0001702) Loss: 0.4814 (diff=0.4510, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:45:00] (step=0001703) Loss: 0.4933 (diff=0.4598, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:45:20] (step=0001704) Loss: 0.4793 (diff=0.4472, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:45:40] (step=0001705) Loss: 0.4875 (diff=0.4548, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:45:59] (step=0001706) Loss: 0.4883 (diff=0.4560, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:46:19] (step=0001707) Loss: 0.4893 (diff=0.4546, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:46:38] (step=0001708) Loss: 0.4735 (diff=0.4429, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:46:58] (step=0001709) Loss: 0.4917 (diff=0.4585, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:47:17] (step=0001710) Loss: 0.4836 (diff=0.4516, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:47:37] (step=0001711) Loss: 0.4715 (diff=0.4386, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:47:57] (step=0001712) Loss: 0.4845 (diff=0.4521, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.33, LR: 0.0001 +[2026-04-14 18:48:16] (step=0001713) Loss: 0.4716 (diff=0.4424, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:48:36] (step=0001714) Loss: 0.4864 (diff=0.4550, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:48:55] (step=0001715) Loss: 0.4834 (diff=0.4515, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:49:15] (step=0001716) Loss: 0.4793 (diff=0.4463, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:49:34] (step=0001717) Loss: 0.4761 (diff=0.4443, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:49:54] (step=0001718) Loss: 0.4976 (diff=0.4611, anat=0.0365), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:50:14] (step=0001719) Loss: 0.4988 (diff=0.4650, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:50:33] (step=0001720) Loss: 0.4707 (diff=0.4419, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:50:53] (step=0001721) Loss: 0.4938 (diff=0.4598, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:51:12] (step=0001722) Loss: 0.4841 (diff=0.4519, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:51:32] (step=0001723) Loss: 0.4668 (diff=0.4373, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:51:52] (step=0001724) Loss: 0.4718 (diff=0.4416, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:52:11] (step=0001725) Loss: 0.4870 (diff=0.4555, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:52:31] (step=0001726) Loss: 0.4832 (diff=0.4528, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:52:50] (step=0001727) Loss: 0.4826 (diff=0.4492, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:53:10] (step=0001728) Loss: 0.4965 (diff=0.4648, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:53:30] (step=0001729) Loss: 0.4913 (diff=0.4615, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:53:49] (step=0001730) Loss: 0.4825 (diff=0.4507, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:54:09] (step=0001731) Loss: 0.4794 (diff=0.4495, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:54:28] (step=0001732) Loss: 0.4924 (diff=0.4574, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:54:48] (step=0001733) Loss: 0.4768 (diff=0.4452, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:55:08] (step=0001734) Loss: 0.4809 (diff=0.4505, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:55:27] (step=0001735) Loss: 0.4758 (diff=0.4458, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:55:47] (step=0001736) Loss: 0.4820 (diff=0.4509, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:56:06] (step=0001737) Loss: 0.4853 (diff=0.4534, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:56:26] (step=0001738) Loss: 0.4722 (diff=0.4438, anat=0.0284), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:56:46] (step=0001739) Loss: 0.4934 (diff=0.4586, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:57:05] (step=0001740) Loss: 0.4850 (diff=0.4552, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:57:25] (step=0001741) Loss: 0.4738 (diff=0.4425, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:57:45] (step=0001742) Loss: 0.4974 (diff=0.4626, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:58:04] (step=0001743) Loss: 0.4740 (diff=0.4415, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:58:24] (step=0001744) Loss: 0.4892 (diff=0.4571, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:58:44] (step=0001745) Loss: 0.4868 (diff=0.4538, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:59:03] (step=0001746) Loss: 0.4787 (diff=0.4472, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:59:23] (step=0001747) Loss: 0.5029 (diff=0.4688, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 18:59:43] (step=0001748) Loss: 0.4787 (diff=0.4494, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:00:02] (step=0001749) Loss: 0.4712 (diff=0.4422, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:00:22] (step=0001750) Loss: 0.4915 (diff=0.4599, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:00:41] (step=0001751) Loss: 0.4974 (diff=0.4607, anat=0.0367), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:01:01] (step=0001752) Loss: 0.4945 (diff=0.4607, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:01:21] (step=0001753) Loss: 0.4942 (diff=0.4596, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:01:40] (step=0001754) Loss: 0.4822 (diff=0.4517, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:02:00] (step=0001755) Loss: 0.4761 (diff=0.4436, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:02:19] (step=0001756) Loss: 0.4826 (diff=0.4509, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:02:39] (step=0001757) Loss: 0.4900 (diff=0.4546, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:02:59] (step=0001758) Loss: 0.4844 (diff=0.4528, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:03:18] (step=0001759) Loss: 0.4706 (diff=0.4419, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:03:38] (step=0001760) Loss: 0.4796 (diff=0.4457, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:03:58] (step=0001761) Loss: 0.4848 (diff=0.4524, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:04:17] (step=0001762) Loss: 0.4797 (diff=0.4487, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:04:37] (step=0001763) Loss: 0.4737 (diff=0.4442, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.34, LR: 0.0001 +[2026-04-14 19:04:56] (step=0001764) Loss: 0.4730 (diff=0.4430, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:05:16] (step=0001765) Loss: 0.4968 (diff=0.4637, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:05:36] (step=0001766) Loss: 0.4798 (diff=0.4487, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:05:55] (step=0001767) Loss: 0.4831 (diff=0.4508, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:06:15] (step=0001768) Loss: 0.4830 (diff=0.4502, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:06:34] (step=0001769) Loss: 0.4873 (diff=0.4537, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:06:54] (step=0001770) Loss: 0.4824 (diff=0.4523, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:07:14] (step=0001771) Loss: 0.4696 (diff=0.4382, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:07:33] (step=0001772) Loss: 0.4773 (diff=0.4471, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:07:53] (step=0001773) Loss: 0.4687 (diff=0.4391, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:08:12] (step=0001774) Loss: 0.4619 (diff=0.4326, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:08:32] (step=0001775) Loss: 0.4722 (diff=0.4404, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:08:52] (step=0001776) Loss: 0.4862 (diff=0.4533, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:09:11] (step=0001777) Loss: 0.4943 (diff=0.4641, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:09:31] (step=0001778) Loss: 0.4856 (diff=0.4543, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:09:50] (step=0001779) Loss: 0.4870 (diff=0.4556, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:10:10] (step=0001780) Loss: 0.4838 (diff=0.4524, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:10:30] (step=0001781) Loss: 0.4805 (diff=0.4499, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:10:49] (step=0001782) Loss: 0.4736 (diff=0.4441, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:11:09] (step=0001783) Loss: 0.4893 (diff=0.4563, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:11:28] (step=0001784) Loss: 0.4868 (diff=0.4532, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:11:48] (step=0001785) Loss: 0.4717 (diff=0.4413, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:12:08] (step=0001786) Loss: 0.4895 (diff=0.4587, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:12:27] (step=0001787) Loss: 0.4866 (diff=0.4527, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:12:47] (step=0001788) Loss: 0.4998 (diff=0.4662, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:13:06] (step=0001789) Loss: 0.4742 (diff=0.4422, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:13:26] (step=0001790) Loss: 0.4993 (diff=0.4647, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:13:46] (step=0001791) Loss: 0.4690 (diff=0.4390, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:14:05] (step=0001792) Loss: 0.4769 (diff=0.4461, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:14:25] (step=0001793) Loss: 0.4737 (diff=0.4451, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:14:44] (step=0001794) Loss: 0.4744 (diff=0.4421, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:15:04] (step=0001795) Loss: 0.4870 (diff=0.4550, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:15:23] (step=0001796) Loss: 0.4779 (diff=0.4440, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:15:43] (step=0001797) Loss: 0.4837 (diff=0.4526, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:16:03] (step=0001798) Loss: 0.4754 (diff=0.4443, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:16:22] (step=0001799) Loss: 0.4968 (diff=0.4628, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:16:42] (step=0001800) Loss: 0.4844 (diff=0.4539, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:17:01] (step=0001801) Loss: 0.4744 (diff=0.4450, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:17:21] (step=0001802) Loss: 0.4861 (diff=0.4539, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:17:40] (step=0001803) Loss: 0.4855 (diff=0.4551, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:18:00] (step=0001804) Loss: 0.4787 (diff=0.4494, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:18:20] (step=0001805) Loss: 0.4844 (diff=0.4502, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:18:39] (step=0001806) Loss: 0.4709 (diff=0.4407, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:18:59] (step=0001807) Loss: 0.4666 (diff=0.4388, anat=0.0278), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:19:18] (step=0001808) Loss: 0.4808 (diff=0.4487, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:19:38] (step=0001809) Loss: 0.4926 (diff=0.4595, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:19:58] (step=0001810) Loss: 0.4913 (diff=0.4585, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:20:17] (step=0001811) Loss: 0.4817 (diff=0.4480, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:20:37] (step=0001812) Loss: 0.4798 (diff=0.4483, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:20:56] (step=0001813) Loss: 0.4839 (diff=0.4524, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:21:16] (step=0001814) Loss: 0.4775 (diff=0.4464, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.35, LR: 0.0001 +[2026-04-14 19:21:35] (step=0001815) Loss: 0.4714 (diff=0.4405, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:21:55] (step=0001816) Loss: 0.4826 (diff=0.4494, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:22:15] (step=0001817) Loss: 0.4823 (diff=0.4533, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:22:34] (step=0001818) Loss: 0.4923 (diff=0.4586, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:22:54] (step=0001819) Loss: 0.4797 (diff=0.4475, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:23:13] (step=0001820) Loss: 0.4782 (diff=0.4484, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:23:33] (step=0001821) Loss: 0.5049 (diff=0.4716, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:23:53] (step=0001822) Loss: 0.4828 (diff=0.4519, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:24:12] (step=0001823) Loss: 0.4736 (diff=0.4425, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:24:32] (step=0001824) Loss: 0.4799 (diff=0.4465, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:24:51] (step=0001825) Loss: 0.4837 (diff=0.4530, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:25:11] (step=0001826) Loss: 0.4777 (diff=0.4461, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:25:30] (step=0001827) Loss: 0.4686 (diff=0.4404, anat=0.0281), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:25:50] (step=0001828) Loss: 0.4968 (diff=0.4641, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:26:10] (step=0001829) Loss: 0.4752 (diff=0.4443, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:26:29] (step=0001830) Loss: 0.4803 (diff=0.4507, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:26:49] (step=0001831) Loss: 0.4857 (diff=0.4529, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:27:08] (step=0001832) Loss: 0.4669 (diff=0.4373, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:27:28] (step=0001833) Loss: 0.4870 (diff=0.4555, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:27:47] (step=0001834) Loss: 0.4892 (diff=0.4537, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:28:07] (step=0001835) Loss: 0.4863 (diff=0.4530, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:28:27] (step=0001836) Loss: 0.4837 (diff=0.4528, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:28:46] (step=0001837) Loss: 0.4844 (diff=0.4520, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:29:06] (step=0001838) Loss: 0.4855 (diff=0.4561, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:29:25] (step=0001839) Loss: 0.4694 (diff=0.4398, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:29:45] (step=0001840) Loss: 0.4791 (diff=0.4479, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:30:04] (step=0001841) Loss: 0.4672 (diff=0.4346, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:30:24] (step=0001842) Loss: 0.4771 (diff=0.4460, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:30:44] (step=0001843) Loss: 0.4621 (diff=0.4339, anat=0.0282), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:31:03] (step=0001844) Loss: 0.4784 (diff=0.4482, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:31:23] (step=0001845) Loss: 0.4883 (diff=0.4559, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:31:42] (step=0001846) Loss: 0.4722 (diff=0.4426, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:32:02] (step=0001847) Loss: 0.4846 (diff=0.4532, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:32:21] (step=0001848) Loss: 0.4947 (diff=0.4605, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:32:41] (step=0001849) Loss: 0.4874 (diff=0.4530, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:33:01] (step=0001850) Loss: 0.4855 (diff=0.4509, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:33:20] (step=0001851) Loss: 0.4785 (diff=0.4478, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:33:40] (step=0001852) Loss: 0.4874 (diff=0.4540, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:33:59] (step=0001853) Loss: 0.4747 (diff=0.4439, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:34:19] (step=0001854) Loss: 0.4768 (diff=0.4440, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:34:38] (step=0001855) Loss: 0.4838 (diff=0.4524, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:34:58] (step=0001856) Loss: 0.4834 (diff=0.4521, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:35:17] (step=0001857) Loss: 0.4700 (diff=0.4389, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:35:37] (step=0001858) Loss: 0.4915 (diff=0.4589, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:35:57] (step=0001859) Loss: 0.4794 (diff=0.4492, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:36:16] (step=0001860) Loss: 0.4701 (diff=0.4406, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:36:36] (step=0001861) Loss: 0.4758 (diff=0.4466, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:36:55] (step=0001862) Loss: 0.4864 (diff=0.4527, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:37:15] (step=0001863) Loss: 0.4806 (diff=0.4466, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:37:35] (step=0001864) Loss: 0.4907 (diff=0.4588, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:37:54] (step=0001865) Loss: 0.4613 (diff=0.4320, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.36, LR: 0.0001 +[2026-04-14 19:38:14] (step=0001866) Loss: 0.4718 (diff=0.4408, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:38:33] (step=0001867) Loss: 0.4818 (diff=0.4509, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:38:53] (step=0001868) Loss: 0.4590 (diff=0.4311, anat=0.0279), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:39:12] (step=0001869) Loss: 0.4913 (diff=0.4583, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:39:32] (step=0001870) Loss: 0.4883 (diff=0.4563, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:39:52] (step=0001871) Loss: 0.4728 (diff=0.4436, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:40:11] (step=0001872) Loss: 0.4803 (diff=0.4508, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:40:31] (step=0001873) Loss: 0.4877 (diff=0.4550, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:40:50] (step=0001874) Loss: 0.4765 (diff=0.4459, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:41:10] (step=0001875) Loss: 0.4728 (diff=0.4407, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:41:30] (step=0001876) Loss: 0.4665 (diff=0.4377, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:41:49] (step=0001877) Loss: 0.4896 (diff=0.4568, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:42:09] (step=0001878) Loss: 0.4825 (diff=0.4510, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:42:28] (step=0001879) Loss: 0.4753 (diff=0.4448, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:42:48] (step=0001880) Loss: 0.4860 (diff=0.4537, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:43:08] (step=0001881) Loss: 0.4709 (diff=0.4410, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:43:27] (step=0001882) Loss: 0.4872 (diff=0.4561, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:43:47] (step=0001883) Loss: 0.4955 (diff=0.4607, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:44:07] (step=0001884) Loss: 0.4827 (diff=0.4530, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:44:26] (step=0001885) Loss: 0.4919 (diff=0.4589, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:44:46] (step=0001886) Loss: 0.4823 (diff=0.4510, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:45:05] (step=0001887) Loss: 0.4858 (diff=0.4532, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:45:25] (step=0001888) Loss: 0.4820 (diff=0.4505, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:45:45] (step=0001889) Loss: 0.4734 (diff=0.4423, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:46:04] (step=0001890) Loss: 0.4828 (diff=0.4494, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:46:24] (step=0001891) Loss: 0.4749 (diff=0.4420, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:46:44] (step=0001892) Loss: 0.4800 (diff=0.4500, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:47:03] (step=0001893) Loss: 0.4856 (diff=0.4533, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:47:23] (step=0001894) Loss: 0.4668 (diff=0.4364, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:47:43] (step=0001895) Loss: 0.4769 (diff=0.4436, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:48:02] (step=0001896) Loss: 0.4784 (diff=0.4474, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:48:22] (step=0001897) Loss: 0.4832 (diff=0.4518, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:48:41] (step=0001898) Loss: 0.4913 (diff=0.4578, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:49:01] (step=0001899) Loss: 0.4755 (diff=0.4461, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:49:21] (step=0001900) Loss: 0.4774 (diff=0.4473, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:49:40] (step=0001901) Loss: 0.4830 (diff=0.4499, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:50:00] (step=0001902) Loss: 0.4789 (diff=0.4474, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:50:20] (step=0001903) Loss: 0.4671 (diff=0.4377, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:50:39] (step=0001904) Loss: 0.4921 (diff=0.4593, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:50:59] (step=0001905) Loss: 0.4851 (diff=0.4540, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:51:19] (step=0001906) Loss: 0.4854 (diff=0.4546, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:51:38] (step=0001907) Loss: 0.4902 (diff=0.4559, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:51:58] (step=0001908) Loss: 0.4792 (diff=0.4481, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:52:18] (step=0001909) Loss: 0.4746 (diff=0.4443, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:52:37] (step=0001910) Loss: 0.4746 (diff=0.4438, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:52:57] (step=0001911) Loss: 0.4751 (diff=0.4439, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:53:17] (step=0001912) Loss: 0.4781 (diff=0.4457, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:53:36] (step=0001913) Loss: 0.4820 (diff=0.4505, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:53:56] (step=0001914) Loss: 0.4893 (diff=0.4566, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:54:16] (step=0001915) Loss: 0.4865 (diff=0.4544, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:54:35] (step=0001916) Loss: 0.4884 (diff=0.4564, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.37, LR: 0.0001 +[2026-04-14 19:54:55] (step=0001917) Loss: 0.4813 (diff=0.4506, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:55:14] (step=0001918) Loss: 0.4857 (diff=0.4542, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:55:34] (step=0001919) Loss: 0.4868 (diff=0.4543, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:55:54] (step=0001920) Loss: 0.4777 (diff=0.4472, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:56:13] (step=0001921) Loss: 0.4809 (diff=0.4512, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:56:33] (step=0001922) Loss: 0.4812 (diff=0.4498, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:56:53] (step=0001923) Loss: 0.4891 (diff=0.4550, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:57:12] (step=0001924) Loss: 0.4701 (diff=0.4404, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:57:32] (step=0001925) Loss: 0.4721 (diff=0.4420, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:57:52] (step=0001926) Loss: 0.4693 (diff=0.4397, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:58:11] (step=0001927) Loss: 0.5000 (diff=0.4655, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:58:31] (step=0001928) Loss: 0.4646 (diff=0.4360, anat=0.0286), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:58:51] (step=0001929) Loss: 0.4742 (diff=0.4443, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:59:10] (step=0001930) Loss: 0.4837 (diff=0.4506, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:59:30] (step=0001931) Loss: 0.4697 (diff=0.4395, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 19:59:50] (step=0001932) Loss: 0.4822 (diff=0.4505, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:00:09] (step=0001933) Loss: 0.4836 (diff=0.4517, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:00:29] (step=0001934) Loss: 0.4855 (diff=0.4544, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:00:49] (step=0001935) Loss: 0.4902 (diff=0.4575, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:01:08] (step=0001936) Loss: 0.4859 (diff=0.4553, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:01:28] (step=0001937) Loss: 0.4660 (diff=0.4367, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:01:48] (step=0001938) Loss: 0.4672 (diff=0.4365, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:02:07] (step=0001939) Loss: 0.4810 (diff=0.4499, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:02:27] (step=0001940) Loss: 0.4942 (diff=0.4622, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:02:47] (step=0001941) Loss: 0.4807 (diff=0.4501, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:03:06] (step=0001942) Loss: 0.4834 (diff=0.4507, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:03:26] (step=0001943) Loss: 0.4821 (diff=0.4498, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:03:45] (step=0001944) Loss: 0.4853 (diff=0.4533, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:04:05] (step=0001945) Loss: 0.4778 (diff=0.4476, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:04:25] (step=0001946) Loss: 0.4999 (diff=0.4651, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:04:44] (step=0001947) Loss: 0.4821 (diff=0.4500, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:05:04] (step=0001948) Loss: 0.4754 (diff=0.4446, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:05:24] (step=0001949) Loss: 0.4772 (diff=0.4436, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:05:43] (step=0001950) Loss: 0.4854 (diff=0.4536, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:06:03] (step=0001951) Loss: 0.4777 (diff=0.4454, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:06:23] (step=0001952) Loss: 0.4762 (diff=0.4420, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:06:42] (step=0001953) Loss: 0.4744 (diff=0.4414, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:07:02] (step=0001954) Loss: 0.4813 (diff=0.4506, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:07:22] (step=0001955) Loss: 0.4893 (diff=0.4589, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:07:41] (step=0001956) Loss: 0.4895 (diff=0.4559, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:08:01] (step=0001957) Loss: 0.4851 (diff=0.4542, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:08:21] (step=0001958) Loss: 0.4889 (diff=0.4585, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:08:40] (step=0001959) Loss: 0.4803 (diff=0.4509, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:09:00] (step=0001960) Loss: 0.4883 (diff=0.4574, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:09:20] (step=0001961) Loss: 0.4773 (diff=0.4464, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:09:39] (step=0001962) Loss: 0.4795 (diff=0.4467, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:09:59] (step=0001963) Loss: 0.4888 (diff=0.4538, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:10:19] (step=0001964) Loss: 0.4883 (diff=0.4571, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:10:38] (step=0001965) Loss: 0.4852 (diff=0.4539, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:10:58] (step=0001966) Loss: 0.4815 (diff=0.4494, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:11:18] (step=0001967) Loss: 0.4846 (diff=0.4527, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:11:37] (step=0001968) Loss: 0.4818 (diff=0.4495, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.38, LR: 0.0001 +[2026-04-14 20:11:57] (step=0001969) Loss: 0.4704 (diff=0.4392, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:12:17] (step=0001970) Loss: 0.4887 (diff=0.4569, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:12:36] (step=0001971) Loss: 0.4851 (diff=0.4550, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:12:56] (step=0001972) Loss: 0.4785 (diff=0.4461, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:13:16] (step=0001973) Loss: 0.4827 (diff=0.4504, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:13:35] (step=0001974) Loss: 0.4806 (diff=0.4490, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:13:55] (step=0001975) Loss: 0.4843 (diff=0.4547, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:14:15] (step=0001976) Loss: 0.4929 (diff=0.4597, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:14:34] (step=0001977) Loss: 0.4735 (diff=0.4449, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:14:54] (step=0001978) Loss: 0.4832 (diff=0.4501, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:15:14] (step=0001979) Loss: 0.4840 (diff=0.4515, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:15:33] (step=0001980) Loss: 0.4862 (diff=0.4527, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:15:53] (step=0001981) Loss: 0.4827 (diff=0.4521, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:16:13] (step=0001982) Loss: 0.4628 (diff=0.4349, anat=0.0279), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:16:32] (step=0001983) Loss: 0.4832 (diff=0.4491, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:16:52] (step=0001984) Loss: 0.4792 (diff=0.4504, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:17:12] (step=0001985) Loss: 0.4812 (diff=0.4519, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:17:31] (step=0001986) Loss: 0.4844 (diff=0.4536, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:17:51] (step=0001987) Loss: 0.4859 (diff=0.4524, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:18:11] (step=0001988) Loss: 0.4826 (diff=0.4503, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:18:30] (step=0001989) Loss: 0.4726 (diff=0.4414, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:18:50] (step=0001990) Loss: 0.4728 (diff=0.4419, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:19:09] (step=0001991) Loss: 0.4747 (diff=0.4450, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:19:29] (step=0001992) Loss: 0.4800 (diff=0.4472, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:19:49] (step=0001993) Loss: 0.4761 (diff=0.4466, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:20:08] (step=0001994) Loss: 0.4944 (diff=0.4609, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:20:28] (step=0001995) Loss: 0.4784 (diff=0.4515, anat=0.0269), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:20:48] (step=0001996) Loss: 0.4823 (diff=0.4513, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:21:07] (step=0001997) Loss: 0.4716 (diff=0.4436, anat=0.0280), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:21:27] (step=0001998) Loss: 0.4766 (diff=0.4464, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:21:47] (step=0001999) Loss: 0.4842 (diff=0.4532, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:22:06] (step=0002000) Loss: 0.4855 (diff=0.4520, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:22:06] [RANK 0] Saving current state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000 +[2026-04-14 20:22:18] [RANK 0] Model weights saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/model.safetensors +[2026-04-14 20:22:18] [RANK 0] Optimizer state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/optimizer.bin +[2026-04-14 20:22:18] [RANK 0] Scheduler state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/scheduler.bin +[2026-04-14 20:22:18] [RANK 0] Sampler state for dataloader 0 saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/sampler.bin +[2026-04-14 20:22:18] [RANK 0] Random states saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000/random_states_0.pkl +[2026-04-14 20:22:18] Saved accelerator state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2000 +[2026-04-14 20:22:18] Saved checkpoint to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002000/ +[2026-04-14 20:22:37] (step=0002001) Loss: 0.4844 (diff=0.4537, anat=0.0308), Steps/Sec: 0.03, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:22:56] (step=0002002) Loss: 0.4798 (diff=0.4478, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:23:16] (step=0002003) Loss: 0.4788 (diff=0.4484, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:23:35] (step=0002004) Loss: 0.4869 (diff=0.4556, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:23:55] (step=0002005) Loss: 0.4873 (diff=0.4558, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:24:15] (step=0002006) Loss: 0.4743 (diff=0.4436, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:24:34] (step=0002007) Loss: 0.4828 (diff=0.4500, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:24:54] (step=0002008) Loss: 0.4928 (diff=0.4595, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:25:13] (step=0002009) Loss: 0.4956 (diff=0.4640, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:25:33] (step=0002010) Loss: 0.4795 (diff=0.4488, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:25:53] (step=0002011) Loss: 0.4725 (diff=0.4423, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:26:13] (step=0002012) Loss: 0.4895 (diff=0.4549, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:26:32] (step=0002013) Loss: 0.4920 (diff=0.4591, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:26:52] (step=0002014) Loss: 0.4797 (diff=0.4503, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:27:11] (step=0002015) Loss: 0.4774 (diff=0.4462, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:27:31] (step=0002016) Loss: 0.4825 (diff=0.4504, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:27:51] (step=0002017) Loss: 0.4848 (diff=0.4512, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:28:10] (step=0002018) Loss: 0.4773 (diff=0.4457, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:28:30] (step=0002019) Loss: 0.4863 (diff=0.4538, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.39, LR: 0.0001 +[2026-04-14 20:28:50] (step=0002020) Loss: 0.4970 (diff=0.4665, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:29:09] (step=0002021) Loss: 0.4919 (diff=0.4577, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:29:29] (step=0002022) Loss: 0.4939 (diff=0.4605, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:29:49] (step=0002023) Loss: 0.4850 (diff=0.4525, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:30:09] (step=0002024) Loss: 0.4925 (diff=0.4598, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:30:28] (step=0002025) Loss: 0.4874 (diff=0.4560, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:30:48] (step=0002026) Loss: 0.4736 (diff=0.4407, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:31:07] (step=0002027) Loss: 0.4799 (diff=0.4480, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:31:27] (step=0002028) Loss: 0.4818 (diff=0.4523, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:31:47] (step=0002029) Loss: 0.4883 (diff=0.4542, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:32:06] (step=0002030) Loss: 0.4657 (diff=0.4369, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:32:26] (step=0002031) Loss: 0.4861 (diff=0.4531, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:32:46] (step=0002032) Loss: 0.4598 (diff=0.4317, anat=0.0281), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:33:05] (step=0002033) Loss: 0.4966 (diff=0.4641, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:33:25] (step=0002034) Loss: 0.4712 (diff=0.4408, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:33:45] (step=0002035) Loss: 0.4816 (diff=0.4488, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:34:04] (step=0002036) Loss: 0.4720 (diff=0.4438, anat=0.0282), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:34:24] (step=0002037) Loss: 0.4765 (diff=0.4469, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:34:44] (step=0002038) Loss: 0.4849 (diff=0.4513, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:35:03] (step=0002039) Loss: 0.4996 (diff=0.4654, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:35:23] (step=0002040) Loss: 0.4854 (diff=0.4530, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:35:43] (step=0002041) Loss: 0.4787 (diff=0.4481, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:36:02] (step=0002042) Loss: 0.4753 (diff=0.4431, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:36:22] (step=0002043) Loss: 0.4794 (diff=0.4487, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:36:42] (step=0002044) Loss: 0.4771 (diff=0.4459, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:37:01] (step=0002045) Loss: 0.4973 (diff=0.4624, anat=0.0349), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:37:21] (step=0002046) Loss: 0.4774 (diff=0.4454, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:37:41] (step=0002047) Loss: 0.4902 (diff=0.4580, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:38:00] (step=0002048) Loss: 0.4779 (diff=0.4455, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:38:20] (step=0002049) Loss: 0.4765 (diff=0.4449, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:38:40] (step=0002050) Loss: 0.4730 (diff=0.4450, anat=0.0280), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:38:59] (step=0002051) Loss: 0.4722 (diff=0.4423, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:39:19] (step=0002052) Loss: 0.4750 (diff=0.4436, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:39:39] (step=0002053) Loss: 0.4847 (diff=0.4524, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:39:58] (step=0002054) Loss: 0.4912 (diff=0.4588, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:40:18] (step=0002055) Loss: 0.4788 (diff=0.4480, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:40:38] (step=0002056) Loss: 0.4875 (diff=0.4548, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:40:57] (step=0002057) Loss: 0.4804 (diff=0.4480, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:41:17] (step=0002058) Loss: 0.4854 (diff=0.4522, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:41:37] (step=0002059) Loss: 0.4822 (diff=0.4502, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:41:56] (step=0002060) Loss: 0.4748 (diff=0.4463, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:42:16] (step=0002061) Loss: 0.4722 (diff=0.4419, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:42:36] (step=0002062) Loss: 0.4811 (diff=0.4490, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:42:55] (step=0002063) Loss: 0.4886 (diff=0.4590, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:43:15] (step=0002064) Loss: 0.4669 (diff=0.4370, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:43:35] (step=0002065) Loss: 0.4981 (diff=0.4659, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:43:54] (step=0002066) Loss: 0.4700 (diff=0.4400, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:44:14] (step=0002067) Loss: 0.4858 (diff=0.4529, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:44:33] (step=0002068) Loss: 0.4931 (diff=0.4594, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:44:53] (step=0002069) Loss: 0.4818 (diff=0.4504, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:45:13] (step=0002070) Loss: 0.4907 (diff=0.4572, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.40, LR: 0.0001 +[2026-04-14 20:45:33] (step=0002071) Loss: 0.4712 (diff=0.4408, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:45:52] (step=0002072) Loss: 0.4709 (diff=0.4397, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:46:12] (step=0002073) Loss: 0.4606 (diff=0.4319, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:46:32] (step=0002074) Loss: 0.4694 (diff=0.4398, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:46:51] (step=0002075) Loss: 0.4784 (diff=0.4479, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:47:11] (step=0002076) Loss: 0.4876 (diff=0.4553, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:47:30] (step=0002077) Loss: 0.4841 (diff=0.4517, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:47:50] (step=0002078) Loss: 0.4734 (diff=0.4423, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:48:10] (step=0002079) Loss: 0.4885 (diff=0.4566, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:48:29] (step=0002080) Loss: 0.4818 (diff=0.4507, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:48:49] (step=0002081) Loss: 0.4721 (diff=0.4442, anat=0.0279), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:49:09] (step=0002082) Loss: 0.4866 (diff=0.4541, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:49:28] (step=0002083) Loss: 0.4886 (diff=0.4559, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:49:48] (step=0002084) Loss: 0.4764 (diff=0.4453, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:50:08] (step=0002085) Loss: 0.4800 (diff=0.4466, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:50:27] (step=0002086) Loss: 0.4729 (diff=0.4422, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:50:47] (step=0002087) Loss: 0.4758 (diff=0.4441, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:51:07] (step=0002088) Loss: 0.4679 (diff=0.4370, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:51:26] (step=0002089) Loss: 0.4774 (diff=0.4463, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:51:46] (step=0002090) Loss: 0.4863 (diff=0.4539, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:52:06] (step=0002091) Loss: 0.4704 (diff=0.4401, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:52:25] (step=0002092) Loss: 0.4819 (diff=0.4500, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:52:45] (step=0002093) Loss: 0.4791 (diff=0.4476, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:53:05] (step=0002094) Loss: 0.4952 (diff=0.4615, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:53:24] (step=0002095) Loss: 0.4821 (diff=0.4510, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:53:44] (step=0002096) Loss: 0.4743 (diff=0.4450, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:54:04] (step=0002097) Loss: 0.4850 (diff=0.4545, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:54:23] (step=0002098) Loss: 0.4738 (diff=0.4434, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:54:43] (step=0002099) Loss: 0.4917 (diff=0.4590, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:55:03] (step=0002100) Loss: 0.4698 (diff=0.4398, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:55:22] (step=0002101) Loss: 0.4703 (diff=0.4400, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:55:42] (step=0002102) Loss: 0.4796 (diff=0.4488, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:56:01] (step=0002103) Loss: 0.4719 (diff=0.4420, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:56:21] (step=0002104) Loss: 0.4862 (diff=0.4550, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:56:41] (step=0002105) Loss: 0.4756 (diff=0.4429, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:57:00] (step=0002106) Loss: 0.4756 (diff=0.4439, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:57:20] (step=0002107) Loss: 0.4666 (diff=0.4347, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:57:40] (step=0002108) Loss: 0.4764 (diff=0.4447, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:57:59] (step=0002109) Loss: 0.4843 (diff=0.4537, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:58:19] (step=0002110) Loss: 0.4779 (diff=0.4469, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:58:39] (step=0002111) Loss: 0.4885 (diff=0.4561, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:58:58] (step=0002112) Loss: 0.4656 (diff=0.4372, anat=0.0284), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:59:18] (step=0002113) Loss: 0.4880 (diff=0.4577, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:59:38] (step=0002114) Loss: 0.4780 (diff=0.4469, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 20:59:57] (step=0002115) Loss: 0.4874 (diff=0.4554, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 21:00:17] (step=0002116) Loss: 0.4902 (diff=0.4603, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 21:00:37] (step=0002117) Loss: 0.4765 (diff=0.4467, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 21:00:56] (step=0002118) Loss: 0.4864 (diff=0.4542, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 21:01:16] (step=0002119) Loss: 0.4757 (diff=0.4452, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 21:01:36] (step=0002120) Loss: 0.4887 (diff=0.4545, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 21:01:55] (step=0002121) Loss: 0.4832 (diff=0.4516, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.41, LR: 0.0001 +[2026-04-14 21:02:15] (step=0002122) Loss: 0.4775 (diff=0.4450, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:02:35] (step=0002123) Loss: 0.4889 (diff=0.4570, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:02:54] (step=0002124) Loss: 0.4805 (diff=0.4485, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:03:14] (step=0002125) Loss: 0.4824 (diff=0.4508, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:03:34] (step=0002126) Loss: 0.4697 (diff=0.4418, anat=0.0279), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:03:53] (step=0002127) Loss: 0.4759 (diff=0.4476, anat=0.0284), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:04:13] (step=0002128) Loss: 0.4795 (diff=0.4490, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:04:33] (step=0002129) Loss: 0.4806 (diff=0.4495, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:04:52] (step=0002130) Loss: 0.4815 (diff=0.4485, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:05:12] (step=0002131) Loss: 0.4922 (diff=0.4597, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:05:32] (step=0002132) Loss: 0.4775 (diff=0.4469, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:05:51] (step=0002133) Loss: 0.4976 (diff=0.4620, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:06:11] (step=0002134) Loss: 0.4867 (diff=0.4560, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:06:31] (step=0002135) Loss: 0.4772 (diff=0.4478, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:06:50] (step=0002136) Loss: 0.4804 (diff=0.4487, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:07:10] (step=0002137) Loss: 0.4866 (diff=0.4552, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:07:30] (step=0002138) Loss: 0.4743 (diff=0.4437, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:07:49] (step=0002139) Loss: 0.4842 (diff=0.4516, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:08:09] (step=0002140) Loss: 0.4824 (diff=0.4518, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:08:29] (step=0002141) Loss: 0.4806 (diff=0.4483, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:08:48] (step=0002142) Loss: 0.4689 (diff=0.4369, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:09:08] (step=0002143) Loss: 0.4695 (diff=0.4382, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:09:28] (step=0002144) Loss: 0.4792 (diff=0.4460, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:09:47] (step=0002145) Loss: 0.4657 (diff=0.4347, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:10:07] (step=0002146) Loss: 0.4846 (diff=0.4542, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:10:27] (step=0002147) Loss: 0.4805 (diff=0.4491, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:10:46] (step=0002148) Loss: 0.4981 (diff=0.4641, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:11:06] (step=0002149) Loss: 0.4944 (diff=0.4614, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:11:25] (step=0002150) Loss: 0.4850 (diff=0.4533, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:11:45] (step=0002151) Loss: 0.4797 (diff=0.4492, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:12:05] (step=0002152) Loss: 0.4697 (diff=0.4410, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:12:25] (step=0002153) Loss: 0.4719 (diff=0.4427, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:12:44] (step=0002154) Loss: 0.4738 (diff=0.4439, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:13:04] (step=0002155) Loss: 0.4834 (diff=0.4512, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:13:24] (step=0002156) Loss: 0.4737 (diff=0.4437, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:13:43] (step=0002157) Loss: 0.4943 (diff=0.4604, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:14:03] (step=0002158) Loss: 0.4689 (diff=0.4394, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:14:22] (step=0002159) Loss: 0.4864 (diff=0.4531, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:14:42] (step=0002160) Loss: 0.4824 (diff=0.4519, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:15:02] (step=0002161) Loss: 0.4685 (diff=0.4384, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:15:21] (step=0002162) Loss: 0.4650 (diff=0.4343, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:15:41] (step=0002163) Loss: 0.4902 (diff=0.4578, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:16:01] (step=0002164) Loss: 0.4959 (diff=0.4625, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:16:20] (step=0002165) Loss: 0.4764 (diff=0.4450, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:16:40] (step=0002166) Loss: 0.4820 (diff=0.4497, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:17:00] (step=0002167) Loss: 0.4911 (diff=0.4557, anat=0.0355), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:17:19] (step=0002168) Loss: 0.4741 (diff=0.4425, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:17:39] (step=0002169) Loss: 0.4769 (diff=0.4464, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:17:59] (step=0002170) Loss: 0.4754 (diff=0.4442, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:18:18] (step=0002171) Loss: 0.4821 (diff=0.4513, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:18:38] (step=0002172) Loss: 0.4838 (diff=0.4526, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.42, LR: 0.0001 +[2026-04-14 21:18:58] (step=0002173) Loss: 0.4990 (diff=0.4671, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:19:17] (step=0002174) Loss: 0.4870 (diff=0.4550, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:19:37] (step=0002175) Loss: 0.4842 (diff=0.4517, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:19:57] (step=0002176) Loss: 0.4818 (diff=0.4501, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:20:16] (step=0002177) Loss: 0.4765 (diff=0.4461, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:20:36] (step=0002178) Loss: 0.4839 (diff=0.4518, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:20:56] (step=0002179) Loss: 0.4783 (diff=0.4464, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:21:15] (step=0002180) Loss: 0.4770 (diff=0.4437, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:21:35] (step=0002181) Loss: 0.4832 (diff=0.4533, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:21:55] (step=0002182) Loss: 0.4821 (diff=0.4505, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:22:14] (step=0002183) Loss: 0.4910 (diff=0.4589, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:22:34] (step=0002184) Loss: 0.4752 (diff=0.4443, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:22:54] (step=0002185) Loss: 0.4832 (diff=0.4522, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:23:13] (step=0002186) Loss: 0.4793 (diff=0.4477, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:23:33] (step=0002187) Loss: 0.4768 (diff=0.4462, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:23:53] (step=0002188) Loss: 0.4881 (diff=0.4572, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:24:12] (step=0002189) Loss: 0.4864 (diff=0.4552, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:24:32] (step=0002190) Loss: 0.4760 (diff=0.4455, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:24:52] (step=0002191) Loss: 0.4723 (diff=0.4433, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:25:11] (step=0002192) Loss: 0.4752 (diff=0.4447, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:25:31] (step=0002193) Loss: 0.4764 (diff=0.4431, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:25:51] (step=0002194) Loss: 0.4706 (diff=0.4403, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:26:10] (step=0002195) Loss: 0.4711 (diff=0.4378, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:26:30] (step=0002196) Loss: 0.4805 (diff=0.4519, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:26:50] (step=0002197) Loss: 0.4898 (diff=0.4572, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:27:09] (step=0002198) Loss: 0.4850 (diff=0.4511, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:27:29] (step=0002199) Loss: 0.4671 (diff=0.4381, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:27:49] (step=0002200) Loss: 0.4858 (diff=0.4537, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:28:08] (step=0002201) Loss: 0.4888 (diff=0.4568, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:28:28] (step=0002202) Loss: 0.4644 (diff=0.4355, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:28:48] (step=0002203) Loss: 0.4717 (diff=0.4413, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:29:07] (step=0002204) Loss: 0.4738 (diff=0.4420, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:29:27] (step=0002205) Loss: 0.4749 (diff=0.4437, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:29:46] (step=0002206) Loss: 0.4768 (diff=0.4460, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:30:06] (step=0002207) Loss: 0.4818 (diff=0.4500, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:30:26] (step=0002208) Loss: 0.4710 (diff=0.4404, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:30:45] (step=0002209) Loss: 0.4780 (diff=0.4463, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:31:05] (step=0002210) Loss: 0.4811 (diff=0.4491, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:31:25] (step=0002211) Loss: 0.4649 (diff=0.4375, anat=0.0274), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:31:45] (step=0002212) Loss: 0.4700 (diff=0.4390, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:32:04] (step=0002213) Loss: 0.4730 (diff=0.4413, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:32:24] (step=0002214) Loss: 0.4798 (diff=0.4489, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:32:43] (step=0002215) Loss: 0.4812 (diff=0.4514, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:33:03] (step=0002216) Loss: 0.4742 (diff=0.4441, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:33:23] (step=0002217) Loss: 0.4887 (diff=0.4555, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:33:42] (step=0002218) Loss: 0.4806 (diff=0.4506, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:34:02] (step=0002219) Loss: 0.4761 (diff=0.4450, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:34:22] (step=0002220) Loss: 0.4873 (diff=0.4543, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:34:41] (step=0002221) Loss: 0.4844 (diff=0.4544, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:35:01] (step=0002222) Loss: 0.4843 (diff=0.4537, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:35:21] (step=0002223) Loss: 0.4781 (diff=0.4477, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.43, LR: 0.0001 +[2026-04-14 21:35:40] (step=0002224) Loss: 0.4900 (diff=0.4567, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:36:00] (step=0002225) Loss: 0.5078 (diff=0.4741, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:36:20] (step=0002226) Loss: 0.4772 (diff=0.4468, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:36:39] (step=0002227) Loss: 0.4752 (diff=0.4428, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:36:59] (step=0002228) Loss: 0.4874 (diff=0.4568, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:37:19] (step=0002229) Loss: 0.4795 (diff=0.4498, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:37:38] (step=0002230) Loss: 0.4723 (diff=0.4416, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:37:58] (step=0002231) Loss: 0.4905 (diff=0.4597, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:38:18] (step=0002232) Loss: 0.4734 (diff=0.4423, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:38:37] (step=0002233) Loss: 0.4698 (diff=0.4402, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:38:57] (step=0002234) Loss: 0.4662 (diff=0.4355, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:39:17] (step=0002235) Loss: 0.4813 (diff=0.4496, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:39:36] (step=0002236) Loss: 0.4826 (diff=0.4500, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:39:56] (step=0002237) Loss: 0.4809 (diff=0.4503, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:40:16] (step=0002238) Loss: 0.4885 (diff=0.4564, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:40:35] (step=0002239) Loss: 0.4732 (diff=0.4438, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:40:55] (step=0002240) Loss: 0.4811 (diff=0.4494, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:41:15] (step=0002241) Loss: 0.4843 (diff=0.4529, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:41:34] (step=0002242) Loss: 0.4616 (diff=0.4326, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:41:54] (step=0002243) Loss: 0.4704 (diff=0.4401, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:42:14] (step=0002244) Loss: 0.4860 (diff=0.4539, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:42:34] (step=0002245) Loss: 0.4833 (diff=0.4502, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:42:53] (step=0002246) Loss: 0.4996 (diff=0.4655, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:43:13] (step=0002247) Loss: 0.4891 (diff=0.4562, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:43:32] (step=0002248) Loss: 0.4646 (diff=0.4333, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:43:52] (step=0002249) Loss: 0.4774 (diff=0.4451, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:44:12] (step=0002250) Loss: 0.4815 (diff=0.4488, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:44:31] (step=0002251) Loss: 0.4863 (diff=0.4528, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:44:51] (step=0002252) Loss: 0.4757 (diff=0.4457, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:45:11] (step=0002253) Loss: 0.4913 (diff=0.4557, anat=0.0356), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:45:30] (step=0002254) Loss: 0.4758 (diff=0.4464, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:45:50] (step=0002255) Loss: 0.4781 (diff=0.4483, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:46:10] (step=0002256) Loss: 0.4810 (diff=0.4471, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:46:30] (step=0002257) Loss: 0.4791 (diff=0.4482, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:46:49] (step=0002258) Loss: 0.4761 (diff=0.4414, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:47:09] (step=0002259) Loss: 0.4905 (diff=0.4591, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:47:29] (step=0002260) Loss: 0.4769 (diff=0.4438, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:47:48] (step=0002261) Loss: 0.4899 (diff=0.4589, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:48:08] (step=0002262) Loss: 0.4829 (diff=0.4507, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:48:28] (step=0002263) Loss: 0.4792 (diff=0.4476, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:48:47] (step=0002264) Loss: 0.4762 (diff=0.4448, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:49:07] (step=0002265) Loss: 0.4692 (diff=0.4391, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:49:27] (step=0002266) Loss: 0.4718 (diff=0.4403, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:49:46] (step=0002267) Loss: 0.4768 (diff=0.4469, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:50:06] (step=0002268) Loss: 0.4973 (diff=0.4646, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:50:26] (step=0002269) Loss: 0.4722 (diff=0.4389, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:50:45] (step=0002270) Loss: 0.4803 (diff=0.4487, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:51:05] (step=0002271) Loss: 0.4719 (diff=0.4392, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:51:25] (step=0002272) Loss: 0.4935 (diff=0.4594, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:51:44] (step=0002273) Loss: 0.4875 (diff=0.4556, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:52:04] (step=0002274) Loss: 0.4863 (diff=0.4539, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.44, LR: 0.0001 +[2026-04-14 21:52:24] (step=0002275) Loss: 0.4662 (diff=0.4376, anat=0.0286), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:52:43] (step=0002276) Loss: 0.4786 (diff=0.4464, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:53:03] (step=0002277) Loss: 0.4704 (diff=0.4400, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:53:23] (step=0002278) Loss: 0.4805 (diff=0.4509, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:53:42] (step=0002279) Loss: 0.4861 (diff=0.4556, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:54:02] (step=0002280) Loss: 0.4894 (diff=0.4579, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:54:22] (step=0002281) Loss: 0.4936 (diff=0.4618, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:54:42] (step=0002282) Loss: 0.4758 (diff=0.4467, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:55:01] (step=0002283) Loss: 0.4894 (diff=0.4579, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:55:21] (step=0002284) Loss: 0.4759 (diff=0.4455, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:55:41] (step=0002285) Loss: 0.4792 (diff=0.4458, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:56:00] (step=0002286) Loss: 0.5036 (diff=0.4688, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:56:20] (step=0002287) Loss: 0.4740 (diff=0.4439, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:56:40] (step=0002288) Loss: 0.4756 (diff=0.4433, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:56:59] (step=0002289) Loss: 0.4867 (diff=0.4561, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:57:19] (step=0002290) Loss: 0.4889 (diff=0.4573, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:57:39] (step=0002291) Loss: 0.4863 (diff=0.4549, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:57:58] (step=0002292) Loss: 0.4605 (diff=0.4321, anat=0.0284), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:58:18] (step=0002293) Loss: 0.4813 (diff=0.4505, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:58:38] (step=0002294) Loss: 0.4819 (diff=0.4502, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:58:57] (step=0002295) Loss: 0.4973 (diff=0.4649, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:59:17] (step=0002296) Loss: 0.4808 (diff=0.4494, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:59:37] (step=0002297) Loss: 0.4759 (diff=0.4469, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 21:59:56] (step=0002298) Loss: 0.4879 (diff=0.4526, anat=0.0354), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:00:16] (step=0002299) Loss: 0.4882 (diff=0.4551, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:00:36] (step=0002300) Loss: 0.4835 (diff=0.4526, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:00:55] (step=0002301) Loss: 0.4878 (diff=0.4538, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:01:15] (step=0002302) Loss: 0.4848 (diff=0.4516, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:01:35] (step=0002303) Loss: 0.4884 (diff=0.4576, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:01:54] (step=0002304) Loss: 0.4726 (diff=0.4410, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:02:14] (step=0002305) Loss: 0.4857 (diff=0.4532, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:02:34] (step=0002306) Loss: 0.4802 (diff=0.4478, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:02:53] (step=0002307) Loss: 0.4834 (diff=0.4522, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:03:13] (step=0002308) Loss: 0.4724 (diff=0.4426, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:03:33] (step=0002309) Loss: 0.4689 (diff=0.4392, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:03:52] (step=0002310) Loss: 0.4791 (diff=0.4488, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:04:12] (step=0002311) Loss: 0.4831 (diff=0.4511, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:04:32] (step=0002312) Loss: 0.4752 (diff=0.4431, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:04:51] (step=0002313) Loss: 0.4732 (diff=0.4432, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:05:11] (step=0002314) Loss: 0.4705 (diff=0.4409, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:05:31] (step=0002315) Loss: 0.4816 (diff=0.4525, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:05:51] (step=0002316) Loss: 0.4882 (diff=0.4561, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:06:10] (step=0002317) Loss: 0.4776 (diff=0.4455, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:06:30] (step=0002318) Loss: 0.4818 (diff=0.4499, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:06:50] (step=0002319) Loss: 0.4917 (diff=0.4591, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:07:09] (step=0002320) Loss: 0.4674 (diff=0.4355, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:07:29] (step=0002321) Loss: 0.4750 (diff=0.4433, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:07:49] (step=0002322) Loss: 0.4688 (diff=0.4394, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:08:08] (step=0002323) Loss: 0.4821 (diff=0.4501, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:08:28] (step=0002324) Loss: 0.4903 (diff=0.4580, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:08:48] (step=0002325) Loss: 0.4797 (diff=0.4475, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.45, LR: 0.0001 +[2026-04-14 22:09:07] (step=0002326) Loss: 0.4767 (diff=0.4482, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:09:27] (step=0002327) Loss: 0.4839 (diff=0.4530, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:09:47] (step=0002328) Loss: 0.4799 (diff=0.4496, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:10:06] (step=0002329) Loss: 0.4776 (diff=0.4475, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:10:26] (step=0002330) Loss: 0.4730 (diff=0.4401, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:10:46] (step=0002331) Loss: 0.4997 (diff=0.4649, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:11:05] (step=0002332) Loss: 0.4813 (diff=0.4523, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:11:25] (step=0002333) Loss: 0.4988 (diff=0.4649, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:11:45] (step=0002334) Loss: 0.4795 (diff=0.4484, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:12:04] (step=0002335) Loss: 0.4966 (diff=0.4621, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:12:24] (step=0002336) Loss: 0.4796 (diff=0.4466, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:12:44] (step=0002337) Loss: 0.4825 (diff=0.4518, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:13:04] (step=0002338) Loss: 0.4711 (diff=0.4409, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:13:23] (step=0002339) Loss: 0.4789 (diff=0.4491, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:13:43] (step=0002340) Loss: 0.4678 (diff=0.4394, anat=0.0284), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:14:03] (step=0002341) Loss: 0.4998 (diff=0.4661, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:14:22] (step=0002342) Loss: 0.4792 (diff=0.4459, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:14:42] (step=0002343) Loss: 0.4831 (diff=0.4519, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:15:02] (step=0002344) Loss: 0.4828 (diff=0.4504, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:15:21] (step=0002345) Loss: 0.4951 (diff=0.4604, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:15:41] (step=0002346) Loss: 0.4780 (diff=0.4467, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:16:01] (step=0002347) Loss: 0.4925 (diff=0.4605, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:16:20] (step=0002348) Loss: 0.4856 (diff=0.4540, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:16:40] (step=0002349) Loss: 0.4734 (diff=0.4423, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:17:00] (step=0002350) Loss: 0.4751 (diff=0.4450, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:17:20] (step=0002351) Loss: 0.4738 (diff=0.4451, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:17:39] (step=0002352) Loss: 0.4743 (diff=0.4418, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:17:59] (step=0002353) Loss: 0.4786 (diff=0.4484, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:18:19] (step=0002354) Loss: 0.4869 (diff=0.4551, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:18:38] (step=0002355) Loss: 0.4762 (diff=0.4476, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:18:58] (step=0002356) Loss: 0.4922 (diff=0.4602, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:19:18] (step=0002357) Loss: 0.4840 (diff=0.4513, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:19:37] (step=0002358) Loss: 0.4802 (diff=0.4499, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:19:57] (step=0002359) Loss: 0.4809 (diff=0.4487, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:20:17] (step=0002360) Loss: 0.4867 (diff=0.4550, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:20:36] (step=0002361) Loss: 0.4888 (diff=0.4576, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:20:56] (step=0002362) Loss: 0.4848 (diff=0.4525, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:21:16] (step=0002363) Loss: 0.4665 (diff=0.4377, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:21:35] (step=0002364) Loss: 0.4808 (diff=0.4487, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:21:55] (step=0002365) Loss: 0.4810 (diff=0.4480, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:22:15] (step=0002366) Loss: 0.4800 (diff=0.4487, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:22:34] (step=0002367) Loss: 0.4649 (diff=0.4343, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:22:54] (step=0002368) Loss: 0.4679 (diff=0.4401, anat=0.0278), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:23:14] (step=0002369) Loss: 0.5035 (diff=0.4699, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:23:33] (step=0002370) Loss: 0.4771 (diff=0.4450, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:23:53] (step=0002371) Loss: 0.4874 (diff=0.4539, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:24:13] (step=0002372) Loss: 0.4828 (diff=0.4513, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:24:33] (step=0002373) Loss: 0.4840 (diff=0.4514, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:24:52] (step=0002374) Loss: 0.4786 (diff=0.4469, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:25:12] (step=0002375) Loss: 0.4930 (diff=0.4604, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:25:32] (step=0002376) Loss: 0.4692 (diff=0.4421, anat=0.0270), Steps/Sec: 0.05, Epoch: 0.46, LR: 0.0001 +[2026-04-14 22:25:51] (step=0002377) Loss: 0.4816 (diff=0.4519, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:26:11] (step=0002378) Loss: 0.4894 (diff=0.4589, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:26:31] (step=0002379) Loss: 0.4717 (diff=0.4425, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:26:50] (step=0002380) Loss: 0.4786 (diff=0.4473, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:27:10] (step=0002381) Loss: 0.4794 (diff=0.4472, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:27:30] (step=0002382) Loss: 0.4888 (diff=0.4576, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:27:49] (step=0002383) Loss: 0.4922 (diff=0.4553, anat=0.0370), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:28:09] (step=0002384) Loss: 0.4763 (diff=0.4443, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:28:29] (step=0002385) Loss: 0.4808 (diff=0.4491, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:28:48] (step=0002386) Loss: 0.4830 (diff=0.4514, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:29:08] (step=0002387) Loss: 0.4892 (diff=0.4574, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:29:28] (step=0002388) Loss: 0.4779 (diff=0.4476, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:29:47] (step=0002389) Loss: 0.4884 (diff=0.4575, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:30:07] (step=0002390) Loss: 0.4867 (diff=0.4545, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:30:27] (step=0002391) Loss: 0.4826 (diff=0.4483, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:30:47] (step=0002392) Loss: 0.4762 (diff=0.4473, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:31:06] (step=0002393) Loss: 0.4709 (diff=0.4415, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:31:26] (step=0002394) Loss: 0.4897 (diff=0.4570, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:31:46] (step=0002395) Loss: 0.4776 (diff=0.4466, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:32:06] (step=0002396) Loss: 0.4729 (diff=0.4422, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:32:25] (step=0002397) Loss: 0.4891 (diff=0.4566, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:32:45] (step=0002398) Loss: 0.4874 (diff=0.4545, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:33:05] (step=0002399) Loss: 0.4805 (diff=0.4500, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:33:25] (step=0002400) Loss: 0.4701 (diff=0.4408, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:33:44] (step=0002401) Loss: 0.4907 (diff=0.4588, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:34:04] (step=0002402) Loss: 0.4640 (diff=0.4364, anat=0.0276), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:34:24] (step=0002403) Loss: 0.4882 (diff=0.4567, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:34:43] (step=0002404) Loss: 0.4819 (diff=0.4524, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:35:03] (step=0002405) Loss: 0.4866 (diff=0.4528, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:35:23] (step=0002406) Loss: 0.4785 (diff=0.4507, anat=0.0278), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:35:42] (step=0002407) Loss: 0.4849 (diff=0.4521, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:36:02] (step=0002408) Loss: 0.4918 (diff=0.4568, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:36:22] (step=0002409) Loss: 0.4846 (diff=0.4529, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:36:41] (step=0002410) Loss: 0.4719 (diff=0.4416, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:37:01] (step=0002411) Loss: 0.4843 (diff=0.4520, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:37:21] (step=0002412) Loss: 0.4874 (diff=0.4559, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:37:40] (step=0002413) Loss: 0.4811 (diff=0.4488, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:38:00] (step=0002414) Loss: 0.4901 (diff=0.4578, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:38:20] (step=0002415) Loss: 0.4944 (diff=0.4598, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:38:39] (step=0002416) Loss: 0.4740 (diff=0.4446, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:38:59] (step=0002417) Loss: 0.4647 (diff=0.4357, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:39:19] (step=0002418) Loss: 0.4895 (diff=0.4582, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:39:39] (step=0002419) Loss: 0.4742 (diff=0.4440, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:39:58] (step=0002420) Loss: 0.4906 (diff=0.4597, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:40:18] (step=0002421) Loss: 0.4822 (diff=0.4514, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:40:38] (step=0002422) Loss: 0.4925 (diff=0.4573, anat=0.0352), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:40:57] (step=0002423) Loss: 0.4924 (diff=0.4601, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:41:17] (step=0002424) Loss: 0.4813 (diff=0.4503, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:41:37] (step=0002425) Loss: 0.4929 (diff=0.4608, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:41:56] (step=0002426) Loss: 0.4850 (diff=0.4535, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:42:16] (step=0002427) Loss: 0.4731 (diff=0.4420, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:42:35] (step=0002428) Loss: 0.4972 (diff=0.4632, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.47, LR: 0.0001 +[2026-04-14 22:42:55] (step=0002429) Loss: 0.4709 (diff=0.4419, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:43:15] (step=0002430) Loss: 0.4694 (diff=0.4388, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:43:35] (step=0002431) Loss: 0.4752 (diff=0.4456, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:43:54] (step=0002432) Loss: 0.4810 (diff=0.4514, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:44:14] (step=0002433) Loss: 0.4857 (diff=0.4524, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:44:34] (step=0002434) Loss: 0.4656 (diff=0.4366, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:44:53] (step=0002435) Loss: 0.4845 (diff=0.4534, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:45:13] (step=0002436) Loss: 0.4952 (diff=0.4619, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:45:33] (step=0002437) Loss: 0.4771 (diff=0.4467, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:45:52] (step=0002438) Loss: 0.4819 (diff=0.4520, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:46:12] (step=0002439) Loss: 0.4821 (diff=0.4510, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:46:32] (step=0002440) Loss: 0.4717 (diff=0.4427, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:46:51] (step=0002441) Loss: 0.4845 (diff=0.4518, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:47:11] (step=0002442) Loss: 0.4929 (diff=0.4631, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:47:31] (step=0002443) Loss: 0.4908 (diff=0.4562, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:47:50] (step=0002444) Loss: 0.4839 (diff=0.4523, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:48:10] (step=0002445) Loss: 0.4953 (diff=0.4614, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:48:30] (step=0002446) Loss: 0.4839 (diff=0.4509, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:48:50] (step=0002447) Loss: 0.4773 (diff=0.4474, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:49:09] (step=0002448) Loss: 0.4877 (diff=0.4566, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:49:29] (step=0002449) Loss: 0.4900 (diff=0.4578, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:49:49] (step=0002450) Loss: 0.4819 (diff=0.4490, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:50:09] (step=0002451) Loss: 0.4794 (diff=0.4458, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:50:28] (step=0002452) Loss: 0.4808 (diff=0.4487, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:50:48] (step=0002453) Loss: 0.4815 (diff=0.4495, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:51:08] (step=0002454) Loss: 0.4817 (diff=0.4510, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:51:28] (step=0002455) Loss: 0.4672 (diff=0.4376, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:51:47] (step=0002456) Loss: 0.4815 (diff=0.4484, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:52:07] (step=0002457) Loss: 0.4769 (diff=0.4470, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:52:27] (step=0002458) Loss: 0.4796 (diff=0.4496, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:52:46] (step=0002459) Loss: 0.4665 (diff=0.4358, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:53:06] (step=0002460) Loss: 0.4807 (diff=0.4489, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:53:26] (step=0002461) Loss: 0.4762 (diff=0.4471, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:53:46] (step=0002462) Loss: 0.4822 (diff=0.4505, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:54:05] (step=0002463) Loss: 0.4704 (diff=0.4408, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:54:25] (step=0002464) Loss: 0.4937 (diff=0.4590, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:54:45] (step=0002465) Loss: 0.4914 (diff=0.4595, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:55:04] (step=0002466) Loss: 0.4864 (diff=0.4525, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:55:24] (step=0002467) Loss: 0.4801 (diff=0.4498, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:55:44] (step=0002468) Loss: 0.4783 (diff=0.4459, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:56:04] (step=0002469) Loss: 0.4769 (diff=0.4480, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:56:23] (step=0002470) Loss: 0.4839 (diff=0.4536, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:56:43] (step=0002471) Loss: 0.4975 (diff=0.4662, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:57:03] (step=0002472) Loss: 0.4830 (diff=0.4518, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:57:23] (step=0002473) Loss: 0.4891 (diff=0.4556, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:57:42] (step=0002474) Loss: 0.4626 (diff=0.4327, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:58:02] (step=0002475) Loss: 0.4825 (diff=0.4526, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:58:22] (step=0002476) Loss: 0.4901 (diff=0.4575, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:58:41] (step=0002477) Loss: 0.4910 (diff=0.4583, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:59:01] (step=0002478) Loss: 0.4582 (diff=0.4309, anat=0.0273), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:59:21] (step=0002479) Loss: 0.4821 (diff=0.4529, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.48, LR: 0.0001 +[2026-04-14 22:59:41] (step=0002480) Loss: 0.4872 (diff=0.4554, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:00:01] (step=0002481) Loss: 0.4816 (diff=0.4496, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:00:20] (step=0002482) Loss: 0.4804 (diff=0.4488, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:00:40] (step=0002483) Loss: 0.4744 (diff=0.4442, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:01:00] (step=0002484) Loss: 0.4884 (diff=0.4553, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:01:19] (step=0002485) Loss: 0.4987 (diff=0.4646, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:01:39] (step=0002486) Loss: 0.4709 (diff=0.4432, anat=0.0277), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:01:59] (step=0002487) Loss: 0.4925 (diff=0.4615, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:02:18] (step=0002488) Loss: 0.4940 (diff=0.4594, anat=0.0346), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:02:38] (step=0002489) Loss: 0.4922 (diff=0.4601, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:02:58] (step=0002490) Loss: 0.4844 (diff=0.4532, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:03:17] (step=0002491) Loss: 0.4674 (diff=0.4385, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:03:37] (step=0002492) Loss: 0.4805 (diff=0.4496, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:03:57] (step=0002493) Loss: 0.4724 (diff=0.4429, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:04:17] (step=0002494) Loss: 0.4900 (diff=0.4575, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:04:36] (step=0002495) Loss: 0.4848 (diff=0.4543, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:04:56] (step=0002496) Loss: 0.4851 (diff=0.4543, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:05:16] (step=0002497) Loss: 0.4951 (diff=0.4625, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:05:35] (step=0002498) Loss: 0.4761 (diff=0.4470, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:05:55] (step=0002499) Loss: 0.4883 (diff=0.4548, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:06:15] (step=0002500) Loss: 0.4895 (diff=0.4569, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:06:15] [RANK 0] Saving current state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500 +[2026-04-14 23:06:27] [RANK 0] Model weights saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/model.safetensors +[2026-04-14 23:06:27] [RANK 0] Optimizer state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/optimizer.bin +[2026-04-14 23:06:27] [RANK 0] Scheduler state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/scheduler.bin +[2026-04-14 23:06:27] [RANK 0] Sampler state for dataloader 0 saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/sampler.bin +[2026-04-14 23:06:27] [RANK 0] Random states saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500/random_states_0.pkl +[2026-04-14 23:06:27] Saved accelerator state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-2500 +[2026-04-14 23:06:27] Saved checkpoint to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0002500/ +[2026-04-14 23:06:46] (step=0002501) Loss: 0.4773 (diff=0.4459, anat=0.0314), Steps/Sec: 0.03, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:07:05] (step=0002502) Loss: 0.4965 (diff=0.4623, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:07:25] (step=0002503) Loss: 0.4755 (diff=0.4459, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:07:45] (step=0002504) Loss: 0.4645 (diff=0.4352, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:08:05] (step=0002505) Loss: 0.4834 (diff=0.4531, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:08:24] (step=0002506) Loss: 0.4763 (diff=0.4448, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:08:44] (step=0002507) Loss: 0.4739 (diff=0.4421, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:09:04] (step=0002508) Loss: 0.4939 (diff=0.4614, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:09:24] (step=0002509) Loss: 0.4749 (diff=0.4429, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:09:44] (step=0002510) Loss: 0.4804 (diff=0.4509, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:10:03] (step=0002511) Loss: 0.4959 (diff=0.4646, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:10:23] (step=0002512) Loss: 0.4785 (diff=0.4476, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:10:43] (step=0002513) Loss: 0.4874 (diff=0.4535, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:11:03] (step=0002514) Loss: 0.4690 (diff=0.4396, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:11:23] (step=0002515) Loss: 0.4757 (diff=0.4452, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:11:42] (step=0002516) Loss: 0.4906 (diff=0.4578, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:12:02] (step=0002517) Loss: 0.4786 (diff=0.4470, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:12:22] (step=0002518) Loss: 0.4819 (diff=0.4506, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:12:41] (step=0002519) Loss: 0.4900 (diff=0.4585, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:13:01] (step=0002520) Loss: 0.4932 (diff=0.4622, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:13:21] (step=0002521) Loss: 0.4857 (diff=0.4549, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:13:41] (step=0002522) Loss: 0.4753 (diff=0.4438, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:14:00] (step=0002523) Loss: 0.4764 (diff=0.4457, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:14:20] (step=0002524) Loss: 0.5014 (diff=0.4704, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:14:40] (step=0002525) Loss: 0.4757 (diff=0.4456, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:14:59] (step=0002526) Loss: 0.4840 (diff=0.4523, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:15:19] (step=0002527) Loss: 0.4794 (diff=0.4489, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:15:39] (step=0002528) Loss: 0.4871 (diff=0.4559, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:15:59] (step=0002529) Loss: 0.4660 (diff=0.4367, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:16:18] (step=0002530) Loss: 0.4756 (diff=0.4445, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.49, LR: 0.0001 +[2026-04-14 23:16:38] (step=0002531) Loss: 0.4823 (diff=0.4508, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:16:58] (step=0002532) Loss: 0.4671 (diff=0.4363, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:17:17] (step=0002533) Loss: 0.4637 (diff=0.4354, anat=0.0283), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:17:37] (step=0002534) Loss: 0.4938 (diff=0.4602, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:17:57] (step=0002535) Loss: 0.4861 (diff=0.4536, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:18:17] (step=0002536) Loss: 0.4788 (diff=0.4478, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:18:36] (step=0002537) Loss: 0.4903 (diff=0.4588, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:18:56] (step=0002538) Loss: 0.4820 (diff=0.4523, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:19:16] (step=0002539) Loss: 0.4826 (diff=0.4521, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:19:35] (step=0002540) Loss: 0.4873 (diff=0.4555, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:19:55] (step=0002541) Loss: 0.4701 (diff=0.4406, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:20:15] (step=0002542) Loss: 0.4772 (diff=0.4477, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:20:35] (step=0002543) Loss: 0.4846 (diff=0.4533, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:20:54] (step=0002544) Loss: 0.4739 (diff=0.4424, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:21:14] (step=0002545) Loss: 0.4888 (diff=0.4575, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:21:34] (step=0002546) Loss: 0.4848 (diff=0.4504, anat=0.0344), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:21:54] (step=0002547) Loss: 0.4822 (diff=0.4513, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:22:14] (step=0002548) Loss: 0.4753 (diff=0.4421, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:22:33] (step=0002549) Loss: 0.4849 (diff=0.4533, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:22:53] (step=0002550) Loss: 0.4939 (diff=0.4591, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:23:13] (step=0002551) Loss: 0.4873 (diff=0.4546, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:23:33] (step=0002552) Loss: 0.4802 (diff=0.4496, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:23:52] (step=0002553) Loss: 0.4779 (diff=0.4467, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:24:12] (step=0002554) Loss: 0.4759 (diff=0.4430, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:24:32] (step=0002555) Loss: 0.4833 (diff=0.4529, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:24:51] (step=0002556) Loss: 0.4877 (diff=0.4554, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:25:11] (step=0002557) Loss: 0.4835 (diff=0.4523, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:25:31] (step=0002558) Loss: 0.4759 (diff=0.4453, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:25:51] (step=0002559) Loss: 0.4761 (diff=0.4446, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:26:10] (step=0002560) Loss: 0.4818 (diff=0.4483, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:26:30] (step=0002561) Loss: 0.4755 (diff=0.4453, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:26:50] (step=0002562) Loss: 0.4935 (diff=0.4603, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:27:10] (step=0002563) Loss: 0.4764 (diff=0.4441, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:27:29] (step=0002564) Loss: 0.4956 (diff=0.4629, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:27:49] (step=0002565) Loss: 0.4870 (diff=0.4547, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:28:09] (step=0002566) Loss: 0.4782 (diff=0.4454, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:28:28] (step=0002567) Loss: 0.4656 (diff=0.4359, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:28:48] (step=0002568) Loss: 0.4717 (diff=0.4411, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:29:08] (step=0002569) Loss: 0.4828 (diff=0.4522, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:29:28] (step=0002570) Loss: 0.4771 (diff=0.4463, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:29:47] (step=0002571) Loss: 0.4737 (diff=0.4426, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:30:07] (step=0002572) Loss: 0.4925 (diff=0.4583, anat=0.0342), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:30:27] (step=0002573) Loss: 0.4843 (diff=0.4535, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:30:47] (step=0002574) Loss: 0.4837 (diff=0.4516, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:31:06] (step=0002575) Loss: 0.4689 (diff=0.4391, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:31:26] (step=0002576) Loss: 0.4779 (diff=0.4482, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:31:46] (step=0002577) Loss: 0.4908 (diff=0.4590, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:32:06] (step=0002578) Loss: 0.4843 (diff=0.4521, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:32:25] (step=0002579) Loss: 0.4734 (diff=0.4418, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:32:45] (step=0002580) Loss: 0.4813 (diff=0.4524, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:33:05] (step=0002581) Loss: 0.4773 (diff=0.4459, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.50, LR: 0.0001 +[2026-04-14 23:33:25] (step=0002582) Loss: 0.4764 (diff=0.4469, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:33:44] (step=0002583) Loss: 0.4741 (diff=0.4430, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:34:04] (step=0002584) Loss: 0.4805 (diff=0.4506, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:34:24] (step=0002585) Loss: 0.4792 (diff=0.4484, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:34:44] (step=0002586) Loss: 0.4773 (diff=0.4469, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:35:03] (step=0002587) Loss: 0.4835 (diff=0.4525, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:35:23] (step=0002588) Loss: 0.4856 (diff=0.4538, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:35:43] (step=0002589) Loss: 0.4679 (diff=0.4389, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:36:02] (step=0002590) Loss: 0.4923 (diff=0.4591, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:36:22] (step=0002591) Loss: 0.4840 (diff=0.4519, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:36:42] (step=0002592) Loss: 0.4739 (diff=0.4426, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:37:01] (step=0002593) Loss: 0.4878 (diff=0.4556, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:37:21] (step=0002594) Loss: 0.4864 (diff=0.4541, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:37:41] (step=0002595) Loss: 0.4790 (diff=0.4495, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:38:00] (step=0002596) Loss: 0.4825 (diff=0.4511, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:38:20] (step=0002597) Loss: 0.4787 (diff=0.4484, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:38:40] (step=0002598) Loss: 0.4737 (diff=0.4412, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:38:59] (step=0002599) Loss: 0.4733 (diff=0.4421, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:39:19] (step=0002600) Loss: 0.4704 (diff=0.4423, anat=0.0281), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:39:39] (step=0002601) Loss: 0.4742 (diff=0.4457, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:39:58] (step=0002602) Loss: 0.4881 (diff=0.4573, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:40:18] (step=0002603) Loss: 0.4801 (diff=0.4485, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:40:38] (step=0002604) Loss: 0.4798 (diff=0.4491, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:40:57] (step=0002605) Loss: 0.4887 (diff=0.4574, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:41:17] (step=0002606) Loss: 0.4883 (diff=0.4574, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:41:37] (step=0002607) Loss: 0.4761 (diff=0.4437, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:41:57] (step=0002608) Loss: 0.4772 (diff=0.4452, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:42:16] (step=0002609) Loss: 0.4889 (diff=0.4558, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:42:36] (step=0002610) Loss: 0.4744 (diff=0.4429, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:42:56] (step=0002611) Loss: 0.4855 (diff=0.4535, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:43:15] (step=0002612) Loss: 0.4893 (diff=0.4570, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:43:35] (step=0002613) Loss: 0.4859 (diff=0.4541, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:43:55] (step=0002614) Loss: 0.4760 (diff=0.4453, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:44:14] (step=0002615) Loss: 0.4970 (diff=0.4636, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:44:34] (step=0002616) Loss: 0.4775 (diff=0.4481, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:44:54] (step=0002617) Loss: 0.4749 (diff=0.4450, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:45:13] (step=0002618) Loss: 0.4898 (diff=0.4577, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:45:33] (step=0002619) Loss: 0.4962 (diff=0.4633, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:45:53] (step=0002620) Loss: 0.4879 (diff=0.4564, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:46:12] (step=0002621) Loss: 0.4934 (diff=0.4595, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:46:32] (step=0002622) Loss: 0.4864 (diff=0.4528, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:46:52] (step=0002623) Loss: 0.4795 (diff=0.4488, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:47:11] (step=0002624) Loss: 0.4826 (diff=0.4507, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:47:31] (step=0002625) Loss: 0.4755 (diff=0.4461, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:47:51] (step=0002626) Loss: 0.4761 (diff=0.4457, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:48:10] (step=0002627) Loss: 0.4731 (diff=0.4450, anat=0.0281), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:48:30] (step=0002628) Loss: 0.4697 (diff=0.4365, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:48:50] (step=0002629) Loss: 0.4939 (diff=0.4621, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:49:10] (step=0002630) Loss: 0.4775 (diff=0.4480, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:49:29] (step=0002631) Loss: 0.4821 (diff=0.4516, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:49:49] (step=0002632) Loss: 0.4794 (diff=0.4491, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.51, LR: 0.0001 +[2026-04-14 23:50:09] (step=0002633) Loss: 0.4935 (diff=0.4619, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:50:28] (step=0002634) Loss: 0.4769 (diff=0.4463, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:50:48] (step=0002635) Loss: 0.4881 (diff=0.4539, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:51:08] (step=0002636) Loss: 0.4777 (diff=0.4488, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:51:27] (step=0002637) Loss: 0.4786 (diff=0.4469, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:51:47] (step=0002638) Loss: 0.4805 (diff=0.4489, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:52:06] (step=0002639) Loss: 0.4761 (diff=0.4448, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:52:26] (step=0002640) Loss: 0.4779 (diff=0.4461, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:52:46] (step=0002641) Loss: 0.4856 (diff=0.4537, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:53:06] (step=0002642) Loss: 0.4873 (diff=0.4543, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:53:25] (step=0002643) Loss: 0.4752 (diff=0.4428, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:53:45] (step=0002644) Loss: 0.4892 (diff=0.4560, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:54:05] (step=0002645) Loss: 0.4763 (diff=0.4436, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:54:24] (step=0002646) Loss: 0.4826 (diff=0.4526, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:54:44] (step=0002647) Loss: 0.4874 (diff=0.4537, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:55:04] (step=0002648) Loss: 0.4656 (diff=0.4372, anat=0.0283), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:55:23] (step=0002649) Loss: 0.4849 (diff=0.4558, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:55:43] (step=0002650) Loss: 0.4882 (diff=0.4542, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:56:03] (step=0002651) Loss: 0.4740 (diff=0.4424, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:56:22] (step=0002652) Loss: 0.4802 (diff=0.4490, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:56:42] (step=0002653) Loss: 0.4933 (diff=0.4607, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:57:02] (step=0002654) Loss: 0.4746 (diff=0.4417, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:57:21] (step=0002655) Loss: 0.4769 (diff=0.4455, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:57:41] (step=0002656) Loss: 0.4832 (diff=0.4504, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:58:01] (step=0002657) Loss: 0.4824 (diff=0.4502, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:58:20] (step=0002658) Loss: 0.4716 (diff=0.4373, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:58:40] (step=0002659) Loss: 0.4843 (diff=0.4523, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:59:00] (step=0002660) Loss: 0.4896 (diff=0.4572, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:59:19] (step=0002661) Loss: 0.4849 (diff=0.4553, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:59:39] (step=0002662) Loss: 0.4731 (diff=0.4418, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-14 23:59:59] (step=0002663) Loss: 0.4863 (diff=0.4544, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:00:18] (step=0002664) Loss: 0.4859 (diff=0.4532, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:00:38] (step=0002665) Loss: 0.4751 (diff=0.4433, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:00:58] (step=0002666) Loss: 0.4950 (diff=0.4625, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:01:17] (step=0002667) Loss: 0.4773 (diff=0.4481, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:01:37] (step=0002668) Loss: 0.4912 (diff=0.4602, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:01:57] (step=0002669) Loss: 0.4773 (diff=0.4470, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:02:16] (step=0002670) Loss: 0.4769 (diff=0.4453, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:02:36] (step=0002671) Loss: 0.4743 (diff=0.4442, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:02:56] (step=0002672) Loss: 0.4764 (diff=0.4442, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:03:15] (step=0002673) Loss: 0.4775 (diff=0.4472, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:03:35] (step=0002674) Loss: 0.4728 (diff=0.4420, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:03:55] (step=0002675) Loss: 0.4901 (diff=0.4556, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:04:15] (step=0002676) Loss: 0.4870 (diff=0.4555, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:04:34] (step=0002677) Loss: 0.4700 (diff=0.4395, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:04:54] (step=0002678) Loss: 0.4827 (diff=0.4498, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:05:14] (step=0002679) Loss: 0.4941 (diff=0.4612, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:05:33] (step=0002680) Loss: 0.4748 (diff=0.4433, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:05:53] (step=0002681) Loss: 0.4935 (diff=0.4616, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:06:13] (step=0002682) Loss: 0.4759 (diff=0.4471, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:06:32] (step=0002683) Loss: 0.4736 (diff=0.4451, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.52, LR: 0.0001 +[2026-04-15 00:06:52] (step=0002684) Loss: 0.4887 (diff=0.4570, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:07:12] (step=0002685) Loss: 0.4822 (diff=0.4503, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:07:31] (step=0002686) Loss: 0.4764 (diff=0.4457, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:07:51] (step=0002687) Loss: 0.4939 (diff=0.4596, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:08:11] (step=0002688) Loss: 0.4735 (diff=0.4445, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:08:30] (step=0002689) Loss: 0.4684 (diff=0.4372, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:08:50] (step=0002690) Loss: 0.4886 (diff=0.4567, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:09:10] (step=0002691) Loss: 0.4945 (diff=0.4609, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:09:29] (step=0002692) Loss: 0.4903 (diff=0.4574, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:09:49] (step=0002693) Loss: 0.4828 (diff=0.4506, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:10:09] (step=0002694) Loss: 0.4797 (diff=0.4476, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:10:28] (step=0002695) Loss: 0.4900 (diff=0.4561, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:10:48] (step=0002696) Loss: 0.4937 (diff=0.4592, anat=0.0345), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:11:08] (step=0002697) Loss: 0.4792 (diff=0.4483, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:11:27] (step=0002698) Loss: 0.4803 (diff=0.4501, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:11:47] (step=0002699) Loss: 0.4916 (diff=0.4600, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:12:07] (step=0002700) Loss: 0.4958 (diff=0.4620, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:12:26] (step=0002701) Loss: 0.4820 (diff=0.4518, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:12:46] (step=0002702) Loss: 0.4646 (diff=0.4376, anat=0.0270), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:13:06] (step=0002703) Loss: 0.4847 (diff=0.4534, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:13:26] (step=0002704) Loss: 0.4661 (diff=0.4359, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:13:45] (step=0002705) Loss: 0.4593 (diff=0.4291, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:14:05] (step=0002706) Loss: 0.4825 (diff=0.4491, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:14:24] (step=0002707) Loss: 0.4770 (diff=0.4468, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:14:44] (step=0002708) Loss: 0.4672 (diff=0.4380, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:15:04] (step=0002709) Loss: 0.4774 (diff=0.4471, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:15:23] (step=0002710) Loss: 0.4855 (diff=0.4532, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:15:43] (step=0002711) Loss: 0.4799 (diff=0.4517, anat=0.0282), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:16:03] (step=0002712) Loss: 0.4900 (diff=0.4583, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:16:23] (step=0002713) Loss: 0.4911 (diff=0.4600, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:16:42] (step=0002714) Loss: 0.4859 (diff=0.4544, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:17:02] (step=0002715) Loss: 0.4794 (diff=0.4486, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:17:22] (step=0002716) Loss: 0.4911 (diff=0.4575, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:17:41] (step=0002717) Loss: 0.4867 (diff=0.4536, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:18:01] (step=0002718) Loss: 0.4652 (diff=0.4353, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:18:21] (step=0002719) Loss: 0.4756 (diff=0.4434, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:18:40] (step=0002720) Loss: 0.5020 (diff=0.4682, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:19:00] (step=0002721) Loss: 0.4690 (diff=0.4363, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:19:20] (step=0002722) Loss: 0.4729 (diff=0.4404, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:19:39] (step=0002723) Loss: 0.4804 (diff=0.4491, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:19:59] (step=0002724) Loss: 0.4826 (diff=0.4486, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:20:19] (step=0002725) Loss: 0.4783 (diff=0.4465, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:20:38] (step=0002726) Loss: 0.4819 (diff=0.4528, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:20:58] (step=0002727) Loss: 0.4664 (diff=0.4357, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:21:18] (step=0002728) Loss: 0.4695 (diff=0.4394, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:21:37] (step=0002729) Loss: 0.4902 (diff=0.4573, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:21:57] (step=0002730) Loss: 0.4744 (diff=0.4444, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:22:17] (step=0002731) Loss: 0.4717 (diff=0.4419, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:22:36] (step=0002732) Loss: 0.4848 (diff=0.4530, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:22:56] (step=0002733) Loss: 0.4772 (diff=0.4463, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:23:16] (step=0002734) Loss: 0.4833 (diff=0.4506, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.53, LR: 0.0001 +[2026-04-15 00:23:36] (step=0002735) Loss: 0.4797 (diff=0.4480, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:23:55] (step=0002736) Loss: 0.4827 (diff=0.4496, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:24:15] (step=0002737) Loss: 0.4765 (diff=0.4452, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:24:35] (step=0002738) Loss: 0.4811 (diff=0.4490, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:24:54] (step=0002739) Loss: 0.4798 (diff=0.4492, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:25:14] (step=0002740) Loss: 0.4838 (diff=0.4529, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:25:34] (step=0002741) Loss: 0.4904 (diff=0.4588, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:25:53] (step=0002742) Loss: 0.4736 (diff=0.4440, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:26:13] (step=0002743) Loss: 0.4742 (diff=0.4456, anat=0.0286), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:26:33] (step=0002744) Loss: 0.4886 (diff=0.4569, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:26:52] (step=0002745) Loss: 0.4828 (diff=0.4503, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:27:12] (step=0002746) Loss: 0.4852 (diff=0.4547, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:27:32] (step=0002747) Loss: 0.4749 (diff=0.4441, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:27:51] (step=0002748) Loss: 0.4795 (diff=0.4482, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:28:11] (step=0002749) Loss: 0.4819 (diff=0.4520, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:28:31] (step=0002750) Loss: 0.4777 (diff=0.4468, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:28:51] (step=0002751) Loss: 0.4760 (diff=0.4455, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:29:11] (step=0002752) Loss: 0.4735 (diff=0.4431, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:29:31] (step=0002753) Loss: 0.4634 (diff=0.4348, anat=0.0286), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:29:50] (step=0002754) Loss: 0.4685 (diff=0.4373, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:30:10] (step=0002755) Loss: 0.4822 (diff=0.4508, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:30:30] (step=0002756) Loss: 0.4711 (diff=0.4387, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:30:50] (step=0002757) Loss: 0.4777 (diff=0.4459, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:31:09] (step=0002758) Loss: 0.4736 (diff=0.4441, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:31:29] (step=0002759) Loss: 0.4917 (diff=0.4593, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:31:49] (step=0002760) Loss: 0.4782 (diff=0.4469, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:32:09] (step=0002761) Loss: 0.4820 (diff=0.4506, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:32:29] (step=0002762) Loss: 0.4838 (diff=0.4520, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:32:48] (step=0002763) Loss: 0.4801 (diff=0.4488, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:33:08] (step=0002764) Loss: 0.4992 (diff=0.4630, anat=0.0361), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:33:28] (step=0002765) Loss: 0.4795 (diff=0.4471, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:33:48] (step=0002766) Loss: 0.4811 (diff=0.4507, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:34:08] (step=0002767) Loss: 0.4635 (diff=0.4357, anat=0.0278), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:34:28] (step=0002768) Loss: 0.4729 (diff=0.4431, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:34:47] (step=0002769) Loss: 0.4961 (diff=0.4618, anat=0.0343), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:35:07] (step=0002770) Loss: 0.4823 (diff=0.4508, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:35:27] (step=0002771) Loss: 0.4573 (diff=0.4296, anat=0.0277), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:35:47] (step=0002772) Loss: 0.4822 (diff=0.4514, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:36:06] (step=0002773) Loss: 0.4837 (diff=0.4506, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:36:26] (step=0002774) Loss: 0.4810 (diff=0.4489, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:36:46] (step=0002775) Loss: 0.4667 (diff=0.4396, anat=0.0272), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:37:06] (step=0002776) Loss: 0.4904 (diff=0.4573, anat=0.0332), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:37:25] (step=0002777) Loss: 0.4864 (diff=0.4548, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:37:45] (step=0002778) Loss: 0.4756 (diff=0.4454, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:38:05] (step=0002779) Loss: 0.4805 (diff=0.4494, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:38:25] (step=0002780) Loss: 0.4858 (diff=0.4521, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:38:44] (step=0002781) Loss: 0.4937 (diff=0.4629, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:39:04] (step=0002782) Loss: 0.4879 (diff=0.4551, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:39:24] (step=0002783) Loss: 0.4886 (diff=0.4581, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:39:44] (step=0002784) Loss: 0.4946 (diff=0.4608, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:40:04] (step=0002785) Loss: 0.4775 (diff=0.4475, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.54, LR: 0.0001 +[2026-04-15 00:40:23] (step=0002786) Loss: 0.4709 (diff=0.4409, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:40:43] (step=0002787) Loss: 0.4816 (diff=0.4525, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:41:03] (step=0002788) Loss: 0.4824 (diff=0.4528, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:41:23] (step=0002789) Loss: 0.4694 (diff=0.4391, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:41:42] (step=0002790) Loss: 0.4849 (diff=0.4539, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:42:02] (step=0002791) Loss: 0.4787 (diff=0.4469, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:42:22] (step=0002792) Loss: 0.4785 (diff=0.4475, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:42:42] (step=0002793) Loss: 0.4849 (diff=0.4535, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:43:01] (step=0002794) Loss: 0.4847 (diff=0.4533, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:43:21] (step=0002795) Loss: 0.4756 (diff=0.4453, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:43:40] (step=0002796) Loss: 0.4817 (diff=0.4505, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:44:00] (step=0002797) Loss: 0.4801 (diff=0.4483, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:44:20] (step=0002798) Loss: 0.4918 (diff=0.4617, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:44:39] (step=0002799) Loss: 0.4554 (diff=0.4279, anat=0.0274), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:44:59] (step=0002800) Loss: 0.4811 (diff=0.4485, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:45:19] (step=0002801) Loss: 0.4759 (diff=0.4457, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:45:38] (step=0002802) Loss: 0.4711 (diff=0.4410, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:45:58] (step=0002803) Loss: 0.4775 (diff=0.4471, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:46:18] (step=0002804) Loss: 0.4885 (diff=0.4581, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:46:37] (step=0002805) Loss: 0.4839 (diff=0.4543, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:46:57] (step=0002806) Loss: 0.4890 (diff=0.4574, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:47:17] (step=0002807) Loss: 0.4942 (diff=0.4613, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:47:36] (step=0002808) Loss: 0.4809 (diff=0.4508, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:47:56] (step=0002809) Loss: 0.4735 (diff=0.4423, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:48:16] (step=0002810) Loss: 0.4794 (diff=0.4480, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:48:35] (step=0002811) Loss: 0.4773 (diff=0.4461, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:48:55] (step=0002812) Loss: 0.4834 (diff=0.4503, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:49:15] (step=0002813) Loss: 0.4886 (diff=0.4545, anat=0.0340), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:49:34] (step=0002814) Loss: 0.4846 (diff=0.4534, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:49:54] (step=0002815) Loss: 0.4772 (diff=0.4449, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:50:14] (step=0002816) Loss: 0.4964 (diff=0.4638, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:50:34] (step=0002817) Loss: 0.4779 (diff=0.4470, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:50:53] (step=0002818) Loss: 0.4905 (diff=0.4589, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:51:13] (step=0002819) Loss: 0.4870 (diff=0.4542, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:51:33] (step=0002820) Loss: 0.4763 (diff=0.4442, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:51:52] (step=0002821) Loss: 0.4821 (diff=0.4488, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:52:12] (step=0002822) Loss: 0.4915 (diff=0.4597, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:52:32] (step=0002823) Loss: 0.4900 (diff=0.4564, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:52:52] (step=0002824) Loss: 0.4671 (diff=0.4378, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:53:12] (step=0002825) Loss: 0.4840 (diff=0.4521, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:53:31] (step=0002826) Loss: 0.4850 (diff=0.4537, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:53:51] (step=0002827) Loss: 0.4729 (diff=0.4435, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:54:11] (step=0002828) Loss: 0.4749 (diff=0.4459, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:54:31] (step=0002829) Loss: 0.4826 (diff=0.4511, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:54:50] (step=0002830) Loss: 0.4843 (diff=0.4522, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:55:10] (step=0002831) Loss: 0.4979 (diff=0.4641, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:55:30] (step=0002832) Loss: 0.4824 (diff=0.4509, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:55:50] (step=0002833) Loss: 0.4889 (diff=0.4562, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:56:09] (step=0002834) Loss: 0.4952 (diff=0.4628, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:56:29] (step=0002835) Loss: 0.4731 (diff=0.4415, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:56:49] (step=0002836) Loss: 0.4862 (diff=0.4542, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:57:08] (step=0002837) Loss: 0.4761 (diff=0.4442, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.55, LR: 0.0001 +[2026-04-15 00:57:28] (step=0002838) Loss: 0.4826 (diff=0.4518, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 00:57:48] (step=0002839) Loss: 0.4895 (diff=0.4567, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 00:58:07] (step=0002840) Loss: 0.4823 (diff=0.4485, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 00:58:27] (step=0002841) Loss: 0.4819 (diff=0.4513, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 00:58:47] (step=0002842) Loss: 0.4889 (diff=0.4576, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 00:59:06] (step=0002843) Loss: 0.4850 (diff=0.4527, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 00:59:26] (step=0002844) Loss: 0.4742 (diff=0.4436, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 00:59:46] (step=0002845) Loss: 0.4811 (diff=0.4500, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:00:05] (step=0002846) Loss: 0.4952 (diff=0.4616, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:00:25] (step=0002847) Loss: 0.4776 (diff=0.4448, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:00:45] (step=0002848) Loss: 0.4672 (diff=0.4354, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:01:04] (step=0002849) Loss: 0.4663 (diff=0.4362, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:01:24] (step=0002850) Loss: 0.4858 (diff=0.4543, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:01:44] (step=0002851) Loss: 0.4768 (diff=0.4472, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:02:03] (step=0002852) Loss: 0.4919 (diff=0.4594, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:02:23] (step=0002853) Loss: 0.4865 (diff=0.4548, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:02:43] (step=0002854) Loss: 0.4902 (diff=0.4561, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:03:03] (step=0002855) Loss: 0.4744 (diff=0.4429, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:03:22] (step=0002856) Loss: 0.4900 (diff=0.4586, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:03:42] (step=0002857) Loss: 0.4887 (diff=0.4562, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:04:01] (step=0002858) Loss: 0.4856 (diff=0.4532, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:04:21] (step=0002859) Loss: 0.4711 (diff=0.4424, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:04:41] (step=0002860) Loss: 0.4953 (diff=0.4606, anat=0.0347), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:05:00] (step=0002861) Loss: 0.4910 (diff=0.4602, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:05:20] (step=0002862) Loss: 0.4864 (diff=0.4537, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:05:40] (step=0002863) Loss: 0.4737 (diff=0.4429, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:06:00] (step=0002864) Loss: 0.4947 (diff=0.4629, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:06:19] (step=0002865) Loss: 0.4662 (diff=0.4382, anat=0.0280), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:06:39] (step=0002866) Loss: 0.4742 (diff=0.4424, anat=0.0318), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:06:59] (step=0002867) Loss: 0.4881 (diff=0.4569, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:07:18] (step=0002868) Loss: 0.4785 (diff=0.4491, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:07:38] (step=0002869) Loss: 0.4980 (diff=0.4654, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:07:58] (step=0002870) Loss: 0.4923 (diff=0.4595, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:08:17] (step=0002871) Loss: 0.4808 (diff=0.4489, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:08:37] (step=0002872) Loss: 0.4742 (diff=0.4418, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:08:57] (step=0002873) Loss: 0.4808 (diff=0.4493, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:09:16] (step=0002874) Loss: 0.4863 (diff=0.4548, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:09:36] (step=0002875) Loss: 0.4887 (diff=0.4566, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:09:56] (step=0002876) Loss: 0.4772 (diff=0.4457, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:10:15] (step=0002877) Loss: 0.4822 (diff=0.4497, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:10:35] (step=0002878) Loss: 0.4742 (diff=0.4439, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:10:55] (step=0002879) Loss: 0.4785 (diff=0.4474, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:11:14] (step=0002880) Loss: 0.4989 (diff=0.4656, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:11:34] (step=0002881) Loss: 0.4961 (diff=0.4627, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:11:54] (step=0002882) Loss: 0.4904 (diff=0.4575, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:12:13] (step=0002883) Loss: 0.4793 (diff=0.4466, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:12:33] (step=0002884) Loss: 0.4989 (diff=0.4677, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:12:53] (step=0002885) Loss: 0.4827 (diff=0.4531, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:13:13] (step=0002886) Loss: 0.4766 (diff=0.4438, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:13:32] (step=0002887) Loss: 0.4708 (diff=0.4410, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:13:52] (step=0002888) Loss: 0.4905 (diff=0.4569, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.56, LR: 0.0001 +[2026-04-15 01:14:12] (step=0002889) Loss: 0.4923 (diff=0.4589, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:14:31] (step=0002890) Loss: 0.4736 (diff=0.4429, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:14:51] (step=0002891) Loss: 0.4825 (diff=0.4488, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:15:10] (step=0002892) Loss: 0.4884 (diff=0.4549, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:15:30] (step=0002893) Loss: 0.4888 (diff=0.4551, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:15:50] (step=0002894) Loss: 0.4786 (diff=0.4467, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:16:10] (step=0002895) Loss: 0.4804 (diff=0.4462, anat=0.0341), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:16:29] (step=0002896) Loss: 0.4822 (diff=0.4524, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:16:49] (step=0002897) Loss: 0.4727 (diff=0.4434, anat=0.0293), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:17:09] (step=0002898) Loss: 0.4865 (diff=0.4565, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:17:28] (step=0002899) Loss: 0.4844 (diff=0.4513, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:17:48] (step=0002900) Loss: 0.4765 (diff=0.4451, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:18:08] (step=0002901) Loss: 0.4962 (diff=0.4633, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:18:27] (step=0002902) Loss: 0.4867 (diff=0.4540, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:18:47] (step=0002903) Loss: 0.4789 (diff=0.4463, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:19:07] (step=0002904) Loss: 0.4700 (diff=0.4426, anat=0.0274), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:19:26] (step=0002905) Loss: 0.4899 (diff=0.4595, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:19:46] (step=0002906) Loss: 0.4702 (diff=0.4406, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:20:05] (step=0002907) Loss: 0.4868 (diff=0.4520, anat=0.0348), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:20:25] (step=0002908) Loss: 0.4729 (diff=0.4445, anat=0.0284), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:20:45] (step=0002909) Loss: 0.4740 (diff=0.4455, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:21:05] (step=0002910) Loss: 0.4869 (diff=0.4543, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:21:24] (step=0002911) Loss: 0.4736 (diff=0.4412, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:21:44] (step=0002912) Loss: 0.4761 (diff=0.4448, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:22:04] (step=0002913) Loss: 0.4754 (diff=0.4447, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:22:23] (step=0002914) Loss: 0.4843 (diff=0.4523, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:22:43] (step=0002915) Loss: 0.4913 (diff=0.4586, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:23:03] (step=0002916) Loss: 0.4688 (diff=0.4394, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:23:22] (step=0002917) Loss: 0.4856 (diff=0.4528, anat=0.0328), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:23:42] (step=0002918) Loss: 0.4863 (diff=0.4555, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:24:02] (step=0002919) Loss: 0.4886 (diff=0.4562, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:24:21] (step=0002920) Loss: 0.4873 (diff=0.4557, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:24:41] (step=0002921) Loss: 0.4975 (diff=0.4647, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:25:01] (step=0002922) Loss: 0.4863 (diff=0.4543, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:25:20] (step=0002923) Loss: 0.4758 (diff=0.4473, anat=0.0285), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:25:40] (step=0002924) Loss: 0.4812 (diff=0.4501, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:26:00] (step=0002925) Loss: 0.4756 (diff=0.4449, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:26:19] (step=0002926) Loss: 0.4734 (diff=0.4433, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:26:39] (step=0002927) Loss: 0.4810 (diff=0.4494, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:26:59] (step=0002928) Loss: 0.4957 (diff=0.4638, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:27:18] (step=0002929) Loss: 0.4620 (diff=0.4330, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:27:38] (step=0002930) Loss: 0.4704 (diff=0.4394, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:27:58] (step=0002931) Loss: 0.4715 (diff=0.4392, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:28:18] (step=0002932) Loss: 0.4918 (diff=0.4596, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:28:37] (step=0002933) Loss: 0.4756 (diff=0.4461, anat=0.0295), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:28:57] (step=0002934) Loss: 0.4600 (diff=0.4311, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:29:17] (step=0002935) Loss: 0.4798 (diff=0.4482, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:29:36] (step=0002936) Loss: 0.4664 (diff=0.4374, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:29:56] (step=0002937) Loss: 0.4889 (diff=0.4569, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:30:16] (step=0002938) Loss: 0.4784 (diff=0.4477, anat=0.0307), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:30:35] (step=0002939) Loss: 0.4856 (diff=0.4521, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.57, LR: 0.0001 +[2026-04-15 01:30:55] (step=0002940) Loss: 0.4802 (diff=0.4486, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:31:15] (step=0002941) Loss: 0.4861 (diff=0.4525, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:31:34] (step=0002942) Loss: 0.4729 (diff=0.4429, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:31:54] (step=0002943) Loss: 0.4903 (diff=0.4598, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:32:14] (step=0002944) Loss: 0.4791 (diff=0.4500, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:32:34] (step=0002945) Loss: 0.4738 (diff=0.4446, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:32:53] (step=0002946) Loss: 0.4756 (diff=0.4479, anat=0.0277), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:33:13] (step=0002947) Loss: 0.4787 (diff=0.4482, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:33:33] (step=0002948) Loss: 0.4865 (diff=0.4538, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:33:52] (step=0002949) Loss: 0.4853 (diff=0.4543, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:34:12] (step=0002950) Loss: 0.4830 (diff=0.4507, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:34:32] (step=0002951) Loss: 0.4763 (diff=0.4446, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:34:51] (step=0002952) Loss: 0.4908 (diff=0.4588, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:35:11] (step=0002953) Loss: 0.4828 (diff=0.4529, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:35:31] (step=0002954) Loss: 0.4924 (diff=0.4586, anat=0.0338), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:35:51] (step=0002955) Loss: 0.4894 (diff=0.4581, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:36:10] (step=0002956) Loss: 0.4707 (diff=0.4402, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:36:30] (step=0002957) Loss: 0.4637 (diff=0.4349, anat=0.0288), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:36:50] (step=0002958) Loss: 0.4872 (diff=0.4558, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:37:09] (step=0002959) Loss: 0.4722 (diff=0.4435, anat=0.0287), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:37:29] (step=0002960) Loss: 0.4797 (diff=0.4489, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:37:49] (step=0002961) Loss: 0.4891 (diff=0.4567, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:38:08] (step=0002962) Loss: 0.4841 (diff=0.4520, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:38:28] (step=0002963) Loss: 0.4864 (diff=0.4543, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:38:48] (step=0002964) Loss: 0.4905 (diff=0.4602, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:39:07] (step=0002965) Loss: 0.4755 (diff=0.4452, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:39:27] (step=0002966) Loss: 0.4666 (diff=0.4363, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:39:47] (step=0002967) Loss: 0.4874 (diff=0.4549, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:40:06] (step=0002968) Loss: 0.4715 (diff=0.4411, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:40:26] (step=0002969) Loss: 0.4920 (diff=0.4591, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:40:46] (step=0002970) Loss: 0.4819 (diff=0.4530, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:41:05] (step=0002971) Loss: 0.4745 (diff=0.4439, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:41:25] (step=0002972) Loss: 0.4812 (diff=0.4482, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:41:45] (step=0002973) Loss: 0.4884 (diff=0.4547, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:42:04] (step=0002974) Loss: 0.4745 (diff=0.4470, anat=0.0275), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:42:24] (step=0002975) Loss: 0.4923 (diff=0.4573, anat=0.0350), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:42:44] (step=0002976) Loss: 0.4791 (diff=0.4478, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:43:03] (step=0002977) Loss: 0.4845 (diff=0.4540, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:43:23] (step=0002978) Loss: 0.4788 (diff=0.4466, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:43:43] (step=0002979) Loss: 0.4853 (diff=0.4527, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:44:03] (step=0002980) Loss: 0.4769 (diff=0.4475, anat=0.0294), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:44:22] (step=0002981) Loss: 0.4779 (diff=0.4476, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:44:42] (step=0002982) Loss: 0.4761 (diff=0.4453, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:45:02] (step=0002983) Loss: 0.4791 (diff=0.4470, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:45:21] (step=0002984) Loss: 0.4809 (diff=0.4495, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:45:41] (step=0002985) Loss: 0.4772 (diff=0.4464, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:46:01] (step=0002986) Loss: 0.4771 (diff=0.4435, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:46:20] (step=0002987) Loss: 0.4912 (diff=0.4559, anat=0.0353), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:46:40] (step=0002988) Loss: 0.4894 (diff=0.4558, anat=0.0336), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:47:00] (step=0002989) Loss: 0.4766 (diff=0.4450, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:47:19] (step=0002990) Loss: 0.4735 (diff=0.4426, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.58, LR: 0.0001 +[2026-04-15 01:47:39] (step=0002991) Loss: 0.4818 (diff=0.4497, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:47:59] (step=0002992) Loss: 0.4758 (diff=0.4467, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:48:18] (step=0002993) Loss: 0.4877 (diff=0.4557, anat=0.0320), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:48:38] (step=0002994) Loss: 0.4689 (diff=0.4403, anat=0.0286), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:48:58] (step=0002995) Loss: 0.4762 (diff=0.4457, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:49:18] (step=0002996) Loss: 0.4932 (diff=0.4598, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:49:37] (step=0002997) Loss: 0.4827 (diff=0.4502, anat=0.0325), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:49:57] (step=0002998) Loss: 0.4703 (diff=0.4394, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:50:17] (step=0002999) Loss: 0.4769 (diff=0.4471, anat=0.0298), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:50:36] (step=0003000) Loss: 0.4889 (diff=0.4555, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:50:36] [RANK 0] Saving current state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000 +[2026-04-15 01:50:48] [RANK 0] Model weights saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/model.safetensors +[2026-04-15 01:50:48] [RANK 0] Optimizer state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/optimizer.bin +[2026-04-15 01:50:48] [RANK 0] Scheduler state saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/scheduler.bin +[2026-04-15 01:50:48] [RANK 0] Sampler state for dataloader 0 saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/sampler.bin +[2026-04-15 01:50:48] [RANK 0] Random states saved in results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000/random_states_0.pkl +[2026-04-15 01:50:48] Saved accelerator state to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/checkpoint-3000 +[2026-04-15 01:50:48] Saved checkpoint to ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter/checkpoints/0003000/ +[2026-04-15 01:51:07] (step=0003001) Loss: 0.4765 (diff=0.4468, anat=0.0297), Steps/Sec: 0.03, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:51:27] (step=0003002) Loss: 0.4617 (diff=0.4342, anat=0.0275), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:51:46] (step=0003003) Loss: 0.4796 (diff=0.4459, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:52:06] (step=0003004) Loss: 0.4937 (diff=0.4608, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:52:26] (step=0003005) Loss: 0.4812 (diff=0.4512, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:52:46] (step=0003006) Loss: 0.4672 (diff=0.4396, anat=0.0275), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:53:05] (step=0003007) Loss: 0.4687 (diff=0.4381, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:53:25] (step=0003008) Loss: 0.4758 (diff=0.4453, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:53:45] (step=0003009) Loss: 0.4810 (diff=0.4493, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:54:05] (step=0003010) Loss: 0.4809 (diff=0.4508, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:54:25] (step=0003011) Loss: 0.4800 (diff=0.4500, anat=0.0300), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:54:45] (step=0003012) Loss: 0.4749 (diff=0.4447, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:55:04] (step=0003013) Loss: 0.4788 (diff=0.4477, anat=0.0311), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:55:24] (step=0003014) Loss: 0.4811 (diff=0.4499, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:55:44] (step=0003015) Loss: 0.4632 (diff=0.4341, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:56:04] (step=0003016) Loss: 0.4865 (diff=0.4549, anat=0.0317), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:56:24] (step=0003017) Loss: 0.4706 (diff=0.4415, anat=0.0290), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:56:43] (step=0003018) Loss: 0.4752 (diff=0.4464, anat=0.0289), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:57:03] (step=0003019) Loss: 0.4712 (diff=0.4429, anat=0.0283), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:57:23] (step=0003020) Loss: 0.4769 (diff=0.4456, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:57:43] (step=0003021) Loss: 0.4750 (diff=0.4420, anat=0.0329), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:58:02] (step=0003022) Loss: 0.4713 (diff=0.4400, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:58:22] (step=0003023) Loss: 0.4834 (diff=0.4515, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:58:42] (step=0003024) Loss: 0.4776 (diff=0.4463, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:59:02] (step=0003025) Loss: 0.4915 (diff=0.4602, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:59:22] (step=0003026) Loss: 0.4958 (diff=0.4636, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 01:59:42] (step=0003027) Loss: 0.4777 (diff=0.4475, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:00:01] (step=0003028) Loss: 0.4749 (diff=0.4445, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:00:21] (step=0003029) Loss: 0.4738 (diff=0.4435, anat=0.0303), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:00:41] (step=0003030) Loss: 0.4887 (diff=0.4574, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:01:01] (step=0003031) Loss: 0.4761 (diff=0.4459, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:01:21] (step=0003032) Loss: 0.4871 (diff=0.4559, anat=0.0313), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:01:40] (step=0003033) Loss: 0.4630 (diff=0.4338, anat=0.0292), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:02:00] (step=0003034) Loss: 0.4941 (diff=0.4580, anat=0.0360), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:02:20] (step=0003035) Loss: 0.4757 (diff=0.4441, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:02:40] (step=0003036) Loss: 0.4973 (diff=0.4650, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:02:59] (step=0003037) Loss: 0.4770 (diff=0.4436, anat=0.0334), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:03:19] (step=0003038) Loss: 0.4857 (diff=0.4519, anat=0.0339), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:03:39] (step=0003039) Loss: 0.4874 (diff=0.4537, anat=0.0337), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:03:59] (step=0003040) Loss: 0.4801 (diff=0.4478, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:04:19] (step=0003041) Loss: 0.4735 (diff=0.4433, anat=0.0302), Steps/Sec: 0.05, Epoch: 0.59, LR: 0.0001 +[2026-04-15 02:04:38] (step=0003042) Loss: 0.4882 (diff=0.4547, anat=0.0335), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:04:58] (step=0003043) Loss: 0.4967 (diff=0.4635, anat=0.0333), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:05:18] (step=0003044) Loss: 0.4731 (diff=0.4426, anat=0.0305), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:05:38] (step=0003045) Loss: 0.4870 (diff=0.4555, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:05:58] (step=0003046) Loss: 0.4758 (diff=0.4438, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:06:17] (step=0003047) Loss: 0.4963 (diff=0.4636, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:06:37] (step=0003048) Loss: 0.4771 (diff=0.4472, anat=0.0299), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:06:57] (step=0003049) Loss: 0.4883 (diff=0.4567, anat=0.0316), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:07:17] (step=0003050) Loss: 0.4805 (diff=0.4498, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:07:37] (step=0003051) Loss: 0.4815 (diff=0.4492, anat=0.0323), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:07:56] (step=0003052) Loss: 0.4699 (diff=0.4403, anat=0.0296), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:08:16] (step=0003053) Loss: 0.4799 (diff=0.4490, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:08:36] (step=0003054) Loss: 0.4821 (diff=0.4502, anat=0.0319), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:08:56] (step=0003055) Loss: 0.4865 (diff=0.4555, anat=0.0309), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:09:15] (step=0003056) Loss: 0.4883 (diff=0.4552, anat=0.0331), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:09:35] (step=0003057) Loss: 0.4882 (diff=0.4574, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:09:55] (step=0003058) Loss: 0.4807 (diff=0.4494, anat=0.0312), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:10:15] (step=0003059) Loss: 0.4841 (diff=0.4520, anat=0.0321), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:10:35] (step=0003060) Loss: 0.4897 (diff=0.4571, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:10:54] (step=0003061) Loss: 0.4785 (diff=0.4482, anat=0.0304), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:11:14] (step=0003062) Loss: 0.4912 (diff=0.4586, anat=0.0326), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:11:34] (step=0003063) Loss: 0.4861 (diff=0.4537, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:11:54] (step=0003064) Loss: 0.4747 (diff=0.4439, anat=0.0308), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:12:14] (step=0003065) Loss: 0.4738 (diff=0.4428, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:12:33] (step=0003066) Loss: 0.4860 (diff=0.4530, anat=0.0330), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:12:53] (step=0003067) Loss: 0.4695 (diff=0.4389, anat=0.0306), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:13:13] (step=0003068) Loss: 0.4727 (diff=0.4417, anat=0.0310), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:13:33] (step=0003069) Loss: 0.4878 (diff=0.4564, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:13:52] (step=0003070) Loss: 0.4751 (diff=0.4429, anat=0.0322), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:14:12] (step=0003071) Loss: 0.4688 (diff=0.4405, anat=0.0283), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:14:32] (step=0003072) Loss: 0.4829 (diff=0.4515, anat=0.0314), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:14:52] (step=0003073) Loss: 0.4752 (diff=0.4460, anat=0.0291), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:15:11] (step=0003074) Loss: 0.4716 (diff=0.4415, anat=0.0301), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:15:31] (step=0003075) Loss: 0.4902 (diff=0.4577, anat=0.0324), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:15:51] (step=0003076) Loss: 0.4741 (diff=0.4426, anat=0.0315), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:16:10] (step=0003077) Loss: 0.4891 (diff=0.4564, anat=0.0327), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 +[2026-04-15 02:16:30] (step=0003078) Loss: 0.4704 (diff=0.4406, anat=0.0297), Steps/Sec: 0.05, Epoch: 0.60, LR: 0.0001 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/1776184012.0913074/events.out.tfevents.1776184012.xinglab4-SYS-740GP-TNRT.2420392.1 b/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/1776184012.0913074/events.out.tfevents.1776184012.xinglab4-SYS-740GP-TNRT.2420392.1 new file mode 100644 index 0000000000000000000000000000000000000000..d50d5f774ee8c186d4085965a0f516bd820b9732 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/1776184012.0913074/events.out.tfevents.1776184012.xinglab4-SYS-740GP-TNRT.2420392.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1caf5d4892478fa67206b91b49c0cdfc9bee60bdfaef8057b135896cd41eb99 +size 2022 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/1776184012.0924573/hparams.yml b/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/1776184012.0924573/hparams.yml new file mode 100644 index 0000000000000000000000000000000000000000..08ec17953c56cc8b58b3c94df6882f4dc6e3ffb6 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/1776184012.0924573/hparams.yml @@ -0,0 +1,32 @@ +adam_weight_decay: 0.0 +anatomy_alpha: 4.0 +anatomy_subbatch_size: 16 +batch_size_per_device: 16 +ckpt_every: 500 +condition_dropout_prob: 0.01 +debug_vis: false +epochs: 10 +gradient_accumulation_steps: 4 +image_path: ./ +json_file: /home/wenting/zr/wt_dataset/LIDC_IDRI/anno/cxr_synth_anno_mask_train.jsonl +keep_raw_resolution: true +lambda_anatomy: 1.0 +log_every: 1 +lora_rank: 8 +lora_resume_path: /home/wenting/zr/gen_code/results/cxr_finetune_lora/checkpoints/0030000/ +lr: 0.0001 +lr_scheduler: constant_with_warmup +lr_warmup_steps: 500 +max_grad_norm: 1.0 +max_image_size: 1024 +max_input_length_limit: 18000 +mixed_precision: bf16 +model_name_or_path: Shitao/OmniGen-v1 +num_workers: 4 +report_to: tensorboard +results_dir: ./results/cxr_finetune_lora_30ksteps_maskmse_timefilter +resume_from_checkpoint: null +seg_model_ckpt: /home/wenting/zr/Segmentation/checkpoints/train_Seg/best_anatomy_model.pth +use_ema: false +use_lora: true +vae_path: null diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/events.out.tfevents.1776184012.xinglab4-SYS-740GP-TNRT.2420392.0 b/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/events.out.tfevents.1776184012.xinglab4-SYS-740GP-TNRT.2420392.0 new file mode 100644 index 0000000000000000000000000000000000000000..740f4e882a640da48151e73c87479cdbe3b08993 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/tensorboard_log/events.out.tfevents.1776184012.xinglab4-SYS-740GP-TNRT.2420392.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df4f0a2500789af806139daac1a70c252f49af57c08a2966399f52b58251d86 +size 2215867 diff --git a/cxr_finetune_lora_30ksteps_maskmse_timefilter/train_args.json b/cxr_finetune_lora_30ksteps_maskmse_timefilter/train_args.json new file mode 100644 index 0000000000000000000000000000000000000000..aabbb6365f6fcbe6cc13bbaa5c1ed5493e3367b0 --- /dev/null +++ b/cxr_finetune_lora_30ksteps_maskmse_timefilter/train_args.json @@ -0,0 +1 @@ +{"results_dir": "./results/cxr_finetune_lora_30ksteps_maskmse_timefilter", "model_name_or_path": "Shitao/OmniGen-v1", "json_file": "/home/wenting/zr/wt_dataset/LIDC_IDRI/anno/cxr_synth_anno_mask_train.jsonl", "image_path": "./", "epochs": 10, "batch_size_per_device": 16, "vae_path": null, "num_workers": 4, "log_every": 1, "ckpt_every": 500, "max_grad_norm": 1.0, "lr": 0.0001, "max_input_length_limit": 18000, "condition_dropout_prob": 0.01, "adam_weight_decay": 0.0, "keep_raw_resolution": true, "max_image_size": 1024, "use_lora": true, "lora_rank": 8, "use_ema": false, "lr_scheduler": "constant_with_warmup", "lr_warmup_steps": 500, "report_to": "tensorboard", "mixed_precision": "bf16", "gradient_accumulation_steps": 4, "resume_from_checkpoint": null, "lora_resume_path": "/home/wenting/zr/gen_code/results/cxr_finetune_lora/checkpoints/0030000/", "seg_model_ckpt": "/home/wenting/zr/Segmentation/checkpoints/train_Seg/best_anatomy_model.pth", "lambda_anatomy": 1.0, "anatomy_subbatch_size": 16, "anatomy_alpha": 4.0, "debug_vis": false} \ No newline at end of file